430dd47cd08928eb670b3ec9ef4f2650e11741b1
[com/gs-lite.git] / src / ftacmp / analyze_fta.cc
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #include<unistd.h>
17
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
21
22
23 #include"analyze_fta.h"
24
25 #include"type_objects.h"
26
27 #include <string>
28 #include<list>
29
30 using namespace std;
31
32 extern string hostname;                 // name of the current host
33
34 //                      Utility function
35
// Render a signed integer as its decimal text form (e.g. -17 -> "-17").
string int_to_string(int i){
    char digits[100];       // far more room than any decimal int requires
    sprintf(digits, "%d", i);
    return string(digits);
}
43
44
45 //                              Globals
46
47 //                      These represent derived information from the
48 //                      query analysis stage.  I extract them from a class,
49 //                      perhaps this is dangerous.
50
51 static gb_table *gb_tbl=NULL;                   // Table of all group-by attributes.
52 static aggregate_table *aggr_tbl=NULL;  // Table of all referenced aggregates.
53
54 // static cplx_lit_table *complex_literals=NULL;        // Table of literals with constructors.
55 static param_table *param_tbl=NULL;             // Table of all referenced parameters.
56
57 vector<scalarexp_t *> partial_fcns_list;
58 int wh_partial_start, wh_partial_end;
59 int gb_partial_start, gb_partial_end;
60 int aggr_partial_start, aggr_partial_end;
61 int sl_partial_start, sl_partial_end;
62
63
//      Infer the table of a column reference and return the table ref.
//      First, extract the field name and table name.  If no table name
//      is used, search all tables to try to find a unique match.
//      Of course, plenty of error checking.
69
70 //              Return the set of tablevar indices in the FROM clause
71 //              which contain a field with the same name.
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
73         int i;
74         vector<int> tv;
75 //      vector<string> tn = fm->get_schema_names();
76         vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
78         for(i=0;i<tn.size();i++){
79 //              if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80                 if(Schema->contains_field(tn[i], field) ){
81                         tv.push_back(i);
82 // printf("\tfound in table %s\n",tn[i].c_str());
83                 }
84         }
85         return(tv);
86 }
87
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
89         int i;
90         string tname = ir->get_tablevar();
91         if(tname ==""){
92                 if(fm->size()==1) return 0;
93                 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
94                 return -1;
95         }
96         for(i=0;i<fm->size();++i){
97                 if(tname == fm->get_tablevar_name(i))
98                         return i;
99         }
100         fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
101         return -1;
102 }
103
104
105 //              compute the index of the tablevar in the from clause that the
106 //              colref is in.
107 //              return -1 if no tablevar can be imputed.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
109         int i;
110         string table_name;
111         int table_ref;
112         vector<int> tv;
113         vector<tablevar_t *> fm_tbls = fm->get_table_list();
114
115         string field = cr->get_field();
116
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
118         if(cr->uses_default_table() ){
119                 tv = find_source_tables(field, fm, schema);
120                 if(tv.size() > 1){
121                         fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
123                         for(i=0;i<tv.size();i++){
124                                 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
125                         }
126                         fprintf(stderr,"\n\tYou must specify one of these.\n");
127                         return(-1);
128                 }
129                 if(tv.size() == 0){
130                         fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
132                         return(-1);
133                 }
134
135                 return(tv[0]);
136         }
137
138 //                      The table source is named -- but is it a schema name
139 //                      or a var name?
140
141         string interface = cr->get_interface();
142         table_name = cr->get_table_name();
143
144 //              if interface is not specified, prefer to look at the tablevar names
145 //              Check for duplicates.
146         if(interface==""){
147                 for(i=0;i<fm_tbls.size();++i){
148                         if(table_name == fm_tbls[i]->get_var_name())
149                                 tv.push_back(i);
150                 }
151                 if(tv.size() > 1){
152                         fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
153                         return(-1);
154                 }
155                 if(tv.size() == 1) return(tv[0]);
156         }
157
158 //              Tableref not found by looking at tableref vars, or an interface
159 //              was specified.  Try to match on schema and interface.
160 //              Check for duplicates.
161         for(i=0;i<fm_tbls.size();++i){
162                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
163                         tv.push_back(i);
164         }
165         if(tv.size() > 1){
166                 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167                         cr->get_lineno(), cr->get_charno() );
168                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
170                 return(-1);
171         }
172
173         if(tv.size() == 0 ){
174                 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
177                 return(-1)      ;
178         }
179
180         return(tv[0]);
181 }
182
183
184 //                      Reset temporal properties of a scalar expression
185 void reset_temporal(scalarexp_t *se){
186         col_id ci;
187         vector<scalarexp_t *> operands;
188         int o;
189
190         se->get_data_type()->reset_temporal();
191
192         switch(se->get_operator_type()){
193         case SE_LITERAL:
194         case SE_PARAM:
195         case SE_IFACE_PARAM:
196         case SE_COLREF:
197                 return;
198         case SE_UNARY_OP:
199                 reset_temporal(se->get_left_se());
200                 return;
201         case SE_BINARY_OP:
202                 reset_temporal(se->get_left_se());
203                 reset_temporal(se->get_right_se());
204                 return;
205         case SE_AGGR_STAR:
206                 return;
207         case SE_AGGR_SE:
208                 reset_temporal(se->get_left_se());
209                 return;
210         case SE_FUNC:
211                 operands = se->get_operands();
212                 for(o=0;o<operands.size();o++){
213                         reset_temporal(operands[o]);
214                 }
215                 return;
216         default:
217                 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
219                 exit(1);
220         }
221 }
222
223 //              Verify that column references exist in their
224 //              declared tables.  As a side effect, assign
225 //              their data types.  Other side effects :
226 //
227 //              return -1 on error
228
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230                                         table_list *schema, gb_table *gtbl){
231         int l_ret, r_ret;
232         int gb_ref;
233         colref_t *cr;
234         ifpref_t *ir;
235         string field, table_source, type_name;
236         data_type *dt;
237         vector<string> tn;
238         vector<int> tv;
239         int table_var;
240         int o;
241         vector<scalarexp_t *> operands;
242
243         switch(se->get_operator_type()){
244         case SE_LITERAL:
245         case SE_PARAM:
246                 return(1);
247         case SE_IFACE_PARAM:
248                 ir = se->get_ifpref();
249                 table_var = infer_tablevar_from_ifpref(ir, fm);
250                 if(table_var < 0) return(table_var);
251                 ir->set_tablevar_ref(table_var);
252                 return(1);
253         case SE_UNARY_OP:
254                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
255         case SE_BINARY_OP:
256                 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257                 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
259                 return(1);
260         case SE_COLREF:
261                 cr = se->get_colref();
262                 field = cr->get_field();
263
264 //                              Determine if this is really a GB ref.
265 //                              (the parser can only see that its a colref).
266                 if(gtbl != NULL){
267                         gb_ref = gtbl->find_gb(cr, fm, schema);
268                 }else{
269                         gb_ref = -1;
270                 }
271
272                 se->set_gb_ref(gb_ref);
273
274                 if(gb_ref < 0){
275 //                              Its a colref, verify its existance and
276 //                              record the data type.
277                         table_var = infer_tablevar_from_colref(cr,fm,schema);
278                         if(table_var < 0) return(table_var);
279
280         //                      Store the table ref in the colref.
281                         cr->set_tablevar_ref(table_var);
282                         cr->set_schema_ref(fm->get_schema_ref(table_var));
283                         cr->set_interface("");
284                         cr->set_table_name(fm->get_tablevar_name(table_var));
285
286
287                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
288                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
289                         dt = new data_type(type_name, modifiers);
290                         se->set_data_type(dt);
291                 }else{
292 //                              Else, its a gbref, use the GB var's data type.
293                         se->set_data_type(gtbl->get_data_type(gb_ref));
294                 }
295
296                 return(1);
297         case SE_AGGR_STAR:
298                 return(1);
299         case SE_AGGR_SE:
300                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
301         case SE_FUNC:
302                 operands = se->get_operands();
303                 r_ret = 1;
304                 for(o=0;o<operands.size();o++){
305                         l_ret = verify_colref(operands[o], fm, schema, gtbl);
306                         if(l_ret < 0) r_ret = -1;
307                 }
308                 return(r_ret);
309         default:
310                 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
311                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
312                 return(-1);
313         }
314         return(-1);
315 }
316
317
318 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
319         int l_ret, r_ret;
320         std::vector<scalarexp_t *> op_list;
321         int o;
322
323         switch(pr->get_operator_type()){
324         case PRED_IN:
325                 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
326         case PRED_COMPARE:
327                 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
328                 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
329                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
330                 return(1);
331         case PRED_UNARY_OP:
332                 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
333         case PRED_BINARY_OP:
334                 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
335                 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
336                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
337                 return(1);
338         case PRED_FUNC:
339                 op_list = pr->get_op_list();
340                 l_ret = 0;
341                 for(o=0;o<op_list.size();++o){
342                         if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
343                 }
344                 return(l_ret);
345         default:
346                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
347                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
348         }
349
350         return(-1);
351 }
352
353
354 bool literal_only_se(scalarexp_t *se){          // really only literals.
355         int o;
356         vector<scalarexp_t *> operands;
357
358         if(se == NULL) return(1);
359         switch(se->get_operator_type()){
360         case SE_LITERAL:
361                 return(true);
362         case SE_PARAM:
363                 return(false);
364         case SE_IFACE_PARAM:
365                 return(false);
366         case SE_UNARY_OP:
367                 return( literal_only_se(se->get_left_se()) );
368         case SE_BINARY_OP:
369                 return( literal_only_se(se->get_left_se()) &&
370                                 literal_only_se(se->get_right_se()) );
371         case SE_COLREF:
372                 return false;
373         case SE_AGGR_STAR:
374                 return false;
375         case SE_AGGR_SE:
376                 return false;
377                 return(1);
378         case SE_FUNC:
379                 return false;
380         default:
381                 return false;
382         }
383         return false;
384 }
385
386
387
388
389 //              Verify that column references exist in their
390 //              declared tables.  As a side effect, assign
391 //              their data types.  Other side effects :
392 //
393
394 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
395         int l_ret, r_ret;
396         int gb_ref;
397         colref_t *cr;
398         string field, table_source, type_name;
399         data_type *dt;
400         vector<string> tn;
401         vector<int> tv;
402         int tablevar_ref;
403         int o;
404         vector<scalarexp_t *> operands;
405
406         if(se == NULL) return(1);
407
408         switch(se->get_operator_type()){
409         case SE_LITERAL:
410                 return(1);
411         case SE_PARAM:
412                 return(1);
413         case SE_IFACE_PARAM:
414                 return(1);
415         case SE_UNARY_OP:
416                 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
417         case SE_BINARY_OP:
418                 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
419                 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
420                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
421                 return(1);
422         case SE_COLREF:
423                 if(se->is_gb()) return(1);      // gb ref not a colref.
424
425                 cr = se->get_colref();
426                 field = cr->get_field();
427
428                 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
429                 if(tablevar_ref < 0){
430                         return(tablevar_ref);
431                 }else{
432         //                      Store the table ref in the colref.
433                         cr->set_tablevar_ref(tablevar_ref);
434                         cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
435                         cr->set_interface("");
436                         cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
437
438 //                              Check the data type
439                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
440                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
441                         data_type dt(type_name, modifiers);
442 //                      if(! dt.equals(se->get_data_type()) ){
443 //                      if(! dt.subsumes_type(se->get_data_type()) ){
444                         if(! se->get_data_type()->subsumes_type(&dt) ){
445                                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
446                                         dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
447                                 return(-1);
448                         }
449                 }
450                 return(1);
451         case SE_AGGR_STAR:
452                 return(1);
453         case SE_AGGR_SE:        // Probably I should just return,
454                                                 // aggregate se's are explicitly bound to the schema.
455 //                      return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
456                 return(1);
457         case SE_FUNC:
458                 if(se->get_aggr_ref() >= 0) return 1;
459
460                 operands = se->get_operands();
461                 r_ret = 1;
462                 for(o=0;o<operands.size();o++){
463                         l_ret = bind_to_schema_se(operands[o], fm, schema);
464                         if(l_ret < 0) r_ret = -1;
465                 }
466                 return(r_ret);
467         default:
468                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
469                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
470                 return(-1);
471         }
472         return(-1);
473 }
474
475
476 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
477         int l_ret, r_ret;
478         vector<scalarexp_t *> op_list;
479         int o;
480
481         switch(pr->get_operator_type()){
482         case PRED_IN:
483                 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
484         case PRED_COMPARE:
485                 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
486                 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
487                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
488                 return(1);
489         case PRED_UNARY_OP:
490                 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
491         case PRED_BINARY_OP:
492                 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
493                 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
494                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
495                 return(1);
496         case PRED_FUNC:
497                 op_list = pr->get_op_list();
498                 l_ret = 0;
499                 for(o=0;o<op_list.size();++o){
500                         if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
501                 }
502                 return(l_ret);
503         default:
504                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
505                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
506         }
507
508         return(-1);
509 }
510
511
512
513
514
515
516 //                      verify_colref assigned data types to the column refs.
517 //                      Now assign data types to all other nodes in the
518 //                      scalar expression.
519 //
520 //                      return -1 on error
521
522 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
523         int l_ret, r_ret;
524         data_type *dt;
525         bool bret;
526         vector<scalarexp_t *> operands;
527         vector<data_type *> odt;
528         int o, fcn_id;
529         vector<bool> handle_ind;
530
531         switch(se->get_operator_type()){
532         case SE_LITERAL:
533                 return(constant_t);
534         case SE_PARAM:
535                 return(varying_t);
536         case SE_IFACE_PARAM:
537                 return(varying_t);              // actually, this should not be called.
538         case SE_UNARY_OP:
539                 return data_type::compute_temporal_type(
540                         compute_se_temporal(se->get_left_se(), tcol), se->get_op()
541                 );
542         case SE_BINARY_OP:
543                 return data_type::compute_temporal_type(
544                         compute_se_temporal(se->get_left_se(), tcol),
545                         compute_se_temporal(se->get_right_se(), tcol),
546                         se->get_left_se()->get_data_type()->get_type(),
547                         se->get_right_se()->get_data_type()->get_type(),
548                         se->get_op()
549                 );
550         case SE_COLREF:
551                 {
552                         col_id cid(se->get_colref() );
553                         if(tcol.count(cid) > 0){ return tcol[cid];
554                         }else{ return varying_t;}
555                 }
556         case SE_AGGR_STAR:
557         case SE_AGGR_SE:
558         case SE_FUNC:
559         default:
560                 return varying_t;
561         }
562         return(varying_t);
563 }
564
565
566
567 //                      verify_colref assigned data types to the column refs.
568 //                      Now assign data types to all other nodes in the
569 //                      scalar expression.
570 //
571 //                      return -1 on error
572
573 int assign_data_types(scalarexp_t *se, table_list *schema,
574                                                 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
575         int l_ret, r_ret;
576         data_type *dt;
577         bool bret;
578         vector<scalarexp_t *> operands;
579         vector<data_type *> odt;
580         int o, fcn_id;
581         vector<bool> handle_ind;
582         vector<bool> constant_ind;
583
584         switch(se->get_operator_type()){
585         case SE_LITERAL:
586                 dt = new data_type( se->get_literal()->get_type() );
587                 se->set_data_type(dt);
588                 if( ! dt->is_defined() ){
589                         fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
590                                 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
591                         return(-1);
592                 }else{
593                         return(1);
594                 }
595         case SE_PARAM:
596                 {
597                         string pname = se->get_param_name();
598                         dt = param_tbl->get_data_type(pname);
599 // A SE_PARRAM can change its value mid-query so using one
600 // to set a window is dangerous.  TODO check for this and issue a warning.
601                         dt->set_temporal(constant_t);
602                         se->set_data_type(dt);
603                         if( ! dt->is_defined() ){
604                                 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
605                                         pname.c_str(), se->get_lineno(),se->get_charno() );
606                                 return(-1);
607                         }
608                         return(1);
609                 }
610         case SE_IFACE_PARAM:
611                 dt = new data_type( "STRING" );
612                 se->set_data_type(dt);
613                 return(1);
614         case SE_UNARY_OP:
615                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
616                 if(l_ret < 0) return -1;
617
618                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
619                 se->set_data_type(dt);
620                 if( ! dt->is_defined() ){
621                         fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
622                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
623                                 se->get_lineno(), se->get_charno() );
624                         return(-1);
625                 }else{
626                         return(1);
627                 }
628         case SE_BINARY_OP:
629                 l_ret = assign_data_types(se->get_left_se(),  schema, fta_tree, Ext_fcns);
630                 r_ret = assign_data_types(se->get_right_se(),  schema, fta_tree, Ext_fcns);
631                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
632
633                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
634                 se->set_data_type(dt);
635                 if( ! dt->is_defined() ){
636                         fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
637                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
638                                 se->get_right_se()->get_data_type()->to_string().c_str(),
639                                 se->get_lineno(), se->get_charno() );
640                         return(-1);
641                 }else{
642                         return(1);
643                 }
644         case SE_COLREF:
645                 dt = se->get_data_type();
646                 bret = dt->is_defined();
647                 if( bret ){
648                         return(1);
649                 }else{
650                         fprintf(stderr,"ERROR, column reference type  is undefined, line =%d, char = %d, colref=%s\n",
651                                 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
652                         return(-1);
653                 }
654         case SE_AGGR_STAR:
655                 dt = new data_type("Int");      // changed Uint to Int
656                 se->set_data_type(dt);
657                 return(1);
658         case SE_AGGR_SE:
659                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
660                 if(l_ret < 0) return -1;
661
662                 dt = new data_type();
663                 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
664                 se->set_data_type(dt);
665
666                 if( ! dt->is_defined() ){
667                         fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
668                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
669                                 se->get_lineno(), se->get_charno() );
670                         return(-1);
671                 }else{
672                         return(1);
673                 }
674         case SE_FUNC:
675
676                 operands = se->get_operands();
677                 r_ret = 1;
678                 for(o=0;o<operands.size();o++){
679                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
680                         odt.push_back(operands[o]->get_data_type());
681                         if(l_ret < 0) r_ret = -1;
682                 }
683                 if(r_ret < 0) return(r_ret);
684
685 //                      Is it an aggregate extraction function?
686                 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
687                 if(fcn_id >= 0){
688                         int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
689                         int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
690                         int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
691 //                              Construct a se for the subaggregate.
692                         vector<scalarexp_t *> op_a;
693                         int n_aggr_oprs = operands.size()-n_fcn_params+1;
694                         for(o=0;o<n_aggr_oprs;++o){
695                                         op_a.push_back(operands[o]);
696                         }
697 //                              check handle params
698                         vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
699                         for(o=0;o<op_a.size();o++){
700                         if(handle_a[o]){
701                                 if(op_a[o]->get_operator_type() != SE_LITERAL &&
702                                                 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
703                                                 op_a[o]->get_operator_type() != SE_PARAM){
704                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
705                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
706                                                 return(-1);
707                                         }
708                                 }
709                         }
710                         vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
711                         for(o=0;o<op_a.size();o++){
712                         if(is_const_a[o]){
713                                 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
714                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
715                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
716                                                 return(-1);
717                                         }
718                                 }
719                         }
720
721                         scalarexp_t *se_a  = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
722                         se_a->set_fcn_id(subaggr_id);
723                         se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
724                         se_a->set_aggr_id(0);           // label this as a UDAF.
725
726
727 //                              Change this se to be the actual function
728                         vector<scalarexp_t *> op_f;
729                         op_f.push_back(se_a);
730                         for(o=n_aggr_oprs;o<operands.size();++o)
731                                 op_f.push_back(operands[o]);
732 //                              check handle params
733                         vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
734                         for(o=0;o<op_f.size();o++){
735                         if(handle_f[o]){
736                                 if(op_f[o]->get_operator_type() != SE_LITERAL &&
737                                                 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
738                                                 op_f[o]->get_operator_type() != SE_PARAM){
739                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
740                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
741                                                 return(-1);
742                                         }
743                                 }
744                         }
745                         vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
746                         for(o=0;o<op_f.size();o++){
747                         if(is_const_f[o]){
748                                 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
749                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
750                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
751                                                 return(-1);
752                                         }
753                                 }
754                         }
755
756                         se->param_list = op_f;
757                         se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
758                         se->set_fcn_id(actual_fcn_id);
759                         se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
760                         return(1);
761                 }
762                 if(fcn_id == -2){
763                         fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
764                 }
765
766 //                      Is it a UDAF?
767                 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
768                 if(fcn_id >= 0){
769                         se->set_fcn_id(fcn_id);
770                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
771                         se->set_aggr_id(0);             // label this as a UDAF.
772 //                      Finally, verify that all HANDLE parameters are literals or params.
773                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
774                         for(o=0;o<operands.size();o++){
775                                 if(handle_ind[o]){
776                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
777                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
778                                                 operands[o]->get_operator_type() != SE_PARAM){
779                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
780                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
781                                                 return(-1);
782                                         }
783                                 }
784                         }
785                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
786                         for(o=0;o<operands.size();o++){
787                         if(constant_ind[o]){
788                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
789                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
790                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
791                                                 return(-1);
792                                         }
793                                 }
794                         }
795
796 //      UDAFS as superaggregates not yet supported.
797 if(se->is_superaggr()){
798 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
799 se->set_superaggr(false);
800 }
801                         return(1);
802                 }
803                 if(fcn_id == -2){
804                         fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
805                 }
806
807 //                      Is it a stateful fcn?
808                 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
809                 if(fcn_id >= 0){
810                         se->set_fcn_id(fcn_id);
811                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
812                         se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
813 //                      Finally, verify that all HANDLE parameters are literals or params.
814                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
815                         for(o=0;o<operands.size();o++){
816                                 if(handle_ind[o]){
817                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
818                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
819                                                 operands[o]->get_operator_type() != SE_PARAM){
820                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
821                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
822                                                 return(-1);
823                                         }
824                                 }
825                         }
826                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
827                         for(o=0;o<operands.size();o++){
828                         if(constant_ind[o]){
829                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
830                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
831                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
832                                                 return(-1);
833                                         }
834                                 }
835                         }
836
837                         if(se->is_superaggr()){
838                                 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
839                         }
840                         return(1);
841                 }
842                 if(fcn_id == -2){
843                         fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
844                 }
845
846
847 //                      Is it a regular function?
848                 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
849                 if( fcn_id < 0 ){
850                         fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
851                         for(o=0;o<operands.size();o++){
852                                 if(o>0) fprintf(stderr,", ");
853                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
854                         }
855                         fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
856                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
857
858                         return(-1);
859                 }
860
861                 se->set_fcn_id(fcn_id);
862                 dt = Ext_fcns->get_fcn_dt(fcn_id);
863
864                 if(! dt->is_defined() ){
865                         fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
866                         for(o=0;o<operands.size();o++){
867                                 if(o>0) fprintf(stderr,", ");
868                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
869                         }
870                         fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
871                         return(-1);
872                 }
873
874 //                      Finally, verify that all HANDLE parameters are literals or params.
875                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
876                 for(o=0;o<operands.size();o++){
877                         if(handle_ind[o]){
878                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
879                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
880                                                 operands[o]->get_operator_type() != SE_PARAM){
881                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
882                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
883                                         return(-1);
884                                 }
885                         }
886                 }
887                 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
888                 for(o=0;o<operands.size();o++){
889                 if(constant_ind[o]){
890                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
891                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s  must be constant.\n  Line=%d, char=%d.\n",
892                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
893                                         return(-1);
894                                 }
895                         }
896                 }
897
898
899                 if(se->is_superaggr()){
900                         fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
901                 }
902
903                 se->set_data_type(dt);
904                 return(1);
905         default:
906                 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
907                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
908                 return(-1);
909         }
910         return(-1);
911 }
912
913
914 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
915                                                         table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
916         int l_ret, r_ret;
917         int i;
918         data_type *dt, *dtl;
919         vector<data_type *> odt;
920         vector<literal_t *> litl;
921         vector<scalarexp_t *> operands;
922         vector<bool> handle_ind;
923         vector<bool> constant_ind;
924         int o, fcn_id;
925
926         switch(pr->get_operator_type()){
927         case PRED_IN:
928                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
929                 litl = pr->get_lit_vec();
930                 dt = pr->get_left_se()->get_data_type();
931
932                 for(i=0;i<litl.size();i++){
933                         dtl = new data_type( litl[i]->get_type() );
934                         if( ! dt->is_comparable(dtl,pr->get_op()) ){
935                                 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
936                                         litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
937                                 delete dtl;
938                                 return(-1);
939                         }
940                         delete dtl;
941                 }
942                 return(1);
943         case PRED_COMPARE:
944                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
945                 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
946                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
947
948                 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
949                         fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
950                                 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
951                                  pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
952                         return(-1);
953                 }else{
954                         return(1);
955                 }
956         case PRED_UNARY_OP:
957                 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
958         case PRED_BINARY_OP:
959                 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
960                 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
961                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
962                 return(1);
963         case PRED_FUNC:
964                 operands = pr->get_op_list();
965                 r_ret = 1;
966                 for(o=0;o<operands.size();o++){
967                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
968                         odt.push_back(operands[o]->get_data_type());
969                         if(l_ret < 0) r_ret = -1;
970                 }
971                 if(r_ret < 0) return(r_ret);
972
973                 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
974                 if( fcn_id < 0 ){
975                         fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
976                         for(o=0;o<operands.size();o++){
977                                 if(o>0) fprintf(stderr,", ");
978                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
979                         }
980                         fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
981                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
982                         return(-1);
983                 }
984
985 //              ext_fcn_set.insert(fcn_id);
986                 pr->set_fcn_id(fcn_id);
987
988 //                      Finally, verify that all HANDLE parameters are literals or params.
989                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
990                 for(o=0;o<operands.size();o++){
991                         if(handle_ind[o]){
992                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
993                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
994                                                 operands[o]->get_operator_type() != SE_PARAM){
995                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
996                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
997                                         exit(1);
998                                 }
999                         }
1000                 }
1001                 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1002                 for(o=0;o<operands.size();o++){
1003                 if(constant_ind[o]){
1004                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
1005                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s  must be constant.\n  Line=%d, char=%d.\n",
1006                         o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1007                                         exit(1);
1008                                 }
1009                         }
1010                 }
1011
1012
1013 //                      Check if this predicate function is special sampling function
1014                 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1015
1016
1017                 return(l_ret);
1018         default:
1019                 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1020                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1021         }
1022
1023         return(-1);
1024 }
1025
1026
1027
1028 /////////////////////////////////////////////////////////////////////
1029 ////////////////                Make a deep copy of a se / pred tree
1030 /////////////////////////////////////////////////////////////////////
1031
1032
1033 //              duplicate a select element
1034 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1035         return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1036 }
1037
1038 //              duplicate a scalar expression.
1039 scalarexp_t *dup_se(scalarexp_t *se,
1040                                   aggregate_table *aggr_tbl
1041                                  ){
1042   int p;
1043   vector<scalarexp_t *> operand_list;
1044   vector<data_type *> dt_signature;
1045   scalarexp_t *ret_se, *l_se, *r_se;
1046
1047   switch(se->get_operator_type()){
1048     case SE_LITERAL:
1049                 ret_se = new scalarexp_t(se->get_literal());
1050                 ret_se->use_decorations_of(se);
1051                 return(ret_se);
1052
1053     case SE_PARAM:
1054                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1055                 ret_se->use_decorations_of(se);
1056                 return(ret_se);
1057
1058     case SE_IFACE_PARAM:
1059                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1060                 ret_se->use_decorations_of(se);
1061                 return(ret_se);
1062
1063     case SE_COLREF:
1064                 ret_se = new scalarexp_t(se->get_colref()->duplicate());
1065                 ret_se->rhs.scalarp = se->rhs.scalarp;  // carry along notation
1066                 ret_se->use_decorations_of(se);
1067                 return(ret_se);
1068
1069     case SE_UNARY_OP:
1070                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1071                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1072                 ret_se->use_decorations_of(se);
1073                 return(ret_se);
1074
1075     case SE_BINARY_OP:
1076                 l_se = dup_se(se->get_left_se(), aggr_tbl);
1077                 r_se = dup_se(se->get_right_se(), aggr_tbl);
1078
1079                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1080                 ret_se->use_decorations_of(se);
1081
1082                 return(ret_se);
1083
1084     case SE_AGGR_STAR:
1085                 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1086                 ret_se->use_decorations_of(se);
1087                 return(ret_se);
1088
1089     case SE_AGGR_SE:
1090                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1091                 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1092                 ret_se->use_decorations_of(se);
1093                 return(ret_se);
1094
1095         case SE_FUNC:
1096                 {
1097                         operand_list = se->get_operands();
1098                         vector<scalarexp_t *> new_operands;
1099                         for(p=0;p<operand_list.size();p++){
1100                                 l_se = dup_se(operand_list[p], aggr_tbl);
1101                                 new_operands.push_back(l_se);
1102                         }
1103
1104                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1105                         ret_se->use_decorations_of(se);
1106                         return(ret_se);
1107                 }
1108
1109         default:
1110                 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
1111                 exit(1);
1112         break;
1113   }
1114   return(NULL);
1115
1116 }
1117
1118
1119
1120 predicate_t *dup_pr(predicate_t *pr,
1121                                                  aggregate_table *aggr_tbl
1122                                                  ){
1123
1124   vector<literal_t *> llist;
1125   scalarexp_t *se_l, *se_r;
1126   predicate_t *pr_l, *pr_r, *ret_pr;
1127   vector<scalarexp_t *> op_list, new_op_list;
1128   int o;
1129
1130
1131         switch(pr->get_operator_type()){
1132         case PRED_IN:
1133                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1134                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1135                 return(ret_pr);
1136
1137         case PRED_COMPARE:
1138                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1139                 se_r = dup_se(pr->get_right_se(),  aggr_tbl);
1140                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1141                 return(ret_pr);
1142
1143         case PRED_UNARY_OP:
1144                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1145                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1146                 return(ret_pr);
1147
1148         case PRED_BINARY_OP:
1149                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1150                 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1151                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1152                 return(ret_pr);
1153         case PRED_FUNC:
1154                 op_list = pr->get_op_list();
1155                 for(o=0;o<op_list.size();++o){
1156                         se_l = dup_se(op_list[o], aggr_tbl);
1157                         new_op_list.push_back(se_l);
1158                 }
1159                 ret_pr=  new predicate_t(pr->get_op().c_str(), new_op_list);
1160                 ret_pr->set_fcn_id(pr->get_fcn_id());
1161                 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1162                 return(ret_pr);
1163
1164         default:
1165                 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1166                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1167                 exit(1);
1168         }
1169
1170         return(0);
1171
1172 }
1173
1174 table_exp_t *dup_table_exp(table_exp_t *te){
1175         int i;
1176         table_exp_t *ret = new table_exp_t();
1177
1178         ret->query_type = te->query_type;
1179
1180         ss_map::iterator ss_i;
1181         for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1182                 ret->nmap[(*ss_i).first] = (*ss_i).second;
1183         }
1184
1185         for(i=0;i<te->query_params.size();++i){
1186                 ret->query_params.push_back(new
1187                  var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
1188         }
1189
1190         if(te->sl){
1191                 ret->sl = new select_list_t();
1192                 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1193                 vector<select_element *> select_list = te->sl->get_select_list();
1194                 for(i=0;i<select_list.size();++i){
1195                         scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1196                         ret->sl->append(se,select_list[i]->name);
1197                 }
1198         }
1199
1200         ret->fm = te->fm->duplicate();
1201
1202         if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1203         if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1204         if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1205         if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1206         if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1207
1208         for(i=0;i<te->gb.size();++i){
1209                 extended_gb_t *tmp_g =  te->gb[i]->duplicate();
1210                 ret->gb.push_back(tmp_g);
1211         }
1212
1213         ret->mergevars = te->mergevars;
1214         if(te->slack)
1215                 ret->slack = dup_se(te->slack,NULL);
1216         ret->lineno = te->lineno;
1217         ret->charno = te->charno;
1218
1219         return(ret);
1220 }
1221
1222
1223
1224
1225
1226
1227
1228 /////////////////////////////////////////////////////////////////////////
1229 //                      Bind colrefs to a member of their FROM list
1230
1231 void bind_colref_se(scalarexp_t *se,
1232                                   vector<tablevar_t *> &fm,
1233                                   int prev_ref, int new_ref
1234                                  ){
1235   int p;
1236   vector<scalarexp_t *> operand_list;
1237   colref_t *cr;
1238   ifpref_t *ir;
1239
1240   switch(se->get_operator_type()){
1241     case SE_LITERAL:
1242     case SE_PARAM:
1243                 return;
1244     case SE_IFACE_PARAM:
1245                 ir = se->get_ifpref();
1246                 if(ir->get_tablevar_ref() == prev_ref){
1247                         ir->set_tablevar_ref(new_ref);
1248                         ir->set_tablevar(fm[new_ref]->get_var_name());
1249                 }
1250                 return;
1251
1252     case SE_COLREF:
1253                 cr=se->get_colref();
1254                 if(cr->get_tablevar_ref() == prev_ref){
1255                         cr->set_tablevar_ref(new_ref);
1256 //                      cr->set_interface(fm[new_ref]->get_interface());
1257                         cr->set_table_name(fm[new_ref]->get_var_name());
1258                 }
1259                 return;
1260
1261     case SE_UNARY_OP:
1262                 bind_colref_se(se->get_left_se(),  fm, prev_ref, new_ref);
1263                 return;
1264
1265     case SE_BINARY_OP:
1266                 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1267                 bind_colref_se(se->get_right_se(),  fm, prev_ref, new_ref);
1268                 return;
1269
1270     case SE_AGGR_STAR:
1271     case SE_AGGR_SE:
1272                 return;
1273
1274         case SE_FUNC:
1275                 if(se->get_aggr_ref() >= 0) return;
1276
1277                 operand_list = se->get_operands();
1278                 for(p=0;p<operand_list.size();p++){
1279                         bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
1280                 }
1281                 return;
1282
1283         default:
1284                 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
1285                 exit(1);
1286         break;
1287   }
1288   return;
1289
1290 }
1291
1292
1293
1294
1295 void bind_colref_pr(predicate_t *pr,
1296                                   vector<tablevar_t *> &fm,
1297                                   int prev_ref, int new_ref
1298                                  ){
1299   vector<scalarexp_t *> op_list;
1300   int o;
1301
1302         switch(pr->get_operator_type()){
1303         case PRED_IN:
1304                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1305                 return;
1306
1307         case PRED_COMPARE:
1308                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1309                 bind_colref_se(pr->get_right_se(),  fm, prev_ref, new_ref);
1310                 return;
1311
1312         case PRED_UNARY_OP:
1313                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1314                 return;
1315
1316         case PRED_BINARY_OP:
1317                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1318                 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
1319                 return;
1320         case PRED_FUNC:
1321                 op_list = pr->get_op_list();
1322                 for(o=0;o<op_list.size();++o){
1323                         bind_colref_se(op_list[o], fm, prev_ref, new_ref);
1324                 }
1325                 return;
1326
1327         default:
1328                 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1329                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1330                 exit(1);
1331         }
1332
1333         return;
1334
1335 }
1336
1337
1338 /////////////////////////////////////////////////////////////////////
1339 //              verify that the se refs only literals and params.
1340 //          (use to verify that the expression should stay in the hfta
1341 //               during a split)
1342 /////////////////////////////////////////////////////////////////////
1343
1344 bool is_literal_or_param_only(scalarexp_t *se){
1345         int o;
1346         vector<scalarexp_t *> operands;
1347         bool sum = true;
1348
1349         if(se == NULL) return(true);
1350
1351         switch(se->get_operator_type()){
1352         case SE_LITERAL:
1353         case SE_PARAM:
1354                 return(true);
1355         case SE_IFACE_PARAM:
1356                 return(false);          // need to treat as colref
1357         case SE_UNARY_OP:
1358                 return(is_literal_or_param_only(se->get_left_se()) );
1359         case SE_BINARY_OP:
1360                 return(
1361                         is_literal_or_param_only(se->get_left_se()) &&
1362                         is_literal_or_param_only(se->get_right_se())
1363                         );
1364         case SE_COLREF:
1365                 return(false);
1366         case SE_AGGR_STAR:
1367         case SE_AGGR_SE:
1368                 return(false);
1369         case SE_FUNC:
1370 //                      The fcn might have special meaning at the lfta ...
1371                 return(false);
1372
1373         default:
1374                 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1375                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1376                 exit(1);
1377         }
1378         return(0);
1379 }
1380
1381
1382
1383 /////////////////////////////////////////////////////////////////////
1384 //              Search for gb refs.
1385 //          (use to verify that no gbrefs in a gb def.)
1386 /////////////////////////////////////////////////////////////////////
1387
1388
1389 int count_gb_se(scalarexp_t *se){
1390         int o;
1391         vector<scalarexp_t *> operands;
1392         int sum = 0;
1393
1394         if(se == NULL) return(0);
1395
1396         switch(se->get_operator_type()){
1397         case SE_LITERAL:
1398         case SE_PARAM:
1399         case SE_IFACE_PARAM:
1400                 return(0);
1401         case SE_UNARY_OP:
1402                 return(count_gb_se(se->get_left_se()) );
1403         case SE_BINARY_OP:
1404                 return(
1405                         count_gb_se(se->get_left_se()) +
1406                         count_gb_se(se->get_right_se())
1407                         );
1408         case SE_COLREF:
1409                 if(se->get_gb_ref() < 0) return(0);
1410                 return(1);
1411         case SE_AGGR_STAR:
1412         case SE_AGGR_SE:
1413                 return(0);
1414         case SE_FUNC:
1415                 operands = se->get_operands();
1416                 for(o=0;o<operands.size();o++){
1417                         sum +=  count_gb_se(operands[o]);
1418                 }
1419                 return(sum);
1420
1421         default:
1422                 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1423                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1424                 exit(1);
1425         }
1426         return(0);
1427 }
1428
1429
1430 /////////////////////////////////////////////////////////////////////
1431 ////////////////                Search for stateful fcns.
1432 /////////////////////////////////////////////////////////////////////
1433
1434
1435 int se_refs_sfun(scalarexp_t *se){
1436         int o;
1437         vector<scalarexp_t *> operands;
1438         int sum = 0;
1439
1440         if(se == NULL) return(0);
1441
1442         switch(se->get_operator_type()){
1443         case SE_LITERAL:
1444         case SE_PARAM:
1445         case SE_IFACE_PARAM:
1446                 return(0);
1447         case SE_UNARY_OP:
1448                 return(se_refs_sfun(se->get_left_se()) );
1449         case SE_BINARY_OP:
1450                 return(
1451                         se_refs_sfun(se->get_left_se()) +
1452                         se_refs_sfun(se->get_right_se())
1453                         );
1454         case SE_COLREF:
1455                 return(0);
1456         case SE_AGGR_STAR:
1457         case SE_AGGR_SE:
1458                 return(0);
1459         case SE_FUNC:
1460                 operands = se->get_operands();
1461                 for(o=0;o<operands.size();o++){
1462                         sum +=  se_refs_sfun(operands[o]);
1463                 }
1464                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1465
1466 //                      for now, stateful functions count as aggregates.
1467                 if(se->get_storage_state() != "")
1468                         sum++;
1469
1470                 return(sum);
1471
1472         default:
1473                 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1474                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1475                 exit(1);
1476         }
1477         return(0);
1478 }
1479
1480
1481 //              Return a count of the number of stateful fcns in this predicate.
1482 int pred_refs_sfun(predicate_t *pr){
1483         vector<scalarexp_t *> op_list;
1484         int o, aggr_sum;
1485
1486         switch(pr->get_operator_type()){
1487         case PRED_IN:
1488                 return(se_refs_sfun(pr->get_left_se()) );
1489         case PRED_COMPARE:
1490                 return(
1491                         se_refs_sfun(pr->get_left_se()) +
1492                         se_refs_sfun(pr->get_right_se())
1493                 );
1494         case PRED_UNARY_OP:
1495                 return(pred_refs_sfun(pr->get_left_pr()) );
1496         case PRED_BINARY_OP:
1497                 return(
1498                         pred_refs_sfun(pr->get_left_pr()) +
1499                         pred_refs_sfun(pr->get_right_pr())
1500                 );
1501         case PRED_FUNC:
1502                 op_list = pr->get_op_list();
1503                 aggr_sum = 0;
1504                 for(o=0;o<op_list.size();++o){
1505                         aggr_sum += se_refs_sfun(op_list[o]);
1506                 }
1507                 return(aggr_sum);
1508
1509         default:
1510                 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1511                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1512                 exit(1);
1513         }
1514
1515         return(0);
1516 }
1517
1518 //////////////////////////////////////////////////
1519
1520 /////////////////////////////////////////////////////////////////////
1521 ////////////////                Search for aggregates.
1522 /////////////////////////////////////////////////////////////////////
1523
1524
1525 int count_aggr_se(scalarexp_t *se, bool strict){
1526         int o;
1527         vector<scalarexp_t *> operands;
1528         int sum = 0;
1529
1530         if(se == NULL) return(0);
1531
1532         switch(se->get_operator_type()){
1533         case SE_LITERAL:
1534         case SE_PARAM:
1535         case SE_IFACE_PARAM:
1536                 return(0);
1537         case SE_UNARY_OP:
1538                 return(count_aggr_se(se->get_left_se(), strict) );
1539         case SE_BINARY_OP:
1540                 return(
1541                         count_aggr_se(se->get_left_se(), strict) +
1542                         count_aggr_se(se->get_right_se(), strict)
1543                         );
1544         case SE_COLREF:
1545                 return(0);
1546         case SE_AGGR_STAR:
1547         case SE_AGGR_SE:
1548                 return(1);
1549         case SE_FUNC:
1550                 operands = se->get_operands();
1551                 for(o=0;o<operands.size();o++){
1552                         sum +=  count_aggr_se(operands[o], strict);
1553                 }
1554                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1555
1556 //                      now, stateful functions can count as aggregates.
1557 //                      if we are being strict.
1558                 if(! strict && se->get_storage_state() != "")
1559                         sum++;
1560
1561                 return(sum);
1562
1563         default:
1564                 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1565                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1566                 exit(1);
1567         }
1568         return(0);
1569 }
1570
1571
1572 //              Return a count of the number of aggregate fcns in this predicate.
1573 int count_aggr_pred(predicate_t *pr, bool strict){
1574         vector<scalarexp_t *> op_list;
1575         int o, aggr_sum;
1576
1577         switch(pr->get_operator_type()){
1578         case PRED_IN:
1579                 return(count_aggr_se(pr->get_left_se(), strict) );
1580         case PRED_COMPARE:
1581                 return(
1582                         count_aggr_se(pr->get_left_se(), strict) +
1583                         count_aggr_se(pr->get_right_se(), strict)
1584                 );
1585         case PRED_UNARY_OP:
1586                 return(count_aggr_pred(pr->get_left_pr(), strict) );
1587         case PRED_BINARY_OP:
1588                 return(
1589                         count_aggr_pred(pr->get_left_pr(), strict) +
1590                         count_aggr_pred(pr->get_right_pr(), strict)
1591                 );
1592         case PRED_FUNC:
1593                 op_list = pr->get_op_list();
1594                 aggr_sum = 0;
1595                 for(o=0;o<op_list.size();++o){
1596                         aggr_sum += count_aggr_se(op_list[o], strict);
1597                 }
1598                 return(aggr_sum);
1599
1600         default:
1601                 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1602                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1603                 exit(1);
1604         }
1605
1606         return(0);
1607 }
1608
1609 //////////////////////////////////////////////////
1610 ///             Analyze tablevar refs
1611
1612 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1613         int o;
1614         vector<scalarexp_t *> operands;
1615         int vref;
1616         colref_t *cr;
1617         ifpref_t *ir;
1618
1619         if(se == NULL) return;
1620
1621         switch(se->get_operator_type()){
1622         case SE_LITERAL:
1623         case SE_PARAM:
1624                 return;
1625         case SE_IFACE_PARAM:
1626                 ir = se->get_ifpref();
1627                 vref = ir->get_tablevar_ref();
1628                 for(o=0;o<reflist.size();++o){
1629                         if(vref == reflist[o]) return;
1630                 }
1631                 reflist.push_back(vref);
1632                 return;
1633         case SE_UNARY_OP:
1634                 get_tablevar_ref_se(se->get_left_se(), reflist);
1635                 return;
1636         case SE_BINARY_OP:
1637                 get_tablevar_ref_se(se->get_left_se(), reflist);
1638                 get_tablevar_ref_se(se->get_right_se(), reflist);
1639                 return;
1640         case SE_COLREF:
1641                 if(se->is_gb()) return;
1642                 cr = se->get_colref();
1643                 vref = cr->get_tablevar_ref();
1644                 for(o=0;o<reflist.size();++o){
1645                         if(vref == reflist[o]) return;
1646                 }
1647                 reflist.push_back(vref);
1648                 return;
1649         case SE_AGGR_STAR:
1650         case SE_AGGR_SE:
1651                 return;
1652         case SE_FUNC:
1653                 if(se->get_aggr_ref() >= 0) return;
1654
1655                 operands = se->get_operands();
1656                 for(o=0;o<operands.size();o++){
1657                         get_tablevar_ref_se(operands[o], reflist);
1658                 }
1659                 return;
1660
1661         default:
1662                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1663                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1664                 exit(1);
1665         }
1666         return;
1667 }
1668
1669
1670 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1671         vector<scalarexp_t *> op_list;
1672         int o;
1673
1674         switch(pr->get_operator_type()){
1675         case PRED_IN:
1676                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1677                 return;
1678         case PRED_COMPARE:
1679                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1680                 get_tablevar_ref_se(pr->get_right_se(),reflist);
1681                 return;
1682         case PRED_UNARY_OP:
1683                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1684                 return;
1685         case PRED_BINARY_OP:
1686                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1687                 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
1688                 return;
1689         case PRED_FUNC:
1690                 op_list = pr->get_op_list();
1691                 for(o=0;o<op_list.size();++o){
1692                         get_tablevar_ref_se(op_list[o],reflist);
1693                 }
1694                 return;
1695         default:
1696                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1697                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1698         }
1699
1700         return;
1701 }
1702
1703
1704 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1705
1706 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1707         int agg_id;
1708         int o;
1709         vector<scalarexp_t *> operands;
1710
1711         switch(se->get_operator_type()){
1712         case SE_LITERAL:
1713         case SE_PARAM:
1714         case SE_IFACE_PARAM:
1715                 return;
1716         case SE_UNARY_OP:
1717                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
1718                 return;
1719         case SE_BINARY_OP:
1720                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1721                 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
1722                 return;
1723         case SE_COLREF:
1724                 return;
1725         case SE_AGGR_STAR:
1726                 return;
1727         case SE_AGGR_SE:
1728                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1729                 return;
1730         case SE_FUNC:
1731                 operands = se->get_operands();
1732                 for(o=0;o<operands.size();o++){
1733                         gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1734                 }
1735                 if(se->get_storage_state() != ""){
1736                         states_refd.insert(se->get_storage_state());
1737                 }
1738                 return;
1739
1740         default:
1741                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1742                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1743                 exit(1);
1744         }
1745         return;
1746 }
1747
1748
1749 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1750
1751 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1752         vector<scalarexp_t *> op_list;
1753         int o;
1754
1755         switch(pr->get_operator_type()){
1756         case PRED_IN:
1757                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1758                 return;
1759         case PRED_COMPARE:
1760                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1761                 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
1762                 return;
1763         case PRED_UNARY_OP:
1764                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1765                 return;
1766         case PRED_BINARY_OP:
1767                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1768                 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
1769                 return;
1770         case PRED_FUNC:
1771                 op_list = pr->get_op_list();
1772                 for(o=0;o<op_list.size();++o){
1773                         gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
1774                 }
1775                 return;
1776
1777         default:
1778                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1779                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1780                 exit(1);
1781         }
1782
1783         return;
1784 }
1785
1786
1787
1788
1789 //                      walk se tree and collect aggregates into aggregate table.
1790 //                      duplicate aggregates receive the same idx to the table.
1791
1792 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1793         int agg_id;
1794         int o;
1795         vector<scalarexp_t *> operands;
1796
1797         switch(se->get_operator_type()){
1798         case SE_LITERAL:
1799         case SE_PARAM:
1800         case SE_IFACE_PARAM:
1801                 return;
1802         case SE_UNARY_OP:
1803                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
1804                 return;
1805         case SE_BINARY_OP:
1806                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1807                 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
1808                 return;
1809         case SE_COLREF:
1810                 return;
1811         case SE_AGGR_STAR:
1812                 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1813                 se->set_aggr_id(agg_id);
1814                 return;
1815         case SE_AGGR_SE:
1816                 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1817                 se->set_aggr_id(agg_id);
1818                 return;
1819         case SE_FUNC:
1820                 operands = se->get_operands();
1821                 for(o=0;o<operands.size();o++){
1822                         build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1823                 }
1824                 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1825                         agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1826                         se->set_aggr_id(agg_id);
1827                 }
1828                 return;
1829
1830         default:
1831                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1832                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1833                 exit(1);
1834         }
1835         return;
1836 }
1837
1838
1839 //                      walk se tree and collect aggregates into aggregate table.
1840 //                      duplicate aggregates receive the same idx to the table.
1841
1842 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1843         vector<scalarexp_t *> op_list;
1844         int o;
1845
1846         switch(pr->get_operator_type()){
1847         case PRED_IN:
1848                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1849                 return;
1850         case PRED_COMPARE:
1851                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1852                 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
1853                 return;
1854         case PRED_UNARY_OP:
1855                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1856                 return;
1857         case PRED_BINARY_OP:
1858                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1859                 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
1860                 return;
1861         case PRED_FUNC:
1862                 op_list = pr->get_op_list();
1863                 for(o=0;o<op_list.size();++o){
1864                         build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
1865                 }
1866                 return;
1867
1868         default:
1869                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1870                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1871                 exit(1);
1872         }
1873
1874         return;
1875 }
1876
1877
1878 //                      Return true if the two scalar expressions
1879 //                      represent the same value (e.g., use to eliminate
1880 //                      duplicate aggregates).
1881 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1882         vector<scalarexp_t *> operands1;
1883         vector<scalarexp_t *> operands2;
1884         int o;
1885
1886 //              First handle the case of nulls (e.g. COUNT aggrs)
1887         if(se1 == NULL && se2 == NULL) return(true);
1888         if(se1 == NULL || se2 == NULL) return(false);
1889
1890 //              In all cases, must be the same oeprator type and same operator.
1891         if(se1->get_operator_type() != se2->get_operator_type())
1892                 return(false);
1893         if(se1->get_op() != se2->get_op() )
1894                 return(false);
1895
1896         switch(se1->get_operator_type()){
1897         case SE_LITERAL:
1898                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1899         case SE_PARAM:
1900                 return(se1->get_param_name() == se2->get_param_name() );
1901         case SE_IFACE_PARAM:
1902                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1903         case SE_UNARY_OP:
1904                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1905         case SE_BINARY_OP:
1906                 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1907                         return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
1908                 return(false);
1909         case SE_COLREF:
1910                 if(se1->is_gb() && se2->is_gb())
1911                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1912                 if(se1->is_gb() || se2->is_gb())
1913                         return(false);
1914                 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
1915         case SE_AGGR_STAR:
1916                 return(true);
1917         case SE_AGGR_SE:
1918                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1919         case SE_FUNC:
1920                 if(se1->get_op() != se2->get_op()) return(false);
1921
1922                 operands1 = se1->get_operands();
1923                 operands2 = se2->get_operands();
1924                 if(operands1.size() != operands2.size()) return(false);
1925
1926                 for(o=0;o<operands1.size();o++){
1927                         if(! is_equivalent_se(operands1[o], operands2[o]) )
1928                                 return(false);
1929                 }
1930                 return(true);
1931         default:
1932                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1933                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1934                 exit(1);
1935         }
1936         return(false);
1937 }
1938
1939
1940 //              Similar to is_equivalent_se, but with a looser definition
1941 //              of equivalence of colrefs.  Here, say they are equivalent
1942 //              if their base table is the same.  Use to find equivalent
1943 //              predicates on base tables.
1944 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1945         vector<scalarexp_t *> operands1;
1946         vector<scalarexp_t *> operands2;
1947         int o;
1948
1949         if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1950                 se1 = se1->get_right_se();
1951         }
1952         if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1953                 se2 = se2->get_right_se();
1954         }
1955
1956 //              First handle the case of nulls (e.g. COUNT aggrs)
1957         if(se1 == NULL && se2 == NULL) return(true);
1958         if(se1 == NULL || se2 == NULL) return(false);
1959
1960 //              In all cases, must be the same oeprator type and same operator.
1961         if(se1->get_operator_type() != se2->get_operator_type())
1962                 return(false);
1963         if(se1->get_op() != se2->get_op() )
1964                 return(false);
1965
1966         switch(se1->get_operator_type()){
1967         case SE_LITERAL:
1968                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1969         case SE_PARAM:
1970                 return(se1->get_param_name() == se2->get_param_name() );
1971         case SE_IFACE_PARAM:
1972                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1973         case SE_UNARY_OP:
1974                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1975         case SE_BINARY_OP:
1976                 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1977                         return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
1978                 return(false);
1979         case SE_COLREF:
1980 /*
1981                 if(se1->is_gb() && se2->is_gb())
1982                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1983                 if(se1->is_gb() || se2->is_gb())
1984                         return(false);
1985 */
1986                 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
1987         case SE_AGGR_STAR:
1988                 return(true);
1989         case SE_AGGR_SE:
1990                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1991         case SE_FUNC:
1992                 if(se1->get_op() != se2->get_op()) return(false);
1993
1994                 operands1 = se1->get_operands();
1995                 operands2 = se2->get_operands();
1996                 if(operands1.size() != operands2.size()) return(false);
1997
1998                 for(o=0;o<operands1.size();o++){
1999                         if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
2000                                 return(false);
2001                 }
2002                 return(true);
2003         default:
2004                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2005                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2006                 exit(1);
2007         }
2008         return(false);
2009 }
2010
2011
2012 //              Find predicates which are equivalent when
2013 //              looking at the base tables.  Use to find
2014 //              common prefilter.
2015 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2016 int i, o;
2017
2018 //              First handle the case of nulls
2019         if(p1 == NULL && p2 == NULL) return(true);
2020         if(p1 == NULL || p2 == NULL) return(false);
2021
2022
2023   if(p1->get_operator_type() != p2->get_operator_type())
2024          return(false);
2025   if(p1->get_op() != p2->get_op())
2026          return(false);
2027
2028     vector<literal_t *> ll1;
2029     vector<literal_t *> ll2;
2030         vector<scalarexp_t *> op_list1, op_list2;
2031
2032
2033   switch(p2->get_operator_type()){
2034      case PRED_COMPARE:
2035         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2036             return(false);
2037         return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
2038     break;
2039     case PRED_IN:
2040         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2041             return(false);
2042         ll1 = p1->get_lit_vec();
2043         ll2 = p2->get_lit_vec();
2044         if(ll1.size() != ll2.size())
2045             return(false);
2046         for(i=0;i<ll1.size();i++){
2047           if(! ll1[i]->is_equivalent( ll2[i] ) )
2048             return(false);
2049         }
2050         return(true);
2051     break;
2052      case PRED_UNARY_OP:
2053         return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2054     break;
2055      case PRED_BINARY_OP:
2056         if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2057             return(false);
2058         return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
2059     break;
2060          case PRED_FUNC:
2061                 op_list1 = p1->get_op_list();
2062                 op_list2 = p2->get_op_list();
2063                 if(op_list1.size() != op_list2.size()) return(false);
2064                 for(o=0;o<op_list1.size();++o){
2065                         if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
2066                 }
2067                 return(true);
2068
2069    }
2070
2071     return(false);
2072 }
2073
2074
2075
2076 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2077   if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2078          return(false);
2079   if(p1->get_fcn_id() != p2->get_fcn_id())
2080                 return false;
2081   vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2082   int o;
2083   vector<scalarexp_t *> op_list1 = p1->get_op_list();
2084   vector<scalarexp_t *> op_list2 = p2->get_op_list();
2085   if(op_list1.size() != op_list2.size()) return(false);
2086   for(o=0;o<op_list1.size();++o){
2087           if(cl_op[o]){
2088                 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2089                         return(false);
2090         }
2091   }
2092   return true;
2093
2094 }
2095
2096
2097
2098
2099 //                      Verify that the scalar expression (in a such that clause)
2100 //                      is acceptable in an aggregation query.  No column
2101 //                      references allowed outside aggergates, except for
2102 //                      references to group-by attributes.
2103 //                      return true if OK, false if bad.
2104 bool verify_aggr_query_se(scalarexp_t *se){
2105         vector <scalarexp_t *> operands;
2106         int o;
2107
2108     switch(se->get_operator_type()){
2109     case SE_LITERAL:
2110     case SE_PARAM:
2111     case SE_IFACE_PARAM:
2112         return(true );
2113     case SE_UNARY_OP:
2114         return(verify_aggr_query_se(se->get_left_se() ) );
2115     case SE_BINARY_OP:
2116         return(verify_aggr_query_se(se->get_left_se() ) &&
2117             verify_aggr_query_se(se->get_right_se() ) );
2118     case SE_COLREF:
2119         if(se->is_gb() ) return(true);
2120         fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2121                         "only reference constants, group-by attributes, and "
2122                         "aggregates,  (%s) line %d, character %d.\n",
2123                         se->get_colref()->to_string().c_str(),
2124                                                 se->get_lineno(), se->get_charno() );
2125         return(false);
2126     case SE_AGGR_STAR:
2127     case SE_AGGR_SE:
2128 //                      colrefs and gbrefs allowed.
2129 //                      check for nested aggregation elsewhere, so just return TRUE
2130         return(true);
2131         case SE_FUNC:
2132 //                      If its a UDAF, just return true
2133                 if(se->get_aggr_ref() >= 0) return true;
2134
2135                 operands = se->get_operands();
2136
2137                 for(o=0;o<operands.size();o++){
2138                         if(! verify_aggr_query_se(operands[o]) )
2139                                 return(false);
2140                 }
2141                 return(true);
2142     default:
2143         fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2144                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2145         exit(1);
2146     }
2147     return(false);
2148 }
2149
2150
2151
2152
2153 //                      Find complex literals.
2154 //                      NOTE : This analysis should be deferred to
2155 //                                 code generation time.
2156 //                      This analysis drills into aggr se specs.
2157 //                      Shouldn't this be done at the aggregate table?
2158 //                      But, its not a major loss of efficiency.
2159 //                              UPDATE : drilling into aggr se's is causnig a problem
2160 //                                      so I've eliminated it.
2161
2162 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2163                                                                 cplx_lit_table *complex_literals){
2164         literal_t *l;
2165         vector<scalarexp_t *> operands;
2166         int o;
2167         scalarexp_t *param_se;
2168         data_type *dt;
2169
2170         switch(se->get_operator_type()){
2171         case SE_LITERAL:
2172                 l = se->get_literal();
2173                 if(l->constructor_name() != ""){
2174                         int cl_idx = complex_literals->add_cpx_lit(l, false);
2175                         l->set_cpx_lit_ref(cl_idx);
2176                 }
2177                 return(true);
2178         case SE_PARAM:
2179                 return(true );
2180 //                      SE_IFACE_PARAM should not exist when this is called.
2181         case SE_UNARY_OP:
2182                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2183         case SE_BINARY_OP:
2184                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2185                         find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2186         case SE_COLREF:
2187                 return(true);
2188         case SE_AGGR_STAR:
2189                 return(true);
2190         case SE_AGGR_SE:
2191                 return true;
2192 //              return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2193         case SE_FUNC:
2194                 if(se->get_aggr_ref() >= 0) return true;
2195
2196                 operands = se->get_operands();
2197                 for(o=0;o<operands.size();o++){
2198                         find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
2199                 }
2200                 return(true);
2201         default:
2202                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2203                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2204                 exit(1);
2205         }
2206         return(false);
2207 }
2208
2209
2210
2211
2212 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2213                                                                 cplx_lit_table *complex_literals){
2214         int i,o;
2215         vector<literal_t *> litl;
2216         vector<scalarexp_t *> op_list;
2217
2218
2219         switch(pr->get_operator_type()){
2220         case PRED_IN:
2221                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2222                 litl = pr->get_lit_vec();
2223                 for(i=0;i<litl.size();i++){
2224                         if(litl[i]->constructor_name() != ""){
2225                                 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2226                                 litl[i]->set_cpx_lit_ref(cl_idx);
2227                         }
2228                 }
2229                 return;
2230         case PRED_COMPARE:
2231                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2232                 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
2233                 return;
2234         case PRED_UNARY_OP:
2235                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2236                 return;
2237         case PRED_BINARY_OP:
2238                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2239                 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
2240                 return;
2241         case PRED_FUNC:
2242                 op_list = pr->get_op_list();
2243                 for(o=0;o<op_list.size();++o){
2244                         find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
2245                 }
2246                 return;
2247         default:
2248                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2249                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2250                 exit(1);
2251         }
2252
2253         return;
2254 }
2255
2256
2257 //              Find all things which are passed as handle parameters to functions
2258 //              (query parameters, (simple) literals, complex literals)
2259 //              These expressions MUST be processed with find_complex_literal_??
2260 //              first.
2261 //                      TODO: this analysis drills into the aggregate SEs.
2262 //                      Shouldn't this be done on the aggr table SEs instead?
2263 //                      to avoid duplication.  THe handle registration
2264 //                      might be expensive ...
2265 //                      REVISED : drilling into aggr se's is causing problems, eliminated.
2266
2267 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2268                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2269         vector<scalarexp_t *> operands;
2270         vector<bool> handle_ind;
2271         int o;
2272         scalarexp_t *param_se;
2273         data_type *dt;
2274         literal_t *l;
2275
2276         switch(se->get_operator_type()){
2277         case SE_LITERAL:
2278                 return;
2279         case SE_PARAM:
2280                 return;
2281 //              case SE_IFACE_PARAM:            SHOULD NOT EXIST when this is called
2282         case SE_UNARY_OP:
2283                 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2284                 return;
2285         case SE_BINARY_OP:
2286                 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2287                 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2288                 return;
2289         case SE_COLREF:
2290                 return;
2291         case SE_AGGR_STAR:
2292                 return;
2293         case SE_AGGR_SE:
2294 //              find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2295                 return;
2296         case SE_FUNC:
2297                 if(se->get_aggr_ref() >= 0) return ;
2298
2299                 operands = se->get_operands();
2300                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2301                 for(o=0;o<operands.size();o++){
2302                         if(handle_ind[o]){
2303                                 handle_param_tbl_entry *he;
2304                                 param_se = operands[o];
2305                                 if(param_se->get_operator_type() != SE_LITERAL &&
2306                                                 param_se->get_operator_type() != SE_PARAM){
2307                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2308                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
2309                                         exit(1);
2310                                 }
2311
2312                                 if(param_se->get_operator_type() == SE_PARAM){
2313                                         he = new handle_param_tbl_entry(
2314                                                 se->get_op(), o, param_se->get_param_name(),
2315                                                 param_se->get_data_type()->get_type_str());
2316                                 }else{
2317                                         l = param_se->get_literal();
2318                                         if(l->is_cpx_lit()){
2319                                                 he = new handle_param_tbl_entry(
2320                                                         se->get_op(), o, l->get_cpx_lit_ref(),
2321                                                 param_se->get_data_type()->get_type_str());
2322                                         }else{
2323                                                 he = new handle_param_tbl_entry(
2324                                                         se->get_op(), o, l,
2325                                                 param_se->get_data_type()->get_type_str());
2326                                         }
2327                                 }
2328                                 param_se->set_handle_ref(handle_tbl.size());
2329                                 handle_tbl.push_back(he);
2330                         }else{
2331                                 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
2332                         }
2333                 }
2334                 return;
2335         default:
2336                 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2337                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2338                 exit(1);
2339         }
2340         return;
2341 }
2342
2343
2344 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2345                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2346         vector<literal_t *> litl;
2347         vector<scalarexp_t *> op_list;
2348         scalarexp_t *param_se;
2349         vector<bool> handle_ind;
2350         int o;
2351         literal_t *l;
2352
2353         switch(pr->get_operator_type()){
2354         case PRED_IN:
2355                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2356                 return;
2357         case PRED_COMPARE:
2358                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2359                 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
2360                 return;
2361         case PRED_UNARY_OP:
2362                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2363                 return;
2364         case PRED_BINARY_OP:
2365                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2366                 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
2367                 return;
2368         case PRED_FUNC:
2369                 op_list = pr->get_op_list();
2370                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2371                 for(o=0;o<op_list.size();++o){
2372                         if(handle_ind[o]){
2373                                 handle_param_tbl_entry *he;
2374                                 param_se = op_list[o];
2375                                 if(param_se->get_operator_type() != SE_LITERAL &&
2376                                                 param_se->get_operator_type() != SE_PARAM){
2377                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2378                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
2379                                         exit(1);
2380                                 }
2381
2382                                 if(param_se->get_operator_type() == SE_PARAM){
2383                                         he = new handle_param_tbl_entry(
2384                                                 pr->get_op(), o, param_se->get_param_name(),
2385                                                 param_se->get_data_type()->get_type_str());
2386                                 }else{
2387                                         l = param_se->get_literal();
2388                                         if(l->is_cpx_lit()){
2389                                                 he = new handle_param_tbl_entry(
2390                                                         pr->get_op(), o, l->get_cpx_lit_ref(),
2391                                                 param_se->get_data_type()->get_type_str());
2392                                         }else{
2393                                                 he = new handle_param_tbl_entry(
2394                                                         pr->get_op(), o, l,
2395                                                 param_se->get_data_type()->get_type_str());
2396                                         }
2397                                 }
2398                                 param_se->set_handle_ref(handle_tbl.size());
2399                                 handle_tbl.push_back(he);
2400                         }else{
2401                                 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
2402                         }
2403                 }
2404                 return;
2405         default:
2406                 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2407                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2408                 exit(1);
2409         }
2410
2411         return;
2412 }
2413
2414
2415 //                      Verify the HAVING predicate : it
2416 //                      can access gb vars, aggregates, and constants,
2417 //                      but not colrefs.
2418 //                      return 1 if OK, -1 if bad.
2419 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2420
2421 //                      Extended to deal with cleaning_by, cleaning_when :
2422 //                      verify that any aggregate function
2423 //                      has the multiple output property.
2424
2425 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2426         int l_ret, r_ret;
2427         vector<scalarexp_t *> operands;
2428         vector<data_type *> odt;
2429         int o;
2430
2431         switch(se->get_operator_type()){
2432         case SE_LITERAL:
2433                 return(1);
2434         case SE_PARAM:
2435         case SE_IFACE_PARAM:
2436                 return(1);
2437         case SE_UNARY_OP:
2438                 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
2439         case SE_BINARY_OP:
2440                 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2441                 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2442                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
2443                 return(1);
2444         case SE_COLREF:
2445                 if(se->is_gb()) return 1;
2446                 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2447                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2448                 return(-1);
2449         case SE_AGGR_STAR:
2450         case SE_AGGR_SE:
2451 //                      colrefs and gbrefs allowed.
2452 //                      check for nested aggregation elsewhere, so just return TRUE
2453                 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2454                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2455                                 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2456                         return(-1);
2457                 }
2458
2459 //                              Ensure that aggregate refs allow multiple outputs
2460 //                              in CLEANING_WHEN, CLEANING_BY
2461                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2462                         if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2463                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2464                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2465                                 return(-1);
2466                         }
2467                 }
2468
2469
2470                 return(1);
2471         case SE_FUNC:
2472                 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2473                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2474                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2475                 return(-1);
2476                 }
2477
2478                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2479                         if(se->get_aggr_ref() >= 0  && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2480                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2481                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2482                                 return(-1);
2483                         }
2484                 }
2485
2486                 if(se->get_aggr_ref() >= 0)     // don't descent into aggregates.
2487                         return 1;
2488
2489                 operands = se->get_operands();
2490                 r_ret = 1;
2491                 for(o=0;o<operands.size();o++){
2492                         l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2493                         if(l_ret < 0) r_ret = -1;
2494                 }
2495                 if(r_ret < 0) return(-1); else return(1);
2496                 return(1);
2497         default:
2498                 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2499                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2500                 return(-1);
2501         }
2502         return(-1);
2503 }
2504
2505
2506 //                      Verify the HAVING predicate : it
2507 //                      can access gb vars, aggregates, and constants,
2508 //                      but not colrefs.
2509 //                      return 1 if OK, -1 if bad.
2510 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2511
2512
2513 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2514         int l_ret, r_ret;
2515         vector<literal_t *> litl;
2516         vector<scalarexp_t *> op_list;
2517         int o;
2518
2519         switch(pr->get_operator_type()){
2520         case PRED_IN:
2521                 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
2522         case PRED_COMPARE:
2523                 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2524                 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2525                 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
2526         case PRED_UNARY_OP:
2527                 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2528         case PRED_BINARY_OP:
2529                 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2530                 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2531                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
2532                 return(1);
2533         case PRED_FUNC:
2534                 op_list = pr->get_op_list();
2535                 l_ret = 1;
2536                 for(o=0;o<op_list.size();++o){
2537                         if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
2538                 }
2539                 return(l_ret);
2540
2541         default:
2542                 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2543                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2544         }
2545
2546         return(-1);
2547 }
2548
2549
2550 //////////////////////////////////////////////////////////////////////////
2551 //////////////////////////////////////////////////////////////////////////
2552 ///////                 cnf and pred analysis and manipulation
2553
2554 // ----------------------------------------------------------------------
2555 //  Convert the predicates to a list of conjuncts
2556 //  (not actually cnf).  Do some analysis
2557 //  on their properties.
2558 // ----------------------------------------------------------------------
2559
2560
2561 //  Put into list clist the predicates that
2562 //  are AND'ed together.
2563
2564 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2565
2566   if(pr == NULL) return;
2567
2568   switch(pr->get_operator_type()){
2569      case PRED_COMPARE:
2570         clist.push_back(new cnf_elem(pr));
2571         return;
2572         break;
2573      case PRED_IN:
2574         clist.push_back(new cnf_elem(pr));
2575         return;
2576         break;
2577      case PRED_UNARY_OP:
2578         clist.push_back(new cnf_elem(pr));
2579         return;
2580         break;
2581      case PRED_BINARY_OP:
2582         if(pr->get_op() == "OR"){
2583                         clist.push_back(new cnf_elem(pr));
2584                         return;
2585                 }
2586                 if(pr->get_op() =="AND"){
2587                    make_cnf_from_pr(pr->get_left_pr(),clist);
2588                    make_cnf_from_pr(pr->get_right_pr(),clist);
2589                    return;
2590                 }
2591         case PRED_FUNC:
2592         clist.push_back(new cnf_elem(pr));
2593         return;
2594         break;
2595         default:
2596                 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2597                 exit(1);
2598                         break;
2599            }
2600 }
2601
2602
2603
2604 //  Find out what things are referenced in a se,
2605 //  to use for analyzing a predicate.
2606 //  Currently, is it simple (no operators), does it
2607 //  reference a group-by column, does it reference an
2608 //  attribute of a table.
2609 //
2610 //      analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
2611
2612
2613 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2614  int p;
2615  vector<scalarexp_t *> operand_list;
2616
2617         switch(se->get_operator_type()){
2618         case SE_LITERAL:
2619         case SE_PARAM:
2620         case SE_IFACE_PARAM:
2621                 return;
2622         case SE_COLREF:
2623                 if(se->is_gb() ) g=1;
2624                 else                    a=1;
2625                 return;
2626         case SE_UNARY_OP:
2627                 s=0;
2628                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2629                 return;
2630         case SE_BINARY_OP:
2631                 s=0;
2632                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2633                 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
2634                 return;
2635         case SE_AGGR_STAR:
2636         case SE_AGGR_SE:
2637                 agr = 1;
2638                 return;
2639         case SE_FUNC:
2640                 if(se->get_aggr_ref() >= 0){
2641                         agr = 1;
2642                         return;
2643                 }
2644                 s = 0;
2645                 operand_list = se->get_operands();
2646                 for(p=0;p<operand_list.size();p++){
2647                         analyze_cnf_se(operand_list[p],s,g,a,agr);
2648                 }
2649         break;
2650         }
2651
2652         return;
2653 }
2654
2655
2656
2657 void analyze_cnf_pr(predicate_t *pr, int &g, int &a,  int &agr){
2658 int dum_simple, o;
2659 vector<scalarexp_t *> op_list;
2660
2661
2662         switch(pr->get_operator_type()){
2663         case PRED_COMPARE:
2664                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2665                 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
2666                 return;
2667         case PRED_IN:
2668                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2669                 return;
2670         case PRED_UNARY_OP:
2671                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2672                 return;
2673         case PRED_BINARY_OP:
2674                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2675                 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
2676                 return;
2677         case PRED_FUNC:
2678                 op_list = pr->get_op_list();
2679                 for(o=0;o<op_list.size();++o){
2680                         analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2681                 }
2682                 return;
2683         default:
2684                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2685                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2686                 exit(1);
2687         }
2688 }
2689
2690
2691
2692 //  analyze a conjunct of a predicate.
2693 //  Is it atomic (e.g., a single predicate),
2694 //  and if so do a further analysis.
2695
2696 void analyze_cnf(cnf_elem *c){
2697
2698 //  analyze the predicate.
2699    analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
2700
2701    if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2702                 return;
2703    }
2704
2705
2706 //  its an atomic predicate -- get more info
2707    c->is_atom = 1;
2708
2709         if(c->pr->get_op() == "=")
2710                 c->eq_pred = 1;
2711         else
2712                 c->eq_pred = 0;
2713
2714         if(c->pr->get_operator_type() == PRED_IN)
2715                 c->in_pred = 1;
2716         else
2717                 c->in_pred = 0;
2718
2719         c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2720         analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
2721
2722         if(c->pr->get_operator_type() == PRED_COMPARE){
2723                 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
2724                 analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
2725         }
2726 }
2727
2728 void analyze_constraint_se(scalarexp_t *se,
2729                         int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
2730  int l_agr, l_gb, l_par, l_func, l_op;
2731  int r_agr, r_gb, r_par, r_func, r_op;
2732  int p;
2733  vector<scalarexp_t *> operand_list;
2734
2735         switch(se->get_operator_type()){
2736         case SE_LITERAL:
2737         case SE_IFACE_PARAM:
2738                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2739                 return;
2740         case SE_PARAM:
2741                 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2742                 return;
2743         case SE_COLREF:
2744                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2745                 if(se->is_gb() ){
2746                         if(enter_gb){
2747                                 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2748                         }else{
2749                                 n_gb=1;
2750                         }
2751                 }
2752                 return;
2753         case SE_UNARY_OP:
2754                 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2755                 n_op++;
2756                 return;
2757         case SE_BINARY_OP:
2758                 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2759                 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2760                 n_agr=l_agr+r_agr;
2761                 n_gb=l_gb+r_gb;
2762                 n_par=l_par+r_par;
2763                 n_func=l_func+r_func;
2764                 n_op=l_op+r_op+1;
2765                 return;
2766         case SE_AGGR_STAR:
2767         case SE_AGGR_SE:
2768                 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2769                 return;
2770         case SE_FUNC:
2771                 if(se->get_aggr_ref() >= 0){
2772                         n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2773                         if(Ext_fcns)
2774                                 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2775                         else
2776                                 n_func = 1;
2777                         return;
2778                 }
2779                 n_agr=0; n_gb = 0; n_par = 0;  n_op = 0;
2780                 if(Ext_fcns)
2781                         n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2782                 else
2783                         n_func = 1;
2784                 operand_list = se->get_operands();
2785                 for(p=0;p<operand_list.size();p++){
2786                         analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2787                         n_agr+=l_agr;
2788                         n_gb+=l_gb;
2789                         n_par+=l_par;
2790                         n_func+=l_func;
2791                         n_op += l_op;
2792                 }
2793         break;
2794         }
2795
2796         return;
2797 }
2798
2799 //              Estimate the cost of a constraint.
2800 //              WARNING a lot of cost assumptions are embedded in the code.
2801 void analyze_constraint_pr(predicate_t *pr,
2802                 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2803                 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2804  int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2805  int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2806
2807 int o;
2808 vector<scalarexp_t *> op_list;
2809
2810
2811         switch(pr->get_operator_type()){
2812         case PRED_COMPARE:
2813                 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2814                 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2815                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2816                 n_func=l_func+r_func; n_op=l_op+r_op;
2817                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2818                         pr->get_right_se()->get_data_type())
2819             ){
2820                         n_cmp_s = 0; n_cmp_c=1;
2821                 }else{
2822                         n_cmp_s = 1; n_cmp_c=0;
2823                 }
2824                 n_in = 0; n_pred = 0; n_bool = 0;
2825                 return;
2826         case PRED_IN:
2827 //                      Tread IN predicate as sequence of comparisons
2828                 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2829                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2830                         pr->get_right_se()->get_data_type())
2831             ){
2832                         n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2833                 }else{
2834                         n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2835                 }
2836                 n_in = 0; n_pred = 0; n_bool = 0;
2837                 return;
2838         case PRED_UNARY_OP:
2839                 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2840                 n_bool++;
2841                 return;
2842         case PRED_BINARY_OP:
2843                 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2844                 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2845                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2846                 n_func=l_func+r_func; n_op=l_op+r_op;
2847                 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
2848                 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
2849                 return;
2850         case PRED_FUNC:
2851                 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2852                 if(Ext_fcns)
2853                         n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
2854                 else
2855                         n_pred = 1;
2856                 op_list = pr->get_op_list();
2857                 for(o=0;o<op_list.size();++o){
2858                         analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2859                         n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
2860                 }
2861                 return;
2862         default:
2863                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2864                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2865                 exit(1);
2866         }
2867 }
2868
2869 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2870  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2871         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2872                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2873
2874 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2875         c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2876 }
2877
2878 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2879  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2880         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2881                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2882 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2883         if(n_par || n_agr)
2884                 return false;
2885         int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2886 //printf("cost=%d\n",cost);
2887         return cost<10;
2888 }
2889
2890 //              The prefilter needs to translate constraints on
2891 //              gbvars into constraints involving their underlying SEs.
2892 //              The following two routines attach GB def info.
2893
2894 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2895  int p;
2896  vector<scalarexp_t *> operand_list;
2897
2898         switch(se->get_operator_type()){
2899         case SE_LITERAL:
2900         case SE_IFACE_PARAM:
2901         case SE_PARAM:
2902         case SE_AGGR_STAR:
2903                 return;
2904         case SE_COLREF:
2905                 if(se->is_gb() ){
2906                          se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2907                 }
2908                 return;
2909         case SE_UNARY_OP:
2910                 insert_gb_def_se(se->get_left_se(),gtbl);
2911                 return;
2912         case SE_BINARY_OP:
2913                 insert_gb_def_se(se->get_left_se(),gtbl);
2914                 insert_gb_def_se(se->get_right_se(),gtbl);
2915                 return;
2916         case SE_AGGR_SE:
2917                 insert_gb_def_se(se->get_left_se(),gtbl);
2918                 return;
2919         case SE_FUNC:
2920                 operand_list = se->get_operands();
2921                 for(p=0;p<operand_list.size();p++){
2922                         insert_gb_def_se(operand_list[p],gtbl);
2923                 }
2924         break;
2925         }
2926
2927         return;
2928 }
2929 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2930 vector<scalarexp_t *> op_list;
2931 int o;
2932
2933         switch(pr->get_operator_type()){
2934         case PRED_COMPARE:
2935                 insert_gb_def_se(pr->get_left_se(),gtbl);
2936                 insert_gb_def_se(pr->get_right_se(),gtbl);
2937                 return;
2938         case PRED_IN:
2939                 insert_gb_def_se(pr->get_left_se(),gtbl);
2940                 return;
2941         case PRED_UNARY_OP:
2942                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2943                 return;
2944         case PRED_BINARY_OP:
2945                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2946                 insert_gb_def_pr(pr->get_right_pr(),gtbl);
2947                 return;
2948         case PRED_FUNC:
2949                 op_list = pr->get_op_list();
2950                 for(o=0;o<op_list.size();++o){
2951                         insert_gb_def_se(op_list[o],gtbl);
2952                 }
2953                 return;
2954         default:
2955                 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2956                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2957                 exit(1);
2958         }
2959 }
2960
2961 //              Substitute gbrefs with their definitions
2962 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2963  int p;
2964  vector<scalarexp_t *> operand_list;
2965  scalarexp_t *lse,*rse;
2966  colref_t *cr;
2967  string b_tbl;
2968  int b_idx;
2969
2970         switch(se->get_operator_type()){
2971         case SE_LITERAL:
2972         case SE_IFACE_PARAM:
2973         case SE_PARAM:
2974         case SE_AGGR_STAR:
2975                 return;
2976         case SE_COLREF:
2977                 cr = se->get_colref();
2978                 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2979                 b_idx = Schema->get_table_ref(b_tbl);
2980                 cr->tablevar_ref = b_idx;
2981                 return;
2982         case SE_UNARY_OP:
2983                 lse=se->get_left_se();
2984                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2985                         se->lhs.scalarp = lse->get_right_se();
2986                         subs_gbrefs_se(se,Schema);
2987                         return;
2988                 }
2989                 subs_gbrefs_se(se->get_left_se(),Schema);
2990                 return;
2991         case SE_BINARY_OP:
2992                 lse=se->get_left_se();
2993                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2994                         se->lhs.scalarp = lse->get_right_se();
2995                         subs_gbrefs_se(se,Schema);
2996                         return;
2997                 }
2998                 rse=se->get_right_se();
2999                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3000                         se->rhs.scalarp = rse->get_right_se();
3001                         subs_gbrefs_se(se,Schema);
3002                         return;
3003                 }
3004                 subs_gbrefs_se(se->get_left_se(),Schema);
3005                 subs_gbrefs_se(se->get_right_se(),Schema);
3006                 return;
3007         case SE_AGGR_SE:
3008                 lse=se->get_left_se();
3009                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3010                         se->lhs.scalarp = lse->get_right_se();
3011                         subs_gbrefs_se(se,Schema);
3012                         return;
3013                 }
3014                 subs_gbrefs_se(se->get_left_se(),Schema);
3015                 return;
3016         case SE_FUNC:
3017                 operand_list = se->get_operands();
3018                 for(p=0;p<operand_list.size();p++){
3019                         lse=operand_list[p];
3020                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3021                                 se->param_list[p] = lse->get_right_se();
3022                                 subs_gbrefs_se(se,Schema);
3023                                 return;
3024                         }
3025                 }
3026                 for(p=0;p<operand_list.size();p++){
3027                         subs_gbrefs_se(operand_list[p],Schema);
3028                 }
3029         break;
3030         }
3031
3032         return;
3033 }
3034
3035 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3036 vector<scalarexp_t *> op_list;
3037 int o;
3038 scalarexp_t *lse,*rse;
3039
3040         switch(pr->get_operator_type()){
3041         case PRED_COMPARE:
3042                 lse=pr->get_left_se();
3043                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3044                         pr->lhs.sexp = lse->get_right_se();
3045                         subs_gbrefs_pr(pr,Schema);
3046                         return;
3047                 }
3048                 rse=pr->get_right_se();
3049                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3050                         pr->rhs.sexp = rse->get_right_se();
3051                         subs_gbrefs_pr(pr,Schema);
3052                         return;
3053                 }
3054                 subs_gbrefs_se(pr->get_left_se(),Schema);
3055                 subs_gbrefs_se(pr->get_right_se(),Schema);
3056                 return;
3057         case PRED_IN:
3058                 lse=pr->get_left_se();
3059                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3060                         pr->lhs.sexp = lse->get_right_se();
3061                         subs_gbrefs_pr(pr,Schema);
3062                         return;
3063                 }
3064                 subs_gbrefs_se(pr->get_left_se(),Schema);
3065                 return;
3066         case PRED_UNARY_OP:
3067                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3068                 return;
3069         case PRED_BINARY_OP:
3070                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3071                 subs_gbrefs_pr(pr->get_right_pr(),Schema);
3072                 return;
3073         case PRED_FUNC:
3074                 op_list = pr->get_op_list();
3075                 for(o=0;o<op_list.size();++o){
3076                         lse=op_list[o];
3077                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3078                                 pr->param_list[o] = lse->get_right_se();
3079                                 subs_gbrefs_pr(pr,Schema);
3080                                 return;
3081                         }
3082                         subs_gbrefs_se(op_list[o],Schema);
3083                 }
3084                 return;
3085         default:
3086                 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3087                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3088                 exit(1);
3089         }
3090 }
3091
3092
3093 //              Search for references to "expensive" fields.
3094 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3095  int p;
3096  vector<scalarexp_t *> operand_list;
3097  int cnt=0;
3098 table_def *td;
3099 param_list *plist;
3100
3101         switch(se->get_operator_type()){
3102         case SE_LITERAL:
3103         case SE_IFACE_PARAM:
3104         case SE_PARAM:
3105         case SE_AGGR_STAR:
3106         case SE_AGGR_SE:
3107                 return 0;
3108         case SE_COLREF:
3109                 if(se->is_gb())
3110                         return expensive_refs_se(se->rhs.scalarp,Schema);
3111                 td = Schema->get_table(se->lhs.colref->schema_ref);
3112                 plist = td->get_modifier_list(se->lhs.colref->field);
3113                 if(plist->contains_key("expensive"))
3114                         return 1;
3115                 return 0;
3116         case SE_UNARY_OP:
3117                 return expensive_refs_se(se->get_left_se(),Schema);
3118         case SE_BINARY_OP:
3119                 cnt += expensive_refs_se(se->get_left_se(),Schema);
3120                 cnt += expensive_refs_se(se->get_right_se(),Schema);
3121                 return cnt;
3122         case SE_FUNC:
3123                 operand_list = se->get_operands();
3124                 for(p=0;p<operand_list.size();p++){
3125                         cnt += expensive_refs_se(operand_list[p],Schema);
3126                 }
3127                 return cnt;
3128         break;
3129         }
3130
3131         return 0;
3132 }
3133
3134 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3135 vector<scalarexp_t *> op_list;
3136 int o;
3137 int cnt=0;
3138
3139         switch(pr->get_operator_type()){
3140         case PRED_COMPARE:
3141                 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3142                 cnt += expensive_refs_se(pr->get_right_se(),Schema);
3143                 return cnt;
3144         case PRED_IN:
3145                 return expensive_refs_se(pr->get_left_se(),Schema);
3146         case PRED_UNARY_OP:
3147                 return expensive_refs_pr(pr->get_left_pr(),Schema);
3148         case PRED_BINARY_OP:
3149                 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3150                 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
3151                 return cnt;
3152         case PRED_FUNC:
3153                 op_list = pr->get_op_list();
3154                 for(o=0;o<op_list.size();++o){
3155                         cnt += expensive_refs_se(op_list[o],Schema);
3156                 }
3157                 return cnt;
3158         default:
3159                 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3160                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3161                 exit(1);
3162         }
3163 }
3164
3165
3166 //              TODO: allow "cheap" functions and predicates.
3167 bool simple_field_constraint(cnf_elem *c){
3168         vector<literal_t *> ll;
3169         int l;
3170         predicate_t *p = c->pr;
3171  int l_agr, l_gb, l_par, l_func, l_op;
3172  int r_agr, r_gb, r_par, r_func, r_op;
3173  col_id_set left_colids, right_colids;
3174
3175 //                      Verify that it is a simple atom
3176         switch(p->get_operator_type()){
3177         case PRED_COMPARE:
3178 //                              Must be an equality predicate which references
3179 //                              which referecnes no aggregates, parameters, functions, or
3180 //                              group-by variables, and should be a constraint of
3181 //                              a single colref.
3182 //                              AND should not require a complex comparison.
3183                 if(p->get_op() != "=") return(false);
3184                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3185                 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false);
3186                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3187                    r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3188 //                              I will count on there being no gbvars in the constraint.
3189 //                              TODO: allow gbvars which are colrefs.
3190                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3191                 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
3192                 if(left_colids.size()+right_colids.size() != 1) return(false);
3193
3194
3195 //                      Normalize : the colref should be on the lhs.
3196                 if(right_colids.size() > 0){
3197                         p->swap_scalar_operands();
3198                 }
3199
3200 //                      Disallow complex (and therefore expensive) comparisons.
3201                 if(p->get_left_se()->get_data_type()->complex_comparison(
3202                         p->get_right_se()->get_data_type() ) )
3203                                 return(false);
3204
3205 //                      passed all the tests.
3206                 return(true);
3207         case PRED_IN:
3208 //                      LHS must be a non-gbvar colref.
3209                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3210                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3211 //                              I will count on there being no gbvars in the constraint.
3212 //                              TODO: allow gbvars which are colrefs.
3213                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3214                 if(left_colids.size() != 1) return(false);
3215 //                      Disallow complex (and therefore expensive) comparisons.
3216                 if(p->get_left_se()->get_data_type()->complex_comparison(
3217                         p->get_left_se()->get_data_type() ) )
3218                                 return(false);
3219
3220
3221 //                      All entries in the IN list must be literals
3222 //                      Currently, this is the only possibility.
3223                 return(true);
3224                 break;
3225         case PRED_UNARY_OP:
3226                 return(false);
3227         case PRED_BINARY_OP:
3228                 return(false);
3229         case PRED_FUNC:
3230                 return(false);
3231         default:
3232                 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
3233                         p->get_lineno(), p->get_charno(), p->get_operator_type() );
3234                 exit(1);
3235         }
3236
3237         return(false);
3238 }
3239
3240 //              As the name implies, return the colref constrained by the
3241 //              cnf elem.  I will be counting on the LHS being a SE pointing
3242 //              to a colref.
3243
3244 //                      This fcn assumes that in fact exactly
3245 //                      one colref is constrained.
3246 colref_t *get_constrained_colref(scalarexp_t *se){
3247  int p;
3248  vector<scalarexp_t *> operand_list;
3249 colref_t *ret;
3250
3251         switch(se->get_operator_type()){
3252         case SE_LITERAL:
3253                 return(NULL);
3254         case SE_PARAM:
3255         case SE_IFACE_PARAM:
3256                 return(NULL);
3257         case SE_COLREF:
3258                 return(se->get_colref());
3259         case SE_UNARY_OP:
3260                 return(get_constrained_colref(se->get_left_se()));
3261         case SE_BINARY_OP:
3262                 ret=get_constrained_colref(se->get_left_se());
3263                 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3264                 else return ret;
3265         case SE_AGGR_STAR:
3266         case SE_AGGR_SE:
3267                 return(NULL);
3268         case SE_FUNC:
3269                 if(se->get_aggr_ref() >= 0) return NULL;
3270
3271                 operand_list = se->get_operands();
3272                 for(p=0;p<operand_list.size();p++){
3273                         ret=get_constrained_colref(operand_list[p]);
3274                         if(ret != NULL) return(ret);
3275
3276                 }
3277                 return(NULL);
3278         break;
3279         }
3280
3281         return(NULL);
3282 }
3283
3284
3285 colref_t *get_constrained_colref(predicate_t *p){
3286         return(get_constrained_colref(p->get_left_se()));
3287 }
3288 colref_t *get_constrained_colref(cnf_elem *c){
3289         return get_constrained_colref(c->pr->get_left_se());
3290 }
3291
3292
3293
3294
3295 /*
3296 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3297                                                         string target_fld, string target_tbl, int tblref){
3298
3299 //                      Make a copy of the predicate to be added.
3300 //                      ASSUME no aggregates.
3301         predicate_t *pr = dup_pr(src_p,NULL);
3302
3303 //                      Modify the ref to the base table.
3304 //                      ASSUME lhs is the colref
3305         pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3306         pr->get_left_se()->get_colref()->set_table_ref(tblref);
3307
3308         if(dst->pr == NULL) dst->pr = pr;
3309         else dst->pr = new predicate_t("OR", dst->pr, pr);
3310
3311 }
3312 */
3313
3314
3315 //////////////////////////////////////////////////////
3316 ///////////////         Represent a node in a predicate tree
3317 struct common_pred_node{
3318         set<int> lftas;
3319         predicate_t *pr;
3320         vector<predicate_t *> predecessor_preds;
3321         vector<common_pred_node *> children;
3322
3323         string target_tbl;
3324         string target_fld;
3325         int target_ref;
3326
3327         common_pred_node(){
3328                 pr = NULL;
3329         }
3330 };
3331
3332
3333 predicate_t *make_common_pred(common_pred_node *pn){
3334   int n;
3335
3336         if(pn->children.size() == 0){
3337                 if(pn->pr == NULL){
3338                         fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
3339                         exit(1);
3340                 }
3341                 return( dup_pr(pn->pr,NULL) );
3342         }
3343
3344         predicate_t *curr_pr = make_common_pred( pn->children[0] );
3345     for(n=1;n<pn->children.size();++n){
3346                 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3347         }
3348
3349         if(pn->pr != NULL)
3350                 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
3351
3352         return(curr_pr);
3353 }
3354
3355
3356 bool operator<(const cnf_set &c1, const cnf_set &c2){
3357         if(c1.lfta_id.size() < c2.lfta_id.size())
3358                 return true;
3359         return false;
3360 }
3361
3362
3363 //              Compute the predicates for the prefilter.
3364 //              the prefilter preds are returned in prefilter_preds.
3365 //              pred_ids is the set of predicates used in the prefilter.
3366 //              the encoding is the lfta index, in the top 16 bits,
3367 //              then the index of the cnf element in the bottom 16 bits.
3368 //              This set of for identifying which preds do not need
3369 //              to be generated in the lftas.
3370 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3371         int p, p2, l, c;
3372
3373         vector<cnf_set *> pred_list, sort_list;
3374
3375 //              Create list of tagged, prefilter-safe CNFs.
3376         for(l=0;l<where_list.size();++l){
3377                 for(c=0;c<where_list[l].size();++c){
3378                         if(prefilter_compatible(where_list[l][c],Ext_fcns)){
3379                                 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3380                                         pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3381                         }
3382                 }
3383         }
3384
3385 //              Eliminate duplicates
3386         for(p=0;p<pred_list.size();++p){
3387                 if(pred_list[p]){
3388                         for(p2=p+1;p2<pred_list.size();++p2){
3389                                 if(pred_list[p2]){
3390                                         if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
3391                                                 pred_list[p]->subsume(pred_list[p2]);
3392                                                 delete pred_list[p2];
3393                                                 pred_list[p2] = NULL;
3394                                         }
3395                                 }
3396                         }
3397                 }
3398         }
3399
3400 //              combine preds that occur in the exact same lftas.
3401         for(p=0;p<pred_list.size();++p){
3402                 if(pred_list[p]){
3403                         for(p2=p+1;p2<pred_list.size();++p2){
3404                                 if(pred_list[p2]){
3405                                         if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3406                                                 pred_list[p]->combine_pred(pred_list[p2]);
3407                                                 delete pred_list[p2];
3408                                                 pred_list[p2] = NULL;
3409                                         }
3410                                 }
3411                         }
3412                 }
3413         }
3414
3415 //              Compress the list
3416         for(p=0;p<pred_list.size();++p){
3417                 if(pred_list[p]){
3418                         sort_list.push_back(pred_list[p]);
3419                 }
3420         }
3421 //              Sort it
3422         sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3423
3424 //              Return the top preds, up to 64 of them.
3425         for(p=0;p<sort_list.size() && p<64;p++){
3426                 prefilter_preds.push_back(sort_list[p]);
3427                 sort_list[p]->add_pred_ids(pred_ids);
3428         }
3429
3430 //              Substitute gb refs with their defs
3431 //              While I'm at it, substitute base table sch ref for tblref.
3432         for(p=0;p<prefilter_preds.size() ;p++){
3433                 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3434         }
3435
3436 }
3437
3438
3439
3440
3441
3442 ///////////////////////////////////////////////////////////////////////////
3443 //////////////////////////////////////////////////////////////////////////
3444
3445 //              Find partial functions and register them.
3446 //              Do a DFS so that nested partial fcn calls
3447 //              get evaluated in the right order.
3448 //              Don't drill down into aggregates -- their arguments are evaluated
3449 //              earlier than the select list is.
3450 //
3451 //              Modification for function caching:
3452 //              Pass in a ref counter, and partial fcn indicator.
3453 //              Cache fcns ref'd at least once.
3454 //              pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
3455
3456
3457 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3458                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3459                 ext_fcn_list *Ext_fcns){
3460         vector<scalarexp_t *> operands;
3461         int o, f;
3462
3463         if(se == NULL) return;
3464
3465         switch(se->get_operator_type()){
3466         case SE_LITERAL:
3467         case SE_PARAM:
3468         case SE_IFACE_PARAM:
3469                 return;
3470         case SE_UNARY_OP:
3471                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3472                 return;
3473         case SE_BINARY_OP:
3474                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3475                 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3476                 return;
3477         case SE_COLREF:
3478                 return;
3479         case SE_AGGR_STAR:
3480                 return;
3481         case SE_AGGR_SE:
3482 //              find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3483                 return;
3484         case SE_FUNC:
3485                 if(se->get_aggr_ref() >= 0) return;
3486
3487                 operands = se->get_operands();
3488                 for(o=0;o<operands.size();o++){
3489                         find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3490                 }
3491
3492                 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
3493                         if(fcn_ref_cnt){
3494                           for(f=0;f<pf_list->size();++f){
3495                                 if(is_equivalent_se(se,(*pf_list)[f])){
3496                                         se->set_partial_ref(f);
3497                                         (*fcn_ref_cnt)[f]++;
3498                                         break;
3499                                 }
3500                           }
3501                         }else{
3502                                 f=pf_list->size();
3503                         }
3504                         if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) ||  fcn_ref_cnt)){
3505                                 se->set_partial_ref(pf_list->size());
3506                                 pf_list->push_back(se);
3507                                 if(fcn_ref_cnt){
3508                                         fcn_ref_cnt->push_back(1);
3509                                         is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3510                                 }
3511                         }
3512                 }
3513                 return;
3514         default:
3515                 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3516                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3517                 exit(1);
3518         }
3519         return;
3520 }
3521
3522
3523 void find_partial_fcns_pr(predicate_t *pr,  vector<scalarexp_t *> *pf_list,
3524                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3525                                                                         ext_fcn_list *Ext_fcns){
3526         vector<literal_t *> litl;
3527         vector<scalarexp_t *> op_list;
3528         int o;
3529
3530         switch(pr->get_operator_type()){
3531         case PRED_IN:
3532                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3533                 return;
3534         case PRED_COMPARE:
3535                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3536                 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3537                 return;
3538         case PRED_UNARY_OP:
3539                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3540                 return;
3541         case PRED_BINARY_OP:
3542                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3543                 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3544                 return;
3545         case PRED_FUNC:
3546                 op_list = pr->get_op_list();
3547                 for(o=0;o<op_list.size();++o){
3548                         find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3549                 }
3550                 return;
3551         default:
3552                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3553                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3554                 exit(1);
3555         }
3556
3557         return;
3558 }
3559
3560
3561
3562 void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,
3563                                                                 table_list *Schema, ext_fcn_list *Ext_fcns){
3564         vector<literal_t *> litl;
3565         vector<scalarexp_t *> op_list;
3566         int f,o;
3567
3568         switch(pr->get_operator_type()){
3569         case PRED_IN:
3570                 return;
3571         case PRED_COMPARE:
3572                 return;
3573         case PRED_UNARY_OP:
3574                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3575                 return;
3576         case PRED_BINARY_OP:
3577                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3578                 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
3579                 return;
3580         case PRED_FUNC:
3581                 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
3582                   for(f=0;f<pr_list->size();++f){
3583                         if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3584                                 pr->set_combinable_ref(f);
3585                                 break;
3586                         }
3587                   }
3588                   if(f == pr_list->size()){
3589                         pr->set_combinable_ref(pr_list->size());
3590                         pr_list->push_back(pr);
3591                   }
3592                 }
3593                 return;
3594         default:
3595                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3596                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3597                 exit(1);
3598         }
3599
3600         return;
3601 }
3602
3603
3604 //--------------------------------------------------------------------
3605 //              Collect refs to aggregates.
3606
3607
3608 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3609         vector<scalarexp_t *> operands;
3610         int o;
3611
3612         if(se == NULL) return;
3613
3614         switch(se->get_operator_type()){
3615         case SE_LITERAL:
3616         case SE_PARAM:
3617         case SE_IFACE_PARAM:
3618                 return;
3619         case SE_UNARY_OP:
3620                 collect_agg_refs(se->get_left_se(), agg_refs) ;
3621                 return;
3622         case SE_BINARY_OP:
3623                 collect_agg_refs(se->get_left_se(), agg_refs);
3624                 collect_agg_refs(se->get_right_se(), agg_refs);
3625                 return;
3626         case SE_COLREF:
3627                 return;
3628         case SE_AGGR_STAR:
3629         case SE_AGGR_SE:
3630                 agg_refs.insert(se->get_aggr_ref());
3631                 return;
3632         case SE_FUNC:
3633                 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3634
3635                 operands = se->get_operands();
3636                 for(o=0;o<operands.size();o++){
3637                         collect_agg_refs(operands[o], agg_refs);
3638                 }
3639
3640                 return;
3641         default:
3642                 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3643                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3644                 exit(1);
3645         }
3646         return;
3647 }
3648
3649
3650 void collect_aggr_refs_pr(predicate_t *pr,  set<int> &agg_refs){
3651         vector<literal_t *> litl;
3652         vector<scalarexp_t *> op_list;
3653         int o;
3654
3655         switch(pr->get_operator_type()){
3656         case PRED_IN:
3657                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3658                 return;
3659         case PRED_COMPARE:
3660                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3661                 collect_agg_refs(pr->get_right_se(), agg_refs) ;
3662                 return;
3663         case PRED_UNARY_OP:
3664                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3665                 return;
3666         case PRED_BINARY_OP:
3667                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3668                 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
3669                 return;
3670         case PRED_FUNC:
3671                 op_list = pr->get_op_list();
3672                 for(o=0;o<op_list.size();++o){
3673                         collect_agg_refs(op_list[o],agg_refs);
3674                 }
3675                 return;
3676         default:
3677                 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3678                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3679                 exit(1);
3680         }
3681
3682         return;
3683 }
3684
3685
3686 //--------------------------------------------------------------------
3687 //              Collect previously registered partial fcn refs.
3688 //              Do a DFS so that nested partial fcn calls
3689 //              get evaluated in the right order.
3690 //              Don't drill down into aggregates -- their arguments are evaluated
3691 //              earlier than the select list is.
3692 //              ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
3693
3694 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3695         vector<scalarexp_t *> operands;
3696         int o;
3697
3698         if(se == NULL) return;
3699
3700         switch(se->get_operator_type()){
3701         case SE_LITERAL:
3702         case SE_PARAM:
3703         case SE_IFACE_PARAM:
3704                 return;
3705         case SE_UNARY_OP:
3706                 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3707                 return;
3708         case SE_BINARY_OP:
3709                 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3710                 collect_partial_fcns(se->get_right_se(), pfcn_refs);
3711                 return;
3712         case SE_COLREF:
3713                 return;
3714         case SE_AGGR_STAR:
3715                 return;
3716         case SE_AGGR_SE:
3717 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3718                 return;
3719         case SE_FUNC:
3720                 if(se->get_aggr_ref() >= 0) return;
3721
3722                 operands = se->get_operands();
3723                 for(o=0;o<operands.size();o++){
3724                         collect_partial_fcns(operands[o], pfcn_refs);
3725                 }
3726
3727                 if(se->is_partial()){
3728                         pfcn_refs.insert(se->get_partial_ref());
3729                 }
3730
3731                 return;
3732         default:
3733                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3734                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3735                 exit(1);
3736         }
3737         return;
3738 }
3739
3740
3741 void collect_partial_fcns_pr(predicate_t *pr,  set<int> &pfcn_refs){
3742         vector<literal_t *> litl;
3743         vector<scalarexp_t *> op_list;
3744         int o;
3745
3746         switch(pr->get_operator_type()){
3747         case PRED_IN:
3748                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3749                 return;
3750         case PRED_COMPARE:
3751                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3752                 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
3753                 return;
3754         case PRED_UNARY_OP:
3755                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
3756                 return;
3757         case PRED_BINARY_OP:
3758                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3759                 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
3760                 return;
3761         case PRED_FUNC:
3762                 op_list = pr->get_op_list();
3763                 for(o=0;o<op_list.size();++o){
3764                         collect_partial_fcns(op_list[o],pfcn_refs);
3765                 }
3766                 return;
3767         default:
3768                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3769                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3770                 exit(1);
3771         }
3772
3773         return;
3774 }
3775
3776
3777
3778
3779 ///////////////////////////////////////////////////////////////
3780 ////////////    Exported Functions      ///////////////////////////
3781 ///////////////////////////////////////////////////////////////
3782
3783
3784 //              Count and collect refs to interface parameters.
3785
3786 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3787         vector<scalarexp_t *> operands;
3788         int o;
3789         int ret = 0;
3790
3791         if(se == NULL) return 0;
3792
3793         switch(se->get_operator_type()){
3794         case SE_LITERAL:
3795         case SE_PARAM:
3796                 return 0;
3797         case SE_IFACE_PARAM:
3798                         ifpnames.insert(se->get_ifpref()->to_string());
3799                 return 1;
3800         case SE_UNARY_OP:
3801                 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
3802         case SE_BINARY_OP:
3803                 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3804                 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
3805                 return ret;
3806         case SE_COLREF:
3807                 return 0;
3808         case SE_AGGR_STAR:
3809                 return 0;
3810         case SE_AGGR_SE:
3811 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3812                 return 0;
3813         case SE_FUNC:
3814                 if(se->get_aggr_ref() >= 0) return 0;
3815
3816                 operands = se->get_operands();
3817                 for(o=0;o<operands.size();o++){
3818                         ret += count_se_ifp_refs(operands[o], ifpnames);
3819                 }
3820
3821                 return ret;
3822         default:
3823                 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3824                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3825                 exit(1);
3826         }
3827         return 0;
3828 }
3829
3830
3831 int count_pr_ifp_refs(predicate_t *pr,  set<string> &ifpnames){
3832         vector<literal_t *> litl;
3833         vector<scalarexp_t *> op_list;
3834         int o;
3835         int ret = 0;
3836         if(pr == NULL) return 0;
3837
3838         switch(pr->get_operator_type()){
3839         case PRED_IN:
3840                 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3841         case PRED_COMPARE:
3842                 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3843                 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
3844                 return ret;
3845         case PRED_UNARY_OP:
3846                 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
3847         case PRED_BINARY_OP:
3848                 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3849                 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
3850                 return ret;
3851         case PRED_FUNC:
3852                 op_list = pr->get_op_list();
3853                 for(o=0;o<op_list.size();++o){
3854                         ret += count_se_ifp_refs(op_list[o],ifpnames);
3855                 }
3856                 return ret;
3857         default:
3858                 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3859                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3860                 exit(1);
3861         }
3862
3863         return 0;
3864 }
3865
3866 //              Resolve ifp refs, convert them to string literals.
3867
3868 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb,  string &err){
3869         vector<scalarexp_t *> operands;
3870         vector<string> ifvals;
3871         int o;
3872         int ierr;
3873         string serr;
3874         int ret = 0;
3875         literal_t *tmp_l;
3876         ifpref_t *ir;
3877
3878         if(se == NULL) return 0;
3879
3880         switch(se->get_operator_type()){
3881         case SE_LITERAL:
3882         case SE_PARAM:
3883                 return 0;
3884         case SE_IFACE_PARAM:
3885                 ir = se->get_ifpref();
3886                 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
3887                 if(ierr){
3888                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
3889                         return 1;
3890                 }
3891                 if(ifvals.size() == 0){
3892                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
3893                         return 1;
3894                 }
3895                 if(ifvals.size() > 1){
3896                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
3897                         return 1;
3898                 }
3899                 tmp_l = new literal_t( ifvals[0]);
3900                 se->convert_to_literal(tmp_l);
3901                 return 0;
3902         case SE_UNARY_OP:
3903                 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
3904         case SE_BINARY_OP:
3905                 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3906                 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
3907                 return ret;
3908         case SE_COLREF:
3909                 return 0;
3910         case SE_AGGR_STAR:
3911                 return 0;
3912         case SE_AGGR_SE:
3913 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3914                 return 0;
3915         case SE_FUNC:
3916                 if(se->get_aggr_ref() >= 0) return 0;
3917
3918                 operands = se->get_operands();
3919                 for(o=0;o<operands.size();o++){
3920                         ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
3921                 }
3922
3923                 return ret;
3924         default:
3925                 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3926                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3927                 exit(1);
3928         }
3929         return 0;
3930 }
3931
3932
3933 int resolve_pr_ifp_refs(predicate_t *pr,  string ifm, string ifn, ifq_t *ifdb,  string &err){
3934         vector<literal_t *> litl;
3935         vector<scalarexp_t *> op_list;
3936         int o;
3937         int ret = 0;
3938
3939         switch(pr->get_operator_type()){
3940         case PRED_IN:
3941                 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3942         case PRED_COMPARE:
3943                 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3944                 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
3945                 return ret;
3946         case PRED_UNARY_OP:
3947                 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
3948         case PRED_BINARY_OP:
3949                 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3950                 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
3951                 return ret;
3952         case PRED_FUNC:
3953                 op_list = pr->get_op_list();
3954                 for(o=0;o<op_list.size();++o){
3955                         ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
3956                 }
3957                 return ret;
3958         default:
3959                 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3960                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3961                 exit(1);
3962         }
3963
3964         return 0;
3965 }
3966
3967
3968 string impute_query_name(table_exp_t *fta_tree, string default_nm){
3969         string retval = fta_tree->get_val_of_name("query_name");
3970         if(retval == "") retval = default_nm;
3971         if(retval == "") retval = "default_query";
3972         return(retval);
3973 }
3974
3975 //              Convert the parse tree into an intermediate form,
3976 //              which admits analysis better.
3977 //
3978 //              TODO : rationalize the error return policy.
3979 //
3980 //              TODO : the query_summary_class object contains
3981 //                      the parse tree.
3982 //              TODO: revisit the issue when nested subqueries are implemented.
3983 //              One possibility: implement accessor methods to hide the
3984 //              complexity
3985 //              For now: this class contains data structures not in table_exp_t
3986 //              (with a bit of duplication)
3987
3988 //              Return NULL on error.
3989 //              print error messages to stderr.
3990
3991
3992 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3993                                 ext_fcn_list *Ext_fcns, string default_name){
3994         int i,j, k, retval;
3995
3996 //                      Create the summary struct -- no analysis is done here.
3997         query_summary_class *qs = new query_summary_class(fta_tree);
3998         qs->query_type = fta_tree->query_type;
3999
4000 //////////////          Do common analysis
4001
4002 //              Extract query name.  Already imputed for the qnodes.
4003 //      qs->query_name = impute_query_name(fta_tree, default_name);
4004         qs->query_name = default_name;
4005 //printf("query name is %s\n",qs->query_name.c_str());
4006
4007 //              extract definitions.  Don't grab the query name.
4008
4009         map<string, string> nmap = fta_tree->get_name_map();
4010         map<string, string>::iterator nmi;
4011         for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4012                 string pname = (*nmi).first;
4013                 if(pname != "query_name" )
4014                         (qs->definitions)[pname] = (*nmi).second;
4015         }
4016
4017 ///
4018 ///                             FROM analysis
4019
4020 //              First, verify that all the referenced tables are defined.
4021 //              Then, bind the tablerefs in the FROM list to schemas in
4022 //              the schema list.
4023         tablevar_list_t *tlist = fta_tree->get_from();
4024         vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4025
4026         bool found_error = false;
4027         for(i=0;i<tbl_vec.size();i++){
4028                 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4029                 if(sch_no < 0)  {
4030                   fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4031                         tbl_vec[i]->get_schema_name().c_str() );
4032                   fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4033                                         tbl_vec[i]->get_charno() );
4034                   return(NULL);
4035                 }
4036
4037                 tbl_vec[i]->set_schema_ref(sch_no);
4038
4039 //                              If accessing a UDOP, mangle the name
4040 //                      This needs to be done in translate_fta.cc, not here.
4041 /*
4042                 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4043                         string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4044                         tbl_vec[i]->set_schema_name(mngl_name);
4045                 }
4046 */
4047
4048 //                      No FTA schema should have an interface defined on it.
4049                 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4050                         fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4051                 }
4052 //                      Fill in default interface
4053                 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4054                         tbl_vec[i]->set_interface("default");
4055                         tbl_vec[i]->set_ifq(true);
4056                 }
4057 //                      Fill in default machine
4058                 if(tbl_vec[i]->get_interface()!=""  && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4059                         tbl_vec[i]->set_machine(hostname);
4060                 }
4061
4062                 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4063 //                      Record the set of interfaces accessed
4064                         string ifstr;
4065                         if(tbl_vec[i]->get_ifq()){
4066                                 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4067                         }else{
4068                                 if(tbl_vec[i]->get_machine() != "localhost"){
4069                                         ifstr = "&apos;"+tbl_vec[i]->get_machine()+"&apos;."+tbl_vec[i]->get_interface();
4070                                 }else{
4071                                         ifstr = tbl_vec[i]->get_interface();
4072                                 }
4073                         }
4074 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4075                         if(qs->definitions.count("_referenced_ifaces")){
4076                                 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4077                         }
4078                         qs->definitions["_referenced_ifaces"] = ifstr;
4079                 }
4080
4081         }
4082         if(found_error) return(NULL);
4083
4084 //                      Ensure that all tablevars have are named
4085 //                      and that no two tablevars have the same name.
4086         int tblvar_no = 0;
4087 //              First, gather the set of variable
4088         set<string> tblvar_names;
4089         for(i=0;i<tbl_vec.size();i++){
4090                 if(tbl_vec[i]->get_var_name() != ""){
4091                         if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4092                                 fprintf(stderr,"ERROR, query has two table variables named %s.  line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4093                                 return(NULL);
4094                         }
4095                         tblvar_names.insert(tbl_vec[i]->get_var_name());
4096                 }
4097         }
4098 //              Now generate variable names for unnamed tablevars
4099         for(i=0;i<tbl_vec.size();i++){
4100                 if(tbl_vec[i]->get_var_name() == ""){
4101                         char tmpstr[200];
4102                         sprintf(tmpstr,"_t%d",tblvar_no);
4103                         string newvar = tmpstr;
4104                         while(tblvar_names.count(newvar) > 0){
4105                                 tblvar_no++;
4106                                 sprintf(tmpstr,"_t%d",tblvar_no);
4107                                 newvar = tmpstr;
4108                         }
4109                         tbl_vec[i]->set_range_var(newvar);
4110                         tblvar_names.insert(newvar);
4111                 }
4112         }
4113
4114 //              Process inner/outer join properties
4115         int jprop = fta_tree->get_from()->get_properties();
4116 //              Require explicit INNER_JOIN, ... specification for join queries.
4117         if(jprop < 0){
4118                 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4119                         fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, FILTER_JOIN.\n");
4120                         return(NULL);
4121                 }
4122         }
4123
4124         if(jprop == OUTER_JOIN_PROPERTY){
4125                 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4126         }
4127         if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4128                 tbl_vec[0]->set_property(1);
4129         if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4130                 tbl_vec[tbl_vec.size()-1]->set_property(1);
4131         if(jprop == FILTER_JOIN_PROPERTY){
4132                 if(fta_tree->get_from()->get_temporal_range() == 0){
4133                         fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4134                         return NULL;
4135                 }
4136                 if(tbl_vec.size() != 2){
4137                         fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4138                         return NULL;
4139                 }
4140                 colref_t *cr = fta_tree->get_from()->get_colref();
4141                 string field = cr->get_field();
4142
4143                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4144                 if(fi0 < 0){
4145                         fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4146                         return NULL;
4147                 }
4148                 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4149                 cr->set_tablevar_ref(0);
4150                 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4151                 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4152                 data_type *dt0 = new data_type(type_name, modifiers);
4153                 string dt0_type = dt0->get_type_str();
4154                 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4155 //              if(dt0->get_type_str() != "UINT"){
4156                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4157                         return NULL;
4158                 }
4159                 if(! dt0->is_increasing()){
4160                         fprintf(stderr,"ERROR, the temporal attribtue in a filter join must be temporal increasing.\n");
4161                         return NULL;
4162                 }
4163         }
4164
4165
4166
4167 /////////////////////
4168 ///             Build the query param table
4169         vector<var_pair_t *> query_params = fta_tree->query_params;
4170         int p;
4171         for(p=0;p<query_params.size();++p){
4172                 string pname = query_params[p]->name;
4173                 string dtname = query_params[p]->val;
4174
4175                 if(pname == ""){
4176                         fprintf(stderr,"ERROR parameter has empty name.\n");
4177                         found_error = true;
4178                 }
4179                 if(dtname == ""){
4180                         fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4181                         found_error = true;
4182                 }
4183                 data_type *dt = new data_type(dtname);
4184                 if(!(dt->is_defined())){
4185                         fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4186                         found_error = true;
4187                 }
4188
4189                 qs->add_query_param(pname, dt, false);
4190         }
4191         if(found_error) return(NULL);
4192 //              unpack the param table to a global for easier analysis.
4193         param_tbl=qs->param_tbl;
4194
4195 //////////////////              MERGE specialized analysis
4196
4197         if(qs->query_type == MERGE_QUERY){
4198 //                      Verify that
4199 //                              1) there are two *different* streams ref'd in the FROM clause
4200 //                                      However, only emit a warning.
4201 //                                      (can't detect a problem if one of the interfaces is the
4202 //                                       default interface).
4203 //                              2) They have the same layout (i.e. same types, but the
4204 //                                      names can be different)
4205 //                              3) the two columns can unambiguously be mapped to
4206 //                                      fields of the two tables, one per table.  Exception:
4207 //                                      the column names are the same and exist in both tables.
4208 //                                      FURTHERMORE the positions must be the same
4209 //                              4) after mapping, verify that both colrefs are temporal
4210 //                                      and in the same direction.
4211                 if(tbl_vec.size() < 2){
4212                         fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4213                         return(NULL);
4214                 }
4215
4216                 vector<field_entry *> fev0 = schema->get_fields(
4217                         tbl_vec[0]->get_schema_name()
4218                 );
4219
4220
4221                 int cv;
4222                 for(cv=1;cv<tbl_vec.size();++cv){
4223                         vector<field_entry *> fev1 = schema->get_fields(
4224                                 tbl_vec[cv]->get_schema_name()
4225                         );
4226
4227                         if(fev0.size() != fev1.size()){
4228                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4229                                 return(NULL);
4230                         }
4231
4232 //                      Only need to ensure that the list of types are the same.
4233 //                      The first table supplies the output colnames,
4234 //                      and all temporal properties are lost, except for the
4235 //                      merge-by columns.
4236                         int f;
4237                         for(f=0;f<fev0.size();++f){
4238                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4239                                 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4240                                 if(! dt0.equal_subtypes(&dt1) ){
4241                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4242                                         return(NULL);
4243                                 }
4244                         }
4245                 }
4246
4247 //              copy over the merge-by cols.
4248                 qs->mvars = fta_tree->mergevars;
4249
4250                 if(qs->mvars.size() == 0){      // need to discover the merge vars.
4251                         int mergevar_pos = -1;
4252                         int f;
4253                         for(f=0;f<fev0.size();++f){
4254                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4255                                 if(dt0.is_temporal()){
4256                                         mergevar_pos = f;
4257                                         break;
4258                                 }
4259                         }
4260                         if(mergevar_pos >= 0){
4261                                 for(cv=0;cv<tbl_vec.size();++cv){
4262                                         vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4263                                         qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4264                                 }
4265                         }else{
4266                                 fprintf(stderr,"ERROR, no merge-by column found.\n");
4267                                 return(NULL);
4268                         }
4269                 }
4270
4271 //                      Ensure same number of tables, merge cols.
4272                 if(tbl_vec.size() != qs->mvars.size()){
4273                         fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4274                         return(NULL);
4275                 }
4276
4277 //              Ensure that the merge-by are from different tables
4278 //              also, sort colrefs so that they align with the FROM list using tmp_crl
4279                 set<int> refd_sources;
4280                 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4281                 for(cv=0;cv<qs->mvars.size();++cv){
4282                         int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4283                         if(tblvar<0){
4284                                 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4285                         }
4286                         refd_sources.insert(tblvar);
4287                         tmp_crl[tblvar] = qs->mvars[cv];
4288                 }
4289                 if(refd_sources.size() != qs->mvars.size()){
4290                         fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4291                         return(NULL);
4292                 }
4293
4294 //                      1-1 mapping, so use tmp_crl as the merge column list.
4295                 qs->mvars = tmp_crl;
4296
4297
4298
4299 //                      Look up the colrefs in their schemas, verify that
4300 //                      they are at the same place, that they are both temporal
4301 //                      in the same way.
4302 //                      It seems that this should be done more in the schema objects.
4303                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4304                 if(fi0 < 0){
4305                         fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4306                         exit(1);
4307                 }
4308                 for(cv=1;cv<qs->mvars.size();++cv){
4309                         int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4310                         if(fi0!=fi1){
4311                                 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4312                                 return NULL;
4313                         }
4314                 }
4315
4316                 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4317                 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4318                 if( (!dt0.is_temporal()) ){
4319                         fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4320                         return(NULL);
4321                 }
4322                 for(cv=0;cv<qs->mvars.size();++cv){
4323                         field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4324                         data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4325                         if( (!dt1.is_temporal()) ){
4326                                 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4327                                 return(NULL);
4328                         }
4329
4330
4331                         if( dt0.get_temporal() != dt1.get_temporal()){
4332                                 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4333                                 return(NULL);
4334                         }
4335                 }
4336
4337 //                      If there is a SLACK specification, verify
4338 //                      that it is literal-only and that its type is compatible
4339 //                      with that of the merge columns
4340                 qs->slack = fta_tree->slack;
4341                 if(qs->slack){
4342                         if(! literal_only_se(qs->slack)){
4343                                 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4344                                 return NULL;
4345                         }
4346
4347                         assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4348                         data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4349                         if(sdt.get_type() == undefined_t){
4350                                 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4351                                 return NULL;
4352                         }
4353                 }
4354
4355
4356 //                      All the tests have passed, there is nothing
4357 //                      else to fill in.
4358
4359         }
4360
4361 //////////////////              SELECT specialized analysis
4362
4363         if(qs->query_type == SELECT_QUERY){
4364 //              unpack the gb_tbl and aggr_tbl objects into globals, for easier
4365 //              syntax (param_tbl was already unpacked above).
4366         gb_tbl = qs->gb_tbl;
4367         aggr_tbl = qs->aggr_tbl;
4368
4369
4370 //              Build the table of group-by attributes.
4371 //              (se processing done automatically).
4372 //              NOTE : Doing the SE processing here is getting cumbersome,
4373 //                      I should process these individually.
4374 //              NOTE : I should check for duplicate names.
4375 //              NOTE : I should ensure that the def of one GB does not
4376 //                      reference the value of another.
4377         vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4378         int n_temporal = 0;
4379         string temporal_gbvars = "";
4380         map<string, int> gset_gbnames;
4381
4382 //              For generating the set of GB patterns for this aggregation query.
4383         vector<bool> inner_pattern;
4384         vector<vector<bool> > pattern_set;
4385         vector<vector<vector<bool> > > pattern_components;
4386
4387         vector<gb_t *> r_gbs, c_gbs, g_gbs;
4388         int n_patterns;
4389
4390         for(i=0;i<gb_list.size();i++){
4391                 switch(gb_list[i]->type){
4392                 case gb_egb_type:
4393                         retval = gb_tbl->add_gb_attr(
4394                                 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4395                         );
4396                         if(retval < 0){
4397                                 return NULL;  // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4398                         }else{
4399                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4400                                         n_temporal++;
4401                                         if(temporal_gbvars != "") temporal_gbvars+=" ";
4402                                         temporal_gbvars += gb_tbl->get_name(i);
4403                                 }
4404                         }
4405
4406                         inner_pattern.clear();
4407                         pattern_set.clear();
4408                         inner_pattern.push_back(true);
4409                         pattern_set.push_back(inner_pattern);
4410                         pattern_components.push_back(pattern_set);
4411
4412                         gb_tbl->gb_entry_type.push_back("");
4413                         gb_tbl->gb_entry_count.push_back(1);
4414                         gb_tbl->pattern_components.push_back(pattern_set);
4415
4416                 break;
4417                 case rollup_egb_type:
4418                         r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4419                         for(j=0;j<r_gbs.size();++j){
4420                                 retval = gb_tbl->add_gb_attr(
4421                                         r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4422                                 );
4423                                 if(retval < 0){
4424                                         found_error = true;
4425                                 }else{          // rollup gb can't be temporal
4426                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4427                                 }
4428                         }
4429
4430                         inner_pattern.resize(r_gbs.size());
4431                         pattern_set.clear();
4432                         for(j=0;j<=r_gbs.size();++j){
4433                                 for(k=0;k<r_gbs.size();++k){
4434                                         if(k < j)
4435                                                 inner_pattern[k] = true;
4436                                         else
4437                                                 inner_pattern[k] = false;
4438                                 }
4439                                 pattern_set.push_back(inner_pattern);
4440                         }
4441                         pattern_components.push_back(pattern_set);
4442
4443                         gb_tbl->gb_entry_type.push_back("ROLLUP");
4444                         gb_tbl->gb_entry_count.push_back(r_gbs.size());
4445                         gb_tbl->pattern_components.push_back(pattern_set);
4446                 break;
4447                 case cube_egb_type:
4448                         c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4449                         for(j=0;j<c_gbs.size();++j){
4450                                 retval = gb_tbl->add_gb_attr(
4451                                         c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4452                                 );
4453                                 if(retval < 0){
4454                                         found_error = true;
4455                                 }else{          // cube gb can't be temporal
4456                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4457                                 }
4458                         }
4459
4460                         inner_pattern.resize(c_gbs.size());
4461                         pattern_set.clear();
4462                         n_patterns = 1 << c_gbs.size();
4463                         for(j=0;j<n_patterns;++j){
4464                                 int test_bit = 1;
4465                                 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4466                                         if((j & test_bit) != 0)
4467                                                 inner_pattern[k] = true;
4468                                         else
4469                                                 inner_pattern[k] = false;
4470                                 }
4471                                 pattern_set.push_back(inner_pattern);
4472                         }
4473                         pattern_components.push_back(pattern_set);
4474
4475                         gb_tbl->gb_entry_type.push_back("CUBE");
4476                         gb_tbl->gb_entry_count.push_back(c_gbs.size());
4477                         gb_tbl->pattern_components.push_back(pattern_set);
4478                 break;
4479                 case gsets_egb_type:
4480                 {
4481                         gset_gbnames.clear();
4482                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4483                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4484                                 for(k=0;k<g_gbs.size();++k){
4485                                         if(g_gbs[k]->type != GB_COLREF){
4486                                                 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4487                                                 found_error = true;
4488                                         }else{
4489                                                 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4490                                                         retval = gb_tbl->add_gb_attr(
4491                                                                 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4492                                                         );
4493                                                         if(retval < 0){
4494                                                                 found_error = true;
4495                                                         }else{          // gsets gb can't be temporal
4496                                                                 gb_tbl->reset_temporal(gb_tbl->size()-1);
4497                                                         }
4498                                                         int pos = gset_gbnames.size();
4499                                                         gset_gbnames[g_gbs[k]->name] = pos;
4500                                                 }
4501                                         }
4502                                 }
4503                         }
4504
4505                         if(gset_gbnames.size() > 63){
4506                                 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4507                                 found_error = true;
4508                         }
4509
4510                         inner_pattern.resize(gset_gbnames.size());
4511                         pattern_set.clear();
4512                         set<unsigned long long int> signatures;
4513                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4514                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4515                                 set<string> refd_gbs;
4516                                 for(k=0;k<g_gbs.size();++k){
4517                                         refd_gbs.insert(g_gbs[k]->name);
4518                                 }
4519                                 fill(inner_pattern.begin(),inner_pattern.end(),false);
4520                                 unsigned long long int signature = 0;
4521                                 set<string>::iterator ssi;
4522                                 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4523                                         inner_pattern[gset_gbnames[(*ssi)]] = true;
4524                                         signature |= (1 << gset_gbnames[(*ssi)]);
4525                                 }
4526                                 if(signatures.count(signature)){
4527                                         fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4528                                         set<string>::iterator ssi;
4529                                         for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4530                                                 fprintf(stderr," %s",(*ssi).c_str());
4531                                         }
4532                                         fprintf(stderr,"\n");
4533                                 }else{
4534                                         signatures.insert(signature);
4535                                         pattern_set.push_back(inner_pattern);
4536                                 }
4537                         }
4538                         pattern_components.push_back(pattern_set);
4539
4540                         gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4541                         gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4542                         gb_tbl->pattern_components.push_back(pattern_set);
4543                 }
4544                 break;
4545                 default:
4546                 break;
4547                 }
4548         }
4549         if(found_error) return(NULL);
4550         if(n_temporal > 1){
4551                 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s).  Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4552                 return NULL;
4553         }
4554
4555 //              Compute the set of patterns.  Take the cross product of all pattern components.
4556         vector<vector<bool> > gb_patterns;
4557         int n_components = pattern_components.size();
4558         vector<int> pattern_pos(n_components,0);
4559         bool done = false;
4560         while(! done){
4561                 vector<bool> pattern;
4562                 for(j=0;j<n_components;j++){
4563                         pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4564                                 pattern_components[j][pattern_pos[j]].end());
4565                 }
4566                 gb_patterns.push_back(pattern);
4567                 for(j=0;j<n_components;j++){
4568                         pattern_pos[j]++;
4569                         if(pattern_pos[j] >= pattern_components[j].size())
4570                                 pattern_pos[j] = 0;
4571                         else
4572                                 break;
4573                 }
4574                 if(j >= n_components)
4575                         done = true;
4576         }
4577         gb_tbl->gb_patterns = gb_patterns;
4578
4579
4580 //              Process the supergroup, if any.
4581         vector<colref_t *> sgb = fta_tree->get_supergb();
4582         for(i=0;i<sgb.size();++i){
4583                 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4584                 if(gbr < 0){
4585                         fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4586                         found_error = true;
4587                 }
4588                 if(qs->sg_tbl.count(gbr)){
4589                         fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4590                 }
4591                 qs->sg_tbl.insert(gbr);
4592         }
4593         if(found_error) return(NULL);
4594
4595         if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4596                 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4597                 return NULL;
4598         }
4599
4600
4601
4602         predicate_t *wh = fta_tree->get_where();
4603         predicate_t *hv = fta_tree->get_having();
4604         predicate_t *cw = fta_tree->get_cleaning_when();
4605         predicate_t *cb = fta_tree->get_cleaning_by();
4606         predicate_t *closew = fta_tree->get_closing_when();
4607
4608         if(closew != NULL  && gb_tbl->gb_patterns.size()>1){
4609                 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4610                 return NULL;
4611         }
4612
4613
4614
4615 //              Verify that all column references are valid, and if so assign
4616 //              the data type.
4617
4618         vector<select_element *> sl_list = fta_tree->get_sl_vec();
4619         for(i=0;i<sl_list.size();i++){
4620                 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4621                 if(retval < 0) found_error = true;
4622         }
4623         if(wh != NULL)
4624                 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4625         if(retval < 0) found_error = true;
4626         if(hv != NULL)
4627                 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4628         if(retval < 0) found_error = true;
4629         if(cw != NULL)
4630                 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4631         if(retval < 0) found_error = true;
4632         if(cb != NULL)
4633                 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4634         if(retval < 0) found_error = true;
4635         if(closew != NULL)
4636                 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4637         if(retval < 0) found_error = true;
4638
4639         if(found_error) return(NULL);
4640
4641 //              Verify that all of the scalar expressions
4642 //              and comparison predicates have compatible types.
4643
4644         n_temporal = 0;
4645         string temporal_output_fields;
4646         for(i=0;i<sl_list.size();i++){
4647                 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4648                 if(retval < 0){
4649                          found_error = true;
4650                 }else{
4651                         if(sl_list[i]->se->get_data_type()->is_temporal()){
4652                                 n_temporal++;
4653                                 temporal_output_fields += " "+int_to_string(i);
4654                         }
4655                 }
4656         }
4657         if(n_temporal > 1){
4658                 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s).  Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4659                 found_error=true;
4660         }
4661         if(wh != NULL)
4662                 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4663         if(retval < 0) found_error = true;
4664         if(hv != NULL)
4665                 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4666         if(retval < 0) found_error = true;
4667         if(cw != NULL)
4668                 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4669         if(retval < 0) found_error = true;
4670         if(cb != NULL)
4671                 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4672         if(retval < 0) found_error = true;
4673         if(closew != NULL)
4674                 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4675         if(retval < 0) found_error = true;
4676
4677         if(found_error) return(NULL);
4678
4679 //                      Impute names for the unnamed columns.
4680         set<string> curr_names;
4681         int s;
4682         for(s=0;s<sl_list.size();++s){
4683                 curr_names.insert(sl_list[s]->name);
4684         }
4685         for(s=0;s<sl_list.size();++s){
4686                 if(sl_list[s]->name == "")
4687                         sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4688         }
4689
4690
4691 //              Check the aggregates.
4692 //              No aggrs allowed in the WHERE predicate.
4693 //              (no aggrs in the GB defs, but that is examined elsewhere)
4694 //              Therefore, aggregates are allowed only in the select clause.
4695 //
4696 //              The query is an aggregation query if there is a group-by clause, or
4697 //              if any aggregate is referenced.  If there is a group-by clause,
4698 //              at least one aggregate must be referenced.
4699 //              If the query is an aggregate query, the scalar expressions in
4700 //              the select clause can reference only constants, aggregates, or group-by
4701 //              attributes.
4702 //              Also, if the query is an aggregate query, build a table referencing
4703 //              the aggregates.
4704 //
4705 //              No nested aggregates allowed.
4706 //
4707
4708 //              First, count references in the WHERE predicate.
4709 //              (if there are any references, report an error).
4710 //                      can ref group vars, tuple fields, and stateful fcns.
4711
4712         if(wh != NULL){
4713                 retval = count_aggr_pred(wh, true);
4714                 if(retval > 0){
4715                         fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4716                         return(NULL);
4717                 }
4718         }
4719
4720 //              NOTE : Here I need an analysis of the having clause
4721 //              to verify that it only refs GB attrs and aggregates.
4722 //                      (also, superaggregates, stateful fcns)
4723         if(hv!=NULL){
4724                 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4725                 if(retval < 0) return(NULL);
4726         }
4727
4728 //              Cleaning by has same reference rules as Having
4729         if(cb!=NULL){
4730                 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4731                 if(retval < 0) return(NULL);
4732         }
4733
4734 //              Cleaning when has same reference rules as Having,
4735 //              except that references to non-superaggregates are not allowed.
4736 //              This is tested for when "CLEANING_BY" is passed in as the clause.
4737         if(cw!=NULL){
4738                 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4739                 if(retval < 0) return(NULL);
4740         }
4741
4742 //              CLOSING_WHEN : same rules as HAVING
4743         if(closew!=NULL){
4744                 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4745                 if(retval < 0) return(NULL);
4746         }
4747
4748
4749 //              Collect aggregates in the HAVING and CLEANING clauses
4750         if(hv != NULL){
4751                 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4752         }
4753         if(cw != NULL){
4754                 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4755         }
4756         if(cb != NULL){
4757                 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4758         }
4759         if(closew != NULL){
4760                 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4761         }
4762
4763 //              Collect aggregate refs in the SELECT clause.
4764
4765         for(i=0;i<sl_list.size();i++)
4766                 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4767
4768
4769 //              Collect references to states of stateful functions
4770         if(wh != NULL){
4771                 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4772         }
4773         if(hv != NULL){
4774                 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4775         }
4776         if(cw != NULL){
4777                 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4778         }
4779         if(cb != NULL){
4780                 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4781         }
4782         if(closew != NULL){                     // should be no stateful fcns here ...
4783                 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4784         }
4785         for(i=0;i<sl_list.size();i++)
4786                 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4787
4788
4789 //              If this is an aggregate query, it normally references
4790 //              some aggregates.  It's not necessary though; just emit a warning.
4791 //              (acts as SELECT DISTINCT)
4792
4793         bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4794         if(is_aggr_query && aggr_tbl->size() == 0){
4795                 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4796         }
4797
4798 //              If this is an aggregate query,
4799 //                      1) verify that the SEs in the SELECT clause reference
4800 //                              only constants, aggregates, and group-by attributes.
4801 //                      2) No aggregate scalar expression references an aggregate
4802 //                              or any stateful function.
4803 //                      3) either it references both CLEANING clauses or neither.
4804 //                      4) all superaggregates must have the superaggr_allowed property.
4805 //                      5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY
4806 //                         clauses must have the multiple_output property.
4807
4808
4809         if(is_aggr_query){
4810                 if(gb_list.size() == 0){
4811                         fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4812                         return NULL;
4813                 }
4814 //                      Ensure that at least one gbvar is temporal
4815                 if(! fta_tree->name_exists("no_temporal_aggr")){
4816                         bool found_temporal = false;
4817                 for(i=0;i<gb_tbl->size();i++){
4818                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4819                                         found_temporal = true;
4820                                 }
4821                         }
4822                         if(! found_temporal){
4823                                 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4824                                 exit(1);
4825                         }
4826                 }
4827
4828                 if((!cb && cw) || (cb && !cw)){
4829                         fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4830                         return(NULL);
4831                 }
4832
4833                 bool refs_running = false;
4834                 int a;
4835                 for(a=0; a<aggr_tbl->size(); ++a){
4836                         refs_running |= aggr_tbl->is_running_aggr(a);
4837                 }
4838
4839                 if(closew){
4840                         if(cb || cw){
4841                                 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4842                                 return(NULL);
4843                         }
4844                         if(!refs_running){
4845                                 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4846                                 return(NULL);
4847                         }
4848                 }
4849
4850                 if(refs_running && !closew){
4851                                 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4852                         return(NULL);
4853                 }
4854
4855                 bool st_ok = true;
4856                 for(i=0;i<sl_list.size();i++){
4857                         bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4858                         st_ok = st_ok && ret_bool;
4859                 }
4860                 if(! st_ok)
4861                         return(NULL);
4862
4863                 for(i=0;i<aggr_tbl->size();i++){
4864                         if(aggr_tbl->is_superaggr(i)){
4865                                 if(! aggr_tbl->superaggr_allowed(i)){
4866                                         fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4867                                         return NULL;
4868                                 }
4869                         }
4870                         if(aggr_tbl->is_builtin(i)){
4871                                 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4872                                         fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4873                                         return(NULL);
4874                                 }
4875                         }else{
4876                                 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4877                                 int o;
4878                                 for(o=0;o<opl.size();++o){
4879                                         if(count_aggr_se(opl[o], true) > 0){
4880                                                 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4881                                                 return(NULL);
4882                                         }
4883                                 }
4884                         }
4885                 }
4886         }else{
4887 //                      Ensure that non-aggregate query doesn't reference some things
4888                 if(cb || cw){
4889                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4890                         return(NULL);
4891                 }
4892                 if(closew){
4893                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4894                         return(NULL);
4895                 }
4896                 if(qs->states_refd.size()){
4897                         fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4898                         return(NULL);
4899                 }
4900         }
4901
4902
4903
4904 //              Convert the predicates into CNF.  OK to pass NULL ptr.
4905         make_cnf_from_pr(wh, qs->wh_cnf);
4906         make_cnf_from_pr(hv, qs->hav_cnf);
4907         make_cnf_from_pr(cb, qs->cb_cnf);
4908         make_cnf_from_pr(cw, qs->cw_cnf);
4909         make_cnf_from_pr(closew, qs->closew_cnf);
4910
4911 //              Analyze the predicates.
4912
4913         for(i=0;i<qs->wh_cnf.size();i++)
4914                 analyze_cnf(qs->wh_cnf[i]);
4915         for(i=0;i<qs->hav_cnf.size();i++)
4916                 analyze_cnf(qs->hav_cnf[i]);
4917         for(i=0;i<qs->cb_cnf.size();i++)
4918                 analyze_cnf(qs->cb_cnf[i]);
4919         for(i=0;i<qs->cw_cnf.size();i++)
4920                 analyze_cnf(qs->cw_cnf[i]);
4921         for(i=0;i<qs->closew_cnf.size();i++)
4922                 analyze_cnf(qs->closew_cnf[i]);
4923
4924
4925 //                      At this point, the old analysis program
4926 //                      gathered all refs to partial functions,
4927 //                      complex literals, and parameters accessed via a handle.
4928 //                      I think its better to delay this
4929 //                      until code generation time, as the query will be
4930 //                      in general split.
4931
4932     }
4933
4934         return(qs);
4935 }
4936
4937 ///////////////////////////////////////////////////////////////////////
4938
4939 //              Expand gbvars with their definitions.
4940
4941 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4942         int o;
4943
4944         switch(se->get_operator_type()){
4945         case SE_LITERAL:
4946         case SE_PARAM:
4947         case SE_IFACE_PARAM:
4948                 return se;
4949         case SE_UNARY_OP:
4950                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4951                 return se;
4952         case SE_BINARY_OP:
4953                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4954                 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
4955                 return se;
4956         case SE_COLREF:
4957                 if( se->is_gb() ){
4958                         return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4959                 }
4960                 return se;
4961 //                      don't descend into aggr defs.
4962         case SE_AGGR_STAR:
4963                 return se;
4964         case SE_AGGR_SE:
4965                 return se;
4966         case SE_FUNC:
4967                 for(o=0;o<se->param_list.size();o++){
4968                         se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
4969                 }
4970                 return se;
4971         default:
4972                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4973                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
4974                 exit(1);
4975         }
4976         return se;
4977 }
4978
4979 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4980         vector<scalarexp_t *> op_list;
4981         int o;
4982         bool found = false;
4983
4984         switch(pr->get_operator_type()){
4985         case PRED_IN:
4986                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
4987                 return;
4988         case PRED_COMPARE:
4989                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
4990                 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
4991                 return;
4992         case PRED_UNARY_OP:
4993                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
4994                 return;
4995         case PRED_BINARY_OP:
4996                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
4997                 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
4998                 return;
4999         case PRED_FUNC:
5000                 for(o=0;o<pr->param_list.size();++o){
5001                         pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
5002                 }
5003                 return;
5004         default:
5005                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5006                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5007         }
5008         return;
5009 }
5010
5011
5012
5013
5014 //              return true if the se / pr contains any gbvar on the list.
5015
5016
5017 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5018         vector<scalarexp_t *> operands;
5019         int o;
5020         bool found = false;
5021
5022         switch(se->get_operator_type()){
5023         case SE_LITERAL:
5024         case SE_PARAM:
5025         case SE_IFACE_PARAM:
5026                 return false;
5027         case SE_UNARY_OP:
5028                 return contains_gb_se(se->get_left_se(),gref_set);
5029         case SE_BINARY_OP:
5030                 return( contains_gb_se(se->get_left_se(),gref_set) ||
5031                         contains_gb_se(se->get_right_se(),gref_set) );
5032         case SE_COLREF:
5033                 if( se->is_gb() ){
5034                         return( gref_set.count(se->get_gb_ref()) > 0);
5035                 }
5036                 return false;
5037 //                      don't descend into aggr defs.
5038         case SE_AGGR_STAR:
5039                 return false;
5040         case SE_AGGR_SE:
5041                 return false;
5042         case SE_FUNC:
5043                 operands = se->get_operands();
5044                 for(o=0;o<operands.size();o++){
5045                         found = found || contains_gb_se(operands[o], gref_set);
5046                 }
5047                 return found;
5048         default:
5049                 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5050                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5051                 exit(1);
5052         }
5053         return false;
5054 }
5055
5056
5057 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5058         vector<scalarexp_t *> op_list;
5059         int o;
5060         bool found = false;
5061
5062         switch(pr->get_operator_type()){
5063         case PRED_IN:
5064                 return contains_gb_se(pr->get_left_se(), gref_set);
5065         case PRED_COMPARE:
5066                 return (contains_gb_se(pr->get_left_se(),gref_set)
5067                         || contains_gb_se(pr->get_right_se(),gref_set) );
5068         case PRED_UNARY_OP:
5069                 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5070         case PRED_BINARY_OP:
5071                 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5072                         || contains_gb_pr(pr->get_right_pr(),gref_set) );
5073         case PRED_FUNC:
5074                 op_list = pr->get_op_list();
5075                 for(o=0;o<op_list.size();++o){
5076                         found = found ||contains_gb_se(op_list[o],gref_set) ;
5077                 }
5078                 return found;
5079         default:
5080                 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5081                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5082         }
5083
5084         return found;
5085 }
5086
5087
5088 //              Gather the set of columns accessed in this se.
5089 //              Descend into aggregate functions.
5090
5091 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5092         col_id ci;
5093         vector<scalarexp_t *> operands;
5094         int o;
5095
5096         if(! se)
5097                 return;
5098
5099         switch(se->get_operator_type()){
5100         case SE_LITERAL:
5101         case SE_PARAM:
5102         case SE_IFACE_PARAM:
5103                 return;
5104         case SE_UNARY_OP:
5105                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5106                 return;
5107         case SE_BINARY_OP:
5108                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5109                 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
5110                 return;
5111         case SE_COLREF:
5112                 if(! se->is_gb() ){
5113                         ci.load_from_colref(se->get_colref() );
5114                         if(ci.tblvar_ref < 0){
5115                                 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5116                         }
5117                         cid_set.insert(ci);
5118                 }else{
5119                         if(gtbl==NULL){
5120                                 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
5121                                 exit(1);
5122                         }
5123                         gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
5124                 }
5125                 return;
5126         case SE_AGGR_STAR:
5127                 return;
5128         case SE_AGGR_SE:
5129                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5130                 return;
5131         case SE_FUNC:
5132                 operands = se->get_operands();
5133                 for(o=0;o<operands.size();o++){
5134                         gather_se_col_ids(operands[o], cid_set,gtbl);
5135                 }
5136                 return;
5137         default:
5138                 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5139                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5140                 exit(1);
5141         }
5142 }
5143
5144
5145 //              Gather the set of columns accessed in this predicate.
5146
5147 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5148         vector<scalarexp_t *> op_list;
5149         int o;
5150
5151         switch(pr->get_operator_type()){
5152         case PRED_IN:
5153                 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
5154                 return;
5155         case PRED_COMPARE:
5156                 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5157                 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
5158                 return;
5159         case PRED_UNARY_OP:
5160                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5161                 return;
5162         case PRED_BINARY_OP:
5163                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5164                 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
5165                 return;
5166         case PRED_FUNC:
5167                 op_list = pr->get_op_list();
5168                 for(o=0;o<op_list.size();++o){
5169                         gather_se_col_ids(op_list[o],cid_set,gtbl) ;
5170                 }
5171                 return;
5172         default:
5173                 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5174                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5175         }
5176 }
5177
5178
5179
5180
5181 //              Gather the set of special operator or comparison functions referenced by this se.
5182
5183 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5184         col_id ci;
5185         data_type *ldt, *rdt;
5186         int o;
5187         vector<scalarexp_t *> operands;
5188
5189         switch(se->get_operator_type()){
5190         case SE_LITERAL:
5191                 if( se->get_literal()->constructor_name() != "")
5192                         fcn_set.insert( se->get_literal()->constructor_name() );
5193                 return;
5194         case SE_PARAM:
5195                 return;
5196 //                      SE_IFACE_PARAM should not exist when this is called.
5197         case SE_UNARY_OP:
5198                 ldt = se->get_left_se()->get_data_type();
5199                 if(ldt->complex_operator(se->get_op()) ){
5200                         fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5201                 }
5202                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5203                 return;
5204         case SE_BINARY_OP:
5205                 ldt = se->get_left_se()->get_data_type();
5206                 rdt = se->get_right_se()->get_data_type();
5207
5208                 if(ldt->complex_operator(rdt, se->get_op()) ){
5209                         fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5210                 }
5211                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5212                 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
5213                 return;
5214         case SE_COLREF:
5215                 return;
5216         case SE_AGGR_STAR:
5217                 return;
5218         case SE_AGGR_SE:
5219                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5220                 return;
5221         case SE_FUNC:
5222                 operands = se->get_operands();
5223                 for(o=0;o<operands.size();o++){
5224                         gather_se_opcmp_fcns(operands[o], fcn_set);
5225                 }
5226                 return;
5227         default:
5228                 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5229                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5230                 exit(1);
5231         }
5232 }
5233
5234
5235 //              Gather the set of special operator or comparison functions referenced by this predicate.
5236
5237 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5238         data_type *ldt, *rdt;
5239         vector<scalarexp_t *> operands;
5240         int o;
5241
5242         switch(pr->get_operator_type()){
5243         case PRED_IN:
5244                 ldt = pr->get_left_se()->get_data_type();
5245                 if(ldt->complex_comparison(ldt) ){
5246                         fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5247                 }
5248                 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
5249                 return;
5250         case PRED_COMPARE:
5251                 ldt = pr->get_left_se()->get_data_type();
5252                 rdt = pr->get_right_se()->get_data_type();
5253                 if(ldt->complex_comparison(rdt) ){
5254                         fcn_set.insert( ldt->get_comparison_fcn(rdt) );
5255                 }
5256                 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5257                 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
5258                 return;
5259         case PRED_UNARY_OP:
5260                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5261                 return;
5262         case PRED_BINARY_OP:
5263                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5264                 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
5265                 return;
5266         case PRED_FUNC:
5267                 operands = pr->get_op_list();
5268                 for(o=0;o<operands.size();o++){
5269                         gather_se_opcmp_fcns(operands[o], fcn_set);
5270                 }
5271                 return;
5272         default:
5273                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
5274                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5275         }
5276 }
5277
5278
5279
5280
5281 //              find the temporal variable divisor if any.
5282 //              Only forms allowed : temporal_colref, temporal_colref/const
5283 //              temporal_colref/const + const
5284
5285
5286 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5287         long long int retval = 0;
5288         data_type *ldt, *rdt;
5289         int o;
5290         vector<scalarexp_t *> operands;
5291         scalarexp_t *t_se, *c_se;
5292         string the_op;
5293
5294         switch(se->get_operator_type()){
5295         case SE_LITERAL:
5296                 return(-1);
5297         case SE_PARAM:
5298                 return(-1);
5299 //                      SE_IFACE_PARAM should not exist when this is called.
5300         case SE_UNARY_OP:
5301                 return(-1);
5302         case SE_BINARY_OP:
5303                 ldt = se->get_left_se()->get_data_type();
5304                 if(ldt->is_temporal()){
5305                         t_se = se->get_left_se();
5306                         c_se = se->get_right_se();
5307                 }else{
5308                         t_se = se->get_left_se();
5309                         c_se = se->get_right_se();
5310                 }
5311                 if((! t_se->get_data_type()->is_temporal()) ||  c_se->get_data_type()->is_temporal())
5312                         return -1;
5313
5314                 the_op = se->get_op();
5315                 if(the_op == "+" || the_op == "-")
5316                         return find_temporal_divisor(t_se, gbt,fnm);
5317                 if(the_op == "/"){
5318                         if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5319                                 fnm = t_se->get_colref()->get_field();
5320                                 string lits = c_se->get_literal()->to_string();
5321                                 sscanf(lits.c_str(),"%qd",&retval);
5322                                 return retval;
5323                         }
5324                 }
5325
5326                 return -1;
5327         case SE_COLREF:
5328                 if(se->is_gb()){
5329                         return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
5330                 }
5331                 if(se->get_data_type()->is_temporal()){
5332                         fnm = se->get_colref()->get_field();
5333                         return 1;
5334                 }
5335                 return 0;
5336         case SE_AGGR_STAR:
5337                 return -1;
5338         case SE_AGGR_SE:
5339                 return -1;
5340         case SE_FUNC:
5341                 return -1;
5342         default:
5343                 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5344                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5345                 exit(1);
5346         }
5347 }
5348
5349
5350 //                      impute_colnames:
5351 //                      Create meaningful but unique names for the columns.
5352 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5353         set<string> curr_names;
5354         int s;
5355         for(s=0;s<sel_list.size();++s){
5356                 curr_names.insert(sel_list[s]->name);
5357         }
5358         return impute_colname(curr_names, se);
5359 }
5360
5361 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5362 string ret;
5363 scalarexp_t *seo;
5364 vector<scalarexp_t *> operand_list;
5365 string opstr;
5366
5367         switch(se->get_operator_type()){
5368         case SE_LITERAL:
5369                 ret = "Literal";
5370                 break;
5371     case SE_PARAM:
5372                 ret = "Param_" + se->get_param_name();
5373                 break;
5374     case SE_IFACE_PARAM:
5375                 ret = "Iparam_" + se->get_ifpref()->get_pname();
5376                 break;
5377     case SE_COLREF:
5378                 ret =  se->get_colref()->get_field() ;
5379                 break;
5380     case SE_UNARY_OP:
5381     case SE_BINARY_OP:
5382                 ret = "Field";
5383                 break;
5384     case SE_AGGR_STAR:
5385                 ret = "Cnt";
5386                 break;
5387     case SE_AGGR_SE:
5388                 ret = se->get_op();
5389                 seo = se->get_left_se();
5390                 switch(se->get_left_se()->get_operator_type()){
5391                 case SE_PARAM:
5392                         ret += "_PARAM_"+seo->get_param_name();
5393                         break;
5394                 case SE_IFACE_PARAM:
5395                         ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5396                         break;
5397                 case SE_COLREF:
5398                         opstr =  seo->get_colref()->get_field();
5399                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5400                                 ret += "_" + opstr;
5401                         }else{
5402                                 ret = opstr;
5403                         }
5404                         break;
5405                 case SE_AGGR_STAR:
5406                 case SE_AGGR_SE:
5407                         opstr = seo->get_op();
5408                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5409                                 ret += "_" + seo->get_op();
5410                         }else{
5411                                 ret = opstr;
5412                         }
5413                         break;
5414                 case SE_FUNC:
5415                         opstr = seo->get_op();
5416                         ret += "_" + seo->get_op();
5417                         break;
5418         case SE_UNARY_OP:
5419         case SE_BINARY_OP:
5420                         ret += "_SE";
5421                         break;
5422                 default:
5423                         ret += "_";
5424                         break;
5425                 }
5426                 break;
5427         case SE_FUNC:
5428                 ret = se->get_op();
5429                 operand_list = se->get_operands();
5430                 if(operand_list.size() > 0){
5431                         seo = operand_list[0];
5432                         switch(seo->get_operator_type()){
5433                         case SE_PARAM:
5434                                 ret += "_PARAM_"+seo->get_param_name();
5435                                 break;
5436                         case SE_IFACE_PARAM:
5437                                 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5438                                 break;
5439                         case SE_COLREF:
5440                                 ret += "_" + seo->get_colref()->get_field();
5441                                 break;
5442                         case SE_AGGR_STAR:
5443                         case SE_AGGR_SE:
5444                         case SE_FUNC:
5445                                 ret += "_" + seo->get_op();
5446                                 break;
5447                 case SE_UNARY_OP:
5448                 case SE_BINARY_OP:
5449                                 ret += "_SE";
5450                         break;
5451                         default:
5452                                 ret += "_";
5453                                 break;
5454                         }
5455                 }else{
5456                         ret += "_func";
5457                 }
5458                 break;
5459         }
5460
5461         if(ret == "Field"){
5462                 if(curr_names.count("Field0") == 0)
5463                         ret = "Field0";
5464         }
5465         int iter = 1;
5466         string base = ret;
5467         while(curr_names.count(ret) > 0){
5468                 char tmpstr[500];
5469                 sprintf(tmpstr,"%s%d",base.c_str(),iter);
5470                 ret = tmpstr;
5471                 iter++;
5472         }
5473
5474
5475         curr_names.insert(ret);
5476         return(ret);
5477
5478 }
5479
5480
5481
5482 //////////////////////////////////////////////////////////////////////
5483 //////////////          Methods of defined classes ///////////////////////
5484 //////////////////////////////////////////////////////////////////////
5485
5486 //              helper fcn to enable col_id as map key.
5487
5488   bool operator<(const col_id &cr1, const col_id &cr2){
5489         if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5490         if(cr1.tblvar_ref == cr2.tblvar_ref)
5491            return (cr1.field < cr2.field);
5492         return(false);
5493   }
5494
5495
5496 //              Process the GB variables.
5497 //              At parse time, GB vars are either GB_COLREF,
5498 //              or GB_COMPUTED if the AS keyword is used.
5499 //              Cast GB vars as named entities with a SE as
5500 //              their definition (the colref in the case of GB_COLREF).
5501 //
5502 //              TODO: if there is a gbref in a gbdef,
5503 //              then I won't be able to compute the value without
5504 //              a complex dependence analysis.  So verify that there is no
5505 //              gbref in any of the GBdefs.
5506 //              BUT: a GBVAR_COLREF should be converted to a regular colref,
5507 //              which is not yet done.
5508 //
5509 //              TODO : sort out issue of GBVAR naming and identification.
5510 //              Determine where it is advantageous to convert GV_COLREF
5511 //              GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5512 //              etc.
5513 //
5514 //              return -1 if there is a problem.
5515
5516 int gb_table::add_gb_attr(
5517                                                   gb_t *gb,
5518                                                   tablevar_list_t *fm,
5519                                                   table_list *schema,
5520                                                   table_exp_t *fta_tree,
5521                                                   ext_fcn_list *Ext_fcns
5522                                                   ){
5523         colref_t *cr;
5524         int retval;
5525         gb_table_entry *entry;
5526
5527         if(gb->type == GB_COLREF){
5528                 if(gb->table != "")
5529                         cr = new colref_t(
5530                                 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5531                         );
5532                 else
5533                         cr = new colref_t(gb->name.c_str());
5534
5535                 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5536                 if(tablevar_ref < 0) return(tablevar_ref);
5537
5538                 cr->set_tablevar_ref(tablevar_ref);
5539                 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5540                 cr->set_interface("");
5541                 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
5542
5543                 entry = new gb_table_entry();
5544                 entry->name.field = cr->get_field();
5545                 entry->name.tblvar_ref = tablevar_ref;
5546                 entry->definition = new scalarexp_t(cr);
5547                 entry->ref_type = GBVAR_COLREF;
5548         }else{
5549                 entry = new gb_table_entry();
5550                 entry->name.field = gb->name;
5551                 entry->name.tblvar_ref = -1;
5552                 entry->definition = gb->def;
5553                 entry->ref_type = GBVAR_SE;
5554         }
5555
5556         retval = verify_colref(entry->definition, fm, schema, NULL);
5557         if(retval < 0) return(retval);
5558
5559         retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5560         if(retval < 0) return(retval);
5561
5562 //              Verify that the gbvar def references no aggregates and no gbvars.
5563         if(count_gb_se(entry->definition) > 0){
5564                 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5565                 return(-1);
5566         }
5567         if(count_aggr_se(entry->definition, true) > 0){
5568                 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5569                 return(-1);
5570         }
5571
5572 //                      Check for duplicates
5573         int i;
5574         for(i=0;i<gtbl.size();++i){
5575                 if(entry->name.field == gtbl[i]->name.field){
5576                         fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5577                         return -1;
5578                 }
5579         }
5580
5581
5582         gtbl.push_back(entry);
5583
5584         return(1);
5585 }
5586
5587
5588 //                      Try to determine if the colref is actually
5589 //                      a gbvar ref.
5590 //                      a) if no tablename associated with the colref,
5591 //                              1) try to find a matching GB_COMPUTED gbvar.
5592 //                              2) failing that, try to match to a single tablevar
5593 //                              3) if successful, search among GB_COLREF
5594 //                      b) else, try to match the tablename to a single tablevar
5595 //                              if successful, search among GB_COLREF
5596 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5597         string c_field = cr->get_field();
5598         int c_tblref;
5599         int n_tbl;
5600         int i;
5601         vector<int> candidates;
5602
5603         if(cr->uses_default_table()){
5604                 for(i=0;i<gtbl.size();i++){
5605                         if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
5606                                 return(i);
5607                         }
5608                 }
5609                 candidates = find_source_tables(c_field, fm, schema);
5610                 if(candidates.size() != 1) return(-1); // can't find unique tablevar
5611                 for(i=0;i<gtbl.size();i++){
5612                         if(gtbl[i]->ref_type==GBVAR_COLREF &&
5613                                   c_field == gtbl[i]->name.field &&
5614                                   candidates[0] == gtbl[i]->name.tblvar_ref){
5615                                 return(i);
5616                         }
5617                 }
5618                 return(-1); // colref is not in gb table.
5619         }
5620
5621 //                      A table name must have been given.
5622         vector<tablevar_t *> fm_tbls = fm->get_table_list();
5623         string interface = cr->get_interface();
5624         string table_name = cr->get_table_name();
5625
5626
5627 //                      if no interface name is given, try to search for the table
5628 //                      name among the tablevar names first.
5629         if(interface==""){
5630                 for(i=0;i<fm_tbls.size();++i){
5631                         if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5632                                 candidates.push_back(i);
5633                 }
5634                 if(candidates.size()>1) return(-1);
5635                 if(candidates.size()==1){
5636                         for(i=0;i<gtbl.size();i++){
5637                                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5638                                         c_field == gtbl[i]->name.field &&
5639                                         candidates[0] == gtbl[i]->name.tblvar_ref){
5640                                         return(i);
5641                                 }
5642                         }
5643                         return(-1);  // match semantics of bind to tablevar name first
5644                 }
5645         }
5646
5647 //              Interface name given, or no interface but no
5648 //              no tablevar match.  Try to match on schema name.
5649         for(i=0;i<fm_tbls.size();++i){
5650                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5651                         candidates.push_back(i);
5652         }
5653         if(candidates.size() != 1) return(-1);
5654         for(i=0;i<gtbl.size();i++){
5655                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5656                         c_field == gtbl[i]->name.field &&
5657                         candidates[0] == gtbl[i]->name.tblvar_ref){
5658                         return(i);
5659                 }
5660         }
5661
5662 //              No match found.
5663         return(-1);
5664
5665 }
5666
5667
5668
5669 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5670         if(is_builtin()){
5671                 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5672                         (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5673                         (op == "XOR_AGGR") )
5674                                 return(true);
5675         }else{
5676                 return Ext_fcns->fta_legal(fcn_id);
5677         }
5678         return(false);
5679 }
5680
5681
5682 //              Return the set of subaggregates required to compute
//              the desired aggregate.  The operand of the subaggregates
5684 //              can only be * or the scalarexp used in the superaggr.
5685 //              This is indicated by the use_se vector.
5686
5687 //              Is this code generation specific?
5688
5689 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
5690         vector<string> ret;
5691
5692         if(op == "COUNT"){
5693                 ret.push_back("COUNT");
5694                 use_se.push_back(false);
5695         }
5696         if(op == "SUM"){
5697                 ret.push_back("SUM");
5698                 use_se.push_back(true);
5699         }
5700         if(op == "AVG"){
5701                 ret.push_back("SUM");
5702                 ret.push_back("COUNT");
5703                 use_se.push_back(true);
5704                 use_se.push_back(false);
5705         }
5706         if(op == "MIN"){
5707                 ret.push_back("MIN");
5708                 use_se.push_back(true);
5709         }
5710         if(op == "MAX"){
5711                 ret.push_back("MAX");
5712                 use_se.push_back(true);
5713         }
5714         if(op == "AND_AGGR"){
5715                 ret.push_back("AND_AGGR");
5716                 use_se.push_back(true);
5717         }
5718         if(op == "OR_AGGR"){
5719                 ret.push_back("OR_AGGR");
5720                 use_se.push_back(true);
5721         }
5722         if(op == "XOR_AGGR"){
5723                 ret.push_back("XOR_AGGR");
5724                 use_se.push_back(true);
5725         }
5726
5727         return(ret);
5728 }
5729
5730 //                      Code generation specific?
5731
5732 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5733         vector<data_type *> ret;
5734         data_type *dt;
5735
5736         if(op == "COUNT"){
5737                 dt = new data_type("Int"); // was Uint
5738                 ret.push_back( dt );
5739         }
5740         if(op == "SUM"){
5741                 dt = new data_type();
5742                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5743                 ret.push_back(dt);
5744         }
5745         if(op == "AVG"){
5746                 dt = new data_type();
5747                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5748                 ret.push_back( dt );
5749                 dt = new data_type("Int");
5750                 ret.push_back( dt );
5751         }
5752         if(op == "MIN"){
5753                 dt = new data_type();
5754                 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5755                 ret.push_back( dt );
5756         }
5757         if(op == "MAX"){
5758                 dt = new data_type();
5759                 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5760                 ret.push_back( dt );
5761         }
5762         if(op == "AND_AGGR"){
5763                 dt = new data_type();
5764                 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5765                 ret.push_back( dt );
5766         }
5767         if(op == "OR_AGGR"){
5768                 dt = new data_type();
5769                 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5770                 ret.push_back( dt );
5771         }
5772         if(op == "XOR_AGGR"){
5773                 dt = new data_type();
5774                 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5775                 ret.push_back( dt );
5776         }
5777
5778         return(ret);
5779 }
5780
5781 //              Code generation specific?
5782
5783 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5784         scalarexp_t *se_l, *se_r, *ret_se = NULL;
5785
5786         if(op == "COUNT"){
5787                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5788                 return(ret_se);
5789         }
5790         if(op == "SUM"){
5791                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5792                 return(ret_se);
5793         }
5794         if(op == "AVG"){
5795                 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5796                 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5797
5798                 ret_se = new scalarexp_t("/", se_l, se_r);
5799                 return(ret_se);
5800         }
5801         if(op == "MIN"){
5802                 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
5803                 return(ret_se);
5804         }
5805         if(op == "MAX"){
5806                 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
5807                 return(ret_se);
5808         }
5809         if(op == "AND_AGGR"){
5810                 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5811                 return(ret_se);
5812         }
5813         if(op == "OR_AGGR"){
5814                 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5815                 return(ret_se);
5816         }
5817         if(op == "XOR_AGGR"){
5818                 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5819                 return(ret_se);
5820         }
5821
5822         return(ret_se);
5823
5824 }
5825
5826
5827 //              Add a built-in aggr.
5828 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5829         int i;
5830
5831         for(i=0;i<agr_tbl.size();i++){
5832                 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5833                   && is_equivalent_se(se,agr_tbl[i]->operand) ){
5834 //                && is_super == agr_tbl[i]->is_superaggr())
5835                         if(is_super) agr_tbl[i]->set_super(true);
5836                         return(i);
5837                 }
5838         }
5839
5840         aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5841         agr_tbl.push_back(ate);
5842         return(agr_tbl.size() - 1);
5843 }
5844
5845 //              add a UDAF
5846 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5847         int i,o;
5848
5849         for(i=0;i<agr_tbl.size();i++){
5850                 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5851                                 && opl.size() == agr_tbl[i]->oplist.size() ){
5852 //                              && is_super == agr_tbl[i]->is_superaggr() ){
5853                         for(o=0;o<opl.size();++o){
5854                                 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
5855                                         break;
5856                         }
5857                         if(o == opl.size()){
5858                                 if(is_super) agr_tbl[i]->set_super(true);
5859                                 return i;
5860                         }
5861                 }
5862         }
5863
5864         aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5865         agr_tbl.push_back(ate);
5866         return(agr_tbl.size() - 1);
5867 }
5868
5869
5870 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5871         int i;
5872
5873         for(i=0;i<cplx_lit_tbl.size();i++){
5874                 if(l->is_equivalent(cplx_lit_tbl[i])){
5875                         hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5876                         return(i);
5877                 }
5878         }
5879
5880         cplx_lit_tbl.push_back(l);
5881         hdl_ref_tbl.push_back(is_handle_ref);
5882         return(cplx_lit_tbl.size() - 1);
5883 }
5884
5885
5886
5887 //------------------------------------------------------------
5888 //              parse_fta code
5889
5890
5891 gb_t *gb_t::duplicate(){
5892         gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5893         ret->type = type;
5894         ret->lineno = lineno;
5895         ret->charno = charno;
5896         if(def != NULL)
5897                 ret->def = dup_se(def,NULL);
5898         return ret;
5899 }