Add new udafs and RMR support to gsprintconsole_ves
[com/gs-lite.git] / src / ftacmp / analyze_fta.cc
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #include<unistd.h>
17
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
21
22
23 #include"analyze_fta.h"
24
25 #include"type_objects.h"
26
27 #include <string>
28 #include<list>
29
30 using namespace std;
31
32 extern string hostname;                 // name of the current host
33
34 //                      Utility function
35
//		Format a signed integer as its base-10 text representation.
std::string int_to_string(int i){
	char digits[100];		// far larger than any decimal int rendering
	sprintf(digits,"%d",i);
	return std::string(digits);
}
43
44
45 //                              Globals
46
47 //                      These represent derived information from the
48 //                      query analysis stage.  I extract them from a class,
49 //                      perhaps this is dangerous.
50
//			File-local analysis tables.  All start out NULL.
static gb_table *gb_tbl=NULL;                   // Table of all group-by attributes.
static aggregate_table *aggr_tbl=NULL;  // Table of all referenced aggregates.

// static cplx_lit_table *complex_literals=NULL;        // Table of literals with constructors.
static param_table *param_tbl=NULL;             // Table of all referenced parameters.

//			Externally visible state (no 'static').
//			NOTE(review): the *_partial_start / *_partial_end pairs are presumably
//			index ranges into partial_fcns_list for the WHERE, GROUP BY,
//			aggregate and SELECT-list clauses respectively -- confirm
//			against the code that fills them in.
vector<scalarexp_t *> partial_fcns_list;
int wh_partial_start, wh_partial_end;
int gb_partial_start, gb_partial_end;
int aggr_partial_start, aggr_partial_end;
int sl_partial_start, sl_partial_end;
62
63
64 //                      Infer the table of a column reference and return the table ref.
65 //                      First, extract the
66 //                      field name and table name.  If no table name is used,
67 //                      search all tables to try to find a unique match.
68 //                      Of course, plenty of error checking.
69
70 //              Return the set of tablevar indices in the FROM clause
71 //              which contain a field with the same name.
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
73         int i;
74         vector<int> tv;
75 //      vector<string> tn = fm->get_schema_names();
76         vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
78         for(i=0;i<tn.size();i++){
79 //              if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80                 if(Schema->contains_field(tn[i], field) ){
81                         tv.push_back(i);
82 // printf("\tfound in table %s\n",tn[i].c_str());
83                 }
84         }
85         return(tv);
86 }
87
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
89         int i;
90         string tname = ir->get_tablevar();
91         if(tname ==""){
92                 if(fm->size()==1) return 0;
93                 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
94                 return -1;
95         }
96         for(i=0;i<fm->size();++i){
97                 if(tname == fm->get_tablevar_name(i))
98                         return i;
99         }
100         fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
101         return -1;
102 }
103
104
105 //              compute the index of the tablevar in the from clause that the
106 //              colref is in.
107 //              return -1 if no tablevar can be imputed.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
109         int i;
110         string table_name;
111         int table_ref;
112         vector<int> tv;
113         vector<tablevar_t *> fm_tbls = fm->get_table_list();
114
115         string field = cr->get_field();
116
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
118         if(cr->uses_default_table() ){
119                 tv = find_source_tables(field, fm, schema);
120                 if(tv.size() > 1){
121                         fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
123                         for(i=0;i<tv.size();i++){
124                                 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
125                         }
126                         fprintf(stderr,"\n\tYou must specify one of these.\n");
127                         return(-1);
128                 }
129                 if(tv.size() == 0){
130                         fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
132                         return(-1);
133                 }
134
135                 return(tv[0]);
136         }
137
138 //                      The table source is named -- but is it a schema name
139 //                      or a var name?
140
141         string interface = cr->get_interface();
142         table_name = cr->get_table_name();
143
144 //              if interface is not specified, prefer to look at the tablevar names
145 //              Check for duplicates.
146         if(interface==""){
147                 for(i=0;i<fm_tbls.size();++i){
148                         if(table_name == fm_tbls[i]->get_var_name())
149                                 tv.push_back(i);
150                 }
151                 if(tv.size() > 1){
152                         fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
153                         return(-1);
154                 }
155                 if(tv.size() == 1) return(tv[0]);
156         }
157
158 //              Tableref not found by looking at tableref vars, or an interface
159 //              was specified.  Try to match on schema and interface.
160 //              Check for duplicates.
161         for(i=0;i<fm_tbls.size();++i){
162                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
163                         tv.push_back(i);
164         }
165         if(tv.size() > 1){
166                 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167                         cr->get_lineno(), cr->get_charno() );
168                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
170                 return(-1);
171         }
172
173         if(tv.size() == 0 ){
174                 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
177                 return(-1)      ;
178         }
179
180         return(tv[0]);
181 }
182
183
184 //                      Reset temporal properties of a scalar expression
185 void reset_temporal(scalarexp_t *se){
186         col_id ci;
187         vector<scalarexp_t *> operands;
188         int o;
189
190         se->get_data_type()->reset_temporal();
191
192         switch(se->get_operator_type()){
193         case SE_LITERAL:
194         case SE_PARAM:
195         case SE_IFACE_PARAM:
196         case SE_COLREF:
197                 return;
198         case SE_UNARY_OP:
199                 reset_temporal(se->get_left_se());
200                 return;
201         case SE_BINARY_OP:
202                 reset_temporal(se->get_left_se());
203                 reset_temporal(se->get_right_se());
204                 return;
205         case SE_AGGR_STAR:
206                 return;
207         case SE_AGGR_SE:
208                 reset_temporal(se->get_left_se());
209                 return;
210         case SE_FUNC:
211                 operands = se->get_operands();
212                 for(o=0;o<operands.size();o++){
213                         reset_temporal(operands[o]);
214                 }
215                 return;
216         default:
217                 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
219                 exit(1);
220         }
221 }
222
223 //              Verify that column references exist in their
224 //              declared tables.  As a side effect, assign
225 //              their data types.  Other side effects :
226 //
227 //              return -1 on error
228
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230                                         table_list *schema, gb_table *gtbl){
231         int l_ret, r_ret;
232         int gb_ref;
233         colref_t *cr;
234         ifpref_t *ir;
235         string field, table_source, type_name;
236         data_type *dt;
237         vector<string> tn;
238         vector<int> tv;
239         int table_var;
240         int o;
241         vector<scalarexp_t *> operands;
242
243         switch(se->get_operator_type()){
244         case SE_LITERAL:
245         case SE_PARAM:
246                 return(1);
247         case SE_IFACE_PARAM:
248                 ir = se->get_ifpref();
249                 table_var = infer_tablevar_from_ifpref(ir, fm);
250                 if(table_var < 0) return(table_var);
251                 ir->set_tablevar_ref(table_var);
252                 return(1);
253         case SE_UNARY_OP:
254                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
255         case SE_BINARY_OP:
256                 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257                 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
259                 return(1);
260         case SE_COLREF:
261                 cr = se->get_colref();
262                 field = cr->get_field();
263
264 //                              Determine if this is really a GB ref.
265 //                              (the parser can only see that its a colref).
266                 if(gtbl != NULL){
267                         gb_ref = gtbl->find_gb(cr, fm, schema);
268                 }else{
269                         gb_ref = -1;
270                 }
271
272                 se->set_gb_ref(gb_ref);
273
274                 if(gb_ref < 0){
275 //                              Its a colref, verify its existance and
276 //                              record the data type.
277                         table_var = infer_tablevar_from_colref(cr,fm,schema);
278                         if(table_var < 0) return(table_var);
279
280         //                      Store the table ref in the colref.
281                         cr->set_tablevar_ref(table_var);
282                         cr->set_schema_ref(fm->get_schema_ref(table_var));
283                         cr->set_interface("");
284                         cr->set_table_name(fm->get_tablevar_name(table_var));
285
286                         if(! schema->contains_field(cr->get_schema_ref(), field)){
287                                 fprintf(stderr, "Error, field %s is not in stream %s\n", field.c_str(), schema->get_table_name( cr->get_schema_ref() ).c_str());
288                                 return -1;
289                         }
290
291                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
292                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
293                         dt = new data_type(type_name, modifiers);
294                         se->set_data_type(dt);
295                 }else{
296 //                              Else, its a gbref, use the GB var's data type.
297                         se->set_data_type(gtbl->get_data_type(gb_ref));
298                 }
299
300                 return(1);
301         case SE_AGGR_STAR:
302                 return(1);
303         case SE_AGGR_SE:
304                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
305         case SE_FUNC:
306                 operands = se->get_operands();
307                 r_ret = 1;
308                 for(o=0;o<operands.size();o++){
309                         l_ret = verify_colref(operands[o], fm, schema, gtbl);
310                         if(l_ret < 0) r_ret = -1;
311                 }
312                 return(r_ret);
313         default:
314                 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
315                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
316                 return(-1);
317         }
318         return(-1);
319 }
320
321
322 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
323         int l_ret, r_ret;
324         std::vector<scalarexp_t *> op_list;
325         int o;
326
327         switch(pr->get_operator_type()){
328         case PRED_IN:
329                 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
330         case PRED_COMPARE:
331                 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
332                 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
333                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
334                 return(1);
335         case PRED_UNARY_OP:
336                 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
337         case PRED_BINARY_OP:
338                 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
339                 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
340                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
341                 return(1);
342         case PRED_FUNC:
343                 op_list = pr->get_op_list();
344                 l_ret = 0;
345                 for(o=0;o<op_list.size();++o){
346                         if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
347                 }
348                 return(l_ret);
349         default:
350                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
351                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
352         }
353
354         return(-1);
355 }
356
357
358 bool literal_only_se(scalarexp_t *se){          // really only literals.
359         int o;
360         vector<scalarexp_t *> operands;
361
362         if(se == NULL) return(1);
363         switch(se->get_operator_type()){
364         case SE_LITERAL:
365                 return(true);
366         case SE_PARAM:
367                 return(false);
368         case SE_IFACE_PARAM:
369                 return(false);
370         case SE_UNARY_OP:
371                 return( literal_only_se(se->get_left_se()) );
372         case SE_BINARY_OP:
373                 return( literal_only_se(se->get_left_se()) &&
374                                 literal_only_se(se->get_right_se()) );
375         case SE_COLREF:
376                 return false;
377         case SE_AGGR_STAR:
378                 return false;
379         case SE_AGGR_SE:
380                 return false;
381                 return(1);
382         case SE_FUNC:
383                 return false;
384         default:
385                 return false;
386         }
387         return false;
388 }
389
390
391
392
393 //              Verify that column references exist in their
394 //              declared tables.  As a side effect, assign
395 //              their data types.  Other side effects :
396 //
397
398 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
399         int l_ret, r_ret;
400         int gb_ref;
401         colref_t *cr;
402         string field, table_source, type_name;
403         data_type *dt;
404         vector<string> tn;
405         vector<int> tv;
406         int tablevar_ref;
407         int o;
408         vector<scalarexp_t *> operands;
409
410         if(se == NULL) return(1);
411
412         switch(se->get_operator_type()){
413         case SE_LITERAL:
414                 return(1);
415         case SE_PARAM:
416                 return(1);
417         case SE_IFACE_PARAM:
418                 return(1);
419         case SE_UNARY_OP:
420                 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
421         case SE_BINARY_OP:
422                 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
423                 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
424                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
425                 return(1);
426         case SE_COLREF:
427                 if(se->is_gb()) return(1);      // gb ref not a colref.
428
429                 cr = se->get_colref();
430                 field = cr->get_field();
431
432                 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
433                 if(tablevar_ref < 0){
434                         return(tablevar_ref);
435                 }else{
436         //                      Store the table ref in the colref.
437                         cr->set_tablevar_ref(tablevar_ref);
438                         cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
439                         cr->set_interface("");
440                         cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
441
442 //                              Check the data type
443                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
444                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
445                         data_type dt(type_name, modifiers);
446 //                      if(! dt.equals(se->get_data_type()) ){
447 //                      if(! dt.subsumes_type(se->get_data_type()) ){
448                         if(! se->get_data_type()->subsumes_type(&dt) ){
449                                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
450                                         dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
451                                 return(-1);
452                         }
453                 }
454                 return(1);
455         case SE_AGGR_STAR:
456                 return(1);
457         case SE_AGGR_SE:        // Probably I should just return,
458                                                 // aggregate se's are explicitly bound to the schema.
459 //                      return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
460                 return(1);
461         case SE_FUNC:
462                 if(se->get_aggr_ref() >= 0) return 1;
463
464                 operands = se->get_operands();
465                 r_ret = 1;
466                 for(o=0;o<operands.size();o++){
467                         l_ret = bind_to_schema_se(operands[o], fm, schema);
468                         if(l_ret < 0) r_ret = -1;
469                 }
470                 return(r_ret);
471         default:
472                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
473                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
474                 return(-1);
475         }
476         return(-1);
477 }
478
479
480 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
481         int l_ret, r_ret;
482         vector<scalarexp_t *> op_list;
483         int o;
484
485         switch(pr->get_operator_type()){
486         case PRED_IN:
487                 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
488         case PRED_COMPARE:
489                 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
490                 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
491                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
492                 return(1);
493         case PRED_UNARY_OP:
494                 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
495         case PRED_BINARY_OP:
496                 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
497                 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
498                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
499                 return(1);
500         case PRED_FUNC:
501                 op_list = pr->get_op_list();
502                 l_ret = 0;
503                 for(o=0;o<op_list.size();++o){
504                         if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
505                 }
506                 return(l_ret);
507         default:
508                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
509                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
510         }
511
512         return(-1);
513 }
514
515
516
517
518
519
520 //                      verify_colref assigned data types to the column refs.
521 //                      Now assign data types to all other nodes in the
522 //                      scalar expression.
523 //
524 //                      return -1 on error
525
526 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
527         int l_ret, r_ret;
528         data_type *dt;
529         bool bret;
530         vector<scalarexp_t *> operands;
531         vector<data_type *> odt;
532         int o, fcn_id;
533         vector<bool> handle_ind;
534
535         switch(se->get_operator_type()){
536         case SE_LITERAL:
537                 return(constant_t);
538         case SE_PARAM:
539                 return(varying_t);
540         case SE_IFACE_PARAM:
541                 return(varying_t);              // actually, this should not be called.
542         case SE_UNARY_OP:
543                 return data_type::compute_temporal_type(
544                         compute_se_temporal(se->get_left_se(), tcol), se->get_op()
545                 );
546         case SE_BINARY_OP:
547                 return data_type::compute_temporal_type(
548                         compute_se_temporal(se->get_left_se(), tcol),
549                         compute_se_temporal(se->get_right_se(), tcol),
550                         se->get_left_se()->get_data_type()->get_type(),
551                         se->get_right_se()->get_data_type()->get_type(),
552                         se->get_op()
553                 );
554         case SE_COLREF:
555                 {
556                         col_id cid(se->get_colref() );
557                         if(tcol.count(cid) > 0){ return tcol[cid];
558                         }else{ return varying_t;}
559                 }
560         case SE_AGGR_STAR:
561         case SE_AGGR_SE:
562         case SE_FUNC:
563         default:
564                 return varying_t;
565         }
566         return(varying_t);
567 }
568
569
570
571 //                      verify_colref assigned data types to the column refs.
572 //                      Now assign data types to all other nodes in the
573 //                      scalar expression.
574 //
575 //                      return -1 on error
576
577 int assign_data_types(scalarexp_t *se, table_list *schema,
578                                                 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
579         int l_ret, r_ret;
580         data_type *dt;
581         bool bret;
582         vector<scalarexp_t *> operands;
583         vector<data_type *> odt;
584         int o, fcn_id;
585         vector<bool> handle_ind;
586         vector<bool> constant_ind;
587
588         switch(se->get_operator_type()){
589         case SE_LITERAL:
590                 dt = new data_type( se->get_literal()->get_type() );
591                 se->set_data_type(dt);
592                 if( ! dt->is_defined() ){
593                         fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
594                                 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
595                         return(-1);
596                 }else{
597                         return(1);
598                 }
599         case SE_PARAM:
600                 {
601                         string pname = se->get_param_name();
602                         dt = param_tbl->get_data_type(pname);
603 // A SE_PARRAM can change its value mid-query so using one
604 // to set a window is dangerous.  TODO check for this and issue a warning.
605                         dt->set_temporal(constant_t);
606                         se->set_data_type(dt);
607                         if( ! dt->is_defined() ){
608                                 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
609                                         pname.c_str(), se->get_lineno(),se->get_charno() );
610                                 return(-1);
611                         }
612                         return(1);
613                 }
614         case SE_IFACE_PARAM:
615                 dt = new data_type( "STRING" );
616                 se->set_data_type(dt);
617                 return(1);
618         case SE_UNARY_OP:
619                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
620                 if(l_ret < 0) return -1;
621
622                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
623                 se->set_data_type(dt);
624                 if( ! dt->is_defined() ){
625                         fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
626                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
627                                 se->get_lineno(), se->get_charno() );
628                         return(-1);
629                 }else{
630                         return(1);
631                 }
632         case SE_BINARY_OP:
633                 l_ret = assign_data_types(se->get_left_se(),  schema, fta_tree, Ext_fcns);
634                 r_ret = assign_data_types(se->get_right_se(),  schema, fta_tree, Ext_fcns);
635                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
636
637                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
638                 se->set_data_type(dt);
639                 if( ! dt->is_defined() ){
640                         fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
641                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
642                                 se->get_right_se()->get_data_type()->to_string().c_str(),
643                                 se->get_lineno(), se->get_charno() );
644                         return(-1);
645                 }else{
646                         return(1);
647                 }
648         case SE_COLREF:
649                 dt = se->get_data_type();
650                 bret = dt->is_defined();
651                 if( bret ){
652                         return(1);
653                 }else{
654                         fprintf(stderr,"ERROR, column reference type  is undefined, line =%d, char = %d, colref=%s\n",
655                                 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
656                         return(-1);
657                 }
658         case SE_AGGR_STAR:
659                 dt = new data_type("Int");      // changed Uint to Int
660                 se->set_data_type(dt);
661                 return(1);
662         case SE_AGGR_SE:
663                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
664                 if(l_ret < 0) return -1;
665
666                 dt = new data_type();
667                 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
668                 se->set_data_type(dt);
669
670                 if( ! dt->is_defined() ){
671                         fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
672                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
673                                 se->get_lineno(), se->get_charno() );
674                         return(-1);
675                 }else{
676                         return(1);
677                 }
678         case SE_FUNC:
679
680                 operands = se->get_operands();
681                 r_ret = 1;
682                 for(o=0;o<operands.size();o++){
683                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
684                         odt.push_back(operands[o]->get_data_type());
685                         if(l_ret < 0) r_ret = -1;
686                 }
687                 if(r_ret < 0) return(r_ret);
688
689 //                      Is it an aggregate extraction function?
690                 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
691                 if(fcn_id >= 0){
692                         int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
693                         int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
694                         int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
695 //                              Construct a se for the subaggregate.
696                         vector<scalarexp_t *> op_a;
697                         int n_aggr_oprs = operands.size()-n_fcn_params+1;
698                         for(o=0;o<n_aggr_oprs;++o){
699                                         op_a.push_back(operands[o]);
700                         }
701 //                              check handle params
702                         vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
703                         for(o=0;o<op_a.size();o++){
704                         if(handle_a[o]){
705                                 if(op_a[o]->get_operator_type() != SE_LITERAL &&
706                                                 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
707                                                 op_a[o]->get_operator_type() != SE_PARAM){
708                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
709                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
710                                                 return(-1);
711                                         }
712                                 }
713                         }
714                         vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
715                         for(o=0;o<op_a.size();o++){
716                         if(is_const_a[o]){
717                                 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
718                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
719                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
720                                                 return(-1);
721                                         }
722                                 }
723                         }
724
725                         scalarexp_t *se_a  = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
726                         se_a->set_fcn_id(subaggr_id);
727                         se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
728                         se_a->set_aggr_id(0);           // label this as a UDAF.
729
730
731 //                              Change this se to be the actual function
732                         vector<scalarexp_t *> op_f;
733                         op_f.push_back(se_a);
734                         for(o=n_aggr_oprs;o<operands.size();++o)
735                                 op_f.push_back(operands[o]);
736 //                              check handle params
737                         vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
738                         for(o=0;o<op_f.size();o++){
739                         if(handle_f[o]){
740                                 if(op_f[o]->get_operator_type() != SE_LITERAL &&
741                                                 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
742                                                 op_f[o]->get_operator_type() != SE_PARAM){
743                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
744                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
745                                                 return(-1);
746                                         }
747                                 }
748                         }
749                         vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
750                         for(o=0;o<op_f.size();o++){
751                         if(is_const_f[o]){
752                                 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
753                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
754                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
755                                                 return(-1);
756                                         }
757                                 }
758                         }
759
760                         se->param_list = op_f;
761                         se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
762                         se->set_fcn_id(actual_fcn_id);
763                         se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
764                         return(1);
765                 }
766                 if(fcn_id == -2){
767                         fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
768                 }
769
770 //                      Is it a UDAF?
771                 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
772                 if(fcn_id >= 0){
773                         se->set_fcn_id(fcn_id);
774                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
775                         se->set_aggr_id(0);             // label this as a UDAF.
776 //                      Finally, verify that all HANDLE parameters are literals or params.
777                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
778                         for(o=0;o<operands.size();o++){
779                                 if(handle_ind[o]){
780                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
781                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
782                                                 operands[o]->get_operator_type() != SE_PARAM){
783                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
784                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
785                                                 return(-1);
786                                         }
787                                 }
788                         }
789                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
790                         for(o=0;o<operands.size();o++){
791                         if(constant_ind[o]){
792                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
793                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
794                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
795                                                 return(-1);
796                                         }
797                                 }
798                         }
799
800 //      UDAFS as superaggregates not yet supported.
801 if(se->is_superaggr()){
802 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
803 se->set_superaggr(false);
804 }
805                         return(1);
806                 }
807                 if(fcn_id == -2){
808                         fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
809                 }
810
811 //                      Is it a stateful fcn?
812                 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
813                 if(fcn_id >= 0){
814                         se->set_fcn_id(fcn_id);
815                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
816                         se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
817 //                      Finally, verify that all HANDLE parameters are literals or params.
818                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
819                         for(o=0;o<operands.size();o++){
820                                 if(handle_ind[o]){
821                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
822                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
823                                                 operands[o]->get_operator_type() != SE_PARAM){
824                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
825                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
826                                                 return(-1);
827                                         }
828                                 }
829                         }
830                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
831                         for(o=0;o<operands.size();o++){
832                         if(constant_ind[o]){
833                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
834                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
835                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
836                                                 return(-1);
837                                         }
838                                 }
839                         }
840
841                         if(se->is_superaggr()){
842                                 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
843                         }
844                         return(1);
845                 }
846                 if(fcn_id == -2){
847                         fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
848                 }
849
850
851 //                      Is it a regular function?
852                 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
853                 if( fcn_id < 0 ){
854                         fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
855                         for(o=0;o<operands.size();o++){
856                                 if(o>0) fprintf(stderr,", ");
857                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
858                         }
859                         fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
860                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
861
862                         return(-1);
863                 }
864
865                 se->set_fcn_id(fcn_id);
866                 dt = Ext_fcns->get_fcn_dt(fcn_id);
867
868                 if(! dt->is_defined() ){
869                         fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
870                         for(o=0;o<operands.size();o++){
871                                 if(o>0) fprintf(stderr,", ");
872                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
873                         }
874                         fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
875                         return(-1);
876                 }
877
878 //                      Finally, verify that all HANDLE parameters are literals or params.
879                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
880                 for(o=0;o<operands.size();o++){
881                         if(handle_ind[o]){
882                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
883                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
884                                                 operands[o]->get_operator_type() != SE_PARAM){
885                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
886                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
887                                         return(-1);
888                                 }
889                         }
890                 }
891                 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
892                 for(o=0;o<operands.size();o++){
893                 if(constant_ind[o]){
894                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
895                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s  must be constant.\n  Line=%d, char=%d.\n",
896                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
897                                         return(-1);
898                                 }
899                         }
900                 }
901
902
903                 if(se->is_superaggr()){
904                         fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
905                 }
906
907                 se->set_data_type(dt);
908                 return(1);
909         default:
910                 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
911                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
912                 return(-1);
913         }
914         return(-1);
915 }
916
917
918 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
919                                                         table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
920         int l_ret, r_ret;
921         int i;
922         data_type *dt, *dtl;
923         vector<data_type *> odt;
924         vector<literal_t *> litl;
925         vector<scalarexp_t *> operands;
926         vector<bool> handle_ind;
927         vector<bool> constant_ind;
928         int o, fcn_id;
929
930         switch(pr->get_operator_type()){
931         case PRED_IN:
932                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
933                 litl = pr->get_lit_vec();
934                 dt = pr->get_left_se()->get_data_type();
935
936                 for(i=0;i<litl.size();i++){
937                         dtl = new data_type( litl[i]->get_type() );
938                         if( ! dt->is_comparable(dtl,pr->get_op()) ){
939                                 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
940                                         litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
941                                 delete dtl;
942                                 return(-1);
943                         }
944                         delete dtl;
945                 }
946                 return(1);
947         case PRED_COMPARE:
948                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
949                 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
950                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
951
952                 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
953                         fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
954                                 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
955                                  pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
956                         return(-1);
957                 }else{
958                         return(1);
959                 }
960         case PRED_UNARY_OP:
961                 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
962         case PRED_BINARY_OP:
963                 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
964                 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
965                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
966                 return(1);
967         case PRED_FUNC:
968                 operands = pr->get_op_list();
969                 r_ret = 1;
970                 for(o=0;o<operands.size();o++){
971                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
972                         odt.push_back(operands[o]->get_data_type());
973                         if(l_ret < 0) r_ret = -1;
974                 }
975                 if(r_ret < 0) return(r_ret);
976
977                 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
978                 if( fcn_id < 0 ){
979                         fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
980                         for(o=0;o<operands.size();o++){
981                                 if(o>0) fprintf(stderr,", ");
982                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
983                         }
984                         fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
985                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
986                         return(-1);
987                 }
988
989 //              ext_fcn_set.insert(fcn_id);
990                 pr->set_fcn_id(fcn_id);
991
992 //                      Finally, verify that all HANDLE parameters are literals or params.
993                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
994                 for(o=0;o<operands.size();o++){
995                         if(handle_ind[o]){
996                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
997                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
998                                                 operands[o]->get_operator_type() != SE_PARAM){
999                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
1000                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1001                                         exit(1);
1002                                 }
1003                         }
1004                 }
1005                 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1006                 for(o=0;o<operands.size();o++){
1007                 if(constant_ind[o]){
1008                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
1009                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s  must be constant.\n  Line=%d, char=%d.\n",
1010                         o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1011                                         exit(1);
1012                                 }
1013                         }
1014                 }
1015
1016
1017 //                      Check if this predicate function is special sampling function
1018                 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1019
1020
1021                 return(l_ret);
1022         default:
1023                 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1024                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1025         }
1026
1027         return(-1);
1028 }
1029
1030
1031
1032 /////////////////////////////////////////////////////////////////////
1033 ////////////////                Make a deep copy of a se / pred tree
1034 /////////////////////////////////////////////////////////////////////
1035
1036
1037 //              duplicate a select element
1038 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1039         return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1040 }
1041
1042 //              duplicate a scalar expression.
1043 scalarexp_t *dup_se(scalarexp_t *se,
1044                                   aggregate_table *aggr_tbl
1045                                  ){
1046   int p;
1047   vector<scalarexp_t *> operand_list;
1048   vector<data_type *> dt_signature;
1049   scalarexp_t *ret_se, *l_se, *r_se;
1050
1051   switch(se->get_operator_type()){
1052     case SE_LITERAL:
1053                 ret_se = new scalarexp_t(se->get_literal());
1054                 ret_se->use_decorations_of(se);
1055                 return(ret_se);
1056
1057     case SE_PARAM:
1058                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1059                 ret_se->use_decorations_of(se);
1060                 return(ret_se);
1061
1062     case SE_IFACE_PARAM:
1063                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1064                 ret_se->use_decorations_of(se);
1065                 return(ret_se);
1066
1067     case SE_COLREF:
1068                 ret_se = new scalarexp_t(se->get_colref()->duplicate());
1069                 ret_se->rhs.scalarp = se->rhs.scalarp;  // carry along notation
1070                 ret_se->use_decorations_of(se);
1071                 return(ret_se);
1072
1073     case SE_UNARY_OP:
1074                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1075                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1076                 ret_se->use_decorations_of(se);
1077                 return(ret_se);
1078
1079     case SE_BINARY_OP:
1080                 l_se = dup_se(se->get_left_se(), aggr_tbl);
1081                 r_se = dup_se(se->get_right_se(), aggr_tbl);
1082
1083                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1084                 ret_se->use_decorations_of(se);
1085
1086                 return(ret_se);
1087
1088     case SE_AGGR_STAR:
1089                 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1090                 ret_se->use_decorations_of(se);
1091                 return(ret_se);
1092
1093     case SE_AGGR_SE:
1094                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1095                 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1096                 ret_se->use_decorations_of(se);
1097                 return(ret_se);
1098
1099         case SE_FUNC:
1100                 {
1101                         operand_list = se->get_operands();
1102                         vector<scalarexp_t *> new_operands;
1103                         for(p=0;p<operand_list.size();p++){
1104                                 l_se = dup_se(operand_list[p], aggr_tbl);
1105                                 new_operands.push_back(l_se);
1106                         }
1107
1108                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1109                         ret_se->use_decorations_of(se);
1110                         return(ret_se);
1111                 }
1112
1113         default:
1114                 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
1115                 exit(1);
1116         break;
1117   }
1118   return(NULL);
1119
1120 }
1121
1122
1123
1124 predicate_t *dup_pr(predicate_t *pr,
1125                                                  aggregate_table *aggr_tbl
1126                                                  ){
1127
1128   vector<literal_t *> llist;
1129   scalarexp_t *se_l, *se_r;
1130   predicate_t *pr_l, *pr_r, *ret_pr;
1131   vector<scalarexp_t *> op_list, new_op_list;
1132   int o;
1133
1134
1135         switch(pr->get_operator_type()){
1136         case PRED_IN:
1137                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1138                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1139                 return(ret_pr);
1140
1141         case PRED_COMPARE:
1142                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1143                 se_r = dup_se(pr->get_right_se(),  aggr_tbl);
1144                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1145                 return(ret_pr);
1146
1147         case PRED_UNARY_OP:
1148                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1149                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1150                 return(ret_pr);
1151
1152         case PRED_BINARY_OP:
1153                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1154                 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1155                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1156                 return(ret_pr);
1157         case PRED_FUNC:
1158                 op_list = pr->get_op_list();
1159                 for(o=0;o<op_list.size();++o){
1160                         se_l = dup_se(op_list[o], aggr_tbl);
1161                         new_op_list.push_back(se_l);
1162                 }
1163                 ret_pr=  new predicate_t(pr->get_op().c_str(), new_op_list);
1164                 ret_pr->set_fcn_id(pr->get_fcn_id());
1165                 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1166                 return(ret_pr);
1167
1168         default:
1169                 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1170                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1171                 exit(1);
1172         }
1173
1174         return(0);
1175
1176 }
1177
1178 table_exp_t *dup_table_exp(table_exp_t *te){
1179         int i;
1180         table_exp_t *ret = new table_exp_t();
1181
1182         ret->query_type = te->query_type;
1183
1184         ss_map::iterator ss_i;
1185         for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1186                 ret->nmap[(*ss_i).first] = (*ss_i).second;
1187         }
1188
1189         for(i=0;i<te->query_params.size();++i){
1190                 ret->query_params.push_back(new
1191                  var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
1192         }
1193
1194         if(te->sl){
1195                 ret->sl = new select_list_t();
1196                 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1197                 vector<select_element *> select_list = te->sl->get_select_list();
1198                 for(i=0;i<select_list.size();++i){
1199                         scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1200                         ret->sl->append(se,select_list[i]->name);
1201                 }
1202         }
1203
1204         ret->fm = te->fm->duplicate();
1205
1206         if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1207         if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1208         if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1209         if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1210         if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1211
1212         for(i=0;i<te->gb.size();++i){
1213                 extended_gb_t *tmp_g =  te->gb[i]->duplicate();
1214                 ret->gb.push_back(tmp_g);
1215         }
1216
1217         ret->mergevars = te->mergevars;
1218         if(te->slack)
1219                 ret->slack = dup_se(te->slack,NULL);
1220         ret->lineno = te->lineno;
1221         ret->charno = te->charno;
1222
1223         return(ret);
1224 }
1225
1226
1227
1228
1229
1230
1231
1232 /////////////////////////////////////////////////////////////////////////
1233 //                      Bind colrefs to a member of their FROM list
1234
1235 void bind_colref_se(scalarexp_t *se,
1236                                   vector<tablevar_t *> &fm,
1237                                   int prev_ref, int new_ref
1238                                  ){
1239   int p;
1240   vector<scalarexp_t *> operand_list;
1241   colref_t *cr;
1242   ifpref_t *ir;
1243
1244   switch(se->get_operator_type()){
1245     case SE_LITERAL:
1246     case SE_PARAM:
1247                 return;
1248     case SE_IFACE_PARAM:
1249                 ir = se->get_ifpref();
1250                 if(ir->get_tablevar_ref() == prev_ref){
1251                         ir->set_tablevar_ref(new_ref);
1252                         ir->set_tablevar(fm[new_ref]->get_var_name());
1253                 }
1254                 return;
1255
1256     case SE_COLREF:
1257                 cr=se->get_colref();
1258                 if(cr->get_tablevar_ref() == prev_ref){
1259                         cr->set_tablevar_ref(new_ref);
1260 //                      cr->set_interface(fm[new_ref]->get_interface());
1261                         cr->set_table_name(fm[new_ref]->get_var_name());
1262                 }
1263                 return;
1264
1265     case SE_UNARY_OP:
1266                 bind_colref_se(se->get_left_se(),  fm, prev_ref, new_ref);
1267                 return;
1268
1269     case SE_BINARY_OP:
1270                 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1271                 bind_colref_se(se->get_right_se(),  fm, prev_ref, new_ref);
1272                 return;
1273
1274     case SE_AGGR_STAR:
1275     case SE_AGGR_SE:
1276                 return;
1277
1278         case SE_FUNC:
1279                 if(se->get_aggr_ref() >= 0) return;
1280
1281                 operand_list = se->get_operands();
1282                 for(p=0;p<operand_list.size();p++){
1283                         bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
1284                 }
1285                 return;
1286
1287         default:
1288                 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
1289                 exit(1);
1290         break;
1291   }
1292   return;
1293
1294 }
1295
1296
1297
1298
1299 void bind_colref_pr(predicate_t *pr,
1300                                   vector<tablevar_t *> &fm,
1301                                   int prev_ref, int new_ref
1302                                  ){
1303   vector<scalarexp_t *> op_list;
1304   int o;
1305
1306         switch(pr->get_operator_type()){
1307         case PRED_IN:
1308                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1309                 return;
1310
1311         case PRED_COMPARE:
1312                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1313                 bind_colref_se(pr->get_right_se(),  fm, prev_ref, new_ref);
1314                 return;
1315
1316         case PRED_UNARY_OP:
1317                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1318                 return;
1319
1320         case PRED_BINARY_OP:
1321                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1322                 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
1323                 return;
1324         case PRED_FUNC:
1325                 op_list = pr->get_op_list();
1326                 for(o=0;o<op_list.size();++o){
1327                         bind_colref_se(op_list[o], fm, prev_ref, new_ref);
1328                 }
1329                 return;
1330
1331         default:
1332                 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1333                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1334                 exit(1);
1335         }
1336
1337         return;
1338
1339 }
1340
1341
1342 /////////////////////////////////////////////////////////////////////
1343 //              verify that the se refs only literals and params.
1344 //          (use to verify that the expression should stay in the hfta
1345 //               during a split)
1346 /////////////////////////////////////////////////////////////////////
1347
1348 bool is_literal_or_param_only(scalarexp_t *se){
1349         int o;
1350         vector<scalarexp_t *> operands;
1351         bool sum = true;
1352
1353         if(se == NULL) return(true);
1354
1355         switch(se->get_operator_type()){
1356         case SE_LITERAL:
1357         case SE_PARAM:
1358                 return(true);
1359         case SE_IFACE_PARAM:
1360                 return(false);          // need to treat as colref
1361         case SE_UNARY_OP:
1362                 return(is_literal_or_param_only(se->get_left_se()) );
1363         case SE_BINARY_OP:
1364                 return(
1365                         is_literal_or_param_only(se->get_left_se()) &&
1366                         is_literal_or_param_only(se->get_right_se())
1367                         );
1368         case SE_COLREF:
1369                 return(false);
1370         case SE_AGGR_STAR:
1371         case SE_AGGR_SE:
1372                 return(false);
1373         case SE_FUNC:
1374 //                      The fcn might have special meaning at the lfta ...
1375                 return(false);
1376
1377         default:
1378                 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1379                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1380                 exit(1);
1381         }
1382         return(0);
1383 }
1384
1385
1386
1387 /////////////////////////////////////////////////////////////////////
1388 //              Search for gb refs.
1389 //          (use to verify that no gbrefs in a gb def.)
1390 /////////////////////////////////////////////////////////////////////
1391
1392
1393 int count_gb_se(scalarexp_t *se){
1394         int o;
1395         vector<scalarexp_t *> operands;
1396         int sum = 0;
1397
1398         if(se == NULL) return(0);
1399
1400         switch(se->get_operator_type()){
1401         case SE_LITERAL:
1402         case SE_PARAM:
1403         case SE_IFACE_PARAM:
1404                 return(0);
1405         case SE_UNARY_OP:
1406                 return(count_gb_se(se->get_left_se()) );
1407         case SE_BINARY_OP:
1408                 return(
1409                         count_gb_se(se->get_left_se()) +
1410                         count_gb_se(se->get_right_se())
1411                         );
1412         case SE_COLREF:
1413                 if(se->get_gb_ref() < 0) return(0);
1414                 return(1);
1415         case SE_AGGR_STAR:
1416         case SE_AGGR_SE:
1417                 return(0);
1418         case SE_FUNC:
1419                 operands = se->get_operands();
1420                 for(o=0;o<operands.size();o++){
1421                         sum +=  count_gb_se(operands[o]);
1422                 }
1423                 return(sum);
1424
1425         default:
1426                 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1427                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1428                 exit(1);
1429         }
1430         return(0);
1431 }
1432
1433
1434 /////////////////////////////////////////////////////////////////////
1435 ////////////////                Search for stateful fcns.
1436 /////////////////////////////////////////////////////////////////////
1437
1438
1439 int se_refs_sfun(scalarexp_t *se){
1440         int o;
1441         vector<scalarexp_t *> operands;
1442         int sum = 0;
1443
1444         if(se == NULL) return(0);
1445
1446         switch(se->get_operator_type()){
1447         case SE_LITERAL:
1448         case SE_PARAM:
1449         case SE_IFACE_PARAM:
1450                 return(0);
1451         case SE_UNARY_OP:
1452                 return(se_refs_sfun(se->get_left_se()) );
1453         case SE_BINARY_OP:
1454                 return(
1455                         se_refs_sfun(se->get_left_se()) +
1456                         se_refs_sfun(se->get_right_se())
1457                         );
1458         case SE_COLREF:
1459                 return(0);
1460         case SE_AGGR_STAR:
1461         case SE_AGGR_SE:
1462                 return(0);
1463         case SE_FUNC:
1464                 operands = se->get_operands();
1465                 for(o=0;o<operands.size();o++){
1466                         sum +=  se_refs_sfun(operands[o]);
1467                 }
1468                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1469
1470 //                      for now, stateful functions count as aggregates.
1471                 if(se->get_storage_state() != "")
1472                         sum++;
1473
1474                 return(sum);
1475
1476         default:
1477                 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1478                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1479                 exit(1);
1480         }
1481         return(0);
1482 }
1483
1484
1485 //              Return a count of the number of stateful fcns in this predicate.
1486 int pred_refs_sfun(predicate_t *pr){
1487         vector<scalarexp_t *> op_list;
1488         int o, aggr_sum;
1489
1490         switch(pr->get_operator_type()){
1491         case PRED_IN:
1492                 return(se_refs_sfun(pr->get_left_se()) );
1493         case PRED_COMPARE:
1494                 return(
1495                         se_refs_sfun(pr->get_left_se()) +
1496                         se_refs_sfun(pr->get_right_se())
1497                 );
1498         case PRED_UNARY_OP:
1499                 return(pred_refs_sfun(pr->get_left_pr()) );
1500         case PRED_BINARY_OP:
1501                 return(
1502                         pred_refs_sfun(pr->get_left_pr()) +
1503                         pred_refs_sfun(pr->get_right_pr())
1504                 );
1505         case PRED_FUNC:
1506                 op_list = pr->get_op_list();
1507                 aggr_sum = 0;
1508                 for(o=0;o<op_list.size();++o){
1509                         aggr_sum += se_refs_sfun(op_list[o]);
1510                 }
1511                 return(aggr_sum);
1512
1513         default:
1514                 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1515                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1516                 exit(1);
1517         }
1518
1519         return(0);
1520 }
1521
1522 //////////////////////////////////////////////////
1523
1524 /////////////////////////////////////////////////////////////////////
1525 ////////////////                Search for aggregates.
1526 /////////////////////////////////////////////////////////////////////
1527
1528
1529 int count_aggr_se(scalarexp_t *se, bool strict){
1530         int o;
1531         vector<scalarexp_t *> operands;
1532         int sum = 0;
1533
1534         if(se == NULL) return(0);
1535
1536         switch(se->get_operator_type()){
1537         case SE_LITERAL:
1538         case SE_PARAM:
1539         case SE_IFACE_PARAM:
1540                 return(0);
1541         case SE_UNARY_OP:
1542                 return(count_aggr_se(se->get_left_se(), strict) );
1543         case SE_BINARY_OP:
1544                 return(
1545                         count_aggr_se(se->get_left_se(), strict) +
1546                         count_aggr_se(se->get_right_se(), strict)
1547                         );
1548         case SE_COLREF:
1549                 return(0);
1550         case SE_AGGR_STAR:
1551         case SE_AGGR_SE:
1552                 return(1);
1553         case SE_FUNC:
1554                 operands = se->get_operands();
1555                 for(o=0;o<operands.size();o++){
1556                         sum +=  count_aggr_se(operands[o], strict);
1557                 }
1558                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1559
1560 //                      now, stateful functions can count as aggregates.
1561 //                      if we are being strict.
1562                 if(! strict && se->get_storage_state() != "")
1563                         sum++;
1564
1565                 return(sum);
1566
1567         default:
1568                 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1569                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1570                 exit(1);
1571         }
1572         return(0);
1573 }
1574
1575
1576 //              Return a count of the number of aggregate fcns in this predicate.
1577 int count_aggr_pred(predicate_t *pr, bool strict){
1578         vector<scalarexp_t *> op_list;
1579         int o, aggr_sum;
1580
1581         switch(pr->get_operator_type()){
1582         case PRED_IN:
1583                 return(count_aggr_se(pr->get_left_se(), strict) );
1584         case PRED_COMPARE:
1585                 return(
1586                         count_aggr_se(pr->get_left_se(), strict) +
1587                         count_aggr_se(pr->get_right_se(), strict)
1588                 );
1589         case PRED_UNARY_OP:
1590                 return(count_aggr_pred(pr->get_left_pr(), strict) );
1591         case PRED_BINARY_OP:
1592                 return(
1593                         count_aggr_pred(pr->get_left_pr(), strict) +
1594                         count_aggr_pred(pr->get_right_pr(), strict)
1595                 );
1596         case PRED_FUNC:
1597                 op_list = pr->get_op_list();
1598                 aggr_sum = 0;
1599                 for(o=0;o<op_list.size();++o){
1600                         aggr_sum += count_aggr_se(op_list[o], strict);
1601                 }
1602                 return(aggr_sum);
1603
1604         default:
1605                 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1606                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1607                 exit(1);
1608         }
1609
1610         return(0);
1611 }
1612
1613 //////////////////////////////////////////////////
1614 ///             Analyze tablevar refs
1615
1616 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1617         int o;
1618         vector<scalarexp_t *> operands;
1619         int vref;
1620         colref_t *cr;
1621         ifpref_t *ir;
1622
1623         if(se == NULL) return;
1624
1625         switch(se->get_operator_type()){
1626         case SE_LITERAL:
1627         case SE_PARAM:
1628                 return;
1629         case SE_IFACE_PARAM:
1630                 ir = se->get_ifpref();
1631                 vref = ir->get_tablevar_ref();
1632                 for(o=0;o<reflist.size();++o){
1633                         if(vref == reflist[o]) return;
1634                 }
1635                 reflist.push_back(vref);
1636                 return;
1637         case SE_UNARY_OP:
1638                 get_tablevar_ref_se(se->get_left_se(), reflist);
1639                 return;
1640         case SE_BINARY_OP:
1641                 get_tablevar_ref_se(se->get_left_se(), reflist);
1642                 get_tablevar_ref_se(se->get_right_se(), reflist);
1643                 return;
1644         case SE_COLREF:
1645                 if(se->is_gb()) return;
1646                 cr = se->get_colref();
1647                 vref = cr->get_tablevar_ref();
1648                 for(o=0;o<reflist.size();++o){
1649                         if(vref == reflist[o]) return;
1650                 }
1651                 reflist.push_back(vref);
1652                 return;
1653         case SE_AGGR_STAR:
1654         case SE_AGGR_SE:
1655                 return;
1656         case SE_FUNC:
1657                 if(se->get_aggr_ref() >= 0) return;
1658
1659                 operands = se->get_operands();
1660                 for(o=0;o<operands.size();o++){
1661                         get_tablevar_ref_se(operands[o], reflist);
1662                 }
1663                 return;
1664
1665         default:
1666                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1667                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1668                 exit(1);
1669         }
1670         return;
1671 }
1672
1673
1674 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1675         vector<scalarexp_t *> op_list;
1676         int o;
1677
1678         switch(pr->get_operator_type()){
1679         case PRED_IN:
1680                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1681                 return;
1682         case PRED_COMPARE:
1683                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1684                 get_tablevar_ref_se(pr->get_right_se(),reflist);
1685                 return;
1686         case PRED_UNARY_OP:
1687                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1688                 return;
1689         case PRED_BINARY_OP:
1690                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1691                 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
1692                 return;
1693         case PRED_FUNC:
1694                 op_list = pr->get_op_list();
1695                 for(o=0;o<op_list.size();++o){
1696                         get_tablevar_ref_se(op_list[o],reflist);
1697                 }
1698                 return;
1699         default:
1700                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1701                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1702         }
1703
1704         return;
1705 }
1706
1707
1708 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1709
1710 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1711         int agg_id;
1712         int o;
1713         vector<scalarexp_t *> operands;
1714
1715         switch(se->get_operator_type()){
1716         case SE_LITERAL:
1717         case SE_PARAM:
1718         case SE_IFACE_PARAM:
1719                 return;
1720         case SE_UNARY_OP:
1721                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
1722                 return;
1723         case SE_BINARY_OP:
1724                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1725                 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
1726                 return;
1727         case SE_COLREF:
1728                 return;
1729         case SE_AGGR_STAR:
1730                 return;
1731         case SE_AGGR_SE:
1732                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1733                 return;
1734         case SE_FUNC:
1735                 operands = se->get_operands();
1736                 for(o=0;o<operands.size();o++){
1737                         gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1738                 }
1739                 if(se->get_storage_state() != ""){
1740                         states_refd.insert(se->get_storage_state());
1741                 }
1742                 return;
1743
1744         default:
1745                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1746                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1747                 exit(1);
1748         }
1749         return;
1750 }
1751
1752
1753 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1754
1755 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1756         vector<scalarexp_t *> op_list;
1757         int o;
1758
1759         switch(pr->get_operator_type()){
1760         case PRED_IN:
1761                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1762                 return;
1763         case PRED_COMPARE:
1764                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1765                 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
1766                 return;
1767         case PRED_UNARY_OP:
1768                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1769                 return;
1770         case PRED_BINARY_OP:
1771                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1772                 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
1773                 return;
1774         case PRED_FUNC:
1775                 op_list = pr->get_op_list();
1776                 for(o=0;o<op_list.size();++o){
1777                         gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
1778                 }
1779                 return;
1780
1781         default:
1782                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1783                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1784                 exit(1);
1785         }
1786
1787         return;
1788 }
1789
1790
1791
1792
1793 //                      walk se tree and collect aggregates into aggregate table.
1794 //                      duplicate aggregates receive the same idx to the table.
1795
1796 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1797         int agg_id;
1798         int o;
1799         vector<scalarexp_t *> operands;
1800
1801         switch(se->get_operator_type()){
1802         case SE_LITERAL:
1803         case SE_PARAM:
1804         case SE_IFACE_PARAM:
1805                 return;
1806         case SE_UNARY_OP:
1807                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
1808                 return;
1809         case SE_BINARY_OP:
1810                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1811                 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
1812                 return;
1813         case SE_COLREF:
1814                 return;
1815         case SE_AGGR_STAR:
1816                 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1817                 se->set_aggr_id(agg_id);
1818                 return;
1819         case SE_AGGR_SE:
1820                 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1821                 se->set_aggr_id(agg_id);
1822                 return;
1823         case SE_FUNC:
1824                 operands = se->get_operands();
1825                 for(o=0;o<operands.size();o++){
1826                         build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1827                 }
1828                 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1829                         agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1830                         se->set_aggr_id(agg_id);
1831                 }
1832                 return;
1833
1834         default:
1835                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1836                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1837                 exit(1);
1838         }
1839         return;
1840 }
1841
1842
1843 //                      walk se tree and collect aggregates into aggregate table.
1844 //                      duplicate aggregates receive the same idx to the table.
1845
1846 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1847         vector<scalarexp_t *> op_list;
1848         int o;
1849
1850         switch(pr->get_operator_type()){
1851         case PRED_IN:
1852                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1853                 return;
1854         case PRED_COMPARE:
1855                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1856                 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
1857                 return;
1858         case PRED_UNARY_OP:
1859                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1860                 return;
1861         case PRED_BINARY_OP:
1862                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1863                 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
1864                 return;
1865         case PRED_FUNC:
1866                 op_list = pr->get_op_list();
1867                 for(o=0;o<op_list.size();++o){
1868                         build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
1869                 }
1870                 return;
1871
1872         default:
1873                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1874                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1875                 exit(1);
1876         }
1877
1878         return;
1879 }
1880
1881
1882 //                      Return true if the two scalar expressions
1883 //                      represent the same value (e.g., use to eliminate
1884 //                      duplicate aggregates).
1885 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1886         vector<scalarexp_t *> operands1;
1887         vector<scalarexp_t *> operands2;
1888         int o;
1889
1890 //              First handle the case of nulls (e.g. COUNT aggrs)
1891         if(se1 == NULL && se2 == NULL) return(true);
1892         if(se1 == NULL || se2 == NULL) return(false);
1893
1894 //              In all cases, must be the same oeprator type and same operator.
1895         if(se1->get_operator_type() != se2->get_operator_type())
1896                 return(false);
1897         if(se1->get_op() != se2->get_op() )
1898                 return(false);
1899
1900         switch(se1->get_operator_type()){
1901         case SE_LITERAL:
1902                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1903         case SE_PARAM:
1904                 return(se1->get_param_name() == se2->get_param_name() );
1905         case SE_IFACE_PARAM:
1906                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1907         case SE_UNARY_OP:
1908                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1909         case SE_BINARY_OP:
1910                 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1911                         return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
1912                 return(false);
1913         case SE_COLREF:
1914                 if(se1->is_gb() && se2->is_gb())
1915                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1916                 if(se1->is_gb() || se2->is_gb())
1917                         return(false);
1918                 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
1919         case SE_AGGR_STAR:
1920                 return(true);
1921         case SE_AGGR_SE:
1922                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1923         case SE_FUNC:
1924                 if(se1->get_op() != se2->get_op()) return(false);
1925
1926                 operands1 = se1->get_operands();
1927                 operands2 = se2->get_operands();
1928                 if(operands1.size() != operands2.size()) return(false);
1929
1930                 for(o=0;o<operands1.size();o++){
1931                         if(! is_equivalent_se(operands1[o], operands2[o]) )
1932                                 return(false);
1933                 }
1934                 return(true);
1935         default:
1936                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1937                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1938                 exit(1);
1939         }
1940         return(false);
1941 }
1942
1943
1944 //              Similar to is_equivalent_se, but with a looser definition
1945 //              of equivalence of colrefs.  Here, say they are equivalent
1946 //              if their base table is the same.  Use to find equivalent
1947 //              predicates on base tables.
1948 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1949         vector<scalarexp_t *> operands1;
1950         vector<scalarexp_t *> operands2;
1951         int o;
1952
1953         if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1954                 se1 = se1->get_right_se();
1955         }
1956         if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1957                 se2 = se2->get_right_se();
1958         }
1959
1960 //              First handle the case of nulls (e.g. COUNT aggrs)
1961         if(se1 == NULL && se2 == NULL) return(true);
1962         if(se1 == NULL || se2 == NULL) return(false);
1963
1964 //              In all cases, must be the same oeprator type and same operator.
1965         if(se1->get_operator_type() != se2->get_operator_type())
1966                 return(false);
1967         if(se1->get_op() != se2->get_op() )
1968                 return(false);
1969
1970         switch(se1->get_operator_type()){
1971         case SE_LITERAL:
1972                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1973         case SE_PARAM:
1974                 return(se1->get_param_name() == se2->get_param_name() );
1975         case SE_IFACE_PARAM:
1976                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1977         case SE_UNARY_OP:
1978                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1979         case SE_BINARY_OP:
1980                 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1981                         return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
1982                 return(false);
1983         case SE_COLREF:
1984 /*
1985                 if(se1->is_gb() && se2->is_gb())
1986                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1987                 if(se1->is_gb() || se2->is_gb())
1988                         return(false);
1989 */
1990                 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
1991         case SE_AGGR_STAR:
1992                 return(true);
1993         case SE_AGGR_SE:
1994                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1995         case SE_FUNC:
1996                 if(se1->get_op() != se2->get_op()) return(false);
1997
1998                 operands1 = se1->get_operands();
1999                 operands2 = se2->get_operands();
2000                 if(operands1.size() != operands2.size()) return(false);
2001
2002                 for(o=0;o<operands1.size();o++){
2003                         if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
2004                                 return(false);
2005                 }
2006                 return(true);
2007         default:
2008                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2009                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2010                 exit(1);
2011         }
2012         return(false);
2013 }
2014
2015
2016 //              Find predicates which are equivalent when
2017 //              looking at the base tables.  Use to find
2018 //              common prefilter.
2019 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2020 int i, o;
2021
2022 //              First handle the case of nulls
2023         if(p1 == NULL && p2 == NULL) return(true);
2024         if(p1 == NULL || p2 == NULL) return(false);
2025
2026
2027   if(p1->get_operator_type() != p2->get_operator_type())
2028          return(false);
2029   if(p1->get_op() != p2->get_op())
2030          return(false);
2031
2032     vector<literal_t *> ll1;
2033     vector<literal_t *> ll2;
2034         vector<scalarexp_t *> op_list1, op_list2;
2035
2036
2037   switch(p2->get_operator_type()){
2038      case PRED_COMPARE:
2039         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2040             return(false);
2041         return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
2042     break;
2043     case PRED_IN:
2044         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2045             return(false);
2046         ll1 = p1->get_lit_vec();
2047         ll2 = p2->get_lit_vec();
2048         if(ll1.size() != ll2.size())
2049             return(false);
2050         for(i=0;i<ll1.size();i++){
2051           if(! ll1[i]->is_equivalent( ll2[i] ) )
2052             return(false);
2053         }
2054         return(true);
2055     break;
2056      case PRED_UNARY_OP:
2057         return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2058     break;
2059      case PRED_BINARY_OP:
2060         if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2061             return(false);
2062         return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
2063     break;
2064          case PRED_FUNC:
2065                 op_list1 = p1->get_op_list();
2066                 op_list2 = p2->get_op_list();
2067                 if(op_list1.size() != op_list2.size()) return(false);
2068                 for(o=0;o<op_list1.size();++o){
2069                         if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
2070                 }
2071                 return(true);
2072
2073    }
2074
2075     return(false);
2076 }
2077
2078
2079
2080 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2081   if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2082          return(false);
2083   if(p1->get_fcn_id() != p2->get_fcn_id())
2084                 return false;
2085   vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2086   int o;
2087   vector<scalarexp_t *> op_list1 = p1->get_op_list();
2088   vector<scalarexp_t *> op_list2 = p2->get_op_list();
2089   if(op_list1.size() != op_list2.size()) return(false);
2090   for(o=0;o<op_list1.size();++o){
2091           if(cl_op[o]){
2092                 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2093                         return(false);
2094         }
2095   }
2096   return true;
2097
2098 }
2099
2100
2101
2102
2103 //                      Verify that the scalar expression (in a such that clause)
2104 //                      is acceptable in an aggregation query.  No column
2105 //                      references allowed outside aggregates, except for
2106 //                      references to group-by attributes.
2107 //                      return true if OK, false if bad.
2108 bool verify_aggr_query_se(scalarexp_t *se){
2109         vector <scalarexp_t *> operands;
2110         int o;
2111
2112     switch(se->get_operator_type()){
2113     case SE_LITERAL:
2114     case SE_PARAM:
2115     case SE_IFACE_PARAM:
2116         return(true );
2117     case SE_UNARY_OP:
2118         return(verify_aggr_query_se(se->get_left_se() ) );
2119     case SE_BINARY_OP:
2120         return(verify_aggr_query_se(se->get_left_se() ) &&
2121             verify_aggr_query_se(se->get_right_se() ) );
2122     case SE_COLREF:
2123         if(se->is_gb() ) return(true);
2124         fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2125                         "only reference constants, group-by attributes, and "
2126                         "aggregates,  (%s) line %d, character %d.\n",
2127                         se->get_colref()->to_string().c_str(),
2128                                                 se->get_lineno(), se->get_charno() );
2129         return(false);
2130     case SE_AGGR_STAR:
2131     case SE_AGGR_SE:
2132 //                      colrefs and gbrefs allowed.
2133 //                      check for nested aggregation elsewhere, so just return TRUE
2134         return(true);
2135         case SE_FUNC:
2136 //                      If its a UDAF, just return true
2137                 if(se->get_aggr_ref() >= 0) return true;
2138
2139                 operands = se->get_operands();
2140
2141                 for(o=0;o<operands.size();o++){
2142                         if(! verify_aggr_query_se(operands[o]) )
2143                                 return(false);
2144                 }
2145                 return(true);
2146     default:
2147         fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2148                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2149         exit(1);
2150     }
2151     return(false);
2152 }
2153
2154
2155
2156
2157 //                      Find complex literals.
2158 //                      NOTE : This analysis should be deferred to
2159 //                                 code generation time.
2160 //                      This analysis drills into aggr se specs.
2161 //                      Shouldn't this be done at the aggregate table?
2162 //                      But, its not a major loss of efficiency.
2163 //                              UPDATE : drilling into aggr se's is causnig a problem
2164 //                                      so I've eliminated it.
2165
2166 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2167                                                                 cplx_lit_table *complex_literals){
2168         literal_t *l;
2169         vector<scalarexp_t *> operands;
2170         int o;
2171         scalarexp_t *param_se;
2172         data_type *dt;
2173
2174         switch(se->get_operator_type()){
2175         case SE_LITERAL:
2176                 l = se->get_literal();
2177                 if(l->constructor_name() != ""){
2178                         int cl_idx = complex_literals->add_cpx_lit(l, false);
2179                         l->set_cpx_lit_ref(cl_idx);
2180                 }
2181                 return(true);
2182         case SE_PARAM:
2183                 return(true );
2184 //                      SE_IFACE_PARAM should not exist when this is called.
2185         case SE_UNARY_OP:
2186                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2187         case SE_BINARY_OP:
2188                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2189                         find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2190         case SE_COLREF:
2191                 return(true);
2192         case SE_AGGR_STAR:
2193                 return(true);
2194         case SE_AGGR_SE:
2195                 return true;
2196 //              return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2197         case SE_FUNC:
2198                 if(se->get_aggr_ref() >= 0) return true;
2199
2200                 operands = se->get_operands();
2201                 for(o=0;o<operands.size();o++){
2202                         find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
2203                 }
2204                 return(true);
2205         default:
2206                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2207                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2208                 exit(1);
2209         }
2210         return(false);
2211 }
2212
2213
2214
2215
2216 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2217                                                                 cplx_lit_table *complex_literals){
2218         int i,o;
2219         vector<literal_t *> litl;
2220         vector<scalarexp_t *> op_list;
2221
2222
2223         switch(pr->get_operator_type()){
2224         case PRED_IN:
2225                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2226                 litl = pr->get_lit_vec();
2227                 for(i=0;i<litl.size();i++){
2228                         if(litl[i]->constructor_name() != ""){
2229                                 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2230                                 litl[i]->set_cpx_lit_ref(cl_idx);
2231                         }
2232                 }
2233                 return;
2234         case PRED_COMPARE:
2235                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2236                 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
2237                 return;
2238         case PRED_UNARY_OP:
2239                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2240                 return;
2241         case PRED_BINARY_OP:
2242                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2243                 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
2244                 return;
2245         case PRED_FUNC:
2246                 op_list = pr->get_op_list();
2247                 for(o=0;o<op_list.size();++o){
2248                         find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
2249                 }
2250                 return;
2251         default:
2252                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2253                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2254                 exit(1);
2255         }
2256
2257         return;
2258 }
2259
2260
2261 //              Find all things which are passed as handle parameters to functions
2262 //              (query parameters, (simple) literals, complex literals)
2263 //              These expressions MUST be processed with find_complex_literal_??
2264 //              first.
2265 //                      TODO: this analysis drills into the aggregate SEs.
2266 //                      Shouldn't this be done on the aggr table SEs instead?
2267 //                      to avoid duplication.  THe handle registration
2268 //                      might be expensive ...
2269 //                      REVISED : drilling into aggr se's is causing problems, eliminated.
2270
2271 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2272                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2273         vector<scalarexp_t *> operands;
2274         vector<bool> handle_ind;
2275         int o;
2276         scalarexp_t *param_se;
2277         data_type *dt;
2278         literal_t *l;
2279
2280         switch(se->get_operator_type()){
2281         case SE_LITERAL:
2282                 return;
2283         case SE_PARAM:
2284                 return;
2285 //              case SE_IFACE_PARAM:            SHOULD NOT EXIST when this is called
2286         case SE_UNARY_OP:
2287                 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2288                 return;
2289         case SE_BINARY_OP:
2290                 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2291                 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2292                 return;
2293         case SE_COLREF:
2294                 return;
2295         case SE_AGGR_STAR:
2296                 return;
2297         case SE_AGGR_SE:
2298 //              find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2299                 return;
2300         case SE_FUNC:
2301                 if(se->get_aggr_ref() >= 0) return ;
2302
2303                 operands = se->get_operands();
2304                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2305                 for(o=0;o<operands.size();o++){
2306                         if(handle_ind[o]){
2307                                 handle_param_tbl_entry *he;
2308                                 param_se = operands[o];
2309                                 if(param_se->get_operator_type() != SE_LITERAL &&
2310                                                 param_se->get_operator_type() != SE_PARAM){
2311                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2312                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
2313                                         exit(1);
2314                                 }
2315
2316                                 if(param_se->get_operator_type() == SE_PARAM){
2317                                         he = new handle_param_tbl_entry(
2318                                                 se->get_op(), o, param_se->get_param_name(),
2319                                                 param_se->get_data_type()->get_type_str());
2320                                 }else{
2321                                         l = param_se->get_literal();
2322                                         if(l->is_cpx_lit()){
2323                                                 he = new handle_param_tbl_entry(
2324                                                         se->get_op(), o, l->get_cpx_lit_ref(),
2325                                                 param_se->get_data_type()->get_type_str());
2326                                         }else{
2327                                                 he = new handle_param_tbl_entry(
2328                                                         se->get_op(), o, l,
2329                                                 param_se->get_data_type()->get_type_str());
2330                                         }
2331                                 }
2332                                 param_se->set_handle_ref(handle_tbl.size());
2333                                 handle_tbl.push_back(he);
2334                         }else{
2335                                 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
2336                         }
2337                 }
2338                 return;
2339         default:
2340                 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2341                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2342                 exit(1);
2343         }
2344         return;
2345 }
2346
2347
2348 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2349                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2350         vector<literal_t *> litl;
2351         vector<scalarexp_t *> op_list;
2352         scalarexp_t *param_se;
2353         vector<bool> handle_ind;
2354         int o;
2355         literal_t *l;
2356
2357         switch(pr->get_operator_type()){
2358         case PRED_IN:
2359                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2360                 return;
2361         case PRED_COMPARE:
2362                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2363                 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
2364                 return;
2365         case PRED_UNARY_OP:
2366                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2367                 return;
2368         case PRED_BINARY_OP:
2369                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2370                 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
2371                 return;
2372         case PRED_FUNC:
2373                 op_list = pr->get_op_list();
2374                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2375                 for(o=0;o<op_list.size();++o){
2376                         if(handle_ind[o]){
2377                                 handle_param_tbl_entry *he;
2378                                 param_se = op_list[o];
2379                                 if(param_se->get_operator_type() != SE_LITERAL &&
2380                                                 param_se->get_operator_type() != SE_PARAM){
2381                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2382                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
2383                                         exit(1);
2384                                 }
2385
2386                                 if(param_se->get_operator_type() == SE_PARAM){
2387                                         he = new handle_param_tbl_entry(
2388                                                 pr->get_op(), o, param_se->get_param_name(),
2389                                                 param_se->get_data_type()->get_type_str());
2390                                 }else{
2391                                         l = param_se->get_literal();
2392                                         if(l->is_cpx_lit()){
2393                                                 he = new handle_param_tbl_entry(
2394                                                         pr->get_op(), o, l->get_cpx_lit_ref(),
2395                                                 param_se->get_data_type()->get_type_str());
2396                                         }else{
2397                                                 he = new handle_param_tbl_entry(
2398                                                         pr->get_op(), o, l,
2399                                                 param_se->get_data_type()->get_type_str());
2400                                         }
2401                                 }
2402                                 param_se->set_handle_ref(handle_tbl.size());
2403                                 handle_tbl.push_back(he);
2404                         }else{
2405                                 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
2406                         }
2407                 }
2408                 return;
2409         default:
2410                 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2411                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2412                 exit(1);
2413         }
2414
2415         return;
2416 }
2417
2418
2419 //                      Verify the HAVING predicate : it
2420 //                      can access gb vars, aggregates, and constants,
2421 //                      but not colrefs.
2422 //                      return 1 if OK, -1 if bad.
2423 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2424
2425 //                      Extended to deal with cleaning_by, cleaning_when :
2426 //                      verify that any aggregate function
2427 //                      has the multiple output property.
2428
2429 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2430         int l_ret, r_ret;
2431         vector<scalarexp_t *> operands;
2432         vector<data_type *> odt;
2433         int o;
2434
2435         switch(se->get_operator_type()){
2436         case SE_LITERAL:
2437                 return(1);
2438         case SE_PARAM:
2439         case SE_IFACE_PARAM:
2440                 return(1);
2441         case SE_UNARY_OP:
2442                 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
2443         case SE_BINARY_OP:
2444                 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2445                 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2446                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
2447                 return(1);
2448         case SE_COLREF:
2449                 if(se->is_gb()) return 1;
2450                 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2451                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2452                 return(-1);
2453         case SE_AGGR_STAR:
2454         case SE_AGGR_SE:
2455 //                      colrefs and gbrefs allowed.
2456 //                      check for nested aggregation elsewhere, so just return TRUE
2457                 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2458                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2459                                 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2460                         return(-1);
2461                 }
2462
2463 //                              Ensure that aggregate refs allow multiple outputs
2464 //                              in CLEANING_WHEN, CLEANING_BY
2465                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2466                         if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2467                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2468                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2469                                 return(-1);
2470                         }
2471                 }
2472
2473
2474                 return(1);
2475         case SE_FUNC:
2476                 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2477                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2478                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2479                 return(-1);
2480                 }
2481
2482                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2483                         if(se->get_aggr_ref() >= 0  && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2484                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2485                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2486                                 return(-1);
2487                         }
2488                 }
2489
2490                 if(se->get_aggr_ref() >= 0)     // don't descent into aggregates.
2491                         return 1;
2492
2493                 operands = se->get_operands();
2494                 r_ret = 1;
2495                 for(o=0;o<operands.size();o++){
2496                         l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2497                         if(l_ret < 0) r_ret = -1;
2498                 }
2499                 if(r_ret < 0) return(-1); else return(1);
2500                 return(1);
2501         default:
2502                 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2503                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2504                 return(-1);
2505         }
2506         return(-1);
2507 }
2508
2509
2510 //                      Verify the HAVING predicate : it
2511 //                      can access gb vars, aggregates, and constants,
2512 //                      but not colrefs.
2513 //                      return 1 if OK, -1 if bad.
2514 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2515
2516
2517 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2518         int l_ret, r_ret;
2519         vector<literal_t *> litl;
2520         vector<scalarexp_t *> op_list;
2521         int o;
2522
2523         switch(pr->get_operator_type()){
2524         case PRED_IN:
2525                 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
2526         case PRED_COMPARE:
2527                 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2528                 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2529                 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
2530         case PRED_UNARY_OP:
2531                 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2532         case PRED_BINARY_OP:
2533                 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2534                 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2535                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
2536                 return(1);
2537         case PRED_FUNC:
2538                 op_list = pr->get_op_list();
2539                 l_ret = 1;
2540                 for(o=0;o<op_list.size();++o){
2541                         if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
2542                 }
2543                 return(l_ret);
2544
2545         default:
2546                 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2547                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2548         }
2549
2550         return(-1);
2551 }
2552
2553
2554 //////////////////////////////////////////////////////////////////////////
2555 //////////////////////////////////////////////////////////////////////////
2556 ///////                 cnf and pred analysis and manipulation
2557
2558 // ----------------------------------------------------------------------
2559 //  Convert the predicates to a list of conjuncts
2560 //  (not actually cnf).  Do some analysis
2561 //  on their properties.
2562 // ----------------------------------------------------------------------
2563
2564
2565 //  Put into list clist the predicates that
2566 //  are AND'ed together.
2567
2568 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2569
2570   if(pr == NULL) return;
2571
2572   switch(pr->get_operator_type()){
2573      case PRED_COMPARE:
2574         clist.push_back(new cnf_elem(pr));
2575         return;
2576         break;
2577      case PRED_IN:
2578         clist.push_back(new cnf_elem(pr));
2579         return;
2580         break;
2581      case PRED_UNARY_OP:
2582         clist.push_back(new cnf_elem(pr));
2583         return;
2584         break;
2585      case PRED_BINARY_OP:
2586         if(pr->get_op() == "OR"){
2587                         clist.push_back(new cnf_elem(pr));
2588                         return;
2589                 }
2590                 if(pr->get_op() =="AND"){
2591                    make_cnf_from_pr(pr->get_left_pr(),clist);
2592                    make_cnf_from_pr(pr->get_right_pr(),clist);
2593                    return;
2594                 }
2595         case PRED_FUNC:
2596         clist.push_back(new cnf_elem(pr));
2597         return;
2598         break;
2599         default:
2600                 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2601                 exit(1);
2602                         break;
2603            }
2604 }
2605
2606
2607
2608 //  Find out what things are referenced in a se,
2609 //  to use for analyzing a predicate.
2610 //  Currently, is it simple (no operators), does it
2611 //  reference a group-by column, does it reference an
2612 //  attribute of a table.
2613 //
2614 //      analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
2615
2616
2617 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2618  int p;
2619  vector<scalarexp_t *> operand_list;
2620
2621         switch(se->get_operator_type()){
2622         case SE_LITERAL:
2623         case SE_PARAM:
2624         case SE_IFACE_PARAM:
2625                 return;
2626         case SE_COLREF:
2627                 if(se->is_gb() ) g=1;
2628                 else                    a=1;
2629                 return;
2630         case SE_UNARY_OP:
2631                 s=0;
2632                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2633                 return;
2634         case SE_BINARY_OP:
2635                 s=0;
2636                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2637                 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
2638                 return;
2639         case SE_AGGR_STAR:
2640         case SE_AGGR_SE:
2641                 agr = 1;
2642                 return;
2643         case SE_FUNC:
2644                 if(se->get_aggr_ref() >= 0){
2645                         agr = 1;
2646                         return;
2647                 }
2648                 s = 0;
2649                 operand_list = se->get_operands();
2650                 for(p=0;p<operand_list.size();p++){
2651                         analyze_cnf_se(operand_list[p],s,g,a,agr);
2652                 }
2653         break;
2654         }
2655
2656         return;
2657 }
2658
2659
2660
2661 void analyze_cnf_pr(predicate_t *pr, int &g, int &a,  int &agr){
2662 int dum_simple, o;
2663 vector<scalarexp_t *> op_list;
2664
2665
2666         switch(pr->get_operator_type()){
2667         case PRED_COMPARE:
2668                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2669                 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
2670                 return;
2671         case PRED_IN:
2672                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2673                 return;
2674         case PRED_UNARY_OP:
2675                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2676                 return;
2677         case PRED_BINARY_OP:
2678                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2679                 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
2680                 return;
2681         case PRED_FUNC:
2682                 op_list = pr->get_op_list();
2683                 for(o=0;o<op_list.size();++o){
2684                         analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2685                 }
2686                 return;
2687         default:
2688                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2689                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2690                 exit(1);
2691         }
2692 }
2693
2694
2695
2696 //  analyze a conjunct of a predicate.
2697 //  Is it atomic (e.g., a single predicate),
2698 //  and if so do a further analysis.
2699
2700 void analyze_cnf(cnf_elem *c){
2701
2702 //  analyze the predicate.
2703    analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
2704
2705    if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2706                 return;
2707    }
2708
2709
2710 //  its an atomic predicate -- get more info
2711    c->is_atom = 1;
2712
2713         if(c->pr->get_op() == "=")
2714                 c->eq_pred = 1;
2715         else
2716                 c->eq_pred = 0;
2717
2718         if(c->pr->get_operator_type() == PRED_IN)
2719                 c->in_pred = 1;
2720         else
2721                 c->in_pred = 0;
2722
2723         c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2724         analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
2725
2726         if(c->pr->get_operator_type() == PRED_COMPARE){
2727                 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
2728                 analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
2729         }
2730 }
2731
2732 void analyze_constraint_se(scalarexp_t *se,
2733                         int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
2734  int l_agr, l_gb, l_par, l_func, l_op;
2735  int r_agr, r_gb, r_par, r_func, r_op;
2736  int p;
2737  vector<scalarexp_t *> operand_list;
2738
2739         switch(se->get_operator_type()){
2740         case SE_LITERAL:
2741         case SE_IFACE_PARAM:
2742                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2743                 return;
2744         case SE_PARAM:
2745                 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2746                 return;
2747         case SE_COLREF:
2748                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2749                 if(se->is_gb() ){
2750                         if(enter_gb){
2751                                 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2752                         }else{
2753                                 n_gb=1;
2754                         }
2755                 }
2756                 return;
2757         case SE_UNARY_OP:
2758                 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2759                 n_op++;
2760                 return;
2761         case SE_BINARY_OP:
2762                 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2763                 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2764                 n_agr=l_agr+r_agr;
2765                 n_gb=l_gb+r_gb;
2766                 n_par=l_par+r_par;
2767                 n_func=l_func+r_func;
2768                 n_op=l_op+r_op+1;
2769                 return;
2770         case SE_AGGR_STAR:
2771         case SE_AGGR_SE:
2772                 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2773                 return;
2774         case SE_FUNC:
2775                 if(se->get_aggr_ref() >= 0){
2776                         n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2777                         if(Ext_fcns)
2778                                 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2779                         else
2780                                 n_func = 1;
2781                         return;
2782                 }
2783                 n_agr=0; n_gb = 0; n_par = 0;  n_op = 0;
2784                 if(Ext_fcns)
2785                         n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2786                 else
2787                         n_func = 1;
2788                 operand_list = se->get_operands();
2789                 for(p=0;p<operand_list.size();p++){
2790                         analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2791                         n_agr+=l_agr;
2792                         n_gb+=l_gb;
2793                         n_par+=l_par;
2794                         n_func+=l_func;
2795                         n_op += l_op;
2796                 }
2797         break;
2798         }
2799
2800         return;
2801 }
2802
2803 //              Estimate the cost of a constraint.
2804 //              WARNING a lot of cost assumptions are embedded in the code.
2805 void analyze_constraint_pr(predicate_t *pr,
2806                 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2807                 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2808  int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2809  int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2810
2811 int o;
2812 vector<scalarexp_t *> op_list;
2813
2814
2815         switch(pr->get_operator_type()){
2816         case PRED_COMPARE:
2817                 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2818                 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2819                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2820                 n_func=l_func+r_func; n_op=l_op+r_op;
2821                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2822                         pr->get_right_se()->get_data_type())
2823             ){
2824                         n_cmp_s = 0; n_cmp_c=1;
2825                 }else{
2826                         n_cmp_s = 1; n_cmp_c=0;
2827                 }
2828                 n_in = 0; n_pred = 0; n_bool = 0;
2829                 return;
2830         case PRED_IN:
2831 //                      Tread IN predicate as sequence of comparisons
2832                 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2833                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2834                         pr->get_right_se()->get_data_type())
2835             ){
2836                         n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2837                 }else{
2838                         n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2839                 }
2840                 n_in = 0; n_pred = 0; n_bool = 0;
2841                 return;
2842         case PRED_UNARY_OP:
2843                 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2844                 n_bool++;
2845                 return;
2846         case PRED_BINARY_OP:
2847                 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2848                 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2849                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2850                 n_func=l_func+r_func; n_op=l_op+r_op;
2851                 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
2852                 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
2853                 return;
2854         case PRED_FUNC:
2855                 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2856                 if(Ext_fcns)
2857                         n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
2858                 else
2859                         n_pred = 1;
2860                 op_list = pr->get_op_list();
2861                 for(o=0;o<op_list.size();++o){
2862                         analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2863                         n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
2864                 }
2865                 return;
2866         default:
2867                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2868                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2869                 exit(1);
2870         }
2871 }
2872
2873 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2874  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2875         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2876                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2877
2878 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2879         c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2880 }
2881
2882 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2883  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2884         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2885                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2886 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2887         if(n_par || n_agr)
2888                 return false;
2889         int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2890 //printf("cost=%d\n",cost);
2891         return cost<10;
2892 }
2893
2894 //              The prefilter needs to translate constraints on
2895 //              gbvars into constraints involving their underlying SEs.
2896 //              The following two routines attach GB def info.
2897
2898 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2899  int p;
2900  vector<scalarexp_t *> operand_list;
2901
2902         switch(se->get_operator_type()){
2903         case SE_LITERAL:
2904         case SE_IFACE_PARAM:
2905         case SE_PARAM:
2906         case SE_AGGR_STAR:
2907                 return;
2908         case SE_COLREF:
2909                 if(se->is_gb() ){
2910                          se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2911                 }
2912                 return;
2913         case SE_UNARY_OP:
2914                 insert_gb_def_se(se->get_left_se(),gtbl);
2915                 return;
2916         case SE_BINARY_OP:
2917                 insert_gb_def_se(se->get_left_se(),gtbl);
2918                 insert_gb_def_se(se->get_right_se(),gtbl);
2919                 return;
2920         case SE_AGGR_SE:
2921                 insert_gb_def_se(se->get_left_se(),gtbl);
2922                 return;
2923         case SE_FUNC:
2924                 operand_list = se->get_operands();
2925                 for(p=0;p<operand_list.size();p++){
2926                         insert_gb_def_se(operand_list[p],gtbl);
2927                 }
2928         break;
2929         }
2930
2931         return;
2932 }
2933 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2934 vector<scalarexp_t *> op_list;
2935 int o;
2936
2937         switch(pr->get_operator_type()){
2938         case PRED_COMPARE:
2939                 insert_gb_def_se(pr->get_left_se(),gtbl);
2940                 insert_gb_def_se(pr->get_right_se(),gtbl);
2941                 return;
2942         case PRED_IN:
2943                 insert_gb_def_se(pr->get_left_se(),gtbl);
2944                 return;
2945         case PRED_UNARY_OP:
2946                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2947                 return;
2948         case PRED_BINARY_OP:
2949                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2950                 insert_gb_def_pr(pr->get_right_pr(),gtbl);
2951                 return;
2952         case PRED_FUNC:
2953                 op_list = pr->get_op_list();
2954                 for(o=0;o<op_list.size();++o){
2955                         insert_gb_def_se(op_list[o],gtbl);
2956                 }
2957                 return;
2958         default:
2959                 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2960                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2961                 exit(1);
2962         }
2963 }
2964
2965 //              Substitute gbrefs with their definitions
2966 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2967  int p;
2968  vector<scalarexp_t *> operand_list;
2969  scalarexp_t *lse,*rse;
2970  colref_t *cr;
2971  string b_tbl;
2972  int b_idx;
2973
2974         switch(se->get_operator_type()){
2975         case SE_LITERAL:
2976         case SE_IFACE_PARAM:
2977         case SE_PARAM:
2978         case SE_AGGR_STAR:
2979                 return;
2980         case SE_COLREF:
2981                 cr = se->get_colref();
2982                 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2983                 b_idx = Schema->get_table_ref(b_tbl);
2984                 cr->tablevar_ref = b_idx;
2985                 return;
2986         case SE_UNARY_OP:
2987                 lse=se->get_left_se();
2988                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2989                         se->lhs.scalarp = lse->get_right_se();
2990                         subs_gbrefs_se(se,Schema);
2991                         return;
2992                 }
2993                 subs_gbrefs_se(se->get_left_se(),Schema);
2994                 return;
2995         case SE_BINARY_OP:
2996                 lse=se->get_left_se();
2997                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2998                         se->lhs.scalarp = lse->get_right_se();
2999                         subs_gbrefs_se(se,Schema);
3000                         return;
3001                 }
3002                 rse=se->get_right_se();
3003                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3004                         se->rhs.scalarp = rse->get_right_se();
3005                         subs_gbrefs_se(se,Schema);
3006                         return;
3007                 }
3008                 subs_gbrefs_se(se->get_left_se(),Schema);
3009                 subs_gbrefs_se(se->get_right_se(),Schema);
3010                 return;
3011         case SE_AGGR_SE:
3012                 lse=se->get_left_se();
3013                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3014                         se->lhs.scalarp = lse->get_right_se();
3015                         subs_gbrefs_se(se,Schema);
3016                         return;
3017                 }
3018                 subs_gbrefs_se(se->get_left_se(),Schema);
3019                 return;
3020         case SE_FUNC:
3021                 operand_list = se->get_operands();
3022                 for(p=0;p<operand_list.size();p++){
3023                         lse=operand_list[p];
3024                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3025                                 se->param_list[p] = lse->get_right_se();
3026                                 subs_gbrefs_se(se,Schema);
3027                                 return;
3028                         }
3029                 }
3030                 for(p=0;p<operand_list.size();p++){
3031                         subs_gbrefs_se(operand_list[p],Schema);
3032                 }
3033         break;
3034         }
3035
3036         return;
3037 }
3038
3039 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3040 vector<scalarexp_t *> op_list;
3041 int o;
3042 scalarexp_t *lse,*rse;
3043
3044         switch(pr->get_operator_type()){
3045         case PRED_COMPARE:
3046                 lse=pr->get_left_se();
3047                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3048                         pr->lhs.sexp = lse->get_right_se();
3049                         subs_gbrefs_pr(pr,Schema);
3050                         return;
3051                 }
3052                 rse=pr->get_right_se();
3053                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3054                         pr->rhs.sexp = rse->get_right_se();
3055                         subs_gbrefs_pr(pr,Schema);
3056                         return;
3057                 }
3058                 subs_gbrefs_se(pr->get_left_se(),Schema);
3059                 subs_gbrefs_se(pr->get_right_se(),Schema);
3060                 return;
3061         case PRED_IN:
3062                 lse=pr->get_left_se();
3063                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3064                         pr->lhs.sexp = lse->get_right_se();
3065                         subs_gbrefs_pr(pr,Schema);
3066                         return;
3067                 }
3068                 subs_gbrefs_se(pr->get_left_se(),Schema);
3069                 return;
3070         case PRED_UNARY_OP:
3071                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3072                 return;
3073         case PRED_BINARY_OP:
3074                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3075                 subs_gbrefs_pr(pr->get_right_pr(),Schema);
3076                 return;
3077         case PRED_FUNC:
3078                 op_list = pr->get_op_list();
3079                 for(o=0;o<op_list.size();++o){
3080                         lse=op_list[o];
3081                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3082                                 pr->param_list[o] = lse->get_right_se();
3083                                 subs_gbrefs_pr(pr,Schema);
3084                                 return;
3085                         }
3086                         subs_gbrefs_se(op_list[o],Schema);
3087                 }
3088                 return;
3089         default:
3090                 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3091                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3092                 exit(1);
3093         }
3094 }
3095
3096
3097 //              Search for references to "expensive" fields.
3098 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3099  int p;
3100  vector<scalarexp_t *> operand_list;
3101  int cnt=0;
3102 table_def *td;
3103 param_list *plist;
3104
3105         switch(se->get_operator_type()){
3106         case SE_LITERAL:
3107         case SE_IFACE_PARAM:
3108         case SE_PARAM:
3109         case SE_AGGR_STAR:
3110         case SE_AGGR_SE:
3111                 return 0;
3112         case SE_COLREF:
3113                 if(se->is_gb())
3114                         return expensive_refs_se(se->rhs.scalarp,Schema);
3115                 td = Schema->get_table(se->lhs.colref->schema_ref);
3116                 plist = td->get_modifier_list(se->lhs.colref->field);
3117                 if(plist->contains_key("expensive"))
3118                         return 1;
3119                 return 0;
3120         case SE_UNARY_OP:
3121                 return expensive_refs_se(se->get_left_se(),Schema);
3122         case SE_BINARY_OP:
3123                 cnt += expensive_refs_se(se->get_left_se(),Schema);
3124                 cnt += expensive_refs_se(se->get_right_se(),Schema);
3125                 return cnt;
3126         case SE_FUNC:
3127                 operand_list = se->get_operands();
3128                 for(p=0;p<operand_list.size();p++){
3129                         cnt += expensive_refs_se(operand_list[p],Schema);
3130                 }
3131                 return cnt;
3132         break;
3133         }
3134
3135         return 0;
3136 }
3137
3138 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3139 vector<scalarexp_t *> op_list;
3140 int o;
3141 int cnt=0;
3142
3143         switch(pr->get_operator_type()){
3144         case PRED_COMPARE:
3145                 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3146                 cnt += expensive_refs_se(pr->get_right_se(),Schema);
3147                 return cnt;
3148         case PRED_IN:
3149                 return expensive_refs_se(pr->get_left_se(),Schema);
3150         case PRED_UNARY_OP:
3151                 return expensive_refs_pr(pr->get_left_pr(),Schema);
3152         case PRED_BINARY_OP:
3153                 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3154                 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
3155                 return cnt;
3156         case PRED_FUNC:
3157                 op_list = pr->get_op_list();
3158                 for(o=0;o<op_list.size();++o){
3159                         cnt += expensive_refs_se(op_list[o],Schema);
3160                 }
3161                 return cnt;
3162         default:
3163                 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3164                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3165                 exit(1);
3166         }
3167 }
3168
3169
3170 //              TODO: allow "cheap" functions and predicates.
3171 bool simple_field_constraint(cnf_elem *c){
3172         vector<literal_t *> ll;
3173         int l;
3174         predicate_t *p = c->pr;
3175  int l_agr, l_gb, l_par, l_func, l_op;
3176  int r_agr, r_gb, r_par, r_func, r_op;
3177  col_id_set left_colids, right_colids;
3178
3179 //                      Verify that it is a simple atom
3180         switch(p->get_operator_type()){
3181         case PRED_COMPARE:
3182 //                              Must be an equality predicate which references
3183 //                              which referecnes no aggregates, parameters, functions, or
3184 //                              group-by variables, and should be a constraint of
3185 //                              a single colref.
3186 //                              AND should not require a complex comparison.
3187                 if(p->get_op() != "=") return(false);
3188                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3189                 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false);
3190                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3191                    r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3192 //                              I will count on there being no gbvars in the constraint.
3193 //                              TODO: allow gbvars which are colrefs.
3194                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3195                 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
3196                 if(left_colids.size()+right_colids.size() != 1) return(false);
3197
3198
3199 //                      Normalize : the colref should be on the lhs.
3200                 if(right_colids.size() > 0){
3201                         p->swap_scalar_operands();
3202                 }
3203
3204 //                      Disallow complex (and therefore expensive) comparisons.
3205                 if(p->get_left_se()->get_data_type()->complex_comparison(
3206                         p->get_right_se()->get_data_type() ) )
3207                                 return(false);
3208
3209 //                      passed all the tests.
3210                 return(true);
3211         case PRED_IN:
3212 //                      LHS must be a non-gbvar colref.
3213                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3214                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3215 //                              I will count on there being no gbvars in the constraint.
3216 //                              TODO: allow gbvars which are colrefs.
3217                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3218                 if(left_colids.size() != 1) return(false);
3219 //                      Disallow complex (and therefore expensive) comparisons.
3220                 if(p->get_left_se()->get_data_type()->complex_comparison(
3221                         p->get_left_se()->get_data_type() ) )
3222                                 return(false);
3223
3224
3225 //                      All entries in the IN list must be literals
3226 //                      Currently, this is the only possibility.
3227                 return(true);
3228                 break;
3229         case PRED_UNARY_OP:
3230                 return(false);
3231         case PRED_BINARY_OP:
3232                 return(false);
3233         case PRED_FUNC:
3234                 return(false);
3235         default:
3236                 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
3237                         p->get_lineno(), p->get_charno(), p->get_operator_type() );
3238                 exit(1);
3239         }
3240
3241         return(false);
3242 }
3243
3244 //              As the name implies, return the colref constrained by the
3245 //              cnf elem.  I will be counting on the LHS being a SE pointing
3246 //              to a colref.
3247
3248 //                      This fcn assumes that in fact exactly
3249 //                      one colref is constrained.
3250 colref_t *get_constrained_colref(scalarexp_t *se){
3251  int p;
3252  vector<scalarexp_t *> operand_list;
3253 colref_t *ret;
3254
3255         switch(se->get_operator_type()){
3256         case SE_LITERAL:
3257                 return(NULL);
3258         case SE_PARAM:
3259         case SE_IFACE_PARAM:
3260                 return(NULL);
3261         case SE_COLREF:
3262                 return(se->get_colref());
3263         case SE_UNARY_OP:
3264                 return(get_constrained_colref(se->get_left_se()));
3265         case SE_BINARY_OP:
3266                 ret=get_constrained_colref(se->get_left_se());
3267                 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3268                 else return ret;
3269         case SE_AGGR_STAR:
3270         case SE_AGGR_SE:
3271                 return(NULL);
3272         case SE_FUNC:
3273                 if(se->get_aggr_ref() >= 0) return NULL;
3274
3275                 operand_list = se->get_operands();
3276                 for(p=0;p<operand_list.size();p++){
3277                         ret=get_constrained_colref(operand_list[p]);
3278                         if(ret != NULL) return(ret);
3279
3280                 }
3281                 return(NULL);
3282         break;
3283         }
3284
3285         return(NULL);
3286 }
3287
3288
3289 colref_t *get_constrained_colref(predicate_t *p){
3290         return(get_constrained_colref(p->get_left_se()));
3291 }
3292 colref_t *get_constrained_colref(cnf_elem *c){
3293         return get_constrained_colref(c->pr->get_left_se());
3294 }
3295
3296
3297
3298
3299 /*
3300 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3301                                                         string target_fld, string target_tbl, int tblref){
3302
3303 //                      Make a copy of the predicate to be added.
3304 //                      ASSUME no aggregates.
3305         predicate_t *pr = dup_pr(src_p,NULL);
3306
3307 //                      Modify the ref to the base table.
3308 //                      ASSUME lhs is the colref
3309         pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3310         pr->get_left_se()->get_colref()->set_table_ref(tblref);
3311
3312         if(dst->pr == NULL) dst->pr = pr;
3313         else dst->pr = new predicate_t("OR", dst->pr, pr);
3314
3315 }
3316 */
3317
3318
//////////////////////////////////////////////////////
///////////////         Represent a node in a predicate tree
//
//      make_common_pred turns a tree of these nodes into one predicate:
//      the children of a node are OR-ed together and the result is AND-ed
//      with the node's own predicate, if it has one.
struct common_pred_node{
    std::set<int> lftas;
    predicate_t *pr;                                // this node's predicate; NULL if it has none
    std::vector<predicate_t *> predecessor_preds;
    std::vector<common_pred_node *> children;       // sub-nodes; not deleted by this struct

    std::string target_tbl;
    std::string target_fld;
    int target_ref;                                 // -1 until assigned (was left uninitialized)

//      Start with no predicate, no children and no target.
    common_pred_node() : pr(NULL), target_ref(-1) {
    }
};
3335
3336
3337 predicate_t *make_common_pred(common_pred_node *pn){
3338   int n;
3339
3340         if(pn->children.size() == 0){
3341                 if(pn->pr == NULL){
3342                         fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
3343                         exit(1);
3344                 }
3345                 return( dup_pr(pn->pr,NULL) );
3346         }
3347
3348         predicate_t *curr_pr = make_common_pred( pn->children[0] );
3349     for(n=1;n<pn->children.size();++n){
3350                 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3351         }
3352
3353         if(pn->pr != NULL)
3354                 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
3355
3356         return(curr_pr);
3357 }
3358
3359
3360 bool operator<(const cnf_set &c1, const cnf_set &c2){
3361         if(c1.lfta_id.size() < c2.lfta_id.size())
3362                 return true;
3363         return false;
3364 }
3365
3366
3367 //              Compute the predicates for the prefilter.
3368 //              the prefilter preds are returned in prefilter_preds.
3369 //              pred_ids is the set of predicates used in the prefilter.
3370 //              the encoding is the lfta index, in the top 16 bits,
3371 //              then the index of the cnf element in the bottom 16 bits.
3372 //              This set of for identifying which preds do not need
3373 //              to be generated in the lftas.
3374 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3375         int p, p2, l, c;
3376
3377         vector<cnf_set *> pred_list, sort_list;
3378
3379 //              Create list of tagged, prefilter-safe CNFs.
3380         for(l=0;l<where_list.size();++l){
3381                 for(c=0;c<where_list[l].size();++c){
3382                         if(prefilter_compatible(where_list[l][c],Ext_fcns)){
3383                                 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3384                                         pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3385                         }
3386                 }
3387         }
3388
3389 //              Eliminate duplicates
3390         for(p=0;p<pred_list.size();++p){
3391                 if(pred_list[p]){
3392                         for(p2=p+1;p2<pred_list.size();++p2){
3393                                 if(pred_list[p2]){
3394                                         if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
3395                                                 pred_list[p]->subsume(pred_list[p2]);
3396                                                 delete pred_list[p2];
3397                                                 pred_list[p2] = NULL;
3398                                         }
3399                                 }
3400                         }
3401                 }
3402         }
3403
3404 //              combine preds that occur in the exact same lftas.
3405         for(p=0;p<pred_list.size();++p){
3406                 if(pred_list[p]){
3407                         for(p2=p+1;p2<pred_list.size();++p2){
3408                                 if(pred_list[p2]){
3409                                         if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3410                                                 pred_list[p]->combine_pred(pred_list[p2]);
3411                                                 delete pred_list[p2];
3412                                                 pred_list[p2] = NULL;
3413                                         }
3414                                 }
3415                         }
3416                 }
3417         }
3418
3419 //              Compress the list
3420         for(p=0;p<pred_list.size();++p){
3421                 if(pred_list[p]){
3422                         sort_list.push_back(pred_list[p]);
3423                 }
3424         }
3425 //              Sort it
3426         sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3427
3428 //              Return the top preds, up to 64 of them.
3429         for(p=0;p<sort_list.size() && p<64;p++){
3430                 prefilter_preds.push_back(sort_list[p]);
3431                 sort_list[p]->add_pred_ids(pred_ids);
3432         }
3433
3434 //              Substitute gb refs with their defs
3435 //              While I'm at it, substitute base table sch ref for tblref.
3436         for(p=0;p<prefilter_preds.size() ;p++){
3437                 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3438         }
3439
3440 }
3441
3442
3443
3444
3445
3446 ///////////////////////////////////////////////////////////////////////////
3447 //////////////////////////////////////////////////////////////////////////
3448
3449 //              Find partial functions and register them.
3450 //              Do a DFS so that nested partial fcn calls
3451 //              get evaluated in the right order.
3452 //              Don't drill down into aggregates -- their arguments are evaluated
3453 //              earlier than the select list is.
3454 //
3455 //              Modification for function caching:
3456 //              Pass in a ref counter, and partial fcn indicator.
3457 //              Cache fcns ref'd at least once.
3458 //              pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
3459
3460
3461 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3462                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3463                 ext_fcn_list *Ext_fcns){
3464         vector<scalarexp_t *> operands;
3465         int o, f;
3466
3467         if(se == NULL) return;
3468
3469         switch(se->get_operator_type()){
3470         case SE_LITERAL:
3471         case SE_PARAM:
3472         case SE_IFACE_PARAM:
3473                 return;
3474         case SE_UNARY_OP:
3475                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3476                 return;
3477         case SE_BINARY_OP:
3478                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3479                 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3480                 return;
3481         case SE_COLREF:
3482                 return;
3483         case SE_AGGR_STAR:
3484                 return;
3485         case SE_AGGR_SE:
3486 //              find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3487                 return;
3488         case SE_FUNC:
3489                 if(se->get_aggr_ref() >= 0) return;
3490
3491                 operands = se->get_operands();
3492                 for(o=0;o<operands.size();o++){
3493                         find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3494                 }
3495
3496                 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
3497                         if(fcn_ref_cnt){
3498                           for(f=0;f<pf_list->size();++f){
3499                                 if(is_equivalent_se(se,(*pf_list)[f])){
3500                                         se->set_partial_ref(f);
3501                                         (*fcn_ref_cnt)[f]++;
3502                                         break;
3503                                 }
3504                           }
3505                         }else{
3506                                 f=pf_list->size();
3507                         }
3508                         if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) ||  fcn_ref_cnt)){
3509                                 se->set_partial_ref(pf_list->size());
3510                                 pf_list->push_back(se);
3511                                 if(fcn_ref_cnt){
3512                                         fcn_ref_cnt->push_back(1);
3513                                         is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3514                                 }
3515                         }
3516                 }
3517                 return;
3518         default:
3519                 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3520                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3521                 exit(1);
3522         }
3523         return;
3524 }
3525
3526
3527 void find_partial_fcns_pr(predicate_t *pr,  vector<scalarexp_t *> *pf_list,
3528                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3529                                                                         ext_fcn_list *Ext_fcns){
3530         vector<literal_t *> litl;
3531         vector<scalarexp_t *> op_list;
3532         int o;
3533
3534         switch(pr->get_operator_type()){
3535         case PRED_IN:
3536                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3537                 return;
3538         case PRED_COMPARE:
3539                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3540                 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3541                 return;
3542         case PRED_UNARY_OP:
3543                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3544                 return;
3545         case PRED_BINARY_OP:
3546                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3547                 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3548                 return;
3549         case PRED_FUNC:
3550                 op_list = pr->get_op_list();
3551                 for(o=0;o<op_list.size();++o){
3552                         find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3553                 }
3554                 return;
3555         default:
3556                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3557                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3558                 exit(1);
3559         }
3560
3561         return;
3562 }
3563
3564
3565
3566 void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,
3567                                                                 table_list *Schema, ext_fcn_list *Ext_fcns){
3568         vector<literal_t *> litl;
3569         vector<scalarexp_t *> op_list;
3570         int f,o;
3571
3572         switch(pr->get_operator_type()){
3573         case PRED_IN:
3574                 return;
3575         case PRED_COMPARE:
3576                 return;
3577         case PRED_UNARY_OP:
3578                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3579                 return;
3580         case PRED_BINARY_OP:
3581                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3582                 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
3583                 return;
3584         case PRED_FUNC:
3585                 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
3586                   for(f=0;f<pr_list->size();++f){
3587                         if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3588                                 pr->set_combinable_ref(f);
3589                                 break;
3590                         }
3591                   }
3592                   if(f == pr_list->size()){
3593                         pr->set_combinable_ref(pr_list->size());
3594                         pr_list->push_back(pr);
3595                   }
3596                 }
3597                 return;
3598         default:
3599                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3600                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3601                 exit(1);
3602         }
3603
3604         return;
3605 }
3606
3607
3608 //--------------------------------------------------------------------
3609 //              Collect refs to aggregates.
3610
3611
3612 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3613         vector<scalarexp_t *> operands;
3614         int o;
3615
3616         if(se == NULL) return;
3617
3618         switch(se->get_operator_type()){
3619         case SE_LITERAL:
3620         case SE_PARAM:
3621         case SE_IFACE_PARAM:
3622                 return;
3623         case SE_UNARY_OP:
3624                 collect_agg_refs(se->get_left_se(), agg_refs) ;
3625                 return;
3626         case SE_BINARY_OP:
3627                 collect_agg_refs(se->get_left_se(), agg_refs);
3628                 collect_agg_refs(se->get_right_se(), agg_refs);
3629                 return;
3630         case SE_COLREF:
3631                 return;
3632         case SE_AGGR_STAR:
3633         case SE_AGGR_SE:
3634                 agg_refs.insert(se->get_aggr_ref());
3635                 return;
3636         case SE_FUNC:
3637                 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3638
3639                 operands = se->get_operands();
3640                 for(o=0;o<operands.size();o++){
3641                         collect_agg_refs(operands[o], agg_refs);
3642                 }
3643
3644                 return;
3645         default:
3646                 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3647                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3648                 exit(1);
3649         }
3650         return;
3651 }
3652
3653
3654 void collect_aggr_refs_pr(predicate_t *pr,  set<int> &agg_refs){
3655         vector<literal_t *> litl;
3656         vector<scalarexp_t *> op_list;
3657         int o;
3658
3659         switch(pr->get_operator_type()){
3660         case PRED_IN:
3661                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3662                 return;
3663         case PRED_COMPARE:
3664                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3665                 collect_agg_refs(pr->get_right_se(), agg_refs) ;
3666                 return;
3667         case PRED_UNARY_OP:
3668                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3669                 return;
3670         case PRED_BINARY_OP:
3671                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3672                 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
3673                 return;
3674         case PRED_FUNC:
3675                 op_list = pr->get_op_list();
3676                 for(o=0;o<op_list.size();++o){
3677                         collect_agg_refs(op_list[o],agg_refs);
3678                 }
3679                 return;
3680         default:
3681                 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3682                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3683                 exit(1);
3684         }
3685
3686         return;
3687 }
3688
3689
3690 //--------------------------------------------------------------------
3691 //              Collect previously registered partial fcn refs.
3692 //              Do a DFS so that nested partial fcn calls
3693 //              get evaluated in the right order.
3694 //              Don't drill down into aggregates -- their arguments are evaluated
3695 //              earlier than the select list is.
3696 //              ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
3697
3698 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3699         vector<scalarexp_t *> operands;
3700         int o;
3701
3702         if(se == NULL) return;
3703
3704         switch(se->get_operator_type()){
3705         case SE_LITERAL:
3706         case SE_PARAM:
3707         case SE_IFACE_PARAM:
3708                 return;
3709         case SE_UNARY_OP:
3710                 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3711                 return;
3712         case SE_BINARY_OP:
3713                 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3714                 collect_partial_fcns(se->get_right_se(), pfcn_refs);
3715                 return;
3716         case SE_COLREF:
3717                 return;
3718         case SE_AGGR_STAR:
3719                 return;
3720         case SE_AGGR_SE:
3721 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3722                 return;
3723         case SE_FUNC:
3724                 if(se->get_aggr_ref() >= 0) return;
3725
3726                 operands = se->get_operands();
3727                 for(o=0;o<operands.size();o++){
3728                         collect_partial_fcns(operands[o], pfcn_refs);
3729                 }
3730
3731                 if(se->is_partial()){
3732                         pfcn_refs.insert(se->get_partial_ref());
3733                 }
3734
3735                 return;
3736         default:
3737                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3738                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3739                 exit(1);
3740         }
3741         return;
3742 }
3743
3744
3745 void collect_partial_fcns_pr(predicate_t *pr,  set<int> &pfcn_refs){
3746         vector<literal_t *> litl;
3747         vector<scalarexp_t *> op_list;
3748         int o;
3749
3750         switch(pr->get_operator_type()){
3751         case PRED_IN:
3752                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3753                 return;
3754         case PRED_COMPARE:
3755                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3756                 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
3757                 return;
3758         case PRED_UNARY_OP:
3759                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
3760                 return;
3761         case PRED_BINARY_OP:
3762                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3763                 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
3764                 return;
3765         case PRED_FUNC:
3766                 op_list = pr->get_op_list();
3767                 for(o=0;o<op_list.size();++o){
3768                         collect_partial_fcns(op_list[o],pfcn_refs);
3769                 }
3770                 return;
3771         default:
3772                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3773                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3774                 exit(1);
3775         }
3776
3777         return;
3778 }
3779
3780
3781
3782
3783 ///////////////////////////////////////////////////////////////
3784 ////////////    Exported Functions      ///////////////////////////
3785 ///////////////////////////////////////////////////////////////
3786
3787
3788 //              Count and collect refs to interface parameters.
3789
3790 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3791         vector<scalarexp_t *> operands;
3792         int o;
3793         int ret = 0;
3794
3795         if(se == NULL) return 0;
3796
3797         switch(se->get_operator_type()){
3798         case SE_LITERAL:
3799         case SE_PARAM:
3800                 return 0;
3801         case SE_IFACE_PARAM:
3802                         ifpnames.insert(se->get_ifpref()->to_string());
3803                 return 1;
3804         case SE_UNARY_OP:
3805                 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
3806         case SE_BINARY_OP:
3807                 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3808                 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
3809                 return ret;
3810         case SE_COLREF:
3811                 return 0;
3812         case SE_AGGR_STAR:
3813                 return 0;
3814         case SE_AGGR_SE:
3815 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3816                 return 0;
3817         case SE_FUNC:
3818                 if(se->get_aggr_ref() >= 0) return 0;
3819
3820                 operands = se->get_operands();
3821                 for(o=0;o<operands.size();o++){
3822                         ret += count_se_ifp_refs(operands[o], ifpnames);
3823                 }
3824
3825                 return ret;
3826         default:
3827                 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3828                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3829                 exit(1);
3830         }
3831         return 0;
3832 }
3833
3834
3835 int count_pr_ifp_refs(predicate_t *pr,  set<string> &ifpnames){
3836         vector<literal_t *> litl;
3837         vector<scalarexp_t *> op_list;
3838         int o;
3839         int ret = 0;
3840         if(pr == NULL) return 0;
3841
3842         switch(pr->get_operator_type()){
3843         case PRED_IN:
3844                 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3845         case PRED_COMPARE:
3846                 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3847                 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
3848                 return ret;
3849         case PRED_UNARY_OP:
3850                 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
3851         case PRED_BINARY_OP:
3852                 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3853                 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
3854                 return ret;
3855         case PRED_FUNC:
3856                 op_list = pr->get_op_list();
3857                 for(o=0;o<op_list.size();++o){
3858                         ret += count_se_ifp_refs(op_list[o],ifpnames);
3859                 }
3860                 return ret;
3861         default:
3862                 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3863                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3864                 exit(1);
3865         }
3866
3867         return 0;
3868 }
3869
3870 //              Resolve ifp refs, convert them to string literals.
3871
3872 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb,  string &err){
3873         vector<scalarexp_t *> operands;
3874         vector<string> ifvals;
3875         int o;
3876         int ierr;
3877         string serr;
3878         int ret = 0;
3879         literal_t *tmp_l;
3880         ifpref_t *ir;
3881
3882         if(se == NULL) return 0;
3883
3884         switch(se->get_operator_type()){
3885         case SE_LITERAL:
3886         case SE_PARAM:
3887                 return 0;
3888         case SE_IFACE_PARAM:
3889                 ir = se->get_ifpref();
3890                 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
3891                 if(ierr){
3892                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
3893                         return 1;
3894                 }
3895                 if(ifvals.size() == 0){
3896                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
3897                         return 1;
3898                 }
3899                 if(ifvals.size() > 1){
3900                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
3901                         return 1;
3902                 }
3903                 tmp_l = new literal_t( ifvals[0]);
3904                 se->convert_to_literal(tmp_l);
3905                 return 0;
3906         case SE_UNARY_OP:
3907                 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
3908         case SE_BINARY_OP:
3909                 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3910                 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
3911                 return ret;
3912         case SE_COLREF:
3913                 return 0;
3914         case SE_AGGR_STAR:
3915                 return 0;
3916         case SE_AGGR_SE:
3917 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3918                 return 0;
3919         case SE_FUNC:
3920                 if(se->get_aggr_ref() >= 0) return 0;
3921
3922                 operands = se->get_operands();
3923                 for(o=0;o<operands.size();o++){
3924                         ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
3925                 }
3926
3927                 return ret;
3928         default:
3929                 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3930                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3931                 exit(1);
3932         }
3933         return 0;
3934 }
3935
3936
3937 int resolve_pr_ifp_refs(predicate_t *pr,  string ifm, string ifn, ifq_t *ifdb,  string &err){
3938         vector<literal_t *> litl;
3939         vector<scalarexp_t *> op_list;
3940         int o;
3941         int ret = 0;
3942
3943         switch(pr->get_operator_type()){
3944         case PRED_IN:
3945                 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3946         case PRED_COMPARE:
3947                 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3948                 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
3949                 return ret;
3950         case PRED_UNARY_OP:
3951                 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
3952         case PRED_BINARY_OP:
3953                 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3954                 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
3955                 return ret;
3956         case PRED_FUNC:
3957                 op_list = pr->get_op_list();
3958                 for(o=0;o<op_list.size();++o){
3959                         ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
3960                 }
3961                 return ret;
3962         default:
3963                 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3964                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3965                 exit(1);
3966         }
3967
3968         return 0;
3969 }
3970
3971
3972 string impute_query_name(table_exp_t *fta_tree, string default_nm){
3973         string retval = fta_tree->get_val_of_name("query_name");
3974         if(retval == "") retval = default_nm;
3975         if(retval == "") retval = "default_query";
3976         return(retval);
3977 }
3978
3979 //              Convert the parse tree into an intermediate form,
3980 //              which admits analysis better.
3981 //
3982 //              TODO : rationalize the error return policy.
3983 //
3984 //              TODO : the query_summary_class object contains
3985 //                      the parse tree.
3986 //              TODO: revisit the issue when nested subqueries are implemented.
3987 //              One possibility: implement accessor methods to hide the
3988 //              complexity
3989 //              For now: this class contains data structures not in table_exp_t
3990 //              (with a bit of duplication)
3991
3992 //              Return NULL on error.
3993 //              print error messages to stderr.
3994
3995
3996 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3997                                 ext_fcn_list *Ext_fcns, string default_name){
3998         int i,j, k, retval;
3999
4000 //                      Create the summary struct -- no analysis is done here.
4001         query_summary_class *qs = new query_summary_class(fta_tree);
4002         qs->query_type = fta_tree->query_type;
4003
4004 //////////////          Do common analysis
4005
4006 //              Extract query name.  Already imputed for the qnodes.
4007 //      qs->query_name = impute_query_name(fta_tree, default_name);
4008         qs->query_name = default_name;
4009 //printf("query name is %s\n",qs->query_name.c_str());
4010
4011 //              extract definitions.  Don't grab the query name.
4012
4013         map<string, string> nmap = fta_tree->get_name_map();
4014         map<string, string>::iterator nmi;
4015         for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4016                 string pname = (*nmi).first;
4017                 if(pname != "query_name" )
4018                         (qs->definitions)[pname] = (*nmi).second;
4019         }
4020
4021 ///
4022 ///                             FROM analysis
4023
4024 //              First, verify that all the referenced tables are defined.
4025 //              Then, bind the tablerefs in the FROM list to schemas in
4026 //              the schema list.
4027         tablevar_list_t *tlist = fta_tree->get_from();
4028         vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4029
4030         bool found_error = false;
4031         for(i=0;i<tbl_vec.size();i++){
4032                 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4033                 if(sch_no < 0)  {
4034                   fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4035                         tbl_vec[i]->get_schema_name().c_str() );
4036                   fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4037                                         tbl_vec[i]->get_charno() );
4038                   return(NULL);
4039                 }
4040
4041                 tbl_vec[i]->set_schema_ref(sch_no);
4042
4043 //                              If accessing a UDOP, mangle the name
4044 //                      This needs to be done in translate_fta.cc, not here.
4045 /*
4046                 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4047                         string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4048                         tbl_vec[i]->set_schema_name(mngl_name);
4049                 }
4050 */
4051
4052 //                      No FTA schema should have an interface defined on it.
4053                 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4054                         fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4055                 }
4056 //                      Fill in default interface
4057                 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4058                         tbl_vec[i]->set_interface("default");
4059                         tbl_vec[i]->set_ifq(true);
4060                 }
4061 //                      Fill in default machine
4062                 if(tbl_vec[i]->get_interface()!=""  && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4063                         tbl_vec[i]->set_machine(hostname);
4064                 }
4065
4066                 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4067 //                      Record the set of interfaces accessed
4068                         string ifstr;
4069                         if(tbl_vec[i]->get_ifq()){
4070                                 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4071                         }else{
4072                                 if(tbl_vec[i]->get_machine() != "localhost"){
4073                                         ifstr = "&apos;"+tbl_vec[i]->get_machine()+"&apos;."+tbl_vec[i]->get_interface();
4074                                 }else{
4075                                         ifstr = tbl_vec[i]->get_interface();
4076                                 }
4077                         }
4078 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4079                         if(qs->definitions.count("_referenced_ifaces")){
4080                                 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4081                         }
4082                         qs->definitions["_referenced_ifaces"] = ifstr;
4083                 }
4084
4085         }
4086         if(found_error) return(NULL);
4087
4088 //                      Ensure that all tablevars have are named
4089 //                      and that no two tablevars have the same name.
4090         int tblvar_no = 0;
4091 //              First, gather the set of variable
4092         set<string> tblvar_names;
4093         for(i=0;i<tbl_vec.size();i++){
4094                 if(tbl_vec[i]->get_var_name() != ""){
4095                         if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4096                                 fprintf(stderr,"ERROR, query has two table variables named %s.  line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4097                                 return(NULL);
4098                         }
4099                         tblvar_names.insert(tbl_vec[i]->get_var_name());
4100                 }
4101         }
4102 //              Now generate variable names for unnamed tablevars
4103         for(i=0;i<tbl_vec.size();i++){
4104                 if(tbl_vec[i]->get_var_name() == ""){
4105                         char tmpstr[200];
4106                         sprintf(tmpstr,"_t%d",tblvar_no);
4107                         string newvar = tmpstr;
4108                         while(tblvar_names.count(newvar) > 0){
4109                                 tblvar_no++;
4110                                 sprintf(tmpstr,"_t%d",tblvar_no);
4111                                 newvar = tmpstr;
4112                         }
4113                         tbl_vec[i]->set_range_var(newvar);
4114                         tblvar_names.insert(newvar);
4115                 }
4116         }
4117
4118 //              Process inner/outer join properties
4119         int jprop = fta_tree->get_from()->get_properties();
4120 //              Require explicit INNER_JOIN, ... specification for join queries.
4121         if(jprop < 0){
4122                 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4123                         fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, WATCHLIST_JOIN, FILTER_JOIN.\n");
4124                         return(NULL);
4125                 }
4126         }
4127
4128         if(jprop == OUTER_JOIN_PROPERTY){
4129                 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4130         }
4131         if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4132                 tbl_vec[0]->set_property(1);
4133         if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4134                 tbl_vec[tbl_vec.size()-1]->set_property(1);
4135         if(jprop == FILTER_JOIN_PROPERTY){
4136                 if(fta_tree->get_from()->get_temporal_range() == 0){
4137                         fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4138                         return NULL;
4139                 }
4140                 if(tbl_vec.size() != 2){
4141                         fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4142                         return NULL;
4143                 }
4144                 colref_t *cr = fta_tree->get_from()->get_colref();
4145                 string field = cr->get_field();
4146
4147                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4148                 if(fi0 < 0){
4149                         fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4150                         return NULL;
4151                 }
4152                 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4153                 cr->set_tablevar_ref(0);
4154                 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4155                 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4156                 data_type *dt0 = new data_type(type_name, modifiers);
4157                 string dt0_type = dt0->get_type_str();
4158                 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4159                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4160                         return NULL;
4161                 }
4162                 if(! dt0->is_increasing()){
4163                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be temporal increasing.\n");
4164                         return NULL;
4165                 }
4166         }
4167
4168
4169
4170 /////////////////////
4171 ///             Build the query param table
4172         vector<var_pair_t *> query_params = fta_tree->query_params;
4173         int p;
4174         for(p=0;p<query_params.size();++p){
4175                 string pname = query_params[p]->name;
4176                 string dtname = query_params[p]->val;
4177
4178                 if(pname == ""){
4179                         fprintf(stderr,"ERROR parameter has empty name.\n");
4180                         found_error = true;
4181                 }
4182                 if(dtname == ""){
4183                         fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4184                         found_error = true;
4185                 }
4186                 data_type *dt = new data_type(dtname);
4187                 if(!(dt->is_defined())){
4188                         fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4189                         found_error = true;
4190                 }
4191
4192                 qs->add_query_param(pname, dt, false);
4193         }
4194         if(found_error) return(NULL);
4195 //              unpack the param table to a global for easier analysis.
4196         param_tbl=qs->param_tbl;
4197
4198
4199 //////////////////              WATCHLIST specialized analysis
4200         if(qs->query_type == WATCHLIST_QUERY){
4201 //              Populate a SELECT clause?
4202         }
4203
4204 //////////////////              MERGE specialized analysis
4205
4206         if(qs->query_type == MERGE_QUERY){
4207 //                      Verify that
4208 //                              1) there are two *different* streams ref'd in the FROM clause
4209 //                                      However, only emit a warning.
4210 //                                      (can't detect a problem if one of the interfaces is the
4211 //                                       default interface).
4212 //                              2) They have the same layout (e.g. same types but the
4213 //                                      names can be different)
4214 //                              3) the two columns can unambiguously be mapped to
4215 //                                      fields of the two tables, one per table.  Exception:
4216 //                                      the column names are the same and exist in both tables.
4217 //                                      FURTHERMORE the positions must be the same
4218 //                              4) after mapping, verify that both colrefs are temporal
4219 //                                      and in the same direction.
4220                 if(tbl_vec.size() < 2){
4221                         fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4222                         return(NULL);
4223                 }
4224
4225                 vector<field_entry *> fev0 = schema->get_fields(
4226                         tbl_vec[0]->get_schema_name()
4227                 );
4228
4229
4230                 int cv;
4231                 for(cv=1;cv<tbl_vec.size();++cv){
4232                         vector<field_entry *> fev1 = schema->get_fields(
4233                                 tbl_vec[cv]->get_schema_name()
4234                         );
4235
4236                         if(fev0.size() != fev1.size()){
4237                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4238                                 return(NULL);
4239                         }
4240
4241 //                      Only need to ensure that the list of types are the same.
4242 //                      The first table supplies the output colnames,
4243 //                      and all temporal properties are lost, except for the
4244 //                      merge-by columns.
4245                         int f;
4246                         for(f=0;f<fev0.size();++f){
4247                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4248                                 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4249                                 if(! dt0.equal_subtypes(&dt1) ){
4250                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4251                                         return(NULL);
4252                                 }
4253                         }
4254                 }
4255
4256 //              copy over the merge-by cols.
4257                 qs->mvars = fta_tree->mergevars;
4258
4259                 if(qs->mvars.size() == 0){      // need to discover the merge vars.
4260                         int mergevar_pos = -1;
4261                         int f;
4262                         for(f=0;f<fev0.size();++f){
4263                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4264                                 if(dt0.is_temporal()){
4265                                         mergevar_pos = f;
4266                                         break;
4267                                 }
4268                         }
4269                         if(mergevar_pos >= 0){
4270                                 for(cv=0;cv<tbl_vec.size();++cv){
4271                                         vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4272                                         qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4273                                 }
4274                         }else{
4275                                 fprintf(stderr,"ERROR, no merge-by column found.\n");
4276                                 return(NULL);
4277                         }
4278                 }
4279
4280 //                      Ensure same number of tables, merge cols.
4281                 if(tbl_vec.size() != qs->mvars.size()){
4282                         fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4283                         return(NULL);
4284                 }
4285
4286 //              Ensure that the merge-by are from different tables
4287 //              also, sort colrefs so that they align with the FROM list using tmp_crl
4288                 set<int> refd_sources;
4289                 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4290                 for(cv=0;cv<qs->mvars.size();++cv){
4291                         int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4292                         if(tblvar<0){
4293                                 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4294                                 exit(1);
4295                         }
4296                         refd_sources.insert(tblvar);
4297                         tmp_crl[tblvar] = qs->mvars[cv];
4298                 }
4299                 if(refd_sources.size() != qs->mvars.size()){
4300                         fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4301                         return(NULL);
4302                 }
4303
4304 //                      1-1 mapping, so use tmp_crl as the merge column list.
4305                 qs->mvars = tmp_crl;
4306
4307
4308
4309 //                      Look up the colrefs in their schemas, verify that
4310 //                      they are at the same place, that they are both temporal
4311 //                      in the same way.
4312 //                      It seems that this should be done more in the schema objects.
4313                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4314                 if(fi0 < 0){
4315                         fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4316                         exit(1);
4317                 }
4318                 for(cv=1;cv<qs->mvars.size();++cv){
4319                         int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4320                         if(fi0!=fi1){
4321                                 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4322                                 return NULL;
4323                         }
4324                 }
4325
4326                 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4327                 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4328                 if( (!dt0.is_temporal()) ){
4329                         fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4330                         return(NULL);
4331                 }
4332                 for(cv=0;cv<qs->mvars.size();++cv){
4333                         field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4334                         data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4335                         if( (!dt1.is_temporal()) ){
4336                                 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4337                                 return(NULL);
4338                         }
4339
4340
4341                         if( dt0.get_temporal() != dt1.get_temporal()){
4342                                 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4343                                 return(NULL);
4344                         }
4345                 }
4346
4347 //                      If there is a SLACK specification, verify
4348 //                      that it is literal-only and that its type is compatible
4349 //                      with that of the merge columns
4350                 qs->slack = fta_tree->slack;
4351                 if(qs->slack){
4352                         if(! literal_only_se(qs->slack)){
4353                                 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4354                                 return NULL;
4355                         }
4356
4357                         assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4358                         data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4359                         if(sdt.get_type() == undefined_t){
4360                                 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4361                                 return NULL;
4362                         }
4363                 }
4364
4365
4366 //                      All the tests have passed, there is nothing
4367 //                      else to fill in.
4368
4369         }
4370
4371 //////////////////              SELECT specialized analysis
4372
4373         if(qs->query_type == SELECT_QUERY){
4374 //              unpack the gb_tbl and aggr_tbl objects into globals,
4375 //              for easier syntax (param_tbl was already unpacked above).
4376         gb_tbl = qs->gb_tbl;
4377         aggr_tbl = qs->aggr_tbl;
4378
4379
4380 //              Build the table of group-by attributes.
4381 //              (se processing done automatically).
4382 //              NOTE : Doing the SE processing here is getting cumbersome,
4383 //                      I should process these individually.
4384 //              NOTE : I should check for duplicate names.
4385 //              NOTE : I should ensure that the def of one GB does not
4386 //                      reference the value of another.
4387         vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4388         int n_temporal = 0;
4389         string temporal_gbvars = "";
4390         map<string, int> gset_gbnames;
4391
4392 //              For generating the set of GB patterns for this aggregation query.
4393         vector<bool> inner_pattern;
4394         vector<vector<bool> > pattern_set;
4395         vector<vector<vector<bool> > > pattern_components;
4396
4397         vector<gb_t *> r_gbs, c_gbs, g_gbs;
4398         int n_patterns;
4399
4400         for(i=0;i<gb_list.size();i++){
4401                 switch(gb_list[i]->type){
4402                 case gb_egb_type:
4403                         retval = gb_tbl->add_gb_attr(
4404                                 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4405                         );
4406                         if(retval < 0){
4407                                 return NULL;  // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4408                         }else{
4409                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4410                                         n_temporal++;
4411                                         if(temporal_gbvars != "") temporal_gbvars+=" ";
4412                                         temporal_gbvars += gb_tbl->get_name(i);
4413                                 }
4414                         }
4415
4416                         inner_pattern.clear();
4417                         pattern_set.clear();
4418                         inner_pattern.push_back(true);
4419                         pattern_set.push_back(inner_pattern);
4420                         pattern_components.push_back(pattern_set);
4421
4422                         gb_tbl->gb_entry_type.push_back("");
4423                         gb_tbl->gb_entry_count.push_back(1);
4424                         gb_tbl->pattern_components.push_back(pattern_set);
4425
4426                 break;
4427                 case rollup_egb_type:
4428                         r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4429                         for(j=0;j<r_gbs.size();++j){
4430                                 retval = gb_tbl->add_gb_attr(
4431                                         r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4432                                 );
4433                                 if(retval < 0){
4434                                         found_error = true;
4435                                 }else{          // rollup gb can't be temporal
4436                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4437                                 }
4438                         }
4439
4440                         inner_pattern.resize(r_gbs.size());
4441                         pattern_set.clear();
4442                         for(j=0;j<=r_gbs.size();++j){
4443                                 for(k=0;k<r_gbs.size();++k){
4444                                         if(k < j)
4445                                                 inner_pattern[k] = true;
4446                                         else
4447                                                 inner_pattern[k] = false;
4448                                 }
4449                                 pattern_set.push_back(inner_pattern);
4450                         }
4451                         pattern_components.push_back(pattern_set);
4452
4453                         gb_tbl->gb_entry_type.push_back("ROLLUP");
4454                         gb_tbl->gb_entry_count.push_back(r_gbs.size());
4455                         gb_tbl->pattern_components.push_back(pattern_set);
4456                 break;
4457                 case cube_egb_type:
4458                         c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4459                         for(j=0;j<c_gbs.size();++j){
4460                                 retval = gb_tbl->add_gb_attr(
4461                                         c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4462                                 );
4463                                 if(retval < 0){
4464                                         found_error = true;
4465                                 }else{          // cube gb can't be temporal
4466                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4467                                 }
4468                         }
4469
4470                         inner_pattern.resize(c_gbs.size());
4471                         pattern_set.clear();
4472                         n_patterns = 1 << c_gbs.size();
4473                         for(j=0;j<n_patterns;++j){
4474                                 int test_bit = 1;
4475                                 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4476                                         if((j & test_bit) != 0)
4477                                                 inner_pattern[k] = true;
4478                                         else
4479                                                 inner_pattern[k] = false;
4480                                 }
4481                                 pattern_set.push_back(inner_pattern);
4482                         }
4483                         pattern_components.push_back(pattern_set);
4484
4485                         gb_tbl->gb_entry_type.push_back("CUBE");
4486                         gb_tbl->gb_entry_count.push_back(c_gbs.size());
4487                         gb_tbl->pattern_components.push_back(pattern_set);
4488                 break;
4489                 case gsets_egb_type:
4490                 {
4491                         gset_gbnames.clear();
4492                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4493                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4494                                 for(k=0;k<g_gbs.size();++k){
4495                                         if(g_gbs[k]->type != GB_COLREF){
4496                                                 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4497                                                 found_error = true;
4498                                         }else{
4499                                                 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4500                                                         retval = gb_tbl->add_gb_attr(
4501                                                                 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4502                                                         );
4503                                                         if(retval < 0){
4504                                                                 found_error = true;
4505                                                         }else{          // gsets gb can't be temporal
4506                                                                 gb_tbl->reset_temporal(gb_tbl->size()-1);
4507                                                         }
4508                                                         int pos = gset_gbnames.size();
4509                                                         gset_gbnames[g_gbs[k]->name] = pos;
4510                                                 }
4511                                         }
4512                                 }
4513                         }
4514
4515                         if(gset_gbnames.size() > 63){
4516                                 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4517                                 found_error = true;
4518                         }
4519
4520                         inner_pattern.resize(gset_gbnames.size());
4521                         pattern_set.clear();
4522                         set<unsigned long long int> signatures;
4523                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4524                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4525                                 set<string> refd_gbs;
4526                                 for(k=0;k<g_gbs.size();++k){
4527                                         refd_gbs.insert(g_gbs[k]->name);
4528                                 }
4529                                 fill(inner_pattern.begin(),inner_pattern.end(),false);
4530                                 unsigned long long int signature = 0;
4531                                 set<string>::iterator ssi;
4532                                 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4533                                         inner_pattern[gset_gbnames[(*ssi)]] = true;
4534                                         signature |= (1 << gset_gbnames[(*ssi)]);
4535                                 }
4536                                 if(signatures.count(signature)){
4537                                         fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4538                                         set<string>::iterator ssi;
4539                                         for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4540                                                 fprintf(stderr," %s",(*ssi).c_str());
4541                                         }
4542                                         fprintf(stderr,"\n");
4543                                 }else{
4544                                         signatures.insert(signature);
4545                                         pattern_set.push_back(inner_pattern);
4546                                 }
4547                         }
4548                         pattern_components.push_back(pattern_set);
4549
4550                         gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4551                         gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4552                         gb_tbl->pattern_components.push_back(pattern_set);
4553                 }
4554                 break;
4555                 default:
4556                 break;
4557                 }
4558         }
4559         if(found_error) return(NULL);
4560         if(n_temporal > 1){
4561                 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s).  Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4562                 return NULL;
4563         }
4564
4565 //              Compute the set of patterns.  Take the cross product of all pattern components.
4566         vector<vector<bool> > gb_patterns;
4567         int n_components = pattern_components.size();
4568         vector<int> pattern_pos(n_components,0);
4569         bool done = false;
4570         while(! done){
4571                 vector<bool> pattern;
4572                 for(j=0;j<n_components;j++){
4573                         pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4574                                 pattern_components[j][pattern_pos[j]].end());
4575                 }
4576                 gb_patterns.push_back(pattern);
4577                 for(j=0;j<n_components;j++){
4578                         pattern_pos[j]++;
4579                         if(pattern_pos[j] >= pattern_components[j].size())
4580                                 pattern_pos[j] = 0;
4581                         else
4582                                 break;
4583                 }
4584                 if(j >= n_components)
4585                         done = true;
4586         }
4587         gb_tbl->gb_patterns = gb_patterns;
4588
4589
4590 //              Process the supergroup, if any.
4591         vector<colref_t *> sgb = fta_tree->get_supergb();
4592         for(i=0;i<sgb.size();++i){
4593                 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4594                 if(gbr < 0){
4595                         fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4596                         found_error = true;
4597                 }
4598                 if(qs->sg_tbl.count(gbr)){
4599                         fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4600                 }
4601                 qs->sg_tbl.insert(gbr);
4602         }
4603         if(found_error) return(NULL);
4604
4605         if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4606                 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4607                 return NULL;
4608         }
4609
4610
4611
4612         predicate_t *wh = fta_tree->get_where();
4613         predicate_t *hv = fta_tree->get_having();
4614         predicate_t *cw = fta_tree->get_cleaning_when();
4615         predicate_t *cb = fta_tree->get_cleaning_by();
4616         predicate_t *closew = fta_tree->get_closing_when();
4617
4618         if(closew != NULL  && gb_tbl->gb_patterns.size()>1){
4619                 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4620                 return NULL;
4621         }
4622
4623
4624
4625 //              Verify that all column references are valid, and if so assign
4626 //              the data type.
4627
4628         vector<select_element *> sl_list = fta_tree->get_sl_vec();
4629         for(i=0;i<sl_list.size();i++){
4630                 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4631                 if(retval < 0) found_error = true;
4632         }
4633         if(wh != NULL)
4634                 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4635         if(retval < 0) found_error = true;
4636         if(hv != NULL)
4637                 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4638         if(retval < 0) found_error = true;
4639         if(cw != NULL)
4640                 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4641         if(retval < 0) found_error = true;
4642         if(cb != NULL)
4643                 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4644         if(retval < 0) found_error = true;
4645         if(closew != NULL)
4646                 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4647         if(retval < 0) found_error = true;
4648
4649         if(found_error) return(NULL);
4650
4651 //              Verify that all of the scalar expressions
4652 //              and comparison predicates have compatible types.
4653
4654         n_temporal = 0;
4655         string temporal_output_fields;
4656         for(i=0;i<sl_list.size();i++){
4657                 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4658                 if(retval < 0){
4659                          found_error = true;
4660                 }else{
4661                         if(sl_list[i]->se->get_data_type()->is_temporal()){
4662                                 n_temporal++;
4663                                 temporal_output_fields += " "+int_to_string(i);
4664                         }
4665                 }
4666         }
4667         if(n_temporal > 1){
4668                 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s).  Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4669                 found_error=true;
4670         }
4671         if(wh != NULL)
4672                 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4673         if(retval < 0) found_error = true;
4674         if(hv != NULL)
4675                 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4676         if(retval < 0) found_error = true;
4677         if(cw != NULL)
4678                 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4679         if(retval < 0) found_error = true;
4680         if(cb != NULL)
4681                 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4682         if(retval < 0) found_error = true;
4683         if(closew != NULL)
4684                 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4685         if(retval < 0) found_error = true;
4686
4687         if(found_error) return(NULL);
4688
4689 //                      Impute names for the unnamed columns.
4690         set<string> curr_names;
4691         int s;
4692         for(s=0;s<sl_list.size();++s){
4693                 curr_names.insert(sl_list[s]->name);
4694         }
4695         for(s=0;s<sl_list.size();++s){
4696                 if(sl_list[s]->name == "")
4697                         sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4698         }
4699
4700
4701 //              Check the aggregates.
4702 //              No aggrs allowed in the WHERE predicate.
4703 //              (no aggrs in the GB defs, but that is examined elsewhere)
4704 //              Therefore, aggregates are allowed only in the select clause.
4705 //
4706 //              The query is an aggregation query if there is a group-by clause, or
4707 //              if any aggregate is referenced.  If there is a group-by clause,
4708 //              at least one aggregate must be referenced.
4709 //              If the query is an aggregate query, the scalar expressions in
4710 //              the select clause can reference only constants, aggregates, or group-by
4711 //              attributes.
4712 //              Also, if the query is an aggregate query, build a table referencing
4713 //              the aggregates.
4714 //
4715 //              No nested aggregates allowed.
4716 //
4717
4718 //              First, count references in the WHERE predicate.
4719 //              (if there are any references, report an error).
4720 //                      can ref group vars, tuple fields, and stateful fcns.
4721
4722         if(wh != NULL){
4723                 retval = count_aggr_pred(wh, true);
4724                 if(retval > 0){
4725                         fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4726                         return(NULL);
4727                 }
4728         }
4729
4730 //              NOTE : Here I need an analysis of the having clause
4731 //              to verify that it only refs GB attrs and aggregates.
4732 //                      (also, superaggregates, stateful fcns)
4733         if(hv!=NULL){
4734                 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4735                 if(retval < 0) return(NULL);
4736         }
4737
4738 //              Cleaning by has same reference rules as Having
4739         if(cb!=NULL){
4740                 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4741                 if(retval < 0) return(NULL);
4742         }
4743
4744 //              Cleaning when has same reference rules as Having,
4745 //              except that references to non-superaggregates are not allowed.
4746 //              This is tested for when "CLEANING_WHEN" is passed in as the clause.
4747         if(cw!=NULL){
4748                 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4749                 if(retval < 0) return(NULL);
4750         }
4751
4752 //              CLOSING_WHEN : same rules as HAVING
4753         if(closew!=NULL){
4754                 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4755                 if(retval < 0) return(NULL);
4756         }
4757
4758
4759 //              Collect aggregates in the HAVING and CLEANING clauses
4760         if(hv != NULL){
4761                 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4762         }
4763         if(cw != NULL){
4764                 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4765         }
4766         if(cb != NULL){
4767                 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4768         }
4769         if(closew != NULL){
4770                 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4771         }
4772
4773 //              Collect aggregate refs in the SELECT clause.
4774
4775         for(i=0;i<sl_list.size();i++)
4776                 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4777
4778
4779 //              Collect references to states of stateful functions
4780         if(wh != NULL){
4781                 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4782         }
4783         if(hv != NULL){
4784                 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4785         }
4786         if(cw != NULL){
4787                 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4788         }
4789         if(cb != NULL){
4790                 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4791         }
4792         if(closew != NULL){                     // should be no stateful fcns here ...
4793                 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4794         }
4795         for(i=0;i<sl_list.size();i++)
4796                 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4797
4798
4799 //              If this is an aggregate query, it had normally references
4800 //              some aggregates.  Its not necessary though, just emit a warning.
4801 //              (acts as SELECT DISTINCT)
4802
4803         bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4804         if(is_aggr_query && aggr_tbl->size() == 0){
4805                 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4806         }
4807
4808 //              If this is an aggregate query,
4809 //                      1) verify that the SEs in the SELECT clause reference
4810 //                              only constants, aggregates, and group-by attributes.
4811 //                      2) No aggregate scalar expression references an aggregate
4812 //                              or any stateful function.
4813 //                      3) either it references both CLEANING clauses or neither.
4814 //                      4) all superaggregates must have the superaggr_allowed property.
4815 //                      5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY
4816 //                         clauses must have the multiple_output property.
4817
4818
4819         if(is_aggr_query){
4820                 if(gb_list.size() == 0){
4821                         fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4822                         return NULL;
4823                 }
4824 //                      Ensure that at least one gbvar is temporal
4825                 if(! fta_tree->name_exists("no_temporal_aggr")){
4826                         bool found_temporal = false;
4827                 for(i=0;i<gb_tbl->size();i++){
4828                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4829                                         found_temporal = true;
4830                                 }
4831                         }
4832                         if(! found_temporal){
4833                                 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4834                                 exit(1);
4835                         }
4836                 }
4837
4838                 if((!cb && cw) || (cb && !cw)){
4839                         fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4840                         return(NULL);
4841                 }
4842
4843                 bool refs_running = false;
4844                 int a;
4845                 for(a=0; a<aggr_tbl->size(); ++a){
4846                         refs_running |= aggr_tbl->is_running_aggr(a);
4847                 }
4848
4849                 if(closew){
4850                         if(cb || cw){
4851                                 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4852                                 return(NULL);
4853                         }
4854                         if(!refs_running){
4855                                 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4856                                 return(NULL);
4857                         }
4858                 }
4859
4860                 if(refs_running && !closew){
4861                                 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4862                         return(NULL);
4863                 }
4864
4865                 bool st_ok = true;
4866                 for(i=0;i<sl_list.size();i++){
4867                         bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4868                         st_ok = st_ok && ret_bool;
4869                 }
4870                 if(! st_ok)
4871                         return(NULL);
4872
4873                 for(i=0;i<aggr_tbl->size();i++){
4874                         if(aggr_tbl->is_superaggr(i)){
4875                                 if(! aggr_tbl->superaggr_allowed(i)){
4876                                         fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4877                                         return NULL;
4878                                 }
4879                         }
4880                         if(aggr_tbl->is_builtin(i)){
4881                                 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4882                                         fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4883                                         return(NULL);
4884                                 }
4885                         }else{
4886                                 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4887                                 int o;
4888                                 for(o=0;o<opl.size();++o){
4889                                         if(count_aggr_se(opl[o], true) > 0){
4890                                                 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4891                                                 return(NULL);
4892                                         }
4893                                 }
4894                         }
4895                 }
4896         }else{
4897 //                      Ensure that non-aggregate query doesn't reference some things
4898                 if(cb || cw){
4899                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4900                         return(NULL);
4901                 }
4902                 if(closew){
4903                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4904                         return(NULL);
4905                 }
4906                 if(qs->states_refd.size()){
4907                         fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4908                         return(NULL);
4909                 }
4910         }
4911
4912
4913
4914 //              Convert the predicates into CNF.  OK to pass NULL ptr.
4915         make_cnf_from_pr(wh, qs->wh_cnf);
4916         make_cnf_from_pr(hv, qs->hav_cnf);
4917         make_cnf_from_pr(cb, qs->cb_cnf);
4918         make_cnf_from_pr(cw, qs->cw_cnf);
4919         make_cnf_from_pr(closew, qs->closew_cnf);
4920
4921 //              Analyze the predicates.
4922
4923         for(i=0;i<qs->wh_cnf.size();i++)
4924                 analyze_cnf(qs->wh_cnf[i]);
4925         for(i=0;i<qs->hav_cnf.size();i++)
4926                 analyze_cnf(qs->hav_cnf[i]);
4927         for(i=0;i<qs->cb_cnf.size();i++)
4928                 analyze_cnf(qs->cb_cnf[i]);
4929         for(i=0;i<qs->cw_cnf.size();i++)
4930                 analyze_cnf(qs->cw_cnf[i]);
4931         for(i=0;i<qs->closew_cnf.size();i++)
4932                 analyze_cnf(qs->closew_cnf[i]);
4933
4934
4935 //                      At this point, the old analysis program
4936 //                      gathered all refs to partial functions,
4937 //                      complex literals, and parameters accessed via a handle.
4938 //                      I think its better to delay this
4939 //                      until code generation time, as the query will be
4940 //                      in general split.
4941
4942     }
4943
4944         return(qs);
4945 }
4946
4947 ///////////////////////////////////////////////////////////////////////
4948
4949 //              Expand gbvars with their definitions.
4950
4951 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4952         int o;
4953
4954         switch(se->get_operator_type()){
4955         case SE_LITERAL:
4956         case SE_PARAM:
4957         case SE_IFACE_PARAM:
4958                 return se;
4959         case SE_UNARY_OP:
4960                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4961                 return se;
4962         case SE_BINARY_OP:
4963                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4964                 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
4965                 return se;
4966         case SE_COLREF:
4967                 if( se->is_gb() ){
4968                         return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4969                 }
4970                 return se;
4971 //                      don't descend into aggr defs.
4972         case SE_AGGR_STAR:
4973                 return se;
4974         case SE_AGGR_SE:
4975                 return se;
4976         case SE_FUNC:
4977                 for(o=0;o<se->param_list.size();o++){
4978                         se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
4979                 }
4980                 return se;
4981         default:
4982                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4983                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
4984                 exit(1);
4985         }
4986         return se;
4987 }
4988
4989 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4990         vector<scalarexp_t *> op_list;
4991         int o;
4992         bool found = false;
4993
4994         switch(pr->get_operator_type()){
4995         case PRED_IN:
4996                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
4997                 return;
4998         case PRED_COMPARE:
4999                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
5000                 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
5001                 return;
5002         case PRED_UNARY_OP:
5003                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5004                 return;
5005         case PRED_BINARY_OP:
5006                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5007                 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
5008                 return;
5009         case PRED_FUNC:
5010                 for(o=0;o<pr->param_list.size();++o){
5011                         pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
5012                 }
5013                 return;
5014         default:
5015                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5016                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5017         }
5018         return;
5019 }
5020
5021
5022
5023
5024 //              return true if the se / pr contains any gbvar on the list.
5025
5026
5027 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5028         vector<scalarexp_t *> operands;
5029         int o;
5030         bool found = false;
5031
5032         switch(se->get_operator_type()){
5033         case SE_LITERAL:
5034         case SE_PARAM:
5035         case SE_IFACE_PARAM:
5036                 return false;
5037         case SE_UNARY_OP:
5038                 return contains_gb_se(se->get_left_se(),gref_set);
5039         case SE_BINARY_OP:
5040                 return( contains_gb_se(se->get_left_se(),gref_set) ||
5041                         contains_gb_se(se->get_right_se(),gref_set) );
5042         case SE_COLREF:
5043                 if( se->is_gb() ){
5044                         return( gref_set.count(se->get_gb_ref()) > 0);
5045                 }
5046                 return false;
5047 //                      don't descend into aggr defs.
5048         case SE_AGGR_STAR:
5049                 return false;
5050         case SE_AGGR_SE:
5051                 return false;
5052         case SE_FUNC:
5053                 operands = se->get_operands();
5054                 for(o=0;o<operands.size();o++){
5055                         found = found || contains_gb_se(operands[o], gref_set);
5056                 }
5057                 return found;
5058         default:
5059                 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5060                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5061                 exit(1);
5062         }
5063         return false;
5064 }
5065
5066
5067 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5068         vector<scalarexp_t *> op_list;
5069         int o;
5070         bool found = false;
5071
5072         switch(pr->get_operator_type()){
5073         case PRED_IN:
5074                 return contains_gb_se(pr->get_left_se(), gref_set);
5075         case PRED_COMPARE:
5076                 return (contains_gb_se(pr->get_left_se(),gref_set)
5077                         || contains_gb_se(pr->get_right_se(),gref_set) );
5078         case PRED_UNARY_OP:
5079                 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5080         case PRED_BINARY_OP:
5081                 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5082                         || contains_gb_pr(pr->get_right_pr(),gref_set) );
5083         case PRED_FUNC:
5084                 op_list = pr->get_op_list();
5085                 for(o=0;o<op_list.size();++o){
5086                         found = found ||contains_gb_se(op_list[o],gref_set) ;
5087                 }
5088                 return found;
5089         default:
5090                 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5091                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5092         }
5093
5094         return found;
5095 }
5096
5097
5098 //              Gather the set of columns accessed in this se.
5099 //              Descend into aggregate functions.
5100
5101 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5102         col_id ci;
5103         vector<scalarexp_t *> operands;
5104         int o;
5105
5106         if(! se)
5107                 return;
5108
5109         switch(se->get_operator_type()){
5110         case SE_LITERAL:
5111         case SE_PARAM:
5112         case SE_IFACE_PARAM:
5113                 return;
5114         case SE_UNARY_OP:
5115                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5116                 return;
5117         case SE_BINARY_OP:
5118                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5119                 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
5120                 return;
5121         case SE_COLREF:
5122                 if(! se->is_gb() ){
5123                         ci.load_from_colref(se->get_colref() );
5124                         if(ci.tblvar_ref < 0){
5125                                 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5126                         }
5127                         cid_set.insert(ci);
5128                 }else{
5129                         if(gtbl==NULL){
5130                                 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
5131                                 exit(1);
5132                         }
5133                         gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
5134                 }
5135                 return;
5136         case SE_AGGR_STAR:
5137                 return;
5138         case SE_AGGR_SE:
5139                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5140                 return;
5141         case SE_FUNC:
5142                 operands = se->get_operands();
5143                 for(o=0;o<operands.size();o++){
5144                         gather_se_col_ids(operands[o], cid_set,gtbl);
5145                 }
5146                 return;
5147         default:
5148                 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5149                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5150                 exit(1);
5151         }
5152 }
5153
5154
//              Gather the set of columns accessed in this predicate.
5156
5157 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5158         vector<scalarexp_t *> op_list;
5159         int o;
5160
5161         switch(pr->get_operator_type()){
5162         case PRED_IN:
5163                 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
5164                 return;
5165         case PRED_COMPARE:
5166                 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5167                 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
5168                 return;
5169         case PRED_UNARY_OP:
5170                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5171                 return;
5172         case PRED_BINARY_OP:
5173                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5174                 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
5175                 return;
5176         case PRED_FUNC:
5177                 op_list = pr->get_op_list();
5178                 for(o=0;o<op_list.size();++o){
5179                         gather_se_col_ids(op_list[o],cid_set,gtbl) ;
5180                 }
5181                 return;
5182         default:
5183                 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5184                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5185         }
5186 }
5187
5188
5189
5190
5191 //              Gather the set of special operator or comparison functions referenced by this se.
5192
5193 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5194         col_id ci;
5195         data_type *ldt, *rdt;
5196         int o;
5197         vector<scalarexp_t *> operands;
5198
5199         switch(se->get_operator_type()){
5200         case SE_LITERAL:
5201                 if( se->get_literal()->constructor_name() != "")
5202                         fcn_set.insert( se->get_literal()->constructor_name() );
5203                 return;
5204         case SE_PARAM:
5205                 return;
5206 //                      SE_IFACE_PARAM should not exist when this is called.
5207         case SE_UNARY_OP:
5208                 ldt = se->get_left_se()->get_data_type();
5209                 if(ldt->complex_operator(se->get_op()) ){
5210                         fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5211                 }
5212                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5213                 return;
5214         case SE_BINARY_OP:
5215                 ldt = se->get_left_se()->get_data_type();
5216                 rdt = se->get_right_se()->get_data_type();
5217
5218                 if(ldt->complex_operator(rdt, se->get_op()) ){
5219                         fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5220                 }
5221                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5222                 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
5223                 return;
5224         case SE_COLREF:
5225                 return;
5226         case SE_AGGR_STAR:
5227                 return;
5228         case SE_AGGR_SE:
5229                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5230                 return;
5231         case SE_FUNC:
5232                 operands = se->get_operands();
5233                 for(o=0;o<operands.size();o++){
5234                         gather_se_opcmp_fcns(operands[o], fcn_set);
5235                 }
5236                 return;
5237         default:
5238                 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5239                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5240                 exit(1);
5241         }
5242 }
5243
5244
//              Gather the set of special operator or comparison functions referenced by this predicate.
5246
5247 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5248         data_type *ldt, *rdt;
5249         vector<scalarexp_t *> operands;
5250         int o;
5251
5252         switch(pr->get_operator_type()){
5253         case PRED_IN:
5254                 ldt = pr->get_left_se()->get_data_type();
5255                 if(ldt->complex_comparison(ldt) ){
5256                         fcn_set.insert( ldt->get_equals_fcn(ldt) );
5257                 }
5258                 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
5259                 return;
5260         case PRED_COMPARE:
5261                 ldt = pr->get_left_se()->get_data_type();
5262                 rdt = pr->get_right_se()->get_data_type();
5263                 if(ldt->complex_comparison(rdt) ){
5264                         fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5265                 }
5266                 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5267                 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
5268                 return;
5269         case PRED_UNARY_OP:
5270                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5271                 return;
5272         case PRED_BINARY_OP:
5273                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5274                 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
5275                 return;
5276         case PRED_FUNC:
5277                 operands = pr->get_op_list();
5278                 for(o=0;o<operands.size();o++){
5279                         gather_se_opcmp_fcns(operands[o], fcn_set);
5280                 }
5281                 return;
5282         default:
5283                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
5284                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5285         }
5286 }
5287
5288
5289
5290
5291 //              find the temporal variable divisor if any.
5292 //              Only forms allowed : temporal_colref, temporal_colref/const
5293 //              temporal_colref/const + const
5294
5295
5296 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5297         long long int retval = 0;
5298         data_type *ldt, *rdt;
5299         int o;
5300         vector<scalarexp_t *> operands;
5301         scalarexp_t *t_se, *c_se;
5302         string the_op;
5303
5304         switch(se->get_operator_type()){
5305         case SE_LITERAL:
5306                 return(-1);
5307         case SE_PARAM:
5308                 return(-1);
5309 //                      SE_IFACE_PARAM should not exist when this is called.
5310         case SE_UNARY_OP:
5311                 return(-1);
5312         case SE_BINARY_OP:
5313                 ldt = se->get_left_se()->get_data_type();
5314                 if(ldt->is_temporal()){
5315                         t_se = se->get_left_se();
5316                         c_se = se->get_right_se();
5317                 }else{
5318                         t_se = se->get_left_se();
5319                         c_se = se->get_right_se();
5320                 }
5321                 if((! t_se->get_data_type()->is_temporal()) ||  c_se->get_data_type()->is_temporal())
5322                         return -1;
5323
5324                 the_op = se->get_op();
5325                 if(the_op == "+" || the_op == "-")
5326                         return find_temporal_divisor(t_se, gbt,fnm);
5327                 if(the_op == "/"){
5328                         if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5329                                 fnm = t_se->get_colref()->get_field();
5330                                 string lits = c_se->get_literal()->to_string();
5331                                 sscanf(lits.c_str(),"%qd",&retval);
5332                                 return retval;
5333                         }
5334                 }
5335
5336                 return -1;
5337         case SE_COLREF:
5338                 if(se->is_gb()){
5339                         return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
5340                 }
5341                 if(se->get_data_type()->is_temporal()){
5342                         fnm = se->get_colref()->get_field();
5343                         return 1;
5344                 }
5345                 return 0;
5346         case SE_AGGR_STAR:
5347                 return -1;
5348         case SE_AGGR_SE:
5349                 return -1;
5350         case SE_FUNC:
5351                 return -1;
5352         default:
5353                 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5354                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5355                 exit(1);
5356         }
5357 }
5358
5359
//                      impute_colname:
//                      Create a meaningful but unique name for a column.
5362 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5363         set<string> curr_names;
5364         int s;
5365         for(s=0;s<sel_list.size();++s){
5366                 curr_names.insert(sel_list[s]->name);
5367         }
5368         return impute_colname(curr_names, se);
5369 }
5370
5371 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5372 string ret;
5373 scalarexp_t *seo;
5374 vector<scalarexp_t *> operand_list;
5375 string opstr;
5376
5377         switch(se->get_operator_type()){
5378         case SE_LITERAL:
5379                 ret = "Literal";
5380                 break;
5381     case SE_PARAM:
5382                 ret = "Param_" + se->get_param_name();
5383                 break;
5384     case SE_IFACE_PARAM:
5385                 ret = "Iparam_" + se->get_ifpref()->get_pname();
5386                 break;
5387     case SE_COLREF:
5388                 ret =  se->get_colref()->get_field() ;
5389                 break;
5390     case SE_UNARY_OP:
5391     case SE_BINARY_OP:
5392                 ret = "Field";
5393                 break;
5394     case SE_AGGR_STAR:
5395                 ret = "Cnt";
5396                 break;
5397     case SE_AGGR_SE:
5398                 ret = se->get_op();
5399                 seo = se->get_left_se();
5400                 switch(se->get_left_se()->get_operator_type()){
5401                 case SE_PARAM:
5402                         ret += "_PARAM_"+seo->get_param_name();
5403                         break;
5404                 case SE_IFACE_PARAM:
5405                         ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5406                         break;
5407                 case SE_COLREF:
5408                         opstr =  seo->get_colref()->get_field();
5409                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5410                                 ret += "_" + opstr;
5411                         }else{
5412                                 ret = opstr;
5413                         }
5414                         break;
5415                 case SE_AGGR_STAR:
5416                 case SE_AGGR_SE:
5417                         opstr = seo->get_op();
5418                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5419                                 ret += "_" + seo->get_op();
5420                         }else{
5421                                 ret = opstr;
5422                         }
5423                         break;
5424                 case SE_FUNC:
5425                         opstr = seo->get_op();
5426                         ret += "_" + seo->get_op();
5427                         break;
5428         case SE_UNARY_OP:
5429         case SE_BINARY_OP:
5430                         ret += "_SE";
5431                         break;
5432                 default:
5433                         ret += "_";
5434                         break;
5435                 }
5436                 break;
5437         case SE_FUNC:
5438                 ret = se->get_op();
5439                 operand_list = se->get_operands();
5440                 if(operand_list.size() > 0){
5441                         seo = operand_list[0];
5442                         switch(seo->get_operator_type()){
5443                         case SE_PARAM:
5444                                 ret += "_PARAM_"+seo->get_param_name();
5445                                 break;
5446                         case SE_IFACE_PARAM:
5447                                 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5448                                 break;
5449                         case SE_COLREF:
5450                                 ret += "_" + seo->get_colref()->get_field();
5451                                 break;
5452                         case SE_AGGR_STAR:
5453                         case SE_AGGR_SE:
5454                         case SE_FUNC:
5455                                 ret += "_" + seo->get_op();
5456                                 break;
5457                 case SE_UNARY_OP:
5458                 case SE_BINARY_OP:
5459                                 ret += "_SE";
5460                         break;
5461                         default:
5462                                 ret += "_";
5463                                 break;
5464                         }
5465                 }else{
5466                         ret += "_func";
5467                 }
5468                 break;
5469         }
5470
5471         if(ret == "Field"){
5472                 if(curr_names.count("Field0") == 0)
5473                         ret = "Field0";
5474         }
5475         int iter = 1;
5476         string base = ret;
5477         while(curr_names.count(ret) > 0){
5478                 char tmpstr[500];
5479                 sprintf(tmpstr,"%s%d",base.c_str(),iter);
5480                 ret = tmpstr;
5481                 iter++;
5482         }
5483
5484
5485         curr_names.insert(ret);
5486         return(ret);
5487
5488 }
5489
5490
5491
5492 //////////////////////////////////////////////////////////////////////
5493 //////////////          Methods of defined classes ///////////////////////
5494 //////////////////////////////////////////////////////////////////////
5495
5496 //              helper fcn to enable col_id as map key.
5497
5498   bool operator<(const col_id &cr1, const col_id &cr2){
5499         if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5500         if(cr1.tblvar_ref == cr2.tblvar_ref)
5501            return (cr1.field < cr2.field);
5502         return(false);
5503   }
5504
5505
5506 //              Process the GB variables.
5507 //              At parse time, GB vars are either GB_COLREF,
5508 //              or GB_COMPUTED if the AS keyword is used.
5509 //              Cast GB vars as named entities with a SE as
5510 //              their definition (the colref in the case of GB_COLREF).
5511 //
5512 //              TODO: if there is a gbref in a gbdef,
5513 //              then I won't be able to compute the value without
5514 //              a complex dependence analysis.  So verify that there is no
5515 //              gbref in any of the GBdefs.
5516 //              BUT: a GBVAR_COLREF should be converted to a regular colref,
5517 //              which is not yet done.
5518 //
5519 //              TODO : sort out issue of GBVAR naming and identification.
5520 //              Determine where it is advantageous to convert GV_COLREF
5521 //              GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5522 //              etc.
5523 //
5524 //              return -1 if there is a problem.
5525
5526 int gb_table::add_gb_attr(
5527                                                   gb_t *gb,
5528                                                   tablevar_list_t *fm,
5529                                                   table_list *schema,
5530                                                   table_exp_t *fta_tree,
5531                                                   ext_fcn_list *Ext_fcns
5532                                                   ){
5533         colref_t *cr;
5534         int retval;
5535         gb_table_entry *entry;
5536
5537         if(gb->type == GB_COLREF){
5538                 if(gb->table != "")
5539                         cr = new colref_t(
5540                                 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5541                         );
5542                 else
5543                         cr = new colref_t(gb->name.c_str());
5544
5545                 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5546                 if(tablevar_ref < 0) return(tablevar_ref);
5547
5548                 cr->set_tablevar_ref(tablevar_ref);
5549                 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5550                 cr->set_interface("");
5551                 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
5552
5553                 entry = new gb_table_entry();
5554                 entry->name.field = cr->get_field();
5555                 entry->name.tblvar_ref = tablevar_ref;
5556                 entry->definition = new scalarexp_t(cr);
5557                 entry->ref_type = GBVAR_COLREF;
5558         }else{
5559                 entry = new gb_table_entry();
5560                 entry->name.field = gb->name;
5561                 entry->name.tblvar_ref = -1;
5562                 entry->definition = gb->def;
5563                 entry->ref_type = GBVAR_SE;
5564         }
5565
5566         retval = verify_colref(entry->definition, fm, schema, NULL);
5567         if(retval < 0) return(retval);
5568
5569         retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5570         if(retval < 0) return(retval);
5571
5572 //              Verify that the gbvar def references no aggregates and no gbvars.
5573         if(count_gb_se(entry->definition) > 0){
5574                 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5575                 return(-1);
5576         }
5577         if(count_aggr_se(entry->definition, true) > 0){
5578                 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5579                 return(-1);
5580         }
5581
5582 //                      Check for duplicates
5583         int i;
5584         for(i=0;i<gtbl.size();++i){
5585                 if(entry->name.field == gtbl[i]->name.field){
5586                         fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5587                         return -1;
5588                 }
5589         }
5590
5591
5592         gtbl.push_back(entry);
5593
5594         return(1);
5595 }
5596
5597
5598 //                      Try to determine if the colref is actually
5599 //                      a gbvar ref.
5600 //                      a) if no tablename associated with the colref,
5601 //                              1) try to find a matching GB_COMPUTED gbvar.
5602 //                              2) failing that, try to match to a single tablevar
5603 //                              3) if successful, search among GB_COLREF
5604 //                      b) else, try to match the tablename to a single tablevar
5605 //                              if successful, search among GB_COLREF
5606 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5607         string c_field = cr->get_field();
5608         int c_tblref;
5609         int n_tbl;
5610         int i;
5611         vector<int> candidates;
5612
5613         if(cr->uses_default_table()){
5614                 for(i=0;i<gtbl.size();i++){
5615                         if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
5616                                 return(i);
5617                         }
5618                 }
5619                 candidates = find_source_tables(c_field, fm, schema);
5620                 if(candidates.size() != 1) return(-1); // can't find unique tablevar
5621                 for(i=0;i<gtbl.size();i++){
5622                         if(gtbl[i]->ref_type==GBVAR_COLREF &&
5623                                   c_field == gtbl[i]->name.field &&
5624                                   candidates[0] == gtbl[i]->name.tblvar_ref){
5625                                 return(i);
5626                         }
5627                 }
5628                 return(-1); // colref is not in gb table.
5629         }
5630
5631 //                      A table name must have been given.
5632         vector<tablevar_t *> fm_tbls = fm->get_table_list();
5633         string interface = cr->get_interface();
5634         string table_name = cr->get_table_name();
5635
5636
5637 //                      if no interface name is given, try to search for the table
5638 //                      name among the tablevar names first.
5639         if(interface==""){
5640                 for(i=0;i<fm_tbls.size();++i){
5641                         if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5642                                 candidates.push_back(i);
5643                 }
5644                 if(candidates.size()>1) return(-1);
5645                 if(candidates.size()==1){
5646                         for(i=0;i<gtbl.size();i++){
5647                                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5648                                         c_field == gtbl[i]->name.field &&
5649                                         candidates[0] == gtbl[i]->name.tblvar_ref){
5650                                         return(i);
5651                                 }
5652                         }
5653                         return(-1);  // match semantics of bind to tablevar name first
5654                 }
5655         }
5656
5657 //              Interface name given, or no interface but no
5658 //              no tablevar match.  Try to match on schema name.
5659         for(i=0;i<fm_tbls.size();++i){
5660                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5661                         candidates.push_back(i);
5662         }
5663         if(candidates.size() != 1) return(-1);
5664         for(i=0;i<gtbl.size();i++){
5665                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5666                         c_field == gtbl[i]->name.field &&
5667                         candidates[0] == gtbl[i]->name.tblvar_ref){
5668                         return(i);
5669                 }
5670         }
5671
5672 //              No match found.
5673         return(-1);
5674
5675 }
5676
5677
5678
5679 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5680         if(is_builtin()){
5681                 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5682                         (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5683                         (op == "XOR_AGGR") )
5684                                 return(true);
5685         }else{
5686                 return Ext_fcns->fta_legal(fcn_id);
5687         }
5688         return(false);
5689 }
5690
5691
5692 //              Return the set of subaggregates required to compute
5693 //              the desired aggregate.  THe operand of the subaggregates
5694 //              can only be * or the scalarexp used in the superaggr.
5695 //              This is indicated by the use_se vector.
5696
5697 //              Is this code generation specific?
5698
5699 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
5700         vector<string> ret;
5701
5702         if(op == "COUNT"){
5703                 ret.push_back("COUNT");
5704                 use_se.push_back(false);
5705         }
5706         if(op == "SUM"){
5707                 ret.push_back("SUM");
5708                 use_se.push_back(true);
5709         }
5710         if(op == "AVG"){
5711                 ret.push_back("SUM");
5712                 ret.push_back("COUNT");
5713                 use_se.push_back(true);
5714                 use_se.push_back(false);
5715         }
5716         if(op == "MIN"){
5717                 ret.push_back("MIN");
5718                 use_se.push_back(true);
5719         }
5720         if(op == "MAX"){
5721                 ret.push_back("MAX");
5722                 use_se.push_back(true);
5723         }
5724         if(op == "AND_AGGR"){
5725                 ret.push_back("AND_AGGR");
5726                 use_se.push_back(true);
5727         }
5728         if(op == "OR_AGGR"){
5729                 ret.push_back("OR_AGGR");
5730                 use_se.push_back(true);
5731         }
5732         if(op == "XOR_AGGR"){
5733                 ret.push_back("XOR_AGGR");
5734                 use_se.push_back(true);
5735         }
5736
5737         return(ret);
5738 }
5739
5740 //                      Code generation specific?
5741
5742 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5743         vector<data_type *> ret;
5744         data_type *dt;
5745
5746         if(op == "COUNT"){
5747                 dt = new data_type("Int"); // was Uint
5748                 ret.push_back( dt );
5749         }
5750         if(op == "SUM"){
5751                 dt = new data_type();
5752                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5753                 ret.push_back(dt);
5754         }
5755         if(op == "AVG"){
5756                 dt = new data_type();
5757                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5758                 ret.push_back( dt );
5759                 dt = new data_type("Int");
5760                 ret.push_back( dt );
5761         }
5762         if(op == "MIN"){
5763                 dt = new data_type();
5764                 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5765                 ret.push_back( dt );
5766         }
5767         if(op == "MAX"){
5768                 dt = new data_type();
5769                 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5770                 ret.push_back( dt );
5771         }
5772         if(op == "AND_AGGR"){
5773                 dt = new data_type();
5774                 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5775                 ret.push_back( dt );
5776         }
5777         if(op == "OR_AGGR"){
5778                 dt = new data_type();
5779                 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5780                 ret.push_back( dt );
5781         }
5782         if(op == "XOR_AGGR"){
5783                 dt = new data_type();
5784                 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5785                 ret.push_back( dt );
5786         }
5787
5788         return(ret);
5789 }
5790
5791 //              Code generation specific?
5792
5793 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5794         scalarexp_t *se_l, *se_r, *ret_se = NULL;
5795
5796         if(op == "COUNT"){
5797                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5798                 return(ret_se);
5799         }
5800         if(op == "SUM"){
5801                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5802                 return(ret_se);
5803         }
5804         if(op == "AVG"){
5805                 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5806                 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5807
5808                 ret_se = new scalarexp_t("/", se_l, se_r);
5809                 return(ret_se);
5810         }
5811         if(op == "MIN"){
5812                 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
5813                 return(ret_se);
5814         }
5815         if(op == "MAX"){
5816                 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
5817                 return(ret_se);
5818         }
5819         if(op == "AND_AGGR"){
5820                 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5821                 return(ret_se);
5822         }
5823         if(op == "OR_AGGR"){
5824                 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5825                 return(ret_se);
5826         }
5827         if(op == "XOR_AGGR"){
5828                 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5829                 return(ret_se);
5830         }
5831
5832         return(ret_se);
5833
5834 }
5835
5836
5837 //              Add a built-in aggr.
5838 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5839         int i;
5840
5841         for(i=0;i<agr_tbl.size();i++){
5842                 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5843                   && is_equivalent_se(se,agr_tbl[i]->operand) ){
5844 //                && is_super == agr_tbl[i]->is_superaggr())
5845                         if(is_super) agr_tbl[i]->set_super(true);
5846                         return(i);
5847                 }
5848         }
5849
5850         aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5851         agr_tbl.push_back(ate);
5852         return(agr_tbl.size() - 1);
5853 }
5854
5855 //              add a UDAF
5856 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5857         int i,o;
5858
5859         for(i=0;i<agr_tbl.size();i++){
5860                 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5861                                 && opl.size() == agr_tbl[i]->oplist.size() ){
5862 //                              && is_super == agr_tbl[i]->is_superaggr() ){
5863                         for(o=0;o<opl.size();++o){
5864                                 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
5865                                         break;
5866                         }
5867                         if(o == opl.size()){
5868                                 if(is_super) agr_tbl[i]->set_super(true);
5869                                 return i;
5870                         }
5871                 }
5872         }
5873
5874         aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5875         agr_tbl.push_back(ate);
5876         return(agr_tbl.size() - 1);
5877 }
5878
5879
5880 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5881         int i;
5882
5883         for(i=0;i<cplx_lit_tbl.size();i++){
5884                 if(l->is_equivalent(cplx_lit_tbl[i])){
5885                         hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5886                         return(i);
5887                 }
5888         }
5889
5890         cplx_lit_tbl.push_back(l);
5891         hdl_ref_tbl.push_back(is_handle_ref);
5892         return(cplx_lit_tbl.size() - 1);
5893 }
5894
5895
5896
5897 //------------------------------------------------------------
5898 //              parse_fta code
5899
5900
5901 gb_t *gb_t::duplicate(){
5902         gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5903         ret->type = type;
5904         ret->lineno = lineno;
5905         ret->charno = charno;
5906         if(def != NULL)
5907                 ret->def = dup_se(def,NULL);
5908         return ret;
5909 }