Added watchlist support
[com/gs-lite.git] / src / ftacmp / analyze_fta.cc
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #include<unistd.h>
17
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
21
22
23 #include"analyze_fta.h"
24
25 #include"type_objects.h"
26
27 #include <string>
28 #include<list>
29
30 using namespace std;
31
32 extern string hostname;                 // name of the current host
33
34 //                      Utility function
35
// Format a signed integer as its decimal string representation.
std::string int_to_string(int i){
    // Far larger than any int needs; snprintf still bounds the write
    // so the buffer can never be overrun.
    char digits[100];
    snprintf(digits, sizeof(digits), "%d", i);
    return std::string(digits);
}
43
44
45 //                              Globals
46
47 //                      These represent derived information from the
48 //                      query analysis stage.  I extract them from a class,
49 //                      perhaps this is dangerous.
50
51 static gb_table *gb_tbl=NULL;                   // Table of all group-by attributes.
52 static aggregate_table *aggr_tbl=NULL;  // Table of all referenced aggregates.
53
54 // static cplx_lit_table *complex_literals=NULL;        // Table of literals with constructors.
55 static param_table *param_tbl=NULL;             // Table of all referenced parameters.
56
57 vector<scalarexp_t *> partial_fcns_list;
58 int wh_partial_start, wh_partial_end;
59 int gb_partial_start, gb_partial_end;
60 int aggr_partial_start, aggr_partial_end;
61 int sl_partial_start, sl_partial_end;
62
63
//                      Infer the table of a column reference and return the table ref.
65 //                      First, extract the
66 //                      field name and table name.  If no table name is used,
67 //                      search all tables to try to find a unique match.
68 //                      Of course, plenty of error checking.
69
70 //              Return the set of tablevar indices in the FROM clause
71 //              which contain a field with the same name.
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
73         int i;
74         vector<int> tv;
75 //      vector<string> tn = fm->get_schema_names();
76         vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
78         for(i=0;i<tn.size();i++){
79 //              if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80                 if(Schema->contains_field(tn[i], field) ){
81                         tv.push_back(i);
82 // printf("\tfound in table %s\n",tn[i].c_str());
83                 }
84         }
85         return(tv);
86 }
87
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
89         int i;
90         string tname = ir->get_tablevar();
91         if(tname ==""){
92                 if(fm->size()==1) return 0;
93                 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
94                 return -1;
95         }
96         for(i=0;i<fm->size();++i){
97                 if(tname == fm->get_tablevar_name(i))
98                         return i;
99         }
100         fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
101         return -1;
102 }
103
104
105 //              compute the index of the tablevar in the from clause that the
106 //              colref is in.
107 //              return -1 if no tablevar can be imputed.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
109         int i;
110         string table_name;
111         int table_ref;
112         vector<int> tv;
113         vector<tablevar_t *> fm_tbls = fm->get_table_list();
114
115         string field = cr->get_field();
116
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
118         if(cr->uses_default_table() ){
119                 tv = find_source_tables(field, fm, schema);
120                 if(tv.size() > 1){
121                         fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
123                         for(i=0;i<tv.size();i++){
124                                 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
125                         }
126                         fprintf(stderr,"\n\tYou must specify one of these.\n");
127                         return(-1);
128                 }
129                 if(tv.size() == 0){
130                         fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131                                 cr->get_lineno(), cr->get_charno(),field.c_str() );
132                         return(-1);
133                 }
134
135                 return(tv[0]);
136         }
137
138 //                      The table source is named -- but is it a schema name
139 //                      or a var name?
140
141         string interface = cr->get_interface();
142         table_name = cr->get_table_name();
143
144 //              if interface is not specified, prefer to look at the tablevar names
145 //              Check for duplicates.
146         if(interface==""){
147                 for(i=0;i<fm_tbls.size();++i){
148                         if(table_name == fm_tbls[i]->get_var_name())
149                                 tv.push_back(i);
150                 }
151                 if(tv.size() > 1){
152                         fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
153                         return(-1);
154                 }
155                 if(tv.size() == 1) return(tv[0]);
156         }
157
158 //              Tableref not found by looking at tableref vars, or an interface
159 //              was specified.  Try to match on schema and interface.
160 //              Check for duplicates.
161         for(i=0;i<fm_tbls.size();++i){
162                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
163                         tv.push_back(i);
164         }
165         if(tv.size() > 1){
166                 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167                         cr->get_lineno(), cr->get_charno() );
168                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
170                 return(-1);
171         }
172
173         if(tv.size() == 0 ){
174                 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
177                 return(-1)      ;
178         }
179
180         return(tv[0]);
181 }
182
183
184 //                      Reset temporal properties of a scalar expression
185 void reset_temporal(scalarexp_t *se){
186         col_id ci;
187         vector<scalarexp_t *> operands;
188         int o;
189
190         se->get_data_type()->reset_temporal();
191
192         switch(se->get_operator_type()){
193         case SE_LITERAL:
194         case SE_PARAM:
195         case SE_IFACE_PARAM:
196         case SE_COLREF:
197                 return;
198         case SE_UNARY_OP:
199                 reset_temporal(se->get_left_se());
200                 return;
201         case SE_BINARY_OP:
202                 reset_temporal(se->get_left_se());
203                 reset_temporal(se->get_right_se());
204                 return;
205         case SE_AGGR_STAR:
206                 return;
207         case SE_AGGR_SE:
208                 reset_temporal(se->get_left_se());
209                 return;
210         case SE_FUNC:
211                 operands = se->get_operands();
212                 for(o=0;o<operands.size();o++){
213                         reset_temporal(operands[o]);
214                 }
215                 return;
216         default:
217                 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
219                 exit(1);
220         }
221 }
222
223 //              Verify that column references exist in their
224 //              declared tables.  As a side effect, assign
225 //              their data types.  Other side effects :
226 //
227 //              return -1 on error
228
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230                                         table_list *schema, gb_table *gtbl){
231         int l_ret, r_ret;
232         int gb_ref;
233         colref_t *cr;
234         ifpref_t *ir;
235         string field, table_source, type_name;
236         data_type *dt;
237         vector<string> tn;
238         vector<int> tv;
239         int table_var;
240         int o;
241         vector<scalarexp_t *> operands;
242
243         switch(se->get_operator_type()){
244         case SE_LITERAL:
245         case SE_PARAM:
246                 return(1);
247         case SE_IFACE_PARAM:
248                 ir = se->get_ifpref();
249                 table_var = infer_tablevar_from_ifpref(ir, fm);
250                 if(table_var < 0) return(table_var);
251                 ir->set_tablevar_ref(table_var);
252                 return(1);
253         case SE_UNARY_OP:
254                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
255         case SE_BINARY_OP:
256                 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257                 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
259                 return(1);
260         case SE_COLREF:
261                 cr = se->get_colref();
262                 field = cr->get_field();
263
264 //                              Determine if this is really a GB ref.
265 //                              (the parser can only see that its a colref).
266                 if(gtbl != NULL){
267                         gb_ref = gtbl->find_gb(cr, fm, schema);
268                 }else{
269                         gb_ref = -1;
270                 }
271
272                 se->set_gb_ref(gb_ref);
273
274                 if(gb_ref < 0){
275 //                              Its a colref, verify its existance and
276 //                              record the data type.
277                         table_var = infer_tablevar_from_colref(cr,fm,schema);
278                         if(table_var < 0) return(table_var);
279
280         //                      Store the table ref in the colref.
281                         cr->set_tablevar_ref(table_var);
282                         cr->set_schema_ref(fm->get_schema_ref(table_var));
283                         cr->set_interface("");
284                         cr->set_table_name(fm->get_tablevar_name(table_var));
285
286                         if(! schema->contains_field(cr->get_schema_ref(), field)){
287                                 fprintf(stderr, "Error, field %s is not in stream %s\n", field.c_str(), schema->get_table_name( cr->get_schema_ref() ).c_str());
288                                 return -1;
289                         }
290
291                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
292                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
293                         dt = new data_type(type_name, modifiers);
294                         se->set_data_type(dt);
295                 }else{
296 //                              Else, its a gbref, use the GB var's data type.
297                         se->set_data_type(gtbl->get_data_type(gb_ref));
298                 }
299
300                 return(1);
301         case SE_AGGR_STAR:
302                 return(1);
303         case SE_AGGR_SE:
304                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
305         case SE_FUNC:
306                 operands = se->get_operands();
307                 r_ret = 1;
308                 for(o=0;o<operands.size();o++){
309                         l_ret = verify_colref(operands[o], fm, schema, gtbl);
310                         if(l_ret < 0) r_ret = -1;
311                 }
312                 return(r_ret);
313         default:
314                 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
315                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
316                 return(-1);
317         }
318         return(-1);
319 }
320
321
322 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
323         int l_ret, r_ret;
324         std::vector<scalarexp_t *> op_list;
325         int o;
326
327         switch(pr->get_operator_type()){
328         case PRED_IN:
329                 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
330         case PRED_COMPARE:
331                 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
332                 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
333                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
334                 return(1);
335         case PRED_UNARY_OP:
336                 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
337         case PRED_BINARY_OP:
338                 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
339                 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
340                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
341                 return(1);
342         case PRED_FUNC:
343                 op_list = pr->get_op_list();
344                 l_ret = 0;
345                 for(o=0;o<op_list.size();++o){
346                         if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
347                 }
348                 return(l_ret);
349         default:
350                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
351                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
352         }
353
354         return(-1);
355 }
356
357
358 bool literal_only_se(scalarexp_t *se){          // really only literals.
359         int o;
360         vector<scalarexp_t *> operands;
361
362         if(se == NULL) return(1);
363         switch(se->get_operator_type()){
364         case SE_LITERAL:
365                 return(true);
366         case SE_PARAM:
367                 return(false);
368         case SE_IFACE_PARAM:
369                 return(false);
370         case SE_UNARY_OP:
371                 return( literal_only_se(se->get_left_se()) );
372         case SE_BINARY_OP:
373                 return( literal_only_se(se->get_left_se()) &&
374                                 literal_only_se(se->get_right_se()) );
375         case SE_COLREF:
376                 return false;
377         case SE_AGGR_STAR:
378                 return false;
379         case SE_AGGR_SE:
380                 return false;
381                 return(1);
382         case SE_FUNC:
383                 return false;
384         default:
385                 return false;
386         }
387         return false;
388 }
389
390
391
392
393 //              Verify that column references exist in their
394 //              declared tables.  As a side effect, assign
395 //              their data types.  Other side effects :
396 //
397
398 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
399         int l_ret, r_ret;
400         int gb_ref;
401         colref_t *cr;
402         string field, table_source, type_name;
403         data_type *dt;
404         vector<string> tn;
405         vector<int> tv;
406         int tablevar_ref;
407         int o;
408         vector<scalarexp_t *> operands;
409
410         if(se == NULL) return(1);
411
412         switch(se->get_operator_type()){
413         case SE_LITERAL:
414                 return(1);
415         case SE_PARAM:
416                 return(1);
417         case SE_IFACE_PARAM:
418                 return(1);
419         case SE_UNARY_OP:
420                 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
421         case SE_BINARY_OP:
422                 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
423                 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
424                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
425                 return(1);
426         case SE_COLREF:
427                 if(se->is_gb()) return(1);      // gb ref not a colref.
428
429                 cr = se->get_colref();
430                 field = cr->get_field();
431
432                 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
433                 if(tablevar_ref < 0){
434                         return(tablevar_ref);
435                 }else{
436         //                      Store the table ref in the colref.
437                         cr->set_tablevar_ref(tablevar_ref);
438                         cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
439                         cr->set_interface("");
440                         cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
441
442 //                              Check the data type
443                         type_name = schema->get_type_name(cr->get_schema_ref(), field);
444                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
445                         data_type dt(type_name, modifiers);
446 //                      if(! dt.equals(se->get_data_type()) ){
447 //                      if(! dt.subsumes_type(se->get_data_type()) ){
448                         if(! se->get_data_type()->subsumes_type(&dt) ){
449                                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
450                                         dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
451                                 return(-1);
452                         }
453                 }
454                 return(1);
455         case SE_AGGR_STAR:
456                 return(1);
457         case SE_AGGR_SE:        // Probably I should just return,
458                                                 // aggregate se's are explicitly bound to the schema.
459 //                      return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
460                 return(1);
461         case SE_FUNC:
462                 if(se->get_aggr_ref() >= 0) return 1;
463
464                 operands = se->get_operands();
465                 r_ret = 1;
466                 for(o=0;o<operands.size();o++){
467                         l_ret = bind_to_schema_se(operands[o], fm, schema);
468                         if(l_ret < 0) r_ret = -1;
469                 }
470                 return(r_ret);
471         default:
472                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
473                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
474                 return(-1);
475         }
476         return(-1);
477 }
478
479
480 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
481         int l_ret, r_ret;
482         vector<scalarexp_t *> op_list;
483         int o;
484
485         switch(pr->get_operator_type()){
486         case PRED_IN:
487                 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
488         case PRED_COMPARE:
489                 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
490                 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
491                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
492                 return(1);
493         case PRED_UNARY_OP:
494                 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
495         case PRED_BINARY_OP:
496                 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
497                 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
498                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
499                 return(1);
500         case PRED_FUNC:
501                 op_list = pr->get_op_list();
502                 l_ret = 0;
503                 for(o=0;o<op_list.size();++o){
504                         if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
505                 }
506                 return(l_ret);
507         default:
508                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
509                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
510         }
511
512         return(-1);
513 }
514
515
516
517
518
519
520 //                      verify_colref assigned data types to the column refs.
521 //                      Now assign data types to all other nodes in the
522 //                      scalar expression.
523 //
524 //                      return -1 on error
525
526 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
527         int l_ret, r_ret;
528         data_type *dt;
529         bool bret;
530         vector<scalarexp_t *> operands;
531         vector<data_type *> odt;
532         int o, fcn_id;
533         vector<bool> handle_ind;
534
535         switch(se->get_operator_type()){
536         case SE_LITERAL:
537                 return(constant_t);
538         case SE_PARAM:
539                 return(varying_t);
540         case SE_IFACE_PARAM:
541                 return(varying_t);              // actually, this should not be called.
542         case SE_UNARY_OP:
543                 return data_type::compute_temporal_type(
544                         compute_se_temporal(se->get_left_se(), tcol), se->get_op()
545                 );
546         case SE_BINARY_OP:
547                 return data_type::compute_temporal_type(
548                         compute_se_temporal(se->get_left_se(), tcol),
549                         compute_se_temporal(se->get_right_se(), tcol),
550                         se->get_left_se()->get_data_type()->get_type(),
551                         se->get_right_se()->get_data_type()->get_type(),
552                         se->get_op()
553                 );
554         case SE_COLREF:
555                 {
556                         col_id cid(se->get_colref() );
557                         if(tcol.count(cid) > 0){ return tcol[cid];
558                         }else{ return varying_t;}
559                 }
560         case SE_AGGR_STAR:
561         case SE_AGGR_SE:
562         case SE_FUNC:
563         default:
564                 return varying_t;
565         }
566         return(varying_t);
567 }
568
569
570
571 //                      verify_colref assigned data types to the column refs.
572 //                      Now assign data types to all other nodes in the
573 //                      scalar expression.
574 //
575 //                      return -1 on error
576
577 int assign_data_types(scalarexp_t *se, table_list *schema,
578                                                 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
579         int l_ret, r_ret;
580         data_type *dt;
581         bool bret;
582         vector<scalarexp_t *> operands;
583         vector<data_type *> odt;
584         int o, fcn_id;
585         vector<bool> handle_ind;
586         vector<bool> constant_ind;
587
588         switch(se->get_operator_type()){
589         case SE_LITERAL:
590                 dt = new data_type( se->get_literal()->get_type() );
591                 se->set_data_type(dt);
592                 if( ! dt->is_defined() ){
593                         fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
594                                 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
595                         return(-1);
596                 }else{
597                         return(1);
598                 }
599         case SE_PARAM:
600                 {
601                         string pname = se->get_param_name();
602                         dt = param_tbl->get_data_type(pname);
603 // A SE_PARRAM can change its value mid-query so using one
604 // to set a window is dangerous.  TODO check for this and issue a warning.
605                         dt->set_temporal(constant_t);
606                         se->set_data_type(dt);
607                         if( ! dt->is_defined() ){
608                                 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
609                                         pname.c_str(), se->get_lineno(),se->get_charno() );
610                                 return(-1);
611                         }
612                         return(1);
613                 }
614         case SE_IFACE_PARAM:
615                 dt = new data_type( "STRING" );
616                 se->set_data_type(dt);
617                 return(1);
618         case SE_UNARY_OP:
619                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
620                 if(l_ret < 0) return -1;
621
622                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
623                 se->set_data_type(dt);
624                 if( ! dt->is_defined() ){
625                         fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
626                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
627                                 se->get_lineno(), se->get_charno() );
628                         return(-1);
629                 }else{
630                         return(1);
631                 }
632         case SE_BINARY_OP:
633                 l_ret = assign_data_types(se->get_left_se(),  schema, fta_tree, Ext_fcns);
634                 r_ret = assign_data_types(se->get_right_se(),  schema, fta_tree, Ext_fcns);
635                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
636
637                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
638                 se->set_data_type(dt);
639                 if( ! dt->is_defined() ){
640                         fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
641                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
642                                 se->get_right_se()->get_data_type()->to_string().c_str(),
643                                 se->get_lineno(), se->get_charno() );
644                         return(-1);
645                 }else{
646                         return(1);
647                 }
648         case SE_COLREF:
649                 dt = se->get_data_type();
650                 bret = dt->is_defined();
651                 if( bret ){
652                         return(1);
653                 }else{
654                         fprintf(stderr,"ERROR, column reference type  is undefined, line =%d, char = %d, colref=%s\n",
655                                 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
656                         return(-1);
657                 }
658         case SE_AGGR_STAR:
659                 dt = new data_type("Int");      // changed Uint to Int
660                 se->set_data_type(dt);
661                 return(1);
662         case SE_AGGR_SE:
663                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
664                 if(l_ret < 0) return -1;
665
666                 dt = new data_type();
667                 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
668                 se->set_data_type(dt);
669
670                 if( ! dt->is_defined() ){
671                         fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
672                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
673                                 se->get_lineno(), se->get_charno() );
674                         return(-1);
675                 }else{
676                         return(1);
677                 }
678         case SE_FUNC:
679
680                 operands = se->get_operands();
681                 r_ret = 1;
682                 for(o=0;o<operands.size();o++){
683                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
684                         odt.push_back(operands[o]->get_data_type());
685                         if(l_ret < 0) r_ret = -1;
686                 }
687                 if(r_ret < 0) return(r_ret);
688
689 //                      Is it an aggregate extraction function?
690                 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
691                 if(fcn_id >= 0){
692                         int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
693                         int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
694                         int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
695 //                              Construct a se for the subaggregate.
696                         vector<scalarexp_t *> op_a;
697                         int n_aggr_oprs = operands.size()-n_fcn_params+1;
698                         for(o=0;o<n_aggr_oprs;++o){
699                                         op_a.push_back(operands[o]);
700                         }
701 //                              check handle params
702                         vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
703                         for(o=0;o<op_a.size();o++){
704                         if(handle_a[o]){
705                                 if(op_a[o]->get_operator_type() != SE_LITERAL &&
706                                                 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
707                                                 op_a[o]->get_operator_type() != SE_PARAM){
708                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
709                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
710                                                 return(-1);
711                                         }
712                                 }
713                         }
714                         vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
715                         for(o=0;o<op_a.size();o++){
716                         if(is_const_a[o]){
717                                 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
718                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
719                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
720                                                 return(-1);
721                                         }
722                                 }
723                         }
724
725                         scalarexp_t *se_a  = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
726                         se_a->set_fcn_id(subaggr_id);
727                         se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
728                         se_a->set_aggr_id(0);           // label this as a UDAF.
729
730
731 //                              Change this se to be the actual function
732                         vector<scalarexp_t *> op_f;
733                         op_f.push_back(se_a);
734                         for(o=n_aggr_oprs;o<operands.size();++o)
735                                 op_f.push_back(operands[o]);
736 //                              check handle params
737                         vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
738                         for(o=0;o<op_f.size();o++){
739                         if(handle_f[o]){
740                                 if(op_f[o]->get_operator_type() != SE_LITERAL &&
741                                                 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
742                                                 op_f[o]->get_operator_type() != SE_PARAM){
743                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
744                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
745                                                 return(-1);
746                                         }
747                                 }
748                         }
749                         vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
750                         for(o=0;o<op_f.size();o++){
751                         if(is_const_f[o]){
752                                 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
753                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",
754                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
755                                                 return(-1);
756                                         }
757                                 }
758                         }
759
760                         se->param_list = op_f;
761                         se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
762                         se->set_fcn_id(actual_fcn_id);
763                         se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
764                         return(1);
765                 }
766                 if(fcn_id == -2){
767                         fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
768                 }
769
770 //                      Is it a UDAF?
771                 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
772                 if(fcn_id >= 0){
773                         se->set_fcn_id(fcn_id);
774                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
775                         se->set_aggr_id(0);             // label this as a UDAF.
776 //                      Finally, verify that all HANDLE parameters are literals or params.
777                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
778                         for(o=0;o<operands.size();o++){
779                                 if(handle_ind[o]){
780                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
781                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
782                                                 operands[o]->get_operator_type() != SE_PARAM){
783                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
784                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
785                                                 return(-1);
786                                         }
787                                 }
788                         }
789                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
790                         for(o=0;o<operands.size();o++){
791                         if(constant_ind[o]){
792                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
793                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
794                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
795                                                 return(-1);
796                                         }
797                                 }
798                         }
799
800 //      UDAFS as superaggregates not yet supported.
801 if(se->is_superaggr()){
802 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
803 se->set_superaggr(false);
804 }
805                         return(1);
806                 }
807                 if(fcn_id == -2){
808                         fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
809                 }
810
811 //                      Is it a stateful fcn?
812                 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
813                 if(fcn_id >= 0){
814                         se->set_fcn_id(fcn_id);
815                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
816                         se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
817 //                      Finally, verify that all HANDLE parameters are literals or params.
818                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
819                         for(o=0;o<operands.size();o++){
820                                 if(handle_ind[o]){
821                                         if(operands[o]->get_operator_type() != SE_LITERAL &&
822                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
823                                                 operands[o]->get_operator_type() != SE_PARAM){
824                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
825                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
826                                                 return(-1);
827                                         }
828                                 }
829                         }
830                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
831                         for(o=0;o<operands.size();o++){
832                         if(constant_ind[o]){
833                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){
834                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",
835                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
836                                                 return(-1);
837                                         }
838                                 }
839                         }
840
841                         if(se->is_superaggr()){
842                                 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
843                         }
844                         return(1);
845                 }
846                 if(fcn_id == -2){
847                         fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
848                 }
849
850
851 //                      Is it a regular function?
852                 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
853                 if( fcn_id < 0 ){
854                         fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
855                         for(o=0;o<operands.size();o++){
856                                 if(o>0) fprintf(stderr,", ");
857                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
858                         }
859                         fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
860                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
861
862                         return(-1);
863                 }
864
865                 se->set_fcn_id(fcn_id);
866                 dt = Ext_fcns->get_fcn_dt(fcn_id);
867
868                 if(! dt->is_defined() ){
869                         fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
870                         for(o=0;o<operands.size();o++){
871                                 if(o>0) fprintf(stderr,", ");
872                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
873                         }
874                         fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
875                         return(-1);
876                 }
877
878 //                      Finally, verify that all HANDLE parameters are literals or params.
879                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
880                 for(o=0;o<operands.size();o++){
881                         if(handle_ind[o]){
882                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
883                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
884                                                 operands[o]->get_operator_type() != SE_PARAM){
885                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
886                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
887                                         return(-1);
888                                 }
889                         }
890                 }
891                 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
892                 for(o=0;o<operands.size();o++){
893                 if(constant_ind[o]){
894                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
895                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s  must be constant.\n  Line=%d, char=%d.\n",
896                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
897                                         return(-1);
898                                 }
899                         }
900                 }
901
902
903                 if(se->is_superaggr()){
904                         fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
905                 }
906
907                 se->set_data_type(dt);
908                 return(1);
909         default:
910                 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
911                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
912                 return(-1);
913         }
914         return(-1);
915 }
916
917
918 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
919                                                         table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
920         int l_ret, r_ret;
921         int i;
922         data_type *dt, *dtl;
923         vector<data_type *> odt;
924         vector<literal_t *> litl;
925         vector<scalarexp_t *> operands;
926         vector<bool> handle_ind;
927         vector<bool> constant_ind;
928         int o, fcn_id;
929
930         switch(pr->get_operator_type()){
931         case PRED_IN:
932                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
933                 litl = pr->get_lit_vec();
934                 dt = pr->get_left_se()->get_data_type();
935
936                 for(i=0;i<litl.size();i++){
937                         dtl = new data_type( litl[i]->get_type() );
938                         if( ! dt->is_comparable(dtl,pr->get_op()) ){
939                                 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
940                                         litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
941                                 delete dtl;
942                                 return(-1);
943                         }
944                         delete dtl;
945                 }
946                 return(1);
947         case PRED_COMPARE:
948                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
949                 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
950                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
951
952                 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
953                         fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
954                                 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
955                                  pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
956                         return(-1);
957                 }else{
958                         return(1);
959                 }
960         case PRED_UNARY_OP:
961                 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
962         case PRED_BINARY_OP:
963                 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
964                 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
965                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
966                 return(1);
967         case PRED_FUNC:
968                 operands = pr->get_op_list();
969                 r_ret = 1;
970                 for(o=0;o<operands.size();o++){
971                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
972                         odt.push_back(operands[o]->get_data_type());
973                         if(l_ret < 0) r_ret = -1;
974                 }
975                 if(r_ret < 0) return(r_ret);
976
977                 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
978                 if( fcn_id < 0 ){
979                         fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
980                         for(o=0;o<operands.size();o++){
981                                 if(o>0) fprintf(stderr,", ");
982                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
983                         }
984                         fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
985                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
986                         return(-1);
987                 }
988
989 //              ext_fcn_set.insert(fcn_id);
990                 pr->set_fcn_id(fcn_id);
991
992 //                      Finally, verify that all HANDLE parameters are literals or params.
993                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
994                 for(o=0;o<operands.size();o++){
995                         if(handle_ind[o]){
996                                 if(operands[o]->get_operator_type() != SE_LITERAL &&
997                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
998                                                 operands[o]->get_operator_type() != SE_PARAM){
999                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
1000                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1001                                         exit(1);
1002                                 }
1003                         }
1004                 }
1005                 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1006                 for(o=0;o<operands.size();o++){
1007                 if(constant_ind[o]){
1008                         if(operands[o]->get_data_type()->get_temporal() != constant_t){
1009                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s  must be constant.\n  Line=%d, char=%d.\n",
1010                         o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1011                                         exit(1);
1012                                 }
1013                         }
1014                 }
1015
1016
1017 //                      Check if this predicate function is special sampling function
1018                 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1019
1020
1021                 return(l_ret);
1022         default:
1023                 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1024                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1025         }
1026
1027         return(-1);
1028 }
1029
1030
1031
1032 /////////////////////////////////////////////////////////////////////
1033 ////////////////                Make a deep copy of a se / pred tree
1034 /////////////////////////////////////////////////////////////////////
1035
1036
1037 //              duplicate a select element
1038 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1039         return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1040 }
1041
1042 //              duplicate a scalar expression.
1043 scalarexp_t *dup_se(scalarexp_t *se,
1044                                   aggregate_table *aggr_tbl
1045                                  ){
1046   int p;
1047   vector<scalarexp_t *> operand_list;
1048   vector<data_type *> dt_signature;
1049   scalarexp_t *ret_se, *l_se, *r_se;
1050
1051   switch(se->get_operator_type()){
1052     case SE_LITERAL:
1053                 ret_se = new scalarexp_t(se->get_literal());
1054                 ret_se->use_decorations_of(se);
1055                 return(ret_se);
1056
1057     case SE_PARAM:
1058                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1059                 ret_se->use_decorations_of(se);
1060                 return(ret_se);
1061
1062     case SE_IFACE_PARAM:
1063                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1064                 ret_se->use_decorations_of(se);
1065                 return(ret_se);
1066
1067     case SE_COLREF:
1068                 ret_se = new scalarexp_t(se->get_colref()->duplicate());
1069                 ret_se->rhs.scalarp = se->rhs.scalarp;  // carry along notation
1070                 ret_se->use_decorations_of(se);
1071                 return(ret_se);
1072
1073     case SE_UNARY_OP:
1074                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1075                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1076                 ret_se->use_decorations_of(se);
1077                 return(ret_se);
1078
1079     case SE_BINARY_OP:
1080                 l_se = dup_se(se->get_left_se(), aggr_tbl);
1081                 r_se = dup_se(se->get_right_se(), aggr_tbl);
1082
1083                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1084                 ret_se->use_decorations_of(se);
1085
1086                 return(ret_se);
1087
1088     case SE_AGGR_STAR:
1089                 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1090                 ret_se->use_decorations_of(se);
1091                 return(ret_se);
1092
1093     case SE_AGGR_SE:
1094                 l_se = dup_se(se->get_left_se(),  aggr_tbl);
1095                 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1096                 ret_se->use_decorations_of(se);
1097                 return(ret_se);
1098
1099         case SE_FUNC:
1100                 {
1101                         operand_list = se->get_operands();
1102                         vector<scalarexp_t *> new_operands;
1103                         for(p=0;p<operand_list.size();p++){
1104                                 l_se = dup_se(operand_list[p], aggr_tbl);
1105                                 new_operands.push_back(l_se);
1106                         }
1107
1108                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1109                         ret_se->use_decorations_of(se);
1110                         return(ret_se);
1111                 }
1112
1113         default:
1114                 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
1115                 exit(1);
1116         break;
1117   }
1118   return(NULL);
1119
1120 }
1121
1122
1123
1124 predicate_t *dup_pr(predicate_t *pr,
1125                                                  aggregate_table *aggr_tbl
1126                                                  ){
1127
1128   vector<literal_t *> llist;
1129   scalarexp_t *se_l, *se_r;
1130   predicate_t *pr_l, *pr_r, *ret_pr;
1131   vector<scalarexp_t *> op_list, new_op_list;
1132   int o;
1133
1134
1135         switch(pr->get_operator_type()){
1136         case PRED_IN:
1137                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1138                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1139                 return(ret_pr);
1140
1141         case PRED_COMPARE:
1142                 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1143                 se_r = dup_se(pr->get_right_se(),  aggr_tbl);
1144                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1145                 return(ret_pr);
1146
1147         case PRED_UNARY_OP:
1148                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1149                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1150                 return(ret_pr);
1151
1152         case PRED_BINARY_OP:
1153                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1154                 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1155                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1156                 return(ret_pr);
1157         case PRED_FUNC:
1158                 op_list = pr->get_op_list();
1159                 for(o=0;o<op_list.size();++o){
1160                         se_l = dup_se(op_list[o], aggr_tbl);
1161                         new_op_list.push_back(se_l);
1162                 }
1163                 ret_pr=  new predicate_t(pr->get_op().c_str(), new_op_list);
1164                 ret_pr->set_fcn_id(pr->get_fcn_id());
1165                 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1166                 return(ret_pr);
1167
1168         default:
1169                 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1170                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1171                 exit(1);
1172         }
1173
1174         return(0);
1175
1176 }
1177
1178 table_exp_t *dup_table_exp(table_exp_t *te){
1179         int i;
1180         table_exp_t *ret = new table_exp_t();
1181
1182         ret->query_type = te->query_type;
1183
1184         ss_map::iterator ss_i;
1185         for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1186                 ret->nmap[(*ss_i).first] = (*ss_i).second;
1187         }
1188
1189         for(i=0;i<te->query_params.size();++i){
1190                 ret->query_params.push_back(new
1191                  var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
1192         }
1193
1194         if(te->sl){
1195                 ret->sl = new select_list_t();
1196                 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1197                 vector<select_element *> select_list = te->sl->get_select_list();
1198                 for(i=0;i<select_list.size();++i){
1199                         scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1200                         ret->sl->append(se,select_list[i]->name);
1201                 }
1202         }
1203
1204         ret->fm = te->fm->duplicate();
1205
1206         if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1207         if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1208         if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1209         if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1210         if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1211
1212         for(i=0;i<te->gb.size();++i){
1213                 extended_gb_t *tmp_g =  te->gb[i]->duplicate();
1214                 ret->gb.push_back(tmp_g);
1215         }
1216
1217         ret->mergevars = te->mergevars;
1218         if(te->slack)
1219                 ret->slack = dup_se(te->slack,NULL);
1220         ret->lineno = te->lineno;
1221         ret->charno = te->charno;
1222
1223         return(ret);
1224 }
1225
1226
1227
1228
1229
1230
1231
1232 /////////////////////////////////////////////////////////////////////////
1233 //                      Bind colrefs to a member of their FROM list
1234
1235 void bind_colref_se(scalarexp_t *se,
1236                                   vector<tablevar_t *> &fm,
1237                                   int prev_ref, int new_ref
1238                                  ){
1239   int p;
1240   vector<scalarexp_t *> operand_list;
1241   colref_t *cr;
1242   ifpref_t *ir;
1243
1244   switch(se->get_operator_type()){
1245     case SE_LITERAL:
1246     case SE_PARAM:
1247                 return;
1248     case SE_IFACE_PARAM:
1249                 ir = se->get_ifpref();
1250                 if(ir->get_tablevar_ref() == prev_ref){
1251                         ir->set_tablevar_ref(new_ref);
1252                         ir->set_tablevar(fm[new_ref]->get_var_name());
1253                 }
1254                 return;
1255
1256     case SE_COLREF:
1257                 cr=se->get_colref();
1258                 if(cr->get_tablevar_ref() == prev_ref){
1259                         cr->set_tablevar_ref(new_ref);
1260 //                      cr->set_interface(fm[new_ref]->get_interface());
1261                         cr->set_table_name(fm[new_ref]->get_var_name());
1262                 }
1263                 return;
1264
1265     case SE_UNARY_OP:
1266                 bind_colref_se(se->get_left_se(),  fm, prev_ref, new_ref);
1267                 return;
1268
1269     case SE_BINARY_OP:
1270                 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1271                 bind_colref_se(se->get_right_se(),  fm, prev_ref, new_ref);
1272                 return;
1273
1274     case SE_AGGR_STAR:
1275     case SE_AGGR_SE:
1276                 return;
1277
1278         case SE_FUNC:
1279                 if(se->get_aggr_ref() >= 0) return;
1280
1281                 operand_list = se->get_operands();
1282                 for(p=0;p<operand_list.size();p++){
1283                         bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
1284                 }
1285                 return;
1286
1287         default:
1288                 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
1289                 exit(1);
1290         break;
1291   }
1292   return;
1293
1294 }
1295
1296
1297
1298
1299 void bind_colref_pr(predicate_t *pr,
1300                                   vector<tablevar_t *> &fm,
1301                                   int prev_ref, int new_ref
1302                                  ){
1303   vector<scalarexp_t *> op_list;
1304   int o;
1305
1306         switch(pr->get_operator_type()){
1307         case PRED_IN:
1308                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1309                 return;
1310
1311         case PRED_COMPARE:
1312                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1313                 bind_colref_se(pr->get_right_se(),  fm, prev_ref, new_ref);
1314                 return;
1315
1316         case PRED_UNARY_OP:
1317                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1318                 return;
1319
1320         case PRED_BINARY_OP:
1321                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1322                 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
1323                 return;
1324         case PRED_FUNC:
1325                 op_list = pr->get_op_list();
1326                 for(o=0;o<op_list.size();++o){
1327                         bind_colref_se(op_list[o], fm, prev_ref, new_ref);
1328                 }
1329                 return;
1330
1331         default:
1332                 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1333                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1334                 exit(1);
1335         }
1336
1337         return;
1338
1339 }
1340
1341
1342 /////////////////////////////////////////////////////////////////////
1343 //              verify that the se refs only literals and params.
1344 //          (use to verify that the expression should stay in the hfta
1345 //               during a split)
1346 /////////////////////////////////////////////////////////////////////
1347
1348 bool is_literal_or_param_only(scalarexp_t *se){
1349         int o;
1350         vector<scalarexp_t *> operands;
1351         bool sum = true;
1352
1353         if(se == NULL) return(true);
1354
1355         switch(se->get_operator_type()){
1356         case SE_LITERAL:
1357         case SE_PARAM:
1358                 return(true);
1359         case SE_IFACE_PARAM:
1360                 return(false);          // need to treat as colref
1361         case SE_UNARY_OP:
1362                 return(is_literal_or_param_only(se->get_left_se()) );
1363         case SE_BINARY_OP:
1364                 return(
1365                         is_literal_or_param_only(se->get_left_se()) &&
1366                         is_literal_or_param_only(se->get_right_se())
1367                         );
1368         case SE_COLREF:
1369                 return(false);
1370         case SE_AGGR_STAR:
1371         case SE_AGGR_SE:
1372                 return(false);
1373         case SE_FUNC:
1374 //                      The fcn might have special meaning at the lfta ...
1375                 return(false);
1376
1377         default:
1378                 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1379                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1380                 exit(1);
1381         }
1382         return(0);
1383 }
1384
1385
1386
1387 /////////////////////////////////////////////////////////////////////
1388 //              Search for gb refs.
1389 //          (use to verify that no gbrefs in a gb def.)
1390 /////////////////////////////////////////////////////////////////////
1391
1392
1393 int count_gb_se(scalarexp_t *se){
1394         int o;
1395         vector<scalarexp_t *> operands;
1396         int sum = 0;
1397
1398         if(se == NULL) return(0);
1399
1400         switch(se->get_operator_type()){
1401         case SE_LITERAL:
1402         case SE_PARAM:
1403         case SE_IFACE_PARAM:
1404                 return(0);
1405         case SE_UNARY_OP:
1406                 return(count_gb_se(se->get_left_se()) );
1407         case SE_BINARY_OP:
1408                 return(
1409                         count_gb_se(se->get_left_se()) +
1410                         count_gb_se(se->get_right_se())
1411                         );
1412         case SE_COLREF:
1413                 if(se->get_gb_ref() < 0) return(0);
1414                 return(1);
1415         case SE_AGGR_STAR:
1416         case SE_AGGR_SE:
1417                 return(0);
1418         case SE_FUNC:
1419                 operands = se->get_operands();
1420                 for(o=0;o<operands.size();o++){
1421                         sum +=  count_gb_se(operands[o]);
1422                 }
1423                 return(sum);
1424
1425         default:
1426                 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1427                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1428                 exit(1);
1429         }
1430         return(0);
1431 }
1432
1433
1434 /////////////////////////////////////////////////////////////////////
1435 ////////////////                Search for stateful fcns.
1436 /////////////////////////////////////////////////////////////////////
1437
1438
1439 int se_refs_sfun(scalarexp_t *se){
1440         int o;
1441         vector<scalarexp_t *> operands;
1442         int sum = 0;
1443
1444         if(se == NULL) return(0);
1445
1446         switch(se->get_operator_type()){
1447         case SE_LITERAL:
1448         case SE_PARAM:
1449         case SE_IFACE_PARAM:
1450                 return(0);
1451         case SE_UNARY_OP:
1452                 return(se_refs_sfun(se->get_left_se()) );
1453         case SE_BINARY_OP:
1454                 return(
1455                         se_refs_sfun(se->get_left_se()) +
1456                         se_refs_sfun(se->get_right_se())
1457                         );
1458         case SE_COLREF:
1459                 return(0);
1460         case SE_AGGR_STAR:
1461         case SE_AGGR_SE:
1462                 return(0);
1463         case SE_FUNC:
1464                 operands = se->get_operands();
1465                 for(o=0;o<operands.size();o++){
1466                         sum +=  se_refs_sfun(operands[o]);
1467                 }
1468                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1469
1470 //                      for now, stateful functions count as aggregates.
1471                 if(se->get_storage_state() != "")
1472                         sum++;
1473
1474                 return(sum);
1475
1476         default:
1477                 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1478                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1479                 exit(1);
1480         }
1481         return(0);
1482 }
1483
1484
1485 //              Return a count of the number of stateful fcns in this predicate.
1486 int pred_refs_sfun(predicate_t *pr){
1487         vector<scalarexp_t *> op_list;
1488         int o, aggr_sum;
1489
1490         switch(pr->get_operator_type()){
1491         case PRED_IN:
1492                 return(se_refs_sfun(pr->get_left_se()) );
1493         case PRED_COMPARE:
1494                 return(
1495                         se_refs_sfun(pr->get_left_se()) +
1496                         se_refs_sfun(pr->get_right_se())
1497                 );
1498         case PRED_UNARY_OP:
1499                 return(pred_refs_sfun(pr->get_left_pr()) );
1500         case PRED_BINARY_OP:
1501                 return(
1502                         pred_refs_sfun(pr->get_left_pr()) +
1503                         pred_refs_sfun(pr->get_right_pr())
1504                 );
1505         case PRED_FUNC:
1506                 op_list = pr->get_op_list();
1507                 aggr_sum = 0;
1508                 for(o=0;o<op_list.size();++o){
1509                         aggr_sum += se_refs_sfun(op_list[o]);
1510                 }
1511                 return(aggr_sum);
1512
1513         default:
1514                 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1515                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1516                 exit(1);
1517         }
1518
1519         return(0);
1520 }
1521
1522 //////////////////////////////////////////////////
1523
1524 /////////////////////////////////////////////////////////////////////
1525 ////////////////                Search for aggregates.
1526 /////////////////////////////////////////////////////////////////////
1527
1528
1529 int count_aggr_se(scalarexp_t *se, bool strict){
1530         int o;
1531         vector<scalarexp_t *> operands;
1532         int sum = 0;
1533
1534         if(se == NULL) return(0);
1535
1536         switch(se->get_operator_type()){
1537         case SE_LITERAL:
1538         case SE_PARAM:
1539         case SE_IFACE_PARAM:
1540                 return(0);
1541         case SE_UNARY_OP:
1542                 return(count_aggr_se(se->get_left_se(), strict) );
1543         case SE_BINARY_OP:
1544                 return(
1545                         count_aggr_se(se->get_left_se(), strict) +
1546                         count_aggr_se(se->get_right_se(), strict)
1547                         );
1548         case SE_COLREF:
1549                 return(0);
1550         case SE_AGGR_STAR:
1551         case SE_AGGR_SE:
1552                 return(1);
1553         case SE_FUNC:
1554                 operands = se->get_operands();
1555                 for(o=0;o<operands.size();o++){
1556                         sum +=  count_aggr_se(operands[o], strict);
1557                 }
1558                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1559
1560 //                      now, stateful functions can count as aggregates.
1561 //                      if we are being strict.
1562                 if(! strict && se->get_storage_state() != "")
1563                         sum++;
1564
1565                 return(sum);
1566
1567         default:
1568                 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1569                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1570                 exit(1);
1571         }
1572         return(0);
1573 }
1574
1575
1576 //              Return a count of the number of aggregate fcns in this predicate.
1577 int count_aggr_pred(predicate_t *pr, bool strict){
1578         vector<scalarexp_t *> op_list;
1579         int o, aggr_sum;
1580
1581         switch(pr->get_operator_type()){
1582         case PRED_IN:
1583                 return(count_aggr_se(pr->get_left_se(), strict) );
1584         case PRED_COMPARE:
1585                 return(
1586                         count_aggr_se(pr->get_left_se(), strict) +
1587                         count_aggr_se(pr->get_right_se(), strict)
1588                 );
1589         case PRED_UNARY_OP:
1590                 return(count_aggr_pred(pr->get_left_pr(), strict) );
1591         case PRED_BINARY_OP:
1592                 return(
1593                         count_aggr_pred(pr->get_left_pr(), strict) +
1594                         count_aggr_pred(pr->get_right_pr(), strict)
1595                 );
1596         case PRED_FUNC:
1597                 op_list = pr->get_op_list();
1598                 aggr_sum = 0;
1599                 for(o=0;o<op_list.size();++o){
1600                         aggr_sum += count_aggr_se(op_list[o], strict);
1601                 }
1602                 return(aggr_sum);
1603
1604         default:
1605                 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1606                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1607                 exit(1);
1608         }
1609
1610         return(0);
1611 }
1612
1613 //////////////////////////////////////////////////
1614 ///             Analyze tablevar refs
1615
1616 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1617         int o;
1618         vector<scalarexp_t *> operands;
1619         int vref;
1620         colref_t *cr;
1621         ifpref_t *ir;
1622
1623         if(se == NULL) return;
1624
1625         switch(se->get_operator_type()){
1626         case SE_LITERAL:
1627         case SE_PARAM:
1628                 return;
1629         case SE_IFACE_PARAM:
1630                 ir = se->get_ifpref();
1631                 vref = ir->get_tablevar_ref();
1632                 for(o=0;o<reflist.size();++o){
1633                         if(vref == reflist[o]) return;
1634                 }
1635                 reflist.push_back(vref);
1636                 return;
1637         case SE_UNARY_OP:
1638                 get_tablevar_ref_se(se->get_left_se(), reflist);
1639                 return;
1640         case SE_BINARY_OP:
1641                 get_tablevar_ref_se(se->get_left_se(), reflist);
1642                 get_tablevar_ref_se(se->get_right_se(), reflist);
1643                 return;
1644         case SE_COLREF:
1645                 if(se->is_gb()) return;
1646                 cr = se->get_colref();
1647                 vref = cr->get_tablevar_ref();
1648                 for(o=0;o<reflist.size();++o){
1649                         if(vref == reflist[o]) return;
1650                 }
1651                 reflist.push_back(vref);
1652                 return;
1653         case SE_AGGR_STAR:
1654         case SE_AGGR_SE:
1655                 return;
1656         case SE_FUNC:
1657                 if(se->get_aggr_ref() >= 0) return;
1658
1659                 operands = se->get_operands();
1660                 for(o=0;o<operands.size();o++){
1661                         get_tablevar_ref_se(operands[o], reflist);
1662                 }
1663                 return;
1664
1665         default:
1666                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1667                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1668                 exit(1);
1669         }
1670         return;
1671 }
1672
1673
1674 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1675         vector<scalarexp_t *> op_list;
1676         int o;
1677
1678         switch(pr->get_operator_type()){
1679         case PRED_IN:
1680                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1681                 return;
1682         case PRED_COMPARE:
1683                 get_tablevar_ref_se(pr->get_left_se(),reflist);
1684                 get_tablevar_ref_se(pr->get_right_se(),reflist);
1685                 return;
1686         case PRED_UNARY_OP:
1687                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1688                 return;
1689         case PRED_BINARY_OP:
1690                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1691                 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
1692                 return;
1693         case PRED_FUNC:
1694                 op_list = pr->get_op_list();
1695                 for(o=0;o<op_list.size();++o){
1696                         get_tablevar_ref_se(op_list[o],reflist);
1697                 }
1698                 return;
1699         default:
1700                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1701                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1702         }
1703
1704         return;
1705 }
1706
1707
1708 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1709
1710 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1711         int agg_id;
1712         int o;
1713         vector<scalarexp_t *> operands;
1714
1715         switch(se->get_operator_type()){
1716         case SE_LITERAL:
1717         case SE_PARAM:
1718         case SE_IFACE_PARAM:
1719                 return;
1720         case SE_UNARY_OP:
1721                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
1722                 return;
1723         case SE_BINARY_OP:
1724                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1725                 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
1726                 return;
1727         case SE_COLREF:
1728                 return;
1729         case SE_AGGR_STAR:
1730                 return;
1731         case SE_AGGR_SE:
1732                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1733                 return;
1734         case SE_FUNC:
1735                 operands = se->get_operands();
1736                 for(o=0;o<operands.size();o++){
1737                         gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1738                 }
1739                 if(se->get_storage_state() != ""){
1740                         states_refd.insert(se->get_storage_state());
1741                 }
1742                 return;
1743
1744         default:
1745                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1746                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1747                 exit(1);
1748         }
1749         return;
1750 }
1751
1752
1753 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.
1754
1755 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1756         vector<scalarexp_t *> op_list;
1757         int o;
1758
1759         switch(pr->get_operator_type()){
1760         case PRED_IN:
1761                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1762                 return;
1763         case PRED_COMPARE:
1764                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1765                 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
1766                 return;
1767         case PRED_UNARY_OP:
1768                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1769                 return;
1770         case PRED_BINARY_OP:
1771                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1772                 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
1773                 return;
1774         case PRED_FUNC:
1775                 op_list = pr->get_op_list();
1776                 for(o=0;o<op_list.size();++o){
1777                         gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
1778                 }
1779                 return;
1780
1781         default:
1782                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1783                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1784                 exit(1);
1785         }
1786
1787         return;
1788 }
1789
1790
1791
1792
1793 //                      walk se tree and collect aggregates into aggregate table.
1794 //                      duplicate aggregates receive the same idx to the table.
1795
1796 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1797         int agg_id;
1798         int o;
1799         vector<scalarexp_t *> operands;
1800
1801         switch(se->get_operator_type()){
1802         case SE_LITERAL:
1803         case SE_PARAM:
1804         case SE_IFACE_PARAM:
1805                 return;
1806         case SE_UNARY_OP:
1807                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
1808                 return;
1809         case SE_BINARY_OP:
1810                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1811                 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
1812                 return;
1813         case SE_COLREF:
1814                 return;
1815         case SE_AGGR_STAR:
1816                 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1817                 se->set_aggr_id(agg_id);
1818                 return;
1819         case SE_AGGR_SE:
1820                 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1821                 se->set_aggr_id(agg_id);
1822                 return;
1823         case SE_FUNC:
1824                 operands = se->get_operands();
1825                 for(o=0;o<operands.size();o++){
1826                         build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1827                 }
1828                 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1829                         agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1830                         se->set_aggr_id(agg_id);
1831                 }
1832                 return;
1833
1834         default:
1835                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1836                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
1837                 exit(1);
1838         }
1839         return;
1840 }
1841
1842
1843 //                      walk se tree and collect aggregates into aggregate table.
1844 //                      duplicate aggregates receive the same idx to the table.
1845
1846 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1847         vector<scalarexp_t *> op_list;
1848         int o;
1849
1850         switch(pr->get_operator_type()){
1851         case PRED_IN:
1852                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1853                 return;
1854         case PRED_COMPARE:
1855                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1856                 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
1857                 return;
1858         case PRED_UNARY_OP:
1859                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1860                 return;
1861         case PRED_BINARY_OP:
1862                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1863                 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
1864                 return;
1865         case PRED_FUNC:
1866                 op_list = pr->get_op_list();
1867                 for(o=0;o<op_list.size();++o){
1868                         build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
1869                 }
1870                 return;
1871
1872         default:
1873                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1874                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1875                 exit(1);
1876         }
1877
1878         return;
1879 }
1880
1881
1882 //                      Return true if the two scalar expressions
1883 //                      represent the same value (e.g., use to eliminate
1884 //                      duplicate aggregates).
1885 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1886         vector<scalarexp_t *> operands1;
1887         vector<scalarexp_t *> operands2;
1888         int o;
1889
1890 //              First handle the case of nulls (e.g. COUNT aggrs)
1891         if(se1 == NULL && se2 == NULL) return(true);
1892         if(se1 == NULL || se2 == NULL) return(false);
1893
1894 //              In all cases, must be the same oeprator type and same operator.
1895         if(se1->get_operator_type() != se2->get_operator_type())
1896                 return(false);
1897         if(se1->get_op() != se2->get_op() )
1898                 return(false);
1899
1900         switch(se1->get_operator_type()){
1901         case SE_LITERAL:
1902                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1903         case SE_PARAM:
1904                 return(se1->get_param_name() == se2->get_param_name() );
1905         case SE_IFACE_PARAM:
1906                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1907         case SE_UNARY_OP:
1908                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1909         case SE_BINARY_OP:
1910                 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1911                         return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
1912                 return(false);
1913         case SE_COLREF:
1914                 if(se1->is_gb() && se2->is_gb())
1915                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1916                 if(se1->is_gb() || se2->is_gb())
1917                         return(false);
1918                 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
1919         case SE_AGGR_STAR:
1920                 return(true);
1921         case SE_AGGR_SE:
1922                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1923         case SE_FUNC:
1924                 if(se1->get_op() != se2->get_op()) return(false);
1925
1926                 operands1 = se1->get_operands();
1927                 operands2 = se2->get_operands();
1928                 if(operands1.size() != operands2.size()) return(false);
1929
1930                 for(o=0;o<operands1.size();o++){
1931                         if(! is_equivalent_se(operands1[o], operands2[o]) )
1932                                 return(false);
1933                 }
1934                 return(true);
1935         default:
1936                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1937                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1938                 exit(1);
1939         }
1940         return(false);
1941 }
1942
1943
1944 //              Similar to is_equivalent_se, but with a looser definition
1945 //              of equivalence of colrefs.  Here, say they are equivalent
1946 //              if their base table is the same.  Use to find equivalent
1947 //              predicates on base tables.
1948 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1949         vector<scalarexp_t *> operands1;
1950         vector<scalarexp_t *> operands2;
1951         int o;
1952
1953         if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1954                 se1 = se1->get_right_se();
1955         }
1956         if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1957                 se2 = se2->get_right_se();
1958         }
1959
1960 //              First handle the case of nulls (e.g. COUNT aggrs)
1961         if(se1 == NULL && se2 == NULL) return(true);
1962         if(se1 == NULL || se2 == NULL) return(false);
1963
1964 //              In all cases, must be the same oeprator type and same operator.
1965         if(se1->get_operator_type() != se2->get_operator_type())
1966                 return(false);
1967         if(se1->get_op() != se2->get_op() )
1968                 return(false);
1969
1970         switch(se1->get_operator_type()){
1971         case SE_LITERAL:
1972                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1973         case SE_PARAM:
1974                 return(se1->get_param_name() == se2->get_param_name() );
1975         case SE_IFACE_PARAM:
1976                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1977         case SE_UNARY_OP:
1978                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1979         case SE_BINARY_OP:
1980                 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1981                         return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
1982                 return(false);
1983         case SE_COLREF:
1984 /*
1985                 if(se1->is_gb() && se2->is_gb())
1986                         return( se1->get_gb_ref() == se2->get_gb_ref() );
1987                 if(se1->is_gb() || se2->is_gb())
1988                         return(false);
1989 */
1990                 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
1991         case SE_AGGR_STAR:
1992                 return(true);
1993         case SE_AGGR_SE:
1994                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1995         case SE_FUNC:
1996                 if(se1->get_op() != se2->get_op()) return(false);
1997
1998                 operands1 = se1->get_operands();
1999                 operands2 = se2->get_operands();
2000                 if(operands1.size() != operands2.size()) return(false);
2001
2002                 for(o=0;o<operands1.size();o++){
2003                         if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
2004                                 return(false);
2005                 }
2006                 return(true);
2007         default:
2008                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2009                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2010                 exit(1);
2011         }
2012         return(false);
2013 }
2014
2015
2016 //              Find predicates which are equivalent when
2017 //              looking at the base tables.  Use to find
2018 //              common prefilter.
2019 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2020 int i, o;
2021
2022 //              First handle the case of nulls
2023         if(p1 == NULL && p2 == NULL) return(true);
2024         if(p1 == NULL || p2 == NULL) return(false);
2025
2026
2027   if(p1->get_operator_type() != p2->get_operator_type())
2028          return(false);
2029   if(p1->get_op() != p2->get_op())
2030          return(false);
2031
2032     vector<literal_t *> ll1;
2033     vector<literal_t *> ll2;
2034         vector<scalarexp_t *> op_list1, op_list2;
2035
2036
2037   switch(p2->get_operator_type()){
2038      case PRED_COMPARE:
2039         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2040             return(false);
2041         return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
2042     break;
2043     case PRED_IN:
2044         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2045             return(false);
2046         ll1 = p1->get_lit_vec();
2047         ll2 = p2->get_lit_vec();
2048         if(ll1.size() != ll2.size())
2049             return(false);
2050         for(i=0;i<ll1.size();i++){
2051           if(! ll1[i]->is_equivalent( ll2[i] ) )
2052             return(false);
2053         }
2054         return(true);
2055     break;
2056      case PRED_UNARY_OP:
2057         return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2058     break;
2059      case PRED_BINARY_OP:
2060         if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2061             return(false);
2062         return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
2063     break;
2064          case PRED_FUNC:
2065                 op_list1 = p1->get_op_list();
2066                 op_list2 = p2->get_op_list();
2067                 if(op_list1.size() != op_list2.size()) return(false);
2068                 for(o=0;o<op_list1.size();++o){
2069                         if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
2070                 }
2071                 return(true);
2072
2073    }
2074
2075     return(false);
2076 }
2077
2078
2079
2080 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2081   if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2082          return(false);
2083   if(p1->get_fcn_id() != p2->get_fcn_id())
2084                 return false;
2085   vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2086   int o;
2087   vector<scalarexp_t *> op_list1 = p1->get_op_list();
2088   vector<scalarexp_t *> op_list2 = p2->get_op_list();
2089   if(op_list1.size() != op_list2.size()) return(false);
2090   for(o=0;o<op_list1.size();++o){
2091           if(cl_op[o]){
2092                 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2093                         return(false);
2094         }
2095   }
2096   return true;
2097
2098 }
2099
2100
2101
2102
2103 //                      Verify that the scalar expression (in a such that clause)
2104 //                      is acceptable in an aggregation query.  No column
2105 //                      references allowed outside aggregates, except for
2106 //                      references to group-by attributes.
2107 //                      return true if OK, false if bad.
2108 bool verify_aggr_query_se(scalarexp_t *se){
2109         vector <scalarexp_t *> operands;
2110         int o;
2111
2112     switch(se->get_operator_type()){
2113     case SE_LITERAL:
2114     case SE_PARAM:
2115     case SE_IFACE_PARAM:
2116         return(true );
2117     case SE_UNARY_OP:
2118         return(verify_aggr_query_se(se->get_left_se() ) );
2119     case SE_BINARY_OP:
2120         return(verify_aggr_query_se(se->get_left_se() ) &&
2121             verify_aggr_query_se(se->get_right_se() ) );
2122     case SE_COLREF:
2123         if(se->is_gb() ) return(true);
2124         fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2125                         "only reference constants, group-by attributes, and "
2126                         "aggregates,  (%s) line %d, character %d.\n",
2127                         se->get_colref()->to_string().c_str(),
2128                                                 se->get_lineno(), se->get_charno() );
2129         return(false);
2130     case SE_AGGR_STAR:
2131     case SE_AGGR_SE:
2132 //                      colrefs and gbrefs allowed.
2133 //                      check for nested aggregation elsewhere, so just return TRUE
2134         return(true);
2135         case SE_FUNC:
2136 //                      If its a UDAF, just return true
2137                 if(se->get_aggr_ref() >= 0) return true;
2138
2139                 operands = se->get_operands();
2140
2141                 for(o=0;o<operands.size();o++){
2142                         if(! verify_aggr_query_se(operands[o]) )
2143                                 return(false);
2144                 }
2145                 return(true);
2146     default:
2147         fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2148                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2149         exit(1);
2150     }
2151     return(false);
2152 }
2153
2154
2155
2156
2157 //                      Find complex literals.
2158 //                      NOTE : This analysis should be deferred to
2159 //                                 code generation time.
2160 //                      This analysis drills into aggr se specs.
2161 //                      Shouldn't this be done at the aggregate table?
2162 //                      But, its not a major loss of efficiency.
2163 //                              UPDATE : drilling into aggr se's is causnig a problem
2164 //                                      so I've eliminated it.
2165
2166 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2167                                                                 cplx_lit_table *complex_literals){
2168         literal_t *l;
2169         vector<scalarexp_t *> operands;
2170         int o;
2171         scalarexp_t *param_se;
2172         data_type *dt;
2173
2174         switch(se->get_operator_type()){
2175         case SE_LITERAL:
2176                 l = se->get_literal();
2177                 if(l->constructor_name() != ""){
2178                         int cl_idx = complex_literals->add_cpx_lit(l, false);
2179                         l->set_cpx_lit_ref(cl_idx);
2180                 }
2181                 return(true);
2182         case SE_PARAM:
2183                 return(true );
2184 //                      SE_IFACE_PARAM should not exist when this is called.
2185         case SE_UNARY_OP:
2186                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2187         case SE_BINARY_OP:
2188                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2189                         find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2190         case SE_COLREF:
2191                 return(true);
2192         case SE_AGGR_STAR:
2193                 return(true);
2194         case SE_AGGR_SE:
2195                 return true;
2196 //              return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2197         case SE_FUNC:
2198                 if(se->get_aggr_ref() >= 0) return true;
2199
2200                 operands = se->get_operands();
2201                 for(o=0;o<operands.size();o++){
2202                         find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
2203                 }
2204                 return(true);
2205         default:
2206                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2207                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2208                 exit(1);
2209         }
2210         return(false);
2211 }
2212
2213
2214
2215
2216 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2217                                                                 cplx_lit_table *complex_literals){
2218         int i,o;
2219         vector<literal_t *> litl;
2220         vector<scalarexp_t *> op_list;
2221
2222
2223         switch(pr->get_operator_type()){
2224         case PRED_IN:
2225                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2226                 litl = pr->get_lit_vec();
2227                 for(i=0;i<litl.size();i++){
2228                         if(litl[i]->constructor_name() != ""){
2229                                 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2230                                 litl[i]->set_cpx_lit_ref(cl_idx);
2231                         }
2232                 }
2233                 return;
2234         case PRED_COMPARE:
2235                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2236                 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
2237                 return;
2238         case PRED_UNARY_OP:
2239                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2240                 return;
2241         case PRED_BINARY_OP:
2242                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2243                 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
2244                 return;
2245         case PRED_FUNC:
2246                 op_list = pr->get_op_list();
2247                 for(o=0;o<op_list.size();++o){
2248                         find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
2249                 }
2250                 return;
2251         default:
2252                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2253                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2254                 exit(1);
2255         }
2256
2257         return;
2258 }
2259
2260
2261 //              Find all things which are passed as handle parameters to functions
2262 //              (query parameters, (simple) literals, complex literals)
2263 //              These expressions MUST be processed with find_complex_literal_??
2264 //              first.
2265 //                      TODO: this analysis drills into the aggregate SEs.
2266 //                      Shouldn't this be done on the aggr table SEs instead?
2267 //                      to avoid duplication.  THe handle registration
2268 //                      might be expensive ...
2269 //                      REVISED : drilling into aggr se's is causing problems, eliminated.
2270
2271 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2272                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2273         vector<scalarexp_t *> operands;
2274         vector<bool> handle_ind;
2275         int o;
2276         scalarexp_t *param_se;
2277         data_type *dt;
2278         literal_t *l;
2279
2280         switch(se->get_operator_type()){
2281         case SE_LITERAL:
2282                 return;
2283         case SE_PARAM:
2284                 return;
2285 //              case SE_IFACE_PARAM:            SHOULD NOT EXIST when this is called
2286         case SE_UNARY_OP:
2287                 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2288                 return;
2289         case SE_BINARY_OP:
2290                 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2291                 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2292                 return;
2293         case SE_COLREF:
2294                 return;
2295         case SE_AGGR_STAR:
2296                 return;
2297         case SE_AGGR_SE:
2298 //              find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2299                 return;
2300         case SE_FUNC:
2301                 if(se->get_aggr_ref() >= 0) return ;
2302
2303                 operands = se->get_operands();
2304                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2305                 for(o=0;o<operands.size();o++){
2306                         if(handle_ind[o]){
2307                                 handle_param_tbl_entry *he;
2308                                 param_se = operands[o];
2309                                 if(param_se->get_operator_type() != SE_LITERAL &&
2310                                                 param_se->get_operator_type() != SE_PARAM){
2311                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2312                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
2313                                         exit(1);
2314                                 }
2315
2316                                 if(param_se->get_operator_type() == SE_PARAM){
2317                                         he = new handle_param_tbl_entry(
2318                                                 se->get_op(), o, param_se->get_param_name(),
2319                                                 param_se->get_data_type()->get_type_str());
2320                                 }else{
2321                                         l = param_se->get_literal();
2322                                         if(l->is_cpx_lit()){
2323                                                 he = new handle_param_tbl_entry(
2324                                                         se->get_op(), o, l->get_cpx_lit_ref(),
2325                                                 param_se->get_data_type()->get_type_str());
2326                                         }else{
2327                                                 he = new handle_param_tbl_entry(
2328                                                         se->get_op(), o, l,
2329                                                 param_se->get_data_type()->get_type_str());
2330                                         }
2331                                 }
2332                                 param_se->set_handle_ref(handle_tbl.size());
2333                                 handle_tbl.push_back(he);
2334                         }else{
2335                                 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
2336                         }
2337                 }
2338                 return;
2339         default:
2340                 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2341                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2342                 exit(1);
2343         }
2344         return;
2345 }
2346
2347
2348 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2349                                                 vector<handle_param_tbl_entry *> &handle_tbl){
2350         vector<literal_t *> litl;
2351         vector<scalarexp_t *> op_list;
2352         scalarexp_t *param_se;
2353         vector<bool> handle_ind;
2354         int o;
2355         literal_t *l;
2356
2357         switch(pr->get_operator_type()){
2358         case PRED_IN:
2359                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2360                 return;
2361         case PRED_COMPARE:
2362                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2363                 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
2364                 return;
2365         case PRED_UNARY_OP:
2366                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2367                 return;
2368         case PRED_BINARY_OP:
2369                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2370                 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
2371                 return;
2372         case PRED_FUNC:
2373                 op_list = pr->get_op_list();
2374                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2375                 for(o=0;o<op_list.size();++o){
2376                         if(handle_ind[o]){
2377                                 handle_param_tbl_entry *he;
2378                                 param_se = op_list[o];
2379                                 if(param_se->get_operator_type() != SE_LITERAL &&
2380                                                 param_se->get_operator_type() != SE_PARAM){
2381                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",
2382                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
2383                                         exit(1);
2384                                 }
2385
2386                                 if(param_se->get_operator_type() == SE_PARAM){
2387                                         he = new handle_param_tbl_entry(
2388                                                 pr->get_op(), o, param_se->get_param_name(),
2389                                                 param_se->get_data_type()->get_type_str());
2390                                 }else{
2391                                         l = param_se->get_literal();
2392                                         if(l->is_cpx_lit()){
2393                                                 he = new handle_param_tbl_entry(
2394                                                         pr->get_op(), o, l->get_cpx_lit_ref(),
2395                                                 param_se->get_data_type()->get_type_str());
2396                                         }else{
2397                                                 he = new handle_param_tbl_entry(
2398                                                         pr->get_op(), o, l,
2399                                                 param_se->get_data_type()->get_type_str());
2400                                         }
2401                                 }
2402                                 param_se->set_handle_ref(handle_tbl.size());
2403                                 handle_tbl.push_back(he);
2404                         }else{
2405                                 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
2406                         }
2407                 }
2408                 return;
2409         default:
2410                 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2411                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2412                 exit(1);
2413         }
2414
2415         return;
2416 }
2417
2418
2419 //                      Verify the HAVING predicate : it
2420 //                      can access gb vars, aggregates, and constants,
2421 //                      but not colrefs.
2422 //                      return 1 if OK, -1 if bad.
2423 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2424
2425 //                      Extended to deal with cleaning_by, cleaning_when :
2426 //                      verify that any aggregate function
2427 //                      has the multiple output property.
2428
2429 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2430         int l_ret, r_ret;
2431         vector<scalarexp_t *> operands;
2432         vector<data_type *> odt;
2433         int o;
2434
2435         switch(se->get_operator_type()){
2436         case SE_LITERAL:
2437                 return(1);
2438         case SE_PARAM:
2439         case SE_IFACE_PARAM:
2440                 return(1);
2441         case SE_UNARY_OP:
2442                 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
2443         case SE_BINARY_OP:
2444                 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2445                 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2446                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
2447                 return(1);
2448         case SE_COLREF:
2449                 if(se->is_gb()) return 1;
2450                 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2451                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2452                 return(-1);
2453         case SE_AGGR_STAR:
2454         case SE_AGGR_SE:
2455 //                      colrefs and gbrefs allowed.
2456 //                      check for nested aggregation elsewhere, so just return TRUE
2457                 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2458                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2459                                 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2460                         return(-1);
2461                 }
2462
2463 //                              Ensure that aggregate refs allow multiple outputs
2464 //                              in CLEANING_WHEN, CLEANING_BY
2465                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2466                         if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2467                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2468                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2469                                 return(-1);
2470                         }
2471                 }
2472
2473
2474                 return(1);
2475         case SE_FUNC:
2476                 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2477                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2478                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2479                 return(-1);
2480                 }
2481
2482                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2483                         if(se->get_aggr_ref() >= 0  && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2484                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2485                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2486                                 return(-1);
2487                         }
2488                 }
2489
2490                 if(se->get_aggr_ref() >= 0)     // don't descent into aggregates.
2491                         return 1;
2492
2493                 operands = se->get_operands();
2494                 r_ret = 1;
2495                 for(o=0;o<operands.size();o++){
2496                         l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2497                         if(l_ret < 0) r_ret = -1;
2498                 }
2499                 if(r_ret < 0) return(-1); else return(1);
2500                 return(1);
2501         default:
2502                 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2503                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
2504                 return(-1);
2505         }
2506         return(-1);
2507 }
2508
2509
2510 //                      Verify the HAVING predicate : it
2511 //                      can access gb vars, aggregates, and constants,
2512 //                      but not colrefs.
2513 //                      return 1 if OK, -1 if bad.
2514 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?
2515
2516
2517 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2518         int l_ret, r_ret;
2519         vector<literal_t *> litl;
2520         vector<scalarexp_t *> op_list;
2521         int o;
2522
2523         switch(pr->get_operator_type()){
2524         case PRED_IN:
2525                 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
2526         case PRED_COMPARE:
2527                 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2528                 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2529                 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
2530         case PRED_UNARY_OP:
2531                 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2532         case PRED_BINARY_OP:
2533                 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2534                 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2535                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
2536                 return(1);
2537         case PRED_FUNC:
2538                 op_list = pr->get_op_list();
2539                 l_ret = 1;
2540                 for(o=0;o<op_list.size();++o){
2541                         if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
2542                 }
2543                 return(l_ret);
2544
2545         default:
2546                 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2547                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2548         }
2549
2550         return(-1);
2551 }
2552
2553
2554 //////////////////////////////////////////////////////////////////////////
2555 //////////////////////////////////////////////////////////////////////////
2556 ///////                 cnf and pred analysis and manipulation
2557
2558 // ----------------------------------------------------------------------
2559 //  Convert the predicates to a list of conjuncts
2560 //  (not actually cnf).  Do some analysis
2561 //  on their properties.
2562 // ----------------------------------------------------------------------
2563
2564
2565 //  Put into list clist the predicates that
2566 //  are AND'ed together.
2567
2568 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2569
2570   if(pr == NULL) return;
2571
2572   switch(pr->get_operator_type()){
2573      case PRED_COMPARE:
2574         clist.push_back(new cnf_elem(pr));
2575         return;
2576         break;
2577      case PRED_IN:
2578         clist.push_back(new cnf_elem(pr));
2579         return;
2580         break;
2581      case PRED_UNARY_OP:
2582         clist.push_back(new cnf_elem(pr));
2583         return;
2584         break;
2585      case PRED_BINARY_OP:
2586         if(pr->get_op() == "OR"){
2587                         clist.push_back(new cnf_elem(pr));
2588                         return;
2589                 }
2590                 if(pr->get_op() =="AND"){
2591                    make_cnf_from_pr(pr->get_left_pr(),clist);
2592                    make_cnf_from_pr(pr->get_right_pr(),clist);
2593                    return;
2594                 }
2595         case PRED_FUNC:
2596         clist.push_back(new cnf_elem(pr));
2597         return;
2598         break;
2599         default:
2600                 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2601                 exit(1);
2602                         break;
2603            }
2604 }
2605
2606
2607
2608 //  Find out what things are referenced in a se,
2609 //  to use for analyzing a predicate.
2610 //  Currently, is it simple (no operators), does it
2611 //  reference a group-by column, does it reference an
2612 //  attribute of a table.
2613 //
2614 //      analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
2615
2616
2617 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2618  int p;
2619  vector<scalarexp_t *> operand_list;
2620
2621         switch(se->get_operator_type()){
2622         case SE_LITERAL:
2623         case SE_PARAM:
2624         case SE_IFACE_PARAM:
2625                 return;
2626         case SE_COLREF:
2627                 if(se->is_gb() ) g=1;
2628                 else                    a=1;
2629                 return;
2630         case SE_UNARY_OP:
2631                 s=0;
2632                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2633                 return;
2634         case SE_BINARY_OP:
2635                 s=0;
2636                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2637                 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
2638                 return;
2639         case SE_AGGR_STAR:
2640         case SE_AGGR_SE:
2641                 agr = 1;
2642                 return;
2643         case SE_FUNC:
2644                 if(se->get_aggr_ref() >= 0){
2645                         agr = 1;
2646                         return;
2647                 }
2648                 s = 0;
2649                 operand_list = se->get_operands();
2650                 for(p=0;p<operand_list.size();p++){
2651                         analyze_cnf_se(operand_list[p],s,g,a,agr);
2652                 }
2653         break;
2654         }
2655
2656         return;
2657 }
2658
2659
2660
2661 void analyze_cnf_pr(predicate_t *pr, int &g, int &a,  int &agr){
2662 int dum_simple, o;
2663 vector<scalarexp_t *> op_list;
2664
2665
2666         switch(pr->get_operator_type()){
2667         case PRED_COMPARE:
2668                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2669                 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
2670                 return;
2671         case PRED_IN:
2672                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2673                 return;
2674         case PRED_UNARY_OP:
2675                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2676                 return;
2677         case PRED_BINARY_OP:
2678                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2679                 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
2680                 return;
2681         case PRED_FUNC:
2682                 op_list = pr->get_op_list();
2683                 for(o=0;o<op_list.size();++o){
2684                         analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2685                 }
2686                 return;
2687         default:
2688                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2689                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2690                 exit(1);
2691         }
2692 }
2693
2694
2695
2696 //  analyze a conjunct of a predicate.
2697 //  Is it atomic (e.g., a single predicate),
2698 //  and if so do a further analysis.
2699
2700 void analyze_cnf(cnf_elem *c){
2701
2702 //  analyze the predicate.
2703    analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
2704
2705    if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2706                 return;
2707    }
2708
2709
2710 //  its an atomic predicate -- get more info
2711    c->is_atom = 1;
2712
2713         if(c->pr->get_op() == "=")
2714                 c->eq_pred = 1;
2715         else
2716                 c->eq_pred = 0;
2717
2718         if(c->pr->get_operator_type() == PRED_IN)
2719                 c->in_pred = 1;
2720         else
2721                 c->in_pred = 0;
2722
2723         c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2724         analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
2725
2726         if(c->pr->get_operator_type() == PRED_COMPARE){
2727                 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
2728                 analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
2729         }
2730 }
2731
2732 void analyze_constraint_se(scalarexp_t *se,
2733                         int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
2734  int l_agr, l_gb, l_par, l_func, l_op;
2735  int r_agr, r_gb, r_par, r_func, r_op;
2736  int p;
2737  vector<scalarexp_t *> operand_list;
2738
2739         switch(se->get_operator_type()){
2740         case SE_LITERAL:
2741         case SE_IFACE_PARAM:
2742                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2743                 return;
2744         case SE_PARAM:
2745                 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2746                 return;
2747         case SE_COLREF:
2748                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2749                 if(se->is_gb() ){
2750                         if(enter_gb){
2751                                 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2752                         }else{
2753                                 n_gb=1;
2754                         }
2755                 }
2756                 return;
2757         case SE_UNARY_OP:
2758                 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2759                 n_op++;
2760                 return;
2761         case SE_BINARY_OP:
2762                 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2763                 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2764                 n_agr=l_agr+r_agr;
2765                 n_gb=l_gb+r_gb;
2766                 n_par=l_par+r_par;
2767                 n_func=l_func+r_func;
2768                 n_op=l_op+r_op+1;
2769                 return;
2770         case SE_AGGR_STAR:
2771         case SE_AGGR_SE:
2772                 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2773                 return;
2774         case SE_FUNC:
2775                 if(se->get_aggr_ref() >= 0){
2776                         n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2777                         if(Ext_fcns)
2778                                 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2779                         else
2780                                 n_func = 1;
2781                         return;
2782                 }
2783                 n_agr=0; n_gb = 0; n_par = 0;  n_op = 0;
2784                 if(Ext_fcns)
2785                         n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2786                 else
2787                         n_func = 1;
2788                 operand_list = se->get_operands();
2789                 for(p=0;p<operand_list.size();p++){
2790                         analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2791                         n_agr+=l_agr;
2792                         n_gb+=l_gb;
2793                         n_par+=l_par;
2794                         n_func+=l_func;
2795                         n_op += l_op;
2796                 }
2797         break;
2798         }
2799
2800         return;
2801 }
2802
2803 //              Estimate the cost of a constraint.
2804 //              WARNING a lot of cost assumptions are embedded in the code.
2805 void analyze_constraint_pr(predicate_t *pr,
2806                 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2807                 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2808  int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2809  int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2810
2811 int o;
2812 vector<scalarexp_t *> op_list;
2813
2814
2815         switch(pr->get_operator_type()){
2816         case PRED_COMPARE:
2817                 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2818                 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2819                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2820                 n_func=l_func+r_func; n_op=l_op+r_op;
2821                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2822                         pr->get_right_se()->get_data_type())
2823             ){
2824                         n_cmp_s = 0; n_cmp_c=1;
2825                 }else{
2826                         n_cmp_s = 1; n_cmp_c=0;
2827                 }
2828                 n_in = 0; n_pred = 0; n_bool = 0;
2829                 return;
2830         case PRED_IN:
2831 //                      Tread IN predicate as sequence of comparisons
2832                 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2833                 if(pr->get_left_se()->get_data_type()->complex_comparison(
2834                         pr->get_right_se()->get_data_type())
2835             ){
2836                         n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2837                 }else{
2838                         n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2839                 }
2840                 n_in = 0; n_pred = 0; n_bool = 0;
2841                 return;
2842         case PRED_UNARY_OP:
2843                 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2844                 n_bool++;
2845                 return;
2846         case PRED_BINARY_OP:
2847                 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2848                 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2849                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2850                 n_func=l_func+r_func; n_op=l_op+r_op;
2851                 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
2852                 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
2853                 return;
2854         case PRED_FUNC:
2855                 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2856                 if(Ext_fcns)
2857                         n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
2858                 else
2859                         n_pred = 1;
2860                 op_list = pr->get_op_list();
2861                 for(o=0;o<op_list.size();++o){
2862                         analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2863                         n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
2864                 }
2865                 return;
2866         default:
2867                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2868                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2869                 exit(1);
2870         }
2871 }
2872
2873 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2874  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2875         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2876                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2877
2878 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2879         c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2880 }
2881
2882 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2883  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2884         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2885                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2886 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2887         if(n_par || n_agr)
2888                 return false;
2889         int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2890 //printf("cost=%d\n",cost);
2891         return cost<10;
2892 }
2893
2894 //              The prefilter needs to translate constraints on
2895 //              gbvars into constraints involving their underlying SEs.
2896 //              The following two routines attach GB def info.
2897
2898 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2899  int p;
2900  vector<scalarexp_t *> operand_list;
2901
2902         switch(se->get_operator_type()){
2903         case SE_LITERAL:
2904         case SE_IFACE_PARAM:
2905         case SE_PARAM:
2906         case SE_AGGR_STAR:
2907                 return;
2908         case SE_COLREF:
2909                 if(se->is_gb() ){
2910                          se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2911                 }
2912                 return;
2913         case SE_UNARY_OP:
2914                 insert_gb_def_se(se->get_left_se(),gtbl);
2915                 return;
2916         case SE_BINARY_OP:
2917                 insert_gb_def_se(se->get_left_se(),gtbl);
2918                 insert_gb_def_se(se->get_right_se(),gtbl);
2919                 return;
2920         case SE_AGGR_SE:
2921                 insert_gb_def_se(se->get_left_se(),gtbl);
2922                 return;
2923         case SE_FUNC:
2924                 operand_list = se->get_operands();
2925                 for(p=0;p<operand_list.size();p++){
2926                         insert_gb_def_se(operand_list[p],gtbl);
2927                 }
2928         break;
2929         }
2930
2931         return;
2932 }
2933 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2934 vector<scalarexp_t *> op_list;
2935 int o;
2936
2937         switch(pr->get_operator_type()){
2938         case PRED_COMPARE:
2939                 insert_gb_def_se(pr->get_left_se(),gtbl);
2940                 insert_gb_def_se(pr->get_right_se(),gtbl);
2941                 return;
2942         case PRED_IN:
2943                 insert_gb_def_se(pr->get_left_se(),gtbl);
2944                 return;
2945         case PRED_UNARY_OP:
2946                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2947                 return;
2948         case PRED_BINARY_OP:
2949                 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2950                 insert_gb_def_pr(pr->get_right_pr(),gtbl);
2951                 return;
2952         case PRED_FUNC:
2953                 op_list = pr->get_op_list();
2954                 for(o=0;o<op_list.size();++o){
2955                         insert_gb_def_se(op_list[o],gtbl);
2956                 }
2957                 return;
2958         default:
2959                 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2960                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2961                 exit(1);
2962         }
2963 }
2964
2965 //              Substitute gbrefs with their definitions
2966 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2967  int p;
2968  vector<scalarexp_t *> operand_list;
2969  scalarexp_t *lse,*rse;
2970  colref_t *cr;
2971  string b_tbl;
2972  int b_idx;
2973
2974         switch(se->get_operator_type()){
2975         case SE_LITERAL:
2976         case SE_IFACE_PARAM:
2977         case SE_PARAM:
2978         case SE_AGGR_STAR:
2979                 return;
2980         case SE_COLREF:
2981                 cr = se->get_colref();
2982                 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2983                 b_idx = Schema->get_table_ref(b_tbl);
2984                 cr->tablevar_ref = b_idx;
2985                 return;
2986         case SE_UNARY_OP:
2987                 lse=se->get_left_se();
2988                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2989                         se->lhs.scalarp = lse->get_right_se();
2990                         subs_gbrefs_se(se,Schema);
2991                         return;
2992                 }
2993                 subs_gbrefs_se(se->get_left_se(),Schema);
2994                 return;
2995         case SE_BINARY_OP:
2996                 lse=se->get_left_se();
2997                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2998                         se->lhs.scalarp = lse->get_right_se();
2999                         subs_gbrefs_se(se,Schema);
3000                         return;
3001                 }
3002                 rse=se->get_right_se();
3003                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3004                         se->rhs.scalarp = rse->get_right_se();
3005                         subs_gbrefs_se(se,Schema);
3006                         return;
3007                 }
3008                 subs_gbrefs_se(se->get_left_se(),Schema);
3009                 subs_gbrefs_se(se->get_right_se(),Schema);
3010                 return;
3011         case SE_AGGR_SE:
3012                 lse=se->get_left_se();
3013                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3014                         se->lhs.scalarp = lse->get_right_se();
3015                         subs_gbrefs_se(se,Schema);
3016                         return;
3017                 }
3018                 subs_gbrefs_se(se->get_left_se(),Schema);
3019                 return;
3020         case SE_FUNC:
3021                 operand_list = se->get_operands();
3022                 for(p=0;p<operand_list.size();p++){
3023                         lse=operand_list[p];
3024                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3025                                 se->param_list[p] = lse->get_right_se();
3026                                 subs_gbrefs_se(se,Schema);
3027                                 return;
3028                         }
3029                 }
3030                 for(p=0;p<operand_list.size();p++){
3031                         subs_gbrefs_se(operand_list[p],Schema);
3032                 }
3033         break;
3034         }
3035
3036         return;
3037 }
3038
3039 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3040 vector<scalarexp_t *> op_list;
3041 int o;
3042 scalarexp_t *lse,*rse;
3043
3044         switch(pr->get_operator_type()){
3045         case PRED_COMPARE:
3046                 lse=pr->get_left_se();
3047                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3048                         pr->lhs.sexp = lse->get_right_se();
3049                         subs_gbrefs_pr(pr,Schema);
3050                         return;
3051                 }
3052                 rse=pr->get_right_se();
3053                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3054                         pr->rhs.sexp = rse->get_right_se();
3055                         subs_gbrefs_pr(pr,Schema);
3056                         return;
3057                 }
3058                 subs_gbrefs_se(pr->get_left_se(),Schema);
3059                 subs_gbrefs_se(pr->get_right_se(),Schema);
3060                 return;
3061         case PRED_IN:
3062                 lse=pr->get_left_se();
3063                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3064                         pr->lhs.sexp = lse->get_right_se();
3065                         subs_gbrefs_pr(pr,Schema);
3066                         return;
3067                 }
3068                 subs_gbrefs_se(pr->get_left_se(),Schema);
3069                 return;
3070         case PRED_UNARY_OP:
3071                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3072                 return;
3073         case PRED_BINARY_OP:
3074                 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3075                 subs_gbrefs_pr(pr->get_right_pr(),Schema);
3076                 return;
3077         case PRED_FUNC:
3078                 op_list = pr->get_op_list();
3079                 for(o=0;o<op_list.size();++o){
3080                         lse=op_list[o];
3081                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3082                                 pr->param_list[o] = lse->get_right_se();
3083                                 subs_gbrefs_pr(pr,Schema);
3084                                 return;
3085                         }
3086                         subs_gbrefs_se(op_list[o],Schema);
3087                 }
3088                 return;
3089         default:
3090                 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3091                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3092                 exit(1);
3093         }
3094 }
3095
3096
3097 //              Search for references to "expensive" fields.
3098 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3099  int p;
3100  vector<scalarexp_t *> operand_list;
3101  int cnt=0;
3102 table_def *td;
3103 param_list *plist;
3104
3105         switch(se->get_operator_type()){
3106         case SE_LITERAL:
3107         case SE_IFACE_PARAM:
3108         case SE_PARAM:
3109         case SE_AGGR_STAR:
3110         case SE_AGGR_SE:
3111                 return 0;
3112         case SE_COLREF:
3113                 if(se->is_gb())
3114                         return expensive_refs_se(se->rhs.scalarp,Schema);
3115                 td = Schema->get_table(se->lhs.colref->schema_ref);
3116                 plist = td->get_modifier_list(se->lhs.colref->field);
3117                 if(plist->contains_key("expensive"))
3118                         return 1;
3119                 return 0;
3120         case SE_UNARY_OP:
3121                 return expensive_refs_se(se->get_left_se(),Schema);
3122         case SE_BINARY_OP:
3123                 cnt += expensive_refs_se(se->get_left_se(),Schema);
3124                 cnt += expensive_refs_se(se->get_right_se(),Schema);
3125                 return cnt;
3126         case SE_FUNC:
3127                 operand_list = se->get_operands();
3128                 for(p=0;p<operand_list.size();p++){
3129                         cnt += expensive_refs_se(operand_list[p],Schema);
3130                 }
3131                 return cnt;
3132         break;
3133         }
3134
3135         return 0;
3136 }
3137
3138 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3139 vector<scalarexp_t *> op_list;
3140 int o;
3141 int cnt=0;
3142
3143         switch(pr->get_operator_type()){
3144         case PRED_COMPARE:
3145                 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3146                 cnt += expensive_refs_se(pr->get_right_se(),Schema);
3147                 return cnt;
3148         case PRED_IN:
3149                 return expensive_refs_se(pr->get_left_se(),Schema);
3150         case PRED_UNARY_OP:
3151                 return expensive_refs_pr(pr->get_left_pr(),Schema);
3152         case PRED_BINARY_OP:
3153                 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3154                 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
3155                 return cnt;
3156         case PRED_FUNC:
3157                 op_list = pr->get_op_list();
3158                 for(o=0;o<op_list.size();++o){
3159                         cnt += expensive_refs_se(op_list[o],Schema);
3160                 }
3161                 return cnt;
3162         default:
3163                 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3164                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3165                 exit(1);
3166         }
3167 }
3168
3169
3170 //              TODO: allow "cheap" functions and predicates.
3171 bool simple_field_constraint(cnf_elem *c){
3172         vector<literal_t *> ll;
3173         int l;
3174         predicate_t *p = c->pr;
3175  int l_agr, l_gb, l_par, l_func, l_op;
3176  int r_agr, r_gb, r_par, r_func, r_op;
3177  col_id_set left_colids, right_colids;
3178
3179 //                      Verify that it is a simple atom
3180         switch(p->get_operator_type()){
3181         case PRED_COMPARE:
3182 //                              Must be an equality predicate which references
3183 //                              which referecnes no aggregates, parameters, functions, or
3184 //                              group-by variables, and should be a constraint of
3185 //                              a single colref.
3186 //                              AND should not require a complex comparison.
3187                 if(p->get_op() != "=") return(false);
3188                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3189                 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false);
3190                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3191                    r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3192 //                              I will count on there being no gbvars in the constraint.
3193 //                              TODO: allow gbvars which are colrefs.
3194                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3195                 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
3196                 if(left_colids.size()+right_colids.size() != 1) return(false);
3197
3198
3199 //                      Normalize : the colref should be on the lhs.
3200                 if(right_colids.size() > 0){
3201                         p->swap_scalar_operands();
3202                 }
3203
3204 //                      Disallow complex (and therefore expensive) comparisons.
3205                 if(p->get_left_se()->get_data_type()->complex_comparison(
3206                         p->get_right_se()->get_data_type() ) )
3207                                 return(false);
3208
3209 //                      passed all the tests.
3210                 return(true);
3211         case PRED_IN:
3212 //                      LHS must be a non-gbvar colref.
3213                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3214                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3215 //                              I will count on there being no gbvars in the constraint.
3216 //                              TODO: allow gbvars which are colrefs.
3217                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3218                 if(left_colids.size() != 1) return(false);
3219 //                      Disallow complex (and therefore expensive) comparisons.
3220                 if(p->get_left_se()->get_data_type()->complex_comparison(
3221                         p->get_left_se()->get_data_type() ) )
3222                                 return(false);
3223
3224
3225 //                      All entries in the IN list must be literals
3226 //                      Currently, this is the only possibility.
3227                 return(true);
3228                 break;
3229         case PRED_UNARY_OP:
3230                 return(false);
3231         case PRED_BINARY_OP:
3232                 return(false);
3233         case PRED_FUNC:
3234                 return(false);
3235         default:
3236                 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
3237                         p->get_lineno(), p->get_charno(), p->get_operator_type() );
3238                 exit(1);
3239         }
3240
3241         return(false);
3242 }
3243
3244 //              As the name implies, return the colref constrained by the
3245 //              cnf elem.  I will be counting on the LHS being a SE pointing
3246 //              to a colref.
3247
3248 //                      This fcn assumes that in fact exactly
3249 //                      one colref is constrained.
3250 colref_t *get_constrained_colref(scalarexp_t *se){
3251  int p;
3252  vector<scalarexp_t *> operand_list;
3253 colref_t *ret;
3254
3255         switch(se->get_operator_type()){
3256         case SE_LITERAL:
3257                 return(NULL);
3258         case SE_PARAM:
3259         case SE_IFACE_PARAM:
3260                 return(NULL);
3261         case SE_COLREF:
3262                 return(se->get_colref());
3263         case SE_UNARY_OP:
3264                 return(get_constrained_colref(se->get_left_se()));
3265         case SE_BINARY_OP:
3266                 ret=get_constrained_colref(se->get_left_se());
3267                 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3268                 else return ret;
3269         case SE_AGGR_STAR:
3270         case SE_AGGR_SE:
3271                 return(NULL);
3272         case SE_FUNC:
3273                 if(se->get_aggr_ref() >= 0) return NULL;
3274
3275                 operand_list = se->get_operands();
3276                 for(p=0;p<operand_list.size();p++){
3277                         ret=get_constrained_colref(operand_list[p]);
3278                         if(ret != NULL) return(ret);
3279
3280                 }
3281                 return(NULL);
3282         break;
3283         }
3284
3285         return(NULL);
3286 }
3287
3288
3289 colref_t *get_constrained_colref(predicate_t *p){
3290         return(get_constrained_colref(p->get_left_se()));
3291 }
3292 colref_t *get_constrained_colref(cnf_elem *c){
3293         return get_constrained_colref(c->pr->get_left_se());
3294 }
3295
3296
3297
3298
3299 /*
3300 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3301                                                         string target_fld, string target_tbl, int tblref){
3302
3303 //                      Make a copy of the predicate to be added.
3304 //                      ASSUME no aggregates.
3305         predicate_t *pr = dup_pr(src_p,NULL);
3306
3307 //                      Modify the ref to the base table.
3308 //                      ASSUME lhs is the colref
3309         pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3310         pr->get_left_se()->get_colref()->set_table_ref(tblref);
3311
3312         if(dst->pr == NULL) dst->pr = pr;
3313         else dst->pr = new predicate_t("OR", dst->pr, pr);
3314
3315 }
3316 */
3317
3318
3319 //////////////////////////////////////////////////////
3320 ///////////////         Represent a node in a predicate tree
3321 struct common_pred_node{
3322         set<int> lftas;
3323         predicate_t *pr;
3324         vector<predicate_t *> predecessor_preds;
3325         vector<common_pred_node *> children;
3326
3327         string target_tbl;
3328         string target_fld;
3329         int target_ref;
3330
3331         common_pred_node(){
3332                 pr = NULL;
3333         }
3334 };
3335
3336
3337 predicate_t *make_common_pred(common_pred_node *pn){
3338   int n;
3339
3340         if(pn->children.size() == 0){
3341                 if(pn->pr == NULL){
3342                         fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
3343                         exit(1);
3344                 }
3345                 return( dup_pr(pn->pr,NULL) );
3346         }
3347
3348         predicate_t *curr_pr = make_common_pred( pn->children[0] );
3349     for(n=1;n<pn->children.size();++n){
3350                 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3351         }
3352
3353         if(pn->pr != NULL)
3354                 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
3355
3356         return(curr_pr);
3357 }
3358
3359
3360 bool operator<(const cnf_set &c1, const cnf_set &c2){
3361         if(c1.lfta_id.size() < c2.lfta_id.size())
3362                 return true;
3363         return false;
3364 }
3365
3366
3367 //              Compute the predicates for the prefilter.
3368 //              the prefilter preds are returned in prefilter_preds.
3369 //              pred_ids is the set of predicates used in the prefilter.
3370 //              the encoding is the lfta index, in the top 16 bits,
3371 //              then the index of the cnf element in the bottom 16 bits.
3372 //              This set of for identifying which preds do not need
3373 //              to be generated in the lftas.
3374 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3375         int p, p2, l, c;
3376
3377         vector<cnf_set *> pred_list, sort_list;
3378
3379 //              Create list of tagged, prefilter-safe CNFs.
3380         for(l=0;l<where_list.size();++l){
3381                 for(c=0;c<where_list[l].size();++c){
3382                         if(prefilter_compatible(where_list[l][c],Ext_fcns)){
3383                                 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3384                                         pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3385                         }
3386                 }
3387         }
3388
3389 //              Eliminate duplicates
3390         for(p=0;p<pred_list.size();++p){
3391                 if(pred_list[p]){
3392                         for(p2=p+1;p2<pred_list.size();++p2){
3393                                 if(pred_list[p2]){
3394                                         if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
3395                                                 pred_list[p]->subsume(pred_list[p2]);
3396                                                 delete pred_list[p2];
3397                                                 pred_list[p2] = NULL;
3398                                         }
3399                                 }
3400                         }
3401                 }
3402         }
3403
3404 //              combine preds that occur in the exact same lftas.
3405         for(p=0;p<pred_list.size();++p){
3406                 if(pred_list[p]){
3407                         for(p2=p+1;p2<pred_list.size();++p2){
3408                                 if(pred_list[p2]){
3409                                         if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3410                                                 pred_list[p]->combine_pred(pred_list[p2]);
3411                                                 delete pred_list[p2];
3412                                                 pred_list[p2] = NULL;
3413                                         }
3414                                 }
3415                         }
3416                 }
3417         }
3418
3419 //              Compress the list
3420         for(p=0;p<pred_list.size();++p){
3421                 if(pred_list[p]){
3422                         sort_list.push_back(pred_list[p]);
3423                 }
3424         }
3425 //              Sort it
3426         sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3427
3428 //              Return the top preds, up to 64 of them.
3429         for(p=0;p<sort_list.size() && p<64;p++){
3430                 prefilter_preds.push_back(sort_list[p]);
3431                 sort_list[p]->add_pred_ids(pred_ids);
3432         }
3433
3434 //              Substitute gb refs with their defs
3435 //              While I'm at it, substitute base table sch ref for tblref.
3436         for(p=0;p<prefilter_preds.size() ;p++){
3437                 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3438         }
3439
3440 }
3441
3442
3443
3444
3445
3446 ///////////////////////////////////////////////////////////////////////////
3447 //////////////////////////////////////////////////////////////////////////
3448
3449 //              Find partial functions and register them.
3450 //              Do a DFS so that nested partial fcn calls
3451 //              get evaluated in the right order.
3452 //              Don't drill down into aggregates -- their arguments are evaluated
3453 //              earlier than the select list is.
3454 //
3455 //              Modification for function caching:
3456 //              Pass in a ref counter, and partial fcn indicator.
3457 //              Cache fcns ref'd at least once.
3458 //              pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
3459
3460
3461 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3462                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3463                 ext_fcn_list *Ext_fcns){
3464         vector<scalarexp_t *> operands;
3465         int o, f;
3466
3467         if(se == NULL) return;
3468
3469         switch(se->get_operator_type()){
3470         case SE_LITERAL:
3471         case SE_PARAM:
3472         case SE_IFACE_PARAM:
3473                 return;
3474         case SE_UNARY_OP:
3475                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3476                 return;
3477         case SE_BINARY_OP:
3478                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3479                 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3480                 return;
3481         case SE_COLREF:
3482                 return;
3483         case SE_AGGR_STAR:
3484                 return;
3485         case SE_AGGR_SE:
3486 //              find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3487                 return;
3488         case SE_FUNC:
3489                 if(se->get_aggr_ref() >= 0) return;
3490
3491                 operands = se->get_operands();
3492                 for(o=0;o<operands.size();o++){
3493                         find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3494                 }
3495
3496                 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
3497                         if(fcn_ref_cnt){
3498                           for(f=0;f<pf_list->size();++f){
3499                                 if(is_equivalent_se(se,(*pf_list)[f])){
3500                                         se->set_partial_ref(f);
3501                                         (*fcn_ref_cnt)[f]++;
3502                                         break;
3503                                 }
3504                           }
3505                         }else{
3506                                 f=pf_list->size();
3507                         }
3508                         if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) ||  fcn_ref_cnt)){
3509                                 se->set_partial_ref(pf_list->size());
3510                                 pf_list->push_back(se);
3511                                 if(fcn_ref_cnt){
3512                                         fcn_ref_cnt->push_back(1);
3513                                         is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3514                                 }
3515                         }
3516                 }
3517                 return;
3518         default:
3519                 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3520                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3521                 exit(1);
3522         }
3523         return;
3524 }
3525
3526
3527 void find_partial_fcns_pr(predicate_t *pr,  vector<scalarexp_t *> *pf_list,
3528                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3529                                                                         ext_fcn_list *Ext_fcns){
3530         vector<literal_t *> litl;
3531         vector<scalarexp_t *> op_list;
3532         int o;
3533
3534         switch(pr->get_operator_type()){
3535         case PRED_IN:
3536                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3537                 return;
3538         case PRED_COMPARE:
3539                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3540                 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3541                 return;
3542         case PRED_UNARY_OP:
3543                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3544                 return;
3545         case PRED_BINARY_OP:
3546                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3547                 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3548                 return;
3549         case PRED_FUNC:
3550                 op_list = pr->get_op_list();
3551                 for(o=0;o<op_list.size();++o){
3552                         find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3553                 }
3554                 return;
3555         default:
3556                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3557                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3558                 exit(1);
3559         }
3560
3561         return;
3562 }
3563
3564
3565
3566 void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,
3567                                                                 table_list *Schema, ext_fcn_list *Ext_fcns){
3568         vector<literal_t *> litl;
3569         vector<scalarexp_t *> op_list;
3570         int f,o;
3571
3572         switch(pr->get_operator_type()){
3573         case PRED_IN:
3574                 return;
3575         case PRED_COMPARE:
3576                 return;
3577         case PRED_UNARY_OP:
3578                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3579                 return;
3580         case PRED_BINARY_OP:
3581                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3582                 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
3583                 return;
3584         case PRED_FUNC:
3585                 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
3586                   for(f=0;f<pr_list->size();++f){
3587                         if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3588                                 pr->set_combinable_ref(f);
3589                                 break;
3590                         }
3591                   }
3592                   if(f == pr_list->size()){
3593                         pr->set_combinable_ref(pr_list->size());
3594                         pr_list->push_back(pr);
3595                   }
3596                 }
3597                 return;
3598         default:
3599                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3600                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3601                 exit(1);
3602         }
3603
3604         return;
3605 }
3606
3607
3608 //--------------------------------------------------------------------
3609 //              Collect refs to aggregates.
3610
3611
3612 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3613         vector<scalarexp_t *> operands;
3614         int o;
3615
3616         if(se == NULL) return;
3617
3618         switch(se->get_operator_type()){
3619         case SE_LITERAL:
3620         case SE_PARAM:
3621         case SE_IFACE_PARAM:
3622                 return;
3623         case SE_UNARY_OP:
3624                 collect_agg_refs(se->get_left_se(), agg_refs) ;
3625                 return;
3626         case SE_BINARY_OP:
3627                 collect_agg_refs(se->get_left_se(), agg_refs);
3628                 collect_agg_refs(se->get_right_se(), agg_refs);
3629                 return;
3630         case SE_COLREF:
3631                 return;
3632         case SE_AGGR_STAR:
3633         case SE_AGGR_SE:
3634                 agg_refs.insert(se->get_aggr_ref());
3635                 return;
3636         case SE_FUNC:
3637                 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3638
3639                 operands = se->get_operands();
3640                 for(o=0;o<operands.size();o++){
3641                         collect_agg_refs(operands[o], agg_refs);
3642                 }
3643
3644                 return;
3645         default:
3646                 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3647                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3648                 exit(1);
3649         }
3650         return;
3651 }
3652
3653
3654 void collect_aggr_refs_pr(predicate_t *pr,  set<int> &agg_refs){
3655         vector<literal_t *> litl;
3656         vector<scalarexp_t *> op_list;
3657         int o;
3658
3659         switch(pr->get_operator_type()){
3660         case PRED_IN:
3661                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3662                 return;
3663         case PRED_COMPARE:
3664                 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3665                 collect_agg_refs(pr->get_right_se(), agg_refs) ;
3666                 return;
3667         case PRED_UNARY_OP:
3668                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3669                 return;
3670         case PRED_BINARY_OP:
3671                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3672                 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
3673                 return;
3674         case PRED_FUNC:
3675                 op_list = pr->get_op_list();
3676                 for(o=0;o<op_list.size();++o){
3677                         collect_agg_refs(op_list[o],agg_refs);
3678                 }
3679                 return;
3680         default:
3681                 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3682                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3683                 exit(1);
3684         }
3685
3686         return;
3687 }
3688
3689
3690 //--------------------------------------------------------------------
3691 //              Collect previously registered partial fcn refs.
3692 //              Do a DFS so that nested partial fcn calls
3693 //              get evaluated in the right order.
3694 //              Don't drill down into aggregates -- their arguments are evaluated
3695 //              earlier than the select list is.
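//		NOTE: an earlier remark here asked why the code was drilling down
//		into aggregates.  As written it does not: SE_AGGR_SE nodes, and
//		SE_FUNC nodes that carry an aggregate ref, return without recursing.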
3697
3698 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3699         vector<scalarexp_t *> operands;
3700         int o;
3701
3702         if(se == NULL) return;
3703
3704         switch(se->get_operator_type()){
3705         case SE_LITERAL:
3706         case SE_PARAM:
3707         case SE_IFACE_PARAM:
3708                 return;
3709         case SE_UNARY_OP:
3710                 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3711                 return;
3712         case SE_BINARY_OP:
3713                 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3714                 collect_partial_fcns(se->get_right_se(), pfcn_refs);
3715                 return;
3716         case SE_COLREF:
3717                 return;
3718         case SE_AGGR_STAR:
3719                 return;
3720         case SE_AGGR_SE:
3721 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3722                 return;
3723         case SE_FUNC:
3724                 if(se->get_aggr_ref() >= 0) return;
3725
3726                 operands = se->get_operands();
3727                 for(o=0;o<operands.size();o++){
3728                         collect_partial_fcns(operands[o], pfcn_refs);
3729                 }
3730
3731                 if(se->is_partial()){
3732                         pfcn_refs.insert(se->get_partial_ref());
3733                 }
3734
3735                 return;
3736         default:
3737                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3738                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3739                 exit(1);
3740         }
3741         return;
3742 }
3743
3744
3745 void collect_partial_fcns_pr(predicate_t *pr,  set<int> &pfcn_refs){
3746         vector<literal_t *> litl;
3747         vector<scalarexp_t *> op_list;
3748         int o;
3749
3750         switch(pr->get_operator_type()){
3751         case PRED_IN:
3752                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3753                 return;
3754         case PRED_COMPARE:
3755                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3756                 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
3757                 return;
3758         case PRED_UNARY_OP:
3759                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
3760                 return;
3761         case PRED_BINARY_OP:
3762                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3763                 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
3764                 return;
3765         case PRED_FUNC:
3766                 op_list = pr->get_op_list();
3767                 for(o=0;o<op_list.size();++o){
3768                         collect_partial_fcns(op_list[o],pfcn_refs);
3769                 }
3770                 return;
3771         default:
3772                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3773                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3774                 exit(1);
3775         }
3776
3777         return;
3778 }
3779
3780
3781
3782
3783 ///////////////////////////////////////////////////////////////
3784 ////////////    Exported Functions      ///////////////////////////
3785 ///////////////////////////////////////////////////////////////
3786
3787
3788 //              Count and collect refs to interface parameters.
3789
3790 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3791         vector<scalarexp_t *> operands;
3792         int o;
3793         int ret = 0;
3794
3795         if(se == NULL) return 0;
3796
3797         switch(se->get_operator_type()){
3798         case SE_LITERAL:
3799         case SE_PARAM:
3800                 return 0;
3801         case SE_IFACE_PARAM:
3802                         ifpnames.insert(se->get_ifpref()->to_string());
3803                 return 1;
3804         case SE_UNARY_OP:
3805                 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
3806         case SE_BINARY_OP:
3807                 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3808                 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
3809                 return ret;
3810         case SE_COLREF:
3811                 return 0;
3812         case SE_AGGR_STAR:
3813                 return 0;
3814         case SE_AGGR_SE:
3815 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3816                 return 0;
3817         case SE_FUNC:
3818                 if(se->get_aggr_ref() >= 0) return 0;
3819
3820                 operands = se->get_operands();
3821                 for(o=0;o<operands.size();o++){
3822                         ret += count_se_ifp_refs(operands[o], ifpnames);
3823                 }
3824
3825                 return ret;
3826         default:
3827                 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3828                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3829                 exit(1);
3830         }
3831         return 0;
3832 }
3833
3834
3835 int count_pr_ifp_refs(predicate_t *pr,  set<string> &ifpnames){
3836         vector<literal_t *> litl;
3837         vector<scalarexp_t *> op_list;
3838         int o;
3839         int ret = 0;
3840         if(pr == NULL) return 0;
3841
3842         switch(pr->get_operator_type()){
3843         case PRED_IN:
3844                 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3845         case PRED_COMPARE:
3846                 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3847                 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
3848                 return ret;
3849         case PRED_UNARY_OP:
3850                 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
3851         case PRED_BINARY_OP:
3852                 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3853                 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
3854                 return ret;
3855         case PRED_FUNC:
3856                 op_list = pr->get_op_list();
3857                 for(o=0;o<op_list.size();++o){
3858                         ret += count_se_ifp_refs(op_list[o],ifpnames);
3859                 }
3860                 return ret;
3861         default:
3862                 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3863                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3864                 exit(1);
3865         }
3866
3867         return 0;
3868 }
3869
3870 //              Resolve ifp refs, convert them to string literals.
3871
3872 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb,  string &err){
3873         vector<scalarexp_t *> operands;
3874         vector<string> ifvals;
3875         int o;
3876         int ierr;
3877         string serr;
3878         int ret = 0;
3879         literal_t *tmp_l;
3880         ifpref_t *ir;
3881
3882         if(se == NULL) return 0;
3883
3884         switch(se->get_operator_type()){
3885         case SE_LITERAL:
3886         case SE_PARAM:
3887                 return 0;
3888         case SE_IFACE_PARAM:
3889                 ir = se->get_ifpref();
3890                 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
3891                 if(ierr){
3892                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
3893                         return 1;
3894                 }
3895                 if(ifvals.size() == 0){
3896                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
3897                         return 1;
3898                 }
3899                 if(ifvals.size() > 1){
3900                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
3901                         return 1;
3902                 }
3903                 tmp_l = new literal_t( ifvals[0]);
3904                 se->convert_to_literal(tmp_l);
3905                 return 0;
3906         case SE_UNARY_OP:
3907                 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
3908         case SE_BINARY_OP:
3909                 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3910                 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
3911                 return ret;
3912         case SE_COLREF:
3913                 return 0;
3914         case SE_AGGR_STAR:
3915                 return 0;
3916         case SE_AGGR_SE:
3917 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3918                 return 0;
3919         case SE_FUNC:
3920                 if(se->get_aggr_ref() >= 0) return 0;
3921
3922                 operands = se->get_operands();
3923                 for(o=0;o<operands.size();o++){
3924                         ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
3925                 }
3926
3927                 return ret;
3928         default:
3929                 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3930                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
3931                 exit(1);
3932         }
3933         return 0;
3934 }
3935
3936
3937 int resolve_pr_ifp_refs(predicate_t *pr,  string ifm, string ifn, ifq_t *ifdb,  string &err){
3938         vector<literal_t *> litl;
3939         vector<scalarexp_t *> op_list;
3940         int o;
3941         int ret = 0;
3942
3943         switch(pr->get_operator_type()){
3944         case PRED_IN:
3945                 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3946         case PRED_COMPARE:
3947                 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3948                 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
3949                 return ret;
3950         case PRED_UNARY_OP:
3951                 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
3952         case PRED_BINARY_OP:
3953                 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3954                 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
3955                 return ret;
3956         case PRED_FUNC:
3957                 op_list = pr->get_op_list();
3958                 for(o=0;o<op_list.size();++o){
3959                         ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
3960                 }
3961                 return ret;
3962         default:
3963                 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3964                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3965                 exit(1);
3966         }
3967
3968         return 0;
3969 }
3970
3971
3972 string impute_query_name(table_exp_t *fta_tree, string default_nm){
3973         string retval = fta_tree->get_val_of_name("query_name");
3974         if(retval == "") retval = default_nm;
3975         if(retval == "") retval = "default_query";
3976         return(retval);
3977 }
3978
3979 //              Convert the parse tree into an intermediate form,
3980 //              which admits analysis better.
3981 //
3982 //              TODO : rationalize the error return policy.
3983 //
3984 //              TODO : the query_summary_class object contains
3985 //                      the parse tree.
3986 //              TODO: revisit the issue when nested subqueries are implemented.
3987 //              One possibility: implement accessor methods to hide the
3988 //              complexity
3989 //              For now: this class contains data structures not in table_exp_t
3990 //              (with a bit of duplication)
3991
3992 //              Return NULL on error.
3993 //              print error messages to stderr.
3994
3995
3996 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3997                                 ext_fcn_list *Ext_fcns, string default_name){
3998         int i,j, k, retval;
3999
4000 //                      Create the summary struct -- no analysis is done here.
4001         query_summary_class *qs = new query_summary_class(fta_tree);
4002         qs->query_type = fta_tree->query_type;
4003
4004 //////////////          Do common analysis
4005
4006 //              Extract query name.  Already imputed for the qnodes.
4007 //      qs->query_name = impute_query_name(fta_tree, default_name);
4008         qs->query_name = default_name;
4009 //printf("query name is %s\n",qs->query_name.c_str());
4010
4011 //              extract definitions.  Don't grab the query name.
4012
4013         map<string, string> nmap = fta_tree->get_name_map();
4014         map<string, string>::iterator nmi;
4015         for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4016                 string pname = (*nmi).first;
4017                 if(pname != "query_name" )
4018                         (qs->definitions)[pname] = (*nmi).second;
4019         }
4020
4021 ///
4022 ///                             FROM analysis
4023
4024 //              First, verify that all the referenced tables are defined.
4025 //              Then, bind the tablerefs in the FROM list to schemas in
4026 //              the schema list.
4027         tablevar_list_t *tlist = fta_tree->get_from();
4028         vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4029
4030         bool found_error = false;
4031         for(i=0;i<tbl_vec.size();i++){
4032                 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4033                 if(sch_no < 0)  {
4034                   fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4035                         tbl_vec[i]->get_schema_name().c_str() );
4036                   fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4037                                         tbl_vec[i]->get_charno() );
4038                   return(NULL);
4039                 }
4040
4041                 tbl_vec[i]->set_schema_ref(sch_no);
4042
4043 //                              If accessing a UDOP, mangle the name
4044 //                      This needs to be done in translate_fta.cc, not here.
4045 /*
4046                 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4047                         string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4048                         tbl_vec[i]->set_schema_name(mngl_name);
4049                 }
4050 */
4051
4052 //                      No FTA schema should have an interface defined on it.
4053                 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4054                         fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4055                 }
4056 //                      Fill in default interface
4057                 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4058                         tbl_vec[i]->set_interface("default");
4059                         tbl_vec[i]->set_ifq(true);
4060                 }
4061 //                      Fill in default machine
4062                 if(tbl_vec[i]->get_interface()!=""  && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4063                         tbl_vec[i]->set_machine(hostname);
4064                 }
4065
4066                 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4067 //                      Record the set of interfaces accessed
4068                         string ifstr;
4069                         if(tbl_vec[i]->get_ifq()){
4070                                 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4071                         }else{
4072                                 if(tbl_vec[i]->get_machine() != "localhost"){
4073                                         ifstr = "&apos;"+tbl_vec[i]->get_machine()+"&apos;."+tbl_vec[i]->get_interface();
4074                                 }else{
4075                                         ifstr = tbl_vec[i]->get_interface();
4076                                 }
4077                         }
4078 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4079                         if(qs->definitions.count("_referenced_ifaces")){
4080                                 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4081                         }
4082                         qs->definitions["_referenced_ifaces"] = ifstr;
4083                 }
4084
4085         }
4086         if(found_error) return(NULL);
4087
4088 //                      Ensure that all tablevars have are named
4089 //                      and that no two tablevars have the same name.
4090         int tblvar_no = 0;
4091 //              First, gather the set of variable
4092         set<string> tblvar_names;
4093         for(i=0;i<tbl_vec.size();i++){
4094                 if(tbl_vec[i]->get_var_name() != ""){
4095                         if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4096                                 fprintf(stderr,"ERROR, query has two table variables named %s.  line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4097                                 return(NULL);
4098                         }
4099                         tblvar_names.insert(tbl_vec[i]->get_var_name());
4100                 }
4101         }
4102 //              Now generate variable names for unnamed tablevars
4103         for(i=0;i<tbl_vec.size();i++){
4104                 if(tbl_vec[i]->get_var_name() == ""){
4105                         char tmpstr[200];
4106                         sprintf(tmpstr,"_t%d",tblvar_no);
4107                         string newvar = tmpstr;
4108                         while(tblvar_names.count(newvar) > 0){
4109                                 tblvar_no++;
4110                                 sprintf(tmpstr,"_t%d",tblvar_no);
4111                                 newvar = tmpstr;
4112                         }
4113                         tbl_vec[i]->set_range_var(newvar);
4114                         tblvar_names.insert(newvar);
4115                 }
4116         }
4117
4118 //              Process inner/outer join properties
4119         int jprop = fta_tree->get_from()->get_properties();
4120 //              Require explicit INNER_JOIN, ... specification for join queries.
4121         if(jprop < 0){
4122                 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4123                         fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, WATCHLIST_JOIN, FILTER_JOIN.\n");
4124                         return(NULL);
4125                 }
4126         }
4127
4128         if(jprop == OUTER_JOIN_PROPERTY){
4129                 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4130         }
4131         if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4132                 tbl_vec[0]->set_property(1);
4133         if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4134                 tbl_vec[tbl_vec.size()-1]->set_property(1);
4135         if(jprop == FILTER_JOIN_PROPERTY){
4136                 if(fta_tree->get_from()->get_temporal_range() == 0){
4137                         fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4138                         return NULL;
4139                 }
4140                 if(tbl_vec.size() != 2){
4141                         fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4142                         return NULL;
4143                 }
4144                 colref_t *cr = fta_tree->get_from()->get_colref();
4145                 string field = cr->get_field();
4146
4147                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4148                 if(fi0 < 0){
4149                         fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4150                         return NULL;
4151                 }
4152                 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4153                 cr->set_tablevar_ref(0);
4154                 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4155                 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4156                 data_type *dt0 = new data_type(type_name, modifiers);
4157                 string dt0_type = dt0->get_type_str();
4158                 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4159                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4160                         return NULL;
4161                 }
4162                 if(! dt0->is_increasing()){
4163                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be temporal increasing.\n");
4164                         return NULL;
4165                 }
4166         }
4167
4168
4169
4170 /////////////////////
4171 ///             Build the query param table
4172         vector<var_pair_t *> query_params = fta_tree->query_params;
4173         int p;
4174         for(p=0;p<query_params.size();++p){
4175                 string pname = query_params[p]->name;
4176                 string dtname = query_params[p]->val;
4177
4178                 if(pname == ""){
4179                         fprintf(stderr,"ERROR parameter has empty name.\n");
4180                         found_error = true;
4181                 }
4182                 if(dtname == ""){
4183                         fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4184                         found_error = true;
4185                 }
4186                 data_type *dt = new data_type(dtname);
4187                 if(!(dt->is_defined())){
4188                         fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4189                         found_error = true;
4190                 }
4191
4192                 qs->add_query_param(pname, dt, false);
4193         }
4194         if(found_error) return(NULL);
4195 //              unpack the param table to a global for easier analysis.
4196         param_tbl=qs->param_tbl;
4197
4198
4199 //////////////////              WATCHLIST specialized analysis
4200         if(qs->query_type == WATCHLIST_QUERY){
4201 //              Populate a SELECT clause?
4202         }
4203
4204 //////////////////              MERGE specialized analysis
4205
4206         if(qs->query_type == MERGE_QUERY){
4207 //                      Verify that
4208 //                              1) there are two *different* streams ref'd in the FROM clause
4209 //                                      However, only emit a warning.
4210 //                                      (can't detect a problem if one of the interfaces is the
4211 //                                       default interface).
4212 //                              2) They have the same layout (e.g. same types but the
4213 //                                      names can be different)
4214 //                              3) the two columns can unambiguously be mapped to
4215 //                                      fields of the two tables, one per table.  Exception:
4216 //                                      the column names are the same and exist in both tables.
4217 //                                      FURTHERMORE the positions must be the same
4218 //                              4) after mapping, verify that both colrefs are temporal
4219 //                                      and in the same direction.
4220                 if(tbl_vec.size() < 2){
4221                         fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4222                         return(NULL);
4223                 }
4224
4225                 vector<field_entry *> fev0 = schema->get_fields(
4226                         tbl_vec[0]->get_schema_name()
4227                 );
4228
4229
4230                 int cv;
4231                 for(cv=1;cv<tbl_vec.size();++cv){
4232                         vector<field_entry *> fev1 = schema->get_fields(
4233                                 tbl_vec[cv]->get_schema_name()
4234                         );
4235
4236                         if(fev0.size() != fev1.size()){
4237                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4238                                 return(NULL);
4239                         }
4240
4241 //                      Only need to ensure that the list of types are the same.
4242 //                      The first table supplies the output colnames,
4243 //                      and all temporal properties are lost, except for the
4244 //                      merge-by columns.
4245                         int f;
4246                         for(f=0;f<fev0.size();++f){
4247                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4248                                 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4249                                 if(! dt0.equal_subtypes(&dt1) ){
4250                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4251                                         return(NULL);
4252                                 }
4253                         }
4254                 }
4255
4256 //              copy over the merge-by cols.
4257                 qs->mvars = fta_tree->mergevars;
4258
4259                 if(qs->mvars.size() == 0){      // need to discover the merge vars.
4260                         int mergevar_pos = -1;
4261                         int f;
4262                         for(f=0;f<fev0.size();++f){
4263                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4264                                 if(dt0.is_temporal()){
4265                                         mergevar_pos = f;
4266                                         break;
4267                                 }
4268                         }
4269                         if(mergevar_pos >= 0){
4270                                 for(cv=0;cv<tbl_vec.size();++cv){
4271                                         vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4272                                         qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4273                                 }
4274                         }else{
4275                                 fprintf(stderr,"ERROR, no merge-by column found.\n");
4276                                 return(NULL);
4277                         }
4278                 }
4279
4280 //                      Ensure same number of tables, merge cols.
4281                 if(tbl_vec.size() != qs->mvars.size()){
4282                         fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4283                         return(NULL);
4284                 }
4285
4286 //              Ensure that the merge-by are from different tables
4287 //              also, sort colrefs so that they align with the FROM list using tmp_crl
4288                 set<int> refd_sources;
4289                 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4290                 for(cv=0;cv<qs->mvars.size();++cv){
4291                         int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4292                         if(tblvar<0){
4293                                 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4294                         }
4295                         refd_sources.insert(tblvar);
4296                         tmp_crl[tblvar] = qs->mvars[cv];
4297                 }
4298                 if(refd_sources.size() != qs->mvars.size()){
4299                         fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4300                         return(NULL);
4301                 }
4302
4303 //                      1-1 mapping, so use tmp_crl as the merge column list.
4304                 qs->mvars = tmp_crl;
4305
4306
4307
4308 //                      Look up the colrefs in their schemas, verify that
4309 //                      they are at the same place, that they are both temporal
4310 //                      in the same way.
4311 //                      It seems that this should be done more in the schema objects.
4312                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4313                 if(fi0 < 0){
4314                         fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4315                         exit(1);
4316                 }
4317                 for(cv=1;cv<qs->mvars.size();++cv){
4318                         int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4319                         if(fi0!=fi1){
4320                                 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4321                                 return NULL;
4322                         }
4323                 }
4324
4325                 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4326                 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4327                 if( (!dt0.is_temporal()) ){
4328                         fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4329                         return(NULL);
4330                 }
4331                 for(cv=0;cv<qs->mvars.size();++cv){
4332                         field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4333                         data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4334                         if( (!dt1.is_temporal()) ){
4335                                 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4336                                 return(NULL);
4337                         }
4338
4339
4340                         if( dt0.get_temporal() != dt1.get_temporal()){
4341                                 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4342                                 return(NULL);
4343                         }
4344                 }
4345
4346 //                      If there is a SLACK specification, verify
4347 //                      that it is literal-only and that its type is compatible
4348 //                      with that of the merge columns
4349                 qs->slack = fta_tree->slack;
4350                 if(qs->slack){
4351                         if(! literal_only_se(qs->slack)){
4352                                 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4353                                 return NULL;
4354                         }
4355
4356                         assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4357                         data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4358                         if(sdt.get_type() == undefined_t){
4359                                 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4360                                 return NULL;
4361                         }
4362                 }
4363
4364
4365 //                      All the tests have passed, there is nothing
4366 //                      else to fill in.
4367
4368         }
4369
4370 //////////////////              SELECT specialized analysis
4371
4372         if(qs->query_type == SELECT_QUERY){
4373 //              unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals
4374 //              objects into globals, for easier syntax.
4375         gb_tbl = qs->gb_tbl;
4376         aggr_tbl = qs->aggr_tbl;
4377
4378
4379 //              Build the table of group-by attributes.
4380 //              (se processing done automatically).
4381 //              NOTE : Doing the SE processing here is getting cumbersome,
4382 //                      I should process these individually.
4383 //              NOTE : I should check for duplicate names.
4384 //              NOTE : I should ensure that the def of one GB does not
4385 //                      reference the value of another.
4386         vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4387         int n_temporal = 0;
4388         string temporal_gbvars = "";
4389         map<string, int> gset_gbnames;
4390
4391 //              For generating the set of GB patterns for this aggregation query.
4392         vector<bool> inner_pattern;
4393         vector<vector<bool> > pattern_set;
4394         vector<vector<vector<bool> > > pattern_components;
4395
4396         vector<gb_t *> r_gbs, c_gbs, g_gbs;
4397         int n_patterns;
4398
4399         for(i=0;i<gb_list.size();i++){
4400                 switch(gb_list[i]->type){
4401                 case gb_egb_type:
4402                         retval = gb_tbl->add_gb_attr(
4403                                 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4404                         );
4405                         if(retval < 0){
4406                                 return NULL;  // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4407                         }else{
4408                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4409                                         n_temporal++;
4410                                         if(temporal_gbvars != "") temporal_gbvars+=" ";
4411                                         temporal_gbvars += gb_tbl->get_name(i);
4412                                 }
4413                         }
4414
4415                         inner_pattern.clear();
4416                         pattern_set.clear();
4417                         inner_pattern.push_back(true);
4418                         pattern_set.push_back(inner_pattern);
4419                         pattern_components.push_back(pattern_set);
4420
4421                         gb_tbl->gb_entry_type.push_back("");
4422                         gb_tbl->gb_entry_count.push_back(1);
4423                         gb_tbl->pattern_components.push_back(pattern_set);
4424
4425                 break;
4426                 case rollup_egb_type:
4427                         r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4428                         for(j=0;j<r_gbs.size();++j){
4429                                 retval = gb_tbl->add_gb_attr(
4430                                         r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4431                                 );
4432                                 if(retval < 0){
4433                                         found_error = true;
4434                                 }else{          // rollup gb can't be temporal
4435                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4436                                 }
4437                         }
4438
4439                         inner_pattern.resize(r_gbs.size());
4440                         pattern_set.clear();
4441                         for(j=0;j<=r_gbs.size();++j){
4442                                 for(k=0;k<r_gbs.size();++k){
4443                                         if(k < j)
4444                                                 inner_pattern[k] = true;
4445                                         else
4446                                                 inner_pattern[k] = false;
4447                                 }
4448                                 pattern_set.push_back(inner_pattern);
4449                         }
4450                         pattern_components.push_back(pattern_set);
4451
4452                         gb_tbl->gb_entry_type.push_back("ROLLUP");
4453                         gb_tbl->gb_entry_count.push_back(r_gbs.size());
4454                         gb_tbl->pattern_components.push_back(pattern_set);
4455                 break;
4456                 case cube_egb_type:
4457                         c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4458                         for(j=0;j<c_gbs.size();++j){
4459                                 retval = gb_tbl->add_gb_attr(
4460                                         c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4461                                 );
4462                                 if(retval < 0){
4463                                         found_error = true;
4464                                 }else{          // cube gb can't be temporal
4465                                         gb_tbl->reset_temporal(gb_tbl->size()-1);
4466                                 }
4467                         }
4468
4469                         inner_pattern.resize(c_gbs.size());
4470                         pattern_set.clear();
4471                         n_patterns = 1 << c_gbs.size();
4472                         for(j=0;j<n_patterns;++j){
4473                                 int test_bit = 1;
4474                                 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4475                                         if((j & test_bit) != 0)
4476                                                 inner_pattern[k] = true;
4477                                         else
4478                                                 inner_pattern[k] = false;
4479                                 }
4480                                 pattern_set.push_back(inner_pattern);
4481                         }
4482                         pattern_components.push_back(pattern_set);
4483
4484                         gb_tbl->gb_entry_type.push_back("CUBE");
4485                         gb_tbl->gb_entry_count.push_back(c_gbs.size());
4486                         gb_tbl->pattern_components.push_back(pattern_set);
4487                 break;
4488                 case gsets_egb_type:
4489                 {
4490                         gset_gbnames.clear();
4491                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4492                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4493                                 for(k=0;k<g_gbs.size();++k){
4494                                         if(g_gbs[k]->type != GB_COLREF){
4495                                                 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4496                                                 found_error = true;
4497                                         }else{
4498                                                 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4499                                                         retval = gb_tbl->add_gb_attr(
4500                                                                 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4501                                                         );
4502                                                         if(retval < 0){
4503                                                                 found_error = true;
4504                                                         }else{          // gsets gb can't be temporal
4505                                                                 gb_tbl->reset_temporal(gb_tbl->size()-1);
4506                                                         }
4507                                                         int pos = gset_gbnames.size();
4508                                                         gset_gbnames[g_gbs[k]->name] = pos;
4509                                                 }
4510                                         }
4511                                 }
4512                         }
4513
4514                         if(gset_gbnames.size() > 63){
4515                                 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4516                                 found_error = true;
4517                         }
4518
4519                         inner_pattern.resize(gset_gbnames.size());
4520                         pattern_set.clear();
4521                         set<unsigned long long int> signatures;
4522                         for(j=0;j<gb_list[i]->gb_lists.size();++j){
4523                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4524                                 set<string> refd_gbs;
4525                                 for(k=0;k<g_gbs.size();++k){
4526                                         refd_gbs.insert(g_gbs[k]->name);
4527                                 }
4528                                 fill(inner_pattern.begin(),inner_pattern.end(),false);
4529                                 unsigned long long int signature = 0;
4530                                 set<string>::iterator ssi;
4531                                 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4532                                         inner_pattern[gset_gbnames[(*ssi)]] = true;
4533                                         signature |= (1 << gset_gbnames[(*ssi)]);
4534                                 }
4535                                 if(signatures.count(signature)){
4536                                         fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4537                                         set<string>::iterator ssi;
4538                                         for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4539                                                 fprintf(stderr," %s",(*ssi).c_str());
4540                                         }
4541                                         fprintf(stderr,"\n");
4542                                 }else{
4543                                         signatures.insert(signature);
4544                                         pattern_set.push_back(inner_pattern);
4545                                 }
4546                         }
4547                         pattern_components.push_back(pattern_set);
4548
4549                         gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4550                         gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4551                         gb_tbl->pattern_components.push_back(pattern_set);
4552                 }
4553                 break;
4554                 default:
4555                 break;
4556                 }
4557         }
4558         if(found_error) return(NULL);
4559         if(n_temporal > 1){
4560                 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s).  Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4561                 return NULL;
4562         }
4563
4564 //              Compute the set of patterns.  Take the cross product of all pattern components.
4565         vector<vector<bool> > gb_patterns;
4566         int n_components = pattern_components.size();
4567         vector<int> pattern_pos(n_components,0);
4568         bool done = false;
4569         while(! done){
4570                 vector<bool> pattern;
4571                 for(j=0;j<n_components;j++){
4572                         pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4573                                 pattern_components[j][pattern_pos[j]].end());
4574                 }
4575                 gb_patterns.push_back(pattern);
4576                 for(j=0;j<n_components;j++){
4577                         pattern_pos[j]++;
4578                         if(pattern_pos[j] >= pattern_components[j].size())
4579                                 pattern_pos[j] = 0;
4580                         else
4581                                 break;
4582                 }
4583                 if(j >= n_components)
4584                         done = true;
4585         }
4586         gb_tbl->gb_patterns = gb_patterns;
4587
4588
4589 //              Process the supergroup, if any.
4590         vector<colref_t *> sgb = fta_tree->get_supergb();
4591         for(i=0;i<sgb.size();++i){
4592                 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4593                 if(gbr < 0){
4594                         fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4595                         found_error = true;
4596                 }
4597                 if(qs->sg_tbl.count(gbr)){
4598                         fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4599                 }
4600                 qs->sg_tbl.insert(gbr);
4601         }
4602         if(found_error) return(NULL);
4603
4604         if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4605                 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4606                 return NULL;
4607         }
4608
4609
4610
4611         predicate_t *wh = fta_tree->get_where();
4612         predicate_t *hv = fta_tree->get_having();
4613         predicate_t *cw = fta_tree->get_cleaning_when();
4614         predicate_t *cb = fta_tree->get_cleaning_by();
4615         predicate_t *closew = fta_tree->get_closing_when();
4616
4617         if(closew != NULL  && gb_tbl->gb_patterns.size()>1){
4618                 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4619                 return NULL;
4620         }
4621
4622
4623
4624 //              Verify that all column references are valid, and if so assign
4625 //              the data type.
4626
4627         vector<select_element *> sl_list = fta_tree->get_sl_vec();
4628         for(i=0;i<sl_list.size();i++){
4629                 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4630                 if(retval < 0) found_error = true;
4631         }
4632         if(wh != NULL)
4633                 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4634         if(retval < 0) found_error = true;
4635         if(hv != NULL)
4636                 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4637         if(retval < 0) found_error = true;
4638         if(cw != NULL)
4639                 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4640         if(retval < 0) found_error = true;
4641         if(cb != NULL)
4642                 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4643         if(retval < 0) found_error = true;
4644         if(closew != NULL)
4645                 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4646         if(retval < 0) found_error = true;
4647
4648         if(found_error) return(NULL);
4649
4650 //              Verify that all of the scalar expressions
4651 //              and comparison predicates have compatible types.
4652
4653         n_temporal = 0;
4654         string temporal_output_fields;
4655         for(i=0;i<sl_list.size();i++){
4656                 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4657                 if(retval < 0){
4658                          found_error = true;
4659                 }else{
4660                         if(sl_list[i]->se->get_data_type()->is_temporal()){
4661                                 n_temporal++;
4662                                 temporal_output_fields += " "+int_to_string(i);
4663                         }
4664                 }
4665         }
4666         if(n_temporal > 1){
4667                 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s).  Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4668                 found_error=true;
4669         }
4670         if(wh != NULL)
4671                 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4672         if(retval < 0) found_error = true;
4673         if(hv != NULL)
4674                 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4675         if(retval < 0) found_error = true;
4676         if(cw != NULL)
4677                 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4678         if(retval < 0) found_error = true;
4679         if(cb != NULL)
4680                 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4681         if(retval < 0) found_error = true;
4682         if(closew != NULL)
4683                 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4684         if(retval < 0) found_error = true;
4685
4686         if(found_error) return(NULL);
4687
4688 //                      Impute names for the unnamed columns.
4689         set<string> curr_names;
4690         int s;
4691         for(s=0;s<sl_list.size();++s){
4692                 curr_names.insert(sl_list[s]->name);
4693         }
4694         for(s=0;s<sl_list.size();++s){
4695                 if(sl_list[s]->name == "")
4696                         sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4697         }
4698
4699
4700 //              Check the aggregates.
4701 //              No aggrs allowed in the WHERE predicate.
4702 //              (no aggrs in the GB defs, but that is examined elsewhere)
4703 //              Therefore, aggregates are allowed only in the select clause.
4704 //
4705 //              The query is an aggregation query if there is a group-by clause, or
4706 //              if any aggregate is referenced.  If there is a group-by clause,
4707 //              at least one aggregate must be referenced.
4708 //              If the query is an aggregate query, the scalar expressions in
4709 //              the select clause can reference only constants, aggregates, or group-by
4710 //              attributes.
4711 //              Also, if the query is an aggregate query, build a table referencing
4712 //              the aggregates.
4713 //
4714 //              No nested aggregates allowed.
4715 //
4716
4717 //              First, count references in the WHERE predicate.
4718 //              (if there are any references, report an error).
4719 //                      can ref group vars, tuple fields, and stateful fcns.
4720
4721         if(wh != NULL){
4722                 retval = count_aggr_pred(wh, true);
4723                 if(retval > 0){
4724                         fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4725                         return(NULL);
4726                 }
4727         }
4728
4729 //              NOTE : Here I need an analysis of the having clause
4730 //              to verify that it only refs GB attrs and aggregates.
4731 //                      (also, superaggregates, stateful fcns)
4732         if(hv!=NULL){
4733                 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4734                 if(retval < 0) return(NULL);
4735         }
4736
4737 //              Cleaning by has same reference rules as Having
4738         if(cb!=NULL){
4739                 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4740                 if(retval < 0) return(NULL);
4741         }
4742
4743 //              Cleaning when has same reference rules as Having,
4744 //              except that references to non-superaggregates are not allowed.
4745 //              This is tested for when "CLEANING_WHEN" is passed in as the clause.
4746         if(cw!=NULL){
4747                 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4748                 if(retval < 0) return(NULL);
4749         }
4750
4751 //              CLOSING_WHEN : same rules as HAVING
4752         if(closew!=NULL){
4753                 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4754                 if(retval < 0) return(NULL);
4755         }
4756
4757
4758 //              Collect aggregates in the HAVING and CLEANING clauses
4759         if(hv != NULL){
4760                 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4761         }
4762         if(cw != NULL){
4763                 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4764         }
4765         if(cb != NULL){
4766                 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4767         }
4768         if(closew != NULL){
4769                 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4770         }
4771
4772 //              Collect aggregate refs in the SELECT clause.
4773
4774         for(i=0;i<sl_list.size();i++)
4775                 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4776
4777
4778 //              Collect references to states of stateful functions
4779         if(wh != NULL){
4780                 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4781         }
4782         if(hv != NULL){
4783                 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4784         }
4785         if(cw != NULL){
4786                 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4787         }
4788         if(cb != NULL){
4789                 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4790         }
4791         if(closew != NULL){                     // should be no stateful fcns here ...
4792                 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4793         }
4794         for(i=0;i<sl_list.size();i++)
4795                 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4796
4797
4798 //              If this is an aggregate query, it had normally references
4799 //              some aggregates.  Its not necessary though, just emit a warning.
4800 //              (acts as SELECT DISTINCT)
4801
4802         bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4803         if(is_aggr_query && aggr_tbl->size() == 0){
4804                 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4805         }
4806
4807 //              If this is an aggregate query,
4808 //                      1) verify that the SEs in the SELECT clause reference
4809 //                              only constants, aggregates, and group-by attributes.
4810 //                      2) No aggregate scalar expression references an aggregate
4811 //                              or any stateful function.
4812 //                      3) either it references both CLEANING clauses or neither.
4813 //                      4) all superaggregates must have the superaggr_allowed property.
4814 //                      5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY
4815 //                         clauses must have the multiple_output property.
4816
4817
4818         if(is_aggr_query){
4819                 if(gb_list.size() == 0){
4820                         fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4821                         return NULL;
4822                 }
4823 //                      Ensure that at least one gbvar is temporal
4824                 if(! fta_tree->name_exists("no_temporal_aggr")){
4825                         bool found_temporal = false;
4826                 for(i=0;i<gb_tbl->size();i++){
4827                                 if(gb_tbl->get_data_type(i)->is_temporal()){
4828                                         found_temporal = true;
4829                                 }
4830                         }
4831                         if(! found_temporal){
4832                                 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4833                                 exit(1);
4834                         }
4835                 }
4836
4837                 if((!cb && cw) || (cb && !cw)){
4838                         fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4839                         return(NULL);
4840                 }
4841
4842                 bool refs_running = false;
4843                 int a;
4844                 for(a=0; a<aggr_tbl->size(); ++a){
4845                         refs_running |= aggr_tbl->is_running_aggr(a);
4846                 }
4847
4848                 if(closew){
4849                         if(cb || cw){
4850                                 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4851                                 return(NULL);
4852                         }
4853                         if(!refs_running){
4854                                 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4855                                 return(NULL);
4856                         }
4857                 }
4858
4859                 if(refs_running && !closew){
4860                                 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4861                         return(NULL);
4862                 }
4863
4864                 bool st_ok = true;
4865                 for(i=0;i<sl_list.size();i++){
4866                         bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4867                         st_ok = st_ok && ret_bool;
4868                 }
4869                 if(! st_ok)
4870                         return(NULL);
4871
4872                 for(i=0;i<aggr_tbl->size();i++){
4873                         if(aggr_tbl->is_superaggr(i)){
4874                                 if(! aggr_tbl->superaggr_allowed(i)){
4875                                         fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4876                                         return NULL;
4877                                 }
4878                         }
4879                         if(aggr_tbl->is_builtin(i)){
4880                                 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4881                                         fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4882                                         return(NULL);
4883                                 }
4884                         }else{
4885                                 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4886                                 int o;
4887                                 for(o=0;o<opl.size();++o){
4888                                         if(count_aggr_se(opl[o], true) > 0){
4889                                                 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4890                                                 return(NULL);
4891                                         }
4892                                 }
4893                         }
4894                 }
4895         }else{
4896 //                      Ensure that non-aggregate query doesn't reference some things
4897                 if(cb || cw){
4898                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4899                         return(NULL);
4900                 }
4901                 if(closew){
4902                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4903                         return(NULL);
4904                 }
4905                 if(qs->states_refd.size()){
4906                         fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4907                         return(NULL);
4908                 }
4909         }
4910
4911
4912
4913 //              Convert the predicates into CNF.  OK to pass NULL ptr.
4914         make_cnf_from_pr(wh, qs->wh_cnf);
4915         make_cnf_from_pr(hv, qs->hav_cnf);
4916         make_cnf_from_pr(cb, qs->cb_cnf);
4917         make_cnf_from_pr(cw, qs->cw_cnf);
4918         make_cnf_from_pr(closew, qs->closew_cnf);
4919
4920 //              Analyze the predicates.
4921
4922         for(i=0;i<qs->wh_cnf.size();i++)
4923                 analyze_cnf(qs->wh_cnf[i]);
4924         for(i=0;i<qs->hav_cnf.size();i++)
4925                 analyze_cnf(qs->hav_cnf[i]);
4926         for(i=0;i<qs->cb_cnf.size();i++)
4927                 analyze_cnf(qs->cb_cnf[i]);
4928         for(i=0;i<qs->cw_cnf.size();i++)
4929                 analyze_cnf(qs->cw_cnf[i]);
4930         for(i=0;i<qs->closew_cnf.size();i++)
4931                 analyze_cnf(qs->closew_cnf[i]);
4932
4933
4934 //                      At this point, the old analysis program
4935 //                      gathered all refs to partial functions,
4936 //                      complex literals, and parameters accessed via a handle.
4937 //                      I think its better to delay this
4938 //                      until code generation time, as the query will be
4939 //                      in general split.
4940
4941     }
4942
4943         return(qs);
4944 }
4945
4946 ///////////////////////////////////////////////////////////////////////
4947
4948 //              Expand gbvars with their definitions.
4949
4950 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4951         int o;
4952
4953         switch(se->get_operator_type()){
4954         case SE_LITERAL:
4955         case SE_PARAM:
4956         case SE_IFACE_PARAM:
4957                 return se;
4958         case SE_UNARY_OP:
4959                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4960                 return se;
4961         case SE_BINARY_OP:
4962                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4963                 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
4964                 return se;
4965         case SE_COLREF:
4966                 if( se->is_gb() ){
4967                         return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4968                 }
4969                 return se;
4970 //                      don't descend into aggr defs.
4971         case SE_AGGR_STAR:
4972                 return se;
4973         case SE_AGGR_SE:
4974                 return se;
4975         case SE_FUNC:
4976                 for(o=0;o<se->param_list.size();o++){
4977                         se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
4978                 }
4979                 return se;
4980         default:
4981                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4982                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
4983                 exit(1);
4984         }
4985         return se;
4986 }
4987
4988 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4989         vector<scalarexp_t *> op_list;
4990         int o;
4991         bool found = false;
4992
4993         switch(pr->get_operator_type()){
4994         case PRED_IN:
4995                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
4996                 return;
4997         case PRED_COMPARE:
4998                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
4999                 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
5000                 return;
5001         case PRED_UNARY_OP:
5002                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5003                 return;
5004         case PRED_BINARY_OP:
5005                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5006                 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
5007                 return;
5008         case PRED_FUNC:
5009                 for(o=0;o<pr->param_list.size();++o){
5010                         pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
5011                 }
5012                 return;
5013         default:
5014                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5015                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5016         }
5017         return;
5018 }
5019
5020
5021
5022
5023 //              return true if the se / pr contains any gbvar on the list.
5024
5025
5026 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5027         vector<scalarexp_t *> operands;
5028         int o;
5029         bool found = false;
5030
5031         switch(se->get_operator_type()){
5032         case SE_LITERAL:
5033         case SE_PARAM:
5034         case SE_IFACE_PARAM:
5035                 return false;
5036         case SE_UNARY_OP:
5037                 return contains_gb_se(se->get_left_se(),gref_set);
5038         case SE_BINARY_OP:
5039                 return( contains_gb_se(se->get_left_se(),gref_set) ||
5040                         contains_gb_se(se->get_right_se(),gref_set) );
5041         case SE_COLREF:
5042                 if( se->is_gb() ){
5043                         return( gref_set.count(se->get_gb_ref()) > 0);
5044                 }
5045                 return false;
5046 //                      don't descend into aggr defs.
5047         case SE_AGGR_STAR:
5048                 return false;
5049         case SE_AGGR_SE:
5050                 return false;
5051         case SE_FUNC:
5052                 operands = se->get_operands();
5053                 for(o=0;o<operands.size();o++){
5054                         found = found || contains_gb_se(operands[o], gref_set);
5055                 }
5056                 return found;
5057         default:
5058                 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5059                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5060                 exit(1);
5061         }
5062         return false;
5063 }
5064
5065
5066 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5067         vector<scalarexp_t *> op_list;
5068         int o;
5069         bool found = false;
5070
5071         switch(pr->get_operator_type()){
5072         case PRED_IN:
5073                 return contains_gb_se(pr->get_left_se(), gref_set);
5074         case PRED_COMPARE:
5075                 return (contains_gb_se(pr->get_left_se(),gref_set)
5076                         || contains_gb_se(pr->get_right_se(),gref_set) );
5077         case PRED_UNARY_OP:
5078                 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5079         case PRED_BINARY_OP:
5080                 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5081                         || contains_gb_pr(pr->get_right_pr(),gref_set) );
5082         case PRED_FUNC:
5083                 op_list = pr->get_op_list();
5084                 for(o=0;o<op_list.size();++o){
5085                         found = found ||contains_gb_se(op_list[o],gref_set) ;
5086                 }
5087                 return found;
5088         default:
5089                 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5090                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5091         }
5092
5093         return found;
5094 }
5095
5096
5097 //              Gather the set of columns accessed in this se.
5098 //              Descend into aggregate functions.
5099
5100 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5101         col_id ci;
5102         vector<scalarexp_t *> operands;
5103         int o;
5104
5105         if(! se)
5106                 return;
5107
5108         switch(se->get_operator_type()){
5109         case SE_LITERAL:
5110         case SE_PARAM:
5111         case SE_IFACE_PARAM:
5112                 return;
5113         case SE_UNARY_OP:
5114                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5115                 return;
5116         case SE_BINARY_OP:
5117                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5118                 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
5119                 return;
5120         case SE_COLREF:
5121                 if(! se->is_gb() ){
5122                         ci.load_from_colref(se->get_colref() );
5123                         if(ci.tblvar_ref < 0){
5124                                 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5125                         }
5126                         cid_set.insert(ci);
5127                 }else{
5128                         if(gtbl==NULL){
5129                                 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
5130                                 exit(1);
5131                         }
5132                         gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
5133                 }
5134                 return;
5135         case SE_AGGR_STAR:
5136                 return;
5137         case SE_AGGR_SE:
5138                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5139                 return;
5140         case SE_FUNC:
5141                 operands = se->get_operands();
5142                 for(o=0;o<operands.size();o++){
5143                         gather_se_col_ids(operands[o], cid_set,gtbl);
5144                 }
5145                 return;
5146         default:
5147                 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5148                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5149                 exit(1);
5150         }
5151 }
5152
5153
5154 //              Gather the set of columns accessed in this se.
5155
5156 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5157         vector<scalarexp_t *> op_list;
5158         int o;
5159
5160         switch(pr->get_operator_type()){
5161         case PRED_IN:
5162                 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
5163                 return;
5164         case PRED_COMPARE:
5165                 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5166                 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
5167                 return;
5168         case PRED_UNARY_OP:
5169                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5170                 return;
5171         case PRED_BINARY_OP:
5172                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5173                 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
5174                 return;
5175         case PRED_FUNC:
5176                 op_list = pr->get_op_list();
5177                 for(o=0;o<op_list.size();++o){
5178                         gather_se_col_ids(op_list[o],cid_set,gtbl) ;
5179                 }
5180                 return;
5181         default:
5182                 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5183                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5184         }
5185 }
5186
5187
5188
5189
5190 //              Gather the set of special operator or comparison functions referenced by this se.
5191
5192 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5193         col_id ci;
5194         data_type *ldt, *rdt;
5195         int o;
5196         vector<scalarexp_t *> operands;
5197
5198         switch(se->get_operator_type()){
5199         case SE_LITERAL:
5200                 if( se->get_literal()->constructor_name() != "")
5201                         fcn_set.insert( se->get_literal()->constructor_name() );
5202                 return;
5203         case SE_PARAM:
5204                 return;
5205 //                      SE_IFACE_PARAM should not exist when this is called.
5206         case SE_UNARY_OP:
5207                 ldt = se->get_left_se()->get_data_type();
5208                 if(ldt->complex_operator(se->get_op()) ){
5209                         fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5210                 }
5211                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5212                 return;
5213         case SE_BINARY_OP:
5214                 ldt = se->get_left_se()->get_data_type();
5215                 rdt = se->get_right_se()->get_data_type();
5216
5217                 if(ldt->complex_operator(rdt, se->get_op()) ){
5218                         fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5219                 }
5220                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5221                 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
5222                 return;
5223         case SE_COLREF:
5224                 return;
5225         case SE_AGGR_STAR:
5226                 return;
5227         case SE_AGGR_SE:
5228                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5229                 return;
5230         case SE_FUNC:
5231                 operands = se->get_operands();
5232                 for(o=0;o<operands.size();o++){
5233                         gather_se_opcmp_fcns(operands[o], fcn_set);
5234                 }
5235                 return;
5236         default:
5237                 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5238                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5239                 exit(1);
5240         }
5241 }
5242
5243
5244 //              Gather the set of special operator or comparison functions referenced by this se.
5245
5246 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5247         data_type *ldt, *rdt;
5248         vector<scalarexp_t *> operands;
5249         int o;
5250
5251         switch(pr->get_operator_type()){
5252         case PRED_IN:
5253                 ldt = pr->get_left_se()->get_data_type();
5254                 if(ldt->complex_comparison(ldt) ){
5255                         fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5256                 }
5257                 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
5258                 return;
5259         case PRED_COMPARE:
5260                 ldt = pr->get_left_se()->get_data_type();
5261                 rdt = pr->get_right_se()->get_data_type();
5262                 if(ldt->complex_comparison(rdt) ){
5263                         fcn_set.insert( ldt->get_comparison_fcn(rdt) );
5264                 }
5265                 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5266                 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
5267                 return;
5268         case PRED_UNARY_OP:
5269                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5270                 return;
5271         case PRED_BINARY_OP:
5272                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5273                 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
5274                 return;
5275         case PRED_FUNC:
5276                 operands = pr->get_op_list();
5277                 for(o=0;o<operands.size();o++){
5278                         gather_se_opcmp_fcns(operands[o], fcn_set);
5279                 }
5280                 return;
5281         default:
5282                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
5283                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5284         }
5285 }
5286
5287
5288
5289
5290 //              find the temporal variable divisor if any.
5291 //              Only forms allowed : temporal_colref, temporal_colref/const
5292 //              temporal_colref/const + const
5293
5294
5295 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5296         long long int retval = 0;
5297         data_type *ldt, *rdt;
5298         int o;
5299         vector<scalarexp_t *> operands;
5300         scalarexp_t *t_se, *c_se;
5301         string the_op;
5302
5303         switch(se->get_operator_type()){
5304         case SE_LITERAL:
5305                 return(-1);
5306         case SE_PARAM:
5307                 return(-1);
5308 //                      SE_IFACE_PARAM should not exist when this is called.
5309         case SE_UNARY_OP:
5310                 return(-1);
5311         case SE_BINARY_OP:
5312                 ldt = se->get_left_se()->get_data_type();
5313                 if(ldt->is_temporal()){
5314                         t_se = se->get_left_se();
5315                         c_se = se->get_right_se();
5316                 }else{
5317                         t_se = se->get_left_se();
5318                         c_se = se->get_right_se();
5319                 }
5320                 if((! t_se->get_data_type()->is_temporal()) ||  c_se->get_data_type()->is_temporal())
5321                         return -1;
5322
5323                 the_op = se->get_op();
5324                 if(the_op == "+" || the_op == "-")
5325                         return find_temporal_divisor(t_se, gbt,fnm);
5326                 if(the_op == "/"){
5327                         if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5328                                 fnm = t_se->get_colref()->get_field();
5329                                 string lits = c_se->get_literal()->to_string();
5330                                 sscanf(lits.c_str(),"%qd",&retval);
5331                                 return retval;
5332                         }
5333                 }
5334
5335                 return -1;
5336         case SE_COLREF:
5337                 if(se->is_gb()){
5338                         return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
5339                 }
5340                 if(se->get_data_type()->is_temporal()){
5341                         fnm = se->get_colref()->get_field();
5342                         return 1;
5343                 }
5344                 return 0;
5345         case SE_AGGR_STAR:
5346                 return -1;
5347         case SE_AGGR_SE:
5348                 return -1;
5349         case SE_FUNC:
5350                 return -1;
5351         default:
5352                 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5353                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
5354                 exit(1);
5355         }
5356 }
5357
5358
5359 //                      impute_colnames:
5360 //                      Create meaningful but unique names for the columns.
5361 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5362         set<string> curr_names;
5363         int s;
5364         for(s=0;s<sel_list.size();++s){
5365                 curr_names.insert(sel_list[s]->name);
5366         }
5367         return impute_colname(curr_names, se);
5368 }
5369
5370 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5371 string ret;
5372 scalarexp_t *seo;
5373 vector<scalarexp_t *> operand_list;
5374 string opstr;
5375
5376         switch(se->get_operator_type()){
5377         case SE_LITERAL:
5378                 ret = "Literal";
5379                 break;
5380     case SE_PARAM:
5381                 ret = "Param_" + se->get_param_name();
5382                 break;
5383     case SE_IFACE_PARAM:
5384                 ret = "Iparam_" + se->get_ifpref()->get_pname();
5385                 break;
5386     case SE_COLREF:
5387                 ret =  se->get_colref()->get_field() ;
5388                 break;
5389     case SE_UNARY_OP:
5390     case SE_BINARY_OP:
5391                 ret = "Field";
5392                 break;
5393     case SE_AGGR_STAR:
5394                 ret = "Cnt";
5395                 break;
5396     case SE_AGGR_SE:
5397                 ret = se->get_op();
5398                 seo = se->get_left_se();
5399                 switch(se->get_left_se()->get_operator_type()){
5400                 case SE_PARAM:
5401                         ret += "_PARAM_"+seo->get_param_name();
5402                         break;
5403                 case SE_IFACE_PARAM:
5404                         ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5405                         break;
5406                 case SE_COLREF:
5407                         opstr =  seo->get_colref()->get_field();
5408                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5409                                 ret += "_" + opstr;
5410                         }else{
5411                                 ret = opstr;
5412                         }
5413                         break;
5414                 case SE_AGGR_STAR:
5415                 case SE_AGGR_SE:
5416                         opstr = seo->get_op();
5417                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5418                                 ret += "_" + seo->get_op();
5419                         }else{
5420                                 ret = opstr;
5421                         }
5422                         break;
5423                 case SE_FUNC:
5424                         opstr = seo->get_op();
5425                         ret += "_" + seo->get_op();
5426                         break;
5427         case SE_UNARY_OP:
5428         case SE_BINARY_OP:
5429                         ret += "_SE";
5430                         break;
5431                 default:
5432                         ret += "_";
5433                         break;
5434                 }
5435                 break;
5436         case SE_FUNC:
5437                 ret = se->get_op();
5438                 operand_list = se->get_operands();
5439                 if(operand_list.size() > 0){
5440                         seo = operand_list[0];
5441                         switch(seo->get_operator_type()){
5442                         case SE_PARAM:
5443                                 ret += "_PARAM_"+seo->get_param_name();
5444                                 break;
5445                         case SE_IFACE_PARAM:
5446                                 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5447                                 break;
5448                         case SE_COLREF:
5449                                 ret += "_" + seo->get_colref()->get_field();
5450                                 break;
5451                         case SE_AGGR_STAR:
5452                         case SE_AGGR_SE:
5453                         case SE_FUNC:
5454                                 ret += "_" + seo->get_op();
5455                                 break;
5456                 case SE_UNARY_OP:
5457                 case SE_BINARY_OP:
5458                                 ret += "_SE";
5459                         break;
5460                         default:
5461                                 ret += "_";
5462                                 break;
5463                         }
5464                 }else{
5465                         ret += "_func";
5466                 }
5467                 break;
5468         }
5469
5470         if(ret == "Field"){
5471                 if(curr_names.count("Field0") == 0)
5472                         ret = "Field0";
5473         }
5474         int iter = 1;
5475         string base = ret;
5476         while(curr_names.count(ret) > 0){
5477                 char tmpstr[500];
5478                 sprintf(tmpstr,"%s%d",base.c_str(),iter);
5479                 ret = tmpstr;
5480                 iter++;
5481         }
5482
5483
5484         curr_names.insert(ret);
5485         return(ret);
5486
5487 }
5488
5489
5490
5491 //////////////////////////////////////////////////////////////////////
5492 //////////////          Methods of defined classes ///////////////////////
5493 //////////////////////////////////////////////////////////////////////
5494
5495 //              helper fcn to enable col_id as map key.
5496
5497   bool operator<(const col_id &cr1, const col_id &cr2){
5498         if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5499         if(cr1.tblvar_ref == cr2.tblvar_ref)
5500            return (cr1.field < cr2.field);
5501         return(false);
5502   }
5503
5504
5505 //              Process the GB variables.
5506 //              At parse time, GB vars are either GB_COLREF,
5507 //              or GB_COMPUTED if the AS keyword is used.
5508 //              Cast GB vars as named entities with a SE as
5509 //              their definition (the colref in the case of GB_COLREF).
5510 //
5511 //              TODO: if there is a gbref in a gbdef,
5512 //              then I won't be able to compute the value without
5513 //              a complex dependence analysis.  So verify that there is no
5514 //              gbref in any of the GBdefs.
5515 //              BUT: a GBVAR_COLREF should be converted to a regular colref,
5516 //              which is not yet done.
5517 //
5518 //              TODO : sort out issue of GBVAR naming and identification.
5519 //              Determine where it is advantageous to convert GV_COLREF
5520 //              GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5521 //              etc.
5522 //
5523 //              return -1 if there is a problem.
5524
5525 int gb_table::add_gb_attr(
5526                                                   gb_t *gb,
5527                                                   tablevar_list_t *fm,
5528                                                   table_list *schema,
5529                                                   table_exp_t *fta_tree,
5530                                                   ext_fcn_list *Ext_fcns
5531                                                   ){
5532         colref_t *cr;
5533         int retval;
5534         gb_table_entry *entry;
5535
5536         if(gb->type == GB_COLREF){
5537                 if(gb->table != "")
5538                         cr = new colref_t(
5539                                 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5540                         );
5541                 else
5542                         cr = new colref_t(gb->name.c_str());
5543
5544                 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5545                 if(tablevar_ref < 0) return(tablevar_ref);
5546
5547                 cr->set_tablevar_ref(tablevar_ref);
5548                 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5549                 cr->set_interface("");
5550                 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
5551
5552                 entry = new gb_table_entry();
5553                 entry->name.field = cr->get_field();
5554                 entry->name.tblvar_ref = tablevar_ref;
5555                 entry->definition = new scalarexp_t(cr);
5556                 entry->ref_type = GBVAR_COLREF;
5557         }else{
5558                 entry = new gb_table_entry();
5559                 entry->name.field = gb->name;
5560                 entry->name.tblvar_ref = -1;
5561                 entry->definition = gb->def;
5562                 entry->ref_type = GBVAR_SE;
5563         }
5564
5565         retval = verify_colref(entry->definition, fm, schema, NULL);
5566         if(retval < 0) return(retval);
5567
5568         retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5569         if(retval < 0) return(retval);
5570
5571 //              Verify that the gbvar def references no aggregates and no gbvars.
5572         if(count_gb_se(entry->definition) > 0){
5573                 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5574                 return(-1);
5575         }
5576         if(count_aggr_se(entry->definition, true) > 0){
5577                 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5578                 return(-1);
5579         }
5580
5581 //                      Check for duplicates
5582         int i;
5583         for(i=0;i<gtbl.size();++i){
5584                 if(entry->name.field == gtbl[i]->name.field){
5585                         fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5586                         return -1;
5587                 }
5588         }
5589
5590
5591         gtbl.push_back(entry);
5592
5593         return(1);
5594 }
5595
5596
5597 //                      Try to determine if the colref is actually
5598 //                      a gbvar ref.
5599 //                      a) if no tablename associated with the colref,
5600 //                              1) try to find a matching GB_COMPUTED gbvar.
5601 //                              2) failing that, try to match to a single tablevar
5602 //                              3) if successful, search among GB_COLREF
5603 //                      b) else, try to match the tablename to a single tablevar
5604 //                              if successful, search among GB_COLREF
5605 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5606         string c_field = cr->get_field();
5607         int c_tblref;
5608         int n_tbl;
5609         int i;
5610         vector<int> candidates;
5611
5612         if(cr->uses_default_table()){
5613                 for(i=0;i<gtbl.size();i++){
5614                         if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
5615                                 return(i);
5616                         }
5617                 }
5618                 candidates = find_source_tables(c_field, fm, schema);
5619                 if(candidates.size() != 1) return(-1); // can't find unique tablevar
5620                 for(i=0;i<gtbl.size();i++){
5621                         if(gtbl[i]->ref_type==GBVAR_COLREF &&
5622                                   c_field == gtbl[i]->name.field &&
5623                                   candidates[0] == gtbl[i]->name.tblvar_ref){
5624                                 return(i);
5625                         }
5626                 }
5627                 return(-1); // colref is not in gb table.
5628         }
5629
5630 //                      A table name must have been given.
5631         vector<tablevar_t *> fm_tbls = fm->get_table_list();
5632         string interface = cr->get_interface();
5633         string table_name = cr->get_table_name();
5634
5635
5636 //                      if no interface name is given, try to search for the table
5637 //                      name among the tablevar names first.
5638         if(interface==""){
5639                 for(i=0;i<fm_tbls.size();++i){
5640                         if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5641                                 candidates.push_back(i);
5642                 }
5643                 if(candidates.size()>1) return(-1);
5644                 if(candidates.size()==1){
5645                         for(i=0;i<gtbl.size();i++){
5646                                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5647                                         c_field == gtbl[i]->name.field &&
5648                                         candidates[0] == gtbl[i]->name.tblvar_ref){
5649                                         return(i);
5650                                 }
5651                         }
5652                         return(-1);  // match semantics of bind to tablevar name first
5653                 }
5654         }
5655
5656 //              Interface name given, or no interface but no
5657 //              no tablevar match.  Try to match on schema name.
5658         for(i=0;i<fm_tbls.size();++i){
5659                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5660                         candidates.push_back(i);
5661         }
5662         if(candidates.size() != 1) return(-1);
5663         for(i=0;i<gtbl.size();i++){
5664                 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5665                         c_field == gtbl[i]->name.field &&
5666                         candidates[0] == gtbl[i]->name.tblvar_ref){
5667                         return(i);
5668                 }
5669         }
5670
5671 //              No match found.
5672         return(-1);
5673
5674 }
5675
5676
5677
5678 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5679         if(is_builtin()){
5680                 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5681                         (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5682                         (op == "XOR_AGGR") )
5683                                 return(true);
5684         }else{
5685                 return Ext_fcns->fta_legal(fcn_id);
5686         }
5687         return(false);
5688 }
5689
5690
5691 //              Return the set of subaggregates required to compute
5692 //              the desired aggregate.  The operand of the subaggregates
5693 //              can only be * or the scalarexp used in the superaggr.
5694 //              This is indicated by the use_se vector.
5695
5696 //              Is this code generation specific?
5697
5698 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
5699         vector<string> ret;
5700
5701         if(op == "COUNT"){
5702                 ret.push_back("COUNT");
5703                 use_se.push_back(false);
5704         }
5705         if(op == "SUM"){
5706                 ret.push_back("SUM");
5707                 use_se.push_back(true);
5708         }
5709         if(op == "AVG"){
5710                 ret.push_back("SUM");
5711                 ret.push_back("COUNT");
5712                 use_se.push_back(true);
5713                 use_se.push_back(false);
5714         }
5715         if(op == "MIN"){
5716                 ret.push_back("MIN");
5717                 use_se.push_back(true);
5718         }
5719         if(op == "MAX"){
5720                 ret.push_back("MAX");
5721                 use_se.push_back(true);
5722         }
5723         if(op == "AND_AGGR"){
5724                 ret.push_back("AND_AGGR");
5725                 use_se.push_back(true);
5726         }
5727         if(op == "OR_AGGR"){
5728                 ret.push_back("OR_AGGR");
5729                 use_se.push_back(true);
5730         }
5731         if(op == "XOR_AGGR"){
5732                 ret.push_back("XOR_AGGR");
5733                 use_se.push_back(true);
5734         }
5735
5736         return(ret);
5737 }
5738
5739 //                      Code generation specific?
5740
5741 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5742         vector<data_type *> ret;
5743         data_type *dt;
5744
5745         if(op == "COUNT"){
5746                 dt = new data_type("Int"); // was Uint
5747                 ret.push_back( dt );
5748         }
5749         if(op == "SUM"){
5750                 dt = new data_type();
5751                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5752                 ret.push_back(dt);
5753         }
5754         if(op == "AVG"){
5755                 dt = new data_type();
5756                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5757                 ret.push_back( dt );
5758                 dt = new data_type("Int");
5759                 ret.push_back( dt );
5760         }
5761         if(op == "MIN"){
5762                 dt = new data_type();
5763                 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5764                 ret.push_back( dt );
5765         }
5766         if(op == "MAX"){
5767                 dt = new data_type();
5768                 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5769                 ret.push_back( dt );
5770         }
5771         if(op == "AND_AGGR"){
5772                 dt = new data_type();
5773                 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5774                 ret.push_back( dt );
5775         }
5776         if(op == "OR_AGGR"){
5777                 dt = new data_type();
5778                 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5779                 ret.push_back( dt );
5780         }
5781         if(op == "XOR_AGGR"){
5782                 dt = new data_type();
5783                 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5784                 ret.push_back( dt );
5785         }
5786
5787         return(ret);
5788 }
5789
5790 //              Code generation specific?
5791
5792 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5793         scalarexp_t *se_l, *se_r, *ret_se = NULL;
5794
5795         if(op == "COUNT"){
5796                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5797                 return(ret_se);
5798         }
5799         if(op == "SUM"){
5800                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5801                 return(ret_se);
5802         }
5803         if(op == "AVG"){
5804                 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5805                 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5806
5807                 ret_se = new scalarexp_t("/", se_l, se_r);
5808                 return(ret_se);
5809         }
5810         if(op == "MIN"){
5811                 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
5812                 return(ret_se);
5813         }
5814         if(op == "MAX"){
5815                 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
5816                 return(ret_se);
5817         }
5818         if(op == "AND_AGGR"){
5819                 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5820                 return(ret_se);
5821         }
5822         if(op == "OR_AGGR"){
5823                 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5824                 return(ret_se);
5825         }
5826         if(op == "XOR_AGGR"){
5827                 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5828                 return(ret_se);
5829         }
5830
5831         return(ret_se);
5832
5833 }
5834
5835
5836 //              Add a built-in aggr.
5837 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5838         int i;
5839
5840         for(i=0;i<agr_tbl.size();i++){
5841                 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5842                   && is_equivalent_se(se,agr_tbl[i]->operand) ){
5843 //                && is_super == agr_tbl[i]->is_superaggr())
5844                         if(is_super) agr_tbl[i]->set_super(true);
5845                         return(i);
5846                 }
5847         }
5848
5849         aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5850         agr_tbl.push_back(ate);
5851         return(agr_tbl.size() - 1);
5852 }
5853
5854 //              add a UDAF
5855 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5856         int i,o;
5857
5858         for(i=0;i<agr_tbl.size();i++){
5859                 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5860                                 && opl.size() == agr_tbl[i]->oplist.size() ){
5861 //                              && is_super == agr_tbl[i]->is_superaggr() ){
5862                         for(o=0;o<opl.size();++o){
5863                                 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
5864                                         break;
5865                         }
5866                         if(o == opl.size()){
5867                                 if(is_super) agr_tbl[i]->set_super(true);
5868                                 return i;
5869                         }
5870                 }
5871         }
5872
5873         aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5874         agr_tbl.push_back(ate);
5875         return(agr_tbl.size() - 1);
5876 }
5877
5878
5879 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5880         int i;
5881
5882         for(i=0;i<cplx_lit_tbl.size();i++){
5883                 if(l->is_equivalent(cplx_lit_tbl[i])){
5884                         hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5885                         return(i);
5886                 }
5887         }
5888
5889         cplx_lit_tbl.push_back(l);
5890         hdl_ref_tbl.push_back(is_handle_ref);
5891         return(cplx_lit_tbl.size() - 1);
5892 }
5893
5894
5895
5896 //------------------------------------------------------------
5897 //              parse_fta code
5898
5899
5900 gb_t *gb_t::duplicate(){
5901         gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5902         ret->type = type;
5903         ret->lineno = lineno;
5904         ret->charno = charno;
5905         if(def != NULL)
5906                 ret->def = dup_se(def,NULL);
5907         return ret;
5908 }