Added quantiling UDAFs
[com/gs-lite.git] / src / ftacmp / analyze_fta.cc
1 /* ------------------------------------------------\r
2 Copyright 2014 AT&T Intellectual Property\r
3    Licensed under the Apache License, Version 2.0 (the "License");\r
4    you may not use this file except in compliance with the License.\r
5    You may obtain a copy of the License at\r
6 \r
7      http://www.apache.org/licenses/LICENSE-2.0\r
8 \r
9    Unless required by applicable law or agreed to in writing, software\r
10    distributed under the License is distributed on an "AS IS" BASIS,\r
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
12    See the License for the specific language governing permissions and\r
13    limitations under the License.\r
14  ------------------------------------------- */\r
15 \r
16 #include<unistd.h>\r
17 \r
18 #include "parse_fta.h"\r
19 #include "parse_schema.h"\r
20 #include "parse_ext_fcns.h"\r
21 \r
22 \r
23 #include"analyze_fta.h"\r
24 \r
25 #include"type_objects.h"\r
26 \r
27 #include <string>\r
28 #include<list>\r
29 \r
30 using namespace std;\r
31 \r
32 extern string hostname;                 // name of the current host\r
33 \r
34 //                      Utility function\r
35 \r
//		Render a signed integer as its decimal text form.
//		The conversion is bounded by the size of the local buffer, so
//		it can never write past the end of it.
string int_to_string(int i){
	char digits[32];	// sign + digits + terminator of any int fits easily
	snprintf(digits, sizeof(digits), "%d", i);
	return string(digits);
}
43 \r
44 \r
45 //                              Globals\r
46 \r
47 //                      These represent derived information from the\r
48 //                      query analysis stage.  I extract them from a class,\r
49 //                      perhaps this is dangerous.\r
50 \r
51 static gb_table *gb_tbl=NULL;                   // Table of all group-by attributes.\r
52 static aggregate_table *aggr_tbl=NULL;  // Table of all referenced aggregates.\r
53 \r
54 // static cplx_lit_table *complex_literals=NULL;        // Table of literals with constructors.\r
55 static param_table *param_tbl=NULL;             // Table of all referenced parameters.\r
56 \r
57 vector<scalarexp_t *> partial_fcns_list;\r
58 int wh_partial_start, wh_partial_end;\r
59 int gb_partial_start, gb_partial_end;\r
60 int aggr_partial_start, aggr_partial_end;\r
61 int sl_partial_start, sl_partial_end;\r
62 \r
63 \r
//			Infer the table of a column reference and return the table ref.
//			First, extract the field name and table name.  If no table
//			name is used, search all tables to try to find a unique match.
//			Of course, plenty of error checking.
69 \r
70 //              Return the set of tablevar indices in the FROM clause\r
71 //              which contain a field with the same name.\r
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){\r
73         int i;\r
74         vector<int> tv;\r
75 //      vector<string> tn = fm->get_schema_names();\r
76         vector<int> tn = fm->get_schema_refs();\r
77 // printf("Calling find_source_tables on field %s\n",field.c_str());\r
78         for(i=0;i<tn.size();i++){\r
79 //              if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){\r
80                 if(Schema->contains_field(tn[i], field) ){\r
81                         tv.push_back(i);\r
82 // printf("\tfound in table %s\n",tn[i].c_str());\r
83                 }\r
84         }\r
85         return(tv);\r
86 }\r
87 \r
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){\r
89         int i;\r
90         string tname = ir->get_tablevar();\r
91         if(tname ==""){\r
92                 if(fm->size()==1) return 0;\r
93                 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());\r
94                 return -1;\r
95         }\r
96         for(i=0;i<fm->size();++i){\r
97                 if(tname == fm->get_tablevar_name(i))\r
98                         return i;\r
99         }\r
100         fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());\r
101         return -1;\r
102 }\r
103 \r
104 \r
105 //              compute the index of the tablevar in the from clause that the\r
106 //              colref is in.\r
107 //              return -1 if no tablevar can be imputed.\r
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){\r
109         int i;\r
110         string table_name;\r
111         int table_ref;\r
112         vector<int> tv;\r
113         vector<tablevar_t *> fm_tbls = fm->get_table_list();\r
114 \r
115         string field = cr->get_field();\r
116 \r
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());\r
118         if(cr->uses_default_table() ){\r
119                 tv = find_source_tables(field, fm, schema);\r
120                 if(tv.size() > 1){\r
121                         fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",\r
122                                 cr->get_lineno(), cr->get_charno(),field.c_str() );\r
123                         for(i=0;i<tv.size();i++){\r
124                                 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );\r
125                         }\r
126                         fprintf(stderr,"\n\tYou must specify one of these.\n");\r
127                         return(-1);\r
128                 }\r
129                 if(tv.size() == 0){\r
130                         fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",\r
131                                 cr->get_lineno(), cr->get_charno(),field.c_str() );\r
132                         return(-1);\r
133                 }\r
134 \r
135                 return(tv[0]);\r
136         }\r
137 \r
138 //                      The table source is named -- but is it a schema name\r
139 //                      or a var name?\r
140 \r
141         string interface = cr->get_interface();\r
142         table_name = cr->get_table_name();\r
143 \r
144 //              if interface is not specified, prefer to look at the tablevar names\r
145 //              Check for duplicates.\r
146         if(interface==""){\r
147                 for(i=0;i<fm_tbls.size();++i){\r
148                         if(table_name == fm_tbls[i]->get_var_name())\r
149                                 tv.push_back(i);\r
150                 }\r
151                 if(tv.size() > 1){\r
152                         fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );\r
153                         return(-1);\r
154                 }\r
155                 if(tv.size() == 1) return(tv[0]);\r
156         }\r
157 \r
158 //              Tableref not found by looking at tableref vars, or an interface\r
159 //              was specified.  Try to match on schema and interface.\r
160 //              Check for duplicates.\r
161         for(i=0;i<fm_tbls.size();++i){\r
162                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())\r
163                         tv.push_back(i);\r
164         }\r
165         if(tv.size() > 1){\r
166                 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",\r
167                         cr->get_lineno(), cr->get_charno() );\r
168                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());\r
169                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());\r
170                 return(-1);\r
171         }\r
172 \r
173         if(tv.size() == 0 ){\r
174                 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());\r
175                 if(interface != "") fprintf(stderr,"%s.",interface.c_str());\r
176                 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());\r
177                 return(-1)      ;\r
178         }\r
179 \r
180         return(tv[0]);\r
181 }\r
182 \r
183 \r
184 //                      Reset temporal properties of a scalar expression\r
185 void reset_temporal(scalarexp_t *se){\r
186         col_id ci;\r
187         vector<scalarexp_t *> operands;\r
188         int o;\r
189 \r
190         se->get_data_type()->reset_temporal();\r
191 \r
192         switch(se->get_operator_type()){\r
193         case SE_LITERAL:\r
194         case SE_PARAM:\r
195         case SE_IFACE_PARAM:\r
196         case SE_COLREF:\r
197                 return;\r
198         case SE_UNARY_OP:\r
199                 reset_temporal(se->get_left_se());\r
200                 return;\r
201         case SE_BINARY_OP:\r
202                 reset_temporal(se->get_left_se());\r
203                 reset_temporal(se->get_right_se());\r
204                 return;\r
205         case SE_AGGR_STAR:\r
206                 return;\r
207         case SE_AGGR_SE:\r
208                 reset_temporal(se->get_left_se());\r
209                 return;\r
210         case SE_FUNC:\r
211                 operands = se->get_operands();\r
212                 for(o=0;o<operands.size();o++){\r
213                         reset_temporal(operands[o]);\r
214                 }\r
215                 return;\r
216         default:\r
217                 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",\r
218                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
219                 exit(1);\r
220         }\r
221 }\r
222 \r
223 //              Verify that column references exist in their\r
224 //              declared tables.  As a side effect, assign\r
225 //              their data types.  Other side effects :\r
226 //\r
227 //              return -1 on error\r
228 \r
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,\r
230                                         table_list *schema, gb_table *gtbl){\r
231         int l_ret, r_ret;\r
232         int gb_ref;\r
233         colref_t *cr;\r
234         ifpref_t *ir;\r
235         string field, table_source, type_name;\r
236         data_type *dt;\r
237         vector<string> tn;\r
238         vector<int> tv;\r
239         int table_var;\r
240         int o;\r
241         vector<scalarexp_t *> operands;\r
242 \r
243         switch(se->get_operator_type()){\r
244         case SE_LITERAL:\r
245         case SE_PARAM:\r
246                 return(1);\r
247         case SE_IFACE_PARAM:\r
248                 ir = se->get_ifpref();\r
249                 table_var = infer_tablevar_from_ifpref(ir, fm);\r
250                 if(table_var < 0) return(table_var);\r
251                 ir->set_tablevar_ref(table_var);\r
252                 return(1);\r
253         case SE_UNARY_OP:\r
254                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );\r
255         case SE_BINARY_OP:\r
256                 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);\r
257                 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);\r
258                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);\r
259                 return(1);\r
260         case SE_COLREF:\r
261                 cr = se->get_colref();\r
262                 field = cr->get_field();\r
263 \r
264 //                              Determine if this is really a GB ref.\r
265 //                              (the parser can only see that its a colref).\r
266                 if(gtbl != NULL){\r
267                         gb_ref = gtbl->find_gb(cr, fm, schema);\r
268                 }else{\r
269                         gb_ref = -1;\r
270                 }\r
271 \r
272                 se->set_gb_ref(gb_ref);\r
273 \r
274                 if(gb_ref < 0){\r
275 //                              Its a colref, verify its existance and\r
276 //                              record the data type.\r
277                         table_var = infer_tablevar_from_colref(cr,fm,schema);\r
278                         if(table_var < 0) return(table_var);\r
279 \r
280         //                      Store the table ref in the colref.\r
281                         cr->set_tablevar_ref(table_var);\r
282                         cr->set_schema_ref(fm->get_schema_ref(table_var));\r
283                         cr->set_interface("");\r
284                         cr->set_table_name(fm->get_tablevar_name(table_var));\r
285 \r
286 \r
287                         type_name = schema->get_type_name(cr->get_schema_ref(), field);\r
288                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);\r
289                         dt = new data_type(type_name, modifiers);\r
290                         se->set_data_type(dt);\r
291                 }else{\r
292 //                              Else, its a gbref, use the GB var's data type.\r
293                         se->set_data_type(gtbl->get_data_type(gb_ref));\r
294                 }\r
295 \r
296                 return(1);\r
297         case SE_AGGR_STAR:\r
298                 return(1);\r
299         case SE_AGGR_SE:\r
300                 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );\r
301         case SE_FUNC:\r
302                 operands = se->get_operands();\r
303                 r_ret = 1;\r
304                 for(o=0;o<operands.size();o++){\r
305                         l_ret = verify_colref(operands[o], fm, schema, gtbl);\r
306                         if(l_ret < 0) r_ret = -1;\r
307                 }\r
308                 return(r_ret);\r
309         default:\r
310                 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",\r
311                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
312                 return(-1);\r
313         }\r
314         return(-1);\r
315 }\r
316 \r
317 \r
318 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){\r
319         int l_ret, r_ret;\r
320         std::vector<scalarexp_t *> op_list;\r
321         int o;\r
322 \r
323         switch(pr->get_operator_type()){\r
324         case PRED_IN:\r
325                 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );\r
326         case PRED_COMPARE:\r
327                 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;\r
328                 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;\r
329                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
330                 return(1);\r
331         case PRED_UNARY_OP:\r
332                 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));\r
333         case PRED_BINARY_OP:\r
334                 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;\r
335                 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;\r
336                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
337                 return(1);\r
338         case PRED_FUNC:\r
339                 op_list = pr->get_op_list();\r
340                 l_ret = 0;\r
341                 for(o=0;o<op_list.size();++o){\r
342                         if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;\r
343                 }\r
344                 return(l_ret);\r
345         default:\r
346                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",\r
347                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
348         }\r
349 \r
350         return(-1);\r
351 }\r
352 \r
353 \r
354 bool literal_only_se(scalarexp_t *se){          // really only literals.\r
355         int o;\r
356         vector<scalarexp_t *> operands;\r
357 \r
358         if(se == NULL) return(1);\r
359         switch(se->get_operator_type()){\r
360         case SE_LITERAL:\r
361                 return(true);\r
362         case SE_PARAM:\r
363                 return(false);\r
364         case SE_IFACE_PARAM:\r
365                 return(false);\r
366         case SE_UNARY_OP:\r
367                 return( literal_only_se(se->get_left_se()) );\r
368         case SE_BINARY_OP:\r
369                 return( literal_only_se(se->get_left_se()) &&\r
370                                 literal_only_se(se->get_right_se()) );\r
371         case SE_COLREF:\r
372                 return false;\r
373         case SE_AGGR_STAR:\r
374                 return false;\r
375         case SE_AGGR_SE:\r
376                 return false;\r
377                 return(1);\r
378         case SE_FUNC:\r
379                 return false;\r
380         default:\r
381                 return false;\r
382         }\r
383         return false;\r
384 }\r
385 \r
386 \r
387 \r
388 \r
389 //              Verify that column references exist in their\r
390 //              declared tables.  As a side effect, assign\r
391 //              their data types.  Other side effects :\r
392 //\r
393 \r
394 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){\r
395         int l_ret, r_ret;\r
396         int gb_ref;\r
397         colref_t *cr;\r
398         string field, table_source, type_name;\r
399         data_type *dt;\r
400         vector<string> tn;\r
401         vector<int> tv;\r
402         int tablevar_ref;\r
403         int o;\r
404         vector<scalarexp_t *> operands;\r
405 \r
406         if(se == NULL) return(1);\r
407 \r
408         switch(se->get_operator_type()){\r
409         case SE_LITERAL:\r
410                 return(1);\r
411         case SE_PARAM:\r
412                 return(1);\r
413         case SE_IFACE_PARAM:\r
414                 return(1);\r
415         case SE_UNARY_OP:\r
416                 return( bind_to_schema_se(se->get_left_se(), fm, schema) );\r
417         case SE_BINARY_OP:\r
418                 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);\r
419                 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);\r
420                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);\r
421                 return(1);\r
422         case SE_COLREF:\r
423                 if(se->is_gb()) return(1);      // gb ref not a colref.\r
424 \r
425                 cr = se->get_colref();\r
426                 field = cr->get_field();\r
427 \r
428                 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);\r
429                 if(tablevar_ref < 0){\r
430                         return(tablevar_ref);\r
431                 }else{\r
432         //                      Store the table ref in the colref.\r
433                         cr->set_tablevar_ref(tablevar_ref);\r
434                         cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));\r
435                         cr->set_interface("");\r
436                         cr->set_table_name(fm->get_tablevar_name(tablevar_ref));\r
437 \r
438 //                              Check the data type\r
439                         type_name = schema->get_type_name(cr->get_schema_ref(), field);\r
440                         param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);\r
441                         data_type dt(type_name, modifiers);\r
442 //                      if(! dt.equals(se->get_data_type()) ){\r
443 //                      if(! dt.subsumes_type(se->get_data_type()) ){\r
444                         if(! se->get_data_type()->subsumes_type(&dt) ){\r
445                                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",\r
446                                         dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());\r
447                                 return(-1);\r
448                         }\r
449                 }\r
450                 return(1);\r
451         case SE_AGGR_STAR:\r
452                 return(1);\r
453         case SE_AGGR_SE:        // Probably I should just return,\r
454                                                 // aggregate se's are explicitly bound to the schema.\r
455 //                      return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );\r
456                 return(1);\r
457         case SE_FUNC:\r
458                 if(se->get_aggr_ref() >= 0) return 1;\r
459 \r
460                 operands = se->get_operands();\r
461                 r_ret = 1;\r
462                 for(o=0;o<operands.size();o++){\r
463                         l_ret = bind_to_schema_se(operands[o], fm, schema);\r
464                         if(l_ret < 0) r_ret = -1;\r
465                 }\r
466                 return(r_ret);\r
467         default:\r
468                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",\r
469                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
470                 return(-1);\r
471         }\r
472         return(-1);\r
473 }\r
474 \r
475 \r
476 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){\r
477         int l_ret, r_ret;\r
478         vector<scalarexp_t *> op_list;\r
479         int o;\r
480 \r
481         switch(pr->get_operator_type()){\r
482         case PRED_IN:\r
483                 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );\r
484         case PRED_COMPARE:\r
485                 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;\r
486                 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;\r
487                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
488                 return(1);\r
489         case PRED_UNARY_OP:\r
490                 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));\r
491         case PRED_BINARY_OP:\r
492                 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;\r
493                 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;\r
494                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
495                 return(1);\r
496         case PRED_FUNC:\r
497                 op_list = pr->get_op_list();\r
498                 l_ret = 0;\r
499                 for(o=0;o<op_list.size();++o){\r
500                         if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;\r
501                 }\r
502                 return(l_ret);\r
503         default:\r
504                 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",\r
505                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
506         }\r
507 \r
508         return(-1);\r
509 }\r
510 \r
511 \r
512 \r
513 \r
514 \r
515 \r
516 //                      verify_colref assigned data types to the column refs.\r
517 //                      Now assign data types to all other nodes in the\r
518 //                      scalar expression.\r
519 //\r
520 //                      return -1 on error\r
521 \r
522 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){\r
523         int l_ret, r_ret;\r
524         data_type *dt;\r
525         bool bret;\r
526         vector<scalarexp_t *> operands;\r
527         vector<data_type *> odt;\r
528         int o, fcn_id;\r
529         vector<bool> handle_ind;\r
530 \r
531         switch(se->get_operator_type()){\r
532         case SE_LITERAL:\r
533                 return(constant_t);\r
534         case SE_PARAM:\r
535                 return(varying_t);\r
536         case SE_IFACE_PARAM:\r
537                 return(varying_t);              // actually, this should not be called.\r
538         case SE_UNARY_OP:\r
539                 return data_type::compute_temporal_type(\r
540                         compute_se_temporal(se->get_left_se(), tcol), se->get_op()\r
541                 );\r
542         case SE_BINARY_OP:\r
543                 return data_type::compute_temporal_type(\r
544                         compute_se_temporal(se->get_left_se(), tcol),\r
545                         compute_se_temporal(se->get_right_se(), tcol),\r
546                         se->get_left_se()->get_data_type()->get_type(),\r
547                         se->get_right_se()->get_data_type()->get_type(),\r
548                         se->get_op()\r
549                 );\r
550         case SE_COLREF:\r
551                 {\r
552                         col_id cid(se->get_colref() );\r
553                         if(tcol.count(cid) > 0){ return tcol[cid];\r
554                         }else{ return varying_t;}\r
555                 }\r
556         case SE_AGGR_STAR:\r
557         case SE_AGGR_SE:\r
558         case SE_FUNC:\r
559         default:\r
560                 return varying_t;\r
561         }\r
562         return(varying_t);\r
563 }\r
564 \r
565 \r
566 \r
567 //                      verify_colref assigned data types to the column refs.\r
568 //                      Now assign data types to all other nodes in the\r
569 //                      scalar expression.\r
570 //\r
571 //                      return -1 on error\r
572 \r
573 int assign_data_types(scalarexp_t *se, table_list *schema,\r
574                                                 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){\r
575         int l_ret, r_ret;\r
576         data_type *dt;\r
577         bool bret;\r
578         vector<scalarexp_t *> operands;\r
579         vector<data_type *> odt;\r
580         int o, fcn_id;\r
581         vector<bool> handle_ind;\r
582         vector<bool> constant_ind;\r
583 \r
584         switch(se->get_operator_type()){\r
585         case SE_LITERAL:\r
586                 dt = new data_type( se->get_literal()->get_type() );\r
587                 se->set_data_type(dt);\r
588                 if( ! dt->is_defined() ){\r
589                         fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",\r
590                                 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );\r
591                         return(-1);\r
592                 }else{\r
593                         return(1);\r
594                 }\r
595         case SE_PARAM:\r
596                 {\r
597                         string pname = se->get_param_name();\r
598                         dt = param_tbl->get_data_type(pname);\r
599 // A SE_PARRAM can change its value mid-query so using one\r
600 // to set a window is dangerous.  TODO check for this and issue a warning.\r
601                         dt->set_temporal(constant_t);\r
602                         se->set_data_type(dt);\r
603                         if( ! dt->is_defined() ){\r
604                                 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",\r
605                                         pname.c_str(), se->get_lineno(),se->get_charno() );\r
606                                 return(-1);\r
607                         }\r
608                         return(1);\r
609                 }\r
610         case SE_IFACE_PARAM:\r
611                 dt = new data_type( "STRING" );\r
612                 se->set_data_type(dt);\r
613                 return(1);\r
614         case SE_UNARY_OP:\r
615                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);\r
616                 if(l_ret < 0) return -1;\r
617 \r
618                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );\r
619                 se->set_data_type(dt);\r
620                 if( ! dt->is_defined() ){\r
621                         fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",\r
622                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),\r
623                                 se->get_lineno(), se->get_charno() );\r
624                         return(-1);\r
625                 }else{\r
626                         return(1);\r
627                 }\r
628         case SE_BINARY_OP:\r
629                 l_ret = assign_data_types(se->get_left_se(),  schema, fta_tree, Ext_fcns);\r
630                 r_ret = assign_data_types(se->get_right_se(),  schema, fta_tree, Ext_fcns);\r
631                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);\r
632 \r
633                 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );\r
634                 se->set_data_type(dt);\r
635                 if( ! dt->is_defined() ){\r
636                         fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",\r
637                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),\r
638                                 se->get_right_se()->get_data_type()->to_string().c_str(),\r
639                                 se->get_lineno(), se->get_charno() );\r
640                         return(-1);\r
641                 }else{\r
642                         return(1);\r
643                 }\r
644         case SE_COLREF:\r
645                 dt = se->get_data_type();\r
646                 bret = dt->is_defined();\r
647                 if( bret ){\r
648                         return(1);\r
649                 }else{\r
650                         fprintf(stderr,"ERROR, column reference type  is undefined, line =%d, char = %d, colref=%s\n",\r
651                                 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );\r
652                         return(-1);\r
653                 }\r
654         case SE_AGGR_STAR:\r
655                 dt = new data_type("Int");      // changed Uint to Int\r
656                 se->set_data_type(dt);\r
657                 return(1);\r
658         case SE_AGGR_SE:\r
659                 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);\r
660                 if(l_ret < 0) return -1;\r
661 \r
662                 dt = new data_type();\r
663                 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());\r
664                 se->set_data_type(dt);\r
665 \r
666                 if( ! dt->is_defined() ){\r
667                         fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",\r
668                                 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),\r
669                                 se->get_lineno(), se->get_charno() );\r
670                         return(-1);\r
671                 }else{\r
672                         return(1);\r
673                 }\r
674         case SE_FUNC:\r
675 \r
676                 operands = se->get_operands();\r
677                 r_ret = 1;\r
678                 for(o=0;o<operands.size();o++){\r
679                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);\r
680                         odt.push_back(operands[o]->get_data_type());\r
681                         if(l_ret < 0) r_ret = -1;\r
682                 }\r
683                 if(r_ret < 0) return(r_ret);\r
684 \r
685 //                      Is it an aggregate extraction function?\r
686                 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);\r
687                 if(fcn_id >= 0){\r
688                         int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);\r
689                         int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);\r
690                         int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);\r
691 //                              Construct a se for the subaggregate.\r
692                         vector<scalarexp_t *> op_a;\r
693                         int n_aggr_oprs = operands.size()-n_fcn_params+1;\r
694                         for(o=0;o<n_aggr_oprs;++o){\r
695                                         op_a.push_back(operands[o]);\r
696                         }\r
697 //                              check handle params\r
698                         vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);\r
699                         for(o=0;o<op_a.size();o++){\r
700                         if(handle_a[o]){\r
701                                 if(op_a[o]->get_operator_type() != SE_LITERAL &&\r
702                                                 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&\r
703                                                 op_a[o]->get_operator_type() != SE_PARAM){\r
704                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
705                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
706                                                 return(-1);\r
707                                         }\r
708                                 }\r
709                         }\r
710                         vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);\r
711                         for(o=0;o<op_a.size();o++){\r
712                         if(is_const_a[o]){\r
713                                 if(op_a[o]->get_data_type()->get_temporal() != constant_t){\r
714                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",\r
715                                 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
716                                                 return(-1);\r
717                                         }\r
718                                 }\r
719                         }\r
720 \r
721                         scalarexp_t *se_a  = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);\r
722                         se_a->set_fcn_id(subaggr_id);\r
723                         se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));\r
724                         se_a->set_aggr_id(0);           // label this as a UDAF.\r
725 \r
726 \r
727 //                              Change this se to be the actual function\r
728                         vector<scalarexp_t *> op_f;\r
729                         op_f.push_back(se_a);\r
730                         for(o=n_aggr_oprs;o<operands.size();++o)\r
731                                 op_f.push_back(operands[o]);\r
732 //                              check handle params\r
733                         vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);\r
734                         for(o=0;o<op_f.size();o++){\r
735                         if(handle_f[o]){\r
736                                 if(op_f[o]->get_operator_type() != SE_LITERAL &&\r
737                                                 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&\r
738                                                 op_f[o]->get_operator_type() != SE_PARAM){\r
739                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
740                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
741                                                 return(-1);\r
742                                         }\r
743                                 }\r
744                         }\r
745                         vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);\r
746                         for(o=0;o<op_f.size();o++){\r
747                         if(is_const_f[o]){\r
748                                 if(op_f[o]->get_data_type()->get_temporal() != constant_t){\r
749                                                 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n  Line=%d, char=%d.\n",\r
750                                 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
751                                                 return(-1);\r
752                                         }\r
753                                 }\r
754                         }\r
755 \r
756                         se->param_list = op_f;\r
757                         se->op = Ext_fcns->get_fcn_name(actual_fcn_id);\r
758                         se->set_fcn_id(actual_fcn_id);\r
759                         se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));\r
760                         return(1);\r
761                 }\r
762                 if(fcn_id == -2){\r
763                         fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());\r
764                 }\r
765 \r
766 //                      Is it a UDAF?\r
767                 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);\r
768                 if(fcn_id >= 0){\r
769                         se->set_fcn_id(fcn_id);\r
770                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));\r
771                         se->set_aggr_id(0);             // label this as a UDAF.\r
772 //                      Finally, verify that all HANDLE parameters are literals or params.\r
773                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );\r
774                         for(o=0;o<operands.size();o++){\r
775                                 if(handle_ind[o]){\r
776                                         if(operands[o]->get_operator_type() != SE_LITERAL &&\r
777                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&\r
778                                                 operands[o]->get_operator_type() != SE_PARAM){\r
779                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
780                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
781                                                 return(-1);\r
782                                         }\r
783                                 }\r
784                         }\r
785                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());\r
786                         for(o=0;o<operands.size();o++){\r
787                         if(constant_ind[o]){\r
788                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){\r
789                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",\r
790                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
791                                                 return(-1);\r
792                                         }\r
793                                 }\r
794                         }\r
795 \r
796 //      UDAFS as superaggregates not yet supported.\r
797 if(se->is_superaggr()){\r
798 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
799 se->set_superaggr(false);\r
800 }\r
801                         return(1);\r
802                 }\r
803                 if(fcn_id == -2){\r
804                         fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());\r
805                 }\r
806 \r
807 //                      Is it a stateful fcn?\r
808                 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);\r
809                 if(fcn_id >= 0){\r
810                         se->set_fcn_id(fcn_id);\r
811                         se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));\r
812                         se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun\r
813 //                      Finally, verify that all HANDLE parameters are literals or params.\r
814                         handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );\r
815                         for(o=0;o<operands.size();o++){\r
816                                 if(handle_ind[o]){\r
817                                         if(operands[o]->get_operator_type() != SE_LITERAL &&\r
818                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&\r
819                                                 operands[o]->get_operator_type() != SE_PARAM){\r
820                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
821                                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
822                                                 return(-1);\r
823                                         }\r
824                                 }\r
825                         }\r
826                         constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());\r
827                         for(o=0;o<operands.size();o++){\r
828                         if(constant_ind[o]){\r
829                                 if(operands[o]->get_data_type()->get_temporal() != constant_t){\r
830                                                 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s  must be constant.\n  Line=%d, char=%d.\n",\r
831                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
832                                                 return(-1);\r
833                                         }\r
834                                 }\r
835                         }\r
836 \r
837                         if(se->is_superaggr()){\r
838                                 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
839                         }\r
840                         return(1);\r
841                 }\r
842                 if(fcn_id == -2){\r
843                         fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());\r
844                 }\r
845 \r
846 \r
847 //                      Is it a regular function?\r
848                 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);\r
849                 if( fcn_id < 0 ){\r
850                         fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());\r
851                         for(o=0;o<operands.size();o++){\r
852                                 if(o>0) fprintf(stderr,", ");\r
853                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());\r
854                         }\r
855                         fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );\r
856                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");\r
857 \r
858                         return(-1);\r
859                 }\r
860 \r
861                 se->set_fcn_id(fcn_id);\r
862                 dt = Ext_fcns->get_fcn_dt(fcn_id);\r
863 \r
864                 if(! dt->is_defined() ){\r
865                         fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());\r
866                         for(o=0;o<operands.size();o++){\r
867                                 if(o>0) fprintf(stderr,", ");\r
868                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());\r
869                         }\r
870                         fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );\r
871                         return(-1);\r
872                 }\r
873 \r
874 //                      Finally, verify that all HANDLE parameters are literals or params.\r
875                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );\r
876                 for(o=0;o<operands.size();o++){\r
877                         if(handle_ind[o]){\r
878                                 if(operands[o]->get_operator_type() != SE_LITERAL &&\r
879                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&\r
880                                                 operands[o]->get_operator_type() != SE_PARAM){\r
881                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
882                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
883                                         return(-1);\r
884                                 }\r
885                         }\r
886                 }\r
887                 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());\r
888                 for(o=0;o<operands.size();o++){\r
889                 if(constant_ind[o]){\r
890                         if(operands[o]->get_data_type()->get_temporal() != constant_t){\r
891                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s  must be constant.\n  Line=%d, char=%d.\n",\r
892                         o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
893                                         return(-1);\r
894                                 }\r
895                         }\r
896                 }\r
897 \r
898 \r
899                 if(se->is_superaggr()){\r
900                         fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n  Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
901                 }\r
902 \r
903                 se->set_data_type(dt);\r
904                 return(1);\r
905         default:\r
906                 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",\r
907                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
908                 return(-1);\r
909         }\r
910         return(-1);\r
911 }\r
912 \r
913 \r
914 int assign_predicate_data_types(predicate_t *pr, table_list *schema,\r
915                                                         table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){\r
916         int l_ret, r_ret;\r
917         int i;\r
918         data_type *dt, *dtl;\r
919         vector<data_type *> odt;\r
920         vector<literal_t *> litl;\r
921         vector<scalarexp_t *> operands;\r
922         vector<bool> handle_ind;\r
923         vector<bool> constant_ind;\r
924         int o, fcn_id;\r
925 \r
926         switch(pr->get_operator_type()){\r
927         case PRED_IN:\r
928                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);\r
929                 litl = pr->get_lit_vec();\r
930                 dt = pr->get_left_se()->get_data_type();\r
931 \r
932                 for(i=0;i<litl.size();i++){\r
933                         dtl = new data_type( litl[i]->get_type() );\r
934                         if( ! dt->is_comparable(dtl,pr->get_op()) ){\r
935                                 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",\r
936                                         litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );\r
937                                 delete dtl;\r
938                                 return(-1);\r
939                         }\r
940                         delete dtl;\r
941                 }\r
942                 return(1);\r
943         case PRED_COMPARE:\r
944                 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;\r
945                 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;\r
946                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
947 \r
948                 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){\r
949                         fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",\r
950                                 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),\r
951                                  pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );\r
952                         return(-1);\r
953                 }else{\r
954                         return(1);\r
955                 }\r
956         case PRED_UNARY_OP:\r
957                 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));\r
958         case PRED_BINARY_OP:\r
959                 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);\r
960                 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);\r
961                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
962                 return(1);\r
963         case PRED_FUNC:\r
964                 operands = pr->get_op_list();\r
965                 r_ret = 1;\r
966                 for(o=0;o<operands.size();o++){\r
967                         l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);\r
968                         odt.push_back(operands[o]->get_data_type());\r
969                         if(l_ret < 0) r_ret = -1;\r
970                 }\r
971                 if(r_ret < 0) return(r_ret);\r
972 \r
973                 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);\r
974                 if( fcn_id < 0 ){\r
975                         fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());\r
976                         for(o=0;o<operands.size();o++){\r
977                                 if(o>0) fprintf(stderr,", ");\r
978                                 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());\r
979                         }\r
980                         fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );\r
981                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");\r
982                         return(-1);\r
983                 }\r
984 \r
985 //              ext_fcn_set.insert(fcn_id);\r
986                 pr->set_fcn_id(fcn_id);\r
987 \r
988 //                      Finally, verify that all HANDLE parameters are literals or params.\r
989                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );\r
990                 for(o=0;o<operands.size();o++){\r
991                         if(handle_ind[o]){\r
992                                 if(operands[o]->get_operator_type() != SE_LITERAL &&\r
993                                                 operands[o]->get_operator_type() != SE_IFACE_PARAM &&\r
994                                                 operands[o]->get_operator_type() != SE_PARAM){\r
995                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
996                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());\r
997                                         exit(1);\r
998                                 }\r
999                         }\r
1000                 }\r
1001                 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());\r
1002                 for(o=0;o<operands.size();o++){\r
1003                 if(constant_ind[o]){\r
1004                         if(operands[o]->get_data_type()->get_temporal() != constant_t){\r
1005                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s  must be constant.\n  Line=%d, char=%d.\n",\r
1006                         o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());\r
1007                                         exit(1);\r
1008                                 }\r
1009                         }\r
1010                 }\r
1011 \r
1012 \r
1013 //                      Check if this predicate function is special sampling function\r
1014                 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());\r
1015 \r
1016 \r
1017                 return(l_ret);\r
1018         default:\r
1019                 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",\r
1020                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1021         }\r
1022 \r
1023         return(-1);\r
1024 }\r
1025 \r
1026 \r
1027 \r
1028 /////////////////////////////////////////////////////////////////////\r
1029 ////////////////                Make a deep copy of a se / pred tree\r
1030 /////////////////////////////////////////////////////////////////////\r
1031 \r
1032 \r
1033 //              duplicate a select element\r
1034 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){\r
1035         return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());\r
1036 }\r
1037 \r
1038 //              duplicate a scalar expression.\r
1039 scalarexp_t *dup_se(scalarexp_t *se,\r
1040                                   aggregate_table *aggr_tbl\r
1041                                  ){\r
1042   int p;\r
1043   vector<scalarexp_t *> operand_list;\r
1044   vector<data_type *> dt_signature;\r
1045   scalarexp_t *ret_se, *l_se, *r_se;\r
1046 \r
1047   switch(se->get_operator_type()){\r
1048     case SE_LITERAL:\r
1049                 ret_se = new scalarexp_t(se->get_literal());\r
1050                 ret_se->use_decorations_of(se);\r
1051                 return(ret_se);\r
1052 \r
1053     case SE_PARAM:\r
1054                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());\r
1055                 ret_se->use_decorations_of(se);\r
1056                 return(ret_se);\r
1057 \r
1058     case SE_IFACE_PARAM:\r
1059                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());\r
1060                 ret_se->use_decorations_of(se);\r
1061                 return(ret_se);\r
1062 \r
1063     case SE_COLREF:\r
1064                 ret_se = new scalarexp_t(se->get_colref()->duplicate());\r
1065                 ret_se->rhs.scalarp = se->rhs.scalarp;  // carry along notation\r
1066                 ret_se->use_decorations_of(se);\r
1067                 return(ret_se);\r
1068 \r
1069     case SE_UNARY_OP:\r
1070                 l_se = dup_se(se->get_left_se(),  aggr_tbl);\r
1071                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);\r
1072                 ret_se->use_decorations_of(se);\r
1073                 return(ret_se);\r
1074 \r
1075     case SE_BINARY_OP:\r
1076                 l_se = dup_se(se->get_left_se(), aggr_tbl);\r
1077                 r_se = dup_se(se->get_right_se(), aggr_tbl);\r
1078 \r
1079                 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);\r
1080                 ret_se->use_decorations_of(se);\r
1081 \r
1082                 return(ret_se);\r
1083 \r
1084     case SE_AGGR_STAR:\r
1085                 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());\r
1086                 ret_se->use_decorations_of(se);\r
1087                 return(ret_se);\r
1088 \r
1089     case SE_AGGR_SE:\r
1090                 l_se = dup_se(se->get_left_se(),  aggr_tbl);\r
1091                 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);\r
1092                 ret_se->use_decorations_of(se);\r
1093                 return(ret_se);\r
1094 \r
1095         case SE_FUNC:\r
1096                 {\r
1097                         operand_list = se->get_operands();\r
1098                         vector<scalarexp_t *> new_operands;\r
1099                         for(p=0;p<operand_list.size();p++){\r
1100                                 l_se = dup_se(operand_list[p], aggr_tbl);\r
1101                                 new_operands.push_back(l_se);\r
1102                         }\r
1103 \r
1104                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);\r
1105                         ret_se->use_decorations_of(se);\r
1106                         return(ret_se);\r
1107                 }\r
1108 \r
1109         default:\r
1110                 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());\r
1111                 exit(1);\r
1112         break;\r
1113   }\r
1114   return(NULL);\r
1115 \r
1116 }\r
1117 \r
1118 \r
1119 \r
1120 predicate_t *dup_pr(predicate_t *pr,\r
1121                                                  aggregate_table *aggr_tbl\r
1122                                                  ){\r
1123 \r
1124   vector<literal_t *> llist;\r
1125   scalarexp_t *se_l, *se_r;\r
1126   predicate_t *pr_l, *pr_r, *ret_pr;\r
1127   vector<scalarexp_t *> op_list, new_op_list;\r
1128   int o;\r
1129 \r
1130 \r
1131         switch(pr->get_operator_type()){\r
1132         case PRED_IN:\r
1133                 se_l = dup_se(pr->get_left_se(), aggr_tbl);\r
1134                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());\r
1135                 return(ret_pr);\r
1136 \r
1137         case PRED_COMPARE:\r
1138                 se_l = dup_se(pr->get_left_se(), aggr_tbl);\r
1139                 se_r = dup_se(pr->get_right_se(),  aggr_tbl);\r
1140                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);\r
1141                 return(ret_pr);\r
1142 \r
1143         case PRED_UNARY_OP:\r
1144                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);\r
1145                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);\r
1146                 return(ret_pr);\r
1147 \r
1148         case PRED_BINARY_OP:\r
1149                 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);\r
1150                 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);\r
1151                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);\r
1152                 return(ret_pr);\r
1153         case PRED_FUNC:\r
1154                 op_list = pr->get_op_list();\r
1155                 for(o=0;o<op_list.size();++o){\r
1156                         se_l = dup_se(op_list[o], aggr_tbl);\r
1157                         new_op_list.push_back(se_l);\r
1158                 }\r
1159                 ret_pr=  new predicate_t(pr->get_op().c_str(), new_op_list);\r
1160                 ret_pr->set_fcn_id(pr->get_fcn_id());\r
1161                 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;\r
1162                 return(ret_pr);\r
1163 \r
1164         default:\r
1165                 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",\r
1166                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1167                 exit(1);\r
1168         }\r
1169 \r
1170         return(0);\r
1171 \r
1172 }\r
1173 \r
1174 table_exp_t *dup_table_exp(table_exp_t *te){\r
1175         int i;\r
1176         table_exp_t *ret = new table_exp_t();\r
1177 \r
1178         ret->query_type = te->query_type;\r
1179 \r
1180         ss_map::iterator ss_i;\r
1181         for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){\r
1182                 ret->nmap[(*ss_i).first] = (*ss_i).second;\r
1183         }\r
1184 \r
1185         for(i=0;i<te->query_params.size();++i){\r
1186                 ret->query_params.push_back(new\r
1187                  var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );\r
1188         }\r
1189 \r
1190         if(te->sl){\r
1191                 ret->sl = new select_list_t();\r
1192                 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;\r
1193                 vector<select_element *> select_list = te->sl->get_select_list();\r
1194                 for(i=0;i<select_list.size();++i){\r
1195                         scalarexp_t *se = dup_se(select_list[i]->se,NULL);\r
1196                         ret->sl->append(se,select_list[i]->name);\r
1197                 }\r
1198         }\r
1199 \r
1200         ret->fm = te->fm->duplicate();\r
1201 \r
1202         if(te->wh) ret->wh = dup_pr(te->wh,NULL);\r
1203         if(te->hv) ret->hv = dup_pr(te->hv,NULL);\r
1204         if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);\r
1205         if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);\r
1206         if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);\r
1207 \r
1208         for(i=0;i<te->gb.size();++i){\r
1209                 extended_gb_t *tmp_g =  te->gb[i]->duplicate();\r
1210                 ret->gb.push_back(tmp_g);\r
1211         }\r
1212 \r
1213         ret->mergevars = te->mergevars;\r
1214         if(te->slack)\r
1215                 ret->slack = dup_se(te->slack,NULL);\r
1216         ret->lineno = te->lineno;\r
1217         ret->charno = te->charno;\r
1218 \r
1219         return(ret);\r
1220 }\r
1221 \r
1222 \r
1223 \r
1224 \r
1225 \r
1226 \r
1227 \r
1228 /////////////////////////////////////////////////////////////////////////\r
1229 //                      Bind colrefs to a member of their FROM list\r
1230 \r
1231 void bind_colref_se(scalarexp_t *se,\r
1232                                   vector<tablevar_t *> &fm,\r
1233                                   int prev_ref, int new_ref\r
1234                                  ){\r
1235   int p;\r
1236   vector<scalarexp_t *> operand_list;\r
1237   colref_t *cr;\r
1238   ifpref_t *ir;\r
1239 \r
1240   switch(se->get_operator_type()){\r
1241     case SE_LITERAL:\r
1242     case SE_PARAM:\r
1243                 return;\r
1244     case SE_IFACE_PARAM:\r
1245                 ir = se->get_ifpref();\r
1246                 if(ir->get_tablevar_ref() == prev_ref){\r
1247                         ir->set_tablevar_ref(new_ref);\r
1248                         ir->set_tablevar(fm[new_ref]->get_var_name());\r
1249                 }\r
1250                 return;\r
1251 \r
1252     case SE_COLREF:\r
1253                 cr=se->get_colref();\r
1254                 if(cr->get_tablevar_ref() == prev_ref){\r
1255                         cr->set_tablevar_ref(new_ref);\r
1256 //                      cr->set_interface(fm[new_ref]->get_interface());\r
1257                         cr->set_table_name(fm[new_ref]->get_var_name());\r
1258                 }\r
1259                 return;\r
1260 \r
1261     case SE_UNARY_OP:\r
1262                 bind_colref_se(se->get_left_se(),  fm, prev_ref, new_ref);\r
1263                 return;\r
1264 \r
1265     case SE_BINARY_OP:\r
1266                 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);\r
1267                 bind_colref_se(se->get_right_se(),  fm, prev_ref, new_ref);\r
1268                 return;\r
1269 \r
1270     case SE_AGGR_STAR:\r
1271     case SE_AGGR_SE:\r
1272                 return;\r
1273 \r
1274         case SE_FUNC:\r
1275                 if(se->get_aggr_ref() >= 0) return;\r
1276 \r
1277                 operand_list = se->get_operands();\r
1278                 for(p=0;p<operand_list.size();p++){\r
1279                         bind_colref_se(operand_list[p], fm, prev_ref, new_ref);\r
1280                 }\r
1281                 return;\r
1282 \r
1283         default:\r
1284                 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());\r
1285                 exit(1);\r
1286         break;\r
1287   }\r
1288   return;\r
1289 \r
1290 }\r
1291 \r
1292 \r
1293 \r
1294 \r
1295 void bind_colref_pr(predicate_t *pr,\r
1296                                   vector<tablevar_t *> &fm,\r
1297                                   int prev_ref, int new_ref\r
1298                                  ){\r
1299   vector<scalarexp_t *> op_list;\r
1300   int o;\r
1301 \r
1302         switch(pr->get_operator_type()){\r
1303         case PRED_IN:\r
1304                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);\r
1305                 return;\r
1306 \r
1307         case PRED_COMPARE:\r
1308                 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);\r
1309                 bind_colref_se(pr->get_right_se(),  fm, prev_ref, new_ref);\r
1310                 return;\r
1311 \r
1312         case PRED_UNARY_OP:\r
1313                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);\r
1314                 return;\r
1315 \r
1316         case PRED_BINARY_OP:\r
1317                 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);\r
1318                 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);\r
1319                 return;\r
1320         case PRED_FUNC:\r
1321                 op_list = pr->get_op_list();\r
1322                 for(o=0;o<op_list.size();++o){\r
1323                         bind_colref_se(op_list[o], fm, prev_ref, new_ref);\r
1324                 }\r
1325                 return;\r
1326 \r
1327         default:\r
1328                 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",\r
1329                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1330                 exit(1);\r
1331         }\r
1332 \r
1333         return;\r
1334 \r
1335 }\r
1336 \r
1337 \r
1338 /////////////////////////////////////////////////////////////////////\r
1339 //              verify that the se refs only literals and params.\r
1340 //          (use to verify that the expression should stay in the hfta\r
1341 //               during a split)\r
1342 /////////////////////////////////////////////////////////////////////\r
1343 \r
1344 bool is_literal_or_param_only(scalarexp_t *se){\r
1345         int o;\r
1346         vector<scalarexp_t *> operands;\r
1347         bool sum = true;\r
1348 \r
1349         if(se == NULL) return(true);\r
1350 \r
1351         switch(se->get_operator_type()){\r
1352         case SE_LITERAL:\r
1353         case SE_PARAM:\r
1354                 return(true);\r
1355         case SE_IFACE_PARAM:\r
1356                 return(false);          // need to treat as colref\r
1357         case SE_UNARY_OP:\r
1358                 return(is_literal_or_param_only(se->get_left_se()) );\r
1359         case SE_BINARY_OP:\r
1360                 return(\r
1361                         is_literal_or_param_only(se->get_left_se()) &&\r
1362                         is_literal_or_param_only(se->get_right_se())\r
1363                         );\r
1364         case SE_COLREF:\r
1365                 return(false);\r
1366         case SE_AGGR_STAR:\r
1367         case SE_AGGR_SE:\r
1368                 return(false);\r
1369         case SE_FUNC:\r
1370 //                      The fcn might have special meaning at the lfta ...\r
1371                 return(false);\r
1372 \r
1373         default:\r
1374                 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",\r
1375                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1376                 exit(1);\r
1377         }\r
1378         return(0);\r
1379 }\r
1380 \r
1381 \r
1382 \r
1383 /////////////////////////////////////////////////////////////////////\r
1384 //              Search for gb refs.\r
1385 //          (use to verify that no gbrefs in a gb def.)\r
1386 /////////////////////////////////////////////////////////////////////\r
1387 \r
1388 \r
1389 int count_gb_se(scalarexp_t *se){\r
1390         int o;\r
1391         vector<scalarexp_t *> operands;\r
1392         int sum = 0;\r
1393 \r
1394         if(se == NULL) return(0);\r
1395 \r
1396         switch(se->get_operator_type()){\r
1397         case SE_LITERAL:\r
1398         case SE_PARAM:\r
1399         case SE_IFACE_PARAM:\r
1400                 return(0);\r
1401         case SE_UNARY_OP:\r
1402                 return(count_gb_se(se->get_left_se()) );\r
1403         case SE_BINARY_OP:\r
1404                 return(\r
1405                         count_gb_se(se->get_left_se()) +\r
1406                         count_gb_se(se->get_right_se())\r
1407                         );\r
1408         case SE_COLREF:\r
1409                 if(se->get_gb_ref() < 0) return(0);\r
1410                 return(1);\r
1411         case SE_AGGR_STAR:\r
1412         case SE_AGGR_SE:\r
1413                 return(0);\r
1414         case SE_FUNC:\r
1415                 operands = se->get_operands();\r
1416                 for(o=0;o<operands.size();o++){\r
1417                         sum +=  count_gb_se(operands[o]);\r
1418                 }\r
1419                 return(sum);\r
1420 \r
1421         default:\r
1422                 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",\r
1423                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1424                 exit(1);\r
1425         }\r
1426         return(0);\r
1427 }\r
1428 \r
1429 \r
1430 /////////////////////////////////////////////////////////////////////\r
1431 ////////////////                Search for stateful fcns.\r
1432 /////////////////////////////////////////////////////////////////////\r
1433 \r
1434 \r
1435 int se_refs_sfun(scalarexp_t *se){\r
1436         int o;\r
1437         vector<scalarexp_t *> operands;\r
1438         int sum = 0;\r
1439 \r
1440         if(se == NULL) return(0);\r
1441 \r
1442         switch(se->get_operator_type()){\r
1443         case SE_LITERAL:\r
1444         case SE_PARAM:\r
1445         case SE_IFACE_PARAM:\r
1446                 return(0);\r
1447         case SE_UNARY_OP:\r
1448                 return(se_refs_sfun(se->get_left_se()) );\r
1449         case SE_BINARY_OP:\r
1450                 return(\r
1451                         se_refs_sfun(se->get_left_se()) +\r
1452                         se_refs_sfun(se->get_right_se())\r
1453                         );\r
1454         case SE_COLREF:\r
1455                 return(0);\r
1456         case SE_AGGR_STAR:\r
1457         case SE_AGGR_SE:\r
1458                 return(0);\r
1459         case SE_FUNC:\r
1460                 operands = se->get_operands();\r
1461                 for(o=0;o<operands.size();o++){\r
1462                         sum +=  se_refs_sfun(operands[o]);\r
1463                 }\r
1464                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?\r
1465 \r
1466 //                      for now, stateful functions count as aggregates.\r
1467                 if(se->get_storage_state() != "")\r
1468                         sum++;\r
1469 \r
1470                 return(sum);\r
1471 \r
1472         default:\r
1473                 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",\r
1474                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1475                 exit(1);\r
1476         }\r
1477         return(0);\r
1478 }\r
1479 \r
1480 \r
1481 //              Return a count of the number of stateful fcns in this predicate.\r
1482 int pred_refs_sfun(predicate_t *pr){\r
1483         vector<scalarexp_t *> op_list;\r
1484         int o, aggr_sum;\r
1485 \r
1486         switch(pr->get_operator_type()){\r
1487         case PRED_IN:\r
1488                 return(se_refs_sfun(pr->get_left_se()) );\r
1489         case PRED_COMPARE:\r
1490                 return(\r
1491                         se_refs_sfun(pr->get_left_se()) +\r
1492                         se_refs_sfun(pr->get_right_se())\r
1493                 );\r
1494         case PRED_UNARY_OP:\r
1495                 return(pred_refs_sfun(pr->get_left_pr()) );\r
1496         case PRED_BINARY_OP:\r
1497                 return(\r
1498                         pred_refs_sfun(pr->get_left_pr()) +\r
1499                         pred_refs_sfun(pr->get_right_pr())\r
1500                 );\r
1501         case PRED_FUNC:\r
1502                 op_list = pr->get_op_list();\r
1503                 aggr_sum = 0;\r
1504                 for(o=0;o<op_list.size();++o){\r
1505                         aggr_sum += se_refs_sfun(op_list[o]);\r
1506                 }\r
1507                 return(aggr_sum);\r
1508 \r
1509         default:\r
1510                 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",\r
1511                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1512                 exit(1);\r
1513         }\r
1514 \r
1515         return(0);\r
1516 }\r
1517 \r
1518 //////////////////////////////////////////////////\r
1519 \r
1520 /////////////////////////////////////////////////////////////////////\r
1521 ////////////////                Search for aggregates.\r
1522 /////////////////////////////////////////////////////////////////////\r
1523 \r
1524 \r
1525 int count_aggr_se(scalarexp_t *se, bool strict){\r
1526         int o;\r
1527         vector<scalarexp_t *> operands;\r
1528         int sum = 0;\r
1529 \r
1530         if(se == NULL) return(0);\r
1531 \r
1532         switch(se->get_operator_type()){\r
1533         case SE_LITERAL:\r
1534         case SE_PARAM:\r
1535         case SE_IFACE_PARAM:\r
1536                 return(0);\r
1537         case SE_UNARY_OP:\r
1538                 return(count_aggr_se(se->get_left_se(), strict) );\r
1539         case SE_BINARY_OP:\r
1540                 return(\r
1541                         count_aggr_se(se->get_left_se(), strict) +\r
1542                         count_aggr_se(se->get_right_se(), strict)\r
1543                         );\r
1544         case SE_COLREF:\r
1545                 return(0);\r
1546         case SE_AGGR_STAR:\r
1547         case SE_AGGR_SE:\r
1548                 return(1);\r
1549         case SE_FUNC:\r
1550                 operands = se->get_operands();\r
1551                 for(o=0;o<operands.size();o++){\r
1552                         sum +=  count_aggr_se(operands[o], strict);\r
1553                 }\r
1554                 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?\r
1555 \r
1556 //                      now, stateful functions can count as aggregates.\r
1557 //                      if we are being strict.\r
1558                 if(! strict && se->get_storage_state() != "")\r
1559                         sum++;\r
1560 \r
1561                 return(sum);\r
1562 \r
1563         default:\r
1564                 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",\r
1565                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1566                 exit(1);\r
1567         }\r
1568         return(0);\r
1569 }\r
1570 \r
1571 \r
1572 //              Return a count of the number of aggregate fcns in this predicate.\r
1573 int count_aggr_pred(predicate_t *pr, bool strict){\r
1574         vector<scalarexp_t *> op_list;\r
1575         int o, aggr_sum;\r
1576 \r
1577         switch(pr->get_operator_type()){\r
1578         case PRED_IN:\r
1579                 return(count_aggr_se(pr->get_left_se(), strict) );\r
1580         case PRED_COMPARE:\r
1581                 return(\r
1582                         count_aggr_se(pr->get_left_se(), strict) +\r
1583                         count_aggr_se(pr->get_right_se(), strict)\r
1584                 );\r
1585         case PRED_UNARY_OP:\r
1586                 return(count_aggr_pred(pr->get_left_pr(), strict) );\r
1587         case PRED_BINARY_OP:\r
1588                 return(\r
1589                         count_aggr_pred(pr->get_left_pr(), strict) +\r
1590                         count_aggr_pred(pr->get_right_pr(), strict)\r
1591                 );\r
1592         case PRED_FUNC:\r
1593                 op_list = pr->get_op_list();\r
1594                 aggr_sum = 0;\r
1595                 for(o=0;o<op_list.size();++o){\r
1596                         aggr_sum += count_aggr_se(op_list[o], strict);\r
1597                 }\r
1598                 return(aggr_sum);\r
1599 \r
1600         default:\r
1601                 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",\r
1602                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1603                 exit(1);\r
1604         }\r
1605 \r
1606         return(0);\r
1607 }\r
1608 \r
1609 //////////////////////////////////////////////////\r
1610 ///             Analyze tablevar refs\r
1611 \r
1612 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){\r
1613         int o;\r
1614         vector<scalarexp_t *> operands;\r
1615         int vref;\r
1616         colref_t *cr;\r
1617         ifpref_t *ir;\r
1618 \r
1619         if(se == NULL) return;\r
1620 \r
1621         switch(se->get_operator_type()){\r
1622         case SE_LITERAL:\r
1623         case SE_PARAM:\r
1624                 return;\r
1625         case SE_IFACE_PARAM:\r
1626                 ir = se->get_ifpref();\r
1627                 vref = ir->get_tablevar_ref();\r
1628                 for(o=0;o<reflist.size();++o){\r
1629                         if(vref == reflist[o]) return;\r
1630                 }\r
1631                 reflist.push_back(vref);\r
1632                 return;\r
1633         case SE_UNARY_OP:\r
1634                 get_tablevar_ref_se(se->get_left_se(), reflist);\r
1635                 return;\r
1636         case SE_BINARY_OP:\r
1637                 get_tablevar_ref_se(se->get_left_se(), reflist);\r
1638                 get_tablevar_ref_se(se->get_right_se(), reflist);\r
1639                 return;\r
1640         case SE_COLREF:\r
1641                 if(se->is_gb()) return;\r
1642                 cr = se->get_colref();\r
1643                 vref = cr->get_tablevar_ref();\r
1644                 for(o=0;o<reflist.size();++o){\r
1645                         if(vref == reflist[o]) return;\r
1646                 }\r
1647                 reflist.push_back(vref);\r
1648                 return;\r
1649         case SE_AGGR_STAR:\r
1650         case SE_AGGR_SE:\r
1651                 return;\r
1652         case SE_FUNC:\r
1653                 if(se->get_aggr_ref() >= 0) return;\r
1654 \r
1655                 operands = se->get_operands();\r
1656                 for(o=0;o<operands.size();o++){\r
1657                         get_tablevar_ref_se(operands[o], reflist);\r
1658                 }\r
1659                 return;\r
1660 \r
1661         default:\r
1662                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",\r
1663                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1664                 exit(1);\r
1665         }\r
1666         return;\r
1667 }\r
1668 \r
1669 \r
1670 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){\r
1671         vector<scalarexp_t *> op_list;\r
1672         int o;\r
1673 \r
1674         switch(pr->get_operator_type()){\r
1675         case PRED_IN:\r
1676                 get_tablevar_ref_se(pr->get_left_se(),reflist);\r
1677                 return;\r
1678         case PRED_COMPARE:\r
1679                 get_tablevar_ref_se(pr->get_left_se(),reflist);\r
1680                 get_tablevar_ref_se(pr->get_right_se(),reflist);\r
1681                 return;\r
1682         case PRED_UNARY_OP:\r
1683                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);\r
1684                 return;\r
1685         case PRED_BINARY_OP:\r
1686                 get_tablevar_ref_pr(pr->get_left_pr(),reflist);\r
1687                 get_tablevar_ref_pr(pr->get_right_pr(),reflist);\r
1688                 return;\r
1689         case PRED_FUNC:\r
1690                 op_list = pr->get_op_list();\r
1691                 for(o=0;o<op_list.size();++o){\r
1692                         get_tablevar_ref_se(op_list[o],reflist);\r
1693                 }\r
1694                 return;\r
1695         default:\r
1696                 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",\r
1697                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1698         }\r
1699 \r
1700         return;\r
1701 }\r
1702 \r
1703 \r
1704 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.\r
1705 \r
1706 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){\r
1707         int agg_id;\r
1708         int o;\r
1709         vector<scalarexp_t *> operands;\r
1710 \r
1711         switch(se->get_operator_type()){\r
1712         case SE_LITERAL:\r
1713         case SE_PARAM:\r
1714         case SE_IFACE_PARAM:\r
1715                 return;\r
1716         case SE_UNARY_OP:\r
1717                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;\r
1718                 return;\r
1719         case SE_BINARY_OP:\r
1720                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);\r
1721                 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);\r
1722                 return;\r
1723         case SE_COLREF:\r
1724                 return;\r
1725         case SE_AGGR_STAR:\r
1726                 return;\r
1727         case SE_AGGR_SE:\r
1728                 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);\r
1729                 return;\r
1730         case SE_FUNC:\r
1731                 operands = se->get_operands();\r
1732                 for(o=0;o<operands.size();o++){\r
1733                         gather_fcn_states_se(operands[o], states_refd, Ext_fcns);\r
1734                 }\r
1735                 if(se->get_storage_state() != ""){\r
1736                         states_refd.insert(se->get_storage_state());\r
1737                 }\r
1738                 return;\r
1739 \r
1740         default:\r
1741                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",\r
1742                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1743                 exit(1);\r
1744         }\r
1745         return;\r
1746 }\r
1747 \r
1748 \r
1749 //                      Walk SE tree and gather STATES ref'd by STATEFUL fcns.\r
1750 \r
1751 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){\r
1752         vector<scalarexp_t *> op_list;\r
1753         int o;\r
1754 \r
1755         switch(pr->get_operator_type()){\r
1756         case PRED_IN:\r
1757                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;\r
1758                 return;\r
1759         case PRED_COMPARE:\r
1760                 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;\r
1761                 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;\r
1762                 return;\r
1763         case PRED_UNARY_OP:\r
1764                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);\r
1765                 return;\r
1766         case PRED_BINARY_OP:\r
1767                 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;\r
1768                 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;\r
1769                 return;\r
1770         case PRED_FUNC:\r
1771                 op_list = pr->get_op_list();\r
1772                 for(o=0;o<op_list.size();++o){\r
1773                         gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);\r
1774                 }\r
1775                 return;\r
1776 \r
1777         default:\r
1778                 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",\r
1779                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1780                 exit(1);\r
1781         }\r
1782 \r
1783         return;\r
1784 }\r
1785 \r
1786 \r
1787 \r
1788 \r
1789 //                      walk se tree and collect aggregates into aggregate table.\r
1790 //                      duplicate aggregates receive the same idx to the table.\r
1791 \r
1792 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){\r
1793         int agg_id;\r
1794         int o;\r
1795         vector<scalarexp_t *> operands;\r
1796 \r
1797         switch(se->get_operator_type()){\r
1798         case SE_LITERAL:\r
1799         case SE_PARAM:\r
1800         case SE_IFACE_PARAM:\r
1801                 return;\r
1802         case SE_UNARY_OP:\r
1803                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;\r
1804                 return;\r
1805         case SE_BINARY_OP:\r
1806                 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);\r
1807                 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);\r
1808                 return;\r
1809         case SE_COLREF:\r
1810                 return;\r
1811         case SE_AGGR_STAR:\r
1812                 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());\r
1813                 se->set_aggr_id(agg_id);\r
1814                 return;\r
1815         case SE_AGGR_SE:\r
1816                 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());\r
1817                 se->set_aggr_id(agg_id);\r
1818                 return;\r
1819         case SE_FUNC:\r
1820                 operands = se->get_operands();\r
1821                 for(o=0;o<operands.size();o++){\r
1822                         build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);\r
1823                 }\r
1824                 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF\r
1825                         agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));\r
1826                         se->set_aggr_id(agg_id);\r
1827                 }\r
1828                 return;\r
1829 \r
1830         default:\r
1831                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",\r
1832                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
1833                 exit(1);\r
1834         }\r
1835         return;\r
1836 }\r
1837 \r
1838 \r
1839 //                      walk se tree and collect aggregates into aggregate table.\r
1840 //                      duplicate aggregates receive the same idx to the table.\r
1841 \r
1842 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){\r
1843         vector<scalarexp_t *> op_list;\r
1844         int o;\r
1845 \r
1846         switch(pr->get_operator_type()){\r
1847         case PRED_IN:\r
1848                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;\r
1849                 return;\r
1850         case PRED_COMPARE:\r
1851                 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;\r
1852                 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;\r
1853                 return;\r
1854         case PRED_UNARY_OP:\r
1855                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);\r
1856                 return;\r
1857         case PRED_BINARY_OP:\r
1858                 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;\r
1859                 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;\r
1860                 return;\r
1861         case PRED_FUNC:\r
1862                 op_list = pr->get_op_list();\r
1863                 for(o=0;o<op_list.size();++o){\r
1864                         build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);\r
1865                 }\r
1866                 return;\r
1867 \r
1868         default:\r
1869                 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",\r
1870                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
1871                 exit(1);\r
1872         }\r
1873 \r
1874         return;\r
1875 }\r
1876 \r
1877 \r
1878 //                      Return true if the two scalar expressions\r
1879 //                      represent the same value (e.g., use to eliminate\r
1880 //                      duplicate aggregates).\r
1881 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){\r
1882         vector<scalarexp_t *> operands1;\r
1883         vector<scalarexp_t *> operands2;\r
1884         int o;\r
1885 \r
1886 //              First handle the case of nulls (e.g. COUNT aggrs)\r
1887         if(se1 == NULL && se2 == NULL) return(true);\r
1888         if(se1 == NULL || se2 == NULL) return(false);\r
1889 \r
1890 //              In all cases, must be the same oeprator type and same operator.\r
1891         if(se1->get_operator_type() != se2->get_operator_type())\r
1892                 return(false);\r
1893         if(se1->get_op() != se2->get_op() )\r
1894                 return(false);\r
1895 \r
1896         switch(se1->get_operator_type()){\r
1897         case SE_LITERAL:\r
1898                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );\r
1899         case SE_PARAM:\r
1900                 return(se1->get_param_name() == se2->get_param_name() );\r
1901         case SE_IFACE_PARAM:\r
1902                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );\r
1903         case SE_UNARY_OP:\r
1904                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );\r
1905         case SE_BINARY_OP:\r
1906                 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )\r
1907                         return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );\r
1908                 return(false);\r
1909         case SE_COLREF:\r
1910                 if(se1->is_gb() && se2->is_gb())\r
1911                         return( se1->get_gb_ref() == se2->get_gb_ref() );\r
1912                 if(se1->is_gb() || se2->is_gb())\r
1913                         return(false);\r
1914                 return(se1->get_colref()->is_equivalent(se2->get_colref()) );\r
1915         case SE_AGGR_STAR:\r
1916                 return(true);\r
1917         case SE_AGGR_SE:\r
1918                 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );\r
1919         case SE_FUNC:\r
1920                 if(se1->get_op() != se2->get_op()) return(false);\r
1921 \r
1922                 operands1 = se1->get_operands();\r
1923                 operands2 = se2->get_operands();\r
1924                 if(operands1.size() != operands2.size()) return(false);\r
1925 \r
1926                 for(o=0;o<operands1.size();o++){\r
1927                         if(! is_equivalent_se(operands1[o], operands2[o]) )\r
1928                                 return(false);\r
1929                 }\r
1930                 return(true);\r
1931         default:\r
1932                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",\r
1933                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());\r
1934                 exit(1);\r
1935         }\r
1936         return(false);\r
1937 }\r
1938 \r
1939 \r
1940 //              Similar to is_equivalent_se, but with a looser definition\r
1941 //              of equivalence of colrefs.  Here, say they are equivalent\r
1942 //              if their base table is the same.  Use to find equivalent\r
1943 //              predicates on base tables.\r
1944 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){\r
1945         vector<scalarexp_t *> operands1;\r
1946         vector<scalarexp_t *> operands2;\r
1947         int o;\r
1948 \r
1949         if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){\r
1950                 se1 = se1->get_right_se();\r
1951         }\r
1952         if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){\r
1953                 se2 = se2->get_right_se();\r
1954         }\r
1955 \r
1956 //              First handle the case of nulls (e.g. COUNT aggrs)\r
1957         if(se1 == NULL && se2 == NULL) return(true);\r
1958         if(se1 == NULL || se2 == NULL) return(false);\r
1959 \r
1960 //              In all cases, must be the same oeprator type and same operator.\r
1961         if(se1->get_operator_type() != se2->get_operator_type())\r
1962                 return(false);\r
1963         if(se1->get_op() != se2->get_op() )\r
1964                 return(false);\r
1965 \r
1966         switch(se1->get_operator_type()){\r
1967         case SE_LITERAL:\r
1968                 return(se1->get_literal()->is_equivalent(se2->get_literal()) );\r
1969         case SE_PARAM:\r
1970                 return(se1->get_param_name() == se2->get_param_name() );\r
1971         case SE_IFACE_PARAM:\r
1972                 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );\r
1973         case SE_UNARY_OP:\r
1974                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );\r
1975         case SE_BINARY_OP:\r
1976                 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )\r
1977                         return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );\r
1978                 return(false);\r
1979         case SE_COLREF:\r
1980 /*\r
1981                 if(se1->is_gb() && se2->is_gb())\r
1982                         return( se1->get_gb_ref() == se2->get_gb_ref() );\r
1983                 if(se1->is_gb() || se2->is_gb())\r
1984                         return(false);\r
1985 */\r
1986                 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );\r
1987         case SE_AGGR_STAR:\r
1988                 return(true);\r
1989         case SE_AGGR_SE:\r
1990                 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );\r
1991         case SE_FUNC:\r
1992                 if(se1->get_op() != se2->get_op()) return(false);\r
1993 \r
1994                 operands1 = se1->get_operands();\r
1995                 operands2 = se2->get_operands();\r
1996                 if(operands1.size() != operands2.size()) return(false);\r
1997 \r
1998                 for(o=0;o<operands1.size();o++){\r
1999                         if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )\r
2000                                 return(false);\r
2001                 }\r
2002                 return(true);\r
2003         default:\r
2004                 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",\r
2005                                 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());\r
2006                 exit(1);\r
2007         }\r
2008         return(false);\r
2009 }\r
2010 \r
2011 \r
2012 //              Find predicates which are equivalent when\r
2013 //              looking at the base tables.  Use to find\r
2014 //              common prefilter.\r
2015 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){\r
2016 int i, o;\r
2017 \r
2018 //              First handle the case of nulls\r
2019         if(p1 == NULL && p2 == NULL) return(true);\r
2020         if(p1 == NULL || p2 == NULL) return(false);\r
2021 \r
2022 \r
2023   if(p1->get_operator_type() != p2->get_operator_type())\r
2024          return(false);\r
2025   if(p1->get_op() != p2->get_op())\r
2026          return(false);\r
2027 \r
2028     vector<literal_t *> ll1;\r
2029     vector<literal_t *> ll2;\r
2030         vector<scalarexp_t *> op_list1, op_list2;\r
2031 \r
2032 \r
2033   switch(p2->get_operator_type()){\r
2034      case PRED_COMPARE:\r
2035         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )\r
2036             return(false);\r
2037         return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );\r
2038     break;\r
2039     case PRED_IN:\r
2040         if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )\r
2041             return(false);\r
2042         ll1 = p1->get_lit_vec();\r
2043         ll2 = p2->get_lit_vec();\r
2044         if(ll1.size() != ll2.size())\r
2045             return(false);\r
2046         for(i=0;i<ll1.size();i++){\r
2047           if(! ll1[i]->is_equivalent( ll2[i] ) )\r
2048             return(false);\r
2049         }\r
2050         return(true);\r
2051     break;\r
2052      case PRED_UNARY_OP:\r
2053         return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );\r
2054     break;\r
2055      case PRED_BINARY_OP:\r
2056         if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))\r
2057             return(false);\r
2058         return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );\r
2059     break;\r
2060          case PRED_FUNC:\r
2061                 op_list1 = p1->get_op_list();\r
2062                 op_list2 = p2->get_op_list();\r
2063                 if(op_list1.size() != op_list2.size()) return(false);\r
2064                 for(o=0;o<op_list1.size();++o){\r
2065                         if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);\r
2066                 }\r
2067                 return(true);\r
2068 \r
2069    }\r
2070 \r
2071     return(false);\r
2072 }\r
2073 \r
2074 \r
2075 \r
2076 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){\r
2077   if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))\r
2078          return(false);\r
2079   if(p1->get_fcn_id() != p2->get_fcn_id())\r
2080                 return false;\r
2081   vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());\r
2082   int o;\r
2083   vector<scalarexp_t *> op_list1 = p1->get_op_list();\r
2084   vector<scalarexp_t *> op_list2 = p2->get_op_list();\r
2085   if(op_list1.size() != op_list2.size()) return(false);\r
2086   for(o=0;o<op_list1.size();++o){\r
2087           if(cl_op[o]){\r
2088                 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )\r
2089                         return(false);\r
2090         }\r
2091   }\r
2092   return true;\r
2093 \r
2094 }\r
2095 \r
2096 \r
2097 \r
2098 \r
2099 //                      Verify that the scalar expression (in a such that clause)\r
2100 //                      is acceptable in an aggregation query.  No column\r
2101 //                      references allowed outside aggergates, except for\r
2102 //                      references to group-by attributes.\r
2103 //                      return true if OK, false if bad.\r
2104 bool verify_aggr_query_se(scalarexp_t *se){\r
2105         vector <scalarexp_t *> operands;\r
2106         int o;\r
2107 \r
2108     switch(se->get_operator_type()){\r
2109     case SE_LITERAL:\r
2110     case SE_PARAM:\r
2111     case SE_IFACE_PARAM:\r
2112         return(true );\r
2113     case SE_UNARY_OP:\r
2114         return(verify_aggr_query_se(se->get_left_se() ) );\r
2115     case SE_BINARY_OP:\r
2116         return(verify_aggr_query_se(se->get_left_se() ) &&\r
2117             verify_aggr_query_se(se->get_right_se() ) );\r
2118     case SE_COLREF:\r
2119         if(se->is_gb() ) return(true);\r
2120         fprintf(stderr,"ERROR: the select clause in an aggregate query can "\r
2121                         "only reference constants, group-by attributes, and "\r
2122                         "aggregates,  (%s) line %d, character %d.\n",\r
2123                         se->get_colref()->to_string().c_str(),\r
2124                                                 se->get_lineno(), se->get_charno() );\r
2125         return(false);\r
2126     case SE_AGGR_STAR:\r
2127     case SE_AGGR_SE:\r
2128 //                      colrefs and gbrefs allowed.\r
2129 //                      check for nested aggregation elsewhere, so just return TRUE\r
2130         return(true);\r
2131         case SE_FUNC:\r
2132 //                      If its a UDAF, just return true\r
2133                 if(se->get_aggr_ref() >= 0) return true;\r
2134 \r
2135                 operands = se->get_operands();\r
2136 \r
2137                 for(o=0;o<operands.size();o++){\r
2138                         if(! verify_aggr_query_se(operands[o]) )\r
2139                                 return(false);\r
2140                 }\r
2141                 return(true);\r
2142     default:\r
2143         fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",\r
2144                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
2145         exit(1);\r
2146     }\r
2147     return(false);\r
2148 }\r
2149 \r
2150 \r
2151 \r
2152 \r
2153 //                      Find complex literals.\r
2154 //                      NOTE : This analysis should be deferred to\r
2155 //                                 code generation time.\r
2156 //                      This analysis drills into aggr se specs.\r
2157 //                      Shouldn't this be done at the aggregate table?\r
2158 //                      But, its not a major loss of efficiency.\r
2159 //                              UPDATE : drilling into aggr se's is causnig a problem\r
2160 //                                      so I've eliminated it.\r
2161 \r
2162 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,\r
2163                                                                 cplx_lit_table *complex_literals){\r
2164         literal_t *l;\r
2165         vector<scalarexp_t *> operands;\r
2166         int o;\r
2167         scalarexp_t *param_se;\r
2168         data_type *dt;\r
2169 \r
2170         switch(se->get_operator_type()){\r
2171         case SE_LITERAL:\r
2172                 l = se->get_literal();\r
2173                 if(l->constructor_name() != ""){\r
2174                         int cl_idx = complex_literals->add_cpx_lit(l, false);\r
2175                         l->set_cpx_lit_ref(cl_idx);\r
2176                 }\r
2177                 return(true);\r
2178         case SE_PARAM:\r
2179                 return(true );\r
2180 //                      SE_IFACE_PARAM should not exist when this is called.\r
2181         case SE_UNARY_OP:\r
2182                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );\r
2183         case SE_BINARY_OP:\r
2184                 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&\r
2185                         find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );\r
2186         case SE_COLREF:\r
2187                 return(true);\r
2188         case SE_AGGR_STAR:\r
2189                 return(true);\r
2190         case SE_AGGR_SE:\r
2191                 return true;\r
2192 //              return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );\r
2193         case SE_FUNC:\r
2194                 if(se->get_aggr_ref() >= 0) return true;\r
2195 \r
2196                 operands = se->get_operands();\r
2197                 for(o=0;o<operands.size();o++){\r
2198                         find_complex_literal_se(operands[o], Ext_fcns, complex_literals);\r
2199                 }\r
2200                 return(true);\r
2201         default:\r
2202                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",\r
2203                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
2204                 exit(1);\r
2205         }\r
2206         return(false);\r
2207 }\r
2208 \r
2209 \r
2210 \r
2211 \r
2212 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,\r
2213                                                                 cplx_lit_table *complex_literals){\r
2214         int i,o;\r
2215         vector<literal_t *> litl;\r
2216         vector<scalarexp_t *> op_list;\r
2217 \r
2218 \r
2219         switch(pr->get_operator_type()){\r
2220         case PRED_IN:\r
2221                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;\r
2222                 litl = pr->get_lit_vec();\r
2223                 for(i=0;i<litl.size();i++){\r
2224                         if(litl[i]->constructor_name() != ""){\r
2225                                 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);\r
2226                                 litl[i]->set_cpx_lit_ref(cl_idx);\r
2227                         }\r
2228                 }\r
2229                 return;\r
2230         case PRED_COMPARE:\r
2231                 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;\r
2232                 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;\r
2233                 return;\r
2234         case PRED_UNARY_OP:\r
2235                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);\r
2236                 return;\r
2237         case PRED_BINARY_OP:\r
2238                 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;\r
2239                 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;\r
2240                 return;\r
2241         case PRED_FUNC:\r
2242                 op_list = pr->get_op_list();\r
2243                 for(o=0;o<op_list.size();++o){\r
2244                         find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);\r
2245                 }\r
2246                 return;\r
2247         default:\r
2248                 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",\r
2249                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2250                 exit(1);\r
2251         }\r
2252 \r
2253         return;\r
2254 }\r
2255 \r
2256 \r
2257 //              Find all things which are passed as handle parameters to functions\r
2258 //              (query parameters, (simple) literals, complex literals)\r
2259 //              These expressions MUST be processed with find_complex_literal_??\r
2260 //              first.\r
2261 //                      TODO: this analysis drills into the aggregate SEs.\r
2262 //                      Shouldn't this be done on the aggr table SEs instead?\r
2263 //                      to avoid duplication.  THe handle registration\r
2264 //                      might be expensive ...\r
2265 //                      REVISED : drilling into aggr se's is causing problems, eliminated.\r
2266 \r
2267 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,\r
2268                                                 vector<handle_param_tbl_entry *> &handle_tbl){\r
2269         vector<scalarexp_t *> operands;\r
2270         vector<bool> handle_ind;\r
2271         int o;\r
2272         scalarexp_t *param_se;\r
2273         data_type *dt;\r
2274         literal_t *l;\r
2275 \r
2276         switch(se->get_operator_type()){\r
2277         case SE_LITERAL:\r
2278                 return;\r
2279         case SE_PARAM:\r
2280                 return;\r
2281 //              case SE_IFACE_PARAM:            SHOULD NOT EXIST when this is called\r
2282         case SE_UNARY_OP:\r
2283                 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;\r
2284                 return;\r
2285         case SE_BINARY_OP:\r
2286                 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;\r
2287                 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;\r
2288                 return;\r
2289         case SE_COLREF:\r
2290                 return;\r
2291         case SE_AGGR_STAR:\r
2292                 return;\r
2293         case SE_AGGR_SE:\r
2294 //              find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;\r
2295                 return;\r
2296         case SE_FUNC:\r
2297                 if(se->get_aggr_ref() >= 0) return ;\r
2298 \r
2299                 operands = se->get_operands();\r
2300                 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );\r
2301                 for(o=0;o<operands.size();o++){\r
2302                         if(handle_ind[o]){\r
2303                                 handle_param_tbl_entry *he;\r
2304                                 param_se = operands[o];\r
2305                                 if(param_se->get_operator_type() != SE_LITERAL &&\r
2306                                                 param_se->get_operator_type() != SE_PARAM){\r
2307                                         fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
2308                                 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());\r
2309                                         exit(1);\r
2310                                 }\r
2311 \r
2312                                 if(param_se->get_operator_type() == SE_PARAM){\r
2313                                         he = new handle_param_tbl_entry(\r
2314                                                 se->get_op(), o, param_se->get_param_name(),\r
2315                                                 param_se->get_data_type()->get_type_str());\r
2316                                 }else{\r
2317                                         l = param_se->get_literal();\r
2318                                         if(l->is_cpx_lit()){\r
2319                                                 he = new handle_param_tbl_entry(\r
2320                                                         se->get_op(), o, l->get_cpx_lit_ref(),\r
2321                                                 param_se->get_data_type()->get_type_str());\r
2322                                         }else{\r
2323                                                 he = new handle_param_tbl_entry(\r
2324                                                         se->get_op(), o, l,\r
2325                                                 param_se->get_data_type()->get_type_str());\r
2326                                         }\r
2327                                 }\r
2328                                 param_se->set_handle_ref(handle_tbl.size());\r
2329                                 handle_tbl.push_back(he);\r
2330                         }else{\r
2331                                 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;\r
2332                         }\r
2333                 }\r
2334                 return;\r
2335         default:\r
2336                 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",\r
2337                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
2338                 exit(1);\r
2339         }\r
2340         return;\r
2341 }\r
2342 \r
2343 \r
2344 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,\r
2345                                                 vector<handle_param_tbl_entry *> &handle_tbl){\r
2346         vector<literal_t *> litl;\r
2347         vector<scalarexp_t *> op_list;\r
2348         scalarexp_t *param_se;\r
2349         vector<bool> handle_ind;\r
2350         int o;\r
2351         literal_t *l;\r
2352 \r
2353         switch(pr->get_operator_type()){\r
2354         case PRED_IN:\r
2355                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;\r
2356                 return;\r
2357         case PRED_COMPARE:\r
2358                 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;\r
2359                 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;\r
2360                 return;\r
2361         case PRED_UNARY_OP:\r
2362                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);\r
2363                 return;\r
2364         case PRED_BINARY_OP:\r
2365                 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;\r
2366                 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;\r
2367                 return;\r
2368         case PRED_FUNC:\r
2369                 op_list = pr->get_op_list();\r
2370                 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );\r
2371                 for(o=0;o<op_list.size();++o){\r
2372                         if(handle_ind[o]){\r
2373                                 handle_param_tbl_entry *he;\r
2374                                 param_se = op_list[o];\r
2375                                 if(param_se->get_operator_type() != SE_LITERAL &&\r
2376                                                 param_se->get_operator_type() != SE_PARAM){\r
2377                                         fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n  Line=%d, char=%d.\n",\r
2378                                 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());\r
2379                                         exit(1);\r
2380                                 }\r
2381 \r
2382                                 if(param_se->get_operator_type() == SE_PARAM){\r
2383                                         he = new handle_param_tbl_entry(\r
2384                                                 pr->get_op(), o, param_se->get_param_name(),\r
2385                                                 param_se->get_data_type()->get_type_str());\r
2386                                 }else{\r
2387                                         l = param_se->get_literal();\r
2388                                         if(l->is_cpx_lit()){\r
2389                                                 he = new handle_param_tbl_entry(\r
2390                                                         pr->get_op(), o, l->get_cpx_lit_ref(),\r
2391                                                 param_se->get_data_type()->get_type_str());\r
2392                                         }else{\r
2393                                                 he = new handle_param_tbl_entry(\r
2394                                                         pr->get_op(), o, l,\r
2395                                                 param_se->get_data_type()->get_type_str());\r
2396                                         }\r
2397                                 }\r
2398                                 param_se->set_handle_ref(handle_tbl.size());\r
2399                                 handle_tbl.push_back(he);\r
2400                         }else{\r
2401                                 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;\r
2402                         }\r
2403                 }\r
2404                 return;\r
2405         default:\r
2406                 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",\r
2407                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2408                 exit(1);\r
2409         }\r
2410 \r
2411         return;\r
2412 }\r
2413 \r
2414 \r
2415 //                      Verify the HAVING predicate : it\r
2416 //                      can access gb vars, aggregates, and constants,\r
2417 //                      but not colrefs.\r
2418 //                      return 1 if OK, -1 if bad.\r
2419 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?\r
2420 \r
2421 //                      Extended to deal with cleaning_by, cleaning_when :\r
2422 //                      verify that any aggregate function\r
2423 //                      has the multiple output property.\r
2424 \r
2425 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){\r
2426         int l_ret, r_ret;\r
2427         vector<scalarexp_t *> operands;\r
2428         vector<data_type *> odt;\r
2429         int o;\r
2430 \r
2431         switch(se->get_operator_type()){\r
2432         case SE_LITERAL:\r
2433                 return(1);\r
2434         case SE_PARAM:\r
2435         case SE_IFACE_PARAM:\r
2436                 return(1);\r
2437         case SE_UNARY_OP:\r
2438                 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );\r
2439         case SE_BINARY_OP:\r
2440                 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);\r
2441                 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);\r
2442                 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);\r
2443                 return(1);\r
2444         case SE_COLREF:\r
2445                 if(se->is_gb()) return 1;\r
2446                 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,\r
2447                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );\r
2448                 return(-1);\r
2449         case SE_AGGR_STAR:\r
2450         case SE_AGGR_SE:\r
2451 //                      colrefs and gbrefs allowed.\r
2452 //                      check for nested aggregation elsewhere, so just return TRUE\r
2453                 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){\r
2454                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,\r
2455                                 se->get_lineno(),se->get_charno(), se->get_op().c_str() );\r
2456                         return(-1);\r
2457                 }\r
2458 \r
2459 //                              Ensure that aggregate refs allow multiple outputs\r
2460 //                              in CLEANING_WHEN, CLEANING_BY\r
2461                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){\r
2462                         if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){\r
2463                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,\r
2464                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );\r
2465                                 return(-1);\r
2466                         }\r
2467                 }\r
2468 \r
2469 \r
2470                 return(1);\r
2471         case SE_FUNC:\r
2472                 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){\r
2473                         fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,\r
2474                         se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );\r
2475                 return(-1);\r
2476                 }\r
2477 \r
2478                 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){\r
2479                         if(se->get_aggr_ref() >= 0  && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){\r
2480                                 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,\r
2481                                   se->get_op().c_str(),se->get_lineno(),se->get_charno() );\r
2482                                 return(-1);\r
2483                         }\r
2484                 }\r
2485 \r
2486                 if(se->get_aggr_ref() >= 0)     // don't descent into aggregates.\r
2487                         return 1;\r
2488 \r
2489                 operands = se->get_operands();\r
2490                 r_ret = 1;\r
2491                 for(o=0;o<operands.size();o++){\r
2492                         l_ret = verify_having_se(operands[o], clause, Ext_fcns);\r
2493                         if(l_ret < 0) r_ret = -1;\r
2494                 }\r
2495                 if(r_ret < 0) return(-1); else return(1);\r
2496                 return(1);\r
2497         default:\r
2498                 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",\r
2499                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
2500                 return(-1);\r
2501         }\r
2502         return(-1);\r
2503 }\r
2504 \r
2505 \r
2506 //                      Verify the HAVING predicate : it\r
2507 //                      can access gb vars, aggregates, and constants,\r
2508 //                      but not colrefs.\r
2509 //                      return 1 if OK, -1 if bad.\r
2510 //                      Perhaps replace by a pair of fcns which counts non-gb colrefs?\r
2511 \r
2512 \r
2513 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){\r
2514         int l_ret, r_ret;\r
2515         vector<literal_t *> litl;\r
2516         vector<scalarexp_t *> op_list;\r
2517         int o;\r
2518 \r
2519         switch(pr->get_operator_type()){\r
2520         case PRED_IN:\r
2521                 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));\r
2522         case PRED_COMPARE:\r
2523                 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;\r
2524                 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;\r
2525                 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);\r
2526         case PRED_UNARY_OP:\r
2527                 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));\r
2528         case PRED_BINARY_OP:\r
2529                 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);\r
2530                 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);\r
2531                 if( (l_ret < 0) || (r_ret < 0) ) return(-1);\r
2532                 return(1);\r
2533         case PRED_FUNC:\r
2534                 op_list = pr->get_op_list();\r
2535                 l_ret = 1;\r
2536                 for(o=0;o<op_list.size();++o){\r
2537                         if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;\r
2538                 }\r
2539                 return(l_ret);\r
2540 \r
2541         default:\r
2542                 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",\r
2543                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2544         }\r
2545 \r
2546         return(-1);\r
2547 }\r
2548 \r
2549 \r
2550 //////////////////////////////////////////////////////////////////////////\r
2551 //////////////////////////////////////////////////////////////////////////\r
2552 ///////                 cnf and pred analysis and manipulation\r
2553 \r
2554 // ----------------------------------------------------------------------\r
2555 //  Convert the predicates to a list of conjuncts\r
2556 //  (not actually cnf).  Do some analysis\r
2557 //  on their properties.\r
2558 // ----------------------------------------------------------------------\r
2559 \r
2560 \r
2561 //  Put into list clist the predicates that\r
2562 //  are AND'ed together.\r
2563 \r
2564 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){\r
2565 \r
2566   if(pr == NULL) return;\r
2567 \r
2568   switch(pr->get_operator_type()){\r
2569      case PRED_COMPARE:\r
2570         clist.push_back(new cnf_elem(pr));\r
2571         return;\r
2572         break;\r
2573      case PRED_IN:\r
2574         clist.push_back(new cnf_elem(pr));\r
2575         return;\r
2576         break;\r
2577      case PRED_UNARY_OP:\r
2578         clist.push_back(new cnf_elem(pr));\r
2579         return;\r
2580         break;\r
2581      case PRED_BINARY_OP:\r
2582         if(pr->get_op() == "OR"){\r
2583                         clist.push_back(new cnf_elem(pr));\r
2584                         return;\r
2585                 }\r
2586                 if(pr->get_op() =="AND"){\r
2587                    make_cnf_from_pr(pr->get_left_pr(),clist);\r
2588                    make_cnf_from_pr(pr->get_right_pr(),clist);\r
2589                    return;\r
2590                 }\r
2591         case PRED_FUNC:\r
2592         clist.push_back(new cnf_elem(pr));\r
2593         return;\r
2594         break;\r
2595         default:\r
2596                 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());\r
2597                 exit(1);\r
2598                         break;\r
2599            }\r
2600 }\r
2601 \r
2602 \r
2603 \r
2604 //  Find out what things are referenced in a se,\r
2605 //  to use for analyzing a predicate.\r
2606 //  Currently, is it simple (no operators), does it\r
2607 //  reference a group-by column, does it reference an\r
2608 //  attribute of a table.\r
2609 //\r
2610 //      analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf\r
2611 \r
2612 \r
2613 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){\r
2614  int p;\r
2615  vector<scalarexp_t *> operand_list;\r
2616 \r
2617         switch(se->get_operator_type()){\r
2618         case SE_LITERAL:\r
2619         case SE_PARAM:\r
2620         case SE_IFACE_PARAM:\r
2621                 return;\r
2622         case SE_COLREF:\r
2623                 if(se->is_gb() ) g=1;\r
2624                 else                    a=1;\r
2625                 return;\r
2626         case SE_UNARY_OP:\r
2627                 s=0;\r
2628                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);\r
2629                 return;\r
2630         case SE_BINARY_OP:\r
2631                 s=0;\r
2632                 analyze_cnf_se(se->get_left_se(),s,g,a,agr);\r
2633                 analyze_cnf_se(se->get_right_se(),s,g,a,agr);\r
2634                 return;\r
2635         case SE_AGGR_STAR:\r
2636         case SE_AGGR_SE:\r
2637                 agr = 1;\r
2638                 return;\r
2639         case SE_FUNC:\r
2640                 if(se->get_aggr_ref() >= 0){\r
2641                         agr = 1;\r
2642                         return;\r
2643                 }\r
2644                 s = 0;\r
2645                 operand_list = se->get_operands();\r
2646                 for(p=0;p<operand_list.size();p++){\r
2647                         analyze_cnf_se(operand_list[p],s,g,a,agr);\r
2648                 }\r
2649         break;\r
2650         }\r
2651 \r
2652         return;\r
2653 }\r
2654 \r
2655 \r
2656 \r
2657 void analyze_cnf_pr(predicate_t *pr, int &g, int &a,  int &agr){\r
2658 int dum_simple, o;\r
2659 vector<scalarexp_t *> op_list;\r
2660 \r
2661 \r
2662         switch(pr->get_operator_type()){\r
2663         case PRED_COMPARE:\r
2664                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);\r
2665                 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);\r
2666                 return;\r
2667         case PRED_IN:\r
2668                 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);\r
2669                 return;\r
2670         case PRED_UNARY_OP:\r
2671                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);\r
2672                 return;\r
2673         case PRED_BINARY_OP:\r
2674                 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);\r
2675                 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);\r
2676                 return;\r
2677         case PRED_FUNC:\r
2678                 op_list = pr->get_op_list();\r
2679                 for(o=0;o<op_list.size();++o){\r
2680                         analyze_cnf_se(op_list[o],dum_simple,g,a,agr);\r
2681                 }\r
2682                 return;\r
2683         default:\r
2684                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",\r
2685                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2686                 exit(1);\r
2687         }\r
2688 }\r
2689 \r
2690 \r
2691 \r
2692 //  analyze a conjunct of a predicate.\r
2693 //  Is it atomic (e.g., a single predicate),\r
2694 //  and if so do a further analysis.\r
2695 \r
2696 void analyze_cnf(cnf_elem *c){\r
2697 \r
2698 //  analyze the predicate.\r
2699    analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);\r
2700 \r
2701    if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){\r
2702                 return;\r
2703    }\r
2704 \r
2705 \r
2706 //  its an atomic predicate -- get more info\r
2707    c->is_atom = 1;\r
2708 \r
2709         if(c->pr->get_op() == "=")\r
2710                 c->eq_pred = 1;\r
2711         else\r
2712                 c->eq_pred = 0;\r
2713 \r
2714         if(c->pr->get_operator_type() == PRED_IN)\r
2715                 c->in_pred = 1;\r
2716         else\r
2717                 c->in_pred = 0;\r
2718 \r
2719         c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;\r
2720         analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);\r
2721 \r
2722         if(c->pr->get_operator_type() == PRED_COMPARE){\r
2723                 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;\r
2724                 analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);\r
2725         }\r
2726 }\r
2727 \r
2728 void analyze_constraint_se(scalarexp_t *se,\r
2729                         int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){\r
2730  int l_agr, l_gb, l_par, l_func, l_op;\r
2731  int r_agr, r_gb, r_par, r_func, r_op;\r
2732  int p;\r
2733  vector<scalarexp_t *> operand_list;\r
2734 \r
2735         switch(se->get_operator_type()){\r
2736         case SE_LITERAL:\r
2737         case SE_IFACE_PARAM:\r
2738                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;\r
2739                 return;\r
2740         case SE_PARAM:\r
2741                 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;\r
2742                 return;\r
2743         case SE_COLREF:\r
2744                 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;\r
2745                 if(se->is_gb() ){\r
2746                         if(enter_gb){\r
2747                                 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);\r
2748                         }else{\r
2749                                 n_gb=1;\r
2750                         }\r
2751                 }\r
2752                 return;\r
2753         case SE_UNARY_OP:\r
2754                 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);\r
2755                 n_op++;\r
2756                 return;\r
2757         case SE_BINARY_OP:\r
2758                 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);\r
2759                 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);\r
2760                 n_agr=l_agr+r_agr;\r
2761                 n_gb=l_gb+r_gb;\r
2762                 n_par=l_par+r_par;\r
2763                 n_func=l_func+r_func;\r
2764                 n_op=l_op+r_op+1;\r
2765                 return;\r
2766         case SE_AGGR_STAR:\r
2767         case SE_AGGR_SE:\r
2768                 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;\r
2769                 return;\r
2770         case SE_FUNC:\r
2771                 if(se->get_aggr_ref() >= 0){\r
2772                         n_agr=1; n_gb = 0; n_par = 0; n_op = 0;\r
2773                         if(Ext_fcns)\r
2774                                 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());\r
2775                         else\r
2776                                 n_func = 1;\r
2777                         return;\r
2778                 }\r
2779                 n_agr=0; n_gb = 0; n_par = 0;  n_op = 0;\r
2780                 if(Ext_fcns)\r
2781                         n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());\r
2782                 else\r
2783                         n_func = 1;\r
2784                 operand_list = se->get_operands();\r
2785                 for(p=0;p<operand_list.size();p++){\r
2786                         analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);\r
2787                         n_agr+=l_agr;\r
2788                         n_gb+=l_gb;\r
2789                         n_par+=l_par;\r
2790                         n_func+=l_func;\r
2791                         n_op += l_op;\r
2792                 }\r
2793         break;\r
2794         }\r
2795 \r
2796         return;\r
2797 }\r
2798 \r
2799 //              Estimate the cost of a constraint.\r
2800 //              WARNING a lot of cost assumptions are embedded in the code.\r
2801 void analyze_constraint_pr(predicate_t *pr,\r
2802                 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,\r
2803                 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){\r
2804  int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;\r
2805  int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;\r
2806 \r
2807 int o;\r
2808 vector<scalarexp_t *> op_list;\r
2809 \r
2810 \r
2811         switch(pr->get_operator_type()){\r
2812         case PRED_COMPARE:\r
2813                 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);\r
2814                 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);\r
2815                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;\r
2816                 n_func=l_func+r_func; n_op=l_op+r_op;\r
2817                 if(pr->get_left_se()->get_data_type()->complex_comparison(\r
2818                         pr->get_right_se()->get_data_type())\r
2819             ){\r
2820                         n_cmp_s = 0; n_cmp_c=1;\r
2821                 }else{\r
2822                         n_cmp_s = 1; n_cmp_c=0;\r
2823                 }\r
2824                 n_in = 0; n_pred = 0; n_bool = 0;\r
2825                 return;\r
2826         case PRED_IN:\r
2827 //                      Tread IN predicate as sequence of comparisons\r
2828                 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);\r
2829                 if(pr->get_left_se()->get_data_type()->complex_comparison(\r
2830                         pr->get_right_se()->get_data_type())\r
2831             ){\r
2832                         n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();\r
2833                 }else{\r
2834                         n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;\r
2835                 }\r
2836                 n_in = 0; n_pred = 0; n_bool = 0;\r
2837                 return;\r
2838         case PRED_UNARY_OP:\r
2839                 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);\r
2840                 n_bool++;\r
2841                 return;\r
2842         case PRED_BINARY_OP:\r
2843                 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);\r
2844                 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);\r
2845                 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;\r
2846                 n_func=l_func+r_func; n_op=l_op+r_op;\r
2847                 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;\r
2848                 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;\r
2849                 return;\r
2850         case PRED_FUNC:\r
2851                 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;\r
2852                 if(Ext_fcns)\r
2853                         n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());\r
2854                 else\r
2855                         n_pred = 1;\r
2856                 op_list = pr->get_op_list();\r
2857                 for(o=0;o<op_list.size();++o){\r
2858                         analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);\r
2859                         n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;\r
2860                 }\r
2861                 return;\r
2862         default:\r
2863                 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",\r
2864                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2865                 exit(1);\r
2866         }\r
2867 }\r
2868 \r
2869 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){\r
2870  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;\r
2871         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,\r
2872                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);\r
2873 \r
2874 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);\r
2875         c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;\r
2876 }\r
2877 \r
2878 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){\r
2879  int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;\r
2880         analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,\r
2881                                                 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);\r
2882 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);\r
2883         if(n_par || n_agr)\r
2884                 return false;\r
2885         int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;\r
2886 //printf("cost=%d\n",cost);\r
2887         return cost<10;\r
2888 }\r
2889 \r
2890 //              The prefilter needs to translate constraints on\r
2891 //              gbvars into constraints involving their underlying SEs.\r
2892 //              The following two routines attach GB def info.\r
2893 \r
2894 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){\r
2895  int p;\r
2896  vector<scalarexp_t *> operand_list;\r
2897 \r
2898         switch(se->get_operator_type()){\r
2899         case SE_LITERAL:\r
2900         case SE_IFACE_PARAM:\r
2901         case SE_PARAM:\r
2902         case SE_AGGR_STAR:\r
2903                 return;\r
2904         case SE_COLREF:\r
2905                 if(se->is_gb() ){\r
2906                          se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());\r
2907                 }\r
2908                 return;\r
2909         case SE_UNARY_OP:\r
2910                 insert_gb_def_se(se->get_left_se(),gtbl);\r
2911                 return;\r
2912         case SE_BINARY_OP:\r
2913                 insert_gb_def_se(se->get_left_se(),gtbl);\r
2914                 insert_gb_def_se(se->get_right_se(),gtbl);\r
2915                 return;\r
2916         case SE_AGGR_SE:\r
2917                 insert_gb_def_se(se->get_left_se(),gtbl);\r
2918                 return;\r
2919         case SE_FUNC:\r
2920                 operand_list = se->get_operands();\r
2921                 for(p=0;p<operand_list.size();p++){\r
2922                         insert_gb_def_se(operand_list[p],gtbl);\r
2923                 }\r
2924         break;\r
2925         }\r
2926 \r
2927         return;\r
2928 }\r
2929 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){\r
2930 vector<scalarexp_t *> op_list;\r
2931 int o;\r
2932 \r
2933         switch(pr->get_operator_type()){\r
2934         case PRED_COMPARE:\r
2935                 insert_gb_def_se(pr->get_left_se(),gtbl);\r
2936                 insert_gb_def_se(pr->get_right_se(),gtbl);\r
2937                 return;\r
2938         case PRED_IN:\r
2939                 insert_gb_def_se(pr->get_left_se(),gtbl);\r
2940                 return;\r
2941         case PRED_UNARY_OP:\r
2942                 insert_gb_def_pr(pr->get_left_pr(),gtbl);\r
2943                 return;\r
2944         case PRED_BINARY_OP:\r
2945                 insert_gb_def_pr(pr->get_left_pr(),gtbl);\r
2946                 insert_gb_def_pr(pr->get_right_pr(),gtbl);\r
2947                 return;\r
2948         case PRED_FUNC:\r
2949                 op_list = pr->get_op_list();\r
2950                 for(o=0;o<op_list.size();++o){\r
2951                         insert_gb_def_se(op_list[o],gtbl);\r
2952                 }\r
2953                 return;\r
2954         default:\r
2955                 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",\r
2956                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
2957                 exit(1);\r
2958         }\r
2959 }\r
2960 \r
2961 //              Substitute gbrefs with their definitions\r
2962 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){\r
2963  int p;\r
2964  vector<scalarexp_t *> operand_list;\r
2965  scalarexp_t *lse,*rse;\r
2966  colref_t *cr;\r
2967  string b_tbl;\r
2968  int b_idx;\r
2969 \r
2970         switch(se->get_operator_type()){\r
2971         case SE_LITERAL:\r
2972         case SE_IFACE_PARAM:\r
2973         case SE_PARAM:\r
2974         case SE_AGGR_STAR:\r
2975                 return;\r
2976         case SE_COLREF:\r
2977                 cr = se->get_colref();\r
2978                 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);\r
2979                 b_idx = Schema->get_table_ref(b_tbl);\r
2980                 cr->tablevar_ref = b_idx;\r
2981                 return;\r
2982         case SE_UNARY_OP:\r
2983                 lse=se->get_left_se();\r
2984                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
2985                         se->lhs.scalarp = lse->get_right_se();\r
2986                         subs_gbrefs_se(se,Schema);\r
2987                         return;\r
2988                 }\r
2989                 subs_gbrefs_se(se->get_left_se(),Schema);\r
2990                 return;\r
2991         case SE_BINARY_OP:\r
2992                 lse=se->get_left_se();\r
2993                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
2994                         se->lhs.scalarp = lse->get_right_se();\r
2995                         subs_gbrefs_se(se,Schema);\r
2996                         return;\r
2997                 }\r
2998                 rse=se->get_right_se();\r
2999                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){\r
3000                         se->rhs.scalarp = rse->get_right_se();\r
3001                         subs_gbrefs_se(se,Schema);\r
3002                         return;\r
3003                 }\r
3004                 subs_gbrefs_se(se->get_left_se(),Schema);\r
3005                 subs_gbrefs_se(se->get_right_se(),Schema);\r
3006                 return;\r
3007         case SE_AGGR_SE:\r
3008                 lse=se->get_left_se();\r
3009                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
3010                         se->lhs.scalarp = lse->get_right_se();\r
3011                         subs_gbrefs_se(se,Schema);\r
3012                         return;\r
3013                 }\r
3014                 subs_gbrefs_se(se->get_left_se(),Schema);\r
3015                 return;\r
3016         case SE_FUNC:\r
3017                 operand_list = se->get_operands();\r
3018                 for(p=0;p<operand_list.size();p++){\r
3019                         lse=operand_list[p];\r
3020                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
3021                                 se->param_list[p] = lse->get_right_se();\r
3022                                 subs_gbrefs_se(se,Schema);\r
3023                                 return;\r
3024                         }\r
3025                 }\r
3026                 for(p=0;p<operand_list.size();p++){\r
3027                         subs_gbrefs_se(operand_list[p],Schema);\r
3028                 }\r
3029         break;\r
3030         }\r
3031 \r
3032         return;\r
3033 }\r
3034 \r
3035 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){\r
3036 vector<scalarexp_t *> op_list;\r
3037 int o;\r
3038 scalarexp_t *lse,*rse;\r
3039 \r
3040         switch(pr->get_operator_type()){\r
3041         case PRED_COMPARE:\r
3042                 lse=pr->get_left_se();\r
3043                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
3044                         pr->lhs.sexp = lse->get_right_se();\r
3045                         subs_gbrefs_pr(pr,Schema);\r
3046                         return;\r
3047                 }\r
3048                 rse=pr->get_right_se();\r
3049                 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){\r
3050                         pr->rhs.sexp = rse->get_right_se();\r
3051                         subs_gbrefs_pr(pr,Schema);\r
3052                         return;\r
3053                 }\r
3054                 subs_gbrefs_se(pr->get_left_se(),Schema);\r
3055                 subs_gbrefs_se(pr->get_right_se(),Schema);\r
3056                 return;\r
3057         case PRED_IN:\r
3058                 lse=pr->get_left_se();\r
3059                 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
3060                         pr->lhs.sexp = lse->get_right_se();\r
3061                         subs_gbrefs_pr(pr,Schema);\r
3062                         return;\r
3063                 }\r
3064                 subs_gbrefs_se(pr->get_left_se(),Schema);\r
3065                 return;\r
3066         case PRED_UNARY_OP:\r
3067                 subs_gbrefs_pr(pr->get_left_pr(),Schema);\r
3068                 return;\r
3069         case PRED_BINARY_OP:\r
3070                 subs_gbrefs_pr(pr->get_left_pr(),Schema);\r
3071                 subs_gbrefs_pr(pr->get_right_pr(),Schema);\r
3072                 return;\r
3073         case PRED_FUNC:\r
3074                 op_list = pr->get_op_list();\r
3075                 for(o=0;o<op_list.size();++o){\r
3076                         lse=op_list[o];\r
3077                         if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){\r
3078                                 pr->param_list[o] = lse->get_right_se();\r
3079                                 subs_gbrefs_pr(pr,Schema);\r
3080                                 return;\r
3081                         }\r
3082                         subs_gbrefs_se(op_list[o],Schema);\r
3083                 }\r
3084                 return;\r
3085         default:\r
3086                 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3087                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3088                 exit(1);\r
3089         }\r
3090 }\r
3091 \r
3092 \r
3093 //              Search for references to "expensive" fields.\r
3094 int expensive_refs_se(scalarexp_t *se, table_list *Schema){\r
3095  int p;\r
3096  vector<scalarexp_t *> operand_list;\r
3097  int cnt=0;\r
3098 table_def *td;\r
3099 param_list *plist;\r
3100 \r
3101         switch(se->get_operator_type()){\r
3102         case SE_LITERAL:\r
3103         case SE_IFACE_PARAM:\r
3104         case SE_PARAM:\r
3105         case SE_AGGR_STAR:\r
3106         case SE_AGGR_SE:\r
3107                 return 0;\r
3108         case SE_COLREF:\r
3109                 if(se->is_gb())\r
3110                         return expensive_refs_se(se->rhs.scalarp,Schema);\r
3111                 td = Schema->get_table(se->lhs.colref->schema_ref);\r
3112                 plist = td->get_modifier_list(se->lhs.colref->field);\r
3113                 if(plist->contains_key("expensive"))\r
3114                         return 1;\r
3115                 return 0;\r
3116         case SE_UNARY_OP:\r
3117                 return expensive_refs_se(se->get_left_se(),Schema);\r
3118         case SE_BINARY_OP:\r
3119                 cnt += expensive_refs_se(se->get_left_se(),Schema);\r
3120                 cnt += expensive_refs_se(se->get_right_se(),Schema);\r
3121                 return cnt;\r
3122         case SE_FUNC:\r
3123                 operand_list = se->get_operands();\r
3124                 for(p=0;p<operand_list.size();p++){\r
3125                         cnt += expensive_refs_se(operand_list[p],Schema);\r
3126                 }\r
3127                 return cnt;\r
3128         break;\r
3129         }\r
3130 \r
3131         return 0;\r
3132 }\r
3133 \r
3134 int expensive_refs_pr(predicate_t *pr, table_list *Schema){\r
3135 vector<scalarexp_t *> op_list;\r
3136 int o;\r
3137 int cnt=0;\r
3138 \r
3139         switch(pr->get_operator_type()){\r
3140         case PRED_COMPARE:\r
3141                 cnt += expensive_refs_se(pr->get_left_se(),Schema);\r
3142                 cnt += expensive_refs_se(pr->get_right_se(),Schema);\r
3143                 return cnt;\r
3144         case PRED_IN:\r
3145                 return expensive_refs_se(pr->get_left_se(),Schema);\r
3146         case PRED_UNARY_OP:\r
3147                 return expensive_refs_pr(pr->get_left_pr(),Schema);\r
3148         case PRED_BINARY_OP:\r
3149                 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);\r
3150                 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);\r
3151                 return cnt;\r
3152         case PRED_FUNC:\r
3153                 op_list = pr->get_op_list();\r
3154                 for(o=0;o<op_list.size();++o){\r
3155                         cnt += expensive_refs_se(op_list[o],Schema);\r
3156                 }\r
3157                 return cnt;\r
3158         default:\r
3159                 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3160                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3161                 exit(1);\r
3162         }\r
3163 }\r
3164 \r
3165 \r
3166 //              TODO: allow "cheap" functions and predicates.\r
3167 bool simple_field_constraint(cnf_elem *c){\r
3168         vector<literal_t *> ll;\r
3169         int l;\r
3170         predicate_t *p = c->pr;\r
3171  int l_agr, l_gb, l_par, l_func, l_op;\r
3172  int r_agr, r_gb, r_par, r_func, r_op;\r
3173  col_id_set left_colids, right_colids;\r
3174 \r
3175 //                      Verify that it is a simple atom\r
3176         switch(p->get_operator_type()){\r
3177         case PRED_COMPARE:\r
3178 //                              Must be an equality predicate which references\r
3179 //                              which referecnes no aggregates, parameters, functions, or\r
3180 //                              group-by variables, and should be a constraint of\r
3181 //                              a single colref.\r
3182 //                              AND should not require a complex comparison.\r
3183                 if(p->get_op() != "=") return(false);\r
3184                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);\r
3185                 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false);\r
3186                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||\r
3187                    r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);\r
3188 //                              I will count on there being no gbvars in the constraint.\r
3189 //                              TODO: allow gbvars which are colrefs.\r
3190                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);\r
3191                 gather_se_col_ids(p->get_right_se(), right_colids, NULL);\r
3192                 if(left_colids.size()+right_colids.size() != 1) return(false);\r
3193 \r
3194 \r
3195 //                      Normalize : the colref should be on the lhs.\r
3196                 if(right_colids.size() > 0){\r
3197                         p->swap_scalar_operands();\r
3198                 }\r
3199 \r
3200 //                      Disallow complex (and therefore expensive) comparisons.\r
3201                 if(p->get_left_se()->get_data_type()->complex_comparison(\r
3202                         p->get_right_se()->get_data_type() ) )\r
3203                                 return(false);\r
3204 \r
3205 //                      passed all the tests.\r
3206                 return(true);\r
3207         case PRED_IN:\r
3208 //                      LHS must be a non-gbvar colref.\r
3209                 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);\r
3210                 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);\r
3211 //                              I will count on there being no gbvars in the constraint.\r
3212 //                              TODO: allow gbvars which are colrefs.\r
3213                 gather_se_col_ids(p->get_left_se(), left_colids, NULL);\r
3214                 if(left_colids.size() != 1) return(false);\r
3215 //                      Disallow complex (and therefore expensive) comparisons.\r
3216                 if(p->get_left_se()->get_data_type()->complex_comparison(\r
3217                         p->get_left_se()->get_data_type() ) )\r
3218                                 return(false);\r
3219 \r
3220 \r
3221 //                      All entries in the IN list must be literals\r
3222 //                      Currently, this is the only possibility.\r
3223                 return(true);\r
3224                 break;\r
3225         case PRED_UNARY_OP:\r
3226                 return(false);\r
3227         case PRED_BINARY_OP:\r
3228                 return(false);\r
3229         case PRED_FUNC:\r
3230                 return(false);\r
3231         default:\r
3232                 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",\r
3233                         p->get_lineno(), p->get_charno(), p->get_operator_type() );\r
3234                 exit(1);\r
3235         }\r
3236 \r
3237         return(false);\r
3238 }\r
3239 \r
3240 //              As the name implies, return the colref constrained by the\r
3241 //              cnf elem.  I will be counting on the LHS being a SE pointing\r
3242 //              to a colref.\r
3243 \r
3244 //                      This fcn assumes that in fact exactly\r
3245 //                      one colref is constrained.\r
3246 colref_t *get_constrained_colref(scalarexp_t *se){\r
3247  int p;\r
3248  vector<scalarexp_t *> operand_list;\r
3249 colref_t *ret;\r
3250 \r
3251         switch(se->get_operator_type()){\r
3252         case SE_LITERAL:\r
3253                 return(NULL);\r
3254         case SE_PARAM:\r
3255         case SE_IFACE_PARAM:\r
3256                 return(NULL);\r
3257         case SE_COLREF:\r
3258                 return(se->get_colref());\r
3259         case SE_UNARY_OP:\r
3260                 return(get_constrained_colref(se->get_left_se()));\r
3261         case SE_BINARY_OP:\r
3262                 ret=get_constrained_colref(se->get_left_se());\r
3263                 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));\r
3264                 else return ret;\r
3265         case SE_AGGR_STAR:\r
3266         case SE_AGGR_SE:\r
3267                 return(NULL);\r
3268         case SE_FUNC:\r
3269                 if(se->get_aggr_ref() >= 0) return NULL;\r
3270 \r
3271                 operand_list = se->get_operands();\r
3272                 for(p=0;p<operand_list.size();p++){\r
3273                         ret=get_constrained_colref(operand_list[p]);\r
3274                         if(ret != NULL) return(ret);\r
3275 \r
3276                 }\r
3277                 return(NULL);\r
3278         break;\r
3279         }\r
3280 \r
3281         return(NULL);\r
3282 }\r
3283 \r
3284 \r
3285 colref_t *get_constrained_colref(predicate_t *p){\r
3286         return(get_constrained_colref(p->get_left_se()));\r
3287 }\r
3288 colref_t *get_constrained_colref(cnf_elem *c){\r
3289         return get_constrained_colref(c->pr->get_left_se());\r
3290 }\r
3291 \r
3292 \r
3293 \r
3294 \r
3295 /*\r
3296 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,\r
3297                                                         string target_fld, string target_tbl, int tblref){\r
3298 \r
3299 //                      Make a copy of the predicate to be added.\r
3300 //                      ASSUME no aggregates.\r
3301         predicate_t *pr = dup_pr(src_p,NULL);\r
3302 \r
3303 //                      Modify the ref to the base table.\r
3304 //                      ASSUME lhs is the colref\r
3305         pr->get_left_se()->get_colref()->set_table_name(target_tbl);\r
3306         pr->get_left_se()->get_colref()->set_table_ref(tblref);\r
3307 \r
3308         if(dst->pr == NULL) dst->pr = pr;\r
3309         else dst->pr = new predicate_t("OR", dst->pr, pr);\r
3310 \r
3311 }\r
3312 */\r
3313 \r
3314 \r
3315 //////////////////////////////////////////////////////\r
3316 ///////////////         Represent a node in a predicate tree\r
3317 struct common_pred_node{\r
3318         set<int> lftas;\r
3319         predicate_t *pr;\r
3320         vector<predicate_t *> predecessor_preds;\r
3321         vector<common_pred_node *> children;\r
3322 \r
3323         string target_tbl;\r
3324         string target_fld;\r
3325         int target_ref;\r
3326 \r
3327         common_pred_node(){\r
3328                 pr = NULL;\r
3329         }\r
3330 };\r
3331 \r
3332 \r
3333 predicate_t *make_common_pred(common_pred_node *pn){\r
3334   int n;\r
3335 \r
3336         if(pn->children.size() == 0){\r
3337                 if(pn->pr == NULL){\r
3338                         fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");\r
3339                         exit(1);\r
3340                 }\r
3341                 return( dup_pr(pn->pr,NULL) );\r
3342         }\r
3343 \r
3344         predicate_t *curr_pr = make_common_pred( pn->children[0] );\r
3345     for(n=1;n<pn->children.size();++n){\r
3346                 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);\r
3347         }\r
3348 \r
3349         if(pn->pr != NULL)\r
3350                 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);\r
3351 \r
3352         return(curr_pr);\r
3353 }\r
3354 \r
3355 \r
3356 bool operator<(const cnf_set &c1, const cnf_set &c2){\r
3357         if(c1.lfta_id.size() < c2.lfta_id.size())\r
3358                 return true;\r
3359         return false;\r
3360 }\r
3361 \r
3362 \r
3363 //              Compute the predicates for the prefilter.\r
3364 //              the prefilter preds are returned in prefilter_preds.\r
3365 //              pred_ids is the set of predicates used in the prefilter.\r
3366 //              the encoding is the lfta index, in the top 16 bits,\r
3367 //              then the index of the cnf element in the bottom 16 bits.\r
3368 //              This set of for identifying which preds do not need\r
3369 //              to be generated in the lftas.\r
3370 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){\r
3371         int p, p2, l, c;\r
3372 \r
3373         vector<cnf_set *> pred_list, sort_list;\r
3374 \r
3375 //              Create list of tagged, prefilter-safe CNFs.\r
3376         for(l=0;l<where_list.size();++l){\r
3377                 for(c=0;c<where_list[l].size();++c){\r
3378                         if(prefilter_compatible(where_list[l][c],Ext_fcns)){\r
3379                                 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)\r
3380                                         pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));\r
3381                         }\r
3382                 }\r
3383         }\r
3384 \r
3385 //              Eliminate duplicates\r
3386         for(p=0;p<pred_list.size();++p){\r
3387                 if(pred_list[p]){\r
3388                         for(p2=p+1;p2<pred_list.size();++p2){\r
3389                                 if(pred_list[p2]){\r
3390                                         if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){\r
3391                                                 pred_list[p]->subsume(pred_list[p2]);\r
3392                                                 delete pred_list[p2];\r
3393                                                 pred_list[p2] = NULL;\r
3394                                         }\r
3395                                 }\r
3396                         }\r
3397                 }\r
3398         }\r
3399 \r
3400 //              combine preds that occur in the exact same lftas.\r
3401         for(p=0;p<pred_list.size();++p){\r
3402                 if(pred_list[p]){\r
3403                         for(p2=p+1;p2<pred_list.size();++p2){\r
3404                                 if(pred_list[p2]){\r
3405                                         if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){\r
3406                                                 pred_list[p]->combine_pred(pred_list[p2]);\r
3407                                                 delete pred_list[p2];\r
3408                                                 pred_list[p2] = NULL;\r
3409                                         }\r
3410                                 }\r
3411                         }\r
3412                 }\r
3413         }\r
3414 \r
3415 //              Compress the list\r
3416         for(p=0;p<pred_list.size();++p){\r
3417                 if(pred_list[p]){\r
3418                         sort_list.push_back(pred_list[p]);\r
3419                 }\r
3420         }\r
3421 //              Sort it\r
3422         sort(sort_list.begin(), sort_list.end(),compare_cnf_set());\r
3423 \r
3424 //              Return the top preds, up to 64 of them.\r
3425         for(p=0;p<sort_list.size() && p<64;p++){\r
3426                 prefilter_preds.push_back(sort_list[p]);\r
3427                 sort_list[p]->add_pred_ids(pred_ids);\r
3428         }\r
3429 \r
3430 //              Substitute gb refs with their defs\r
3431 //              While I'm at it, substitute base table sch ref for tblref.\r
3432         for(p=0;p<prefilter_preds.size() ;p++){\r
3433                 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);\r
3434         }\r
3435 \r
3436 }\r
3437 \r
3438 \r
3439 \r
3440 \r
3441 \r
3442 ///////////////////////////////////////////////////////////////////////////\r
3443 //////////////////////////////////////////////////////////////////////////\r
3444 \r
3445 //              Find partial functions and register them.\r
3446 //              Do a DFS so that nested partial fcn calls\r
3447 //              get evaluated in the right order.\r
3448 //              Don't drill down into aggregates -- their arguments are evaluated\r
3449 //              earlier than the select list is.\r
3450 //\r
3451 //              Modification for function caching:\r
3452 //              Pass in a ref counter, and partial fcn indicator.\r
3453 //              Cache fcns ref'd at least once.\r
3454 //              pass in NULL for fcn_ref_cnt to turn off fcn caching analysis\r
3455 \r
3456 \r
3457 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,\r
3458                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,\r
3459                 ext_fcn_list *Ext_fcns){\r
3460         vector<scalarexp_t *> operands;\r
3461         int o, f;\r
3462 \r
3463         if(se == NULL) return;\r
3464 \r
3465         switch(se->get_operator_type()){\r
3466         case SE_LITERAL:\r
3467         case SE_PARAM:\r
3468         case SE_IFACE_PARAM:\r
3469                 return;\r
3470         case SE_UNARY_OP:\r
3471                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3472                 return;\r
3473         case SE_BINARY_OP:\r
3474                 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);\r
3475                 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);\r
3476                 return;\r
3477         case SE_COLREF:\r
3478                 return;\r
3479         case SE_AGGR_STAR:\r
3480                 return;\r
3481         case SE_AGGR_SE:\r
3482 //              find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;\r
3483                 return;\r
3484         case SE_FUNC:\r
3485                 if(se->get_aggr_ref() >= 0) return;\r
3486 \r
3487                 operands = se->get_operands();\r
3488                 for(o=0;o<operands.size();o++){\r
3489                         find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);\r
3490                 }\r
3491 \r
3492                 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){\r
3493                         if(fcn_ref_cnt){\r
3494                           for(f=0;f<pf_list->size();++f){\r
3495                                 if(is_equivalent_se(se,(*pf_list)[f])){\r
3496                                         se->set_partial_ref(f);\r
3497                                         (*fcn_ref_cnt)[f]++;\r
3498                                         break;\r
3499                                 }\r
3500                           }\r
3501                         }else{\r
3502                                 f=pf_list->size();\r
3503                         }\r
3504                         if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) ||  fcn_ref_cnt)){\r
3505                                 se->set_partial_ref(pf_list->size());\r
3506                                 pf_list->push_back(se);\r
3507                                 if(fcn_ref_cnt){\r
3508                                         fcn_ref_cnt->push_back(1);\r
3509                                         is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));\r
3510                                 }\r
3511                         }\r
3512                 }\r
3513                 return;\r
3514         default:\r
3515                 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",\r
3516                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
3517                 exit(1);\r
3518         }\r
3519         return;\r
3520 }\r
3521 \r
3522 \r
3523 void find_partial_fcns_pr(predicate_t *pr,  vector<scalarexp_t *> *pf_list,\r
3524                 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,\r
3525                                                                         ext_fcn_list *Ext_fcns){\r
3526         vector<literal_t *> litl;\r
3527         vector<scalarexp_t *> op_list;\r
3528         int o;\r
3529 \r
3530         switch(pr->get_operator_type()){\r
3531         case PRED_IN:\r
3532                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3533                 return;\r
3534         case PRED_COMPARE:\r
3535                 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3536                 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3537                 return;\r
3538         case PRED_UNARY_OP:\r
3539                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);\r
3540                 return;\r
3541         case PRED_BINARY_OP:\r
3542                 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3543                 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;\r
3544                 return;\r
3545         case PRED_FUNC:\r
3546                 op_list = pr->get_op_list();\r
3547                 for(o=0;o<op_list.size();++o){\r
3548                         find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);\r
3549                 }\r
3550                 return;\r
3551         default:\r
3552                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3553                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3554                 exit(1);\r
3555         }\r
3556 \r
3557         return;\r
3558 }\r
3559 \r
3560 \r
3561 \r
3562 void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,\r
3563                                                                 table_list *Schema, ext_fcn_list *Ext_fcns){\r
3564         vector<literal_t *> litl;\r
3565         vector<scalarexp_t *> op_list;\r
3566         int f,o;\r
3567 \r
3568         switch(pr->get_operator_type()){\r
3569         case PRED_IN:\r
3570                 return;\r
3571         case PRED_COMPARE:\r
3572                 return;\r
3573         case PRED_UNARY_OP:\r
3574                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);\r
3575                 return;\r
3576         case PRED_BINARY_OP:\r
3577                 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;\r
3578                 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;\r
3579                 return;\r
3580         case PRED_FUNC:\r
3581                 if(Ext_fcns->is_combinable(pr->get_fcn_id())){\r
3582                   for(f=0;f<pr_list->size();++f){\r
3583                         if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){\r
3584                                 pr->set_combinable_ref(f);\r
3585                                 break;\r
3586                         }\r
3587                   }\r
3588                   if(f == pr_list->size()){\r
3589                         pr->set_combinable_ref(pr_list->size());\r
3590                         pr_list->push_back(pr);\r
3591                   }\r
3592                 }\r
3593                 return;\r
3594         default:\r
3595                 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3596                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3597                 exit(1);\r
3598         }\r
3599 \r
3600         return;\r
3601 }\r
3602 \r
3603 \r
3604 //--------------------------------------------------------------------\r
3605 //              Collect refs to aggregates.\r
3606 \r
3607 \r
3608 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){\r
3609         vector<scalarexp_t *> operands;\r
3610         int o;\r
3611 \r
3612         if(se == NULL) return;\r
3613 \r
3614         switch(se->get_operator_type()){\r
3615         case SE_LITERAL:\r
3616         case SE_PARAM:\r
3617         case SE_IFACE_PARAM:\r
3618                 return;\r
3619         case SE_UNARY_OP:\r
3620                 collect_agg_refs(se->get_left_se(), agg_refs) ;\r
3621                 return;\r
3622         case SE_BINARY_OP:\r
3623                 collect_agg_refs(se->get_left_se(), agg_refs);\r
3624                 collect_agg_refs(se->get_right_se(), agg_refs);\r
3625                 return;\r
3626         case SE_COLREF:\r
3627                 return;\r
3628         case SE_AGGR_STAR:\r
3629         case SE_AGGR_SE:\r
3630                 agg_refs.insert(se->get_aggr_ref());\r
3631                 return;\r
3632         case SE_FUNC:\r
3633                 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());\r
3634 \r
3635                 operands = se->get_operands();\r
3636                 for(o=0;o<operands.size();o++){\r
3637                         collect_agg_refs(operands[o], agg_refs);\r
3638                 }\r
3639 \r
3640                 return;\r
3641         default:\r
3642                 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",\r
3643                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
3644                 exit(1);\r
3645         }\r
3646         return;\r
3647 }\r
3648 \r
3649 \r
3650 void collect_aggr_refs_pr(predicate_t *pr,  set<int> &agg_refs){\r
3651         vector<literal_t *> litl;\r
3652         vector<scalarexp_t *> op_list;\r
3653         int o;\r
3654 \r
3655         switch(pr->get_operator_type()){\r
3656         case PRED_IN:\r
3657                 collect_agg_refs(pr->get_left_se(), agg_refs) ;\r
3658                 return;\r
3659         case PRED_COMPARE:\r
3660                 collect_agg_refs(pr->get_left_se(), agg_refs) ;\r
3661                 collect_agg_refs(pr->get_right_se(), agg_refs) ;\r
3662                 return;\r
3663         case PRED_UNARY_OP:\r
3664                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);\r
3665                 return;\r
3666         case PRED_BINARY_OP:\r
3667                 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;\r
3668                 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;\r
3669                 return;\r
3670         case PRED_FUNC:\r
3671                 op_list = pr->get_op_list();\r
3672                 for(o=0;o<op_list.size();++o){\r
3673                         collect_agg_refs(op_list[o],agg_refs);\r
3674                 }\r
3675                 return;\r
3676         default:\r
3677                 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3678                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3679                 exit(1);\r
3680         }\r
3681 \r
3682         return;\r
3683 }\r
3684 \r
3685 \r
3686 //--------------------------------------------------------------------\r
3687 //              Collect previously registered partial fcn refs.\r
3688 //              Do a DFS so that nested partial fcn calls\r
3689 //              get evaluated in the right order.\r
3690 //              Don't drill down into aggregates -- their arguments are evaluated\r
3691 //              earlier than the select list is.\r
3692 //              ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?\r
3693 \r
3694 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){\r
3695         vector<scalarexp_t *> operands;\r
3696         int o;\r
3697 \r
3698         if(se == NULL) return;\r
3699 \r
3700         switch(se->get_operator_type()){\r
3701         case SE_LITERAL:\r
3702         case SE_PARAM:\r
3703         case SE_IFACE_PARAM:\r
3704                 return;\r
3705         case SE_UNARY_OP:\r
3706                 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;\r
3707                 return;\r
3708         case SE_BINARY_OP:\r
3709                 collect_partial_fcns(se->get_left_se(), pfcn_refs);\r
3710                 collect_partial_fcns(se->get_right_se(), pfcn_refs);\r
3711                 return;\r
3712         case SE_COLREF:\r
3713                 return;\r
3714         case SE_AGGR_STAR:\r
3715                 return;\r
3716         case SE_AGGR_SE:\r
3717 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;\r
3718                 return;\r
3719         case SE_FUNC:\r
3720                 if(se->get_aggr_ref() >= 0) return;\r
3721 \r
3722                 operands = se->get_operands();\r
3723                 for(o=0;o<operands.size();o++){\r
3724                         collect_partial_fcns(operands[o], pfcn_refs);\r
3725                 }\r
3726 \r
3727                 if(se->is_partial()){\r
3728                         pfcn_refs.insert(se->get_partial_ref());\r
3729                 }\r
3730 \r
3731                 return;\r
3732         default:\r
3733                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",\r
3734                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
3735                 exit(1);\r
3736         }\r
3737         return;\r
3738 }\r
3739 \r
3740 \r
3741 void collect_partial_fcns_pr(predicate_t *pr,  set<int> &pfcn_refs){\r
3742         vector<literal_t *> litl;\r
3743         vector<scalarexp_t *> op_list;\r
3744         int o;\r
3745 \r
3746         switch(pr->get_operator_type()){\r
3747         case PRED_IN:\r
3748                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;\r
3749                 return;\r
3750         case PRED_COMPARE:\r
3751                 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;\r
3752                 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;\r
3753                 return;\r
3754         case PRED_UNARY_OP:\r
3755                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);\r
3756                 return;\r
3757         case PRED_BINARY_OP:\r
3758                 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;\r
3759                 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;\r
3760                 return;\r
3761         case PRED_FUNC:\r
3762                 op_list = pr->get_op_list();\r
3763                 for(o=0;o<op_list.size();++o){\r
3764                         collect_partial_fcns(op_list[o],pfcn_refs);\r
3765                 }\r
3766                 return;\r
3767         default:\r
3768                 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",\r
3769                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3770                 exit(1);\r
3771         }\r
3772 \r
3773         return;\r
3774 }\r
3775 \r
3776 \r
3777 \r
3778 \r
3779 ///////////////////////////////////////////////////////////////\r
3780 ////////////    Exported Functions      ///////////////////////////\r
3781 ///////////////////////////////////////////////////////////////\r
3782 \r
3783 \r
3784 //              Count and collect refs to interface parameters.\r
3785 \r
3786 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){\r
3787         vector<scalarexp_t *> operands;\r
3788         int o;\r
3789         int ret = 0;\r
3790 \r
3791         if(se == NULL) return 0;\r
3792 \r
3793         switch(se->get_operator_type()){\r
3794         case SE_LITERAL:\r
3795         case SE_PARAM:\r
3796                 return 0;\r
3797         case SE_IFACE_PARAM:\r
3798                         ifpnames.insert(se->get_ifpref()->to_string());\r
3799                 return 1;\r
3800         case SE_UNARY_OP:\r
3801                 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;\r
3802         case SE_BINARY_OP:\r
3803                 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);\r
3804                 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);\r
3805                 return ret;\r
3806         case SE_COLREF:\r
3807                 return 0;\r
3808         case SE_AGGR_STAR:\r
3809                 return 0;\r
3810         case SE_AGGR_SE:\r
3811 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;\r
3812                 return 0;\r
3813         case SE_FUNC:\r
3814                 if(se->get_aggr_ref() >= 0) return 0;\r
3815 \r
3816                 operands = se->get_operands();\r
3817                 for(o=0;o<operands.size();o++){\r
3818                         ret += count_se_ifp_refs(operands[o], ifpnames);\r
3819                 }\r
3820 \r
3821                 return ret;\r
3822         default:\r
3823                 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",\r
3824                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
3825                 exit(1);\r
3826         }\r
3827         return 0;\r
3828 }\r
3829 \r
3830 \r
3831 int count_pr_ifp_refs(predicate_t *pr,  set<string> &ifpnames){\r
3832         vector<literal_t *> litl;\r
3833         vector<scalarexp_t *> op_list;\r
3834         int o;\r
3835         int ret = 0;\r
3836         if(pr == NULL) return 0;\r
3837 \r
3838         switch(pr->get_operator_type()){\r
3839         case PRED_IN:\r
3840                 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;\r
3841         case PRED_COMPARE:\r
3842                 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;\r
3843                 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;\r
3844                 return ret;\r
3845         case PRED_UNARY_OP:\r
3846                 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);\r
3847         case PRED_BINARY_OP:\r
3848                 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;\r
3849                 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;\r
3850                 return ret;\r
3851         case PRED_FUNC:\r
3852                 op_list = pr->get_op_list();\r
3853                 for(o=0;o<op_list.size();++o){\r
3854                         ret += count_se_ifp_refs(op_list[o],ifpnames);\r
3855                 }\r
3856                 return ret;\r
3857         default:\r
3858                 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",\r
3859                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3860                 exit(1);\r
3861         }\r
3862 \r
3863         return 0;\r
3864 }\r
3865 \r
3866 //              Resolve ifp refs, convert them to string literals.\r
3867 \r
3868 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb,  string &err){\r
3869         vector<scalarexp_t *> operands;\r
3870         vector<string> ifvals;\r
3871         int o;\r
3872         int ierr;\r
3873         string serr;\r
3874         int ret = 0;\r
3875         literal_t *tmp_l;\r
3876         ifpref_t *ir;\r
3877 \r
3878         if(se == NULL) return 0;\r
3879 \r
3880         switch(se->get_operator_type()){\r
3881         case SE_LITERAL:\r
3882         case SE_PARAM:\r
3883                 return 0;\r
3884         case SE_IFACE_PARAM:\r
3885                 ir = se->get_ifpref();\r
3886                 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);\r
3887                 if(ierr){\r
3888                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";\r
3889                         return 1;\r
3890                 }\r
3891                 if(ifvals.size() == 0){\r
3892                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";\r
3893                         return 1;\r
3894                 }\r
3895                 if(ifvals.size() > 1){\r
3896                         err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";\r
3897                         return 1;\r
3898                 }\r
3899                 tmp_l = new literal_t( ifvals[0]);\r
3900                 se->convert_to_literal(tmp_l);\r
3901                 return 0;\r
3902         case SE_UNARY_OP:\r
3903                 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;\r
3904         case SE_BINARY_OP:\r
3905                 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);\r
3906                 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);\r
3907                 return ret;\r
3908         case SE_COLREF:\r
3909                 return 0;\r
3910         case SE_AGGR_STAR:\r
3911                 return 0;\r
3912         case SE_AGGR_SE:\r
3913 //              collect_partial_fcns(se->get_left_se(), pfcn_refs) ;\r
3914                 return 0;\r
3915         case SE_FUNC:\r
3916                 if(se->get_aggr_ref() >= 0) return 0;\r
3917 \r
3918                 operands = se->get_operands();\r
3919                 for(o=0;o<operands.size();o++){\r
3920                         ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);\r
3921                 }\r
3922 \r
3923                 return ret;\r
3924         default:\r
3925                 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",\r
3926                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
3927                 exit(1);\r
3928         }\r
3929         return 0;\r
3930 }\r
3931 \r
3932 \r
3933 int resolve_pr_ifp_refs(predicate_t *pr,  string ifm, string ifn, ifq_t *ifdb,  string &err){\r
3934         vector<literal_t *> litl;\r
3935         vector<scalarexp_t *> op_list;\r
3936         int o;\r
3937         int ret = 0;\r
3938 \r
3939         switch(pr->get_operator_type()){\r
3940         case PRED_IN:\r
3941                 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;\r
3942         case PRED_COMPARE:\r
3943                 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;\r
3944                 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;\r
3945                 return ret;\r
3946         case PRED_UNARY_OP:\r
3947                 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);\r
3948         case PRED_BINARY_OP:\r
3949                 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;\r
3950                 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;\r
3951                 return ret;\r
3952         case PRED_FUNC:\r
3953                 op_list = pr->get_op_list();\r
3954                 for(o=0;o<op_list.size();++o){\r
3955                         ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);\r
3956                 }\r
3957                 return ret;\r
3958         default:\r
3959                 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",\r
3960                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
3961                 exit(1);\r
3962         }\r
3963 \r
3964         return 0;\r
3965 }\r
3966 \r
3967 \r
3968 string impute_query_name(table_exp_t *fta_tree, string default_nm){\r
3969         string retval = fta_tree->get_val_of_name("query_name");\r
3970         if(retval == "") retval = default_nm;\r
3971         if(retval == "") retval = "default_query";\r
3972         return(retval);\r
3973 }\r
3974 \r
3975 //              Convert the parse tree into an intermediate form,\r
3976 //              which admits analysis better.\r
3977 //\r
3978 //              TODO : rationalize the error return policy.\r
3979 //\r
3980 //              TODO : the query_summary_class object contains\r
3981 //                      the parse tree.\r
3982 //              TODO: revisit the issue when nested subqueries are implemented.\r
3983 //              One possibility: implement accessor methods to hide the\r
3984 //              complexity\r
3985 //              For now: this class contains data structures not in table_exp_t\r
3986 //              (with a bit of duplication)\r
3987 \r
3988 //              Return NULL on error.\r
3989 //              print error messages to stderr.\r
3990 \r
3991 \r
3992 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,\r
3993                                 ext_fcn_list *Ext_fcns, string default_name){\r
3994         int i,j, k, retval;\r
3995 \r
3996 //                      Create the summary struct -- no analysis is done here.\r
3997         query_summary_class *qs = new query_summary_class(fta_tree);\r
3998         qs->query_type = fta_tree->query_type;\r
3999 \r
4000 //////////////          Do common analysis\r
4001 \r
4002 //              Extract query name.  Already imputed for the qnodes.\r
4003 //      qs->query_name = impute_query_name(fta_tree, default_name);\r
4004         qs->query_name = default_name;\r
4005 //printf("query name is %s\n",qs->query_name.c_str());\r
4006 \r
4007 //              extract definitions.  Don't grab the query name.\r
4008 \r
4009         map<string, string> nmap = fta_tree->get_name_map();\r
4010         map<string, string>::iterator nmi;\r
4011         for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){\r
4012                 string pname = (*nmi).first;\r
4013                 if(pname != "query_name" )\r
4014                         (qs->definitions)[pname] = (*nmi).second;\r
4015         }\r
4016 \r
4017 ///\r
4018 ///                             FROM analysis\r
4019 \r
4020 //              First, verify that all the referenced tables are defined.\r
4021 //              Then, bind the tablerefs in the FROM list to schemas in\r
4022 //              the schema list.\r
4023         tablevar_list_t *tlist = fta_tree->get_from();\r
4024         vector<tablevar_t *> tbl_vec = tlist->get_table_list();\r
4025 \r
4026         bool found_error = false;\r
4027         for(i=0;i<tbl_vec.size();i++){\r
4028                 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());\r
4029                 if(sch_no < 0)  {\r
4030                   fprintf(stderr,"Error, table <%s> not found in the schema file\n",\r
4031                         tbl_vec[i]->get_schema_name().c_str() );\r
4032                   fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),\r
4033                                         tbl_vec[i]->get_charno() );\r
4034                   return(NULL);\r
4035                 }\r
4036 \r
4037                 tbl_vec[i]->set_schema_ref(sch_no);\r
4038 \r
4039 //                              If accessing a UDOP, mangle the name\r
4040 //                      This needs to be done in translate_fta.cc, not here.\r
4041 /*\r
4042                 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){\r
4043                         string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;\r
4044                         tbl_vec[i]->set_schema_name(mngl_name);\r
4045                 }\r
4046 */\r
4047 \r
4048 //                      No FTA schema should have an interface defined on it.\r
4049                 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){\r
4050                         fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());\r
4051                 }\r
4052 //                      Fill in default interface\r
4053                 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){\r
4054                         tbl_vec[i]->set_interface("default");\r
4055                         tbl_vec[i]->set_ifq(true);\r
4056                 }\r
4057 //                      Fill in default machine\r
4058                 if(tbl_vec[i]->get_interface()!=""  && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){\r
4059                         tbl_vec[i]->set_machine(hostname);\r
4060                 }\r
4061 \r
4062                 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){\r
4063 //                      Record the set of interfaces accessed\r
4064                         string ifstr;\r
4065                         if(tbl_vec[i]->get_ifq()){\r
4066                                 ifstr = "["+tbl_vec[i]->get_interface()+"]";\r
4067                         }else{\r
4068                                 if(tbl_vec[i]->get_machine() != "localhost"){\r
4069                                         ifstr = "&apos;"+tbl_vec[i]->get_machine()+"&apos;."+tbl_vec[i]->get_interface();\r
4070                                 }else{\r
4071                                         ifstr = tbl_vec[i]->get_interface();\r
4072                                 }\r
4073                         }\r
4074 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());\r
4075                         if(qs->definitions.count("_referenced_ifaces")){\r
4076                                 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;\r
4077                         }\r
4078                         qs->definitions["_referenced_ifaces"] = ifstr;\r
4079                 }\r
4080 \r
4081         }\r
4082         if(found_error) return(NULL);\r
4083 \r
4084 //                      Ensure that all tablevars are named\r
4085 //                      and that no two tablevars have the same name.\r
4086         int tblvar_no = 0;\r
4087 //              First, gather the set of variable names\r
4088         set<string> tblvar_names;\r
4089         for(i=0;i<tbl_vec.size();i++){\r
4090                 if(tbl_vec[i]->get_var_name() != ""){\r
4091                         if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){\r
4092                                 fprintf(stderr,"ERROR, query has two table variables named %s.  line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());\r
4093                                 return(NULL);\r
4094                         }\r
4095                         tblvar_names.insert(tbl_vec[i]->get_var_name());\r
4096                 }\r
4097         }\r
4098 //              Now generate variable names for unnamed tablevars\r
4099         for(i=0;i<tbl_vec.size();i++){\r
4100                 if(tbl_vec[i]->get_var_name() == ""){\r
4101                         char tmpstr[200];\r
4102                         sprintf(tmpstr,"_t%d",tblvar_no);\r
4103                         string newvar = tmpstr;\r
4104                         while(tblvar_names.count(newvar) > 0){\r
4105                                 tblvar_no++;\r
4106                                 sprintf(tmpstr,"_t%d",tblvar_no);\r
4107                                 newvar = tmpstr;\r
4108                         }\r
4109                         tbl_vec[i]->set_range_var(newvar);\r
4110                         tblvar_names.insert(newvar);\r
4111                 }\r
4112         }\r
4113 \r
4114 //              Process inner/outer join properties\r
4115         int jprop = fta_tree->get_from()->get_properties();\r
4116 //              Require explicit INNER_JOIN, ... specification for join queries.\r
4117         if(jprop < 0){\r
4118                 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){\r
4119                         fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, FILTER_JOIN.\n");\r
4120                         return(NULL);\r
4121                 }\r
4122         }\r
4123 \r
4124         if(jprop == OUTER_JOIN_PROPERTY){\r
4125                 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);\r
4126         }\r
4127         if(jprop == LEFT_OUTER_JOIN_PROPERTY)\r
4128                 tbl_vec[0]->set_property(1);\r
4129         if(jprop == RIGHT_OUTER_JOIN_PROPERTY)\r
4130                 tbl_vec[tbl_vec.size()-1]->set_property(1);\r
4131         if(jprop == FILTER_JOIN_PROPERTY){\r
4132                 if(fta_tree->get_from()->get_temporal_range() == 0){\r
4133                         fprintf(stderr,"ERROR, a filter join must have a non-zero tempoal range.\n");\r
4134                         return NULL;\r
4135                 }\r
4136                 if(tbl_vec.size() != 2){\r
4137                         fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");\r
4138                         return NULL;\r
4139                 }\r
4140                 colref_t *cr = fta_tree->get_from()->get_colref();\r
4141                 string field = cr->get_field();\r
4142 \r
4143                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);\r
4144                 if(fi0 < 0){\r
4145                         fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());\r
4146                         return NULL;\r
4147                 }\r
4148                 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());\r
4149                 cr->set_tablevar_ref(0);\r
4150                 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);\r
4151                 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);\r
4152                 data_type *dt0 = new data_type(type_name, modifiers);\r
4153                 if(dt0->get_type_str() != "UINT"){\r
4154                         fprintf(stderr,"ERROR, the temporal attribute in a filter join must be a UINT.\n");\r
4155                         return NULL;\r
4156                 }\r
4157                 if(! dt0->is_increasing()){\r
4158                         fprintf(stderr,"ERROR, the temporal attribtue in a filter join must be temporal increasing.\n");\r
4159                         return NULL;\r
4160                 }\r
4161         }\r
4162 \r
4163 \r
4164 \r
4165 /////////////////////\r
4166 ///             Build the query param table\r
4167         vector<var_pair_t *> query_params = fta_tree->query_params;\r
4168         int p;\r
4169         for(p=0;p<query_params.size();++p){\r
4170                 string pname = query_params[p]->name;\r
4171                 string dtname = query_params[p]->val;\r
4172 \r
4173                 if(pname == ""){\r
4174                         fprintf(stderr,"ERROR parameter has empty name.\n");\r
4175                         found_error = true;\r
4176                 }\r
4177                 if(dtname == ""){\r
4178                         fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());\r
4179                         found_error = true;\r
4180                 }\r
4181                 data_type *dt = new data_type(dtname);\r
4182                 if(!(dt->is_defined())){\r
4183                         fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());\r
4184                         found_error = true;\r
4185                 }\r
4186 \r
4187                 qs->add_query_param(pname, dt, false);\r
4188         }\r
4189         if(found_error) return(NULL);\r
4190 //              unpack the param table to a global for easier analysis.\r
4191         param_tbl=qs->param_tbl;\r
4192 \r
4193 //////////////////              MERGE specialized analysis\r
4194 \r
4195         if(qs->query_type == MERGE_QUERY){\r
4196 //                      Verify that\r
4197 //                              1) there are two *different* streams ref'd in the FROM clause\r
4198 //                                      However, only emit a warning.\r
4199 //                                      (can't detect a problem if one of the interfaces is the\r
4200 //                                       default interface).\r
4201 //                              2) They have the same layout (e.g. same types but the\r
4202 //                                      names can be different\r
4203 //                              3) the two columns can unambiguously be mapped to\r
4204 //                                      fields of the two tables, one per table.  Exception:\r
4205 //                                      the column names are the same and exist in both tables.\r
4206 //                                      FURTHERMORE the positions must be the same\r
4207 //                              4) after mapping, verify that both colrefs are temporal\r
4208 //                                      and in the same direction.\r
4209                 if(tbl_vec.size() < 2){\r
4210                         fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );\r
4211                         return(NULL);\r
4212                 }\r
4213 \r
4214                 vector<field_entry *> fev0 = schema->get_fields(\r
4215                         tbl_vec[0]->get_schema_name()\r
4216                 );\r
4217 \r
4218 \r
4219                 int cv;\r
4220                 for(cv=1;cv<tbl_vec.size();++cv){\r
4221                         vector<field_entry *> fev1 = schema->get_fields(\r
4222                                 tbl_vec[cv]->get_schema_name()\r
4223                         );\r
4224 \r
4225                         if(fev0.size() != fev1.size()){\r
4226                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());\r
4227                                 return(NULL);\r
4228                         }\r
4229 \r
4230 //                      Only need to ensure that the list of types are the same.\r
4231 //                      The first table supplies the output colnames,\r
4232 //                      and all temporal properties are lost, except for the\r
4233 //                      merge-by columns.\r
4234                         int f;\r
4235                         for(f=0;f<fev0.size();++f){\r
4236                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());\r
4237                                 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());\r
4238                                 if(! dt0.equal_subtypes(&dt1) ){\r
4239                                 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());\r
4240                                         return(NULL);\r
4241                                 }\r
4242                         }\r
4243                 }\r
4244 \r
4245 //              copy over the merge-by cols.\r
4246                 qs->mvars = fta_tree->mergevars;\r
4247 \r
4248                 if(qs->mvars.size() == 0){      // need to discover the merge vars.\r
4249                         int mergevar_pos = -1;\r
4250                         int f;\r
4251                         for(f=0;f<fev0.size();++f){\r
4252                                 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());\r
4253                                 if(dt0.is_temporal()){\r
4254                                         mergevar_pos = f;\r
4255                                         break;\r
4256                                 }\r
4257                         }\r
4258                         if(mergevar_pos >= 0){\r
4259                                 for(cv=0;cv<tbl_vec.size();++cv){\r
4260                                         vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());\r
4261                                         qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));\r
4262                                 }\r
4263                         }else{\r
4264                                 fprintf(stderr,"ERROR, no merge-by column found.\n");\r
4265                                 return(NULL);\r
4266                         }\r
4267                 }\r
4268 \r
4269 //                      Ensure same number of tables, merge cols.\r
4270                 if(tbl_vec.size() != qs->mvars.size()){\r
4271                         fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());\r
4272                         return(NULL);\r
4273                 }\r
4274 \r
4275 //              Ensure that the merge-by are from different tables\r
4276 //              also, sort colrefs so that they align with the FROM list using tmp_crl\r
4277                 set<int> refd_sources;\r
4278                 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);\r
4279                 for(cv=0;cv<qs->mvars.size();++cv){\r
4280                         int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);\r
4281                         if(tblvar<0){\r
4282                                 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());\r
4283                         }\r
4284                         refd_sources.insert(tblvar);\r
4285                         tmp_crl[tblvar] = qs->mvars[cv];\r
4286                 }\r
4287                 if(refd_sources.size() != qs->mvars.size()){\r
4288                         fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());\r
4289                         return(NULL);\r
4290                 }\r
4291 \r
4292 //                      1-1 mapping, so use tmp_crl as the merge column list.\r
4293                 qs->mvars = tmp_crl;\r
4294 \r
4295 \r
4296 \r
4297 //                      Look up the colrefs in their schemas, verify that\r
4298 //                      they are at the same place, that they are both temporal\r
4299 //                      in the same way.\r
4300 //                      It seems that this should be done more in the schema objects.\r
4301                 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());\r
4302                 if(fi0 < 0){\r
4303                         fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());\r
4304                         exit(1);\r
4305                 }\r
4306                 for(cv=1;cv<qs->mvars.size();++cv){\r
4307                         int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());\r
4308                         if(fi0!=fi1){\r
4309                                 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());\r
4310                                 return NULL;\r
4311                         }\r
4312                 }\r
4313 \r
4314                 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);\r
4315                 data_type dt0(fe0->get_type(),fe0->get_modifier_list());\r
4316                 if( (!dt0.is_temporal()) ){\r
4317                         fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);\r
4318                         return(NULL);\r
4319                 }\r
4320                 for(cv=0;cv<qs->mvars.size();++cv){\r
4321                         field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);\r
4322                         data_type dt1(fe1->get_type(),fe1->get_modifier_list());\r
4323                         if( (!dt1.is_temporal()) ){\r
4324                                 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);\r
4325                                 return(NULL);\r
4326                         }\r
4327 \r
4328 \r
4329                         if( dt0.get_temporal() != dt1.get_temporal()){\r
4330                                 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);\r
4331                                 return(NULL);\r
4332                         }\r
4333                 }\r
4334 \r
4335 //                      If there is a SLACK specification, verify\r
4336 //                      that it is literal-only and that its type is compatible\r
4337 //                      with that of the merge columns\r
4338                 qs->slack = fta_tree->slack;\r
4339                 if(qs->slack){\r
4340                         if(! literal_only_se(qs->slack)){\r
4341                                 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");\r
4342                                 return NULL;\r
4343                         }\r
4344 \r
4345                         assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );\r
4346                         data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));\r
4347                         if(sdt.get_type() == undefined_t){\r
4348                                 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");\r
4349                                 return NULL;\r
4350                         }\r
4351                 }\r
4352 \r
4353 \r
4354 //                      All the tests have passed, there is nothing\r
4355 //                      else to fill in.\r
4356 \r
4357         }\r
4358 \r
4359 //////////////////              SELECT specialized analysis\r
4360 \r
4361         if(qs->query_type == SELECT_QUERY){\r
4362 //              unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals\r
4363 //              objects into globals, for easier syntax.\r
4364         gb_tbl = qs->gb_tbl;\r
4365         aggr_tbl = qs->aggr_tbl;\r
4366 \r
4367 \r
4368 //              Build the table of group-by attributes.\r
4369 //              (se processing done automatically).\r
4370 //              NOTE : Doing the SE processing here is getting cumbersome,\r
4371 //                      I should process these individually.\r
4372 //              NOTE : I should check for duplicate names.\r
4373 //              NOTE : I should ensure that the def of one GB does not\r
4374 //                      reference the value of another.\r
4375         vector<extended_gb_t *> gb_list = fta_tree->get_groupby();\r
4376         int n_temporal = 0;\r
4377         string temporal_gbvars = "";\r
4378         map<string, int> gset_gbnames;\r
4379 \r
4380 //              For generating the set of GB patterns for this aggregation query.\r
4381         vector<bool> inner_pattern;\r
4382         vector<vector<bool> > pattern_set;\r
4383         vector<vector<vector<bool> > > pattern_components;\r
4384 \r
4385         vector<gb_t *> r_gbs, c_gbs, g_gbs;\r
4386         int n_patterns;\r
4387 \r
4388         for(i=0;i<gb_list.size();i++){\r
4389                 switch(gb_list[i]->type){\r
4390                 case gb_egb_type:\r
4391                         retval = gb_tbl->add_gb_attr(\r
4392                                 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns\r
4393                         );\r
4394                         if(retval < 0){\r
4395                                 return NULL;  // nothing added to gb_tbl, so this can trigger a segfault 2 lines below\r
4396                         }else{\r
4397                                 if(gb_tbl->get_data_type(i)->is_temporal()){\r
4398                                         n_temporal++;\r
4399                                         if(temporal_gbvars != "") temporal_gbvars+=" ";\r
4400                                         temporal_gbvars += gb_tbl->get_name(i);\r
4401                                 }\r
4402                         }\r
4403 \r
4404                         inner_pattern.clear();\r
4405                         pattern_set.clear();\r
4406                         inner_pattern.push_back(true);\r
4407                         pattern_set.push_back(inner_pattern);\r
4408                         pattern_components.push_back(pattern_set);\r
4409 \r
4410                         gb_tbl->gb_entry_type.push_back("");\r
4411                         gb_tbl->gb_entry_count.push_back(1);\r
4412                         gb_tbl->pattern_components.push_back(pattern_set);\r
4413 \r
4414                 break;\r
4415                 case rollup_egb_type:\r
4416                         r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();\r
4417                         for(j=0;j<r_gbs.size();++j){\r
4418                                 retval = gb_tbl->add_gb_attr(\r
4419                                         r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns\r
4420                                 );\r
4421                                 if(retval < 0){\r
4422                                         found_error = true;\r
4423                                 }else{          // rollup gb can't be temporal\r
4424                                         gb_tbl->reset_temporal(gb_tbl->size()-1);\r
4425                                 }\r
4426                         }\r
4427 \r
4428                         inner_pattern.resize(r_gbs.size());\r
4429                         pattern_set.clear();\r
4430                         for(j=0;j<=r_gbs.size();++j){\r
4431                                 for(k=0;k<r_gbs.size();++k){\r
4432                                         if(k < j)\r
4433                                                 inner_pattern[k] = true;\r
4434                                         else\r
4435                                                 inner_pattern[k] = false;\r
4436                                 }\r
4437                                 pattern_set.push_back(inner_pattern);\r
4438                         }\r
4439                         pattern_components.push_back(pattern_set);\r
4440 \r
4441                         gb_tbl->gb_entry_type.push_back("ROLLUP");\r
4442                         gb_tbl->gb_entry_count.push_back(r_gbs.size());\r
4443                         gb_tbl->pattern_components.push_back(pattern_set);\r
4444                 break;\r
4445                 case cube_egb_type:\r
4446                         c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();\r
4447                         for(j=0;j<c_gbs.size();++j){\r
4448                                 retval = gb_tbl->add_gb_attr(\r
4449                                         c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns\r
4450                                 );\r
4451                                 if(retval < 0){\r
4452                                         found_error = true;\r
4453                                 }else{          // cube gb can't be temporal\r
4454                                         gb_tbl->reset_temporal(gb_tbl->size()-1);\r
4455                                 }\r
4456                         }\r
4457 \r
4458                         inner_pattern.resize(c_gbs.size());\r
4459                         pattern_set.clear();\r
4460                         n_patterns = 1 << c_gbs.size();\r
4461                         for(j=0;j<n_patterns;++j){\r
4462                                 int test_bit = 1;\r
4463                                 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){\r
4464                                         if((j & test_bit) != 0)\r
4465                                                 inner_pattern[k] = true;\r
4466                                         else\r
4467                                                 inner_pattern[k] = false;\r
4468                                 }\r
4469                                 pattern_set.push_back(inner_pattern);\r
4470                         }\r
4471                         pattern_components.push_back(pattern_set);\r
4472 \r
4473                         gb_tbl->gb_entry_type.push_back("CUBE");\r
4474                         gb_tbl->gb_entry_count.push_back(c_gbs.size());\r
4475                         gb_tbl->pattern_components.push_back(pattern_set);\r
4476                 break;\r
4477                 case gsets_egb_type:\r
4478                 {\r
4479                         gset_gbnames.clear();\r
4480                         for(j=0;j<gb_list[i]->gb_lists.size();++j){\r
4481                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();\r
4482                                 for(k=0;k<g_gbs.size();++k){\r
4483                                         if(g_gbs[k]->type != GB_COLREF){\r
4484                                                 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());\r
4485                                                 found_error = true;\r
4486                                         }else{\r
4487                                                 if(gset_gbnames.count(g_gbs[k]->name) == 0){\r
4488                                                         retval = gb_tbl->add_gb_attr(\r
4489                                                                 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns\r
4490                                                         );\r
4491                                                         if(retval < 0){\r
4492                                                                 found_error = true;\r
4493                                                         }else{          // gsets gb can't be temporal\r
4494                                                                 gb_tbl->reset_temporal(gb_tbl->size()-1);\r
4495                                                         }\r
4496                                                         int pos = gset_gbnames.size();\r
4497                                                         gset_gbnames[g_gbs[k]->name] = pos;\r
4498                                                 }\r
4499                                         }\r
4500                                 }\r
4501                         }\r
4502 \r
4503                         if(gset_gbnames.size() > 63){\r
4504                                 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");\r
4505                                 found_error = true;\r
4506                         }\r
4507 \r
4508                         inner_pattern.resize(gset_gbnames.size());\r
4509                         pattern_set.clear();\r
4510                         set<unsigned long long int> signatures;\r
4511                         for(j=0;j<gb_list[i]->gb_lists.size();++j){\r
4512                                 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();\r
4513                                 set<string> refd_gbs;\r
4514                                 for(k=0;k<g_gbs.size();++k){\r
4515                                         refd_gbs.insert(g_gbs[k]->name);\r
4516                                 }\r
4517                                 fill(inner_pattern.begin(),inner_pattern.end(),false);\r
4518                                 unsigned long long int signature = 0;\r
4519                                 set<string>::iterator ssi;\r
4520                                 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){\r
4521                                         inner_pattern[gset_gbnames[(*ssi)]] = true;\r
4522                                         signature |= (1 << gset_gbnames[(*ssi)]);\r
4523                                 }\r
4524                                 if(signatures.count(signature)){\r
4525                                         fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");\r
4526                                         set<string>::iterator ssi;\r
4527                                         for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){\r
4528                                                 fprintf(stderr," %s",(*ssi).c_str());\r
4529                                         }\r
4530                                         fprintf(stderr,"\n");\r
4531                                 }else{\r
4532                                         signatures.insert(signature);\r
4533                                         pattern_set.push_back(inner_pattern);\r
4534                                 }\r
4535                         }\r
4536                         pattern_components.push_back(pattern_set);\r
4537 \r
4538                         gb_tbl->gb_entry_type.push_back("GROUPING_SETS");\r
4539                         gb_tbl->gb_entry_count.push_back(gset_gbnames.size());\r
4540                         gb_tbl->pattern_components.push_back(pattern_set);\r
4541                 }\r
4542                 break;\r
4543                 default:\r
4544                 break;\r
4545                 }\r
4546         }\r
4547         if(found_error) return(NULL);\r
4548         if(n_temporal > 1){\r
4549                 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s).  Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());\r
4550                 return NULL;\r
4551         }\r
4552 \r
4553 //              Compute the set of patterns.  Take the cross product of all pattern components.\r
4554         vector<vector<bool> > gb_patterns;\r
4555         int n_components = pattern_components.size();\r
4556         vector<int> pattern_pos(n_components,0);\r
4557         bool done = false;\r
4558         while(! done){\r
4559                 vector<bool> pattern;\r
4560                 for(j=0;j<n_components;j++){\r
4561                         pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),\r
4562                                 pattern_components[j][pattern_pos[j]].end());\r
4563                 }\r
4564                 gb_patterns.push_back(pattern);\r
4565                 for(j=0;j<n_components;j++){\r
4566                         pattern_pos[j]++;\r
4567                         if(pattern_pos[j] >= pattern_components[j].size())\r
4568                                 pattern_pos[j] = 0;\r
4569                         else\r
4570                                 break;\r
4571                 }\r
4572                 if(j >= n_components)\r
4573                         done = true;\r
4574         }\r
4575         gb_tbl->gb_patterns = gb_patterns;\r
4576 \r
4577 \r
4578 //              Process the supergroup, if any.\r
4579         vector<colref_t *> sgb = fta_tree->get_supergb();\r
4580         for(i=0;i<sgb.size();++i){\r
4581                 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);\r
4582                 if(gbr < 0){\r
4583                         fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());\r
4584                         found_error = true;\r
4585                 }\r
4586                 if(qs->sg_tbl.count(gbr)){\r
4587                         fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());\r
4588                 }\r
4589                 qs->sg_tbl.insert(gbr);\r
4590         }\r
4591         if(found_error) return(NULL);\r
4592 \r
4593         if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){\r
4594                 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");\r
4595                 return NULL;\r
4596         }\r
4597 \r
4598 \r
4599 \r
4600         predicate_t *wh = fta_tree->get_where();\r
4601         predicate_t *hv = fta_tree->get_having();\r
4602         predicate_t *cw = fta_tree->get_cleaning_when();\r
4603         predicate_t *cb = fta_tree->get_cleaning_by();\r
4604         predicate_t *closew = fta_tree->get_closing_when();\r
4605 \r
4606         if(closew != NULL  && gb_tbl->gb_patterns.size()>1){\r
4607                 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");\r
4608                 return NULL;\r
4609         }\r
4610 \r
4611 \r
4612 \r
4613 //              Verify that all column references are valid, and if so assign\r
4614 //              the data type.\r
4615 \r
4616         vector<select_element *> sl_list = fta_tree->get_sl_vec();\r
4617         for(i=0;i<sl_list.size();i++){\r
4618                 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);\r
4619                 if(retval < 0) found_error = true;\r
4620         }\r
4621         if(wh != NULL)\r
4622                 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);\r
4623         if(retval < 0) found_error = true;\r
4624         if(hv != NULL)\r
4625                 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);\r
4626         if(retval < 0) found_error = true;\r
4627         if(cw != NULL)\r
4628                 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);\r
4629         if(retval < 0) found_error = true;\r
4630         if(cb != NULL)\r
4631                 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);\r
4632         if(retval < 0) found_error = true;\r
4633         if(closew != NULL)\r
4634                 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);\r
4635         if(retval < 0) found_error = true;\r
4636 \r
4637         if(found_error) return(NULL);\r
4638 \r
4639 //              Verify that all of the scalar expressions\r
4640 //              and comparison predicates have compatible types.\r
4641 \r
4642         n_temporal = 0;\r
4643         string temporal_output_fields;\r
4644         for(i=0;i<sl_list.size();i++){\r
4645                 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );\r
4646                 if(retval < 0){\r
4647                          found_error = true;\r
4648                 }else{\r
4649                         if(sl_list[i]->se->get_data_type()->is_temporal()){\r
4650                                 n_temporal++;\r
4651                                 temporal_output_fields += " "+int_to_string(i);\r
4652                         }\r
4653                 }\r
4654         }\r
4655         if(n_temporal > 1){\r
4656                 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s).  Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());\r
4657                 found_error=true;\r
4658         }\r
4659         if(wh != NULL)\r
4660                 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);\r
4661         if(retval < 0) found_error = true;\r
4662         if(hv != NULL)\r
4663                 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);\r
4664         if(retval < 0) found_error = true;\r
4665         if(cw != NULL)\r
4666                 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);\r
4667         if(retval < 0) found_error = true;\r
4668         if(cb != NULL)\r
4669                 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);\r
4670         if(retval < 0) found_error = true;\r
4671         if(closew != NULL)\r
4672                 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);\r
4673         if(retval < 0) found_error = true;\r
4674 \r
4675         if(found_error) return(NULL);\r
4676 \r
4677 //                      Impute names for the unnamed columns.\r
4678         set<string> curr_names;\r
4679         int s;\r
4680         for(s=0;s<sl_list.size();++s){\r
4681                 curr_names.insert(sl_list[s]->name);\r
4682         }\r
4683         for(s=0;s<sl_list.size();++s){\r
4684                 if(sl_list[s]->name == "")\r
4685                         sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);\r
4686         }\r
4687 \r
4688 \r
4689 //              Check the aggregates.\r
4690 //              No aggrs allowed in the WHERE predicate.\r
4691 //              (no aggrs in the GB defs, but that is examined elsewhere)\r
4692 //              Therefore, aggregates are allowed only the select clause.\r
4693 //\r
4694 //              The query is an aggregation query if there is a group-by clause, or\r
4695 //              if any aggregate is referenced.  If there is a group-by clause,\r
4696 //              at least one aggregate must be referenced.\r
4697 //              If the query is an aggregate query, the scalar expressions in\r
4698 //              the select clause can reference only constants, aggregates, or group-by\r
4699 //              attributes.\r
4700 //              Also, if the query is an aggregate query, build a table referencing\r
4701 //              the aggregates.\r
4702 //\r
4703 //              No nested aggregates allowed.\r
4704 //\r
4705 \r
4706 //              First, count references in the WHERE predicate.\r
4707 //              (if there are any references, report an error).\r
4708 //                      can ref group vars, tuple fields, and stateful fcns.\r
4709 \r
4710         if(wh != NULL){\r
4711                 retval = count_aggr_pred(wh, true);\r
4712                 if(retval > 0){\r
4713                         fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");\r
4714                         return(NULL);\r
4715                 }\r
4716         }\r
4717 \r
4718 //              NOTE : Here I need an analysis of the having clause\r
4719 //              to verify that it only refs GB attrs and aggregates.\r
4720 //                      (also, superaggregates, stateful fcns)\r
4721         if(hv!=NULL){\r
4722                 retval = verify_having_pred(hv, "HAVING", Ext_fcns);\r
4723                 if(retval < 0) return(NULL);\r
4724         }\r
4725 \r
4726 //              Cleaning by has same reference rules as Having\r
4727         if(cb!=NULL){\r
4728                 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);\r
4729                 if(retval < 0) return(NULL);\r
4730         }\r
4731 \r
4732 //              Cleaning when has same reference rules as Having,\r
4733 //              except that references to non-superaggregates are not allowed.\r
4734 //              This is tested for when "CLEANING_BY" is passed in as the clause.\r
4735         if(cw!=NULL){\r
4736                 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);\r
4737                 if(retval < 0) return(NULL);\r
4738         }\r
4739 \r
4740 //              CLOSING_WHEN : same rules as HAVING\r
4741         if(closew!=NULL){\r
4742                 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);\r
4743                 if(retval < 0) return(NULL);\r
4744         }\r
4745 \r
4746 \r
4747 //              Collect aggregates in the HAVING and CLEANING clauses\r
4748         if(hv != NULL){\r
4749                 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);\r
4750         }\r
4751         if(cw != NULL){\r
4752                 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);\r
4753         }\r
4754         if(cb != NULL){\r
4755                 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);\r
4756         }\r
4757         if(closew != NULL){\r
4758                 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);\r
4759         }\r
4760 \r
4761 //              Collect aggregate refs in the SELECT clause.\r
4762 \r
4763         for(i=0;i<sl_list.size();i++)\r
4764                 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);\r
4765 \r
4766 \r
4767 //              Collect references to states of stateful functions\r
4768         if(wh != NULL){\r
4769                 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);\r
4770         }\r
4771         if(hv != NULL){\r
4772                 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);\r
4773         }\r
4774         if(cw != NULL){\r
4775                 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);\r
4776         }\r
4777         if(cb != NULL){\r
4778                 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);\r
4779         }\r
4780         if(closew != NULL){                     // should be no stateful fcns here ...\r
4781                 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);\r
4782         }\r
4783         for(i=0;i<sl_list.size();i++)\r
4784                 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);\r
4785 \r
4786 \r
4787 //              If this is an aggregate query, it had normally references\r
4788 //              some aggregates.  Its not necessary though, just emit a warning.\r
4789 //              (acts as SELECT DISTINCT)\r
4790 \r
4791         bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;\r
4792         if(is_aggr_query && aggr_tbl->size() == 0){\r
4793                 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");\r
4794         }\r
4795 \r
4796 //              If this is an aggregate query,\r
4797 //                      1) verify that the SEs in the SELECT clause reference\r
4798 //                              only constants, aggregates, and group-by attributes.\r
4799 //                      2) No aggregate scalar expression references an aggregate\r
4800 //                              or any stateful function.\r
4801 //                      3) either it references both CLEANING clauses or neither.\r
4802 //                      4) all superaggregates must have the superaggr_allowed property.\r
4803 //                      5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY\r
4804 //                         clauses must have the multiple_output property.\r
4805 \r
4806 \r
4807         if(is_aggr_query){\r
4808                 if(gb_list.size() == 0){\r
4809                         fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");\r
4810                         return NULL;\r
4811                 }\r
4812 //                      Ensure that at least one gbvar is temporal\r
4813                 if(! fta_tree->name_exists("no_temporal_aggr")){\r
4814                         bool found_temporal = false;\r
4815                 for(i=0;i<gb_tbl->size();i++){\r
4816                                 if(gb_tbl->get_data_type(i)->is_temporal()){\r
4817                                         found_temporal = true;\r
4818                                 }\r
4819                         }\r
4820                         if(! found_temporal){\r
4821                                 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");\r
4822                                 exit(1);\r
4823                         }\r
4824                 }\r
4825 \r
4826                 if((!cb && cw) || (cb && !cw)){\r
4827                         fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");\r
4828                         return(NULL);\r
4829                 }\r
4830 \r
4831                 bool refs_running = false;\r
4832                 int a;\r
4833                 for(a=0; a<aggr_tbl->size(); ++a){\r
4834                         refs_running |= aggr_tbl->is_running_aggr(a);\r
4835                 }\r
4836 \r
4837                 if(closew){\r
4838                         if(cb || cw){\r
4839                                 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");\r
4840                                 return(NULL);\r
4841                         }\r
4842                         if(!refs_running){\r
4843                                 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");\r
4844                                 return(NULL);\r
4845                         }\r
4846                 }\r
4847 \r
4848                 if(refs_running && !closew){\r
4849                                 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");\r
4850                         return(NULL);\r
4851                 }\r
4852 \r
4853                 bool st_ok = true;\r
4854                 for(i=0;i<sl_list.size();i++){\r
4855                         bool ret_bool = verify_aggr_query_se(sl_list[i]->se);\r
4856                         st_ok = st_ok && ret_bool;\r
4857                 }\r
4858                 if(! st_ok)\r
4859                         return(NULL);\r
4860 \r
4861                 for(i=0;i<aggr_tbl->size();i++){\r
4862                         if(aggr_tbl->is_superaggr(i)){\r
4863                                 if(! aggr_tbl->superaggr_allowed(i)){\r
4864                                         fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());\r
4865                                         return NULL;\r
4866                                 }\r
4867                         }\r
4868                         if(aggr_tbl->is_builtin(i)){\r
4869                                 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){\r
4870                                         fprintf(stderr,"ERROR no nested aggregation allowed.\n");\r
4871                                         return(NULL);\r
4872                                 }\r
4873                         }else{\r
4874                                 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);\r
4875                                 int o;\r
4876                                 for(o=0;o<opl.size();++o){\r
4877                                         if(count_aggr_se(opl[o], true) > 0){\r
4878                                                 fprintf(stderr,"ERROR no nested aggregation allowed.\n");\r
4879                                                 return(NULL);\r
4880                                         }\r
4881                                 }\r
4882                         }\r
4883                 }\r
4884         }else{\r
4885 //                      Ensure that non-aggregate query doesn't reference some things\r
4886                 if(cb || cw){\r
4887                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");\r
4888                         return(NULL);\r
4889                 }\r
4890                 if(closew){\r
4891                         fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");\r
4892                         return(NULL);\r
4893                 }\r
4894                 if(qs->states_refd.size()){\r
4895                         fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");\r
4896                         return(NULL);\r
4897                 }\r
4898         }\r
4899 \r
4900 \r
4901 \r
4902 //              Convert the predicates into CNF.  OK to pass NULL ptr.\r
4903         make_cnf_from_pr(wh, qs->wh_cnf);\r
4904         make_cnf_from_pr(hv, qs->hav_cnf);\r
4905         make_cnf_from_pr(cb, qs->cb_cnf);\r
4906         make_cnf_from_pr(cw, qs->cw_cnf);\r
4907         make_cnf_from_pr(closew, qs->closew_cnf);\r
4908 \r
4909 //              Analyze the predicates.\r
4910 \r
4911         for(i=0;i<qs->wh_cnf.size();i++)\r
4912                 analyze_cnf(qs->wh_cnf[i]);\r
4913         for(i=0;i<qs->hav_cnf.size();i++)\r
4914                 analyze_cnf(qs->hav_cnf[i]);\r
4915         for(i=0;i<qs->cb_cnf.size();i++)\r
4916                 analyze_cnf(qs->cb_cnf[i]);\r
4917         for(i=0;i<qs->cw_cnf.size();i++)\r
4918                 analyze_cnf(qs->cw_cnf[i]);\r
4919         for(i=0;i<qs->closew_cnf.size();i++)\r
4920                 analyze_cnf(qs->closew_cnf[i]);\r
4921 \r
4922 \r
4923 //                      At this point, the old analysis program\r
4924 //                      gathered all refs to partial functions,\r
4925 //                      complex literals, and parameters accessed via a handle.\r
4926 //                      I think its better to delay this\r
4927 //                      until code generation time, as the query will be\r
4928 //                      in general split.\r
4929 \r
4930     }\r
4931 \r
4932         return(qs);\r
4933 }\r
4934 \r
4935 ///////////////////////////////////////////////////////////////////////\r
4936 \r
4937 //              Expand gbvars with their definitions.\r
4938 \r
4939 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){\r
4940         int o;\r
4941 \r
4942         switch(se->get_operator_type()){\r
4943         case SE_LITERAL:\r
4944         case SE_PARAM:\r
4945         case SE_IFACE_PARAM:\r
4946                 return se;\r
4947         case SE_UNARY_OP:\r
4948                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);\r
4949                 return se;\r
4950         case SE_BINARY_OP:\r
4951                 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);\r
4952                 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);\r
4953                 return se;\r
4954         case SE_COLREF:\r
4955                 if( se->is_gb() ){\r
4956                         return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );\r
4957                 }\r
4958                 return se;\r
4959 //                      don't descend into aggr defs.\r
4960         case SE_AGGR_STAR:\r
4961                 return se;\r
4962         case SE_AGGR_SE:\r
4963                 return se;\r
4964         case SE_FUNC:\r
4965                 for(o=0;o<se->param_list.size();o++){\r
4966                         se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);\r
4967                 }\r
4968                 return se;\r
4969         default:\r
4970                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",\r
4971                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
4972                 exit(1);\r
4973         }\r
4974         return se;\r
4975 }\r
4976 \r
4977 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){\r
4978         vector<scalarexp_t *> op_list;\r
4979         int o;\r
4980         bool found = false;\r
4981 \r
4982         switch(pr->get_operator_type()){\r
4983         case PRED_IN:\r
4984                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);\r
4985                 return;\r
4986         case PRED_COMPARE:\r
4987                 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;\r
4988                 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;\r
4989                 return;\r
4990         case PRED_UNARY_OP:\r
4991                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;\r
4992                 return;\r
4993         case PRED_BINARY_OP:\r
4994                 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;\r
4995                 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;\r
4996                 return;\r
4997         case PRED_FUNC:\r
4998                 for(o=0;o<pr->param_list.size();++o){\r
4999                         pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;\r
5000                 }\r
5001                 return;\r
5002         default:\r
5003                 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",\r
5004                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
5005         }\r
5006         return;\r
5007 }\r
5008 \r
5009 \r
5010 \r
5011 \r
5012 //              return true if the se / pr contains any gbvar on the list.\r
5013 \r
5014 \r
5015 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){\r
5016         vector<scalarexp_t *> operands;\r
5017         int o;\r
5018         bool found = false;\r
5019 \r
5020         switch(se->get_operator_type()){\r
5021         case SE_LITERAL:\r
5022         case SE_PARAM:\r
5023         case SE_IFACE_PARAM:\r
5024                 return false;\r
5025         case SE_UNARY_OP:\r
5026                 return contains_gb_se(se->get_left_se(),gref_set);\r
5027         case SE_BINARY_OP:\r
5028                 return( contains_gb_se(se->get_left_se(),gref_set) ||\r
5029                         contains_gb_se(se->get_right_se(),gref_set) );\r
5030         case SE_COLREF:\r
5031                 if( se->is_gb() ){\r
5032                         return( gref_set.count(se->get_gb_ref()) > 0);\r
5033                 }\r
5034                 return false;\r
5035 //                      don't descend into aggr defs.\r
5036         case SE_AGGR_STAR:\r
5037                 return false;\r
5038         case SE_AGGR_SE:\r
5039                 return false;\r
5040         case SE_FUNC:\r
5041                 operands = se->get_operands();\r
5042                 for(o=0;o<operands.size();o++){\r
5043                         found = found || contains_gb_se(operands[o], gref_set);\r
5044                 }\r
5045                 return found;\r
5046         default:\r
5047                 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",\r
5048                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
5049                 exit(1);\r
5050         }\r
5051         return false;\r
5052 }\r
5053 \r
5054 \r
5055 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){\r
5056         vector<scalarexp_t *> op_list;\r
5057         int o;\r
5058         bool found = false;\r
5059 \r
5060         switch(pr->get_operator_type()){\r
5061         case PRED_IN:\r
5062                 return contains_gb_se(pr->get_left_se(), gref_set);\r
5063         case PRED_COMPARE:\r
5064                 return (contains_gb_se(pr->get_left_se(),gref_set)\r
5065                         || contains_gb_se(pr->get_right_se(),gref_set) );\r
5066         case PRED_UNARY_OP:\r
5067                 return contains_gb_pr(pr->get_left_pr(),gref_set) ;\r
5068         case PRED_BINARY_OP:\r
5069                 return (contains_gb_pr(pr->get_left_pr(),gref_set)\r
5070                         || contains_gb_pr(pr->get_right_pr(),gref_set) );\r
5071         case PRED_FUNC:\r
5072                 op_list = pr->get_op_list();\r
5073                 for(o=0;o<op_list.size();++o){\r
5074                         found = found ||contains_gb_se(op_list[o],gref_set) ;\r
5075                 }\r
5076                 return found;\r
5077         default:\r
5078                 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",\r
5079                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
5080         }\r
5081 \r
5082         return found;\r
5083 }\r
5084 \r
5085 \r
5086 //              Gather the set of columns accessed in this se.\r
5087 //              Descend into aggregate functions.\r
5088 \r
5089 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){\r
5090         col_id ci;\r
5091         vector<scalarexp_t *> operands;\r
5092         int o;\r
5093 \r
5094         if(! se)\r
5095                 return;\r
5096 \r
5097         switch(se->get_operator_type()){\r
5098         case SE_LITERAL:\r
5099         case SE_PARAM:\r
5100         case SE_IFACE_PARAM:\r
5101                 return;\r
5102         case SE_UNARY_OP:\r
5103                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);\r
5104                 return;\r
5105         case SE_BINARY_OP:\r
5106                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);\r
5107                 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);\r
5108                 return;\r
5109         case SE_COLREF:\r
5110                 if(! se->is_gb() ){\r
5111                         ci.load_from_colref(se->get_colref() );\r
5112                         if(ci.tblvar_ref < 0){\r
5113                                 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());\r
5114                         }\r
5115                         cid_set.insert(ci);\r
5116                 }else{\r
5117                         if(gtbl==NULL){\r
5118                                 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");\r
5119                                 exit(1);\r
5120                         }\r
5121                         gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);\r
5122                 }\r
5123                 return;\r
5124         case SE_AGGR_STAR:\r
5125                 return;\r
5126         case SE_AGGR_SE:\r
5127                 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);\r
5128                 return;\r
5129         case SE_FUNC:\r
5130                 operands = se->get_operands();\r
5131                 for(o=0;o<operands.size();o++){\r
5132                         gather_se_col_ids(operands[o], cid_set,gtbl);\r
5133                 }\r
5134                 return;\r
5135         default:\r
5136                 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",\r
5137                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
5138                 exit(1);\r
5139         }\r
5140 }\r
5141 \r
5142 \r
//              Gather the set of columns accessed in this predicate.
5144 \r
5145 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){\r
5146         vector<scalarexp_t *> op_list;\r
5147         int o;\r
5148 \r
5149         switch(pr->get_operator_type()){\r
5150         case PRED_IN:\r
5151                 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);\r
5152                 return;\r
5153         case PRED_COMPARE:\r
5154                 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;\r
5155                 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;\r
5156                 return;\r
5157         case PRED_UNARY_OP:\r
5158                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;\r
5159                 return;\r
5160         case PRED_BINARY_OP:\r
5161                 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;\r
5162                 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;\r
5163                 return;\r
5164         case PRED_FUNC:\r
5165                 op_list = pr->get_op_list();\r
5166                 for(o=0;o<op_list.size();++o){\r
5167                         gather_se_col_ids(op_list[o],cid_set,gtbl) ;\r
5168                 }\r
5169                 return;\r
5170         default:\r
5171                 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",\r
5172                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
5173         }\r
5174 }\r
5175 \r
5176 \r
5177 \r
5178 \r
5179 //              Gather the set of special operator or comparison functions referenced by this se.\r
5180 \r
5181 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){\r
5182         col_id ci;\r
5183         data_type *ldt, *rdt;\r
5184         int o;\r
5185         vector<scalarexp_t *> operands;\r
5186 \r
5187         switch(se->get_operator_type()){\r
5188         case SE_LITERAL:\r
5189                 if( se->get_literal()->constructor_name() != "")\r
5190                         fcn_set.insert( se->get_literal()->constructor_name() );\r
5191                 return;\r
5192         case SE_PARAM:\r
5193                 return;\r
5194 //                      SE_IFACE_PARAM should not exist when this is called.\r
5195         case SE_UNARY_OP:\r
5196                 ldt = se->get_left_se()->get_data_type();\r
5197                 if(ldt->complex_operator(se->get_op()) ){\r
5198                         fcn_set.insert( ldt->get_complex_operator(se->get_op()) );\r
5199                 }\r
5200                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);\r
5201                 return;\r
5202         case SE_BINARY_OP:\r
5203                 ldt = se->get_left_se()->get_data_type();\r
5204                 rdt = se->get_right_se()->get_data_type();\r
5205 \r
5206                 if(ldt->complex_operator(rdt, se->get_op()) ){\r
5207                         fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );\r
5208                 }\r
5209                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);\r
5210                 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);\r
5211                 return;\r
5212         case SE_COLREF:\r
5213                 return;\r
5214         case SE_AGGR_STAR:\r
5215                 return;\r
5216         case SE_AGGR_SE:\r
5217                 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);\r
5218                 return;\r
5219         case SE_FUNC:\r
5220                 operands = se->get_operands();\r
5221                 for(o=0;o<operands.size();o++){\r
5222                         gather_se_opcmp_fcns(operands[o], fcn_set);\r
5223                 }\r
5224                 return;\r
5225         default:\r
5226                 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",\r
5227                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
5228                 exit(1);\r
5229         }\r
5230 }\r
5231 \r
5232 \r
//              Gather the set of special operator or comparison functions referenced by this predicate.
5234 \r
5235 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){\r
5236         data_type *ldt, *rdt;\r
5237         vector<scalarexp_t *> operands;\r
5238         int o;\r
5239 \r
5240         switch(pr->get_operator_type()){\r
5241         case PRED_IN:\r
5242                 ldt = pr->get_left_se()->get_data_type();\r
5243                 if(ldt->complex_comparison(ldt) ){\r
5244                         fcn_set.insert( ldt->get_comparison_fcn(ldt) );\r
5245                 }\r
5246                 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);\r
5247                 return;\r
5248         case PRED_COMPARE:\r
5249                 ldt = pr->get_left_se()->get_data_type();\r
5250                 rdt = pr->get_right_se()->get_data_type();\r
5251                 if(ldt->complex_comparison(rdt) ){\r
5252                         fcn_set.insert( ldt->get_comparison_fcn(rdt) );\r
5253                 }\r
5254                 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;\r
5255                 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;\r
5256                 return;\r
5257         case PRED_UNARY_OP:\r
5258                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;\r
5259                 return;\r
5260         case PRED_BINARY_OP:\r
5261                 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;\r
5262                 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;\r
5263                 return;\r
5264         case PRED_FUNC:\r
5265                 operands = pr->get_op_list();\r
5266                 for(o=0;o<operands.size();o++){\r
5267                         gather_se_opcmp_fcns(operands[o], fcn_set);\r
5268                 }\r
5269                 return;\r
5270         default:\r
5271                 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",\r
5272                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );\r
5273         }\r
5274 }\r
5275 \r
5276 \r
5277 \r
5278 \r
5279 //              find the temporal variable divisor if any.\r
5280 //              Only forms allowed : temporal_colref, temporal_colref/const\r
5281 //              temporal_colref/const + const\r
5282 \r
5283 \r
5284 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){\r
5285         long long int retval = 0;\r
5286         data_type *ldt, *rdt;\r
5287         int o;\r
5288         vector<scalarexp_t *> operands;\r
5289         scalarexp_t *t_se, *c_se;\r
5290         string the_op;\r
5291 \r
5292         switch(se->get_operator_type()){\r
5293         case SE_LITERAL:\r
5294                 return(-1);\r
5295         case SE_PARAM:\r
5296                 return(-1);\r
5297 //                      SE_IFACE_PARAM should not exist when this is called.\r
5298         case SE_UNARY_OP:\r
5299                 return(-1);\r
5300         case SE_BINARY_OP:\r
5301                 ldt = se->get_left_se()->get_data_type();\r
5302                 if(ldt->is_temporal()){\r
5303                         t_se = se->get_left_se();\r
5304                         c_se = se->get_right_se();\r
5305                 }else{\r
5306                         t_se = se->get_left_se();\r
5307                         c_se = se->get_right_se();\r
5308                 }\r
5309                 if((! t_se->get_data_type()->is_temporal()) ||  c_se->get_data_type()->is_temporal())\r
5310                         return -1;\r
5311 \r
5312                 the_op = se->get_op();\r
5313                 if(the_op == "+" || the_op == "-")\r
5314                         return find_temporal_divisor(t_se, gbt,fnm);\r
5315                 if(the_op == "/"){\r
5316                         if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){\r
5317                                 fnm = t_se->get_colref()->get_field();\r
5318                                 string lits = c_se->get_literal()->to_string();\r
5319                                 sscanf(lits.c_str(),"%qd",&retval);\r
5320                                 return retval;\r
5321                         }\r
5322                 }\r
5323 \r
5324                 return -1;\r
5325         case SE_COLREF:\r
5326                 if(se->is_gb()){\r
5327                         return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);\r
5328                 }\r
5329                 if(se->get_data_type()->is_temporal()){\r
5330                         fnm = se->get_colref()->get_field();\r
5331                         return 1;\r
5332                 }\r
5333                 return 0;\r
5334         case SE_AGGR_STAR:\r
5335                 return -1;\r
5336         case SE_AGGR_SE:\r
5337                 return -1;\r
5338         case SE_FUNC:\r
5339                 return -1;\r
5340         default:\r
5341                 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",\r
5342                                 se->get_lineno(), se->get_charno(),se->get_operator_type());\r
5343                 exit(1);\r
5344         }\r
5345 }\r
5346 \r
5347 \r
5348 //                      impute_colnames:\r
5349 //                      Create meaningful but unique names for the columns.\r
5350 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){\r
5351         set<string> curr_names;\r
5352         int s;\r
5353         for(s=0;s<sel_list.size();++s){\r
5354                 curr_names.insert(sel_list[s]->name);\r
5355         }\r
5356         return impute_colname(curr_names, se);\r
5357 }\r
5358 \r
5359 string impute_colname(set<string> &curr_names, scalarexp_t *se){\r
5360 string ret;\r
5361 scalarexp_t *seo;\r
5362 vector<scalarexp_t *> operand_list;\r
5363 string opstr;\r
5364 \r
5365         switch(se->get_operator_type()){\r
5366         case SE_LITERAL:\r
5367                 ret = "Literal";\r
5368                 break;\r
5369     case SE_PARAM:\r
5370                 ret = "Param_" + se->get_param_name();\r
5371                 break;\r
5372     case SE_IFACE_PARAM:\r
5373                 ret = "Iparam_" + se->get_ifpref()->get_pname();\r
5374                 break;\r
5375     case SE_COLREF:\r
5376                 ret =  se->get_colref()->get_field() ;\r
5377                 break;\r
5378     case SE_UNARY_OP:\r
5379     case SE_BINARY_OP:\r
5380                 ret = "Field";\r
5381                 break;\r
5382     case SE_AGGR_STAR:\r
5383                 ret = "Cnt";\r
5384                 break;\r
5385     case SE_AGGR_SE:\r
5386                 ret = se->get_op();\r
5387                 seo = se->get_left_se();\r
5388                 switch(se->get_left_se()->get_operator_type()){\r
5389                 case SE_PARAM:\r
5390                         ret += "_PARAM_"+seo->get_param_name();\r
5391                         break;\r
5392                 case SE_IFACE_PARAM:\r
5393                         ret += "_IPARAM_"+seo->get_ifpref()->get_pname();\r
5394                         break;\r
5395                 case SE_COLREF:\r
5396                         opstr =  seo->get_colref()->get_field();\r
5397                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){\r
5398                                 ret += "_" + opstr;\r
5399                         }else{\r
5400                                 ret = opstr;\r
5401                         }\r
5402                         break;\r
5403                 case SE_AGGR_STAR:\r
5404                 case SE_AGGR_SE:\r
5405                         opstr = seo->get_op();\r
5406                         if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){\r
5407                                 ret += "_" + seo->get_op();\r
5408                         }else{\r
5409                                 ret = opstr;\r
5410                         }\r
5411                         break;\r
5412                 case SE_FUNC:\r
5413                         opstr = seo->get_op();\r
5414                         ret += "_" + seo->get_op();\r
5415                         break;\r
5416         case SE_UNARY_OP:\r
5417         case SE_BINARY_OP:\r
5418                         ret += "_SE";\r
5419                         break;\r
5420                 default:\r
5421                         ret += "_";\r
5422                         break;\r
5423                 }\r
5424                 break;\r
5425         case SE_FUNC:\r
5426                 ret = se->get_op();\r
5427                 operand_list = se->get_operands();\r
5428                 if(operand_list.size() > 0){\r
5429                         seo = operand_list[0];\r
5430                         switch(seo->get_operator_type()){\r
5431                         case SE_PARAM:\r
5432                                 ret += "_PARAM_"+seo->get_param_name();\r
5433                                 break;\r
5434                         case SE_IFACE_PARAM:\r
5435                                 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();\r
5436                                 break;\r
5437                         case SE_COLREF:\r
5438                                 ret += "_" + seo->get_colref()->get_field();\r
5439                                 break;\r
5440                         case SE_AGGR_STAR:\r
5441                         case SE_AGGR_SE:\r
5442                         case SE_FUNC:\r
5443                                 ret += "_" + seo->get_op();\r
5444                                 break;\r
5445                 case SE_UNARY_OP:\r
5446                 case SE_BINARY_OP:\r
5447                                 ret += "_SE";\r
5448                         break;\r
5449                         default:\r
5450                                 ret += "_";\r
5451                                 break;\r
5452                         }\r
5453                 }else{\r
5454                         ret += "_func";\r
5455                 }\r
5456                 break;\r
5457         }\r
5458 \r
5459         if(ret == "Field"){\r
5460                 if(curr_names.count("Field0") == 0)\r
5461                         ret = "Field0";\r
5462         }\r
5463         int iter = 1;\r
5464         string base = ret;\r
5465         while(curr_names.count(ret) > 0){\r
5466                 char tmpstr[500];\r
5467                 sprintf(tmpstr,"%s%d",base.c_str(),iter);\r
5468                 ret = tmpstr;\r
5469                 iter++;\r
5470         }\r
5471 \r
5472 \r
5473         curr_names.insert(ret);\r
5474         return(ret);\r
5475 \r
5476 }\r
5477 \r
5478 \r
5479 \r
5480 //////////////////////////////////////////////////////////////////////\r
5481 //////////////          Methods of defined classes ///////////////////////\r
5482 //////////////////////////////////////////////////////////////////////\r
5483 \r
5484 //              helper fcn to enable col_id as map key.\r
5485 \r
5486   bool operator<(const col_id &cr1, const col_id &cr2){\r
5487         if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);\r
5488         if(cr1.tblvar_ref == cr2.tblvar_ref)\r
5489            return (cr1.field < cr2.field);\r
5490         return(false);\r
5491   }\r
5492 \r
5493 \r
5494 //              Process the GB variables.\r
5495 //              At parse time, GB vars are either GB_COLREF,\r
5496 //              or GB_COMPUTED if the AS keyword is used.\r
5497 //              Cast GB vars as named entities with a SE as\r
5498 //              their definition (the colref in the case of GB_COLREF).\r
5499 //\r
5500 //              TODO: if there is a gbref in a gbdef,\r
5501 //              then I won't be able to compute the value without\r
5502 //              a complex dependence analysis.  So verify that there is no\r
5503 //              gbref in any of the GBdefs.\r
5504 //              BUT: a GBVAR_COLREF should be converted to a regular colref,\r
5505 //              which is not yet done.\r
5506 //\r
5507 //              TODO : sort out issue of GBVAR naming and identification.\r
5508 //              Determine where it is advantageous to convert GV_COLREF\r
5509 //              GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,\r
5510 //              etc.\r
5511 //\r
5512 //              return -1 if there is a problem.\r
5513 \r
5514 int gb_table::add_gb_attr(\r
5515                                                   gb_t *gb,\r
5516                                                   tablevar_list_t *fm,\r
5517                                                   table_list *schema,\r
5518                                                   table_exp_t *fta_tree,\r
5519                                                   ext_fcn_list *Ext_fcns\r
5520                                                   ){\r
5521         colref_t *cr;\r
5522         int retval;\r
5523         gb_table_entry *entry;\r
5524 \r
5525         if(gb->type == GB_COLREF){\r
5526                 if(gb->table != "")\r
5527                         cr = new colref_t(\r
5528                                 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()\r
5529                         );\r
5530                 else\r
5531                         cr = new colref_t(gb->name.c_str());\r
5532 \r
5533                 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);\r
5534                 if(tablevar_ref < 0) return(tablevar_ref);\r
5535 \r
5536                 cr->set_tablevar_ref(tablevar_ref);\r
5537                 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));\r
5538                 cr->set_interface("");\r
5539                 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));\r
5540 \r
5541                 entry = new gb_table_entry();\r
5542                 entry->name.field = cr->get_field();\r
5543                 entry->name.tblvar_ref = tablevar_ref;\r
5544                 entry->definition = new scalarexp_t(cr);\r
5545                 entry->ref_type = GBVAR_COLREF;\r
5546         }else{\r
5547                 entry = new gb_table_entry();\r
5548                 entry->name.field = gb->name;\r
5549                 entry->name.tblvar_ref = -1;\r
5550                 entry->definition = gb->def;\r
5551                 entry->ref_type = GBVAR_SE;\r
5552         }\r
5553 \r
5554         retval = verify_colref(entry->definition, fm, schema, NULL);\r
5555         if(retval < 0) return(retval);\r
5556 \r
5557         retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);\r
5558         if(retval < 0) return(retval);\r
5559 \r
5560 //              Verify that the gbvar def references no aggregates and no gbvars.\r
5561         if(count_gb_se(entry->definition) > 0){\r
5562                 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );\r
5563                 return(-1);\r
5564         }\r
5565         if(count_aggr_se(entry->definition, true) > 0){\r
5566                 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );\r
5567                 return(-1);\r
5568         }\r
5569 \r
5570 //                      Check for duplicates\r
5571         int i;\r
5572         for(i=0;i<gtbl.size();++i){\r
5573                 if(entry->name.field == gtbl[i]->name.field){\r
5574                         fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());\r
5575                         return -1;\r
5576                 }\r
5577         }\r
5578 \r
5579 \r
5580         gtbl.push_back(entry);\r
5581 \r
5582         return(1);\r
5583 }\r
5584 \r
5585 \r
5586 //                      Try to determine if the colref is actually\r
5587 //                      a gbvar ref.\r
5588 //                      a) if no tablename associated with the colref,\r
5589 //                              1) try to find a matching GB_COMPUTED gbvar.\r
5590 //                              2) failing that, try to match to a single tablevar\r
5591 //                              3) if successful, search among GB_COLREF\r
5592 //                      b) else, try to match the tablename to a single tablevar\r
5593 //                              if successful, search among GB_COLREF\r
5594 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){\r
5595         string c_field = cr->get_field();\r
5596         int c_tblref;\r
5597         int n_tbl;\r
5598         int i;\r
5599         vector<int> candidates;\r
5600 \r
5601         if(cr->uses_default_table()){\r
5602                 for(i=0;i<gtbl.size();i++){\r
5603                         if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){\r
5604                                 return(i);\r
5605                         }\r
5606                 }\r
5607                 candidates = find_source_tables(c_field, fm, schema);\r
5608                 if(candidates.size() != 1) return(-1); // can't find unique tablevar\r
5609                 for(i=0;i<gtbl.size();i++){\r
5610                         if(gtbl[i]->ref_type==GBVAR_COLREF &&\r
5611                                   c_field == gtbl[i]->name.field &&\r
5612                                   candidates[0] == gtbl[i]->name.tblvar_ref){\r
5613                                 return(i);\r
5614                         }\r
5615                 }\r
5616                 return(-1); // colref is not in gb table.\r
5617         }\r
5618 \r
5619 //                      A table name must have been given.\r
5620         vector<tablevar_t *> fm_tbls = fm->get_table_list();\r
5621         string interface = cr->get_interface();\r
5622         string table_name = cr->get_table_name();\r
5623 \r
5624 \r
5625 //                      if no interface name is given, try to search for the table\r
5626 //                      name among the tablevar names first.\r
5627         if(interface==""){\r
5628                 for(i=0;i<fm_tbls.size();++i){\r
5629                         if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())\r
5630                                 candidates.push_back(i);\r
5631                 }\r
5632                 if(candidates.size()>1) return(-1);\r
5633                 if(candidates.size()==1){\r
5634                         for(i=0;i<gtbl.size();i++){\r
5635                                 if(gtbl[i]->ref_type==GBVAR_COLREF &&\r
5636                                         c_field == gtbl[i]->name.field &&\r
5637                                         candidates[0] == gtbl[i]->name.tblvar_ref){\r
5638                                         return(i);\r
5639                                 }\r
5640                         }\r
5641                         return(-1);  // match semantics of bind to tablevar name first\r
5642                 }\r
5643         }\r
5644 \r
5645 //              Interface name given, or no interface but no\r
5646 //              no tablevar match.  Try to match on schema name.\r
5647         for(i=0;i<fm_tbls.size();++i){\r
5648                 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())\r
5649                         candidates.push_back(i);\r
5650         }\r
5651         if(candidates.size() != 1) return(-1);\r
5652         for(i=0;i<gtbl.size();i++){\r
5653                 if(gtbl[i]->ref_type==GBVAR_COLREF &&\r
5654                         c_field == gtbl[i]->name.field &&\r
5655                         candidates[0] == gtbl[i]->name.tblvar_ref){\r
5656                         return(i);\r
5657                 }\r
5658         }\r
5659 \r
5660 //              No match found.\r
5661         return(-1);\r
5662 \r
5663 }\r
5664 \r
5665 \r
5666 \r
5667 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){\r
5668         if(is_builtin()){\r
5669                 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||\r
5670                         (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||\r
5671                         (op == "XOR_AGGR") )\r
5672                                 return(true);\r
5673         }else{\r
5674                 return Ext_fcns->fta_legal(fcn_id);\r
5675         }\r
5676         return(false);\r
5677 }\r
5678 \r
5679 \r
5680 //              Return the set of subaggregates required to compute\r
5681 //              the desired aggregate.  THe operand of the subaggregates\r
5682 //              can only be * or the scalarexp used in the superaggr.\r
5683 //              This is indicated by the use_se vector.\r
5684 \r
5685 //              Is this code generation specific?\r
5686 \r
5687 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){\r
5688         vector<string> ret;\r
5689 \r
5690         if(op == "COUNT"){\r
5691                 ret.push_back("COUNT");\r
5692                 use_se.push_back(false);\r
5693         }\r
5694         if(op == "SUM"){\r
5695                 ret.push_back("SUM");\r
5696                 use_se.push_back(true);\r
5697         }\r
5698         if(op == "AVG"){\r
5699                 ret.push_back("SUM");\r
5700                 ret.push_back("COUNT");\r
5701                 use_se.push_back(true);\r
5702                 use_se.push_back(false);\r
5703         }\r
5704         if(op == "MIN"){\r
5705                 ret.push_back("MIN");\r
5706                 use_se.push_back(true);\r
5707         }\r
5708         if(op == "MAX"){\r
5709                 ret.push_back("MAX");\r
5710                 use_se.push_back(true);\r
5711         }\r
5712         if(op == "AND_AGGR"){\r
5713                 ret.push_back("AND_AGGR");\r
5714                 use_se.push_back(true);\r
5715         }\r
5716         if(op == "OR_AGGR"){\r
5717                 ret.push_back("OR_AGGR");\r
5718                 use_se.push_back(true);\r
5719         }\r
5720         if(op == "XOR_AGGR"){\r
5721                 ret.push_back("XOR_AGGR");\r
5722                 use_se.push_back(true);\r
5723         }\r
5724 \r
5725         return(ret);\r
5726 }\r
5727 \r
5728 //                      Code generation specific?\r
5729 \r
5730 vector<data_type *> aggr_table_entry::get_subaggr_dt(){\r
5731         vector<data_type *> ret;\r
5732         data_type *dt;\r
5733 \r
5734         if(op == "COUNT"){\r
5735                 dt = new data_type("Int"); // was Uint\r
5736                 ret.push_back( dt );\r
5737         }\r
5738         if(op == "SUM"){\r
5739                 dt = new data_type();\r
5740                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );\r
5741                 ret.push_back(dt);\r
5742         }\r
5743         if(op == "AVG"){\r
5744                 dt = new data_type();\r
5745                 dt->set_aggr_data_type( "SUM",operand->get_data_type() );\r
5746                 ret.push_back( dt );\r
5747                 dt = new data_type("Int");\r
5748                 ret.push_back( dt );\r
5749         }\r
5750         if(op == "MIN"){\r
5751                 dt = new data_type();\r
5752                 dt->set_aggr_data_type( "MIN",operand->get_data_type() );\r
5753                 ret.push_back( dt );\r
5754         }\r
5755         if(op == "MAX"){\r
5756                 dt = new data_type();\r
5757                 dt->set_aggr_data_type( "MAX",operand->get_data_type() );\r
5758                 ret.push_back( dt );\r
5759         }\r
5760         if(op == "AND_AGGR"){\r
5761                 dt = new data_type();\r
5762                 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );\r
5763                 ret.push_back( dt );\r
5764         }\r
5765         if(op == "OR_AGGR"){\r
5766                 dt = new data_type();\r
5767                 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );\r
5768                 ret.push_back( dt );\r
5769         }\r
5770         if(op == "XOR_AGGR"){\r
5771                 dt = new data_type();\r
5772                 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );\r
5773                 ret.push_back( dt );\r
5774         }\r
5775 \r
5776         return(ret);\r
5777 }\r
5778 \r
5779 //              Code generation specific?\r
5780 \r
5781 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){\r
5782         scalarexp_t *se_l, *se_r, *ret_se = NULL;\r
5783 \r
5784         if(op == "COUNT"){\r
5785                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);\r
5786                 return(ret_se);\r
5787         }\r
5788         if(op == "SUM"){\r
5789                 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);\r
5790                 return(ret_se);\r
5791         }\r
5792         if(op == "AVG"){\r
5793                 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);\r
5794                 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);\r
5795 \r
5796                 ret_se = new scalarexp_t("/", se_l, se_r);\r
5797                 return(ret_se);\r
5798         }\r
5799         if(op == "MIN"){\r
5800                 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);\r
5801                 return(ret_se);\r
5802         }\r
5803         if(op == "MAX"){\r
5804                 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);\r
5805                 return(ret_se);\r
5806         }\r
5807         if(op == "AND_AGGR"){\r
5808                 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);\r
5809                 return(ret_se);\r
5810         }\r
5811         if(op == "OR_AGGR"){\r
5812                 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);\r
5813                 return(ret_se);\r
5814         }\r
5815         if(op == "XOR_AGGR"){\r
5816                 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);\r
5817                 return(ret_se);\r
5818         }\r
5819 \r
5820         return(ret_se);\r
5821 \r
5822 }\r
5823 \r
5824 \r
5825 //              Add a built-in aggr.\r
5826 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){\r
5827         int i;\r
5828 \r
5829         for(i=0;i<agr_tbl.size();i++){\r
5830                 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op\r
5831                   && is_equivalent_se(se,agr_tbl[i]->operand) ){\r
5832 //                && is_super == agr_tbl[i]->is_superaggr())\r
5833                         if(is_super) agr_tbl[i]->set_super(true);\r
5834                         return(i);\r
5835                 }\r
5836         }\r
5837 \r
5838         aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);\r
5839         agr_tbl.push_back(ate);\r
5840         return(agr_tbl.size() - 1);\r
5841 }\r
5842 \r
5843 //              add a UDAF\r
5844 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){\r
5845         int i,o;\r
5846 \r
5847         for(i=0;i<agr_tbl.size();i++){\r
5848                 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id\r
5849                                 && opl.size() == agr_tbl[i]->oplist.size() ){\r
5850 //                              && is_super == agr_tbl[i]->is_superaggr() ){\r
5851                         for(o=0;o<opl.size();++o){\r
5852                                 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )\r
5853                                         break;\r
5854                         }\r
5855                         if(o == opl.size()){\r
5856                                 if(is_super) agr_tbl[i]->set_super(true);\r
5857                                 return i;\r
5858                         }\r
5859                 }\r
5860         }\r
5861 \r
5862         aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);\r
5863         agr_tbl.push_back(ate);\r
5864         return(agr_tbl.size() - 1);\r
5865 }\r
5866 \r
5867 \r
5868 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){\r
5869         int i;\r
5870 \r
5871         for(i=0;i<cplx_lit_tbl.size();i++){\r
5872                 if(l->is_equivalent(cplx_lit_tbl[i])){\r
5873                         hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;\r
5874                         return(i);\r
5875                 }\r
5876         }\r
5877 \r
5878         cplx_lit_tbl.push_back(l);\r
5879         hdl_ref_tbl.push_back(is_handle_ref);\r
5880         return(cplx_lit_tbl.size() - 1);\r
5881 }\r
5882 \r
5883 \r
5884 \r
5885 //------------------------------------------------------------\r
5886 //              parse_fta code\r
5887 \r
5888 \r
5889 gb_t *gb_t::duplicate(){\r
5890         gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());\r
5891         ret->type = type;\r
5892         ret->lineno = lineno;\r
5893         ret->charno = charno;\r
5894         if(def != NULL)\r
5895                 ret->def = dup_se(def,NULL);\r
5896         return ret;\r
5897 }\r