1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
15 #include"parse_partn.h"
16 #include "parse_schema.h"
17 #include"analyze_fta.h"
23 int find_field(scalarexp_t *se, string &fld){
24 vector<scalarexp_t *> operands;
27 switch(se->get_operator_type()){
32 return 100; // force failure, but no error message.
34 return find_field(se->get_left_se(), fld);
36 return find_field(se->get_left_se(), fld)+find_field(se->get_right_se(), fld);
38 fld = se->get_colref()->get_field();
42 // Should be no aggregates
45 return 100; // force failure, but no error message.
47 fprintf(stderr,"INTERNAL ERROR in find_field, unknown operator type %d\n", se->get_operator_type());
54 int partn_def_t::verify(table_list *schema, string &err_msg){
55 if(schema->find_tbl(protocol) < 0){
56 err_msg += "ERROR, partition definition "+name+" uses source "+protocol+", but its not in the schema.\n";
59 if(schema->get_schema_type(schema->find_tbl(protocol)) != PROTOCOL_SCHEMA){
60 err_msg += "ERROR, partition definition "+name+" uses source "+protocol+", but its not a PROTOCOL.\n";
64 tablevar_t *fme = new tablevar_t(protocol.c_str());
65 tablevar_list_t fm(fme);
67 // Validate the SE and assign data types.
70 for(i=0;i<se_list.size();++i){
71 int ret = verify_colref(se_list[i],&fm,schema,NULL);
73 err_msg += "ERROR validating entry "+int_to_string(i)+" of partition definition "+name+".\n";
80 // Ensure that the data type is an integer type.
81 for(i=0;i<se_list.size();++i){
82 data_type *dt = se_list[i]->get_data_type();
83 string type_s = dt->get_type_str();
84 if(! ( type_s=="UINT" || type_s=="INT" || type_s=="ULLONG" || type_s=="LLONG") ){
85 err_msg += "ERROR, entry "+int_to_string(i)+" of partition definition "+name+" has type "+type_s+", but it must be one of UINT, INT, ULLONG, LLONG.\n";
92 // Ensure that each SE ref's a field only once.
93 for(i=0;i<se_list.size();++i){
95 int ret = find_field(se_list[i],fld);
97 err_msg += "ERROR validating entry "+int_to_string(i)+" of partition definition "+name+".\n";
101 err_msg += "ERROR validating entry "+int_to_string(i)+" of partition definition "+name+": the expression references a field "+int_to_string(ret)+" times, but it should reference a field exactly once.\n";
103 field_list.push_back("");
105 field_list.push_back(fld);
115 void find_colref_path(scalarexp_t *se, vector<scalarexp_t *> &se_stack){
116 vector<scalarexp_t *> operands;
119 switch(se->get_operator_type()){
125 find_colref_path(se->get_left_se(), se_stack);
126 if(se_stack.size()>0)
127 se_stack.push_back(se);
130 find_colref_path(se->get_left_se(), se_stack);
131 if(se_stack.size()>0){
132 se_stack.push_back(se);
135 find_colref_path(se->get_right_se(), se_stack);
136 if(se_stack.size()>0)
137 se_stack.push_back(se);
140 se_stack.push_back(se);
144 // Should be no aggregates
147 operands = se->get_operands();
148 for(o=0;o<operands.size();o++){
149 find_colref_path(operands[o], se_stack);
150 if(se_stack.size()>0){
151 se_stack.push_back(se);
157 fprintf(stderr,"INTERNAL ERROR in find_colref_path, unknown operator type %d\n", se->get_operator_type());
166 unsigned long int uli;
168 unsigned long long int ulli;
172 void convert_to_final() {};
174 void load_val(dtype dt, const char *val){
178 sscanf(val,"%llu",&(v.ulli));
181 sscanf(val,"%lld",&(v.lli));
184 sscanf(val,"%llu",&(v.ulli));
187 sscanf(val,"%lld",&(v.lli));
190 fprintf(stderr,"INTERNAL ERROR, data type %d passed to param_int::load_val.\n",dt);
195 void add(dtype dt, param_int &l, param_int &r){
197 if(dt==int_t || dt==llong_t)
198 if(l.d==int_t || l.d==llong_t)
199 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli+r.v.lli; else v.lli = l.v.lli+r.v.ulli;
201 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli+r.v.lli; else v.lli = l.v.ulli+r.v.ulli;
203 if(l.d==int_t || l.d==llong_t)
204 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli+r.v.lli; else v.ulli = l.v.lli+r.v.ulli;
206 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli+r.v.lli; else v.ulli = l.v.ulli+r.v.ulli;
208 void subtract(dtype dt, param_int &l, param_int &r){
210 if(dt==int_t || dt==llong_t)
211 if(l.d==int_t || l.d==llong_t)
212 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli-r.v.lli; else v.lli = l.v.lli-r.v.ulli;
214 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli-r.v.lli; else v.lli = l.v.ulli-r.v.ulli;
216 if(l.d==int_t || l.d==llong_t)
217 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli-r.v.lli; else v.ulli = l.v.lli-r.v.ulli;
219 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli-r.v.lli; else v.ulli = l.v.ulli-r.v.ulli;
221 void mult(dtype dt, param_int &l, param_int &r){
223 if(dt==int_t || dt==llong_t)
224 if(l.d==int_t || l.d==llong_t)
225 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli*r.v.lli; else v.lli = l.v.lli*r.v.ulli;
227 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli*r.v.lli; else v.lli = l.v.ulli*r.v.ulli;
229 if(l.d==int_t || l.d==llong_t)
230 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli*r.v.lli; else v.ulli = l.v.lli*r.v.ulli;
232 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli*r.v.lli; else v.ulli = l.v.ulli*r.v.ulli;
234 void div(dtype dt, param_int &l, param_int &r){
236 if(dt==int_t || dt==llong_t)
237 if(l.d==int_t || l.d==llong_t)
238 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli/r.v.lli; else v.lli = l.v.lli/r.v.ulli;
240 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli/r.v.lli; else v.lli = l.v.ulli/r.v.ulli;
242 if(l.d==int_t || l.d==llong_t)
243 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli/r.v.lli; else v.ulli = l.v.lli/r.v.ulli;
245 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli/r.v.lli; else v.ulli = l.v.ulli/r.v.ulli;
247 void bit_and(dtype dt, param_int &l, param_int &r){
249 if(dt==int_t || dt==llong_t)
250 if(l.d==int_t || l.d==llong_t)
251 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli&r.v.lli; else v.lli = l.v.lli&r.v.ulli;
253 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli&r.v.lli; else v.lli = l.v.ulli&r.v.ulli;
255 if(l.d==int_t || l.d==llong_t)
256 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli&r.v.lli; else v.ulli = l.v.lli&r.v.ulli;
258 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli&r.v.lli; else v.ulli = l.v.ulli&r.v.ulli;
260 void bit_or(dtype dt, param_int &l, param_int &r){
262 if(dt==int_t || dt==llong_t)
263 if(l.d==int_t || l.d==llong_t)
264 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli|r.v.lli; else v.lli = l.v.lli|r.v.ulli;
266 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli|r.v.lli; else v.lli = l.v.ulli|r.v.ulli;
268 if(l.d==int_t || l.d==llong_t)
269 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli|r.v.lli; else v.ulli = l.v.lli|r.v.ulli;
271 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli|r.v.lli; else v.ulli = l.v.ulli|r.v.ulli;
273 void modulo(dtype dt, param_int &l, param_int &r){
275 if(dt==int_t || dt==llong_t)
276 if(l.d==int_t || l.d==llong_t)
277 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli%r.v.lli; else v.lli = l.v.lli%r.v.ulli;
279 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli%r.v.lli; else v.lli = l.v.ulli%r.v.ulli;
281 if(l.d==int_t || l.d==llong_t)
282 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli%r.v.lli; else v.ulli = l.v.lli%r.v.ulli;
284 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli%r.v.lli; else v.ulli = l.v.ulli%r.v.ulli;
286 void shift_left(dtype dt, param_int &l, param_int &r){
288 if(dt==int_t || dt==llong_t)
289 if(l.d==int_t || l.d==llong_t)
290 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli<<r.v.lli; else v.lli = l.v.lli<<r.v.ulli;
292 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli<<r.v.lli; else v.lli = l.v.ulli<<r.v.ulli;
294 if(l.d==int_t || l.d==llong_t)
295 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli<<r.v.lli; else v.ulli = l.v.lli<<r.v.ulli;
297 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli<<r.v.lli; else v.ulli = l.v.ulli<<r.v.ulli;
299 void shift_right(dtype dt, param_int &l, param_int &r){
301 if(dt==int_t || dt==llong_t)
302 if(l.d==int_t || l.d==llong_t)
303 if(r.d==int_t || r.d==llong_t) v.lli = l.v.lli>>r.v.lli; else v.lli = l.v.lli>>r.v.ulli;
305 if(r.d==int_t || r.d==llong_t) v.lli = l.v.ulli>>r.v.lli; else v.lli = l.v.ulli>>r.v.ulli;
307 if(l.d==int_t || l.d==llong_t)
308 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.lli>>r.v.lli; else v.ulli = l.v.lli>>r.v.ulli;
310 if(r.d==int_t || r.d==llong_t) v.ulli = l.v.ulli>>r.v.lli; else v.ulli = l.v.ulli>>r.v.ulli;
313 void plus(dtype dt, param_int &l){
315 if(dt==int_t || dt==llong_t)
316 if(l.d==int_t || l.d==llong_t)
321 if(l.d==int_t || l.d==llong_t)
326 void minus(dtype dt, param_int &l){
328 if(dt==int_t || dt==llong_t)
329 if(l.d==int_t || l.d==llong_t)
334 if(l.d==int_t || l.d==llong_t)
339 void abs_not(dtype dt, param_int &l){
341 if(dt==int_t || dt==llong_t)
342 if(l.d==int_t || l.d==llong_t)
347 if(l.d==int_t || l.d==llong_t)
352 void bit_not(dtype dt, param_int &l){
354 if(dt==int_t || dt==llong_t)
355 if(l.d==int_t || l.d==llong_t)
360 if(l.d==int_t || l.d==llong_t)
368 bool int_operand_divides(param_int &lp, param_int &rp){
369 if( (lp.d==int_t || lp.d==llong_t)){
371 lp.v.ulli = -lp.v.lli;
373 lp.v.ulli = lp.v.lli;
375 if( (rp.d==int_t || rp.d==llong_t)){
377 rp.v.ulli = -rp.v.lli;
379 rp.v.ulli = rp.v.lli;
381 unsigned long long int d = rp.v.ulli / lp.v.ulli;
382 unsigned long long int r = rp.v.ulli - d * lp.v.ulli;
386 bool int_operand_implies(param_int &lp, param_int &rp){
387 unsigned long long int d = (lp.v.ulli & rp.v.ulli) | (!lp.v.ulli);
394 void extract_param_int(scalarexp_t *se, param_int &p_int) {
397 dtype dt = se->get_data_type()->get_type();
398 switch(se->get_operator_type()){
400 p_int.load_val(se->get_data_type()->get_type(),se->to_string().c_str());
403 extract_param_int(se->get_left_se(), lp);
404 op_str = se->get_op();
405 if(op_str=="+") p_int.plus(dt,lp);
406 if(op_str=="-") p_int.minus(dt,lp);
407 if(op_str=="!") p_int.abs_not(dt,lp);
408 if(op_str=="~") p_int.bit_not(dt,lp);
411 extract_param_int(se->get_left_se(), lp);
412 extract_param_int(se->get_right_se(), rp);
413 op_str = se->get_op();
414 if(op_str=="+") p_int.add(dt,lp,rp);
415 if(op_str=="-") p_int.subtract(dt,lp,rp);
416 if(op_str=="*") p_int.mult(dt,lp,rp);
417 if(op_str=="/") p_int.div(dt,lp,rp);
418 if(op_str=="&") p_int.bit_and(dt,lp,rp);
419 if(op_str=="|") p_int.bit_or(dt,lp,rp);
420 if(op_str=="%") p_int.modulo(dt,lp,rp);
421 if(op_str=="<<") p_int.shift_left(dt,lp,rp);
422 if(op_str==">>") p_int.shift_right(dt,lp,rp);
425 fprintf(stderr,"INTERNAL ERROR in find_field, invalid operator type %d\n", se->get_operator_type());
433 void get_int_operand(scalarexp_t *parent, scalarexp_t *child, param_int &p_int){
434 if(parent->get_operator_type() != SE_BINARY_OP){
435 fprintf(stderr,"INTERNAL ERROR, operator type %d in get_int_operand.\n",
436 parent->get_operator_type());
439 if(parent->get_right_se() == child){
440 extract_param_int(parent->get_left_se(), p_int);
441 p_int.convert_to_final();
443 extract_param_int(parent->get_right_se(), p_int);
444 p_int.convert_to_final();
449 bool partition_compatible(scalarexp_t *pse, scalarexp_t *gse){
450 vector<scalarexp_t *> pstack, gstack;
452 find_colref_path(pse, pstack);
453 find_colref_path(gse, gstack);
457 param_int p_int, g_int;
459 while(ppos<pstack.size()-1 && gpos<gstack.size()-1){
461 // first, try to raise both stack pointers.
462 if(ppos<pstack.size()-1 && gpos<gstack.size()-1){
463 p_op = pstack[ppos+1]->get_op();
464 g_op = gstack[gpos+1]->get_op();
467 get_int_operand(pstack[ppos+1],pstack[ppos],p_int);
468 get_int_operand(gstack[gpos+1],gstack[gpos],g_int);
469 if(int_operand_divides(g_int, p_int)){
478 get_int_operand(pstack[ppos+1],pstack[ppos],p_int);
479 get_int_operand(gstack[gpos+1],gstack[gpos],g_int);
480 if(int_operand_implies(p_int, g_int)){
489 get_int_operand(pstack[ppos+1],pstack[ppos],p_int);
490 get_int_operand(gstack[gpos+1],gstack[gpos],g_int);
491 if(int_operand_implies(g_int, p_int)){
501 if(ppos<pstack.size()-1){
502 p_op = pstack[ppos+1]->get_op();
503 if(p_op=="+" || p_op=="-" || p_op=="*" || p_op=="/" || p_op=="&" || p_op=="|"){
508 if(gpos<gstack.size()-1){
509 g_op = gstack[gpos+1]->get_op();
510 if(g_op=="+" || g_op=="-" || g_op=="*" || g_op=="/" || g_op=="&" || g_op=="|"){
516 return false; // no reduction rule applies
519 return true; // reduced both stacks.
523 bool partn_def_t::is_compatible(gb_table *gb_tbl){
524 vector<string> gb_flds;
527 for(i=0;i<gb_tbl->size();++i){
529 if(find_field(gb_tbl->get_def(i),gfld)==1){
530 gb_flds.push_back(gfld);
532 gb_flds.push_back("");
536 for(p=p;p<se_list.size();++p){
537 for(g=0;g<gb_tbl->size();++g){
538 if(partition_compatible(se_list[p], gb_tbl->get_def(g)))
541 if(g==gb_tbl->size()){