Add support for query key extraction
[com/gs-lite.git] / src / ftacmp / parse_schema.cc
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #include <string>
17 #include"parse_fta.h"
18 #include "parse_schema.h"
19 #include "type_objects.h"
20 #include <stdio.h>
21 #include <stdlib.h>
22 // #include <algo.h>
23 #include<algorithm>
24
25 using namespace std;
26
//      File-scope pointer to a table_list.  It is only defined here; nothing in
//      this file assigns it -- presumably it is set by the schema parser
//      (TODO confirm against the parser sources).
27 table_list *Schema;
28
29 table_def::table_def(const char *name, param_list *oprop, field_entry_list *fel,
30                         subqueryspec_list *ql, param_list *selp){
31         table_name =name;
32         fields = fel->get_list();
33         schema_type = OPERATOR_VIEW_SCHEMA;
34         qspec_list = ql->spec_list;
35
36         if(oprop == NULL) op_properties = new param_list();
37         else            op_properties = oprop;
38         if(selp == NULL) selpush = new param_list();
39         else            selpush = selp;
40         base_tables = new param_list();
41 };
42
43 table_def *table_def::make_shallow_copy(string n){
44         table_def *ret = new table_def();
45         ret->table_name = n;
46         ret->fields = fields;
47         ret->schema_type = schema_type;
48         ret->base_tables = base_tables;
49         ret->op_properties = op_properties;
50         ret->qspec_list = qspec_list;
51         ret->selpush = selpush;
52
53         return ret;
54 }
55
56 void table_def::mangle_subq_names(std::string mngl){
57         int i;
58         for(i=0;i<qspec_list.size();++i){
59                 subquery_spec *s = qspec_list[i]->duplicate();
60                 s->name += mngl;
61                 qspec_list[i] = s;
62         }
63 }
64
65
66
67 bool table_def::contains_field(string f){
68   int i;
69
70   for(i=0;i<fields.size();i++){
71         if(fields[i]->get_name() == f){
72                 return(true);
73         }
74   }
75   return(false);
76
77 }
78
79 int table_def::get_field_idx(std::string f){
80   int i;
81   for(i=0;i<fields.size();i++){
82         if(fields[i]->get_name() == f){
83                 return(i);
84         }
85   }
86   return(-1);
87 }
88
89
90 string table_def::get_type_name(std::string f){
91   int i;
92   for(i=0;i<fields.size();i++){
93         if(fields[i]->get_name() == f){
94                 return(fields[i]->get_type());
95         }
96   }
97   return("INTERNAL ERROR undefined field " + f);
98 }
99
100 param_list *table_def::get_modifier_list(std::string f){
101   int i;
102   for(i=0;i<fields.size();i++){
103         if(fields[i]->get_name() == f){
104                 return(fields[i]->get_modifier_list());
105         }
106   }
107   fprintf(stderr,"INTERNAL ERROR, no field %s in table %s, call is get_modifier_list.\n",
108                         f.c_str(), table_name.c_str() );
109   exit(1);
110   return(NULL);
111 }
112
113
114 string table_def::get_fcn(std::string f){
115   int i;
116   for(i=0;i<fields.size();i++){
117         if(fields[i]->get_name() == f){
118                 return(fields[i]->get_fcn());
119         }
120   }
121   return("INTERNAL ERROR undefined field " + f);
122
123 }
124
125
126 string table_def::get_field_basetable(std::string f){
127   int i;
128   for(i=0;i<fields.size();i++){
129         if(fields[i]->get_name() == f){
130                 return(fields[i]->get_basetable());
131         }
132   }
133   return("INTERNAL ERROR undefined field " + f);
134
135 }
136
137 int table_def::verify_no_duplicates(std::string &err){
138
139         int f1, f2;
140         for(f1=0;f1<fields.size()-1;f1++){
141                 string f1_name = fields[f1]->get_name();
142                 for(f2=f1+1;f2<fields.size();f2++){
143                         if(f1_name == fields[f2]->get_name()){
144                                 err.append("Error, table ");
145                                 err.append(table_name);
146                                 err.append(" has a duplicate field :");
147                                 err.append(f1_name);
148                                 err.append("\n");
149                                 return(1);
150                         }
151                 }
152         }
153         return(0);
154 }
155
156 int table_def::verify_access_fcns(std::string &err){
157         int retval = 0, f;
158
159         for(f=0;f<fields.size();++f){
160                 if(fields[f]->get_fcn() == ""){
161                         err += "Error, PROTOCOL field "+table_name+"."+fields[f]->get_name()+" has an empty access function.\n";
162                         retval = 1;
163                 }
164         }
165
166         return(retval);
167 }
168
169 int table_def::add_field(field_entry *fe){
170         string fe_name = fe->get_name();
171         int f;
172
173         for(f=0;f<fields.size();f++){
174                 if(fe_name == fields[f]->get_name()){
175                         return(-1);
176                 }
177         }
178         fields.push_back(fe);
179         return(0);
180 }
181
182
183 vector<string> table_list::get_table_names(){
184         vector<string> retval;
185         int i;
186         for(i=0;i<tbl_list.size();i++){
187                 retval.push_back(tbl_list[i]->get_tbl_name());
188         }
189         return(retval);
190 }
191
192
193 int table_list::find_tbl(string t){
194         int i;
195         for(i=0;i<tbl_list.size();i++)
196                 if(tbl_list[i]->get_tbl_name() == t)
197                         return(i);
198 //      fprintf(stderr,"INTERNAL ERROR: Could not find table %s in table_list::find_tbl\n",t.c_str());
199         return(-1);
200 }
201
202 vector<field_entry *> table_list::get_fields(string t){
203         int pos = find_tbl(t);
204         if(pos<0){
205                 vector<field_entry *> r;
206                 return(r);
207         }
208         return(tbl_list[pos]->get_fields());
209 }
210
211 field_entry *table_list::get_field(string t, int i){
212         int pos = find_tbl(t);
213         if(pos<0){
214                 return(NULL);
215         }
216         return(tbl_list[pos]->get_field(i));
217 }
218
219 int table_list::get_field_idx(string t, string f){
220         int pos = find_tbl(t);
221         if(pos<0){
222                 return(-1);
223         }
224         return(tbl_list[pos]->get_field_idx(f));
225 }
226
227
228 vector<int> table_list::get_tblref_of_field(string f){
229         int i;
230         vector<int> retval;
231
232         for(i=0;i<tbl_list.size();i++){
233                 if( tbl_list[i]->contains_field(f) ){
234                         retval.push_back(i);
235                 }
236         }
237         return(retval);
238 }
239
240 //              TODO: this seems to duplicate find_tbl
241 int table_list::get_table_ref(string t){
242         int i;
243         for(i=0;i<tbl_list.size();i++){
244                 if(tbl_list[i]->get_tbl_name() == t ){
245                         return(i);
246                 }
247         }
248         return(-1);
249 }
250
251
252 //                              Use to unroll hierarchically defined
253 //                              tables.  Used for source tables.
254 //                              Also, do some sanity checking, better
255 //                              to find the errors now when its easy to report
256 //                              than later when its obscure.
257 //
258 //                              Also, process the unpacking functions.
259 //                              and verify that all field unpacking functions
260 //                              are listed in the schema.
261 int table_list::unroll_tables(string &err){
262         int f, tref, p, t, ret=0, retval=0;
263
264 //              First, verify there are no repeat field names in any
265 //              of the tables.
266         set<string> found_names;
267         set<string> dup_names;
268         for(t=0;t<tbl_list.size();t++){
269                 if(found_names.find(tbl_list[t]->get_tbl_name()) != found_names.end()){
270                         dup_names.insert(tbl_list[t]->get_tbl_name());
271                 }else{
272                         found_names.insert(tbl_list[t]->get_tbl_name());
273                 }
274         }
275         if(dup_names.size()>0){
276                 fprintf(stderr, "Error, the schema has duplicate names:");
277                 for(auto dit=dup_names.begin();dit!=dup_names.end(); ++dit)
278                         fprintf(stderr, " %s",(*dit).c_str());
279                 fprintf(stderr, "\n");
280                 retval = 1;
281         }
282
283
284 //              extrack unpack functions
285         for(t=0;t<tbl_list.size();t++){
286           if(tbl_list[t]->get_schema_type() == UNPACK_FCNS_SCHEMA){
287                 for(f=0;f<tbl_list[t]->ufcn_list.size();f++){
288                         ufcn_fcn[ tbl_list[t]->ufcn_list[f]->name ] = tbl_list[t]->ufcn_list[f]->fcn;
289                         ufcn_cost[ tbl_list[t]->ufcn_list[f]->name ] = tbl_list[t]->ufcn_list[f]->cost;
290                         
291                 }
292           }
293         }
294
295         for(t=0;t<tbl_list.size();t++){
296           if(tbl_list[t]->get_schema_type() != UNPACK_FCNS_SCHEMA){
297 //                      No duplicate field names
298                 ret = tbl_list[t]->verify_no_duplicates(err);
299                 if(ret) retval = ret;
300
301 //                      every field has an access function
302                 if(tbl_list[t]->get_schema_type() == PROTOCOL_SCHEMA){
303                         ret = tbl_list[t]->verify_access_fcns(err);
304                         if(ret) retval = ret;
305                 }
306
307 //                      Every type can be parsed
308                 vector<field_entry *> flds = tbl_list[t]->get_fields();
309                 for(f=0;f<flds.size();++f){
310                         data_type dt(flds[f]->get_type());
311                         if(dt.get_type() == undefined_t){
312                                 err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unrecognized type "+flds[f]->get_type()+"\n";
313                                 retval = 1;
314                         }
315                         if(dt.get_type() == fstring_t){
316                                 err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unsupported type "+flds[f]->get_type()+"\n";
317                                 retval = 1;
318                         }
319                 }
320
321 //                      Ensure that the unpack functions, if any, exist.
322                 for(f=0;f<flds.size();++f){
323                         set<string> ufcns = flds[f]->get_unpack_fcns();
324                         set<string>::iterator ssi;
325                         for(ssi=ufcns.begin();ssi!=ufcns.end();ssi++){
326                                 if(ufcn_fcn.count((*ssi))==0){
327                                         err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unrecognized unpacking function "+(*ssi)+"\n";
328                                         retval = 1;
329                                 }
330                         }
331                 }
332
333 //                      annote the original source of the field -- for prefilter
334                 string tbl_name = tbl_list[t]->get_tbl_name();
335                 vector<field_entry *> fev = tbl_list[t]->get_fields();
336                 for(f=0;f<fev.size();++f)
337                         fev[f]->set_basetable(tbl_name);
338           }
339         }
340
341         if(retval) return(retval);
342
343 //              Next, build a predecessors graph.
344 //              Verify that all referenced tables exist.
345
346         vector< vector<int> > predecessors;             // list of tables inherited from.
347         vector<int> n_pred;                                             // number of (remaining) predecessors.
348                                                                                         // -1 indicates a processed table.
349
350         for(t=0;t<tbl_list.size();t++){
351           if(tbl_list[t]->get_schema_type() != UNPACK_FCNS_SCHEMA){
352                 vector<string> pred_tbls = tbl_list[t]->get_pred_tbls();
353                 vector<int> pred_ref;
354                 for(p=0;p<pred_tbls.size();p++){
355                         tref = this->get_table_ref(pred_tbls[p]);
356                         if(tref < 0){
357                                 err.append("Error: table ");
358                                 err.append(tbl_list[t]->get_tbl_name());
359                                 err.append(" referenced non-existent table ");
360                                 err.append(pred_tbls[p]);
361                                 err.append("\n");
362                                 return(2);
363                         }else{
364                                 pred_ref.push_back(tref);
365                         }
366                 }
367                 predecessors.push_back(pred_ref);
368                 n_pred.push_back(pred_ref.size());
369           }else{
370                 vector<int> tmp_iv;
371                 predecessors.push_back(tmp_iv);
372                 n_pred.push_back(0);
373           }
374         }
375
376
377         int n_remaining = predecessors.size();
378         int n_total = n_remaining;
379
380 //              Run through the DAG and pull off one root at a time (n_pred == 0).
381 //              there might be a cycle, so iterate until n_remaining == 0.
382
383         while(n_remaining > 0){
384
385 //              Find a root
386                 int root;
387                 for(root=0;root < n_total;root++){
388                         if(n_pred[root] == 0)
389                                 break;
390                 }
391                 if(root == n_total){    // didn't find a root.
392                         err.append("Error : cycle in inheritance among the following tables:");
393                         int r;
394                         for(r=0;r<n_total;r++){
395                                 if(n_pred[r] > 0){
396                                         err.append(" ");
397                                         err.append(tbl_list[r]->get_tbl_name());
398                                 }
399                         }
400                         return(3);
401                 }
402
403 //                      I'd adding fields from the root table to the
404                 vector<field_entry *> pred_fields = tbl_list[root]->get_fields();
405
406
407 //                      Scan for all successors of the root.
408                 int s, f;
409                 for(s=0;s<n_total;s++){
410                         if(find((predecessors[s]).begin(), (predecessors[s]).end(), root) !=
411                                         (predecessors[s]).end() ){
412
413 //                      s is a successor : add the fields from the root.
414                                 for(f=0;f<pred_fields.size();f++){
415                                         retval = tbl_list[s]->add_field(pred_fields[f]);
416                                         if(retval < 0){
417                                                 err.append("Warning: field ");
418                                                 err.append(pred_fields[f]->get_name());
419                                                 err.append(" already exists in table ");
420                                                 err.append(tbl_list[s]->get_tbl_name());
421                                                 err.append(" (inheriting from table ");
422                                                 err.append(tbl_list[root]->get_tbl_name());
423                                                 err.append(").\n");
424                                         }
425                                 }
426 //                      and insert schema_ids
427                                 set<int> s_schema_ids = tbl_list[root]->get_all_schema_ids();
428                                 for(auto sit=s_schema_ids.begin(); sit!=s_schema_ids.end(); ++sit){
429                                         tbl_list[s]->add_to_all_schema_ids((*sit));
430                                 }
431
432 //                      s has one less predecessor.
433                                 n_pred[s]--;
434                         }
435                 }
436
437 //                      Indicate that the root has been processed.
438                 n_pred[root] = -1;
439                 n_remaining--;
440         }
441
442
443 //                      Done!
444         return(0);
445 }
446
447 int table_list::add_table(table_def *td){
448         int tref = get_table_ref(td->get_tbl_name());
449         if(tref >= 0) return(tref);
450         tbl_list.push_back(td);
451         return(tbl_list.size() - 1);
452 }
453
454
455
456 //////////////////////////////////////////////////////
457 //////////              Serialize functions.
458 //////////              The deserialize fcn is the parser.
459
460
461 string param_list::to_string(){
462         string retval;
463         map<string, string>::iterator mssi;
464         bool first_exec=true;
465         for(mssi=pmap.begin();mssi!=pmap.end();++mssi){
466                 if(first_exec){  first_exec=false; }
467                 else{ retval+=",";  }
468                 retval += (*mssi).first + " ";
469                 retval += (*mssi).second;
470         }
471         return(retval);
472 }
473
474 string field_entry::to_string(){
475         string retval = type + " " + name + " " + function;
476         if(mod_list->size() > 0){
477                 retval += " ( " + mod_list->to_string() + " ) ";
478         }
479
480         return(retval);
481 }
482
483
484
485
486 string table_def::to_string(){
487         int q;
488         string retval;
489         switch(schema_type){
490         case PROTOCOL_SCHEMA:
491                 retval = "TABLE ";
492                 break;
493         case STREAM_SCHEMA:
494                 retval = "STREAM ";
495                 break;
496         case OPERATOR_VIEW_SCHEMA:
497                 retval += "OPERATOR_VIEW ";
498                 break;
499         default:
500                 retval = "ERROR UNKNOWN TABLE TYPE ";
501                 break;
502         }
503
504         retval += table_name + " ";
505
506         if(base_tables->size() > 0){
507                 retval += "( "+base_tables->to_string() + " ) ";
508         }
509
510         retval += "{\n";
511
512         if(schema_type == OPERATOR_VIEW_SCHEMA){
513                 retval += "\tOPERATOR ("+op_properties->to_string()+")\n";
514                 retval += "\tFIELDS{\n";
515         }
516
517         int f;
518         for(f=0;f<fields.size();f++){
519                 retval += "\t" + fields[f]->to_string() + ";\n";
520         }
521
522         if(schema_type == OPERATOR_VIEW_SCHEMA){
523                 retval += "\tSUBQUERIES{\n";
524                 for(q=0;q<qspec_list.size();++q){
525                         if(q>0) retval += ";\n";
526                         retval += qspec_list[q]->to_string();
527                 }
528                 retval += "\t}\n";
529                 retval += "\tSELECTION_PUSHDOWN ("+selpush->to_string()+")\n";
530         }
531
532         
533
534         retval += "}\n";
535
536         return(retval);
537 }
538
539 string table_list::to_string(){
540         string retval;
541         int t;
542         for(t=0;t<tbl_list.size();t++){
543                 retval += tbl_list[t]->to_string();
544                 retval += "\n";
545         }
546         return(retval);
547 }