// Retrieved from repository com/gs-lite.git, path src/ftacmp/parse_schema.cc
// (object id a9bc60cf0dbcc9895c25dcdb5f475a1965f88d87).
/* ------------------------------------------------
Copyright 2014 AT&T Intellectual Property
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
 ------------------------------------------- */
15
16 #include <string>
17 #include"parse_fta.h"
18 #include "parse_schema.h"
19 #include "type_objects.h"
20 #include <stdio.h>
21 #include <stdlib.h>
22 // #include <algo.h>
23 #include<algorithm>
24
25 using namespace std;
26
27 table_list *Schema;
28
29 table_def::table_def(const char *name, param_list *oprop, field_entry_list *fel,
30                         subqueryspec_list *ql, param_list *selp){
31         table_name =name;
32         fields = fel->get_list();
33         schema_type = OPERATOR_VIEW_SCHEMA;
34         qspec_list = ql->spec_list;
35
36         if(oprop == NULL) op_properties = new param_list();
37         else            op_properties = oprop;
38         if(selp == NULL) selpush = new param_list();
39         else            selpush = selp;
40         base_tables = new param_list();
41 };
42
43 table_def *table_def::make_shallow_copy(string n){
44         table_def *ret = new table_def();
45         ret->table_name = n;
46         ret->fields = fields;
47         ret->schema_type = schema_type;
48         ret->base_tables = base_tables;
49         ret->op_properties = op_properties;
50         ret->qspec_list = qspec_list;
51         ret->selpush = selpush;
52
53         return ret;
54 }
55
56 void table_def::mangle_subq_names(std::string mngl){
57         int i;
58         for(i=0;i<qspec_list.size();++i){
59                 subquery_spec *s = qspec_list[i]->duplicate();
60                 s->name += mngl;
61                 qspec_list[i] = s;
62         }
63 }
64
65
66
67 bool table_def::contains_field(string f){
68   int i;
69
70   for(i=0;i<fields.size();i++){
71         if(fields[i]->get_name() == f){
72                 return(true);
73         }
74   }
75   return(false);
76
77 }
78
79 int table_def::get_field_idx(std::string f){
80   int i;
81   for(i=0;i<fields.size();i++){
82         if(fields[i]->get_name() == f){
83                 return(i);
84         }
85   }
86   return(-1);
87 }
88
89
90 string table_def::get_type_name(std::string f){
91   int i;
92   for(i=0;i<fields.size();i++){
93         if(fields[i]->get_name() == f){
94                 return(fields[i]->get_type());
95         }
96   }
97   return("INTERNAL ERROR undefined field " + f);
98 }
99
100 param_list *table_def::get_modifier_list(std::string f){
101   int i;
102   for(i=0;i<fields.size();i++){
103         if(fields[i]->get_name() == f){
104                 return(fields[i]->get_modifier_list());
105         }
106   }
107   fprintf(stderr,"INTERNAL ERROR, no field %s in table %s, call is get_modifier_list.\n",
108                         f.c_str(), table_name.c_str() );
109   exit(1);
110   return(NULL);
111 }
112
113
114 string table_def::get_fcn(std::string f){
115   int i;
116   for(i=0;i<fields.size();i++){
117         if(fields[i]->get_name() == f){
118                 return(fields[i]->get_fcn());
119         }
120   }
121   return("INTERNAL ERROR undefined field " + f);
122
123 }
124
125
126 string table_def::get_field_basetable(std::string f){
127   int i;
128   for(i=0;i<fields.size();i++){
129         if(fields[i]->get_name() == f){
130                 return(fields[i]->get_basetable());
131         }
132   }
133   return("INTERNAL ERROR undefined field " + f);
134
135 }
136
137 int table_def::verify_no_duplicates(std::string &err){
138
139         int f1, f2;
140         for(f1=0;f1<fields.size()-1;f1++){
141                 string f1_name = fields[f1]->get_name();
142                 for(f2=f1+1;f2<fields.size();f2++){
143                         if(f1_name == fields[f2]->get_name()){
144                                 err.append("Error, table ");
145                                 err.append(table_name);
146                                 err.append(" has a duplicate field :");
147                                 err.append(f1_name);
148                                 err.append("\n");
149                                 return(1);
150                         }
151                 }
152         }
153         return(0);
154 }
155
156 int table_def::verify_access_fcns(std::string &err){
157         int retval = 0, f;
158
159         for(f=0;f<fields.size();++f){
160                 if(fields[f]->get_fcn() == ""){
161                         err += "Error, PROTOCOL field "+table_name+"."+fields[f]->get_name()+" has an empty access function.\n";
162                         retval = 1;
163                 }
164         }
165
166         return(retval);
167 }
168
169 int table_def::add_field(field_entry *fe){
170         string fe_name = fe->get_name();
171         int f;
172
173         for(f=0;f<fields.size();f++){
174                 if(fe_name == fields[f]->get_name()){
175                         return(-1);
176                 }
177         }
178         fields.push_back(fe);
179         return(0);
180 }
181
182
183 vector<string> table_list::get_table_names(){
184         vector<string> retval;
185         int i;
186         for(i=0;i<tbl_list.size();i++){
187                 retval.push_back(tbl_list[i]->get_tbl_name());
188         }
189         return(retval);
190 }
191
192
193 int table_list::find_tbl(string t){
194         int i;
195         for(i=0;i<tbl_list.size();i++)
196                 if(tbl_list[i]->get_tbl_name() == t)
197                         return(i);
198 //      fprintf(stderr,"INTERNAL ERROR: Could not find table %s in table_list::find_tbl\n",t.c_str());
199         return(-1);
200 }
201
202 vector<field_entry *> table_list::get_fields(string t){
203         int pos = find_tbl(t);
204         if(pos<0){
205                 vector<field_entry *> r;
206                 return(r);
207         }
208         return(tbl_list[pos]->get_fields());
209 }
210
211 field_entry *table_list::get_field(string t, int i){
212         int pos = find_tbl(t);
213         if(pos<0){
214                 return(NULL);
215         }
216         return(tbl_list[pos]->get_field(i));
217 }
218
219 int table_list::get_field_idx(string t, string f){
220         int pos = find_tbl(t);
221         if(pos<0){
222                 return(-1);
223         }
224         return(tbl_list[pos]->get_field_idx(f));
225 }
226
227
228 vector<int> table_list::get_tblref_of_field(string f){
229         int i;
230         vector<int> retval;
231
232         for(i=0;i<tbl_list.size();i++){
233                 if( tbl_list[i]->contains_field(f) ){
234                         retval.push_back(i);
235                 }
236         }
237         return(retval);
238 }
239
240 //              TODO: this seems to duplicate find_tbl
241 int table_list::get_table_ref(string t){
242         int i;
243         for(i=0;i<tbl_list.size();i++){
244                 if(tbl_list[i]->get_tbl_name() == t ){
245                         return(i);
246                 }
247         }
248         return(-1);
249 }
250
251
252 //                              Use to unroll hierarchically defined
253 //                              tables.  Used for source tables.
254 //                              Also, do some sanity checking, better
255 //                              to find the errors now when its easy to report
256 //                              than later when its obscure.
257 //
258 //                              Also, process the unpacking functions.
259 //                              and verify that all field unpacking functions
260 //                              are listed in the schema.
261 int table_list::unroll_tables(string &err){
262 //              First, verify there are no repeat field names in any
263 //              of the tables.
264
265         int f, tref, p, t, ret, retval;
266
267         for(t=0;t<tbl_list.size();t++){
268           if(tbl_list[t]->get_schema_type() == UNPACK_FCNS_SCHEMA){
269                 for(f=0;f<tbl_list[t]->ufcn_list.size();f++){
270                         ufcn_fcn[ tbl_list[t]->ufcn_list[f]->name ] = tbl_list[t]->ufcn_list[f]->fcn;
271                         ufcn_cost[ tbl_list[t]->ufcn_list[f]->name ] = tbl_list[t]->ufcn_list[f]->cost;
272                         
273                 }
274           }
275         }
276
277         for(t=0;t<tbl_list.size();t++){
278           if(tbl_list[t]->get_schema_type() != UNPACK_FCNS_SCHEMA){
279 //                      No duplicate field names
280                 ret = tbl_list[t]->verify_no_duplicates(err);
281                 if(ret) retval = ret;
282
283 //                      every field has an access function
284                 if(tbl_list[t]->get_schema_type() == PROTOCOL_SCHEMA){
285                         retval = tbl_list[t]->verify_access_fcns(err);
286                         if(ret) retval = ret;
287                 }
288
289 //                      Every type can be parsed
290                 vector<field_entry *> flds = tbl_list[t]->get_fields();
291                 for(f=0;f<flds.size();++f){
292                         data_type dt(flds[f]->get_type());
293                         if(dt.get_type() == undefined_t){
294                                 err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unrecognized type "+flds[f]->get_type()+"\n";
295                                 retval = 1;
296                         }
297                         if(dt.get_type() == fstring_t){
298                                 err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unsupported type "+flds[f]->get_type()+"\n";
299                                 retval = 1;
300                         }
301                 }
302
303 //                      Ensure that the unpack functions, if any, exist.
304                 for(f=0;f<flds.size();++f){
305                         set<string> ufcns = flds[f]->get_unpack_fcns();
306                         set<string>::iterator ssi;
307                         for(ssi=ufcns.begin();ssi!=ufcns.end();ssi++){
308                                 if(ufcn_fcn.count((*ssi))==0){
309                                         err += "ERROR, field "+flds[f]->get_name()+" of table "+tbl_list[t]->get_tbl_name()+" has unrecognized unpacking function "+(*ssi)+"\n";
310                                         retval = 1;
311                                 }
312                         }
313                 }
314
315 //                      annote the original source of the field -- for prefilter
316                 string tbl_name = tbl_list[t]->get_tbl_name();
317                 vector<field_entry *> fev = tbl_list[t]->get_fields();
318                 for(f=0;f<fev.size();++f)
319                         fev[f]->set_basetable(tbl_name);
320           }
321         }
322
323         if(retval) return(retval);
324
325 //              Next, build a predecessors graph.
326 //              Verify that all referenced tables exist.
327
328         vector< vector<int> > predecessors;             // list of tables inherited from.
329         vector<int> n_pred;                                             // number of (remaining) predecessors.
330                                                                                         // -1 indicates a processed table.
331
332         for(t=0;t<tbl_list.size();t++){
333           if(tbl_list[t]->get_schema_type() != UNPACK_FCNS_SCHEMA){
334                 vector<string> pred_tbls = tbl_list[t]->get_pred_tbls();
335                 vector<int> pred_ref;
336                 for(p=0;p<pred_tbls.size();p++){
337                         tref = this->get_table_ref(pred_tbls[p]);
338                         if(tref < 0){
339                                 err.append("Error: table ");
340                                 err.append(tbl_list[t]->get_tbl_name());
341                                 err.append(" referenced non-existent table ");
342                                 err.append(pred_tbls[p]);
343                                 err.append("\n");
344                                 return(2);
345                         }else{
346                                 pred_ref.push_back(tref);
347                         }
348                 }
349                 predecessors.push_back(pred_ref);
350                 n_pred.push_back(pred_ref.size());
351           }else{
352                 vector<int> tmp_iv;
353                 predecessors.push_back(tmp_iv);
354                 n_pred.push_back(0);
355           }
356         }
357
358
359         int n_remaining = predecessors.size();
360         int n_total = n_remaining;
361
362 //              Run through the DAG and pull off one root at a time (n_pred == 0).
363 //              there might be a cycle, so iterate until n_remaining == 0.
364
365         while(n_remaining > 0){
366
367 //              Find a root
368                 int root;
369                 for(root=0;root < n_total;root++){
370                         if(n_pred[root] == 0)
371                                 break;
372                 }
373                 if(root == n_total){    // didn't find a root.
374                         err.append("Error : cycle in inheritance among the following tables:");
375                         int r;
376                         for(r=0;r<n_total;r++){
377                                 if(n_pred[r] > 0){
378                                         err.append(" ");
379                                         err.append(tbl_list[r]->get_tbl_name());
380                                 }
381                         }
382                         return(3);
383                 }
384
385 //                      I'd adding fields from the root table to the
386                 vector<field_entry *> pred_fields = tbl_list[root]->get_fields();
387
388
389 //                      Scan for all successors of the root.
390                 int s, f;
391                 for(s=0;s<n_total;s++){
392                         if(find((predecessors[s]).begin(), (predecessors[s]).end(), root) !=
393                                         (predecessors[s]).end() ){
394
395 //                      s is a successor : add the fields from the root.
396                                 for(f=0;f<pred_fields.size();f++){
397                                         retval = tbl_list[s]->add_field(pred_fields[f]);
398                                         if(retval < 0){
399                                                 err.append("Warning: field ");
400                                                 err.append(pred_fields[f]->get_name());
401                                                 err.append(" already exists in table ");
402                                                 err.append(tbl_list[s]->get_tbl_name());
403                                                 err.append(" (inheriting from table ");
404                                                 err.append(tbl_list[root]->get_tbl_name());
405                                                 err.append(").\n");
406                                         }
407                                 }
408
409 //                      s has one less predecessor.
410                                 n_pred[s]--;
411                         }
412                 }
413
414 //                      Indicate that the root has been processed.
415                 n_pred[root] = -1;
416                 n_remaining--;
417         }
418
419
420 //                      Done!
421         return(0);
422 }
423
424 int table_list::add_table(table_def *td){
425         int tref = get_table_ref(td->get_tbl_name());
426         if(tref >= 0) return(tref);
427         tbl_list.push_back(td);
428         return(tbl_list.size() - 1);
429 }
430
431
432
433 //////////////////////////////////////////////////////
434 //////////              Serialize functions.
435 //////////              The deserialize fcn is the parser.
436
437
438 string param_list::to_string(){
439         string retval;
440         map<string, string>::iterator mssi;
441         bool first_exec=true;
442         for(mssi=pmap.begin();mssi!=pmap.end();++mssi){
443                 if(first_exec){  first_exec=false; }
444                 else{ retval+=",";  }
445                 retval += (*mssi).first + " ";
446                 retval += (*mssi).second;
447         }
448         return(retval);
449 }
450
451 string field_entry::to_string(){
452         string retval = type + " " + name + " " + function;
453         if(mod_list->size() > 0){
454                 retval += " ( " + mod_list->to_string() + " ) ";
455         }
456
457         return(retval);
458 }
459
460
461
462
463 string table_def::to_string(){
464         int q;
465         string retval;
466         switch(schema_type){
467         case PROTOCOL_SCHEMA:
468                 retval = "TABLE ";
469                 break;
470         case STREAM_SCHEMA:
471                 retval = "STREAM ";
472                 break;
473         case OPERATOR_VIEW_SCHEMA:
474                 retval += "OPERATOR_VIEW ";
475                 break;
476         default:
477                 retval = "ERROR UNKNOWN TABLE TYPE ";
478                 break;
479         }
480
481         retval += table_name + " ";
482
483         if(base_tables->size() > 0){
484                 retval += "( "+base_tables->to_string() + " ) ";
485         }
486
487         retval += "{\n";
488
489         if(schema_type == OPERATOR_VIEW_SCHEMA){
490                 retval += "\tOPERATOR ("+op_properties->to_string()+")\n";
491                 retval += "\tFIELDS{\n";
492         }
493
494         int f;
495         for(f=0;f<fields.size();f++){
496                 retval += "\t" + fields[f]->to_string() + ";\n";
497         }
498
499         if(schema_type == OPERATOR_VIEW_SCHEMA){
500                 retval += "\tSUBQUERIES{\n";
501                 for(q=0;q<qspec_list.size();++q){
502                         if(q>0) retval += ";\n";
503                         retval += qspec_list[q]->to_string();
504                 }
505                 retval += "\t}\n";
506                 retval += "\tSELECTION_PUSHDOWN ("+selpush->to_string()+")\n";
507         }
508
509         
510
511         retval += "}\n";
512
513         return(retval);
514 }
515
516 string table_list::to_string(){
517         string retval;
518         int t;
519         for(t=0;t<tbl_list.size();t++){
520                 retval += tbl_list[t]->to_string();
521                 retval += "\n";
522         }
523         return(retval);
524 }