3c009ba36547384f9e56ab4833b012c0a64c4c39
[com/gs-lite.git] / src / ftacmp / parse_schema.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15 #ifndef __SCHEMA_DEF_H_INCLUDED__
16 #define __SCHEMA_DEF_H_INCLUDED__
17
18 #include <string>
19 #include <vector>
20 #include <map>
21 #include<set>
22
23 #include <string.h>
24 #include <stdlib.h>
25
26
//		A param_list is a set of named parameters, each with an
//		optional string value.  A parameter given without a value
//		is stored with the empty string as its value.
//		Keys are unique: appending a key that is already present
//		overwrites its value.  Keys are held in a std::map, so
//		get_key_vec() reports them in sorted order, not in
//		insertion order.

class param_list{
private:
	std::map< std::string, std::string > pmap;

//		Map a NULL C string to "".  Building a std::string from a
//		NULL pointer is undefined behavior, so every const char *
//		argument is passed through here first.
	static const char *nn(const char *s){ return (s != NULL) ? s : ""; }

public:
//		Empty list.
	param_list(){};
//		List holding the single valueless parameter 'key'.
	param_list(const char *key){
		pmap[nn(key)]="";
	};
//		List holding the single parameter 'key' with value 'val'.
	param_list(const char *key, const char *val){
		pmap[nn(key)]=nn(val);
	};

//		The append methods add (or overwrite) one parameter and
//		return this, so that calls can be chained.
	param_list *append(const char *key){
		pmap[nn(key)]="";
		return(this);
	};
	param_list *append(const char *key, const char *val){
		pmap[nn(key)]=nn(val);
		return(this);
	};
	param_list *append(const std::string &key){
		pmap[key]="";
		return(this);
	};

//		Number of parameters in the list.
	int size() const {return static_cast<int>(pmap.size());};


//		True iff 'key' is in the list (with or without a value).
	bool contains_key(const std::string &key) const {
		return(pmap.find(key) != pmap.end());
	}

//		Remove 'k' if present.  Returns the number of entries
//		removed (0 or 1).
	int delete_key(const std::string &k){
		return static_cast<int>(pmap.erase(k));
	}

//		Value stored for 'key', or "" if 'key' is absent.
//		A missing key and a valueless key both yield "";
//		use contains_key() to tell them apart.
//		The list is never modified by the lookup.
	std::string val_of(const std::string &key) const {
		std::map<std::string, std::string>::const_iterator pos = pmap.find(key);
		if(pos != pmap.end())
			return(pos->second);
		return(std::string(""));
	}

//		All keys, in the (sorted) iteration order of the map.
	std::vector<std::string> get_key_vec() const {
		std::vector<std::string> retval;
		retval.reserve(pmap.size());
		std::map<std::string, std::string>::const_iterator mssi;
		for(mssi=pmap.begin();mssi!=pmap.end();++mssi){
			retval.push_back( mssi->first );
		}
		return(retval);
	}

//		Defined out of line.
	std::string to_string();
};
84
85
86 //              list of names, order matters.
87 class name_vec{
88 public:
89         std::vector<std::string> svec;
90         std::vector<std::string> nvec;
91         std::vector<param_list *> pvec;
92
93         name_vec(char *c, char *n, param_list *p){
94                 svec.push_back(c);
95                 nvec.push_back(n);
96                 if(p) pvec.push_back(p);
97                 else pvec.push_back(new param_list());
98         };
99
100         name_vec *append(char *c, char *n, param_list *p){
101                 svec.push_back(c);
102                 nvec.push_back(n);
103                 if(p) pvec.push_back(p);
104                 else pvec.push_back(new param_list());
105                 return this;
106         };
107 };
108
109
110
111 //                      A field in a STREAM or PROTOCOL
112
113
114 class field_entry{
115 private:
116         std::string type;                       // data type
117         std::string name;                       // name in a query
118         std::string function;           // access function, if any (PROTOCOL only).
119         param_list *mod_list;           // special properties.
120         std::set<std::string> ufcns;    // unpacking functions, if any.
121
122         std::string base_table;         // for hierarchically structured data sources,
123                                                                 // the bast table where the field is defined.
124                                                                 // mostly used for computing the LFTA prefilter.
125 public:
126
127         field_entry(const char *t, const char *n, const char *f, param_list *plist, param_list *ulist){
128                 if(plist == NULL)
129                         mod_list = new param_list();
130                 else
131                         mod_list = plist;
132                 if(ulist){
133                         int u;
134                         std::vector<std::string> tmp_ufl = ulist->get_key_vec();
135                         for(u=0;u<tmp_ufl.size();++u)
136                                 ufcns.insert(tmp_ufl[u]);
137                 }
138
139                 type=t; name=n; function=f;
140                 base_table = "";
141         };
142
143         field_entry(std::string t, std::string n, std::string f, param_list *plist, const std::set<std::string> &ulist){
144                 if(plist == NULL)
145                         mod_list = new param_list();
146                 else
147                         mod_list = plist;
148                 ufcns = ulist;
149                 type=t; name=n; function=f;
150                 base_table = "";
151         };
152
153         field_entry(std::string n, std::string t){
154                 name = n;
155                 type = t;
156                 mod_list = new param_list();
157         }
158
159         void add_unpack_fcns(param_list *ufl){
160                 std::vector<std::string> new_ufl = ufl->get_key_vec();
161                 int u;
162                 for(u=0;u<new_ufl.size();++u)
163                         ufcns.insert(new_ufl[u]);
164         }
165
166
167         param_list *get_modifier_list(){return mod_list;        };
168         std::string get_type(){return(type);};
169         std::string get_name(){return(name);};
170         std::string get_fcn(){return(function);};
171         std::set<std::string> get_unpack_fcns(){
172                 return ufcns;
173         }
174
175         void set_basetable(std::string b){base_table=b;};
176         std::string get_basetable(){return base_table;};
177
178         std::string to_string();
179
180         int delete_modifier(std::string k){
181                 return mod_list->delete_key(k);
182         }
183         void add_modifier(const char *k, const char *v){
184                 mod_list->append(k,v);
185         }
186         void add_modifier(const char *k){
187                 mod_list->append(k);
188         }
189
190 };
191
192
193 //              list of fields.  An intermediate parse structure.
194 //              it gets loaded into table_def.fields
195
196 class field_entry_list{
197 private:
198         std::vector<field_entry *> fl;
199
200 public:
201         field_entry_list(){};
202
203         field_entry_list(field_entry *f){
204                 fl.push_back(f);
205         };
206
207         field_entry_list *append_field(field_entry *f){
208                 fl.push_back(f);
209                 return(this);
210         };
211
212         std::vector<field_entry *> get_list(){return fl;        };
213 };
214
215 class subquery_spec{
216 public:
217         std::string name;
218         std::vector<std::string> types;
219         std::vector<std::string> names;
220         std::vector<param_list *> modifiers;
221
222
223         subquery_spec(){}
224
225         subquery_spec(const char *n, name_vec *t){
226                 name = n;
227                 types = t->svec;
228                 names = t->nvec;
229                 modifiers = t->pvec;
230         };
231
232         std::string to_string(){
233                 std::string ret = name+" (";
234                 int i;
235                 for(i=0;i<types.size();++i){
236                         if(i>0) ret+=", ";
237                         ret += types[i] + " " + names[i];
238                         if(modifiers[i]->size() >0){
239                                 ret+=" ("+modifiers[i]->to_string()+") ";
240                         }
241                 }
242                 ret += ") ";
243                 return(ret);
244         };
245
246         subquery_spec *duplicate(){
247                 subquery_spec *ret = new subquery_spec();
248                 ret->name = name;
249                 ret->types = types;
250                 ret->names = names;
251
252                 return ret;
253         }
254
255
256
257 };
258
259 class subqueryspec_list{
260 public:
261         std::vector<subquery_spec *> spec_list;
262
263         subqueryspec_list(subquery_spec *ss){
264                 spec_list.push_back(ss);
265         };
266         subqueryspec_list *append(subquery_spec *ss){
267                 spec_list.push_back(ss);
268                 return this;
269         };
270 };
271
//		One unpacking function: its name, the function string
//		associated with it, and an integer cost.
class unpack_fcn{
public:
	std::string name;
	std::string fcn;
	int cost;

//		n : name, f : function, c : the cost as decimal text.
//		c is converted with atoi(), so text that does not start
//		with a number gives a cost of 0.
	unpack_fcn(const char *n, const char *f, const char *c)
		: name(n), fcn(f), cost(atoi(c))
	{
	};
};
284
285 class unpack_fcn_list{
286 public:
287         std::vector<unpack_fcn *> ufcn_v;
288
289         unpack_fcn_list(unpack_fcn *u){
290                 ufcn_v.push_back(u);
291         };
292
293         unpack_fcn_list *append(unpack_fcn *u){
294                 ufcn_v.push_back(u);
295                 return this;
296         };
297 };
298
299
300
301
302 //              forward definition, needed for table_def
303 class table_exp_t;
304 struct query_list_t;
305
306 /* ============================================
307                 The schema can support several different
308                 flavors of table.
309                         PROTOCOL : the base data that an FTA can retrieve.
310                         STREAM : Data created by an FTA or a stream operator.
311                 More to come.  Perhaps this is better handled by
312                 annotations in the schema def.
313    ============================================= */
314
315 #define PROTOCOL_SCHEMA 1
316 #define STREAM_SCHEMA 2
317 #define OPERATOR_VIEW_SCHEMA 3
318 #define UNPACK_FCNS_SCHEMA 4
319
320 //                      Represent a STREAM, PROTOCOL, OPERATOR_VIEW, or UNPACK_FCN list.
321
322 class table_def{
323 private:
324         std::string table_name;
325         std::vector<field_entry *> fields;
326         param_list *base_tables;        // if PROTOCOL, the PROTOCOLS that
327                                                                 // this PROTOCOL inherits fields from.
328         int schema_type;        // STREAM_SCHEMA, PROTOCOL_SCHEMA, OPERATOR_VIEW_SCHEMA
329 //              For operator_view tables
330         param_list *op_properties;
331         std::vector<subquery_spec *> qspec_list;
332         param_list *selpush;
333
334 public:
335 //              for unpacking function group specs.
336         std::vector<unpack_fcn *> ufcn_list;
337
338
339
340 //              Unpack functions defined at the PROTOCOL level are added to
341 //              PROTOCOL fields here ... implying that ony those fields
342 //              explicitly defined in the PROTOCOL (as opposed to inherited)
343 //              get the PROTOCOL-wide unpack functions.
344         table_def(const char *name, param_list *plist, param_list *ufcn_l, field_entry_list *fel, int sch_t){
345         int f;
346                 if(plist == NULL)
347                         base_tables = new param_list();
348                 else
349                         base_tables = plist;
350                 table_name =name;
351                 fields = fel->get_list();
352                 schema_type = sch_t;
353
354 //                      fields inherit table-level unpacking functions, if any.
355                 if(ufcn_l){
356                         for(f=0;f<fields.size();++f)
357                                 fields[f]->add_unpack_fcns(ufcn_l);
358                 }
359
360                 op_properties = new param_list();
361                 selpush = new param_list();
362         };
363
364         table_def(const char *name, param_list *oprop, field_entry_list *fel,
365                                 subqueryspec_list *ql, param_list *selp);
366
367         table_def(unpack_fcn_list *ufcn_l){
368                 schema_type = UNPACK_FCNS_SCHEMA;
369                 ufcn_list = ufcn_l->ufcn_v;
370         }
371
372         table_def(){};
373
374     table_def *make_shallow_copy(std::string n);
375
376         void mangle_subq_names(std::string mngl);
377
378         std::string get_tbl_name(){return table_name;   };
379         std::vector<field_entry *> get_fields(){return(fields); };
380
381         field_entry *get_field(int i){
382                         if(i>=0 && i<fields.size()) return(fields[i]);
383                         return NULL;
384         };
385
386         std::string get_field_name(int i){
387                         if(i>=0 && i<fields.size()) return(fields[i]->get_name());
388                         return "";
389         };
390
391         bool contains_field(std::string f);
392         bool contains_field(int f);
393
394         int get_field_idx(std::string f);
395         std::string get_type_name(std::string f);
396         param_list *get_modifier_list(std::string f);
397         std::string get_fcn(std::string f);
398
399         std::string get_op_prop(std::string s){
400                 return op_properties->val_of(s);
401         };
402
403 //              Used in generating the LFTA prefilter
404         std::string get_field_basetable(std::string f);
405
406
407         int verify_no_duplicates(std::string &err);
408         int verify_access_fcns(std::string &err);
409
410
411         std::vector<std::string> get_pred_tbls(){
412                 return base_tables->get_key_vec() ;
413         };
414
415         int add_field(field_entry *fe);
416
417         int get_schema_type(){return schema_type;};
418
419         std::vector<subquery_spec *> get_subqueryspecs(){return qspec_list;};
420
421         std::string to_string();
422         std::string to_stream_string(){
423                 int tmp_sch = schema_type;
424                 schema_type = STREAM_SCHEMA;
425                 std::string ret = this->to_string();
426                 schema_type = tmp_sch;
427                 return ret;
428         }
429 };
430
431
432 //              A Schema -- a collection of stream layout definitions.
433
434 class table_list{
435 private:
436         std::vector<table_def *> tbl_list;
437         //              for an unpack_fcn_list, collect from the set of
438         //              UNPACK_FCNS_SCHEMA in the table list.
439                 std::map<std::string, std::string> ufcn_fcn;
440                 std::map<std::string, int> ufcn_cost;
441
442
443 public:
444         table_list(table_def *td){tbl_list.push_back(td);       };
445         table_list(){};
446
447         table_list *append_table(table_def *td){
448                 tbl_list.push_back(td);
449                 return(this);
450         };
451
452         int add_table(table_def *td);
453         table_def *get_table(int t){
454                 if(t<0 || t>tbl_list.size()) return(NULL);
455                 return(tbl_list[t]);
456         };
457
458         int add_duplicate_table(std::string src, std::string dest){
459                 int src_pos = this->find_tbl(src);
460                 if(src_pos<0)
461                         return src_pos;
462                 table_def *dest_tbl = tbl_list[src_pos]->make_shallow_copy(dest);
463                 tbl_list.push_back(dest_tbl);
464                 return tbl_list.size()-1;
465         }
466
467         void mangle_subq_names(int pos, std::string mngl){
468                 tbl_list[pos]->mangle_subq_names(mngl);
469         }
470
471
472         int size(){return tbl_list.size();};
473
474 /////////////
475 //              Accessor methods : get table and field info without
476 //              descending into the underlying data structures.
477 //              Can specify a table by name (string), or by index (int)
478 //              (e.g. returned by get_table_ref)
479
480         int get_ufcn_cost(std::string fname){
481                 if(ufcn_cost.count(fname))
482                         return ufcn_cost[fname];
483                 else
484                         return -1;
485         }
486         std::string get_ufcn_fcn(std::string fname){
487                 if(ufcn_fcn.count(fname))
488                         return ufcn_fcn[fname];
489                 else
490                         return "ERROR_ufcn_fcn_of_"+fname+"_not_found";
491         }
492
493         std::string get_table_name(int i){
494                 if(i>tbl_list.size()) return("");
495                 else return tbl_list[i]->get_tbl_name();
496         };
497         std::vector<std::string> get_table_names();
498
499         std::vector<field_entry *> get_fields(std::string t);
500         field_entry *get_field(std::string t, int i);
501         field_entry *get_field(int t, std::string f){
502                 return tbl_list[t]->get_field(tbl_list[t]->get_field_idx(f));
503         }
504         int get_field_idx(std::string t, std::string f);
505
506         int find_tbl(std::string t);
507
508         std::vector<int> get_tblref_of_field(std::string f);
509
510         int get_table_ref(std::string t);
511
512         std::string get_type_name(int t, std::string f){
513                 return(tbl_list[t]->get_type_name(f));
514         };
515
516         param_list *get_modifier_list(int t, std::string f){
517                 return(tbl_list[t]->get_modifier_list(f));
518         };
519
520         std::string get_fcn(int t, std::string f){
521                 return(tbl_list[t]->get_fcn(f));
522         };
523
524         int get_schema_type(int t){
525                 return(tbl_list[t]->get_schema_type());
526         };
527
528         std::string get_op_prop(int t, std::string s){
529                 return(tbl_list[t]->get_op_prop(s));
530         };
531
532         std::vector<subquery_spec *> get_subqueryspecs(int t){
533                 return tbl_list[t]->get_subqueryspecs();
534         };
535
536
537 //              Used in generating the LFTA prefilter
538         std::string get_basetbl_name(int t, std::string f){
539                 return(tbl_list[t]->get_field_basetable(f));
540         };
541
542         bool contains_field(int t, std::string f){
543                 return(tbl_list[t]->contains_field(f));
544         };
545
546
547 //////////////
548 //              Additional methods
549
550 //                      Process field inheritance for PROTOCOL tables.
551         int unroll_tables(std::string &err);
552
553         std::string to_string();
554 };
555 #endif