45366a54e346ea343a7c242ea9625565534044dd
[com/gs-lite.git] / src / ftacmp / parse_schema.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15 #ifndef __SCHEMA_DEF_H_INCLUDED__
16 #define __SCHEMA_DEF_H_INCLUDED__
17
#include <string>
#include <vector>
#include <map>
#include <set>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
25
26
27 //              A param_list is used to represent a list of
28 //              parameters with optional values.
29
class param_list{
//		Maps parameter names to string values.  A parameter added without a
//		value is stored with the empty string, so "no value" and "empty
//		value" look the same to val_of().  Keys are held in a std::map, so
//		get_key_vec() lists them in sorted order, not insertion order.
private:
	std::map< std::string, std::string > pmap;

public:
	//		Empty list.
	param_list(){}

	//		List holding one valueless parameter (see append(const char *)).
	param_list(const char *key){
		append(key);
	}

	//		List holding one key/value pair (see append(const char *, const char *)).
	param_list(const char *key, const char *val){
		append(key, val);
	}

	//		Record key with an empty value, overwriting any earlier value.
	//		A NULL key is ignored, because std::string cannot be built from NULL.
	//		Returns this so that calls can be chained.
	param_list *append(const char *key){
		if(key != NULL)
			pmap[key] = "";
		return(this);
	}

	//		Record key with value val, overwriting any earlier value.
	//		A NULL key is ignored; a NULL val is stored as the empty string.
	//		Returns this so that calls can be chained.
	param_list *append(const char *key, const char *val){
		if(key != NULL)
			pmap[key] = (val != NULL) ? val : "";
		return(this);
	}

	//		std::string flavor of append(const char *).
	param_list *append(const std::string &key){
		pmap[key] = "";
		return(this);
	}

	//		Number of distinct keys.
	int size() const {return static_cast<int>(pmap.size());}

	//		True if key has been recorded, with or without a value.
	bool contains_key(const std::string &key) const{
		return(pmap.find(key) != pmap.end());
	}

	//		Remove k.  Returns the number of entries erased (0 or 1).
	int delete_key(const std::string &k){
		return static_cast<int>(pmap.erase(k));
	}

	//		Value stored for key, or "" if key is absent.
	//		Looks the key up with find(), so a miss never inserts an entry.
	std::string val_of(const std::string &key) const{
		std::map<std::string, std::string>::const_iterator hit = pmap.find(key);
		if(hit == pmap.end())
			return(std::string());
		return(hit->second);
	}

	//		All keys, in the map's (sorted) iteration order.
	std::vector<std::string> get_key_vec() const{
		std::vector<std::string> retval;
		retval.reserve(pmap.size());
		std::map<std::string, std::string>::const_iterator mssi;
		for(mssi=pmap.begin();mssi!=pmap.end();++mssi){
			retval.push_back( mssi->first );
		}
		return(retval);
	}

	//		Defined outside this header.
	std::string to_string();
};
84
85
86 //              list of names, order matters.
87 class name_vec{
88 public:
89         std::vector<std::string> svec;
90         std::vector<std::string> nvec;
91         std::vector<param_list *> pvec;
92
93         name_vec(char *c, char *n, param_list *p){
94                 svec.push_back(c);
95                 nvec.push_back(n);
96                 if(p) pvec.push_back(p);
97                 else pvec.push_back(new param_list());
98         };
99
100         name_vec *append(char *c, char *n, param_list *p){
101                 svec.push_back(c);
102                 nvec.push_back(n);
103                 if(p) pvec.push_back(p);
104                 else pvec.push_back(new param_list());
105                 return this;
106         };
107 };
108
109
110
111 //                      A field in a STREAM or PROTOCOL
112
113
114 class field_entry{
115 private:
116         std::string type;                       // data type
117         std::string name;                       // name in a query
118         std::string function;           // access function, if any (PROTOCOL only).
119         param_list *mod_list;           // special properties.
120         std::set<std::string> ufcns;    // unpacking functions, if any.
121
122         std::string base_table;         // for hierarchically structured data sources,
123                                                                 // the bast table where the field is defined.
124                                                                 // mostly used for computing the LFTA prefilter.
125 public:
126
127         field_entry(const char *t, const char *n, const char *f, param_list *plist, param_list *ulist){
128                 if(plist == NULL)
129                         mod_list = new param_list();
130                 else
131                         mod_list = plist;
132                 if(ulist){
133                         int u;
134                         std::vector<std::string> tmp_ufl = ulist->get_key_vec();
135                         for(u=0;u<tmp_ufl.size();++u)
136                                 ufcns.insert(tmp_ufl[u]);
137                 }
138
139                 type=t; name=n; function=f;
140                 base_table = "";
141         };
142
143         field_entry(std::string t, std::string n, std::string f, param_list *plist, const std::set<std::string> &ulist){
144                 if(plist == NULL)
145                         mod_list = new param_list();
146                 else
147                         mod_list = plist;
148                 ufcns = ulist;
149                 type=t; name=n; function=f;
150                 base_table = "";
151         };
152
153         field_entry(std::string n, std::string t){
154                 name = n;
155                 type = t;
156                 mod_list = new param_list();
157         }
158
159         void add_unpack_fcns(param_list *ufl){
160                 std::vector<std::string> new_ufl = ufl->get_key_vec();
161                 int u;
162                 for(u=0;u<new_ufl.size();++u)
163                         ufcns.insert(new_ufl[u]);
164         }
165
166
167         param_list *get_modifier_list(){return mod_list;        };
168         std::string get_type(){return(type);};
169         std::string get_name(){return(name);};
170         std::string get_fcn(){return(function);};
171         std::set<std::string> get_unpack_fcns(){
172                 return ufcns;
173         }
174
175         void set_basetable(std::string b){base_table=b;};
176         std::string get_basetable(){return base_table;};
177
178         std::string to_string();
179
180         int delete_modifier(std::string k){
181                 return mod_list->delete_key(k);
182         }
183         void add_modifier(const char *k, const char *v){
184                 mod_list->append(k,v);
185         }
186         void add_modifier(const char *k){
187                 mod_list->append(k);
188         }
189
190 };
191
192
193 //              list of fields.  An intermediate parse structure.
194 //              it gets loaded into table_def.fields
195
196 class field_entry_list{
197 private:
198         std::vector<field_entry *> fl;
199
200 public:
201         field_entry_list(){};
202
203         field_entry_list(field_entry *f){
204                 fl.push_back(f);
205         };
206
207         field_entry_list *append_field(field_entry *f){
208                 fl.push_back(f);
209                 return(this);
210         };
211
212         std::vector<field_entry *> get_list(){return fl;        };
213 };
214
215 class subquery_spec{
216 public:
217         std::string name;
218         std::vector<std::string> types;
219         std::vector<std::string> names;
220         std::vector<param_list *> modifiers;
221
222
223         subquery_spec(){}
224
225         subquery_spec(const char *n, name_vec *t){
226                 name = n;
227                 types = t->svec;
228                 names = t->nvec;
229                 modifiers = t->pvec;
230         };
231
232         std::string to_string(){
233                 std::string ret = name+" (";
234                 int i;
235                 for(i=0;i<types.size();++i){
236                         if(i>0) ret+=", ";
237                         ret += types[i] + " " + names[i];
238                         if(modifiers[i]->size() >0){
239                                 ret+=" ("+modifiers[i]->to_string()+") ";
240                         }
241                 }
242                 ret += ") ";
243                 return(ret);
244         };
245
246         subquery_spec *duplicate(){
247                 subquery_spec *ret = new subquery_spec();
248                 ret->name = name;
249                 ret->types = types;
250                 ret->names = names;
251
252                 return ret;
253         }
254
255
256
257 };
258
259 class subqueryspec_list{
260 public:
261         std::vector<subquery_spec *> spec_list;
262
263         subqueryspec_list(subquery_spec *ss){
264                 spec_list.push_back(ss);
265         };
266         subqueryspec_list *append(subquery_spec *ss){
267                 spec_list.push_back(ss);
268                 return this;
269         };
270 };
271
class unpack_fcn{
//		One unpacking function: the name it is known by, the function
//		text, and an integer cost.
public:
	std::string name;
	std::string fcn;
	int cost;

	//		n and f are copied as given.  c is converted with atoi(), so
	//		text that does not start with a number gives a cost of 0.
	unpack_fcn(const char *n, const char *f, const char *c)
		: name(n), fcn(f), cost(atoi(c)) {}
};
284
285 class unpack_fcn_list{
286 public:
287         std::vector<unpack_fcn *> ufcn_v;
288
289         unpack_fcn_list(unpack_fcn *u){
290                 ufcn_v.push_back(u);
291         };
292
293         unpack_fcn_list *append(unpack_fcn *u){
294                 ufcn_v.push_back(u);
295                 return this;
296         };
297 };
298
299
300
301
302 //              forward definition, needed for table_def
303 class table_exp_t;
304 struct query_list_t;
305
306 /* ============================================
307                 The schema can support several different
308                 flavors of table.
309                         PROTOCOL : the base data that an FTA can retrieve.
310                         STREAM : Data created by an FTA or a stream operator.
311                 More to come.  Perhaps this is better handled by
312                 annotations in the schema def.
313    ============================================= */
314
//		Values of table_def::schema_type.
//		PROTOCOL : the base data that an FTA can retrieve.
#define PROTOCOL_SCHEMA 1
//		STREAM : data created by an FTA or a stream operator.
#define STREAM_SCHEMA 2
//		Operator view table.
#define OPERATOR_VIEW_SCHEMA 3
//		List of unpacking functions; set by table_def(unpack_fcn_list *).
#define UNPACK_FCNS_SCHEMA 4
319
320 //                      Represent a STREAM, PROTOCOL, OPERATOR_VIEW, or UNPACK_FCN list.
321
322 class table_def{
323 private:
324         std::string table_name;
325         std::vector<field_entry *> fields;
326         param_list *base_tables;        // if PROTOCOL, the PROTOCOLS that
327                                                                 // this PROTOCOL inherits fields from.
328         int schema_type;        // STREAM_SCHEMA, PROTOCOL_SCHEMA, OPERATOR_VIEW_SCHEMA
329         int schema_id;          // the id associated with the protocol
330         std::set<int> all_schema_ids;   // schema_id plus the inherited ones
331 //              For operator_view tables
332         param_list *op_properties;
333         std::vector<subquery_spec *> qspec_list;
334         param_list *selpush;
335
336 public:
337 //              for unpacking function group specs.
338         std::vector<unpack_fcn *> ufcn_list;
339
340
341
342 //              Unpack functions defined at the PROTOCOL level are added to
343 //              PROTOCOL fields here ... implying that ony those fields
344 //              explicitly defined in the PROTOCOL (as opposed to inherited)
345 //              get the PROTOCOL-wide unpack functions.
346         table_def(const char *name, param_list *plist, param_list *ufcn_l, field_entry_list *fel, int sch_t){
347         int f;
348                 schema_id = -1;
349                         base_tables = new param_list();
350                 if(plist != NULL){
351                         std::vector<std::string> pkeys = plist->get_key_vec();
352                         for(int p=0;p<pkeys.size();++p){
353                                 std::string val = plist->val_of(pkeys[p]);
354                                 if(val!=""){
355                                         if(pkeys[p] == "schema_id" || pkeys[p] == "schemaId"){
356                                                 schema_id = atoi(val.c_str());
357                                                 if(schema_id <= 0){
358                                                         fprintf(stderr,"Error, Protocol %s has a schema_id value of %d, must be larger than 0.\n",name, schema_id);
359                                                         exit(1);
360                                                 }
361                                                 all_schema_ids.insert(schema_id);
362                                         }
363                                 }else{
364                                         base_tables->append(pkeys[p]);
365                                 }
366                         }
367                 }
368                 table_name =name;
369                 fields = fel->get_list();
370                 schema_type = sch_t;
371
372 //                      fields inherit table-level unpacking functions, if any.
373                 if(ufcn_l){
374                         for(f=0;f<fields.size();++f)
375                                 fields[f]->add_unpack_fcns(ufcn_l);
376                 }
377
378                 op_properties = new param_list();
379                 selpush = new param_list();
380         };
381
382         table_def(const char *name, param_list *oprop, field_entry_list *fel,
383                                 subqueryspec_list *ql, param_list *selp);
384
385         table_def(unpack_fcn_list *ufcn_l){
386                 schema_type = UNPACK_FCNS_SCHEMA;
387                 ufcn_list = ufcn_l->ufcn_v;
388         }
389
390         table_def(){};
391
392     table_def *make_shallow_copy(std::string n);
393
394         void mangle_subq_names(std::string mngl);
395
396         std::string get_tbl_name(){return table_name;   };
397         std::vector<field_entry *> get_fields(){return(fields); };
398
399         field_entry *get_field(int i){
400                         if(i>=0 && i<fields.size()) return(fields[i]);
401                         return NULL;
402         };
403
404         std::string get_field_name(int i){
405                         if(i>=0 && i<fields.size()) return(fields[i]->get_name());
406                         return "";
407         };
408
409         bool contains_field(std::string f);
410         bool contains_field(int f);
411
412         int get_field_idx(std::string f);
413         std::string get_type_name(std::string f);
414         param_list *get_modifier_list(std::string f);
415         std::string get_fcn(std::string f);
416
417         std::string get_op_prop(std::string s){
418                 return op_properties->val_of(s);
419         };
420
421 //              Used in generating the LFTA prefilter
422         std::string get_field_basetable(std::string f);
423
424
425         int verify_no_duplicates(std::string &err);
426         int verify_access_fcns(std::string &err);
427
428
429         std::vector<std::string> get_pred_tbls(){
430                 return base_tables->get_key_vec() ;
431         };
432
433         int add_field(field_entry *fe);
434
435         int get_schema_type(){return schema_type;};
436
437         int get_schema_id(){return schema_id;};
438
439         std::set<int> get_all_schema_ids(){ return all_schema_ids;}
440         void add_to_all_schema_ids(int sid){
441                 all_schema_ids.insert(sid);
442         }
443
444
445         std::vector<subquery_spec *> get_subqueryspecs(){return qspec_list;};
446
447         std::string to_string();
448         std::string to_stream_string(){
449                 int tmp_sch = schema_type;
450                 schema_type = STREAM_SCHEMA;
451                 std::string ret = this->to_string();
452                 schema_type = tmp_sch;
453                 return ret;
454         }
455 };
456
457
458 //              A Schema -- a collection of stream layout definitions.
459
460 class table_list{
461 private:
462         std::vector<table_def *> tbl_list;
463         //              for an unpack_fcn_list, collect from the set of
464         //              UNPACK_FCNS_SCHEMA in the table list.
465                 std::map<std::string, std::string> ufcn_fcn;
466                 std::map<std::string, int> ufcn_cost;
467
468
469 public:
470         table_list(table_def *td){tbl_list.push_back(td);       };
471         table_list(){};
472
473         table_list *append_table(table_def *td){
474                 tbl_list.push_back(td);
475                 return(this);
476         };
477
478         int add_table(table_def *td);
479         table_def *get_table(int t){
480                 if(t<0 || t>tbl_list.size()) return(NULL);
481                 return(tbl_list[t]);
482         };
483
484         int add_duplicate_table(std::string src, std::string dest){
485                 int src_pos = this->find_tbl(src);
486                 if(src_pos<0)
487                         return src_pos;
488                 table_def *dest_tbl = tbl_list[src_pos]->make_shallow_copy(dest);
489                 tbl_list.push_back(dest_tbl);
490                 return tbl_list.size()-1;
491         }
492
493         void mangle_subq_names(int pos, std::string mngl){
494                 tbl_list[pos]->mangle_subq_names(mngl);
495         }
496
497
498         int size(){return tbl_list.size();};
499
500 /////////////
501 //              Accessor methods : get table and field info without
502 //              descending into the underlying data structures.
503 //              Can specify a table by name (string), or by index (int)
504 //              (e.g. returned by get_table_ref)
505
506         int get_ufcn_cost(std::string fname){
507                 if(ufcn_cost.count(fname))
508                         return ufcn_cost[fname];
509                 else
510                         return -1;
511         }
512         std::string get_ufcn_fcn(std::string fname){
513                 if(ufcn_fcn.count(fname))
514                         return ufcn_fcn[fname];
515                 else
516                         return "ERROR_ufcn_fcn_of_"+fname+"_not_found";
517         }
518
519         std::string get_table_name(int i){
520                 if(i>tbl_list.size()) return("");
521                 else return tbl_list[i]->get_tbl_name();
522         };
523         std::vector<std::string> get_table_names();
524
525         std::vector<field_entry *> get_fields(std::string t);
526         field_entry *get_field(std::string t, int i);
527         field_entry *get_field(int t, std::string f){
528                 return tbl_list[t]->get_field(tbl_list[t]->get_field_idx(f));
529         }
530         int get_field_idx(std::string t, std::string f);
531
532         int find_tbl(std::string t);
533
534         std::vector<int> get_tblref_of_field(std::string f);
535
536         int get_table_ref(std::string t);
537
538         std::string get_type_name(int t, std::string f){
539                 return(tbl_list[t]->get_type_name(f));
540         };
541
542         param_list *get_modifier_list(int t, std::string f){
543                 return(tbl_list[t]->get_modifier_list(f));
544         };
545
546         std::string get_fcn(int t, std::string f){
547                 return(tbl_list[t]->get_fcn(f));
548         };
549
550         int get_schema_type(int t){
551                 return(tbl_list[t]->get_schema_type());
552         };
553
554         int get_schema_id(int t){
555                 return(tbl_list[t]->get_schema_id());
556         };
557
558         std::string get_op_prop(int t, std::string s){
559                 return(tbl_list[t]->get_op_prop(s));
560         };
561
562         std::vector<subquery_spec *> get_subqueryspecs(int t){
563                 return tbl_list[t]->get_subqueryspecs();
564         };
565
566
567 //              Used in generating the LFTA prefilter
568         std::string get_basetbl_name(int t, std::string f){
569                 return(tbl_list[t]->get_field_basetable(f));
570         };
571
572         bool contains_field(int t, std::string f){
573                 return(tbl_list[t]->contains_field(f));
574         };
575
576
577 //////////////
578 //              Additional methods
579
580 //                      Process field inheritance for PROTOCOL tables.
581         int unroll_tables(std::string &err);
582
583         std::string to_string();
584 };
585 #endif