Add new udafs and RMR support to gsprintconsole_ves
[com/gs-lite.git] / src / ftacmp / parse_schema.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15 #ifndef __SCHEMA_DEF_H_INCLUDED__
16 #define __SCHEMA_DEF_H_INCLUDED__
17
#include <string>
#include <vector>
#include <map>
#include <set>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
25
26
27 //              A param_list is used to represent a list of
28 //              parameters with optional values.
29
30 class param_list{
31 private:
32         std::map< std::string, std::string > pmap;
33
34 public:
35         param_list(){};
36         param_list(const char *key){
37                 pmap[key]="";
38         };
39         param_list(const char *key, const char *val){
40                 pmap[key]=val;
41         };
42
43         param_list *append(const char *key){
44                 pmap[key]="";
45                 return(this);
46         };
47         param_list *append(const char *key, const char *val){
48                 pmap[key]=val;
49                 return(this);
50         };
51         param_list *append( std::string key){
52                 pmap[key]="";
53                 return(this);
54         };
55
56         int size(){return pmap.size();};
57
58
59         bool contains_key(std::string key){
60                 return(pmap.count(key)>0);
61         }
62
63         int delete_key(std::string k){
64                 return pmap.erase(k);
65         }
66
67         std::string val_of(std::string key){
68                 if(pmap.count(key)>0)
69                         return(pmap[key]);
70                 return(std::string(""));
71         }
72
73         std::vector<std::string> get_key_vec(){
74                 std::vector<std::string> retval;
75                 std::map<std::string, std::string>::iterator mssi;
76                 for(mssi=pmap.begin();mssi!=pmap.end();++mssi){
77                         retval.push_back( (*mssi).first );
78                 }
79                 return(retval);
80         }
81
82         std::string to_string();
83 };
84
85
86 //              list of names, order matters.
87 class name_vec{
88 public:
89         std::vector<std::string> svec;
90         std::vector<std::string> nvec;
91         std::vector<param_list *> pvec;
92
93         name_vec(char *c, char *n, param_list *p){
94                 svec.push_back(c);
95                 nvec.push_back(n);
96                 if(p) pvec.push_back(p);
97                 else pvec.push_back(new param_list());
98         };
99
100         name_vec *append(char *c, char *n, param_list *p){
101                 svec.push_back(c);
102                 nvec.push_back(n);
103                 if(p) pvec.push_back(p);
104                 else pvec.push_back(new param_list());
105                 return this;
106         };
107 };
108
109
110
111 //                      A field in a STREAM or PROTOCOL
112
113
114 class field_entry{
115 private:
116         std::string type;                       // data type
117         std::string name;                       // name in a query
118         std::string function;           // access function, if any (PROTOCOL only).
119         param_list *mod_list;           // special properties.
120         std::set<std::string> ufcns;    // unpacking functions, if any.
121
122         std::string base_table;         // for hierarchically structured data sources,
123                                                                 // the bast table where the field is defined.
124                                                                 // mostly used for computing the LFTA prefilter.
125 public:
126
127         field_entry(const char *t, const char *n, const char *f, param_list *plist, param_list *ulist){
128                 if(plist == NULL)
129                         mod_list = new param_list();
130                 else
131                         mod_list = plist;
132                 if(ulist){
133                         int u;
134                         std::vector<std::string> tmp_ufl = ulist->get_key_vec();
135                         for(u=0;u<tmp_ufl.size();++u)
136                                 ufcns.insert(tmp_ufl[u]);
137                 }
138
139                 type=t; name=n; function=f;
140                 base_table = "";
141         };
142
143         field_entry(std::string t, std::string n, std::string f, param_list *plist, const std::set<std::string> &ulist){
144                 if(plist == NULL)
145                         mod_list = new param_list();
146                 else
147                         mod_list = plist;
148                 ufcns = ulist;
149                 type=t; name=n; function=f;
150                 base_table = "";
151         };
152
153         field_entry(std::string n, std::string t){
154                 name = n;
155                 type = t;
156                 mod_list = new param_list();
157         }
158
159         void add_unpack_fcns(param_list *ufl){
160                 std::vector<std::string> new_ufl = ufl->get_key_vec();
161                 int u;
162                 for(u=0;u<new_ufl.size();++u)
163                         ufcns.insert(new_ufl[u]);
164         }
165
166
167         param_list *get_modifier_list(){return mod_list;        };
168         std::string get_type(){return(type);};
169         std::string get_name(){return(name);};
170         std::string get_fcn(){return(function);};
171         std::set<std::string> get_unpack_fcns(){
172                 return ufcns;
173         }
174
175         void set_basetable(std::string b){base_table=b;};
176         std::string get_basetable(){return base_table;};
177
178         std::string to_string();
179
180         int delete_modifier(std::string k){
181                 return mod_list->delete_key(k);
182         }
183         void add_modifier(const char *k, const char *v){
184                 mod_list->append(k,v);
185         }
186         void add_modifier(const char *k){
187                 mod_list->append(k);
188         }
189
190 };
191
192
193 //              list of fields.  An intermediate parse structure.
194 //              it gets loaded into table_def.fields
195
196 class field_entry_list{
197 private:
198         std::vector<field_entry *> fl;
199
200 public:
201         field_entry_list(){};
202
203         field_entry_list(field_entry *f){
204                 fl.push_back(f);
205         };
206
207         field_entry_list *append_field(field_entry *f){
208                 fl.push_back(f);
209                 return(this);
210         };
211
212         std::vector<field_entry *> get_list(){return fl;        };
213 };
214
215 class subquery_spec{
216 public:
217         std::string name;
218         std::vector<std::string> types;
219         std::vector<std::string> names;
220         std::vector<param_list *> modifiers;
221
222
223         subquery_spec(){}
224
225         subquery_spec(const char *n, name_vec *t){
226                 name = n;
227                 types = t->svec;
228                 names = t->nvec;
229                 modifiers = t->pvec;
230         };
231
232         std::string to_string(){
233                 std::string ret = name+" (";
234                 int i;
235                 for(i=0;i<types.size();++i){
236                         if(i>0) ret+=", ";
237                         ret += types[i] + " " + names[i];
238                         if(modifiers[i]->size() >0){
239                                 ret+=" ("+modifiers[i]->to_string()+") ";
240                         }
241                 }
242                 ret += ") ";
243                 return(ret);
244         };
245
246         subquery_spec *duplicate(){
247                 subquery_spec *ret = new subquery_spec();
248                 ret->name = name;
249                 ret->types = types;
250                 ret->names = names;
251
252                 return ret;
253         }
254
255
256
257 };
258
259 class subqueryspec_list{
260 public:
261         std::vector<subquery_spec *> spec_list;
262
263         subqueryspec_list(subquery_spec *ss){
264                 spec_list.push_back(ss);
265         };
266         subqueryspec_list *append(subquery_spec *ss){
267                 spec_list.push_back(ss);
268                 return this;
269         };
270 };
271
272 class unpack_fcn{
273 public:
274         std::string name;
275         std::string fcn;
276         int cost;
277
278         unpack_fcn(const char *n, const char *f, const char *c){
279                 name = n;
280                 fcn = f;
281                 cost = atoi(c);
282         };
283 };
284
285 class unpack_fcn_list{
286 public:
287         std::vector<unpack_fcn *> ufcn_v;
288
289         unpack_fcn_list(unpack_fcn *u){
290                 ufcn_v.push_back(u);
291         };
292
293         unpack_fcn_list *append(unpack_fcn *u){
294                 ufcn_v.push_back(u);
295                 return this;
296         };
297 };
298
299
300
301
302 //              forward definition, needed for table_def
303 class table_exp_t;
304 struct query_list_t;
305
306 /* ============================================
307                 The schema can support several different
308                 flavors of table.
309                         PROTOCOL : the base data that an FTA can retrieve.
310                         STREAM : Data created by an FTA or a stream operator.
311                 More to come.  Perhaps this is better handled by
312                 annotations in the schema def.
313    ============================================= */
314
315 #define PROTOCOL_SCHEMA 1	// PROTOCOL : the base data that an FTA can retrieve.
316 #define STREAM_SCHEMA 2	// STREAM : data created by an FTA or a stream operator.
317 #define OPERATOR_VIEW_SCHEMA 3	// operator view table
318 #define UNPACK_FCNS_SCHEMA 4	// a group of unpacking function specs
319 #define WATCHLIST_SCHEMA 5	// watchlist table
320
321
322 //                      Represent a STREAM, PROTOCOL, OPERATOR_VIEW, or UNPACK_FCN list.
323
324 class table_def{
325 private:
326         std::string table_name;
327         std::vector<field_entry *> fields;
328         param_list *base_tables;        // if PROTOCOL, the PROTOCOLS that
329                                                                 // this PROTOCOL inherits fields from.
330         int schema_type;        // STREAM_SCHEMA, PROTOCOL_SCHEMA, OPERATOR_VIEW_SCHEMA
331         int schema_id;          // the id associated with the protocol
332         std::set<int> all_schema_ids;   // schema_id plus the inherited ones
333 //              For operator_view tables
334         param_list *op_properties;
335         std::vector<subquery_spec *> qspec_list;
336         param_list *selpush;
337         std::vector<std::string> key_flds;      // keys of a watchlist
338
339 public:
340 //              for unpacking function group specs.
341         std::vector<unpack_fcn *> ufcn_list;
342
343
344
345 //              Unpack functions defined at the PROTOCOL level are added to
346 //              PROTOCOL fields here ... implying that ony those fields
347 //              explicitly defined in the PROTOCOL (as opposed to inherited)
348 //              get the PROTOCOL-wide unpack functions.
349         table_def(const char *name, param_list *plist, param_list *ufcn_l, field_entry_list *fel, int sch_t){
350         int f;
351                 schema_id = -1;
352                         base_tables = new param_list();
353                 if(plist != NULL){
354                         std::vector<std::string> pkeys = plist->get_key_vec();
355                         for(int p=0;p<pkeys.size();++p){
356                                 std::string val = plist->val_of(pkeys[p]);
357                                 if(val!=""){
358                                         if(pkeys[p] == "schema_id" || pkeys[p] == "schemaId"){
359                                                 schema_id = atoi(val.c_str());
360                                                 if(schema_id <= 0){
361                                                         fprintf(stderr,"Error, Protocol %s has a schema_id value of %d, must be larger than 0.\n",name, schema_id);
362                                                         exit(1);
363                                                 }
364                                                 all_schema_ids.insert(schema_id);
365                                         }
366                                 }else{
367                                         base_tables->append(pkeys[p]);
368                                 }
369                         }
370                 }
371                 table_name =name;
372                 fields = fel->get_list();
373                 schema_type = sch_t;
374
375 //                      fields inherit table-level unpacking functions, if any.
376                 if(ufcn_l){
377                         for(f=0;f<fields.size();++f)
378                                 fields[f]->add_unpack_fcns(ufcn_l);
379                 }
380
381                 op_properties = new param_list();
382                 selpush = new param_list();
383         };
384
385         table_def(const char *name, param_list *oprop, field_entry_list *fel,
386                                 subqueryspec_list *ql, param_list *selp);
387
388         table_def(unpack_fcn_list *ufcn_l){
389                 schema_type = UNPACK_FCNS_SCHEMA;
390                 ufcn_list = ufcn_l->ufcn_v;
391         }
392
393         table_def(){};
394
395     table_def *make_shallow_copy(std::string n);
396
397         void mangle_subq_names(std::string mngl);
398
399         std::string get_tbl_name(){return table_name;   };
400         std::vector<field_entry *> get_fields(){return(fields); };
401
402         field_entry *get_field(int i){
403                         if(i>=0 && i<fields.size()) return(fields[i]);
404                         return NULL;
405         };
406
407         std::string get_field_name(int i){
408                         if(i>=0 && i<fields.size()) return(fields[i]->get_name());
409                         return "";
410         };
411
412         bool contains_field(std::string f);
413         bool contains_field(int f);
414
415         int get_field_idx(std::string f);
416         std::string get_type_name(std::string f);
417         param_list *get_modifier_list(std::string f);
418         std::string get_fcn(std::string f);
419
420         std::string get_op_prop(std::string s){
421                 return op_properties->val_of(s);
422         };
423
424         void set_keys(const std::vector<std::string> &kf){
425                 key_flds = kf;
426         }
427         std::vector<std::string> get_keys(){
428                 return key_flds;
429         }
430
431 //              Used in generating the LFTA prefilter
432         std::string get_field_basetable(std::string f);
433
434
435         int verify_no_duplicates(std::string &err);
436         int verify_access_fcns(std::string &err);
437
438
439         std::vector<std::string> get_pred_tbls(){
440                 return base_tables->get_key_vec() ;
441         };
442
443         int add_field(field_entry *fe);
444
445         int get_schema_type(){return schema_type;};
446
447         int get_schema_id(){return schema_id;};
448
449         std::set<int> get_all_schema_ids(){ return all_schema_ids;}
450         void add_to_all_schema_ids(int sid){
451                 all_schema_ids.insert(sid);
452         }
453
454
455         std::vector<subquery_spec *> get_subqueryspecs(){return qspec_list;};
456
457         std::string to_string();
458         std::string to_stream_string(){
459                 int tmp_sch = schema_type;
460                 schema_type = STREAM_SCHEMA;
461                 std::string ret = this->to_string();
462                 schema_type = tmp_sch;
463                 return ret;
464         }
465
466         bool is_stream(){
467                 return(schema_type == PROTOCOL_SCHEMA || schema_type == STREAM_SCHEMA || schema_type == OPERATOR_VIEW_SCHEMA);
468         }
469 };
470
471
472 //              A Schema -- a collection of stream layout definitions.
473
474 class table_list{
475 private:
476         std::vector<table_def *> tbl_list;
477         //              for an unpack_fcn_list, collect from the set of
478         //              UNPACK_FCNS_SCHEMA in the table list.
479                 std::map<std::string, std::string> ufcn_fcn;
480                 std::map<std::string, int> ufcn_cost;
481
482
483 public:
484         table_list(table_def *td){tbl_list.push_back(td);       };
485         table_list(){};
486
487         table_list *append_table(table_def *td){
488                 tbl_list.push_back(td);
489                 return(this);
490         };
491
492         int add_table(table_def *td);
493         table_def *get_table(int t){
494                 if(t<0 || t>tbl_list.size()) return(NULL);
495                 return(tbl_list[t]);
496         };
497
498         int add_duplicate_table(std::string src, std::string dest){
499                 int src_pos = this->find_tbl(src);
500                 if(src_pos<0)
501                         return src_pos;
502                 table_def *dest_tbl = tbl_list[src_pos]->make_shallow_copy(dest);
503                 tbl_list.push_back(dest_tbl);
504                 return tbl_list.size()-1;
505         }
506
507         void mangle_subq_names(int pos, std::string mngl){
508                 tbl_list[pos]->mangle_subq_names(mngl);
509         }
510
511
512         int size(){return tbl_list.size();};
513
514 /////////////
515 //              Accessor methods : get table and field info without
516 //              descending into the underlying data structures.
517 //              Can specify a table by name (string), or by index (int)
518 //              (e.g. returned by get_table_ref)
519
520         int get_ufcn_cost(std::string fname){
521                 if(ufcn_cost.count(fname))
522                         return ufcn_cost[fname];
523                 else
524                         return -1;
525         }
526         std::string get_ufcn_fcn(std::string fname){
527                 if(ufcn_fcn.count(fname))
528                         return ufcn_fcn[fname];
529                 else
530                         return "ERROR_ufcn_fcn_of_"+fname+"_not_found";
531         }
532
533         std::string get_table_name(int i){
534                 if(i>tbl_list.size()) return("");
535                 else return tbl_list[i]->get_tbl_name();
536         };
537         std::vector<std::string> get_table_names();
538
539         std::vector<field_entry *> get_fields(std::string t);
540         field_entry *get_field(std::string t, int i);
541         field_entry *get_field(int t, std::string f){
542                 return tbl_list[t]->get_field(tbl_list[t]->get_field_idx(f));
543         }
544         int get_field_idx(std::string t, std::string f);
545         int get_field_idx(int t, std::string f){
546                 return tbl_list[t]->get_field_idx(f);
547         }
548
549         int find_tbl(std::string t);
550
551         std::vector<int> get_tblref_of_field(std::string f);
552
553         int get_table_ref(std::string t);
554
555         std::string get_type_name(int t, std::string f){
556                 return(tbl_list[t]->get_type_name(f));
557         };
558
559         param_list *get_modifier_list(int t, std::string f){
560                 return(tbl_list[t]->get_modifier_list(f));
561         };
562
563         std::string get_fcn(int t, std::string f){
564                 return(tbl_list[t]->get_fcn(f));
565         };
566
567         int get_schema_type(int t){
568                 return(tbl_list[t]->get_schema_type());
569         };
570
571         int get_schema_id(int t){
572                 return(tbl_list[t]->get_schema_id());
573         };
574
575         bool is_stream(int t){
576                 return tbl_list[t]->is_stream();
577         }
578
579         std::string get_op_prop(int t, std::string s){
580                 return(tbl_list[t]->get_op_prop(s));
581         };
582
583         std::vector<subquery_spec *> get_subqueryspecs(int t){
584                 return tbl_list[t]->get_subqueryspecs();
585         };
586
587
588 //              Used in generating the LFTA prefilter
589         std::string get_basetbl_name(int t, std::string f){
590                 return(tbl_list[t]->get_field_basetable(f));
591         };
592
593         bool contains_field(int t, std::string f){
594                 return(tbl_list[t]->contains_field(f));
595         };
596
597
598 //////////////
599 //              Additional methods
600
601 //                      Process field inheritance for PROTOCOL tables.
602         int unroll_tables(std::string &err);
603
604         std::string to_string();
605 };
606 #endif