Added quantiling UDAFs
[com/gs-lite.git] / src / ftacmp / parse_schema.h
1 /* ------------------------------------------------\r
2 Copyright 2014 AT&T Intellectual Property\r
3    Licensed under the Apache License, Version 2.0 (the "License");\r
4    you may not use this file except in compliance with the License.\r
5    You may obtain a copy of the License at\r
6 \r
7      http://www.apache.org/licenses/LICENSE-2.0\r
8 \r
9    Unless required by applicable law or agreed to in writing, software\r
10    distributed under the License is distributed on an "AS IS" BASIS,\r
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
12    See the License for the specific language governing permissions and\r
13    limitations under the License.\r
14  ------------------------------------------- */\r
15 #ifndef __SCHEMA_DEF_H_INCLUDED__\r
16 #define __SCHEMA_DEF_H_INCLUDED__\r
17 \r
18 #include <string>\r
19 #include <vector>\r
20 #include <map>\r
21 #include<set>\r
22 \r
23 #include <string.h>\r
24 #include <stdlib.h>\r
25 \r
26 \r
27 //              A param_list is used to represent a list of\r
28 //              parameters with optional values.\r
29 \r
class param_list{
private:
	//	Parameter name -> value.  A parameter that was given without a
	//	value is stored with the empty string as its value.
	std::map< std::string, std::string > pmap;

public:
	//	Build an empty list.
	param_list(){};

	//	Build a list holding the single valueless parameter `key`.
	param_list(const char *key){
		pmap[std::string(key)] = std::string();
	};

	//	Build a list holding the single parameter `key` with value `val`.
	param_list(const char *key, const char *val){
		pmap[std::string(key)] = std::string(val);
	};

	//	Add (or overwrite) `key` with an empty value.
	//	Returns this list so that calls can be chained.
	param_list *append(const char *key){
		pmap[std::string(key)] = std::string();
		return this;
	};

	//	Add (or overwrite) `key` with value `val`.
	//	Returns this list so that calls can be chained.
	param_list *append(const char *key, const char *val){
		pmap[std::string(key)] = std::string(val);
		return this;
	};

	//	std::string flavor of the valueless append.
	param_list *append( std::string key){
		pmap[key] = std::string();
		return this;
	};

	//	Number of distinct parameter names stored.
	int size(){
		return pmap.size();
	};

	//	True iff a parameter named `key` is present.
	bool contains_key(std::string key){
		return pmap.find(key) != pmap.end();
	}

	//	Remove the parameter named `k`.
	//	Returns the number of entries removed (0 or 1).
	int delete_key(std::string k){
		return pmap.erase(k);
	}

	//	Value stored for `key`, or the empty string if `key` is absent.
	//	The lookup never inserts a new entry.
	std::string val_of(std::string key){
		std::map<std::string, std::string>::iterator hit = pmap.find(key);
		if(hit == pmap.end())
			return std::string("");
		return hit->second;
	}

	//	All parameter names, in the map's (sorted) iteration order.
	std::vector<std::string> get_key_vec(){
		std::vector<std::string> keys;
		std::map<std::string, std::string>::iterator it = pmap.begin();
		while(it != pmap.end()){
			keys.push_back(it->first);
			++it;
		}
		return keys;
	}

	//	Defined outside this header.
	std::string to_string();
};
84 \r
85 \r
86 //              list of names, order matters.\r
87 class name_vec{\r
88 public:\r
89         std::vector<std::string> svec;\r
90         std::vector<std::string> nvec;\r
91         std::vector<param_list *> pvec;\r
92 \r
93         name_vec(char *c, char *n, param_list *p){\r
94                 svec.push_back(c);\r
95                 nvec.push_back(n);\r
96                 if(p) pvec.push_back(p);\r
97                 else pvec.push_back(new param_list());\r
98         };\r
99 \r
100         name_vec *append(char *c, char *n, param_list *p){\r
101                 svec.push_back(c);\r
102                 nvec.push_back(n);\r
103                 if(p) pvec.push_back(p);\r
104                 else pvec.push_back(new param_list());\r
105                 return this;\r
106         };\r
107 };\r
108 \r
109 \r
110 \r
111 //                      A field in a STREAM or PROTOCOL\r
112 \r
113 \r
114 class field_entry{\r
115 private:\r
116         std::string type;                       // data type\r
117         std::string name;                       // name in a query\r
118         std::string function;           // access function, if any (PROTOCOL only).\r
119         param_list *mod_list;           // special properties.\r
120         std::set<std::string> ufcns;    // unpacking functions, if any.\r
121 \r
122         std::string base_table;         // for hierarchically structured data sources,\r
123                                                                 // the bast table where the field is defined.\r
124                                                                 // mostly used for computing the LFTA prefilter.\r
125 public:\r
126 \r
127         field_entry(const char *t, const char *n, const char *f, param_list *plist, param_list *ulist){\r
128                 if(plist == NULL)\r
129                         mod_list = new param_list();\r
130                 else\r
131                         mod_list = plist;\r
132                 if(ulist){\r
133                         int u;\r
134                         std::vector<std::string> tmp_ufl = ulist->get_key_vec();\r
135                         for(u=0;u<tmp_ufl.size();++u)\r
136                                 ufcns.insert(tmp_ufl[u]);\r
137                 }\r
138 \r
139                 type=t; name=n; function=f;\r
140                 base_table = "";\r
141         };\r
142 \r
143         field_entry(std::string t, std::string n, std::string f, param_list *plist, const std::set<std::string> &ulist){\r
144                 if(plist == NULL)\r
145                         mod_list = new param_list();\r
146                 else\r
147                         mod_list = plist;\r
148                 ufcns = ulist;\r
149                 type=t; name=n; function=f;\r
150                 base_table = "";\r
151         };\r
152 \r
153         field_entry(std::string n, std::string t){\r
154                 name = n;\r
155                 type = t;\r
156                 mod_list = new param_list();\r
157         }\r
158 \r
159         void add_unpack_fcns(param_list *ufl){\r
160                 std::vector<std::string> new_ufl = ufl->get_key_vec();\r
161                 int u;\r
162                 for(u=0;u<new_ufl.size();++u)\r
163                         ufcns.insert(new_ufl[u]);\r
164         }\r
165 \r
166 \r
167         param_list *get_modifier_list(){return mod_list;        };\r
168         std::string get_type(){return(type);};\r
169         std::string get_name(){return(name);};\r
170         std::string get_fcn(){return(function);};\r
171         std::set<std::string> get_unpack_fcns(){\r
172                 return ufcns;\r
173         }\r
174 \r
175         void set_basetable(std::string b){base_table=b;};\r
176         std::string get_basetable(){return base_table;};\r
177 \r
178         std::string to_string();\r
179 \r
180         int delete_modifier(std::string k){\r
181                 return mod_list->delete_key(k);\r
182         }\r
183         void add_modifier(const char *k, const char *v){\r
184                 mod_list->append(k,v);\r
185         }\r
186         void add_modifier(const char *k){\r
187                 mod_list->append(k);\r
188         }\r
189 \r
190 };\r
191 \r
192 \r
193 //              list of fields.  An intermediate parse structure.\r
194 //              it gets loaded into table_def.fields\r
195 \r
196 class field_entry_list{\r
197 private:\r
198         std::vector<field_entry *> fl;\r
199 \r
200 public:\r
201         field_entry_list(){};\r
202 \r
203         field_entry_list(field_entry *f){\r
204                 fl.push_back(f);\r
205         };\r
206 \r
207         field_entry_list *append_field(field_entry *f){\r
208                 fl.push_back(f);\r
209                 return(this);\r
210         };\r
211 \r
212         std::vector<field_entry *> get_list(){return fl;        };\r
213 };\r
214 \r
215 class subquery_spec{\r
216 public:\r
217         std::string name;\r
218         std::vector<std::string> types;\r
219         std::vector<std::string> names;\r
220         std::vector<param_list *> modifiers;\r
221 \r
222 \r
223         subquery_spec(){}\r
224 \r
225         subquery_spec(const char *n, name_vec *t){\r
226                 name = n;\r
227                 types = t->svec;\r
228                 names = t->nvec;\r
229                 modifiers = t->pvec;\r
230         };\r
231 \r
232         std::string to_string(){\r
233                 std::string ret = name+" (";\r
234                 int i;\r
235                 for(i=0;i<types.size();++i){\r
236                         if(i>0) ret+=", ";\r
237                         ret += types[i] + " " + names[i];\r
238                         if(modifiers[i]->size() >0){\r
239                                 ret+=" ("+modifiers[i]->to_string()+") ";\r
240                         }\r
241                 }\r
242                 ret += ") ";\r
243                 return(ret);\r
244         };\r
245 \r
246         subquery_spec *duplicate(){\r
247                 subquery_spec *ret = new subquery_spec();\r
248                 ret->name = name;\r
249                 ret->types = types;\r
250                 ret->names = names;\r
251 \r
252                 return ret;\r
253         }\r
254 \r
255 \r
256 \r
257 };\r
258 \r
259 class subqueryspec_list{\r
260 public:\r
261         std::vector<subquery_spec *> spec_list;\r
262 \r
263         subqueryspec_list(subquery_spec *ss){\r
264                 spec_list.push_back(ss);\r
265         };\r
266         subqueryspec_list *append(subquery_spec *ss){\r
267                 spec_list.push_back(ss);\r
268                 return this;\r
269         };\r
270 };\r
271 \r
class unpack_fcn{
public:
	std::string name;
	std::string fcn;
	int cost;

	//	All three arguments arrive as C strings.  `n` and `f` are stored
	//	verbatim; `c` is converted with atoi(), so text that does not
	//	start with a number yields a cost of 0.
	unpack_fcn(const char *n, const char *f, const char *c)
		: name(n), fcn(f), cost(atoi(c))
	{
	};
};
284 \r
285 class unpack_fcn_list{\r
286 public:\r
287         std::vector<unpack_fcn *> ufcn_v;\r
288 \r
289         unpack_fcn_list(unpack_fcn *u){\r
290                 ufcn_v.push_back(u);\r
291         };\r
292 \r
293         unpack_fcn_list *append(unpack_fcn *u){\r
294                 ufcn_v.push_back(u);\r
295                 return this;\r
296         };\r
297 };\r
298 \r
299 \r
300 \r
301 \r
302 //              forward definition, needed for table_def\r
303 class table_exp_t;\r
304 struct query_list_t;\r
305 \r
306 /* ============================================\r
307                 The schema can support several different\r
308                 flavors of table.\r
309                         PROTOCOL : the base data that an FTA can retrieve.\r
310                         STREAM : Data created by an FTA or a stream operator.\r
311                 More to come.  Perhaps this is better handled by\r
312                 annotations in the schema def.\r
313    ============================================= */\r
314 \r
315 #define PROTOCOL_SCHEMA 1\r
316 #define STREAM_SCHEMA 2\r
317 #define OPERATOR_VIEW_SCHEMA 3\r
318 #define UNPACK_FCNS_SCHEMA 4\r
319 \r
320 //                      Represent a STREAM, PROTOCOL, OPERATOR_VIEW, or UNPACK_FCN list.\r
321 \r
322 class table_def{\r
323 private:\r
324         std::string table_name;\r
325         std::vector<field_entry *> fields;\r
326         param_list *base_tables;        // if PROTOCOL, the PROTOCOLS that\r
327                                                                 // this PROTOCOL inherits fields from.\r
328         int schema_type;        // STREAM_SCHEMA, PROTOCOL_SCHEMA, OPERATOR_VIEW_SCHEMA\r
329 //              For operator_view tables\r
330         param_list *op_properties;\r
331         std::vector<subquery_spec *> qspec_list;\r
332         param_list *selpush;\r
333 \r
334 public:\r
335 //              for unpacking function group specs.\r
336         std::vector<unpack_fcn *> ufcn_list;\r
337 \r
338 \r
339 \r
340 //              Unpack functions defined at the PROTOCOL level are added to\r
341 //              PROTOCOL fields here ... implying that ony those fields\r
342 //              explicitly defined in the PROTOCOL (as opposed to inherited)\r
343 //              get the PROTOCOL-wide unpack functions.\r
344         table_def(const char *name, param_list *plist, param_list *ufcn_l, field_entry_list *fel, int sch_t){\r
345         int f;\r
346                 if(plist == NULL)\r
347                         base_tables = new param_list();\r
348                 else\r
349                         base_tables = plist;\r
350                 table_name =name;\r
351                 fields = fel->get_list();\r
352                 schema_type = sch_t;\r
353 \r
354 //                      fields inherit table-level unpacking functions, if any.\r
355                 if(ufcn_l){\r
356                         for(f=0;f<fields.size();++f)\r
357                                 fields[f]->add_unpack_fcns(ufcn_l);\r
358                 }\r
359 \r
360                 op_properties = new param_list();\r
361                 selpush = new param_list();\r
362         };\r
363 \r
364         table_def(const char *name, param_list *oprop, field_entry_list *fel,\r
365                                 subqueryspec_list *ql, param_list *selp);\r
366 \r
367         table_def(unpack_fcn_list *ufcn_l){\r
368                 schema_type = UNPACK_FCNS_SCHEMA;\r
369                 ufcn_list = ufcn_l->ufcn_v;\r
370         }\r
371 \r
372         table_def(){};\r
373 \r
374     table_def *make_shallow_copy(std::string n);\r
375 \r
376         void mangle_subq_names(std::string mngl);\r
377 \r
378         std::string get_tbl_name(){return table_name;   };\r
379         std::vector<field_entry *> get_fields(){return(fields); };\r
380 \r
381         field_entry *get_field(int i){\r
382                         if(i>=0 && i<fields.size()) return(fields[i]);\r
383                         return NULL;\r
384         };\r
385 \r
386         std::string get_field_name(int i){\r
387                         if(i>=0 && i<fields.size()) return(fields[i]->get_name());\r
388                         return "";\r
389         };\r
390 \r
391         bool contains_field(std::string f);\r
392         bool contains_field(int f);\r
393 \r
394         int get_field_idx(std::string f);\r
395         std::string get_type_name(std::string f);\r
396         param_list *get_modifier_list(std::string f);\r
397         std::string get_fcn(std::string f);\r
398 \r
399         std::string get_op_prop(std::string s){\r
400                 return op_properties->val_of(s);\r
401         };\r
402 \r
403 //              Used in generating the LFTA prefilter\r
404         std::string get_field_basetable(std::string f);\r
405 \r
406 \r
407         int verify_no_duplicates(std::string &err);\r
408         int verify_access_fcns(std::string &err);\r
409 \r
410 \r
411         std::vector<std::string> get_pred_tbls(){\r
412                 return base_tables->get_key_vec() ;\r
413         };\r
414 \r
415         int add_field(field_entry *fe);\r
416 \r
417         int get_schema_type(){return schema_type;};\r
418 \r
419         std::vector<subquery_spec *> get_subqueryspecs(){return qspec_list;};\r
420 \r
421         std::string to_string();\r
422         std::string to_stream_string(){\r
423                 int tmp_sch = schema_type;\r
424                 schema_type = STREAM_SCHEMA;\r
425                 std::string ret = this->to_string();\r
426                 schema_type = tmp_sch;\r
427                 return ret;\r
428         }\r
429 };\r
430 \r
431 \r
432 //              A Schema -- a collection of stream layout definitions.\r
433 \r
434 class table_list{\r
435 private:\r
436         std::vector<table_def *> tbl_list;\r
437         //              for an unpack_fcn_list, collect from the set of\r
438         //              UNPACK_FCNS_SCHEMA in the table list.\r
439                 std::map<std::string, std::string> ufcn_fcn;\r
440                 std::map<std::string, int> ufcn_cost;\r
441 \r
442 \r
443 public:\r
444         table_list(table_def *td){tbl_list.push_back(td);       };\r
445         table_list(){};\r
446 \r
447         table_list *append_table(table_def *td){\r
448                 tbl_list.push_back(td);\r
449                 return(this);\r
450         };\r
451 \r
452         int add_table(table_def *td);\r
453         table_def *get_table(int t){\r
454                 if(t<0 || t>tbl_list.size()) return(NULL);\r
455                 return(tbl_list[t]);\r
456         };\r
457 \r
458         int add_duplicate_table(std::string src, std::string dest){\r
459                 int src_pos = this->find_tbl(src);\r
460                 if(src_pos<0)\r
461                         return src_pos;\r
462                 table_def *dest_tbl = tbl_list[src_pos]->make_shallow_copy(dest);\r
463                 tbl_list.push_back(dest_tbl);\r
464                 return tbl_list.size()-1;\r
465         }\r
466 \r
467         void mangle_subq_names(int pos, std::string mngl){\r
468                 tbl_list[pos]->mangle_subq_names(mngl);\r
469         }\r
470 \r
471 \r
472         int size(){return tbl_list.size();};\r
473 \r
474 /////////////\r
475 //              Accessor methods : get table and field info without\r
476 //              descending into the underlying data structures.\r
477 //              Can specify a table by name (string), or by index (int)\r
478 //              (e.g. returned by get_table_ref)\r
479 \r
480         int get_ufcn_cost(std::string fname){\r
481                 if(ufcn_cost.count(fname))\r
482                         return ufcn_cost[fname];\r
483                 else\r
484                         return -1;\r
485         }\r
486         std::string get_ufcn_fcn(std::string fname){\r
487                 if(ufcn_fcn.count(fname))\r
488                         return ufcn_fcn[fname];\r
489                 else\r
490                         return "ERROR_ufcn_fcn_of_"+fname+"_not_found";\r
491         }\r
492 \r
493         std::string get_table_name(int i){\r
494                 if(i>tbl_list.size()) return("");\r
495                 else return tbl_list[i]->get_tbl_name();\r
496         };\r
497         std::vector<std::string> get_table_names();\r
498 \r
499         std::vector<field_entry *> get_fields(std::string t);\r
500         field_entry *get_field(std::string t, int i);\r
501         field_entry *get_field(int t, std::string f){\r
502                 return tbl_list[t]->get_field(tbl_list[t]->get_field_idx(f));\r
503         }\r
504         int get_field_idx(std::string t, std::string f);\r
505 \r
506         int find_tbl(std::string t);\r
507 \r
508         std::vector<int> get_tblref_of_field(std::string f);\r
509 \r
510         int get_table_ref(std::string t);\r
511 \r
512         std::string get_type_name(int t, std::string f){\r
513                 return(tbl_list[t]->get_type_name(f));\r
514         };\r
515 \r
516         param_list *get_modifier_list(int t, std::string f){\r
517                 return(tbl_list[t]->get_modifier_list(f));\r
518         };\r
519 \r
520         std::string get_fcn(int t, std::string f){\r
521                 return(tbl_list[t]->get_fcn(f));\r
522         };\r
523 \r
524         int get_schema_type(int t){\r
525                 return(tbl_list[t]->get_schema_type());\r
526         };\r
527 \r
528         std::string get_op_prop(int t, std::string s){\r
529                 return(tbl_list[t]->get_op_prop(s));\r
530         };\r
531 \r
532         std::vector<subquery_spec *> get_subqueryspecs(int t){\r
533                 return tbl_list[t]->get_subqueryspecs();\r
534         };\r
535 \r
536 \r
537 //              Used in generating the LFTA prefilter\r
538         std::string get_basetbl_name(int t, std::string f){\r
539                 return(tbl_list[t]->get_field_basetable(f));\r
540         };\r
541 \r
542         bool contains_field(int t, std::string f){\r
543                 return(tbl_list[t]->contains_field(f));\r
544         };\r
545 \r
546 \r
547 //////////////\r
548 //              Additional methods\r
549 \r
550 //                      Process field inheritance for PROTOCOL tables.\r
551         int unroll_tables(std::string &err);\r
552 \r
553         std::string to_string();\r
554 };\r
555 #endif\r