/* ------------------------------------------------
Copyright 2014 AT&T Intellectual Property
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
------------------------------------------- */
\r
15 #include"stream_query.h"
\r
16 #include"generate_utils.h"
\r
17 #include"analyze_fta.h"
\r
22 static char tmpstr[500];
\r
24 using namespace std;
\r
27 // Create a query plan from a query node and an existing
\r
28 // query plan. Use for lfta queries, the parent query plan provides the defines.
\r
30 stream_query::stream_query(qp_node *qnode, stream_query *parent){
\r
31 query_plan.push_back(qnode);
\r
34 attributes = qnode->get_fields();
\r
35 parameters = qnode->get_param_tbl();
\r
36 defines = parent->defines;
\r
37 query_name = qnode->get_node_name();
\r
40 // Copy the query plan.
\r
41 stream_query::stream_query(stream_query &src){
\r
42 query_plan = src.query_plan;
\r
45 attributes = src.attributes;
\r
46 parameters = src.parameters;
\r
47 defines = src.defines;
\r
48 query_name = src.query_name;
\r
53 // Create a query plan from an analyzed parse tree.
\r
54 // Perform analyses to find the output node, input nodes, etc.
\r
56 stream_query::stream_query(query_summary_class *qs,table_list *Schema){
\r
57 // Generate the query plan nodes from the analyzed parse tree.
\r
58 // There is only one for now, so just assign the return value
\r
59 // of create_query_nodes to query_plan
\r
61 query_plan = create_query_nodes(qs,Schema);
\r
63 if(query_plan.size() == 0){
\r
64 fprintf(stderr,"INTERNAL ERROR, zero-size query plan in stream_query::stream_query\n");
\r
67 for(i=0;i<query_plan.size();++i){
\r
69 if(query_plan[i]->get_error_code() != 0){
\r
70 error_code = query_plan[i]->get_error_code();
\r
71 err_str += query_plan[i]->get_error_str();
\r
75 qhead = query_plan.size()-1;
\r
81 stream_query * stream_query::add_query(query_summary_class *qs,table_list *Schema){
\r
82 // Add another query block to the query plan
\r
84 vector<qp_node *> new_nodes = create_query_nodes(qs, Schema);
\r
85 query_plan.insert(query_plan.end(),new_nodes.begin(), new_nodes.end());
\r
89 stream_query * stream_query::add_query(stream_query &src){
\r
90 // Add another query block to the query plan
\r
92 query_plan.insert(query_plan.end(),src.query_plan.begin(), src.query_plan.end());
\r
97 void stream_query::generate_protocol_se(map<string,stream_query *> &sq_map, table_list *Schema){
\r
100 // Mapping fields to protocol fields requires schema binding.
\r
101 // First ensure all nodes are in the schema.
\r
102 for(n=0;n<query_plan.size();++n){
\r
103 if(query_plan[n] != NULL){
\r
104 Schema->add_table(query_plan[n]->get_fields());
\r
107 // Now do schema binding
\r
108 for(n=0;n<query_plan.size();++n){
\r
109 if(query_plan[n] != NULL){
\r
110 query_plan[n]->bind_to_schema(Schema);
\r
114 // create name-to-index map
\r
115 map<string, int> name_to_node;
\r
116 for(n=0;n<query_plan.size();++n){
\r
118 name_to_node[query_plan[n]->get_node_name()] = n;
\r
122 // Create a list of the nodes to process, in order.
\r
123 // Search from the root down.
\r
124 // ASSUME tree plan.
\r
126 list<int> search_q;
\r
127 list<int> work_list;
\r
129 search_q.push_back(qhead);
\r
130 while(! search_q.empty()){
\r
131 int the_q = search_q.front();
\r
132 search_q.pop_front(); work_list.push_front(the_q);
\r
133 vector<int> the_pred = query_plan[the_q]->get_predecessors();
\r
134 for(i=0;i<the_pred.size();i++)
\r
135 search_q.push_back(the_pred[i]);
\r
138 // Scan through the work list, from the front,
\r
139 // gather the qp_node's ref'd, and call the
\r
140 // protocol se generator. Pass NULL for
\r
141 // sources not found - presumably user-defined operator
\r
143 while(! work_list.empty()){
\r
144 int the_q = work_list.front();
\r
145 work_list.pop_front();
\r
146 vector<qp_node *> q_sources;
\r
147 vector<tablevar_t *> q_input_tbls = query_plan[the_q]->get_input_tbls();
\r
148 for(i=0;i<q_input_tbls.size();++i){
\r
149 string itbl_nm = q_input_tbls[i]->get_schema_name();
\r
150 if(name_to_node.count(itbl_nm)>0){
\r
151 q_sources.push_back(query_plan[name_to_node[itbl_nm]]);
\r
152 }else if(sq_map.count(itbl_nm)>0){
\r
153 q_sources.push_back(sq_map[itbl_nm]->get_query_head());
\r
155 q_sources.push_back(NULL);
\r
158 query_plan[the_q]->create_protocol_se(q_sources, Schema);
\r
161 //////////////////////////////////////////////////////////
\r
162 // trust but verify
\r
166 for(i=0;i<query_plan.size();++i){
\r
168 printf("query node %s, type=%s:\n",query_plan[i]->get_node_name().c_str(),
\r
169 query_plan[i]->node_type().c_str());
\r
170 map<std::string, scalarexp_t *> *pse_map = query_plan[i]->get_protocol_se();
\r
171 map<std::string, scalarexp_t *>::iterator mssi;
\r
172 for(mssi=pse_map->begin();mssi!=pse_map->end();++mssi){
\r
174 printf("\t%s : %s\n",(*mssi).first.c_str(), (*mssi).second->to_string().c_str());
\r
176 printf("\t%s : NULL\n",(*mssi).first.c_str());
\r
178 if(query_plan[i]->node_type() == "filter_join" || query_plan[i]->node_type() == "join_eq_hash_qpn"){
\r
179 vector<scalarexp_t *> pse_l;
\r
180 vector<scalarexp_t *> pse_r;
\r
181 if(query_plan[i]->node_type() == "filter_join"){
\r
182 pse_l = ((filter_join_qpn *)query_plan[i])->get_hash_l();
\r
183 pse_r = ((filter_join_qpn *)query_plan[i])->get_hash_r();
\r
185 if(query_plan[i]->node_type() == "join_eq_hash_qpn"){
\r
186 pse_l = ((join_eq_hash_qpn *)query_plan[i])->get_hash_l();
\r
187 pse_r = ((join_eq_hash_qpn *)query_plan[i])->get_hash_r();
\r
190 for(p=0;p<pse_l.size();++p){
\r
191 if(pse_l[p] != NULL)
\r
192 printf("\t\t%s = ",pse_l[p]->to_string().c_str());
\r
194 printf("\t\tNULL = ");
\r
195 if(pse_r[p] != NULL)
\r
196 printf("%s\n",pse_r[p]->to_string().c_str());
\r
201 if(query_plan[i]->node_type() == "sgah_qpn" || query_plan[i]->node_type() == "rsgah_qpn" || query_plan[i]->node_type() == "sgahcwcb_qpn"){
\r
202 vector<scalarexp_t *> pseg;
\r
203 if(query_plan[i]->node_type() == "sgah_qpn")
\r
204 pseg = ((sgah_qpn *)query_plan[i])->get_gb_sources();
\r
205 if(query_plan[i]->node_type() == "rsgah_qpn")
\r
206 pseg = ((rsgah_qpn *)query_plan[i])->get_gb_sources();
\r
207 if(query_plan[i]->node_type() == "sgahcwcb_qpn")
\r
208 pseg = ((sgahcwcb_qpn *)query_plan[i])->get_gb_sources();
\r
210 for(g=0;g<pseg.size();g++){
\r
211 if(pseg[g] != NULL)
\r
212 printf("\t\tgb %d = %s\n",g,pseg[g]->to_string().c_str());
\r
214 printf("\t\tgb %d = NULL\n",g);
\r
223 bool stream_query::generate_linkage(){
\r
224 bool create_failed = false;
\r
227 // Clear any leftover linkages
\r
228 for(n=0;n<query_plan.size();++n){
\r
230 query_plan[n]->clear_predecessors();
\r
231 query_plan[n]->clear_successors();
\r
236 // create name-to-index map
\r
237 map<string, int> name_to_node;
\r
238 for(n=0;n<query_plan.size();++n){
\r
240 name_to_node[query_plan[n]->get_node_name()] = n;
\r
244 // Do the 2-way linkage.
\r
245 for(n=0;n<query_plan.size();++n){
\r
247 vector<tablevar_t *> fm = query_plan[n]->get_input_tbls();
\r
248 for(f=0;f<fm.size();++f){
\r
249 string src_tbl = fm[f]->get_schema_name();
\r
250 if(name_to_node.count(src_tbl)>0){
\r
251 int s = name_to_node[src_tbl];
\r
252 query_plan[n]->add_predecessor(s);
\r
253 query_plan[s]->add_successor(n);
\r
259 // Find the head (no successors) and the tails (at least one
\r
260 // predecessor is external).
\r
261 // Verify that there is only one head,
\r
262 // and that all other nodes have one successor (because
\r
263 // right now I can only handle trees).
\r
265 qhead = -1; // no head yet found.
\r
266 for(n=0;n<query_plan.size();++n){
\r
268 vector<int> succ = query_plan[n]->get_successors();
\r
270 if(succ.size() > 1){
\r
271 fprintf(stderr,"ERROR, query node %s supplies data to multiple nodes, but currently only tree query plans are supported:\n",query_plan[n]->get_node_name().c_str());
\r
272 for(s=0;s<succ.size();++s){
\r
273 fprintf(stderr,"%s ",query_plan[succ[s]]->get_node_name().c_str());
\r
275 fprintf(stderr,"\n");
\r
276 create_failed = true;
\r
279 if(succ.size() == 0){
\r
281 fprintf(stderr,"ERROR, query nodes %s and %s both have zero successors.\n",query_plan[n]->get_node_name().c_str(), query_plan[qhead]->get_node_name().c_str());
\r
282 create_failed = true;
\r
287 if(query_plan[n]->n_predecessors() < query_plan[n]->get_input_tbls().size()){
\r
288 qtail.push_back(n);
\r
293 return create_failed;
\r
296 // After the collection of query plan nodes is generated,
\r
297 // analyze their structure to link them up into a tree (or dag?).
\r
298 // Verify that the structure is acceptable.
\r
299 // Do some other analysis and verification tasks (?)
\r
300 // then gather summary information.
\r
301 int stream_query::generate_plan(table_list *Schema){
\r
303 // The first thing to do is verify that the query plan
\r
304 // nodes were successfully created.
\r
305 bool create_failed = false;
\r
307 for(n=0;n<query_plan.size();++n){
\r
308 if(query_plan[n]!=NULL && query_plan[n]->get_error_code()){
\r
309 fprintf(stderr,"%s",query_plan[n]->get_error_str().c_str());
\r
310 create_failed = true;
\r
314 for(n=0;n<query_plan.size();++n){
\r
315 if(query_plan[n] != NULL){
\r
316 string nstr = query_plan[n]->get_node_name();
\r
317 printf("In generate_plan, node %d is %s, reads from:\n\t",n,nstr.c_str());
\r
318 vector<tablevar_t *> inv = query_plan[n]->get_input_tbls();
\r
320 for(nn=0;nn<inv.size();nn++){
\r
321 printf("%s (%d) ",inv[nn]->to_string().c_str(),inv[nn]->get_schema_ref());
\r
328 if(create_failed) return(1);
\r
330 // Here, link up the query nodes, then verify that the
\r
331 // structure is acceptable (single root, no cycles, no stranded
\r
333 create_failed = generate_linkage();
\r
334 if(create_failed) return -1;
\r
337 // Here, do optimizations such as predicate pushing,
\r
338 // join rearranging, etc.
\r
339 // Nothing to do yet.
\r
342 for(n=0;n<query_plan.size();++n){
\r
343 if(query_plan[n] != NULL){
\r
344 string nstr = query_plan[n]->get_node_name();
\r
345 printf("B generate_plan, node %d is %s, reads from:\n\t",n,nstr.c_str());
\r
346 vector<tablevar_t *> inv = query_plan[n]->get_input_tbls();
\r
348 for(nn=0;nn<inv.size();nn++){
\r
349 printf("%s (%d) ",inv[nn]->to_string().c_str(),inv[nn]->get_schema_ref());
\r
354 printf("qhead=%d, qtail = ",qhead);
\r
356 for(nn=0;nn<qtail.size();++nn)
\r
357 printf("%d ",qtail[nn]);
\r
361 // Collect query summaries. The query is represented by its head node.
\r
362 query_name = query_plan[qhead]->get_node_name();
\r
363 attributes = query_plan[qhead]->get_fields();
\r
364 // TODO: The params and defines require a lot more thought.
\r
365 parameters = query_plan[qhead]->get_param_tbl();
\r
366 defines = query_plan[qhead]->get_definitions();
\r
371 void stream_query::add_output_operator(ospec_str *o){
\r
372 output_specs.push_back(o);
\r
376 void stream_query::get_external_libs(set<string> &libset){
\r
379 for(qn=0;qn<query_plan.size();++qn){
\r
380 if(query_plan[qn] != NULL){
\r
381 vector<string> op_libs = query_plan[qn]->external_libs();
\r
382 for(i=0;i<op_libs.size();++i){
\r
383 libset.insert(op_libs[i]);
\r
388 for(qn=0;qn<output_operators.size();++qn){
\r
389 if(output_operators[qn] != NULL){
\r
390 vector<string> op_libs = output_operators[qn]->external_libs();
\r
391 for(i=0;i<op_libs.size();++i){
\r
392 libset.insert(op_libs[i]);
\r
400 // Split into LFTA, HFTA components.
\r
401 // Split this query into LFTA and HFTA queries.
\r
402 // Four possible outcomes:
\r
403 // 1) the query reads from a protocol, but does not need to
\r
404 // split (can be evaluated as an LFTA).
\r
405 // The lfta query is the only element in the return vector,
\r
406 // and hfta_returned is false.
\r
407 // 2) the query reads from no protocol, and therefore cannot be split.
\r
408 // The hfta query is the only element in the return vector,
\r
409 // and hfta_returned is true.
\r
410 // 3) reads from at least one protocol, but cannot be split : failure.
\r
411 // return vector is empty, the error conditions are written
\r
412 // in err_str and error_code
\r
413 // 4) The query splits into an hfta query and one or more LFTA queries.
\r
414 // the return vector has two or more elements, and hfta_returned
\r
415 // is true. The last element is the HFTA.
\r
417 vector<stream_query *> stream_query::split_query(ext_fcn_list *Ext_fcns, table_list *Schema, bool &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
\r
418 vector<stream_query *> queries;
\r
422 hfta_returned = false; // assume until proven otherwise
\r
424 for(l=0;l<qtail.size();++l){
\r
425 int leaf = qtail[l];
\r
428 vector<qp_node *> qnodes = query_plan[leaf]->split_node_for_fta(Ext_fcns, Schema, qp_hfta, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
\r
431 if(qnodes.size() == 0 || query_plan[leaf]->get_error_code()){ // error
\r
432 //printf("error\n");
\r
433 error_code = query_plan[leaf]->get_error_code();
\r
434 err_str = query_plan[leaf]->get_error_str();
\r
435 vector<stream_query *> null_result;
\r
436 return(null_result);
\r
438 if(qnodes.size() == 1 && qp_hfta){ // nothing happened
\r
439 //printf("no change\n");
\r
440 query_plan[leaf] = qnodes[0];
\r
442 if(qnodes.size() == 1 && !qp_hfta){ // push to lfta
\r
443 //printf("lfta only\n");
\r
444 queries.push_back(new stream_query(qnodes[0], this));
\r
445 vector<int> succ = query_plan[leaf]->get_successors();
\r
446 for(s=0;s<succ.size();++s){
\r
447 query_plan[succ[s]]->remove_predecessor(leaf);
\r
449 query_plan[leaf] = NULL; // delete it?
\r
451 if(qnodes.size() > 1){ // actual splitting occurred.
\r
452 if(!qp_hfta){ // internal consistency check.
\r
454 err_str = "INTERNAL ERROR: mulitple nodes returned by split_node_for_fta, but none are hfta nodes.\n";
\r
455 vector<stream_query *> null_result;
\r
456 return(null_result);
\r
459 for(q=0;q<qnodes.size()-qp_hfta;++q){ // process lfta nodes
\r
460 //printf("creating lfta %d (%s)\n",q,qnodes[q]->get_node_name().c_str());
\r
461 queries.push_back(new stream_query(qnodes[q], this));
\r
463 // Use new hfta node
\r
464 query_plan[leaf] = qnodes[qnodes.size()-1];
\r
465 // Add in any extra hfta nodes
\r
466 for(q=qnodes.size()-qp_hfta;q<qnodes.size()-1;++q)
\r
467 query_plan.push_back(qnodes[q]);
\r
473 for(q=0;q<query_plan.size();++q){
\r
474 if(query_plan[q] != NULL){
\r
475 n_ifprefs += query_plan[q]->count_ifp_refs(ifps);
\r
476 hfta_returned = true;
\r
481 set<string>::iterator ssi;
\r
482 err_str += "ERROR, unresolved interface parameters in HFTA:\n";
\r
483 for(ssi=ifps.begin();ssi!=ifps.end();++ssi){
\r
484 err_str += (*ssi)+" ";
\r
488 vector<stream_query *> null_result;
\r
489 return(null_result);
\r
494 if(generate_linkage()){
\r
495 fprintf(stderr,"INTERNAL ERROR, generate_linkage failed in split_query.\n");
\r
498 queries.push_back(this);
\r
506 vector<table_exp_t *> stream_query::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string silo_nm){
\r
507 vector<table_exp_t *> subqueries;
\r
510 string root_name = this->get_output_tabledef()->get_tbl_name();
\r
513 for(l=0;l<qtail.size();++l){
\r
514 int leaf = qtail[l];
\r
515 vector<table_exp_t *> new_qnodes = query_plan[leaf]->extract_opview(Schema, qnodes, opviews, root_name, silo_nm);
\r
517 for(q=0;q<new_qnodes.size();++q){ // process lfta nodes
\r
518 subqueries.push_back( new_qnodes[q]);
\r
522 return(subqueries);
\r
528 string stream_query::make_schema(){
\r
529 return make_schema(qhead);
\r
532 string stream_query::make_schema(int q){
\r
533 string ret="FTA{\n\n";
\r
535 ret += query_plan[q]->get_fields()->to_string();
\r
537 ret += "DEFINE{\n";
\r
538 ret += "\tquery_name '"+query_plan[q]->get_node_name()+"';\n";
\r
540 map<string, string> defs = query_plan[q]->get_definitions();
\r
541 map<string, string>::iterator dfi;
\r
542 for(dfi=defs.begin(); dfi!=defs.end(); ++dfi){
\r
543 ret += "\t"+ (*dfi).first + " '" + (*dfi).second + "';\n";
\r
548 param_table *params = query_plan[q]->get_param_tbl();
\r
549 vector<string> param_names = params->get_param_names();
\r
551 for(p=0;p<param_names.size();p++){
\r
552 data_type *dt = params->get_data_type( param_names[p] );
\r
553 ret += "\t" + param_names[p] + " '" + dt->get_type_str() + "';\n";
\r
557 ret += query_plan[q]->to_query_string();
\r
564 string stream_query::collect_refd_ifaces(){
\r
567 for(q=0;q<query_plan.size();++q){
\r
569 map<string, string> defs = query_plan[q]->get_definitions();
\r
570 if(defs.count("_referenced_ifaces")){
\r
571 if(ret != "") ret += ",";
\r
572 ret += defs["_referenced_ifaces"];
\r
581 bool stream_query::stream_input_only(table_list *Schema){
\r
582 vector<tablevar_t *> input_tbls = this->get_input_tables();
\r
584 for(i=0;i<input_tbls.size();++i){
\r
585 int t = Schema->get_table_ref(input_tbls[i]->get_schema_name());
\r
586 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) return(false);
\r
591 // Return input tables. No duplicate removal performed.
\r
592 vector<tablevar_t *> stream_query::get_input_tables(){
\r
593 vector<tablevar_t *> retval;
\r
595 // create name-to-index map
\r
597 map<string, int> name_to_node;
\r
598 for(n=0;n<query_plan.size();++n){
\r
600 name_to_node[query_plan[n]->get_node_name()] = n;
\r
605 for(l=0;l<qtail.size();++l){
\r
606 int leaf = qtail[l];
\r
607 vector<tablevar_t *> tmp_v = query_plan[leaf]->get_input_tbls();
\r
609 for(i=0;i<tmp_v.size();++i){
\r
610 if(name_to_node.count(tmp_v[i]->get_schema_name()) == 0)
\r
611 retval.push_back(tmp_v[i]);
\r
618 void stream_query::compute_node_format(int q, vector<int> &nfmt, map<string, int> &op_idx){
\r
619 int netcnt = 0, hostcnt = 0;
\r
622 vector<tablevar_t *> itbls = query_plan[q]->get_input_tbls();
\r
623 for(i=0;i<itbls.size();++i){
\r
624 string tname = itbls[i]->get_schema_name();
\r
625 if(op_idx.count(tname)){
\r
626 int o = op_idx[tname];
\r
627 if(nfmt[o] == UNKNOWNFORMAT)
\r
628 compute_node_format(o,nfmt,op_idx);
\r
629 if(nfmt[o] == NETFORMAT) netcnt++;
\r
635 if(query_plan[q]->makes_transform()){
\r
636 nfmt[q] = HOSTFORMAT;
\r
639 nfmt[q] = HOSTFORMAT;
\r
641 nfmt[q] = NETFORMAT;
\r
644 //printf("query plan %d (%s) is ",q,query_plan[q]->get_node_name().c_str());
\r
645 //if(nfmt[q] == HOSTFORMAT) printf(" host format.\n");
\r
646 //else printf("net format\n");
\r
650 string stream_query::generate_hfta(table_list *Schema, ext_fcn_list *Ext_fcns, opview_set &opviews, bool distributed_mode){
\r
651 int schref, ov_ix, i, q, param_sz;
\r
652 bool dag_graph = false;
\r
655 // Bind the SEs in all query plan nodes to this schema, and
\r
656 // Add all tables used by this query to the schema.
\r
657 // Question: Will I be polluting the global schema by adding all
\r
658 // query node schemas?
\r
660 // First ensure all nodes are in the schema.
\r
662 for(qn=0;qn<query_plan.size();++qn){
\r
663 if(query_plan[qn] != NULL){
\r
664 Schema->add_table(query_plan[qn]->get_fields());
\r
668 for(qn=0;qn<query_plan.size();++qn){
\r
669 if(query_plan[qn] != NULL){
\r
670 query_plan[qn]->bind_to_schema(Schema);
\r
674 // Is it a DAG plan?
\r
675 set<string> qsources;
\r
677 for(n=0;n<query_plan.size();++n){
\r
679 vector<tablevar_t *> tmp_v = query_plan[n]->get_input_tbls();
\r
681 for(i=0;i<tmp_v.size();++i){
\r
682 if(qsources.count(tmp_v[i]->get_schema_name()) > 0)
\r
684 qsources.insert(tmp_v[i]->get_schema_name());
\r
691 // Collect set of tables ref'd in this HFTA
\r
693 for(qn=0;qn<query_plan.size();++qn){
\r
694 if(query_plan[qn]){
\r
695 // get names of the tables
\r
696 vector<tablevar_t *> input_tbls = query_plan[qn]->get_input_tbls();
\r
697 vector<tablevar_t *> output_tbls = query_plan[qn]->get_output_tbls();
\r
698 // Convert to tblrefs, add to set of ref'd tables
\r
700 for(i=0;i<input_tbls.size();i++){
\r
701 // int t = Schema->get_table_ref(input_tbls[i]->get_schema_name());
\r
702 int t = input_tbls[i]->get_schema_ref();
\r
704 fprintf(stderr,"INTERNAL ERROR in generate_hfta. "
\r
705 "query plan node %s references input table %s, which is not in schema.\n",
\r
706 query_name.c_str(), input_tbls[i]->get_schema_name().c_str());
\r
712 for(i=0;i<output_tbls.size();i++){
\r
713 int t = Schema->get_table_ref(output_tbls[i]->get_schema_name());
\r
715 fprintf(stderr,"INTERNAL ERROR in generate_hfta."
\r
716 "query plan node %s references output table %s, which is not in schema.\n",
\r
717 query_name.c_str(), output_tbls[i]->get_schema_name().c_str());
\r
725 // Collect map of lftas, query nodes
\r
726 map<string, int> op_idx;
\r
727 for(q=0;q<query_plan.size();q++){
\r
729 op_idx[query_plan[q]->get_node_name()] = q;
\r
733 // map of input tables must include query id and input
\r
734 // source (0,1) because several queries might reference the same source
\r
735 vector<tablevar_t *> input_tbls = this->get_input_tables();
\r
736 vector<bool> input_tbl_free;
\r
737 for(i=0;i<input_tbls.size();++i){
\r
738 input_tbl_free.push_back(true);
\r
740 map<string, int> lfta_idx;
\r
741 //fprintf(stderr,"%d input tables, %d query nodes\n",input_tbls.size(), query_plan.size());
\r
742 for(q=0;q<query_plan.size();q++){
\r
744 vector<tablevar_t *> itbls = query_plan[q]->get_input_tbls();
\r
746 for(it=0;it<itbls.size();it++){
\r
747 string tname = itbls[it]->get_schema_name()+"-"+int_to_string(q)+"-"+int_to_string(it);
\r
748 string src_tblname = itbls[it]->get_schema_name();
\r
749 bool src_is_external = false;
\r
750 for(i=0;i<input_tbls.size();++i){
\r
751 if(src_tblname == input_tbls[i]->get_schema_name()){
\r
752 src_is_external = true;
\r
753 if(input_tbl_free[i]){
\r
754 lfta_idx[tname] = i;
\r
755 input_tbl_free[i] = false;
\r
756 //fprintf(stderr,"Adding %s (src_tblname=%s, q=%d, it=%d) to %d.\n",tname.c_str(), src_tblname.c_str(), q, it, i);
\r
761 if(i==input_tbls.size() && src_is_external){
\r
762 fprintf(stderr,"INTERNAL ERROR in stream_query::generate_hfta, can't find free entry in input_tbls for query %d, intput %d (%s)\n",q,it,src_tblname.c_str());
\r
769 for(i=0;i<input_tbls.size();++i){
\r
770 string src_tblname = input_tbls[i]->get_schema_name();
\r
771 lfta_idx[src_tblname] = i;
\r
775 // Compute the output formats of the operators.
\r
776 vector<int> node_fmt(query_plan.size(),UNKNOWNFORMAT);
\r
777 compute_node_format(qhead, node_fmt, op_idx);
\r
780 // Generate the schema strings for the outputs.
\r
782 for(i=0;i<query_plan.size();++i){
\r
783 if(i != qhead && query_plan[i]){
\r
784 string schema_tmpstr = this->make_schema(i);
\r
785 schema_str += "gs_csp_t node"+int_to_string(i)+"_schema = "+make_C_embedded_string(schema_tmpstr)+";\n";
\r
789 attributes = query_plan[qhead]->get_fields();
\r
792 string schema_tmpstr = this->make_schema();
\r
793 schema_str += "gs_csp_t "+generate_schema_string_name(query_name)+" = "+make_C_embedded_string(schema_tmpstr)+";\n";
\r
795 // Generate the collection of tuple defs.
\r
797 string tuple_defs = "\n/*\tDefine tuple structures \t*/\n\n";
\r
798 set<int>::iterator si;
\r
799 for(si=tbl_set.begin(); si!=tbl_set.end(); ++si){
\r
800 tuple_defs += generate_host_tuple_struct( Schema->get_table( (*si) ));
\r
801 tuple_defs += "\n\n";
\r
804 // generate the finalize tuple function
\r
805 string finalize_str = generate_hfta_finalize_tuple(attributes);
\r
807 // Analyze and make the output operators
\r
808 bool eat_input = false;
\r
809 string src_op = query_name;
\r
810 string pred_op = src_op;
\r
812 if(output_specs.size()>0)
\r
815 eat_input = false; // must create stream output for successor HFTAs.
\r
816 int n_filestreams = 0;
\r
817 for(i=0;i<output_specs.size();++i){
\r
818 if(output_specs[i]->operator_type == "stream" ){
\r
821 if(output_specs[i]->operator_type == "file" || output_specs[i]->operator_type == "zfile" ){
\r
826 int filestream_id = 0;
\r
827 for(i=0;i<output_specs.size();++i){
\r
828 if(output_specs[i]->operator_type == "file" || output_specs[i]->operator_type == "zfile"){
\r
830 int n_fstreams = output_specs[i]->n_partitions / n_parallel;
\r
831 if(n_fstreams * n_parallel < output_specs[i]->n_partitions){
\r
833 if(n_parallel == 1 || query_name.find("__copy1") != string::npos){
\r
834 fprintf(stderr,"WARNING, in query %s, %d streams requested for %s output, but it must be a multiple of the hfta parallelism (%d), increasing number of output streams to %d.\n",query_name.c_str(), output_specs[i]->n_partitions, output_specs[i]->operator_type.c_str(), n_parallel, n_fstreams*n_parallel);
\r
837 // output_file_qpn *new_ofq = new output_file_qpn();
\r
838 string filestream_tag = "";
\r
839 if(n_filestreams>1){
\r
840 filestream_tag = "_fileoutput"+int_to_string(filestream_id);
\r
843 output_file_qpn *new_ofq = new output_file_qpn(pred_op, src_op, filestream_tag, query_plan[qhead]->get_fields(), output_specs[i], (i==last_op ? eat_input : false) );
\r
844 // if(n_fstreams > 1){
\r
845 if(n_fstreams > 0){
\r
847 bool err_ret = new_ofq->set_splitting_params(n_parallel,parallel_idx,n_fstreams,output_specs[i]->partitioning_flds,err_str);
\r
849 fprintf(stderr,"%s",err_str.c_str());
\r
853 output_operators.push_back(new_ofq );
\r
854 pred_op = output_operators.back()->get_node_name();
\r
855 }else if(! (output_specs[i]->operator_type == "stream" || output_specs[i]->operator_type == "Stream" || output_specs[i]->operator_type == "STREAM") ){
\r
856 fprintf(stderr,"WARNING, output operator type %s (on query %s) is not supported, ignoring\n",output_specs[i]->operator_type.c_str(),query_name.c_str() );
\r
862 // Generate functors for the query nodes.
\r
864 string functor_defs = "\n/*\tFunctor definitions\t*/\n\n";
\r
865 for(qn=0;qn<query_plan.size();++qn){
\r
866 if(query_plan[qn]!=NULL){
\r
867 // Compute whether the input needs a ntoh xform.
\r
868 vector<bool> needs_xform;
\r
869 vector<tablevar_t *> itbls = query_plan[qn]->get_input_tbls();
\r
870 for(i=0;i<itbls.size();++i){
\r
871 string tname = itbls[i]->get_schema_name();
\r
872 // if(query_plan[qn]->makes_transform()){
\r
873 if(op_idx.count(tname)>0){
\r
874 if(node_fmt[ op_idx[tname] ] == NETFORMAT){
\r
875 needs_xform.push_back(true);
\r
877 needs_xform.push_back(false);
\r
880 needs_xform.push_back(true);
\r
883 // if(op_idx.count(tname)>0){
\r
884 // if(node_fmt[qn] != node_fmt[ op_idx[tname] ]){
\r
885 // needs_xform.push_back(true);
\r
887 // needs_xform.push_back(false);
\r
890 // if(node_fmt[qn] == HOSTFORMAT){
\r
891 // needs_xform.push_back(true);
\r
893 // needs_xform.push_back(false);
\r
899 functor_defs += query_plan[qn]->generate_functor(Schema, Ext_fcns, needs_xform);
\r
903 // Generate output operator functors
\r
905 vector<bool> needs_xform;
\r
906 for(i=0;i<output_operators.size();++i)
\r
907 functor_defs += output_operators[i]->generate_functor(Schema, Ext_fcns, needs_xform);
\r
911 "#include <lapp.h>\n"
\r
912 "#include <fta.h>\n"
\r
913 "#include <gshub.h>\n"
\r
914 "#include <stdlib.h>\n"
\r
915 "#include <stdio.h>\n"
\r
916 "#include <limits.h>\n"
\r
921 "#define PLAN_DAG\n"
\r
924 "#include <schemaparser.h>\n"
\r
925 "#include<hfta_runtime_library.h>\n"
\r
927 "#include <host_tuple.h>\n"
\r
928 "#include <hfta.h>\n"
\r
929 "#include <hfta_udaf.h>\n"
\r
930 "#include <hfta_sfun.h>\n"
\r
932 //"#define MAXSCHEMASZ 16384\n"
\r
933 "#include <stdio.h>\n\n"
\r
936 // Get include file for each of the operators.
\r
937 // avoid duplicate inserts.
\r
938 set<string> include_fls;
\r
939 for(qn=0;qn<query_plan.size();++qn){
\r
940 if(query_plan[qn] != NULL)
\r
941 include_fls.insert(query_plan[qn]->get_include_file());
\r
943 for(i=0;i<output_operators.size();++i)
\r
944 include_fls.insert(output_operators[i]->get_include_file());
\r
945 set<string>::iterator ssi;
\r
946 for(ssi=include_fls.begin();ssi!=include_fls.end();++ssi)
\r
949 // Add defines for hash functions
\r
952 "#define hfta_BOOL_to_hash(x) (x)\n"
\r
953 "#define hfta_USHORT_to_hash(x) (x)\n"
\r
954 "#define hfta_UINT_to_hash(x) (x)\n"
\r
955 "#define hfta_IP_to_hash(x) (x)\n"
\r
956 "#define hfta_IPV6_to_hash(x) ( (x.v[0]) ^ (x.v[1]) ^ (x.v[2]) ^ (x.v[3]))\n"
\r
957 "#define hfta_INT_to_hash(x) (gs_uint32_t)(x)\n"
\r
958 "#define hfta_ULLONG_to_hash(x) ( (( (x) >>32)&0xffffffff) ^ ((x)&0xffffffff) )\n"
\r
959 "#define hfta_LLONG_to_hash(x) ( (( (x) >>32)&0xffffffff) ^ ((x)&0xffffffff) )\n"
\r
960 "#define hfta_FLOAT_to_hash(x) ( (( ((gs_uint64_t)(x)) >>32)&0xffffffff) ^ (((gs_uint64_t)(x))&0xffffffff) )\n"
\r
964 // ret += "#define SERIOUS_LFTA \""+input_tbls[0]->get_schema_name()+"\"\n";
\r
965 ret += "#define OUTPUT_HFTA \""+query_name+"\"\n\n";
\r
967 // HACK ALERT: I know for now that all query plans are
\r
968 // single operator plans, but while SPX and SGAH can use the
\r
969 // UNOP template, the merge operator must use the MULTOP template.
\r
970 // WORSE HACK ALERT : merge does not translate its input,
\r
971 // so don't apply finalize to the output.
\r
972 // TODO: clean this up.
\r
974 // string node_type = query_plan[0]->node_type();
\r
979 // Need to work on the input, output xform logic.
\r
980 // For now, just add it in.
\r
981 // ret += finalize_str;
\r
983 if(node_fmt[qhead] == NETFORMAT){
\r
985 "void finalize_tuple(host_tuple &tup){\n"
\r
990 ret += finalize_str;
\r
993 ret += functor_defs;
\r
995 // Parameter block management
\r
996 // The proper parameter block must be transmitted to each
\r
997 // external stream source.
\r
998 // There is a 1-1 mapping between the param blocks returned
\r
999 // by this list and the registered data sources ...
\r
1000 // TODO: make this more manageable, but for now
\r
1001 // there is no parameter block manipulation so I just
\r
1002 // need to have the same number.
\r
1005 "int get_lfta_params(gs_int32_t sz, void * value,list<param_block>& lst){\n"
\r
1006 " // for now every lfta receive the full copy of hfta parameters\n"
\r
1007 " struct param_block pb;\n";
\r
1009 set<string> lfta_seen;
\r
1010 for(i=0;i<input_tbls.size();++i){
\r
1011 string src_tblname = input_tbls[i]->get_schema_name();
\r
1012 if(lfta_seen.count(src_tblname) == 0){
\r
1013 lfta_seen.insert(src_tblname);
\r
1014 schref = input_tbls[i]->get_schema_ref();
\r
1015 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
\r
1016 ov_ix = input_tbls[i]->get_opview_idx();
\r
1017 opview_entry *opv = opviews.get_entry(ov_ix);
\r
1018 string op_param = "SUBQ:";
\r
1020 for(q=0;q<opv->subq_names.size();++q){
\r
1021 if(q>0) op_param+=",";
\r
1022 op_param+=opv->subq_names[q];
\r
1025 param_sz = op_param.size()-1;
\r
1027 sprintf(tmpstr,"\t\tpb.block_length = %d;\n",param_sz); ret+=tmpstr;
\r
1029 " pb.data = malloc(pb.block_length);\n";
\r
1030 ret+="\t\tmemcpy(pb.data,\""+op_param+"\",pb.block_length);\n"
\r
1031 " lst.push_back(pb);\n\n";
\r
1034 " pb.block_length = sz;\n"
\r
1035 " pb.data = malloc(pb.block_length);\n"
\r
1036 " memcpy(pb.data, value, pb.block_length);\n"
\r
1037 " lst.push_back(pb);\n\n";
\r
1048 "struct FTA* alloc_hfta (struct FTAID ftaid, gs_uint32_t reusable, gs_int32_t command, gs_int32_t sz, void * value ) {\n"
\r
1050 " // find the lftas\n"
\r
1051 " list<lfta_info*> *lfta_list = new list<lfta_info*>;\n"
\r
1054 " char schemabuf[MAXSCHEMASZ];\n"
\r
1055 " gs_schemahandle_t schema_handle;\n"
\r
1058 // Register the input data sources.
\r
1059 // Register a source only once.
\r
1061 // vector<string> ext_reg_txt;
\r
1062 map<string, int> input_tbl_srcid;
\r
1063 for(i=0;i<input_tbls.size();++i){
\r
1064 string src_tblname = input_tbls[i]->get_schema_name();
\r
1065 // Use UDOP alias when in distributed mode.
\r
1066 // the cluster manager will make the translation
\r
1067 // using infr from qtree.xml
\r
1068 if(distributed_mode && input_tbls[i]->get_udop_alias() != "")
\r
1069 src_tblname = input_tbls[i]->get_udop_alias();
\r
1070 if(input_tbl_srcid.count(src_tblname) == 0){
\r
1071 int srcid = input_tbl_srcid.size();
\r
1072 input_tbl_srcid[src_tblname] = srcid;
\r
1074 "\n // find "+src_tblname+"\n"
\r
1075 " if (fta_find(\""+src_tblname+"\",1,&f,schemabuf,MAXSCHEMASZ)!=0) {\n"
\r
1076 " fprintf(stderr,\"HFTA::error:could not find LFTA \\n\");\n"
\r
1079 " //fprintf(stderr,\"HFTA::FTA found at %u[%u]\\n\",ftamsgid,ftaindex);\n"
\r
1081 " // parse the schema and get the schema handle\n"
\r
1082 " schema_handle = ftaschema_parse_string(schemabuf);\n"
\r
1083 " lfta_info* inf"+int_to_string(srcid)+" = new lfta_info();\n"
\r
1084 " inf"+int_to_string(srcid)+"->f = f;\n"
\r
1085 " inf"+int_to_string(srcid)+"->fta_name = strdup(\""+src_tblname+"\");\n"
\r
1086 " inf"+int_to_string(srcid)+"->schema = strdup(schemabuf);\n"
\r
1087 " inf"+int_to_string(srcid)+"->schema_handle = schema_handle;\n"
\r
1088 " lfta_list->push_back(inf"+int_to_string(srcid)+");\n\n";
\r
1089 // ext_reg_txt.push_back(tmp_s);
\r
1095 ret += "\tgs_schemahandle_t root_schema_handle = ftaschema_parse_string("+generate_schema_string_name(query_name)+");\n";
\r
1096 for(i=0;i<query_plan.size();++i){
\r
1097 if(i != qhead && query_plan[i]){
\r
1098 ret += "\tgs_schemahandle_t op"+int_to_string(i)+"_schema_handle = ftaschema_parse_string(node"+int_to_string(i)+"_schema);\n";
\r
1103 // Create the operators.
\r
1105 for(q=0;q<query_plan.size();q++){
\r
1106 if(query_plan[q]){
\r
1109 " // create an instance of operator "+int_to_string(q)+" ("+query_plan[q]->get_node_name()+") \n";
\r
1111 // Create parameters for operator construction.
\r
1113 vector<tablevar_t *> itbls = query_plan[q]->get_input_tbls();
\r
1114 string tname = itbls[0]->get_schema_name();
\r
1115 // string li_tname = tname +"-"+int_to_string(q)+"-0";
\r
1116 // if(lfta_idx.count(li_tname)>0)
\r
1117 if(input_tbl_srcid.count(tname)>0){
\r
1118 // ret += ext_reg_txt[lfta_idx[li_tname]];
\r
1119 // op_params += "inf"+int_to_string( lfta_idx[li_tname] )+"->schema_handle";
\r
1120 op_params += "inf"+int_to_string( input_tbl_srcid[tname] )+"->schema_handle";
\r
1121 }else if(op_idx.count(tname)>0){
\r
1122 op_params += "op"+int_to_string( op_idx[tname] )+"_schema_handle";
\r
1124 fprintf(stderr,"INTERNAL ERROR, can't identify input table %s when creating operator (3) %d (%s)\n",tname.c_str(),q,query_plan[q]->get_node_name().c_str());
\r
1127 if(itbls.size()>1){
\r
1128 string tname = itbls[1]->get_schema_name();
\r
1129 // string li_tname = tname +"-"+int_to_string(q)+"-1";
\r
1130 // if(lfta_idx.count(li_tname)>0)
\r
1131 if(input_tbl_srcid.count(tname)>0){
\r
1132 // ret += ext_reg_txt[lfta_idx[li_tname]];
\r
1133 // op_params += ",inf"+int_to_string( lfta_idx[li_tname] )+"->schema_handle";
\r
1134 op_params += ",inf"+int_to_string( input_tbl_srcid[tname] )+"->schema_handle";
\r
1135 }else if(op_idx.count(tname)>0){
\r
1136 op_params += ",op"+int_to_string( op_idx[tname] )+"_schema_handle";
\r
1138 fprintf(stderr,"INTERNAL ERROR, can't identify input table %s when creating operator (4) %d (%s)\n",tname.c_str(),q,query_plan[q]->get_node_name().c_str());
\r
1142 ret += query_plan[q]->generate_operator(q,op_params);
\r
1144 " operator_node* node"+int_to_string(q)+" = new operator_node(op"+int_to_string(q)+");\n";
\r
1150 // Next for the output operators if any
\r
1151 for(i=0;i<output_operators.size();++i){
\r
1152 ret += output_operators[i]->generate_operator(n_basic_ops+i,"root_schema_handle");
\r
1154 " operator_node* node"+int_to_string(n_basic_ops+i)+" = new operator_node(op"+int_to_string(n_basic_ops+i)+");\n";
\r
1158 // Link up operators.
\r
1159 for(q=0;q<query_plan.size();++q){
\r
1160 if(query_plan[q]){
\r
1161 // NOTE: this code assume that the operator has at most
\r
1162 // two inputs. But the template code also makes
\r
1163 // this assumption. Both will need to be changed.
\r
1164 vector<tablevar_t *> itbls = query_plan[q]->get_input_tbls();
\r
1165 string tname = itbls[0]->get_schema_name();
\r
1166 // string li_tname = tname +"-"+int_to_string(q)+"-0";
\r
1167 // if(lfta_idx.count(li_tname)>0)
\r
1168 if(input_tbl_srcid.count(tname)>0){
\r
1169 // ret += "\tnode"+int_to_string(q)+"->set_left_lfta(inf"+int_to_string( lfta_idx[li_tname] )+");\n";
\r
1170 ret += "\tnode"+int_to_string(q)+"->set_left_lfta(inf"+int_to_string( input_tbl_srcid[tname] )+");\n";
\r
1171 }else if(op_idx.count(tname)>0){
\r
1172 ret += "\tnode"+int_to_string(q)+"->set_left_child_node(node"+int_to_string( op_idx[tname] )+");\n";
\r
1174 fprintf(stderr,"INTERNAL ERROR, can't identify input table %s when linking operator (1) %d (%s)\n",tname.c_str(),q,query_plan[q]->get_node_name().c_str());
\r
1177 if(itbls.size()>1){
\r
1178 string tname = itbls[1]->get_schema_name();
\r
1179 // string li_tname = tname +"-"+int_to_string(q)+"-1";
\r
1180 // if(lfta_idx.count(li_tname)>0)
\r
1181 if(input_tbl_srcid.count(tname)>0){
\r
1182 // ret += "\tnode"+int_to_string(q)+"->set_right_lfta(inf"+int_to_string( lfta_idx[li_tname] )+");\n";
\r
1183 ret += "\tnode"+int_to_string(q)+"->set_right_lfta(inf"+int_to_string( input_tbl_srcid[tname] )+");\n";
\r
1184 }else if(op_idx.count(tname)>0){
\r
1185 ret += "\tnode"+int_to_string(q)+"->set_right_child_node(node"+int_to_string( op_idx[tname] )+");\n";
\r
1187 fprintf(stderr,"INTERNAL ERROR, can't identify input table %s (%s) when linking operator (2) %d (%s)\n",tname.c_str(), tname.c_str(),q,query_plan[q]->get_node_name().c_str());
\r
1193 for(i=0;i<output_operators.size();++i){
\r
1195 ret += "\tnode"+int_to_string(n_basic_ops)+"->set_left_child_node(node"+int_to_string( qhead )+");\n";
\r
1197 ret += "\tnode"+int_to_string(n_basic_ops+i)+"->set_left_child_node(node"+int_to_string( n_basic_ops+i-1 )+");\n";
\r
1200 // FTA is reusable if reusable option is set explicitly or it doesn't have any parameters
\r
1202 bool fta_reusable = false;
\r
1203 if (query_plan[qhead]->get_val_of_def("reusable") == "yes" ||
\r
1204 query_plan[qhead]->get_param_tbl()->size() == 0) {
\r
1208 int root_node = qhead;
\r
1209 if(output_operators.size()>0)
\r
1210 root_node = n_basic_ops+i-1;
\r
1215 " MULTOP_HFTA* ftap = new MULTOP_HFTA(ftaid, OUTPUT_HFTA, command, sz, value, root_schema_handle, node"+int_to_string(root_node)+", lfta_list, " + (fta_reusable ?"true":"false") + ", reusable);\n"
\r
1216 " if(ftap->init_failed()){ delete ftap; return 0;}\n"
\r
1217 " return (FTA*)ftap;\n"
\r
1223 string comm_bufsize = "16*1024*1024";
\r
1224 if(defines.count("hfta_comm_buf")>0){
\r
1225 comm_bufsize = defines["hfta_comm_buf"];
\r
1230 "int main(int argc, char * argv[]) {\n"
\r
1233 " /* parse the arguments */\n"
\r
1235 " gs_int32_t tip1,tip2,tip3,tip4;\n"
\r
1236 " endpoint gshub;\n"
\r
1237 " gs_sp_t instance_name;\n"
\r
1238 " if (argc<3) {\n"
\r
1239 " gslog(LOG_EMERG,\"Wrong arguments at startup\");\n"
\r
1243 " if ((sscanf(argv[1],\"%u.%u.%u.%u:%hu\",&tip1,&tip2,&tip3,&tip4,&(gshub.port)) != 5)) {\n"
\r
1244 " gslog(LOG_EMERG,\"HUB IP NOT DEFINED\");\n"
\r
1247 " gshub.ip=htonl(tip1<<24|tip2<<16|tip3<<8|tip4);\n"
\r
1248 " gshub.port=htons(gshub.port);\n"
\r
1249 " instance_name=strdup(argv[2]);\n"
\r
1250 " if (set_hub(gshub)!=0) {\n"
\r
1251 " gslog(LOG_EMERG,\"Could not set hub\");\n"
\r
1254 " if (set_instance_name(instance_name)!=0) {\n"
\r
1255 " gslog(LOG_EMERG,\"Could not set instance name\");\n"
\r
1260 " /* initialize host library */\n"
\r
1262 //" fprintf(stderr,\"Initializing gscp\\n\");\n"
\r
1263 " gsopenlog(argv[0]);\n"
\r
1265 " if (hostlib_init(HFTA, "+comm_bufsize+", DEFAULTDEV, 0, 0)!=0) {\n"
\r
1266 " fprintf(stderr,\"%s::error:could not initialize gscp\\n\",\n"
\r
1273 " FTAID ret = fta_register(OUTPUT_HFTA, " + (fta_reusable?"1":"0") + ", DEFAULTDEV, alloc_hfta, "+generate_schema_string_name(query_name)+", -1, 0ull);\n"
\r
1274 " fta_start_service(-1);\n"
\r
1280 ////////////////////
\r
1285 // Checks if the node i is compatible with interface partitioning
\r
1286 // (can be pushed below the merge that combines partitioned stream)
\r
1287 // i - index of the query node in a query plan
\r
1288 bool stream_query::is_partn_compatible(int index, map<string, int> lfta_names, vector<string> interface_names, vector<string> machine_names, ifq_t *ifaces_db, partn_def_list_t *partn_parse_result) {
\r
// NOTE(review): several original source lines (the early-return bodies of the
// guard conditions below, and loop/brace closers) are elided in this excerpt.
// Comments describe only what the visible code shows.
1290 qp_node* node = query_plan[index];
\r
1291 qp_node* child_node = NULL;
\r
// A node with no inputs cannot sit above a partition merge.
1293 if (node->predecessors.empty())
\r
1296 // all the node predecessors must be partition merges with the same partition definition
\r
1297 partn_def_t* partn_def = NULL;
\r
1298 for (i = 0; i < node->predecessors.size(); ++i) {
\r
1299 child_node = query_plan[node->predecessors[i]];
\r
// Only merge nodes (mrg_qpn) can carry a partition definition.
1300 if (child_node->node_type() != "mrg_qpn")
\r
1303 // merge must have only one parent for this optimization to work
\r
1304 // check that all its successors are the same
\r
1305 for (int j = 1; j < child_node->successors.size(); ++j) {
\r
1306 if (child_node->successors[j] != child_node->successors[0])
\r
// Ask the merge for its partition definition; every child merge must agree.
1310 partn_def_t* new_partn_def = ((mrg_qpn*)child_node)->get_partn_definition(lfta_names, interface_names, machine_names, ifaces_db, partn_parse_result);
\r
1311 if (!new_partn_def)
\r
1314 partn_def = new_partn_def;
\r
1315 else if (new_partn_def != partn_def)
\r
// Per-operator-type decision on whether it may be pushed below the partition merge.
1320 if (node->node_type() == "spx_qpn") // spx nodes are always partition compatible
\r
1322 else if (node->node_type() == "sgah_qpn") {
\r
1323 gb_table gb_tbl = ((sgah_qpn*)node)->gb_tbl;
\r
// NOTE(review): for sgah nodes the real group-by compatibility check is
// commented out and hard-coded to "true" — confirm this is intentional.
1324 return true; //partn_def->is_compatible(&gb_tbl);
\r
1326 else if (node->node_type() == "rsgah_qpn") {
\r
1327 gb_table gb_tbl = ((rsgah_qpn*)node)->gb_tbl;
\r
1328 return partn_def->is_compatible(&gb_tbl);
\r
1330 else if (node->node_type() == "sgahcwcb_qpn") {
\r
1331 gb_table gb_tbl = ((sgahcwcb_qpn*)node)->gb_tbl;
\r
1332 return partn_def->is_compatible(&gb_tbl);
\r
1334 else if (node->node_type() == "join_eq_hash_qpn") {
\r
1341 // Push the operator below the merges that combine the partitioned input streams
\r
1342 void stream_query::pushdown_partn_operator(int index) {
\r
// NOTE(review): this excerpt elides some original lines (early returns after
// guards, loop/brace closers); comments reflect only the visible code.
1343 qp_node* node = query_plan[index];
\r
// scratch buffer for building per-copy node-name suffixes ("_0", "_m0", ...)
1345 char node_index[128];
\r
1347 // fprintf(stderr, "Partn pushdown, query %s of type %s\n", node->get_node_name().c_str(), node->node_type().c_str());
\r
1350 // HACK ALERT: When reordering merges we screw up slack computation
\r
1351 // since slack should no longer be used, it is not an issue
\r
1354 // we can safely reorder nodes that have one and only one temporal attribute in select list
\r
1355 table_def* table_layout = node->get_fields();
\r
1356 vector<field_entry*> fields = table_layout->get_fields();
\r
// position of the single temporal field used as the merge-by column; -1 = none found
1357 int merge_fieldpos = -1;
\r
1360 for (i = 0; i < fields.size(); ++i) {
\r
1361 data_type dt(fields[i]->get_type(),fields[i]->get_modifier_list());
\r
1362 if(dt.is_temporal()) {
\r
1363 if (merge_fieldpos != -1) // more than one temporal field found
\r
1365 merge_fieldpos = i;
\r
1369 if (merge_fieldpos == -1) // if no temporal field found
\r
1372 std::vector<colref_t *> mvars; // the merge-by columns.
\r
1374 // reordering procedure is different for unary operators and joins
\r
1375 if (node->node_type() == "join_eq_hash_qpn") {
\r
// one per-interface copy of the join will be created for each matched stream pair
1376 vector<qp_node*> new_nodes;
\r
1378 tablevar_t *left_table_name;
\r
1379 tablevar_t *right_table_name;
\r
1380 mrg_qpn* left_mrg = (mrg_qpn*)query_plan[node->predecessors[0]];
\r
1381 mrg_qpn* right_mrg = (mrg_qpn*)query_plan[node->predecessors[1]];
\r
1383 // for now we will only consider plans where both child merges
\r
1384 // merge the same set of streams
\r
1386 if (left_mrg->fm.size() != right_mrg->fm.size())
\r
1389 // mapping of (machine + interface) names to table definitions
\r
1390 map<string, tablevar_t*> iface_map;
\r
1391 for (i = 0; i < left_mrg->fm.size(); i++) {
\r
1392 left_table_name = left_mrg->fm[i];
\r
1393 iface_map[left_table_name->get_machine() + left_table_name->get_interface()] = left_table_name;
\r
1396 for (i = 0; i < right_mrg->fm.size(); i++) {
\r
1397 right_table_name = right_mrg->fm[i];
\r
1399 // find corresponding left table
\r
1400 if (!iface_map.count(right_table_name->get_machine() + right_table_name->get_interface()))
\r
1403 left_table_name = iface_map[right_table_name->get_machine() + right_table_name->get_interface()];
\r
1405 // create new join nodes
\r
1406 sprintf(node_index, "_%d", i);
\r
1407 join_eq_hash_qpn* new_node = (join_eq_hash_qpn*)node->make_copy(node_index);
\r
1409 // make a copy of right_table_name
\r
1410 right_table_name = right_table_name->duplicate();
\r
// the copies inherit the original join's range variables
1411 left_table_name->set_range_var(((join_eq_hash_qpn*)node)->from[0]->get_var_name());
\r
1412 right_table_name->set_range_var(((join_eq_hash_qpn*)node)->from[1]->get_var_name());
\r
1413 new_node->from[0] = left_table_name;
\r
1414 new_node->from[1] = right_table_name;
\r
1415 new_nodes.push_back(new_node);
\r
1418 // make right_mrg a new root
\r
// the merge takes over the pushed-down node's name and output layout
1419 right_mrg->set_node_name(node->get_node_name());
\r
1420 right_mrg->table_layout = table_layout;
\r
1421 right_mrg->merge_fieldpos = merge_fieldpos;
\r
1423 for (i = 0; i < right_mrg->fm.size(); i++) {
\r
1424 // make newly created joins children of merge
\r
1425 sprintf(node_index, "_%d", i);
\r
1426 right_mrg->fm[i] = new tablevar_t(right_mrg->fm[i]->get_machine().c_str(), right_mrg->fm[i]->get_interface().c_str(), (node->get_node_name() + string(node_index)).c_str());
\r
1427 sprintf(node_index, "_m%d", i);
\r
1428 right_mrg->fm[i]->set_range_var(node_index);
\r
// merge-by column is the single temporal field identified above
1429 right_mrg->mvars[i]->set_field(table_layout->get_field_name(merge_fieldpos));
\r
1433 if (left_mrg != right_mrg)
\r
1434 query_plan[node->predecessors[0]] = NULL; // remove left merge from the plan
\r
1436 query_plan.insert(query_plan.end(), new_nodes.begin(), new_nodes.end());
\r
1438 } else { // unary operator
\r
1439 // get the child merge node
\r
1440 mrg_qpn* child_mrg = (mrg_qpn*)query_plan[node->predecessors[0]];
\r
// merge becomes the new root: takes the operator's name and output layout
1442 child_mrg->set_node_name(node->get_node_name());
\r
1443 child_mrg->table_layout = table_layout;
\r
1444 child_mrg->merge_fieldpos = merge_fieldpos;
\r
1446 // create new nodes for every source stream
\r
1447 for (i = 0; i < child_mrg->fm.size(); i++) {
\r
1448 tablevar_t *table_name = child_mrg->fm[i];
\r
1449 sprintf(node_index, "_%d", i);
\r
1450 qp_node* new_node = node->make_copy(node_index);
\r
// rebind the copy's input to this source stream; cast per concrete node type
1452 if (node->node_type() == "spx_qpn")
\r
1453 ((spx_qpn*)new_node)->table_name = table_name;
\r
1454 else if (node->node_type() == "sgah_qpn")
\r
1455 ((sgah_qpn*)new_node)->table_name = table_name;
\r
1456 else if (node->node_type() == "rsgah_qpn")
\r
1457 ((rsgah_qpn*)new_node)->table_name = table_name;
\r
1458 else if (node->node_type() == "sgahcwcb_qpn")
\r
1459 ((sgahcwcb_qpn*)new_node)->table_name = table_name;
\r
1460 table_name->set_range_var("_t0");
\r
// the merge now reads from the newly created per-source operator copy
1462 child_mrg->fm[i] = new tablevar_t(table_name->get_machine().c_str(), table_name->get_interface().c_str(), new_node->get_node_name().c_str());
\r
1463 sprintf(node_index, "_m%d", i);
\r
1464 child_mrg->fm[i]->set_range_var(node_index);
\r
1465 child_mrg->mvars[i]->set_field(table_layout->get_field_name(merge_fieldpos));
\r
1467 // add new node to query plan
\r
1468 query_plan.push_back(new_node);
\r
// the original operator is replaced by its per-source copies; rebuild links
1471 query_plan[index] = NULL;
\r
1472 generate_linkage();
\r
1476 // Checks if the node i can be pushed below the merge
\r
1477 bool stream_query::is_pushdown_compatible(int index, map<string, int> lfta_names, vector<string> interface_names, vector<string> machine_names) {
\r
// NOTE(review): the early-return bodies of the guards below are elided in
// this excerpt; comments describe only the visible code.
1480 qp_node* node = query_plan[index];
\r
1481 qp_node* child_node = NULL;
\r
// pushdown only considered for nodes with exactly one input
1483 if (node->predecessors.size() != 1)
\r
1486 // node predecessor must be merge that combines streams from multiple hosts
\r
1487 child_node = query_plan[node->predecessors[0]];
\r
1488 if (child_node->node_type() != "mrg_qpn")
\r
1491 if (!((mrg_qpn*)child_node)->is_multihost_merge())
\r
1494 // merge must have only one parent for this optimization to work
\r
1495 // check that all its successors are the same
\r
1496 for (int j = 1; j < child_node->successors.size(); ++j) {
\r
1497 if (child_node->successors[j] != child_node->successors[0])
\r
1501 // selections can always be pushed down, aggregations can always be split into selection/aggr or aggr/aggr pair
\r
1502 // and pushed down
\r
1503 if (node->node_type() == "spx_qpn")
\r
1505 else if (node->node_type() == "sgah_qpn")
\r
1512 // Push the operator below the merge
\r
1513 void stream_query::pushdown_operator(int index, ext_fcn_list *Ext_fcns, table_list *Schema) {
\r
// NOTE(review): this excerpt elides some original lines (guard returns,
// loop/brace closers); comments reflect only the visible code.
1514 qp_node* node = query_plan[index];
\r
// scratch buffer for per-copy node-name suffixes
1516 char node_suffix[128];
\r
1518 // we can only safely push down queries that have one and only one temporal attribute in select list
\r
1519 table_def* table_layout = node->get_fields();
\r
1520 vector<field_entry*> fields = table_layout->get_fields();
\r
// position of the single temporal merge-by field; -1 = none found
1521 int merge_fieldpos = -1;
\r
1524 for (i = 0; i < fields.size(); ++i) {
\r
1525 data_type dt(fields[i]->get_type(),fields[i]->get_modifier_list());
\r
1526 if(dt.is_temporal()) {
\r
1527 if (merge_fieldpos != -1) // more than one temporal field found
\r
1529 merge_fieldpos = i;
\r
1533 if (merge_fieldpos == -1) // if no temporal field found
\r
1536 std::vector<colref_t *> mvars; // the merge-by columns.
\r
1538 fprintf(stderr, "Regular pushdown, query %s of type %s\n", node->get_node_name().c_str(), node->node_type().c_str());
\r
1540 // get the child merge node
\r
1541 mrg_qpn* child_mrg = (mrg_qpn*)query_plan[node->predecessors[0]];
\r
1543 tablevar_t *table_name = NULL;
\r
// selection (spx) case: replicate the selection below the merge
1546 if (node->node_type() == "spx_qpn") {
\r
1547 // get the child merge node
\r
1549 // create new nodes for every source stream
\r
1550 for (i = 0; i < child_mrg->fm.size(); i++) {
\r
1551 table_name = child_mrg->fm[i];
\r
1552 sprintf(node_suffix, "_%d", i);
\r
1553 qp_node* new_node = node->make_copy(node_suffix);
\r
1555 ((spx_qpn*)new_node)->table_name = table_name;
\r
1556 table_name->set_range_var("_t0");
\r
// merge now reads from the per-source selection copy
1558 child_mrg->fm[i] = new tablevar_t(table_name->get_machine().c_str(), table_name->get_interface().c_str(), new_node->get_node_name().c_str());
\r
1559 sprintf(node_suffix, "_m%d", i);
\r
1560 child_mrg->fm[i]->set_range_var(node_suffix);
\r
1561 child_mrg->mvars[i]->set_field(table_layout->get_field_name(merge_fieldpos));
\r
1563 // add new node to query plan
\r
1564 query_plan.push_back(new_node);
\r
// merge adopts the pushed-down operator's output layout
1566 child_mrg->table_layout = table_layout;
\r
1567 child_mrg->merge_fieldpos = merge_fieldpos;
\r
1569 } else { // aggregation node
\r
1571 vector<qp_node*> new_nodes;
\r
1573 // split aggregations into high and low-level part
\r
1574 vector<qp_node *> split_nodes = ((sgah_qpn*)node)->split_node_for_hfta(Ext_fcns, Schema);
\r
// expect exactly [low-level, high-level]; anything else aborts the pushdown
1575 if (split_nodes.size() != 2)
\r
1578 sgah_qpn* super_aggr = (sgah_qpn*)split_nodes[1];
\r
1579 super_aggr->table_name = ((sgah_qpn*)node)->table_name;
\r
1581 // group all the sources by host
\r
// host/machine name -> indices into child_mrg->fm of that host's sources
1582 map<string, vector<int> > host_map;
\r
1583 for (i = 0; i < child_mrg->fm.size(); i++) {
\r
1584 tablevar_t *table_name = child_mrg->fm[i];
\r
1585 if (host_map.count(table_name->get_machine()))
\r
1586 host_map[table_name->get_machine()].push_back(i);
\r
1588 vector<int> tables;
\r
1589 tables.push_back(i);
\r
1590 host_map[table_name->get_machine()] = tables;
\r
1594 // create a new merge and low-level aggregation for each host
\r
1595 map<string, vector<int> >::iterator iter;
\r
1596 for (iter = host_map.begin(); iter != host_map.end(); iter++) {
\r
1597 string host_name = (*iter).first;
\r
1598 vector<int> tables = (*iter).second;
\r
// per-host copies are suffixed with the host name
1600 sprintf(node_suffix, "_%s", host_name.c_str());
\r
1601 string suffix(node_suffix);
\r
1603 mrg_qpn *new_mrg = (mrg_qpn *)child_mrg->make_copy(suffix);
\r
1604 for (i = 0; i < tables.size(); ++i) {
\r
1605 sprintf(node_suffix, "_m%d", i);
\r
1606 new_mrg->fm.push_back(child_mrg->fm[tables[i]]);
\r
// NOTE(review): mvars is indexed by i here, not tables[i] — verify this
// pairing of merge-by columns with the reassigned sources is intended.
1607 new_mrg->mvars.push_back(child_mrg->mvars[i]);
\r
1608 new_mrg->fm[i]->set_range_var(node_suffix);
\r
// low-level half of the split aggregation, one copy per host
1610 qp_node* new_node = split_nodes[0]->make_copy(suffix);
\r
1612 if (new_node->node_type() == "spx_qpn") {
\r
1613 ((spx_qpn*)new_node)->table_name = new tablevar_t(host_name.c_str(), "IFACE", new_mrg->get_node_name().c_str());
\r
1614 ((spx_qpn*)new_node)->table_name->set_range_var("_t0");
\r
1616 ((sgah_qpn*)new_node)->table_name = new tablevar_t(host_name.c_str(), "IFACE", new_mrg->get_node_name().c_str());
\r
1617 ((sgah_qpn*)new_node)->table_name->set_range_var("_t0");
\r
1619 query_plan.push_back(new_mrg);
\r
1620 new_nodes.push_back(new_node);
\r
// parent merge's layout is derived from the low-level node's select list
1623 child_mrg->merge_fieldpos = merge_fieldpos;
\r
1624 if (split_nodes[0]->node_type() == "spx_qpn")
\r
1625 child_mrg->table_layout = create_attributes(child_mrg->get_node_name(), ((spx_qpn*)split_nodes[0])->select_list);
\r
1627 child_mrg->table_layout = create_attributes(child_mrg->get_node_name(), ((sgah_qpn*)split_nodes[0])->select_list);
\r
1630 // connect newly created nodes with parent multihost merge
\r
1631 for (i = 0; i < new_nodes.size(); ++i) {
\r
1632 if (new_nodes[i]->node_type() == "spx_qpn")
\r
1633 child_mrg->fm[i] = new tablevar_t(((spx_qpn*)new_nodes[i])->table_name->get_machine().c_str(), "IFACE", new_nodes[i]->get_node_name().c_str());
\r
1635 child_mrg->fm[i] = new tablevar_t(((sgah_qpn*)new_nodes[i])->table_name->get_machine().c_str(), "IFACE", new_nodes[i]->get_node_name().c_str());
\r
1638 child_mrg->mvars[i]->set_field(child_mrg->table_layout->get_field_name(merge_fieldpos));
\r
1640 sprintf(node_suffix, "_m%d", i);
\r
1641 child_mrg->fm[i]->set_range_var(node_suffix);
\r
1642 query_plan.push_back(new_nodes[i]);
\r
// shrink the parent merge to one input per host
1644 child_mrg->fm.resize(new_nodes.size());
\r
1645 child_mrg->mvars.resize(new_nodes.size());
\r
1647 // push the new high-level aggregation
\r
1648 query_plan.push_back(super_aggr);
\r
// original operator replaced by split pieces; rebuild plan links
1651 query_plan[index] = NULL;
\r
1652 generate_linkage();
\r
1656 // Extract subtree rooted at node i into separate hfta
\r
1657 stream_query* stream_query::extract_subtree(int index) {
\r
// worklist of plan indices belonging to the subtree rooted at `index`
1659 vector<int> nodes;
\r
// new query takes its name/attributes/params from the subtree root
1660 stream_query* new_query = new stream_query(query_plan[index], this);
\r
1662 nodes.push_back(index);
\r
// breadth-style walk: nodes grows while we iterate over it
1663 for (int i = 0; i < nodes.size(); ++i) {
\r
1664 qp_node* node = query_plan[nodes[i]];
\r
1668 // add all children to nodes list
\r
1669 for (int j = 0; j < node->predecessors.size(); ++j)
\r
1670 nodes.push_back(node->predecessors[j]);
\r
// move the node into the extracted query and vacate its old plan slot
1672 new_query->query_plan.push_back(node);
\r
1674 query_plan[nodes[i]] = NULL;
\r
// NOTE(review): the trailing `return new_query;` is elided from this excerpt.
\r
1680 // Splits query that combines data from multiple hosts into separate hftas.
\r
1681 vector<stream_query*> stream_query::split_multihost_query() {
\r
// NOTE(review): some original lines are elided in this excerpt (loop/brace
// closers, the code that appends extracted queries to `ret`, final return).
1683 vector<stream_query*> ret;
\r
1684 char node_suffix[128];
\r
1687 // find merges combining multiple hosts into per-host groups
\r
// snapshot the size: query_plan grows below while we iterate by index
1688 int plan_size = query_plan.size();
\r
1689 vector<mrg_qpn*> new_nodes;
\r
1691 for (i = 0; i < plan_size; ++i) {
\r
1692 qp_node* node = query_plan[i];
\r
1693 if (node && node->node_type() == "mrg_qpn") {
\r
1694 mrg_qpn* mrg = (mrg_qpn*)node;
\r
1695 if (mrg->is_multihost_merge()) {
\r
1697 // group all the sources by host
\r
// host/machine name -> indices into mrg->fm of that host's sources
1698 map<string, vector<int> > host_map;
\r
1699 for (int j = 0; j < mrg->fm.size(); j++) {
\r
1700 tablevar_t *table_name = mrg->fm[j];
\r
1701 if (host_map.count(table_name->get_machine()))
\r
1702 host_map[table_name->get_machine()].push_back(j);
\r
1704 vector<int> tables;
\r
1705 tables.push_back(j);
\r
1706 host_map[table_name->get_machine()] = tables;
\r
1710 // create a new merge for each host
\r
1711 map<string, vector<int> >::iterator iter;
\r
1712 for (iter = host_map.begin(); iter != host_map.end(); iter++) {
\r
1713 string host_name = (*iter).first;
\r
1714 vector<int> tables = (*iter).second;
\r
// single-source hosts need no sub-merge
1716 if (tables.size() == 1)
\r
1719 sprintf(node_suffix, "_%s", host_name.c_str());
\r
1720 string suffix(node_suffix);
\r
1722 mrg_qpn *new_mrg = (mrg_qpn *)mrg->make_copy(suffix);
\r
1723 for (int j = 0; j < tables.size(); ++j) {
\r
1724 new_mrg->fm.push_back(mrg->fm[tables[j]]);
\r
// NOTE(review): mvars indexed by j rather than tables[j] — verify the
// merge-by column pairing matches the reassigned sources.
1725 new_mrg->mvars.push_back(mrg->mvars[j]);
\r
1726 sprintf(node_suffix, "m_%d", j);
\r
1727 new_mrg->fm[j]->set_range_var(node_suffix);
\r
1729 new_nodes.push_back(new_mrg);
\r
1732 if (!new_nodes.empty()) {
\r
1733 // connect newly created merge nodes with parent multihost merge
\r
1734 for (int j = 0; j < new_nodes.size(); ++j) {
\r
1735 mrg->fm[j] = new tablevar_t(new_nodes[j]->fm[0]->get_machine().c_str(), "IFACE", new_nodes[j]->get_node_name().c_str());
\r
1736 query_plan.push_back(new_nodes[j]);
\r
// parent merge now has one input per host
1738 mrg->fm.resize(new_nodes.size());
\r
1739 mrg->mvars.resize(new_nodes.size());
\r
1740 generate_linkage();
\r
1744 // now externalize the sources
\r
1745 for (int j = 0; j < node->predecessors.size(); ++j) {
\r
1746 // Extract subtree rooted at this predecessor into separate hfta
\r
1747 stream_query* q = extract_subtree(node->predecessors[j]);
\r
1749 q->generate_linkage();
\r
// relink the remaining plan after the splits
1754 generate_linkage();
\r
1763 // Perform local FTA optimizations
\r
1764 void stream_query::optimize(vector<stream_query *>& hfta_list, map<string, int> lfta_names, vector<string> interface_names, vector<string> machine_names, ext_fcn_list *Ext_fcns, table_list *Schema, ifq_t *ifaces_db, partn_def_list_t *partn_parse_result){
\r
// NOTE(review): some original lines are elided in this excerpt (outer loop of
// the topological sort, `continue`s, brace closers); comments reflect only
// the visible code.
1766 // Topologically sort the nodes in query plan (leaf-first)
\r
1768 vector<int> sorted_nodes;
\r
1770 int num_nodes = query_plan.size();
\r
// tracks which nodes have already been emitted as leaves
1771 bool* leaf_flags = new bool[num_nodes];
\r
1772 memset(leaf_flags, 0, num_nodes * sizeof(bool));
\r
1774 // run topological sort
\r
1775 bool done = false;
\r
1777 // add all leafs to sorted_nodes
\r
1780 for (i = 0; i < num_nodes; ++i) {
\r
// skip slots vacated by earlier plan rewrites
1781 if (!query_plan[i])
\r
1784 if (!leaf_flags[i] && query_plan[i]->predecessors.empty()) {
\r
1785 leaf_flags[i] = true;
\r
1786 sorted_nodes.push_back(i);
\r
1789 // remove the node from its parents predecessor lists
\r
1790 // since we only care about number of predecessors, it is sufficient just to remove
\r
1791 // one element from the parent's predecessors list
\r
1792 for (int j = query_plan[i]->successors.size() - 1; j >= 0; --j)
\r
1793 query_plan[query_plan[i]->successors[j]]->predecessors.pop_back();
\r
1797 delete[] leaf_flags;
\r
1798 num_nodes = sorted_nodes.size();
\r
1799 generate_linkage(); // rebuild the recently destroyed predecessor lists.
\r
1801 // collect the information about interfaces nodes read from
\r
1802 for (i = 0; i < num_nodes; ++i) {
\r
1803 qp_node* node = query_plan[sorted_nodes[i]];
\r
1804 vector<tablevar_t *> input_tables = node->get_input_tbls();
\r
1805 for (j = 0; j < input_tables.size(); ++j) {
\r
1806 tablevar_t * table = input_tables[j];
\r
// inputs that are lftas get machine/interface tags from the parallel arrays
1807 if (lfta_names.count(table->schema_name)) {
\r
1808 int index = lfta_names[table->schema_name];
\r
1809 table->set_machine(machine_names[index]);
\r
1810 table->set_interface(interface_names[index]);
\r
1817 // push eligible operators down in the query plan
\r
// leaf-first order guarantees children are processed before their parents
1818 for (i = 0; i < num_nodes; ++i) {
\r
1819 if (partn_parse_result && is_partn_compatible(sorted_nodes[i], lfta_names, interface_names, machine_names, ifaces_db, partn_parse_result)) {
\r
1820 pushdown_partn_operator(sorted_nodes[i]);
\r
1821 } else if (is_pushdown_compatible(sorted_nodes[i], lfta_names, interface_names, machine_names)) {
\r
1822 pushdown_operator(sorted_nodes[i], Ext_fcns, Schema);
\r
1826 // split the query into multiple hftas if it combines the data from multiple hosts
\r
1827 vector<stream_query*> hftas = split_multihost_query();
\r
1828 hfta_list.insert(hfta_list.end(), hftas.begin(), hftas.end());
\r
// re-read the size: pushdowns/splits above may have grown the plan
1831 num_nodes = query_plan.size();
\r
1832 // also split multi-way merges into two-way merges
\r
1833 for (i = 0; i < num_nodes; ++i) {
\r
1834 qp_node* node = query_plan[i];
\r
1835 if (node && node->node_type() == "mrg_qpn") {
\r
1836 vector<mrg_qpn *> split_merge = ((mrg_qpn *)node)->split_sources();
\r
1838 query_plan.insert(query_plan.end(), split_merge.begin(), split_merge.end());
\r
1839 // delete query_plan[sorted_nodes[i]];
\r
1840 query_plan[i] = NULL;
\r
1844 generate_linkage();
\r
// Return the output table definition of this query, i.e. the field layout
// of the plan's head (output) node.
1849 table_def *stream_query::get_output_tabledef(){
\r
1850 return( query_plan[qhead]->get_fields() );
\r
1855 //////////////////////////////////////////////////////////
\r
1856 //// De-siloing. TO BE REMOVED
\r
1858 void stream_query::desilo_lftas(map<string, int> &lfta_names,vector<string> &silo_names,table_list *Schema){
\r
// NOTE(review): loop-variable declarations (i, t, s) and brace closers are
// elided in this excerpt.
// Silo suffix length; assumes all silo names share the length of the last one.
1861 int suffix_len = silo_names.back().size();
\r
// examine the inputs of every tail (input-side) node of the plan
1863 for(i=0;i<qtail.size();++i){
\r
1864 vector<tablevar_t *> itbls = query_plan[qtail[i]]->get_input_tbls();
\r
1865 for(t=0;t<itbls.size();++t){
\r
1866 string itbl_name = itbls[t]->get_schema_name();
\r
1867 if(lfta_names.count(itbl_name)>0){
\r
1868 //printf("Query %s input %d references lfta input %s\n",query_plan[qtail[i]]->get_node_name().c_str(),t,itbl_name.c_str());
\r
1869 vector<string> src_names;
\r
// strip the silo suffix, then enumerate the per-silo lfta variants
1870 string lfta_base = itbl_name.substr(0,itbl_name.size()-suffix_len);
\r
1871 for(s=0;s<silo_names.size();++s){
\r
1872 string lfta_subsilo = lfta_base + silo_names[s];
\r
1873 //printf("\t%s\n",lfta_subsilo.c_str());
\r
1874 src_names.push_back(lfta_subsilo);
\r
// insert a merge that unions the per-silo lftas, and retarget the input
1876 string merge_node_name = "desilo_"+query_plan[qtail[i]]->get_node_name()+
\r
1877 "_input_"+int_to_string(t);
\r
1878 mrg_qpn *merge_node = new mrg_qpn(merge_node_name,src_names,Schema);
\r
1879 int m_op_pos = Schema->add_table(merge_node->table_layout);
\r
1880 itbls[t]->set_schema(merge_node_name);
\r
1881 itbls[t]->set_schema_ref(m_op_pos);
\r
1882 query_plan.push_back(merge_node);
\r
// rebuild predecessor/successor links after inserting the merges
1887 generate_linkage();
\r
1889 ////////////////////////////////////////
\r
1890 /// End de-siloing
\r
1894 // Given a collection of LFTA stream queries,
\r
1895 // extract their WHERE predicates
\r
1896 // and pass them to an analysis routine which will extract
\r
1897 // common elements
\r
1899 void get_common_lfta_filter(std::vector<stream_query *> lfta_list,table_list *Schema,ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int> &pred_ids){
\r
// NOTE(review): loop-variable declarations (s, c) and brace closers are
// elided in this excerpt.
// One WHERE-clause (CNF) list per lfta query, fed to the common-filter analysis.
1901 std::vector< std::vector<cnf_elem *> > where_list;
\r
1903 // still safe to assume that LFTA queries have a single
\r
1904 // query node, which is at position 0.
\r
1905 for(s=0;s<lfta_list.size();++s){
\r
1906 vector<cnf_elem *> cnf_list = lfta_list[s]->query_plan[0]->get_filter_clause();
\r
// for aggregation queries, expand group-by references inside the predicates
1907 if(lfta_list[s]->query_plan[0]->node_type() == "sgah_qpn"){
\r
1908 gb_table *gtbl = ((sgah_qpn *)(lfta_list[s]->query_plan[0]))->get_gb_tbl();
\r
1910 for(c=0;c<cnf_list.size();++c){
\r
1911 insert_gb_def_pr(cnf_list[c]->pr,gtbl);
\r
1914 where_list.push_back(lfta_list[s]->query_plan[0]->get_filter_clause());
\r
// results are returned through prefilter_preds and pred_ids (out-params)
1917 find_common_filter(where_list,Schema,Ext_fcns,prefilter_preds, pred_ids);
\r
1924 // Given a collection of LFTA stream queries,
\r
1925 // extract the union of all temporal attributes referenced in select clauses
\r
1926 // those attributes will need to be unpacked in prefilter
\r
1928 void get_prefilter_temporal_cids(std::vector<stream_query *> lfta_list, col_id_set &temp_cids){
\r
1930 vector<scalarexp_t *> sl_list;
\r
1931 gb_table *gb_tbl = NULL;
\r
1934 // still safe to assume that LFTA queries have a single
\r
1935 // query node, which is at position 0.
\r
1936 for(s=0;s<lfta_list.size();++s){
\r
1938 if(lfta_list[s]->query_plan[0]->node_type() == "spx_qpn"){
\r
1939 spx_qpn *spx_node = (spx_qpn *)lfta_list[s]->query_plan[0];
\r
1940 sl_list = spx_node->get_select_se_list();
\r
1942 if(lfta_list[s]->query_plan[0]->node_type() == "sgah_qpn"){
\r
1943 sgah_qpn *sgah_node = (sgah_qpn *)lfta_list[s]->query_plan[0];
\r
1944 sl_list = sgah_node->get_select_se_list();
\r
1945 gb_tbl = sgah_node->get_gb_tbl();
\r
1948 for(sl=0;sl<sl_list.size();sl++){
\r
1949 data_type *sdt = sl_list[sl]->get_data_type();
\r
1950 if (sdt->is_temporal()) {
\r
1951 gather_se_col_ids(sl_list[sl],temp_cids, gb_tbl);
\r