Fixed newline characters throughout the code
[com/gs-lite.git] / src / tools / ascii2gdat.c
index 4c32970..367c652 100644 (file)
-/* ------------------------------------------------\r
- Copyright 2014 AT&T Intellectual Property\r
- Licensed under the Apache License, Version 2.0 (the "License");\r
- you may not use this file except in compliance with the License.\r
- You may obtain a copy of the License at\r
\r
- http://www.apache.org/licenses/LICENSE-2.0\r
\r
- Unless required by applicable law or agreed to in writing, software\r
- distributed under the License is distributed on an "AS IS" BASIS,\r
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- See the License for the specific language governing permissions and\r
- limitations under the License.\r
- ------------------------------------------- */\r
-\r
-\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-#include <unistd.h>\r
-#include <schemaparser.h>\r
-#include "gsconfig.h"\r
-#include "gstypes.h"\r
-#include <math.h>\r
-\r
-\r
-FILE * ifd;\r
-FILE * ofd;\r
-FILE * sfd;\r
-\r
-#define MAX_GDAT_HEADER 10000\r
-\r
-gs_int8_t buf[2*MAXTUPLESZ];\r
-gs_int8_t schema_buf[MAX_GDAT_HEADER];\r
-gs_int8_t header_buf[MAX_GDAT_HEADER];\r
-\r
-\r
-gs_retval_t read_tuple(gs_uint32_t * sz, gs_sp_t data, gs_int32_t maxlen) {\r
-    gs_uint32_t nsz;\r
-    static gs_uint32_t read=0;\r
-again:\r
-    if (fread(&nsz,sizeof(gs_uint32_t),1,ifd)!=1) {\r
-        \r
-        exit(0);\r
-    }\r
-    read=read+sizeof(gs_uint32_t);\r
-    *sz=ntohl(nsz);\r
-    if ((*sz)>maxlen) {\r
-        fprintf(stderr,"INTERNAL ERROR tuple to long for fixed buffer. Tuple sz %u\n",\r
-                (*sz));\r
-        *sz=0;\r
-        return 0;\r
-    }\r
-    if (*sz==0) goto again;\r
-    if (fread(data,(*sz),1,ifd)!=1) {\r
-        fprintf(stderr,"UNEXPECTED END OF FILE. Tried to read tuple of size %u\n",\r
-                (*sz));\r
-        exit(0);\r
-    }\r
-    read=read+*sz;\r
-    return 0;\r
-}\r
-\r
-int main(int argc, char** argv) {\r
-    gs_schemahandle_t schema;\r
-    \r
-    gs_uint32_t rsize;\r
-    gs_int8_t rbuf[2*MAXTUPLESZ];\r
-    gs_int8_t wbuf[2*MAXTUPLESZ];\r
-    \r
-    gs_int32_t numberoffields;\r
-    gs_int32_t verbose=0;\r
-    gs_int32_t y;\r
-    gs_int32_t parserversion;\r
-    gs_uint32_t schemalen;\r
-    gs_sp_t me;\r
-    gs_int32_t ch;\r
-    \r
-    me=argv[0];\r
-    \r
-    if (argc<2) {\r
-        fprintf(stderr,\r
-                "%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",\r
-                me,me);\r
-        exit(1);\r
-    }\r
-    \r
-    while ((ch = getopt(argc, argv, "vx")) != -1) {\r
-        switch(ch) {\r
-            case 'v':\r
-                verbose=1;\r
-                break;\r
-            case 'x':\r
-                verbose=2;\r
-        }\r
-    }\r
-    argc -= optind;\r
-    if (argc < 1) {\r
-        fprintf(stderr,"%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",\r
-                me,me);\r
-        exit(1);\r
-    }\r
-    argv += optind;\r
-\r
-    if ((sfd=fopen(argv[0],"r"))==0) {\r
-        fprintf(stderr,"%s::error:could not open schema file %s\n",\r
-                me,argv[0]);\r
-        exit(1);\r
-    }\r
-\r
-    schemalen = fread(schema_buf, 1, MAX_GDAT_HEADER, sfd);\r
-    schema_buf[schemalen] = 0;\r
-    schema = ftaschema_parse_string_prot(schema_buf);\r
-    if (schema < 0) {\r
-        fprintf(stderr,"%s::error:unable to parse schema file %s\n",\r
-                me,argv[0]);\r
-        exit(1);        \r
-    }\r
-    fclose(sfd);\r
-    \r
-    argv++;\r
-    \r
-    if ((strcmp(argv[0],"-")!=0)&&(strcmp(argv[0],"stdin")!=0)) {\r
-        if ((ifd=fopen(argv[0],"r"))==0) {\r
-            fprintf(stderr,"%s::error:could not open input file %s\n",\r
-                    me,argv[0]);\r
-            exit(1);\r
-        }\r
-    } else {\r
-        ifd = stdin;\r
-    }\r
-\r
-    argv++;\r
-\r
-    if ((ofd=fopen(argv[0],"wb"))==0) {\r
-        fprintf(stderr,"%s::error:could not open output file %s\n",\r
-                me,argv[0]);\r
-        exit(1);\r
-    }\r
-    \r
-    parserversion = get_schemaparser_version();\r
-\r
-    // write GDAT header\r
-       sprintf(header_buf,"GDAT\nVERSION:%u\nSCHEMALENGTH:%lu\n",\r
-                       parserversion,schemalen+1);\r
-    // need to get ASCII version of schema\r
-       fwrite(header_buf,strlen(header_buf),1,ofd);\r
-       fwrite(schema_buf,schemalen+1,1,ofd);\r
-\r
-    /////////////////////////////////////////////\r
-\r
-    \r
-    if ((numberoffields=ftaschema_tuple_len(schema))<0) {\r
-        fprintf(stderr,"%s::error:could not get number of fields in schema\n",\r
-                me);\r
-        exit(1);\r
-    }\r
-    if (verbose==1) {\r
-        for(y=0; y<numberoffields;y++) {\r
-            printf("%s",ftaschema_field_name(schema,y));\r
-            if (y<numberoffields-1) printf("|");\r
-        }\r
-        printf("\n");\r
-    }\r
-\r
-    unsigned long long tup_cnt = 0;\r
-\r
-    while(!feof(ifd)) {\r
-        fgets(rbuf,2*MAXTUPLESZ,ifd);\r
-        \r
-        gs_uint32_t tuple_pos = ftaschema_get_tuple_metadata_offset(schema) + 1;     // position to copy string payload\r
-\r
-        char* field = strtok(rbuf, ","); \r
-        y = 0;\r
-        do {\r
-            gs_int32_t field_type = ftaschema_get_field_type_by_index(schema, y);\r
-            gs_int32_t field_offset = ftaschema_get_field_offset_by_index(schema, y);\r
-\r
-            gs_int32_t i;\r
-            gs_uint32_t ui;      \r
-            gs_uint64_t ul;   \r
-            gs_uint64_t l; \r
-            gs_float_t f,intpart,fractpart;     \r
-            struct timeval t;  \r
-            struct vstring32 vs;           \r
-            struct hfta_ipv6_str ip6;       \r
-            gs_uint32_t v[8];     \r
-               unsigned ip1,ip2,ip3,ip4;\r
-\r
-            switch (field_type) {\r
-                case BOOL_TYPE:    \r
-                    ui=(strncasecmp("TRUE",field,4)==0);\r
-                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    \r
-                    break;\r
-                case INT_TYPE:\r
-                    sscanf(field,"%d",&i);\r
-                    memcpy(wbuf+field_offset,&i,sizeof(i));\r
-                    break;\r
-                case UINT_TYPE:\r
-                case USHORT_TYPE:                            \r
-                    sscanf(field,"%u",&ui);\r
-                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    \r
-                    break;\r
-                case ULLONG_TYPE:\r
-                    sscanf(field,"%llu",&ul);\r
-                    memcpy(wbuf+field_offset,&ul,sizeof(ul));                    \r
-                    break;\r
-                case LLONG_TYPE:\r
-                    sscanf(field,"%ldu",&l);\r
-                    memcpy(wbuf+field_offset,&l,sizeof(l));                        \r
-                    break;\r
-                case FLOAT_TYPE:\r
-                    sscanf(field,"%f",&f);\r
-                    memcpy(wbuf+field_offset,&f,sizeof(f));                        \r
-                    break;\r
-                case VSTR_TYPE:\r
-                {\r
-                    if (!strcmp(field, " "))\r
-                        field[0] = 0;\r
-                    vs.length = strlen(field);\r
-                    vs.offset = tuple_pos;\r
-                    vs.reserved = 0;      \r
-                    memcpy(wbuf+vs.offset, field, vs.length);\r
-                    memcpy(wbuf+field_offset,&vs,sizeof(vs));  \r
-                    tuple_pos += vs.length;\r
-                }\r
-                    break;   \r
-                case IP_TYPE:\r
-                    sscanf(field,"%u.%u.%u.%u",&ip1,&ip2,&ip3,&ip4);\r
-                       ui=(ip1<<24)|(ip2<<16)|(ip3<<8)|ip4;\r
-                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                        \r
-                    break;  \r
-                case IPV6_TYPE:\r
-                    sscanf(field,"%x:%x:%x:%x:%x:%x:%x:%x",&v[0],&v[1],&v[2],&v[3],&v[4],&v[5],&v[6],&v[7]);\r
-                       ip6.v[0]=htonl(v[0]<<16|v[1]);\r
-                       ip6.v[1]=htonl(v[2]<<16|v[3]);\r
-                       ip6.v[2]=htonl(v[4]<<16|v[5]);\r
-                       ip6.v[3]=htonl(v[6]<<16|v[7]);\r
-                    memcpy(wbuf+field_offset,&ip6,sizeof(ip6));                        \r
-                    break;                                                        \r
-                case TIMEVAL_TYPE:\r
-                    sscanf(field,"%lf sec",&f);  \r
-                    fractpart=modf(f,&intpart);                  \r
-                    t.tv_sec = intpart;\r
-                    t.tv_usec = fractpart * 1000000;\r
-                    memcpy(wbuf+field_offset,&t,sizeof(t));                        \r
-                    break; \r
-                default:\r
-                    break;\r
-            }\r
-            y++;\r
-            field = strtok(NULL, ",");            \r
-        } while (y < numberoffields);\r
-        gs_uint32_t tup_len = htonl(tuple_pos);\r
-        fwrite(&tup_len,sizeof(gs_uint32_t),1,ofd);\r
-        fwrite(wbuf,tuple_pos,1,ofd);\r
-        tup_cnt++;\r
-\r
-        if (tup_cnt % 1000000 == 0)\r
-            printf("dumped %llu tuples\n", tup_cnt);\r
-    }\r
-\r
-    fclose(ifd);\r
-    fclose(ofd);\r
-    if (verbose!=0) fflush(stdout);\r
-\r
-    exit(0);\r
-}\r
-\r
+/* ------------------------------------------------
+ Copyright 2014 AT&T Intellectual Property
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ------------------------------------------- */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <schemaparser.h>
+#include "gsconfig.h"
+#include "gstypes.h"
+#include <math.h>
+
+
+FILE * ifd;
+FILE * ofd;
+FILE * sfd;
+
+#define MAX_GDAT_HEADER 10000
+
+gs_int8_t buf[2*MAXTUPLESZ];
+gs_int8_t schema_buf[MAX_GDAT_HEADER];
+gs_int8_t header_buf[MAX_GDAT_HEADER];
+
+
+gs_retval_t read_tuple(gs_uint32_t * sz, gs_sp_t data, gs_int32_t maxlen) {
+    gs_uint32_t nsz;
+    static gs_uint32_t read=0;
+again:
+    if (fread(&nsz,sizeof(gs_uint32_t),1,ifd)!=1) {
+        
+        exit(0);
+    }
+    read=read+sizeof(gs_uint32_t);
+    *sz=ntohl(nsz);
+    if ((*sz)>maxlen) {
+        fprintf(stderr,"INTERNAL ERROR tuple to long for fixed buffer. Tuple sz %u\n",
+                (*sz));
+        *sz=0;
+        return 0;
+    }
+    if (*sz==0) goto again;
+    if (fread(data,(*sz),1,ifd)!=1) {
+        fprintf(stderr,"UNEXPECTED END OF FILE. Tried to read tuple of size %u\n",
+                (*sz));
+        exit(0);
+    }
+    read=read+*sz;
+    return 0;
+}
+
+int main(int argc, char** argv) {
+    gs_schemahandle_t schema;
+    
+    gs_uint32_t rsize;
+    gs_int8_t rbuf[2*MAXTUPLESZ];
+    gs_int8_t wbuf[2*MAXTUPLESZ];
+    
+    gs_int32_t numberoffields;
+    gs_int32_t verbose=0;
+    gs_int32_t y;
+    gs_int32_t parserversion;
+    gs_uint32_t schemalen;
+    gs_sp_t me;
+    gs_int32_t ch;
+    
+    me=argv[0];
+    
+    if (argc<2) {
+        fprintf(stderr,
+                "%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",
+                me,me);
+        exit(1);
+    }
+    
+    while ((ch = getopt(argc, argv, "vx")) != -1) {
+        switch(ch) {
+            case 'v':
+                verbose=1;
+                break;
+            case 'x':
+                verbose=2;
+        }
+    }
+    argc -= optind;
+    if (argc < 1) {
+        fprintf(stderr,"%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",
+                me,me);
+        exit(1);
+    }
+    argv += optind;
+
+    if ((sfd=fopen(argv[0],"r"))==0) {
+        fprintf(stderr,"%s::error:could not open schema file %s\n",
+                me,argv[0]);
+        exit(1);
+    }
+
+    schemalen = fread(schema_buf, 1, MAX_GDAT_HEADER, sfd);
+    schema_buf[schemalen] = 0;
+    schema = ftaschema_parse_string_prot(schema_buf);
+    if (schema < 0) {
+        fprintf(stderr,"%s::error:unable to parse schema file %s\n",
+                me,argv[0]);
+        exit(1);        
+    }
+    fclose(sfd);
+    
+    argv++;
+    
+    if ((strcmp(argv[0],"-")!=0)&&(strcmp(argv[0],"stdin")!=0)) {
+        if ((ifd=fopen(argv[0],"r"))==0) {
+            fprintf(stderr,"%s::error:could not open input file %s\n",
+                    me,argv[0]);
+            exit(1);
+        }
+    } else {
+        ifd = stdin;
+    }
+
+    argv++;
+
+    if ((ofd=fopen(argv[0],"wb"))==0) {
+        fprintf(stderr,"%s::error:could not open output file %s\n",
+                me,argv[0]);
+        exit(1);
+    }
+    
+    parserversion = get_schemaparser_version();
+
+    // write GDAT header
+       sprintf(header_buf,"GDAT\nVERSION:%u\nSCHEMALENGTH:%lu\n",
+                       parserversion,schemalen+1);
+    // need to get ASCII version of schema
+       fwrite(header_buf,strlen(header_buf),1,ofd);
+       fwrite(schema_buf,schemalen+1,1,ofd);
+
+    /////////////////////////////////////////////
+
+    
+    if ((numberoffields=ftaschema_tuple_len(schema))<0) {
+        fprintf(stderr,"%s::error:could not get number of fields in schema\n",
+                me);
+        exit(1);
+    }
+    if (verbose==1) {
+        for(y=0; y<numberoffields;y++) {
+            printf("%s",ftaschema_field_name(schema,y));
+            if (y<numberoffields-1) printf("|");
+        }
+        printf("\n");
+    }
+
+    unsigned long long tup_cnt = 0;
+
+    while(!feof(ifd)) {
+        fgets(rbuf,2*MAXTUPLESZ,ifd);
+        
+        gs_uint32_t tuple_pos = ftaschema_get_tuple_metadata_offset(schema) + 1;     // position to copy string payload
+
+        char* field = strtok(rbuf, ","); 
+        y = 0;
+        do {
+            gs_int32_t field_type = ftaschema_get_field_type_by_index(schema, y);
+            gs_int32_t field_offset = ftaschema_get_field_offset_by_index(schema, y);
+
+            gs_int32_t i;
+            gs_uint32_t ui;      
+            gs_uint64_t ul;   
+            gs_uint64_t l; 
+            gs_float_t f,intpart,fractpart;     
+            struct timeval t;  
+            struct vstring32 vs;           
+            struct hfta_ipv6_str ip6;       
+            gs_uint32_t v[8];     
+               unsigned ip1,ip2,ip3,ip4;
+
+            switch (field_type) {
+                case BOOL_TYPE:    
+                    ui=(strncasecmp("TRUE",field,4)==0);
+                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    
+                    break;
+                case INT_TYPE:
+                    sscanf(field,"%d",&i);
+                    memcpy(wbuf+field_offset,&i,sizeof(i));
+                    break;
+                case UINT_TYPE:
+                case USHORT_TYPE:                            
+                    sscanf(field,"%u",&ui);
+                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    
+                    break;
+                case ULLONG_TYPE:
+                    sscanf(field,"%llu",&ul);
+                    memcpy(wbuf+field_offset,&ul,sizeof(ul));                    
+                    break;
+                case LLONG_TYPE:
+                    sscanf(field,"%ldu",&l);
+                    memcpy(wbuf+field_offset,&l,sizeof(l));                        
+                    break;
+                case FLOAT_TYPE:
+                    sscanf(field,"%f",&f);
+                    memcpy(wbuf+field_offset,&f,sizeof(f));                        
+                    break;
+                case VSTR_TYPE:
+                {
+                    if (!strcmp(field, " "))
+                        field[0] = 0;
+                    vs.length = strlen(field);
+                    vs.offset = tuple_pos;
+                    vs.reserved = 0;      
+                    memcpy(wbuf+vs.offset, field, vs.length);
+                    memcpy(wbuf+field_offset,&vs,sizeof(vs));  
+                    tuple_pos += vs.length;
+                }
+                    break;   
+                case IP_TYPE:
+                    sscanf(field,"%u.%u.%u.%u",&ip1,&ip2,&ip3,&ip4);
+                       ui=(ip1<<24)|(ip2<<16)|(ip3<<8)|ip4;
+                    memcpy(wbuf+field_offset,&ui,sizeof(ui));                        
+                    break;  
+                case IPV6_TYPE:
+                    sscanf(field,"%x:%x:%x:%x:%x:%x:%x:%x",&v[0],&v[1],&v[2],&v[3],&v[4],&v[5],&v[6],&v[7]);
+                       ip6.v[0]=htonl(v[0]<<16|v[1]);
+                       ip6.v[1]=htonl(v[2]<<16|v[3]);
+                       ip6.v[2]=htonl(v[4]<<16|v[5]);
+                       ip6.v[3]=htonl(v[6]<<16|v[7]);
+                    memcpy(wbuf+field_offset,&ip6,sizeof(ip6));                        
+                    break;                                                        
+                case TIMEVAL_TYPE:
+                    sscanf(field,"%lf sec",&f);  
+                    fractpart=modf(f,&intpart);                  
+                    t.tv_sec = intpart;
+                    t.tv_usec = fractpart * 1000000;
+                    memcpy(wbuf+field_offset,&t,sizeof(t));                        
+                    break; 
+                default:
+                    break;
+            }
+            y++;
+            field = strtok(NULL, ",");            
+        } while (y < numberoffields);
+        gs_uint32_t tup_len = htonl(tuple_pos);
+        fwrite(&tup_len,sizeof(gs_uint32_t),1,ofd);
+        fwrite(wbuf,tuple_pos,1,ofd);
+        tup_cnt++;
+
+        if (tup_cnt % 1000000 == 0)
+            printf("dumped %llu tuples\n", tup_cnt);
+    }
+
+    fclose(ifd);
+    fclose(ofd);
+    if (verbose!=0) fflush(stdout);
+
+    exit(0);
+}
+