367c652c4fd3f20a91c70ac21a959880182dc9be
[com/gs-lite.git] / src / tools / ascii2gdat.c
1 /* ------------------------------------------------
2  Copyright 2014 AT&T Intellectual Property
3  Licensed under the Apache License, Version 2.0 (the "License");
4  you may not use this file except in compliance with the License.
5  You may obtain a copy of the License at
6  
7  http://www.apache.org/licenses/LICENSE-2.0
8  
9  Unless required by applicable law or agreed to in writing, software
10  distributed under the License is distributed on an "AS IS" BASIS,
11  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  See the License for the specific language governing permissions and
13  limitations under the License.
14  ------------------------------------------- */
15
16
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <math.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <schemaparser.h>
#include "gsconfig.h"
#include "gstypes.h"
24
25
26 FILE * ifd;
27 FILE * ofd;
28 FILE * sfd;
29
30 #define MAX_GDAT_HEADER 10000
31
32 gs_int8_t buf[2*MAXTUPLESZ];
33 gs_int8_t schema_buf[MAX_GDAT_HEADER];
34 gs_int8_t header_buf[MAX_GDAT_HEADER];
35
36
37 gs_retval_t read_tuple(gs_uint32_t * sz, gs_sp_t data, gs_int32_t maxlen) {
38     gs_uint32_t nsz;
39     static gs_uint32_t read=0;
40 again:
41     if (fread(&nsz,sizeof(gs_uint32_t),1,ifd)!=1) {
42         
43         exit(0);
44     }
45     read=read+sizeof(gs_uint32_t);
46     *sz=ntohl(nsz);
47     if ((*sz)>maxlen) {
48         fprintf(stderr,"INTERNAL ERROR tuple to long for fixed buffer. Tuple sz %u\n",
49                 (*sz));
50         *sz=0;
51         return 0;
52     }
53     if (*sz==0) goto again;
54     if (fread(data,(*sz),1,ifd)!=1) {
55         fprintf(stderr,"UNEXPECTED END OF FILE. Tried to read tuple of size %u\n",
56                 (*sz));
57         exit(0);
58     }
59     read=read+*sz;
60     return 0;
61 }
62
63 int main(int argc, char** argv) {
64     gs_schemahandle_t schema;
65     
66     gs_uint32_t rsize;
67     gs_int8_t rbuf[2*MAXTUPLESZ];
68     gs_int8_t wbuf[2*MAXTUPLESZ];
69     
70     gs_int32_t numberoffields;
71     gs_int32_t verbose=0;
72     gs_int32_t y;
73     gs_int32_t parserversion;
74     gs_uint32_t schemalen;
75     gs_sp_t me;
76     gs_int32_t ch;
77     
78     me=argv[0];
79     
80     if (argc<2) {
81         fprintf(stderr,
82                 "%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",
83                 me,me);
84         exit(1);
85     }
86     
87     while ((ch = getopt(argc, argv, "vx")) != -1) {
88         switch(ch) {
89             case 'v':
90                 verbose=1;
91                 break;
92             case 'x':
93                 verbose=2;
94         }
95     }
96     argc -= optind;
97     if (argc < 1) {
98         fprintf(stderr,"%s::usage: %s -v -x <schema_file_name> <input_file_name> <output_file_name>\n",
99                 me,me);
100         exit(1);
101     }
102     argv += optind;
103
104     if ((sfd=fopen(argv[0],"r"))==0) {
105         fprintf(stderr,"%s::error:could not open schema file %s\n",
106                 me,argv[0]);
107         exit(1);
108     }
109
110     schemalen = fread(schema_buf, 1, MAX_GDAT_HEADER, sfd);
111     schema_buf[schemalen] = 0;
112     schema = ftaschema_parse_string_prot(schema_buf);
113     if (schema < 0) {
114         fprintf(stderr,"%s::error:unable to parse schema file %s\n",
115                 me,argv[0]);
116         exit(1);        
117     }
118     fclose(sfd);
119     
120     argv++;
121     
122     if ((strcmp(argv[0],"-")!=0)&&(strcmp(argv[0],"stdin")!=0)) {
123         if ((ifd=fopen(argv[0],"r"))==0) {
124             fprintf(stderr,"%s::error:could not open input file %s\n",
125                     me,argv[0]);
126             exit(1);
127         }
128     } else {
129         ifd = stdin;
130     }
131
132     argv++;
133
134     if ((ofd=fopen(argv[0],"wb"))==0) {
135         fprintf(stderr,"%s::error:could not open output file %s\n",
136                 me,argv[0]);
137         exit(1);
138     }
139     
140     parserversion = get_schemaparser_version();
141
142     // write GDAT header
143         sprintf(header_buf,"GDAT\nVERSION:%u\nSCHEMALENGTH:%lu\n",
144                         parserversion,schemalen+1);
145     // need to get ASCII version of schema
146         fwrite(header_buf,strlen(header_buf),1,ofd);
147         fwrite(schema_buf,schemalen+1,1,ofd);
148
149     /////////////////////////////////////////////
150
151     
152     if ((numberoffields=ftaschema_tuple_len(schema))<0) {
153         fprintf(stderr,"%s::error:could not get number of fields in schema\n",
154                 me);
155         exit(1);
156     }
157     if (verbose==1) {
158         for(y=0; y<numberoffields;y++) {
159             printf("%s",ftaschema_field_name(schema,y));
160             if (y<numberoffields-1) printf("|");
161         }
162         printf("\n");
163     }
164
165     unsigned long long tup_cnt = 0;
166
167     while(!feof(ifd)) {
168         fgets(rbuf,2*MAXTUPLESZ,ifd);
169         
170         gs_uint32_t tuple_pos = ftaschema_get_tuple_metadata_offset(schema) + 1;     // position to copy string payload
171
172         char* field = strtok(rbuf, ","); 
173         y = 0;
174         do {
175             gs_int32_t field_type = ftaschema_get_field_type_by_index(schema, y);
176             gs_int32_t field_offset = ftaschema_get_field_offset_by_index(schema, y);
177
178             gs_int32_t i;
179             gs_uint32_t ui;      
180             gs_uint64_t ul;   
181             gs_uint64_t l; 
182             gs_float_t f,intpart,fractpart;     
183             struct timeval t;  
184             struct vstring32 vs;           
185             struct hfta_ipv6_str ip6;       
186             gs_uint32_t v[8];     
187                 unsigned ip1,ip2,ip3,ip4;
188
189             switch (field_type) {
190                 case BOOL_TYPE:    
191                     ui=(strncasecmp("TRUE",field,4)==0);
192                     memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    
193                     break;
194                 case INT_TYPE:
195                     sscanf(field,"%d",&i);
196                     memcpy(wbuf+field_offset,&i,sizeof(i));
197                     break;
198                 case UINT_TYPE:
199                 case USHORT_TYPE:                            
200                     sscanf(field,"%u",&ui);
201                     memcpy(wbuf+field_offset,&ui,sizeof(ui));                                    
202                     break;
203                 case ULLONG_TYPE:
204                     sscanf(field,"%llu",&ul);
205                     memcpy(wbuf+field_offset,&ul,sizeof(ul));                    
206                     break;
207                 case LLONG_TYPE:
208                     sscanf(field,"%ldu",&l);
209                     memcpy(wbuf+field_offset,&l,sizeof(l));                        
210                     break;
211                 case FLOAT_TYPE:
212                     sscanf(field,"%f",&f);
213                     memcpy(wbuf+field_offset,&f,sizeof(f));                        
214                     break;
215                 case VSTR_TYPE:
216                 {
217                     if (!strcmp(field, " "))
218                         field[0] = 0;
219                     vs.length = strlen(field);
220                     vs.offset = tuple_pos;
221                     vs.reserved = 0;      
222                     memcpy(wbuf+vs.offset, field, vs.length);
223                     memcpy(wbuf+field_offset,&vs,sizeof(vs));  
224                     tuple_pos += vs.length;
225                 }
226                     break;   
227                 case IP_TYPE:
228                     sscanf(field,"%u.%u.%u.%u",&ip1,&ip2,&ip3,&ip4);
229                         ui=(ip1<<24)|(ip2<<16)|(ip3<<8)|ip4;
230                     memcpy(wbuf+field_offset,&ui,sizeof(ui));                        
231                     break;  
232                 case IPV6_TYPE:
233                     sscanf(field,"%x:%x:%x:%x:%x:%x:%x:%x",&v[0],&v[1],&v[2],&v[3],&v[4],&v[5],&v[6],&v[7]);
234                         ip6.v[0]=htonl(v[0]<<16|v[1]);
235                         ip6.v[1]=htonl(v[2]<<16|v[3]);
236                         ip6.v[2]=htonl(v[4]<<16|v[5]);
237                         ip6.v[3]=htonl(v[6]<<16|v[7]);
238                     memcpy(wbuf+field_offset,&ip6,sizeof(ip6));                        
239                     break;                                                        
240                 case TIMEVAL_TYPE:
241                     sscanf(field,"%lf sec",&f);  
242                     fractpart=modf(f,&intpart);                  
243                     t.tv_sec = intpart;
244                     t.tv_usec = fractpart * 1000000;
245                     memcpy(wbuf+field_offset,&t,sizeof(t));                        
246                     break; 
247                 default:
248                     break;
249             }
250             y++;
251             field = strtok(NULL, ",");            
252         } while (y < numberoffields);
253         gs_uint32_t tup_len = htonl(tuple_pos);
254         fwrite(&tup_len,sizeof(gs_uint32_t),1,ofd);
255         fwrite(wbuf,tuple_pos,1,ofd);
256         tup_cnt++;
257
258         if (tup_cnt % 1000000 == 0)
259             printf("dumped %llu tuples\n", tup_cnt);
260     }
261
262     fclose(ifd);
263     fclose(ofd);
264     if (verbose!=0) fflush(stdout);
265
266     exit(0);
267 }
268