Added quantiling UDAFs
[com/gs-lite.git] / src / lib / gscpaux / json.cpp
1 // Distributed under the MIT license. Copyright (c) 2010, Ivan Vashchaev\r
2 \r
#include <limits.h>
#include <string.h>
#include "json.h"
5 \r
// True if the character is an ASCII decimal digit ('0'..'9').
// The argument is fully parenthesized and evaluated exactly once, so expressions
// with side effects or low-precedence operators (e.g. *p++, a ? b : c) are safe.
// Anything below '0' wraps to a huge unsigned value and is rejected by the single compare.
#define IS_DIGIT(c) ((unsigned int)((c) - '0') <= 9u)
8 \r
// Convert the decimal text in [first, last) to an int.
//
// Accepts an optional leading '+' or '-' followed by decimal digits and stops at
// the first character that is not a digit.
//
// Returns a pointer to the first character that was not consumed; callers compare
// it with `last` to find out whether the whole range was a valid integer.
//   - no digits at all (including a lone sign): *out = 0, returns the original `first`
//   - value does not fit in an int: *out = 0, returns a pointer to the digit that
//     would have overflowed (which is never `last`)
static char *atoi(char *first, char *last, int *out)
{
	char *const start = first;

	// optional sign
	bool negative = false;
	if (first != last && (*first == '-' || *first == '+'))
	{
		negative = (*first == '-');
		++first;
	}

	// Largest magnitude that fits for this sign: |INT_MIN| is INT_MAX + 1.
	const unsigned int limit = (unsigned int)INT_MAX + (negative ? 1u : 0u);

	// Accumulate the magnitude as unsigned so the range check itself cannot overflow.
	char *const digits = first;
	unsigned int magnitude = 0;
	for (; first != last && *first >= '0' && *first <= '9'; ++first)
	{
		const unsigned int digit = (unsigned int)(*first - '0');
		if (magnitude > (limit - digit) / 10)
		{
			// 10 * magnitude + digit would exceed the limit
			*out = 0;
			return first;
		}
		magnitude = 10 * magnitude + digit;
	}

	if (first == digits)
	{
		// nothing to convert; do not report a bare sign as consumed
		*out = 0;
		return start;
	}

	if (negative)
	{
		// -(int)magnitude is not representable for INT_MIN, so handle it explicitly
		*out = (magnitude > (unsigned int)INT_MAX) ? INT_MIN : -(int)magnitude;
	}
	else
	{
		*out = (int)magnitude;
	}

	return first;
}
35 \r
36 // convert hexadecimal string to unsigned integer\r
37 static char *hatoui(char *first, char *last, unsigned int *out)\r
38 {\r
39         unsigned int result = 0;\r
40         for (; first != last; ++first)\r
41         {\r
42                 int digit;\r
43                 if (IS_DIGIT(*first))\r
44                 {\r
45                         digit = *first - '0';\r
46                 }\r
47                 else if (*first >= 'a' && *first <= 'f')\r
48                 {\r
49                         digit = *first - 'a' + 10;\r
50                 }\r
51                 else if (*first >= 'A' && *first <= 'F')\r
52                 {\r
53                         digit = *first - 'A' + 10;\r
54                 }\r
55                 else\r
56                 {\r
57                         break;\r
58                 }\r
59                 result = 16 * result + (unsigned int)digit;\r
60         }\r
61         *out = result;\r
62 \r
63         return first;\r
64 }\r
65 \r
// Convert the decimal text in [first, last) to a float.
//
// Grammar: [+-] digits [. digits] [(e|E) [+-] digits], with at least one digit
// in the integer or fraction part.
//
// Returns a pointer to the first character that was not consumed; callers compare
// it with `last` to find out whether the whole range was a valid number.
//   - no mantissa digits (e.g. "-", "-."): *out = 0, returns the original `first`
//   - 'e'/'E' not followed by an (optionally signed) digit run: the exponent is not
//     consumed, *out holds the mantissa and the returned pointer is left on the 'e'
static char *atof(char *first, char *last, float *out)
{
	char *const start = first;

	// optional sign
	float sign = 1;
	if (first != last && (*first == '-' || *first == '+'))
	{
		if (*first == '-')
		{
			sign = -1;
		}
		++first;
	}

	// integer part
	bool have_digits = false;
	float result = 0;
	for (; first != last && *first >= '0' && *first <= '9'; ++first)
	{
		result = 10 * result + (float)(*first - '0');
		have_digits = true;
	}

	// fraction part
	if (first != last && *first == '.')
	{
		++first;

		float inv_base = 0.1f;
		for (; first != last && *first >= '0' && *first <= '9'; ++first)
		{
			result += (float)(*first - '0') * inv_base;
			inv_base *= 0.1f;
			have_digits = true;
		}
	}

	if (!have_digits)
	{
		*out = 0;
		return start;
	}

	// mantissa with its sign, before the exponent is applied
	result *= sign;

	// exponent
	if (first != last && (*first == 'e' || *first == 'E'))
	{
		// Scan with a separate cursor so a malformed exponent consumes nothing.
		char *cursor = first + 1;

		bool exponent_negative = false;
		if (cursor != last && (*cursor == '-' || *cursor == '+'))
		{
			exponent_negative = (*cursor == '-');
			++cursor;
		}

		// A float power of ten is already infinite long before 10^1000, so capping
		// the exponent does not change any result; it only prevents int overflow
		// here and bounds the scaling loop below.
		const int max_exponent = 1000;

		bool have_exponent_digits = false;
		int exponent = 0;
		for (; cursor != last && *cursor >= '0' && *cursor <= '9'; ++cursor)
		{
			exponent = 10 * exponent + (*cursor - '0');
			if (exponent > max_exponent)
			{
				exponent = max_exponent;
			}
			have_exponent_digits = true;
		}

		if (have_exponent_digits)
		{
			first = cursor;

			// Skip scaling for a zero mantissa: 0 * inf would produce NaN.
			if (exponent && result != 0)
			{
				float power_of_ten = 10;
				for (; exponent > 1; exponent--)
				{
					power_of_ten *= 10;
				}

				if (exponent_negative)
				{
					result /= power_of_ten;
				}
				else
				{
					result *= power_of_ten;
				}
			}
		}
	}

	*out = result;

	return first;
}
152 \r
153 static inline json_value *json_alloc(block_allocator *allocator)\r
154 {\r
155         json_value *value = (json_value *)allocator->malloc(sizeof(json_value));\r
156         memset(value, 0, sizeof(json_value));\r
157         return value;\r
158 }\r
159 \r
160 static inline void json_append(json_value *lhs, json_value *rhs)\r
161 {\r
162         rhs->parent = lhs;\r
163         if (lhs->last_child)\r
164         {\r
165                 lhs->last_child = lhs->last_child->next_sibling = rhs;\r
166         }\r
167         else\r
168         {\r
169                 lhs->first_child = lhs->last_child = rhs;\r
170         }\r
171 }\r
172 \r
// Report a parse error and return 0 from the enclosing function.
//
// Expects `source`, `error_pos`, `error_desc`, `error_line` and `escaped_newlines`
// to be in scope. Stores the position and description, then derives the line number
// by walking back from the position to `source` and counting '\n' characters
// (the character at `source` itself is not examined). `escaped_newlines` is
// subtracted from the starting value of 1.
//
// Wrapped in do { } while (0) so that it behaves as a single statement (safe in an
// unbraced if/else); the position argument is evaluated only once.
#define ERROR(it, desc) \
	do \
	{ \
		char *error_at_ = (it); \
		*error_pos = error_at_; \
		*error_desc = (desc); \
		*error_line = 1 - escaped_newlines; \
		for (char *error_c_ = error_at_; error_c_ != source; --error_c_) \
			if (*error_c_ == '\n') ++*error_line; \
		return 0; \
	} while (0)

// Fail with "Unexpected character" at `it` when no container is currently open.
// Expects `top` and `it` to be in scope, in addition to what ERROR needs.
#define CHECK_TOP() \
	do \
	{ \
		if (!top) \
		{ \
			ERROR(it, "Unexpected character"); \
		} \
	} while (0)
182 \r
183 json_value *json_parse(char *source, char **error_pos, const char **error_desc, int *error_line, block_allocator *allocator)\r
184 {\r
185         json_value *root = 0;\r
186         json_value *top = 0;\r
187 \r
188         char *name = 0;\r
189         char *it = source;\r
190 \r
191         int escaped_newlines = 0;\r
192 \r
193         while (*it)\r
194         {\r
195                 // skip white space\r
196                 while (*it == '\x20' || *it == '\x9' || *it == '\xD' || *it == '\xA')\r
197                 {\r
198                         ++it;\r
199                 }\r
200 \r
201                 switch (*it)\r
202                 {\r
203                 case '\0':\r
204                         break;\r
205                 case '{':\r
206                 case '[':\r
207                         {\r
208                                 // create new value\r
209                                 json_value *object = json_alloc(allocator);\r
210 \r
211                                 // name\r
212                                 object->name = name;\r
213                                 name = 0;\r
214 \r
215                                 // type\r
216                                 object->type = (*it == '{') ? JSON_OBJECT : JSON_ARRAY;\r
217 \r
218                                 // skip open character\r
219                                 ++it;\r
220 \r
221                                 // set top and root\r
222                                 if (top)\r
223                                 {\r
224                                         json_append(top, object);\r
225                                 }\r
226                                 else if (!root)\r
227                                 {\r
228                                         root = object;\r
229                                 }\r
230                                 else\r
231                                 {\r
232                                         ERROR(it, "Second root. Only one root allowed");\r
233                                 }\r
234                                 top = object;\r
235                         }\r
236                         break;\r
237 \r
238                 case '}':\r
239                 case ']':\r
240                         {\r
241                                 if (!top || top->type != ((*it == '}') ? JSON_OBJECT : JSON_ARRAY))\r
242                                 {\r
243                                         ERROR(it, "Mismatch closing brace/bracket");\r
244                                 }\r
245 \r
246                                 // skip close character\r
247                                 ++it;\r
248 \r
249                                 // set top\r
250                                 top = top->parent;\r
251                         }\r
252                         break;\r
253 \r
254                 case ':':\r
255                         if (!top || top->type != JSON_OBJECT)\r
256                         {\r
257                                 ERROR(it, "Unexpected character");\r
258                         }\r
259                         ++it;\r
260                         break;\r
261 \r
262                 case ',':\r
263                         CHECK_TOP();\r
264                         ++it;\r
265                         break;\r
266 \r
267                 case '"':\r
268                         {\r
269                                 CHECK_TOP();\r
270 \r
271                                 // skip '"' character\r
272                                 ++it;\r
273 \r
274                                 char *first = it;\r
275                                 char *last = it;\r
276                                 while (*it)\r
277                                 {\r
278                                         if ((unsigned char)*it < '\x20')\r
279                                         {\r
280                                                 ERROR(first, "Control characters not allowed in strings");\r
281                                         }\r
282                                         else if (*it == '\\')\r
283                                         {\r
284                                                 switch (it[1])\r
285                                                 {\r
286                                                 case '"':\r
287                                                         *last = '"';\r
288                                                         break;\r
289                                                 case '\\':\r
290                                                         *last = '\\';\r
291                                                         break;\r
292                                                 case '/':\r
293                                                         *last = '/';\r
294                                                         break;\r
295                                                 case 'b':\r
296                                                         *last = '\b';\r
297                                                         break;\r
298                                                 case 'f':\r
299                                                         *last = '\f';\r
300                                                         break;\r
301                                                 case 'n':\r
302                                                         *last = '\n';\r
303                                                         ++escaped_newlines;\r
304                                                         break;\r
305                                                 case 'r':\r
306                                                         *last = '\r';\r
307                                                         break;\r
308                                                 case 't':\r
309                                                         *last = '\t';\r
310                                                         break;\r
311                                                 case 'u':\r
312                                                         {\r
313                                                                 unsigned int codepoint;\r
314                                                                 if (hatoui(it + 2, it + 6, &codepoint) != it + 6)\r
315                                                                 {\r
316                                                                         ERROR(it, "Bad unicode codepoint");\r
317                                                                 }\r
318 \r
319                                                                 if (codepoint <= 0x7F)\r
320                                                                 {\r
321                                                                         *last = (char)codepoint;\r
322                                                                 }\r
323                                                                 else if (codepoint <= 0x7FF)\r
324                                                                 {\r
325                                                                         *last++ = (char)(0xC0 | (codepoint >> 6));\r
326                                                                         *last = (char)(0x80 | (codepoint & 0x3F));\r
327                                                                 }\r
328                                                                 else if (codepoint <= 0xFFFF)\r
329                                                                 {\r
330                                                                         *last++ = (char)(0xE0 | (codepoint >> 12));\r
331                                                                         *last++ = (char)(0x80 | ((codepoint >> 6) & 0x3F));\r
332                                                                         *last = (char)(0x80 | (codepoint & 0x3F));\r
333                                                                 }\r
334                                                         }\r
335                                                         it += 4;\r
336                                                         break;\r
337                                                 default:\r
338                                                         ERROR(first, "Unrecognized escape sequence");\r
339                                                 }\r
340 \r
341                                                 ++last;\r
342                                                 it += 2;\r
343                                         }\r
344                                         else if (*it == '"')\r
345                                         {\r
346                                                 *last = 0;\r
347                                                 ++it;\r
348                                                 break;\r
349                                         }\r
350                                         else\r
351                                         {\r
352                                                 *last++ = *it++;\r
353                                         }\r
354                                 }\r
355 \r
356                                 if (!name && top->type == JSON_OBJECT)\r
357                                 {\r
358                                         // field name in object\r
359                                         name = first;\r
360                                 }\r
361                                 else\r
362                                 {\r
363                                         // new string value\r
364                                         json_value *object = json_alloc(allocator);\r
365 \r
366                                         object->name = name;\r
367                                         name = 0;\r
368 \r
369                                         object->type = JSON_STRING;\r
370                                         object->string_value = first;\r
371 \r
372                                         json_append(top, object);\r
373                                 }\r
374                         }\r
375                         break;\r
376 \r
377                 case 'n':\r
378                 case 't':\r
379                 case 'f':\r
380                         {\r
381                                 CHECK_TOP();\r
382 \r
383                                 // new null/bool value\r
384                                 json_value *object = json_alloc(allocator);\r
385 \r
386                                 object->name = name;\r
387                                 name = 0;\r
388 \r
389                                 // null\r
390                                 if (it[0] == 'n' && it[1] == 'u' && it[2] == 'l' && it[3] == 'l')\r
391                                 {\r
392                                         object->type = JSON_NULL;\r
393                                         it += 4;\r
394                                 }\r
395                                 // true\r
396                                 else if (it[0] == 't' && it[1] == 'r' && it[2] == 'u' && it[3] == 'e')\r
397                                 {\r
398                                         object->type = JSON_BOOL;\r
399                                         object->int_value = 1;\r
400                                         it += 4;\r
401                                 }\r
402                                 // false\r
403                                 else if (it[0] == 'f' && it[1] == 'a' && it[2] == 'l' && it[3] == 's' && it[4] == 'e')\r
404                                 {\r
405                                         object->type = JSON_BOOL;\r
406                                         object->int_value = 0;\r
407                                         it += 5;\r
408                                 }\r
409                                 else\r
410                                 {\r
411                                         ERROR(it, "Unknown identifier");\r
412                                 }\r
413 \r
414                                 json_append(top, object);\r
415                         }\r
416                         break;\r
417 \r
418                 case '-':\r
419                 case '0':\r
420                 case '1':\r
421                 case '2':\r
422                 case '3':\r
423                 case '4':\r
424                 case '5':\r
425                 case '6':\r
426                 case '7':\r
427                 case '8':\r
428                 case '9':\r
429                         {\r
430                                 CHECK_TOP();\r
431 \r
432                                 // new number value\r
433                                 json_value *object = json_alloc(allocator);\r
434 \r
435                                 object->name = name;\r
436                                 name = 0;\r
437 \r
438                                 object->type = JSON_INT;\r
439 \r
440                                 char *first = it;\r
441                                 while (*it != '\x20' && *it != '\x9' && *it != '\xD' && *it != '\xA' && *it != ',' && *it != ']' && *it != '}')\r
442                                 {\r
443                                         if (*it == '.' || *it == 'e' || *it == 'E')\r
444                                         {\r
445                                                 object->type = JSON_FLOAT;\r
446                                         }\r
447                                         ++it;\r
448                                 }\r
449 \r
450                                 if (object->type == JSON_INT && atoi(first, it, &object->int_value) != it)\r
451                                 {\r
452                                         ERROR(first, "Bad integer number");\r
453                                 }\r
454 \r
455                                 if (object->type == JSON_FLOAT && atof(first, it, &object->float_value) != it)\r
456                                 {\r
457                                         ERROR(first, "Bad float number");\r
458                                 }\r
459 \r
460                                 json_append(top, object);\r
461                         }\r
462                         break;\r
463 \r
464                 default:\r
465                         ERROR(it, "Unexpected character");\r
466                 }\r
467         }\r
468 \r
469         if (top)\r
470         {\r
471                 ERROR(it, "Not all objects/arrays have been properly closed");\r
472         }\r
473 \r
474         return root;\r
475 }\r