5fdff19b35b60a2917e1d66d9fc37a741b84cf23
[ric-app/mc.git] / schemaparser / json.cc
1 // Distributed under the MIT license. Copyright (c) 2010, Ivan Vashchaev
2
3
4
5
#include <limits.h>
#include <string.h>
#include "json.h"
8
9 namespace mc_json{
10
// True if the character is an ASCII decimal digit ('0'..'9').
// The argument and the whole expansion are parenthesized so the macro can be
// combined safely with other operators (e.g. !IS_DIGIT(x)). The argument is
// still evaluated twice, so it must not have side effects.
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
13
// Convert the decimal text in [first, last) to an int.
//
// Accepts an optional leading '-' or '+' followed by decimal digits and stops
// at the first non-digit character.
//
// Returns the position where parsing stopped. Callers treat any return value
// other than `last` as a malformed number:
//   * if a sign was consumed but no digit follows, the original `first` is
//     returned and *out is set to 0;
//   * if the value does not fit in an int, a pointer to the digit that would
//     overflow is returned and *out is set to INT_MIN / INT_MAX.
static char *atoi(char *first, char *last, int *out)
{
        char *const begin = first;

        // optional sign
        int negative = 0;
        if (first != last)
        {
                if (*first == '-')
                {
                        negative = 1;
                        ++first;
                }
                else if (*first == '+')
                {
                        ++first;
                }
        }

        // Accumulate as a non-positive number: the negative range of int is
        // at least as large as the positive one, so INT_MIN itself can be
        // parsed without ever overflowing the accumulator.
        const int limit = negative ? INT_MIN : -INT_MAX;
        char *const digits = first;
        int result = 0;
        for (; first != last && *first >= '0' && *first <= '9'; ++first)
        {
                const int digit = *first - '0';

                // 10 * result - digit would drop below `limit`
                if (result < (limit + digit) / 10)
                {
                        *out = negative ? INT_MIN : INT_MAX;
                        return first;
                }
                result = 10 * result - digit;
        }

        // no digit at all: undo the sign so "-" / "+" are not accepted
        if (first == digits)
        {
                *out = 0;
                return begin;
        }

        *out = negative ? result : -result;

        return first;
}
40
41 // convert hexadecimal string to unsigned integer
42 static char *hatoui(char *first, char *last, unsigned int *out)
43 {
44         unsigned int result = 0;
45         for (; first != last; ++first)
46         {
47                 int digit;
48                 if (IS_DIGIT(*first))
49                 {
50                         digit = *first - '0';
51                 }
52                 else if (*first >= 'a' && *first <= 'f')
53                 {
54                         digit = *first - 'a' + 10;
55                 }
56                 else if (*first >= 'A' && *first <= 'F')
57                 {
58                         digit = *first - 'A' + 10;
59                 }
60                 else
61                 {
62                         break;
63                 }
64                 result = 16 * result + (unsigned int)digit;
65         }
66         *out = result;
67
68         return first;
69 }
70
// Convert the decimal text in [first, last) to a float.
//
// Grammar accepted: [sign] digits ['.' digits] [('e'|'E') [sign] digits].
// Parsing never reads at or beyond `last`.
//
// Returns the position where parsing stopped; callers treat any value other
// than `last` as a malformed number. If the mantissa contains no digit at all
// (for example "-" or "-."), the original `first` is returned and *out is 0.
// Exponents too large for a float yield +/-infinity (or 0 for negative
// exponents) instead of overflowing the exponent counter.
static char *atof(char *first, char *last, float *out)
{
        // Largest value the exponent accumulator may hold before further
        // digits are ignored. Any exponent above ~39 already saturates a
        // float power of ten to infinity, so capping does not change results;
        // it only prevents int overflow and an unbounded loop below.
        enum { EXPONENT_CAP = 1000 };

        char *const begin = first;

        // sign
        float sign = 1;
        if (first != last)
        {
                if (*first == '-')
                {
                        sign = -1;
                        ++first;
                }
                else if (*first == '+')
                {
                        ++first;
                }
        }

        // integer part
        char *const integer_digits = first;
        float result = 0;
        for (; first != last && *first >= '0' && *first <= '9'; ++first)
        {
                result = 10 * result + (float)(*first - '0');
        }
        bool has_digits = (first != integer_digits);

        // fraction part
        if (first != last && *first == '.')
        {
                ++first;

                char *const fraction_digits = first;
                float inv_base = 0.1f;
                for (; first != last && *first >= '0' && *first <= '9'; ++first)
                {
                        result += (float)(*first - '0') * inv_base;
                        inv_base *= 0.1f;
                }
                has_digits = has_digits || (first != fraction_digits);
        }

        // a sign and/or '.' without any digit is not a number
        if (!has_digits)
        {
                *out = 0;
                return begin;
        }

        // result without exponent
        result *= sign;

        // exponent
        bool exponent_negative = false;
        int exponent = 0;
        if (first != last && (*first == 'e' || *first == 'E'))
        {
                ++first;

                // the range may end right after the 'e'
                if (first != last)
                {
                        if (*first == '-')
                        {
                                exponent_negative = true;
                                ++first;
                        }
                        else if (*first == '+')
                        {
                                ++first;
                        }
                }

                for (; first != last && *first >= '0' && *first <= '9'; ++first)
                {
                        // keep consuming digits, but stop growing the value
                        if (exponent < EXPONENT_CAP)
                        {
                                exponent = 10 * exponent + (*first - '0');
                        }
                }
        }

        if (exponent)
        {
                float power_of_ten = 10;
                for (; exponent > 1; exponent--)
                {
                        power_of_ten *= 10;
                }

                if (exponent_negative)
                {
                        result /= power_of_ten;
                }
                else
                {
                        result *= power_of_ten;
                }
        }

        *out = result;

        return first;
}
157
158 static inline json_value *json_alloc(block_allocator *allocator)
159 {
160         json_value *value = (json_value *)allocator->malloc(sizeof(json_value));
161         memset(value, 0, sizeof(json_value));
162         return value;
163 }
164
165 static inline void json_append(json_value *lhs, json_value *rhs)
166 {
167         rhs->parent = lhs;
168         if (lhs->last_child)
169         {
170                 lhs->last_child = lhs->last_child->next_sibling = rhs;
171         }
172         else
173         {
174                 lhs->first_child = lhs->last_child = rhs;
175         }
176 }
177
// Error helpers for json_parse. Both macros rely on that function's locals
// (source, it, top, escaped_newlines) and output parameters (error_pos,
// error_desc, error_line), and ERROR returns 0 from the enclosing function.
//
// ERROR(it, desc): record the failing position and description, compute the
// 1-based line number and return 0. Line breaks are counted from the failing
// position back to and including the first character of the buffer, so input
// that begins with a newline is numbered correctly. escaped_newlines is
// subtracted because decoded "\n" escapes put real newline characters into
// the buffer that do not correspond to line breaks in the original text.
// The macro is wrapped in do { } while (0) so it behaves as one statement,
// and `it` is evaluated exactly once.
#define ERROR(it, desc)\
        do {\
                char *error_at_ = (it);\
                char *scan_ = error_at_;\
                *error_pos = error_at_;\
                *error_desc = (desc);\
                *error_line = 1 - escaped_newlines;\
                for (;;)\
                {\
                        if (*scan_ == '\n') ++*error_line;\
                        if (scan_ == source) break;\
                        --scan_;\
                }\
                return 0;\
        } while (0)

// CHECK_TOP(): fail with "Unexpected character" when no object/array is open.
#define CHECK_TOP() do { if (!top) { ERROR(it, "Unexpected character"); } } while (0)
187
188 json_value *json_parse(char *source, char **error_pos, const char **error_desc, int *error_line, block_allocator *allocator)
189 {
190         json_value *root = 0;
191         json_value *top = 0;
192
193         char *name = 0;
194         char *it = source;
195
196         int escaped_newlines = 0;
197
198         while (*it)
199         {
200                 // skip white space
201                 while (*it == '\x20' || *it == '\x9' || *it == '\xD' || *it == '\xA')
202                 {
203                         ++it;
204                 }
205
206                 switch (*it)
207                 {
208                 case '\0':
209                         break;
210                 case '{':
211                 case '[':
212                         {
213                                 // create new value
214                                 json_value *object = json_alloc(allocator);
215
216                                 // name
217                                 object->name = name;
218                                 name = 0;
219
220                                 // type
221                                 object->type = (*it == '{') ? JSON_OBJECT : JSON_ARRAY;
222
223                                 // skip open character
224                                 ++it;
225
226                                 // set top and root
227                                 if (top)
228                                 {
229                                         json_append(top, object);
230                                 }
231                                 else if (!root)
232                                 {
233                                         root = object;
234                                 }
235                                 else
236                                 {
237                                         ERROR(it, "Second root. Only one root allowed");
238                                 }
239                                 top = object;
240                         }
241                         break;
242
243                 case '}':
244                 case ']':
245                         {
246                                 if (!top || top->type != ((*it == '}') ? JSON_OBJECT : JSON_ARRAY))
247                                 {
248                                         ERROR(it, "Mismatch closing brace/bracket");
249                                 }
250
251                                 // skip close character
252                                 ++it;
253
254                                 // set top
255                                 top = top->parent;
256                         }
257                         break;
258
259                 case ':':
260                         if (!top || top->type != JSON_OBJECT)
261                         {
262                                 ERROR(it, "Unexpected character");
263                         }
264                         ++it;
265                         break;
266
267                 case ',':
268                         CHECK_TOP();
269                         ++it;
270                         break;
271
272                 case '"':
273                         {
274                                 CHECK_TOP();
275
276                                 // skip '"' character
277                                 ++it;
278
279                                 char *first = it;
280                                 char *last = it;
281                                 while (*it)
282                                 {
283                                         if ((unsigned char)*it < '\x20')
284                                         {
285                                                 ERROR(first, "Control characters not allowed in strings");
286                                         }
287                                         else if (*it == '\\')
288                                         {
289                                                 switch (it[1])
290                                                 {
291                                                 case '"':
292                                                         *last = '"';
293                                                         break;
294                                                 case '\\':
295                                                         *last = '\\';
296                                                         break;
297                                                 case '/':
298                                                         *last = '/';
299                                                         break;
300                                                 case 'b':
301                                                         *last = '\b';
302                                                         break;
303                                                 case 'f':
304                                                         *last = '\f';
305                                                         break;
306                                                 case 'n':
307                                                         *last = '\n';
308                                                         ++escaped_newlines;
309                                                         break;
310                                                 case 'r':
311                                                         *last = '\r';
312                                                         break;
313                                                 case 't':
314                                                         *last = '\t';
315                                                         break;
316                                                 case 'u':
317                                                         {
318                                                                 unsigned int codepoint;
319                                                                 if (hatoui(it + 2, it + 6, &codepoint) != it + 6)
320                                                                 {
321                                                                         ERROR(it, "Bad unicode codepoint");
322                                                                 }
323
324                                                                 if (codepoint <= 0x7F)
325                                                                 {
326                                                                         *last = (char)codepoint;
327                                                                 }
328                                                                 else if (codepoint <= 0x7FF)
329                                                                 {
330                                                                         *last++ = (char)(0xC0 | (codepoint >> 6));
331                                                                         *last = (char)(0x80 | (codepoint & 0x3F));
332                                                                 }
333                                                                 else if (codepoint <= 0xFFFF)
334                                                                 {
335                                                                         *last++ = (char)(0xE0 | (codepoint >> 12));
336                                                                         *last++ = (char)(0x80 | ((codepoint >> 6) & 0x3F));
337                                                                         *last = (char)(0x80 | (codepoint & 0x3F));
338                                                                 }
339                                                         }
340                                                         it += 4;
341                                                         break;
342                                                 default:
343                                                         ERROR(first, "Unrecognized escape sequence");
344                                                 }
345
346                                                 ++last;
347                                                 it += 2;
348                                         }
349                                         else if (*it == '"')
350                                         {
351                                                 *last = 0;
352                                                 ++it;
353                                                 break;
354                                         }
355                                         else
356                                         {
357                                                 *last++ = *it++;
358                                         }
359                                 }
360
361                                 if (!name && top->type == JSON_OBJECT)
362                                 {
363                                         // field name in object
364                                         name = first;
365                                 }
366                                 else
367                                 {
368                                         // new string value
369                                         json_value *object = json_alloc(allocator);
370
371                                         object->name = name;
372                                         name = 0;
373
374                                         object->type = JSON_STRING;
375                                         object->string_value = first;
376
377                                         json_append(top, object);
378                                 }
379                         }
380                         break;
381
382                 case 'n':
383                 case 't':
384                 case 'f':
385                         {
386                                 CHECK_TOP();
387
388                                 // new null/bool value
389                                 json_value *object = json_alloc(allocator);
390
391                                 object->name = name;
392                                 name = 0;
393
394                                 // null
395                                 if (it[0] == 'n' && it[1] == 'u' && it[2] == 'l' && it[3] == 'l')
396                                 {
397                                         object->type = JSON_NULL;
398                                         it += 4;
399                                 }
400                                 // true
401                                 else if (it[0] == 't' && it[1] == 'r' && it[2] == 'u' && it[3] == 'e')
402                                 {
403                                         object->type = JSON_BOOL;
404                                         object->int_value = 1;
405                                         it += 4;
406                                 }
407                                 // false
408                                 else if (it[0] == 'f' && it[1] == 'a' && it[2] == 'l' && it[3] == 's' && it[4] == 'e')
409                                 {
410                                         object->type = JSON_BOOL;
411                                         object->int_value = 0;
412                                         it += 5;
413                                 }
414                                 else
415                                 {
416                                         ERROR(it, "Unknown identifier");
417                                 }
418
419                                 json_append(top, object);
420                         }
421                         break;
422
423                 case '-':
424                 case '0':
425                 case '1':
426                 case '2':
427                 case '3':
428                 case '4':
429                 case '5':
430                 case '6':
431                 case '7':
432                 case '8':
433                 case '9':
434                         {
435                                 CHECK_TOP();
436
437                                 // new number value
438                                 json_value *object = json_alloc(allocator);
439
440                                 object->name = name;
441                                 name = 0;
442
443                                 object->type = JSON_INT;
444
445                                 char *first = it;
446                                 while (*it != '\x20' && *it != '\x9' && *it != '\xD' && *it != '\xA' && *it != ',' && *it != ']' && *it != '}')
447                                 {
448                                         if (*it == '.' || *it == 'e' || *it == 'E')
449                                         {
450                                                 object->type = JSON_FLOAT;
451                                         }
452                                         ++it;
453                                 }
454
455                                 if (object->type == JSON_INT && atoi(first, it, &object->int_value) != it)
456                                 {
457                                         ERROR(first, "Bad integer number");
458                                 }
459
460                                 if (object->type == JSON_FLOAT && atof(first, it, &object->float_value) != it)
461                                 {
462                                         ERROR(first, "Bad float number");
463                                 }
464
465                                 json_append(top, object);
466                         }
467                         break;
468
469                 default:
470                         ERROR(it, "Unexpected character");
471                 }
472         }
473
474         if (top)
475         {
476                 ERROR(it, "Not all objects/arrays have been properly closed");
477         }
478
479         return root;
480 }
481
482 } // end namespace mc_json