xref: /openbmc/qemu/qobject/json-parser.c (revision d538b255)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include <stdarg.h>
15 
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25 
26 typedef struct JSONParserContext
27 {
28     Error *err;
29     struct {
30         QObject **buf;
31         size_t pos;
32         size_t count;
33     } tokens;
34 } JSONParserContext;
35 
/* Assert that @condition does NOT hold. */
#define BUG_ON(condition) assert(!(condition))
37 
38 /**
39  * TODO
40  *
41  * 0) make errors meaningful again
42  * 1) add geometry information to tokens
43  * 3) should we return a parsed size?
44  * 4) deal with premature EOI
45  */
46 
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48 
49 /**
50  * Token manipulators
51  *
52  * tokens are dictionaries that contain a type, a string value, and geometry information
53  * about a token identified by the lexer.  These are routines that make working with
54  * these objects a bit easier.
55  */
56 static const char *token_get_value(QObject *obj)
57 {
58     return qdict_get_str(qobject_to_qdict(obj), "token");
59 }
60 
61 static JSONTokenType token_get_type(QObject *obj)
62 {
63     return qdict_get_int(qobject_to_qdict(obj), "type");
64 }
65 
66 /**
67  * Error handler
68  */
69 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
70                                            QObject *token, const char *msg, ...)
71 {
72     va_list ap;
73     char message[1024];
74     va_start(ap, msg);
75     vsnprintf(message, sizeof(message), msg, ap);
76     va_end(ap);
77     if (ctxt->err) {
78         error_free(ctxt->err);
79         ctxt->err = NULL;
80     }
81     error_setg(&ctxt->err, "JSON parse error, %s", message);
82 }
83 
84 /**
85  * String helpers
86  *
87  * These helpers are used to unescape strings.
88  */
/*
 * Encode the 16-bit value @wchar as a NUL-terminated byte sequence in
 * @buffer, using one, two or three lead/continuation bytes depending on its
 * magnitude.  @buffer_length must have room for the sequence plus the
 * terminator (2, 3 or 4 bytes respectively); this is enforced with BUG_ON().
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t n = 0;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        buffer[n++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[n++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        buffer[n++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[n++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    }

    buffer[n] = 0;
}
111 
/*
 * Convert one hexadecimal digit character (0-9, a-f, A-F) to its numeric
 * value.  Returns -1 for any other character.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
124 
125 /**
126  * parse_string(): Parse a json string and return a QObject
127  *
128  *  string
129  *      ""
130  *      " chars "
131  *  chars
132  *      char
133  *      char chars
134  *  char
135  *      any-Unicode-character-
136  *          except-"-or-\-or-
137  *          control-character
138  *      \"
139  *      \\
140  *      \/
141  *      \b
142  *      \f
143  *      \n
144  *      \r
145  *      \t
146  *      \u four-hex-digits
147  */
148 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
149 {
150     const char *ptr = token_get_value(token);
151     QString *str;
152     int double_quote = 1;
153 
154     if (*ptr == '"') {
155         double_quote = 1;
156     } else {
157         double_quote = 0;
158     }
159     ptr++;
160 
161     str = qstring_new();
162     while (*ptr &&
163            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
164         if (*ptr == '\\') {
165             ptr++;
166 
167             switch (*ptr) {
168             case '"':
169                 qstring_append(str, "\"");
170                 ptr++;
171                 break;
172             case '\'':
173                 qstring_append(str, "'");
174                 ptr++;
175                 break;
176             case '\\':
177                 qstring_append(str, "\\");
178                 ptr++;
179                 break;
180             case '/':
181                 qstring_append(str, "/");
182                 ptr++;
183                 break;
184             case 'b':
185                 qstring_append(str, "\b");
186                 ptr++;
187                 break;
188             case 'f':
189                 qstring_append(str, "\f");
190                 ptr++;
191                 break;
192             case 'n':
193                 qstring_append(str, "\n");
194                 ptr++;
195                 break;
196             case 'r':
197                 qstring_append(str, "\r");
198                 ptr++;
199                 break;
200             case 't':
201                 qstring_append(str, "\t");
202                 ptr++;
203                 break;
204             case 'u': {
205                 uint16_t unicode_char = 0;
206                 char utf8_char[4];
207                 int i = 0;
208 
209                 ptr++;
210 
211                 for (i = 0; i < 4; i++) {
212                     if (qemu_isxdigit(*ptr)) {
213                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
214                     } else {
215                         parse_error(ctxt, token,
216                                     "invalid hex escape sequence in string");
217                         goto out;
218                     }
219                     ptr++;
220                 }
221 
222                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
223                 qstring_append(str, utf8_char);
224             }   break;
225             default:
226                 parse_error(ctxt, token, "invalid escape sequence in string");
227                 goto out;
228             }
229         } else {
230             char dummy[2];
231 
232             dummy[0] = *ptr++;
233             dummy[1] = 0;
234 
235             qstring_append(str, dummy);
236         }
237     }
238 
239     return str;
240 
241 out:
242     QDECREF(str);
243     return NULL;
244 }
245 
246 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
247 {
248     QObject *token;
249     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
250     token = ctxt->tokens.buf[ctxt->tokens.pos];
251     ctxt->tokens.pos++;
252     return token;
253 }
254 
255 /* Note: parser_context_{peek|pop}_token do not increment the
256  * token object's refcount. In both cases the references will continue
257  * to be tracked and cleaned up in parser_context_free(), so do not
258  * attempt to free the token object.
259  */
260 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
261 {
262     QObject *token;
263     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
264     token = ctxt->tokens.buf[ctxt->tokens.pos];
265     return token;
266 }
267 
268 static void tokens_append_from_iter(QObject *obj, void *opaque)
269 {
270     JSONParserContext *ctxt = opaque;
271     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
272     ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
273     qobject_incref(obj);
274 }
275 
276 static JSONParserContext *parser_context_new(QList *tokens)
277 {
278     JSONParserContext *ctxt;
279     size_t count;
280 
281     if (!tokens) {
282         return NULL;
283     }
284 
285     count = qlist_size(tokens);
286     if (count == 0) {
287         return NULL;
288     }
289 
290     ctxt = g_malloc0(sizeof(JSONParserContext));
291     ctxt->tokens.pos = 0;
292     ctxt->tokens.count = count;
293     ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
294     qlist_iter(tokens, tokens_append_from_iter, ctxt);
295     ctxt->tokens.pos = 0;
296 
297     return ctxt;
298 }
299 
300 /* to support error propagation, ctxt->err must be freed separately */
301 static void parser_context_free(JSONParserContext *ctxt)
302 {
303     int i;
304     if (ctxt) {
305         for (i = 0; i < ctxt->tokens.count; i++) {
306             qobject_decref(ctxt->tokens.buf[i]);
307         }
308         g_free(ctxt->tokens.buf);
309         g_free(ctxt);
310     }
311 }
312 
313 /**
314  * Parsing rules
315  */
316 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
317 {
318     QObject *key = NULL, *token = NULL, *value, *peek;
319 
320     peek = parser_context_peek_token(ctxt);
321     if (peek == NULL) {
322         parse_error(ctxt, NULL, "premature EOI");
323         goto out;
324     }
325 
326     key = parse_value(ctxt, ap);
327     if (!key || qobject_type(key) != QTYPE_QSTRING) {
328         parse_error(ctxt, peek, "key is not a string in object");
329         goto out;
330     }
331 
332     token = parser_context_pop_token(ctxt);
333     if (token == NULL) {
334         parse_error(ctxt, NULL, "premature EOI");
335         goto out;
336     }
337 
338     if (token_get_type(token) != JSON_COLON) {
339         parse_error(ctxt, token, "missing : in object pair");
340         goto out;
341     }
342 
343     value = parse_value(ctxt, ap);
344     if (value == NULL) {
345         parse_error(ctxt, token, "Missing value in dict");
346         goto out;
347     }
348 
349     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
350 
351     qobject_decref(key);
352 
353     return 0;
354 
355 out:
356     qobject_decref(key);
357 
358     return -1;
359 }
360 
361 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
362 {
363     QDict *dict = NULL;
364     QObject *token, *peek;
365 
366     token = parser_context_pop_token(ctxt);
367     assert(token && token_get_type(token) == JSON_LCURLY);
368 
369     dict = qdict_new();
370 
371     peek = parser_context_peek_token(ctxt);
372     if (peek == NULL) {
373         parse_error(ctxt, NULL, "premature EOI");
374         goto out;
375     }
376 
377     if (token_get_type(peek) != JSON_RCURLY) {
378         if (parse_pair(ctxt, dict, ap) == -1) {
379             goto out;
380         }
381 
382         token = parser_context_pop_token(ctxt);
383         if (token == NULL) {
384             parse_error(ctxt, NULL, "premature EOI");
385             goto out;
386         }
387 
388         while (token_get_type(token) != JSON_RCURLY) {
389             if (token_get_type(token) != JSON_COMMA) {
390                 parse_error(ctxt, token, "expected separator in dict");
391                 goto out;
392             }
393 
394             if (parse_pair(ctxt, dict, ap) == -1) {
395                 goto out;
396             }
397 
398             token = parser_context_pop_token(ctxt);
399             if (token == NULL) {
400                 parse_error(ctxt, NULL, "premature EOI");
401                 goto out;
402             }
403         }
404     } else {
405         (void)parser_context_pop_token(ctxt);
406     }
407 
408     return QOBJECT(dict);
409 
410 out:
411     QDECREF(dict);
412     return NULL;
413 }
414 
415 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
416 {
417     QList *list = NULL;
418     QObject *token, *peek;
419 
420     token = parser_context_pop_token(ctxt);
421     assert(token && token_get_type(token) == JSON_LSQUARE);
422 
423     list = qlist_new();
424 
425     peek = parser_context_peek_token(ctxt);
426     if (peek == NULL) {
427         parse_error(ctxt, NULL, "premature EOI");
428         goto out;
429     }
430 
431     if (token_get_type(peek) != JSON_RSQUARE) {
432         QObject *obj;
433 
434         obj = parse_value(ctxt, ap);
435         if (obj == NULL) {
436             parse_error(ctxt, token, "expecting value");
437             goto out;
438         }
439 
440         qlist_append_obj(list, obj);
441 
442         token = parser_context_pop_token(ctxt);
443         if (token == NULL) {
444             parse_error(ctxt, NULL, "premature EOI");
445             goto out;
446         }
447 
448         while (token_get_type(token) != JSON_RSQUARE) {
449             if (token_get_type(token) != JSON_COMMA) {
450                 parse_error(ctxt, token, "expected separator in list");
451                 goto out;
452             }
453 
454             obj = parse_value(ctxt, ap);
455             if (obj == NULL) {
456                 parse_error(ctxt, token, "expecting value");
457                 goto out;
458             }
459 
460             qlist_append_obj(list, obj);
461 
462             token = parser_context_pop_token(ctxt);
463             if (token == NULL) {
464                 parse_error(ctxt, NULL, "premature EOI");
465                 goto out;
466             }
467         }
468     } else {
469         (void)parser_context_pop_token(ctxt);
470     }
471 
472     return QOBJECT(list);
473 
474 out:
475     QDECREF(list);
476     return NULL;
477 }
478 
479 static QObject *parse_keyword(JSONParserContext *ctxt)
480 {
481     QObject *token;
482     const char *val;
483 
484     token = parser_context_pop_token(ctxt);
485     assert(token && token_get_type(token) == JSON_KEYWORD);
486     val = token_get_value(token);
487 
488     if (!strcmp(val, "true")) {
489         return QOBJECT(qbool_from_bool(true));
490     } else if (!strcmp(val, "false")) {
491         return QOBJECT(qbool_from_bool(false));
492     } else if (!strcmp(val, "null")) {
493         return qnull();
494     }
495     parse_error(ctxt, token, "invalid keyword '%s'", val);
496     return NULL;
497 }
498 
499 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
500 {
501     QObject *token;
502     const char *val;
503 
504     if (ap == NULL) {
505         return NULL;
506     }
507 
508     token = parser_context_pop_token(ctxt);
509     assert(token && token_get_type(token) == JSON_ESCAPE);
510     val = token_get_value(token);
511 
512     if (!strcmp(val, "%p")) {
513         return va_arg(*ap, QObject *);
514     } else if (!strcmp(val, "%i")) {
515         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
516     } else if (!strcmp(val, "%d")) {
517         return QOBJECT(qint_from_int(va_arg(*ap, int)));
518     } else if (!strcmp(val, "%ld")) {
519         return QOBJECT(qint_from_int(va_arg(*ap, long)));
520     } else if (!strcmp(val, "%lld") ||
521                !strcmp(val, "%I64d")) {
522         return QOBJECT(qint_from_int(va_arg(*ap, long long)));
523     } else if (!strcmp(val, "%s")) {
524         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
525     } else if (!strcmp(val, "%f")) {
526         return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
527     }
528     return NULL;
529 }
530 
531 static QObject *parse_literal(JSONParserContext *ctxt)
532 {
533     QObject *token;
534 
535     token = parser_context_pop_token(ctxt);
536     assert(token);
537 
538     switch (token_get_type(token)) {
539     case JSON_STRING:
540         return QOBJECT(qstring_from_escaped_str(ctxt, token));
541     case JSON_INTEGER: {
542         /* A possibility exists that this is a whole-valued float where the
543          * fractional part was left out due to being 0 (.0). It's not a big
544          * deal to treat these as ints in the parser, so long as users of the
545          * resulting QObject know to expect a QInt in place of a QFloat in
546          * cases like these.
547          *
548          * However, in some cases these values will overflow/underflow a
549          * QInt/int64 container, thus we should assume these are to be handled
550          * as QFloats/doubles rather than silently changing their values.
551          *
552          * strtoll() indicates these instances by setting errno to ERANGE
553          */
554         int64_t value;
555 
556         errno = 0; /* strtoll doesn't set errno on success */
557         value = strtoll(token_get_value(token), NULL, 10);
558         if (errno != ERANGE) {
559             return QOBJECT(qint_from_int(value));
560         }
561         /* fall through to JSON_FLOAT */
562     }
563     case JSON_FLOAT:
564         /* FIXME dependent on locale */
565         return QOBJECT(qfloat_from_double(strtod(token_get_value(token),
566                                                  NULL)));
567     default:
568         abort();
569     }
570 }
571 
572 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
573 {
574     QObject *token;
575 
576     token = parser_context_peek_token(ctxt);
577     if (token == NULL) {
578         parse_error(ctxt, NULL, "premature EOI");
579         return NULL;
580     }
581 
582     switch (token_get_type(token)) {
583     case JSON_LCURLY:
584         return parse_object(ctxt, ap);
585     case JSON_LSQUARE:
586         return parse_array(ctxt, ap);
587     case JSON_ESCAPE:
588         return parse_escape(ctxt, ap);
589     case JSON_INTEGER:
590     case JSON_FLOAT:
591     case JSON_STRING:
592         return parse_literal(ctxt);
593     case JSON_KEYWORD:
594         return parse_keyword(ctxt);
595     default:
596         parse_error(ctxt, token, "expecting value");
597         return NULL;
598     }
599 }
600 
601 QObject *json_parser_parse(QList *tokens, va_list *ap)
602 {
603     return json_parser_parse_err(tokens, ap, NULL);
604 }
605 
606 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
607 {
608     JSONParserContext *ctxt = parser_context_new(tokens);
609     QObject *result;
610 
611     if (!ctxt) {
612         return NULL;
613     }
614 
615     result = parse_value(ctxt, ap);
616 
617     error_propagate(errp, ctxt->err);
618 
619     parser_context_free(ctxt);
620 
621     return result;
622 }
623