xref: /openbmc/qemu/qobject/json-parser.c (revision c5461660)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include <stdarg.h>
15 
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25 
26 typedef struct JSONParserContext
27 {
28     Error *err;
29     struct {
30         QObject **buf;
31         size_t pos;
32         size_t count;
33     } tokens;
34 } JSONParserContext;
35 
36 #define BUG_ON(cond) assert(!(cond))
37 
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 2) should we return a parsed size?
 * 3) deal with premature EOI
 */
46 
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48 
49 /**
50  * Token manipulators
51  *
52  * tokens are dictionaries that contain a type, a string value, and geometry information
53  * about a token identified by the lexer.  These are routines that make working with
54  * these objects a bit easier.
55  */
56 static const char *token_get_value(QObject *obj)
57 {
58     return qdict_get_str(qobject_to_qdict(obj), "token");
59 }
60 
61 static JSONTokenType token_get_type(QObject *obj)
62 {
63     return qdict_get_int(qobject_to_qdict(obj), "type");
64 }
65 
66 static int token_is_keyword(QObject *obj, const char *value)
67 {
68     if (token_get_type(obj) != JSON_KEYWORD) {
69         return 0;
70     }
71 
72     return strcmp(token_get_value(obj), value) == 0;
73 }
74 
75 static int token_is_escape(QObject *obj, const char *value)
76 {
77     if (token_get_type(obj) != JSON_ESCAPE) {
78         return 0;
79     }
80 
81     return (strcmp(token_get_value(obj), value) == 0);
82 }
83 
84 /**
85  * Error handler
86  */
87 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
88                                            QObject *token, const char *msg, ...)
89 {
90     va_list ap;
91     char message[1024];
92     va_start(ap, msg);
93     vsnprintf(message, sizeof(message), msg, ap);
94     va_end(ap);
95     if (ctxt->err) {
96         error_free(ctxt->err);
97         ctxt->err = NULL;
98     }
99     error_setg(&ctxt->err, "JSON parse error, %s", message);
100 }
101 
102 /**
103  * String helpers
104  *
105  * These helpers are used to unescape strings.
106  */
/*
 * Encode the 16-bit value @wchar as UTF-8 into @buffer, followed by a
 * NUL terminator.  One byte is produced for values up to 0x7F, two for
 * values up to 0x7FF and three for everything else.  @buffer_length must
 * cover the encoded bytes plus the terminator; this is checked with
 * BUG_ON().
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar > 0x07FF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        buffer[3] = 0;
        return;
    }

    if (wchar > 0x007F) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
        return;
    }

    /* 0xxxxxxx */
    BUG_ON(buffer_length < 2);

    buffer[0] = wchar & 0x7F;
    buffer[1] = 0;
}
129 
/*
 * Convert the hexadecimal digit @ch ('0'-'9', 'a'-'f' or 'A'-'F') to
 * its numeric value 0..15.  Returns -1 for any other character.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if ('0' <= ch && ch <= '9') {
        digit = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        digit = ch - 'a' + 10;
    } else if ('A' <= ch && ch <= 'F') {
        digit = ch - 'A' + 10;
    }

    return digit;
}
142 
143 /**
144  * parse_string(): Parse a json string and return a QObject
145  *
146  *  string
147  *      ""
148  *      " chars "
149  *  chars
150  *      char
151  *      char chars
152  *  char
153  *      any-Unicode-character-
154  *          except-"-or-\-or-
155  *          control-character
156  *      \"
157  *      \\
158  *      \/
159  *      \b
160  *      \f
161  *      \n
162  *      \r
163  *      \t
164  *      \u four-hex-digits
165  */
166 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
167 {
168     const char *ptr = token_get_value(token);
169     QString *str;
170     int double_quote = 1;
171 
172     if (*ptr == '"') {
173         double_quote = 1;
174     } else {
175         double_quote = 0;
176     }
177     ptr++;
178 
179     str = qstring_new();
180     while (*ptr &&
181            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
182         if (*ptr == '\\') {
183             ptr++;
184 
185             switch (*ptr) {
186             case '"':
187                 qstring_append(str, "\"");
188                 ptr++;
189                 break;
190             case '\'':
191                 qstring_append(str, "'");
192                 ptr++;
193                 break;
194             case '\\':
195                 qstring_append(str, "\\");
196                 ptr++;
197                 break;
198             case '/':
199                 qstring_append(str, "/");
200                 ptr++;
201                 break;
202             case 'b':
203                 qstring_append(str, "\b");
204                 ptr++;
205                 break;
206             case 'f':
207                 qstring_append(str, "\f");
208                 ptr++;
209                 break;
210             case 'n':
211                 qstring_append(str, "\n");
212                 ptr++;
213                 break;
214             case 'r':
215                 qstring_append(str, "\r");
216                 ptr++;
217                 break;
218             case 't':
219                 qstring_append(str, "\t");
220                 ptr++;
221                 break;
222             case 'u': {
223                 uint16_t unicode_char = 0;
224                 char utf8_char[4];
225                 int i = 0;
226 
227                 ptr++;
228 
229                 for (i = 0; i < 4; i++) {
230                     if (qemu_isxdigit(*ptr)) {
231                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
232                     } else {
233                         parse_error(ctxt, token,
234                                     "invalid hex escape sequence in string");
235                         goto out;
236                     }
237                     ptr++;
238                 }
239 
240                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
241                 qstring_append(str, utf8_char);
242             }   break;
243             default:
244                 parse_error(ctxt, token, "invalid escape sequence in string");
245                 goto out;
246             }
247         } else {
248             char dummy[2];
249 
250             dummy[0] = *ptr++;
251             dummy[1] = 0;
252 
253             qstring_append(str, dummy);
254         }
255     }
256 
257     return str;
258 
259 out:
260     QDECREF(str);
261     return NULL;
262 }
263 
264 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
265 {
266     QObject *token;
267     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
268     token = ctxt->tokens.buf[ctxt->tokens.pos];
269     ctxt->tokens.pos++;
270     return token;
271 }
272 
273 /* Note: parser_context_{peek|pop}_token do not increment the
274  * token object's refcount. In both cases the references will continue
275  * to be tracked and cleaned up in parser_context_free(), so do not
276  * attempt to free the token object.
277  */
278 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
279 {
280     QObject *token;
281     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
282     token = ctxt->tokens.buf[ctxt->tokens.pos];
283     return token;
284 }
285 
286 static JSONParserContext parser_context_save(JSONParserContext *ctxt)
287 {
288     JSONParserContext saved_ctxt = {0};
289     saved_ctxt.tokens.pos = ctxt->tokens.pos;
290     saved_ctxt.tokens.count = ctxt->tokens.count;
291     saved_ctxt.tokens.buf = ctxt->tokens.buf;
292     return saved_ctxt;
293 }
294 
295 static void parser_context_restore(JSONParserContext *ctxt,
296                                    JSONParserContext saved_ctxt)
297 {
298     ctxt->tokens.pos = saved_ctxt.tokens.pos;
299     ctxt->tokens.count = saved_ctxt.tokens.count;
300     ctxt->tokens.buf = saved_ctxt.tokens.buf;
301 }
302 
303 static void tokens_append_from_iter(QObject *obj, void *opaque)
304 {
305     JSONParserContext *ctxt = opaque;
306     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
307     ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
308     qobject_incref(obj);
309 }
310 
311 static JSONParserContext *parser_context_new(QList *tokens)
312 {
313     JSONParserContext *ctxt;
314     size_t count;
315 
316     if (!tokens) {
317         return NULL;
318     }
319 
320     count = qlist_size(tokens);
321     if (count == 0) {
322         return NULL;
323     }
324 
325     ctxt = g_malloc0(sizeof(JSONParserContext));
326     ctxt->tokens.pos = 0;
327     ctxt->tokens.count = count;
328     ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
329     qlist_iter(tokens, tokens_append_from_iter, ctxt);
330     ctxt->tokens.pos = 0;
331 
332     return ctxt;
333 }
334 
335 /* to support error propagation, ctxt->err must be freed separately */
336 static void parser_context_free(JSONParserContext *ctxt)
337 {
338     int i;
339     if (ctxt) {
340         for (i = 0; i < ctxt->tokens.count; i++) {
341             qobject_decref(ctxt->tokens.buf[i]);
342         }
343         g_free(ctxt->tokens.buf);
344         g_free(ctxt);
345     }
346 }
347 
348 /**
349  * Parsing rules
350  */
351 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
352 {
353     QObject *key = NULL, *token = NULL, *value, *peek;
354     JSONParserContext saved_ctxt = parser_context_save(ctxt);
355 
356     peek = parser_context_peek_token(ctxt);
357     if (peek == NULL) {
358         parse_error(ctxt, NULL, "premature EOI");
359         goto out;
360     }
361 
362     key = parse_value(ctxt, ap);
363     if (!key || qobject_type(key) != QTYPE_QSTRING) {
364         parse_error(ctxt, peek, "key is not a string in object");
365         goto out;
366     }
367 
368     token = parser_context_pop_token(ctxt);
369     if (token == NULL) {
370         parse_error(ctxt, NULL, "premature EOI");
371         goto out;
372     }
373 
374     if (token_get_type(token) != JSON_COLON) {
375         parse_error(ctxt, token, "missing : in object pair");
376         goto out;
377     }
378 
379     value = parse_value(ctxt, ap);
380     if (value == NULL) {
381         parse_error(ctxt, token, "Missing value in dict");
382         goto out;
383     }
384 
385     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
386 
387     qobject_decref(key);
388 
389     return 0;
390 
391 out:
392     parser_context_restore(ctxt, saved_ctxt);
393     qobject_decref(key);
394 
395     return -1;
396 }
397 
398 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
399 {
400     QDict *dict = NULL;
401     QObject *token, *peek;
402     JSONParserContext saved_ctxt = parser_context_save(ctxt);
403 
404     token = parser_context_pop_token(ctxt);
405     if (token == NULL) {
406         goto out;
407     }
408 
409     if (token_get_type(token) != JSON_LCURLY) {
410         goto out;
411     }
412 
413     dict = qdict_new();
414 
415     peek = parser_context_peek_token(ctxt);
416     if (peek == NULL) {
417         parse_error(ctxt, NULL, "premature EOI");
418         goto out;
419     }
420 
421     if (token_get_type(peek) != JSON_RCURLY) {
422         if (parse_pair(ctxt, dict, ap) == -1) {
423             goto out;
424         }
425 
426         token = parser_context_pop_token(ctxt);
427         if (token == NULL) {
428             parse_error(ctxt, NULL, "premature EOI");
429             goto out;
430         }
431 
432         while (token_get_type(token) != JSON_RCURLY) {
433             if (token_get_type(token) != JSON_COMMA) {
434                 parse_error(ctxt, token, "expected separator in dict");
435                 goto out;
436             }
437 
438             if (parse_pair(ctxt, dict, ap) == -1) {
439                 goto out;
440             }
441 
442             token = parser_context_pop_token(ctxt);
443             if (token == NULL) {
444                 parse_error(ctxt, NULL, "premature EOI");
445                 goto out;
446             }
447         }
448     } else {
449         (void)parser_context_pop_token(ctxt);
450     }
451 
452     return QOBJECT(dict);
453 
454 out:
455     parser_context_restore(ctxt, saved_ctxt);
456     QDECREF(dict);
457     return NULL;
458 }
459 
460 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
461 {
462     QList *list = NULL;
463     QObject *token, *peek;
464     JSONParserContext saved_ctxt = parser_context_save(ctxt);
465 
466     token = parser_context_pop_token(ctxt);
467     if (token == NULL) {
468         goto out;
469     }
470 
471     if (token_get_type(token) != JSON_LSQUARE) {
472         goto out;
473     }
474 
475     list = qlist_new();
476 
477     peek = parser_context_peek_token(ctxt);
478     if (peek == NULL) {
479         parse_error(ctxt, NULL, "premature EOI");
480         goto out;
481     }
482 
483     if (token_get_type(peek) != JSON_RSQUARE) {
484         QObject *obj;
485 
486         obj = parse_value(ctxt, ap);
487         if (obj == NULL) {
488             parse_error(ctxt, token, "expecting value");
489             goto out;
490         }
491 
492         qlist_append_obj(list, obj);
493 
494         token = parser_context_pop_token(ctxt);
495         if (token == NULL) {
496             parse_error(ctxt, NULL, "premature EOI");
497             goto out;
498         }
499 
500         while (token_get_type(token) != JSON_RSQUARE) {
501             if (token_get_type(token) != JSON_COMMA) {
502                 parse_error(ctxt, token, "expected separator in list");
503                 goto out;
504             }
505 
506             obj = parse_value(ctxt, ap);
507             if (obj == NULL) {
508                 parse_error(ctxt, token, "expecting value");
509                 goto out;
510             }
511 
512             qlist_append_obj(list, obj);
513 
514             token = parser_context_pop_token(ctxt);
515             if (token == NULL) {
516                 parse_error(ctxt, NULL, "premature EOI");
517                 goto out;
518             }
519         }
520     } else {
521         (void)parser_context_pop_token(ctxt);
522     }
523 
524     return QOBJECT(list);
525 
526 out:
527     parser_context_restore(ctxt, saved_ctxt);
528     QDECREF(list);
529     return NULL;
530 }
531 
532 static QObject *parse_keyword(JSONParserContext *ctxt)
533 {
534     QObject *token, *ret;
535     JSONParserContext saved_ctxt = parser_context_save(ctxt);
536 
537     token = parser_context_pop_token(ctxt);
538     if (token == NULL) {
539         goto out;
540     }
541 
542     if (token_get_type(token) != JSON_KEYWORD) {
543         goto out;
544     }
545 
546     if (token_is_keyword(token, "true")) {
547         ret = QOBJECT(qbool_from_bool(true));
548     } else if (token_is_keyword(token, "false")) {
549         ret = QOBJECT(qbool_from_bool(false));
550     } else if (token_is_keyword(token, "null")) {
551         ret = qnull();
552     } else {
553         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
554         goto out;
555     }
556 
557     return ret;
558 
559 out:
560     parser_context_restore(ctxt, saved_ctxt);
561 
562     return NULL;
563 }
564 
565 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
566 {
567     QObject *token = NULL, *obj;
568     JSONParserContext saved_ctxt = parser_context_save(ctxt);
569 
570     if (ap == NULL) {
571         goto out;
572     }
573 
574     token = parser_context_pop_token(ctxt);
575     if (token == NULL) {
576         goto out;
577     }
578 
579     if (token_is_escape(token, "%p")) {
580         obj = va_arg(*ap, QObject *);
581     } else if (token_is_escape(token, "%i")) {
582         obj = QOBJECT(qbool_from_bool(va_arg(*ap, int)));
583     } else if (token_is_escape(token, "%d")) {
584         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
585     } else if (token_is_escape(token, "%ld")) {
586         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
587     } else if (token_is_escape(token, "%lld") ||
588                token_is_escape(token, "%I64d")) {
589         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
590     } else if (token_is_escape(token, "%s")) {
591         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
592     } else if (token_is_escape(token, "%f")) {
593         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
594     } else {
595         goto out;
596     }
597 
598     return obj;
599 
600 out:
601     parser_context_restore(ctxt, saved_ctxt);
602 
603     return NULL;
604 }
605 
606 static QObject *parse_literal(JSONParserContext *ctxt)
607 {
608     QObject *token, *obj;
609     JSONParserContext saved_ctxt = parser_context_save(ctxt);
610 
611     token = parser_context_pop_token(ctxt);
612     if (token == NULL) {
613         goto out;
614     }
615 
616     switch (token_get_type(token)) {
617     case JSON_STRING:
618         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
619         break;
620     case JSON_INTEGER: {
621         /* A possibility exists that this is a whole-valued float where the
622          * fractional part was left out due to being 0 (.0). It's not a big
623          * deal to treat these as ints in the parser, so long as users of the
624          * resulting QObject know to expect a QInt in place of a QFloat in
625          * cases like these.
626          *
627          * However, in some cases these values will overflow/underflow a
628          * QInt/int64 container, thus we should assume these are to be handled
629          * as QFloats/doubles rather than silently changing their values.
630          *
631          * strtoll() indicates these instances by setting errno to ERANGE
632          */
633         int64_t value;
634 
635         errno = 0; /* strtoll doesn't set errno on success */
636         value = strtoll(token_get_value(token), NULL, 10);
637         if (errno != ERANGE) {
638             obj = QOBJECT(qint_from_int(value));
639             break;
640         }
641         /* fall through to JSON_FLOAT */
642     }
643     case JSON_FLOAT:
644         /* FIXME dependent on locale */
645         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
646         break;
647     default:
648         goto out;
649     }
650 
651     return obj;
652 
653 out:
654     parser_context_restore(ctxt, saved_ctxt);
655 
656     return NULL;
657 }
658 
659 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
660 {
661     QObject *obj;
662 
663     obj = parse_object(ctxt, ap);
664     if (obj == NULL) {
665         obj = parse_array(ctxt, ap);
666     }
667     if (obj == NULL) {
668         obj = parse_escape(ctxt, ap);
669     }
670     if (obj == NULL) {
671         obj = parse_keyword(ctxt);
672     }
673     if (obj == NULL) {
674         obj = parse_literal(ctxt);
675     }
676 
677     return obj;
678 }
679 
680 QObject *json_parser_parse(QList *tokens, va_list *ap)
681 {
682     return json_parser_parse_err(tokens, ap, NULL);
683 }
684 
685 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
686 {
687     JSONParserContext *ctxt = parser_context_new(tokens);
688     QObject *result;
689 
690     if (!ctxt) {
691         return NULL;
692     }
693 
694     result = parse_value(ctxt, ap);
695 
696     error_propagate(errp, ctxt->err);
697 
698     parser_context_free(ctxt);
699 
700     return result;
701 }
702