xref: /openbmc/qemu/qobject/json-parser.c (revision 6b9606f6)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include <stdarg.h>
15 
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25 
26 typedef struct JSONParserContext
27 {
28     Error *err;
29     struct {
30         QObject **buf;
31         size_t pos;
32         size_t count;
33     } tokens;
34 } JSONParserContext;
35 
/* Assert that @condition does not hold; guards internal invariants. */
#define BUG_ON(condition) assert(!(condition))
37 
38 /**
39  * TODO
40  *
41  * 0) make errors meaningful again
42  * 1) add geometry information to tokens
43  * 3) should we return a parsed size?
44  * 4) deal with premature EOI
45  */
46 
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48 
49 /**
50  * Token manipulators
51  *
52  * tokens are dictionaries that contain a type, a string value, and geometry information
53  * about a token identified by the lexer.  These are routines that make working with
54  * these objects a bit easier.
55  */
56 static const char *token_get_value(QObject *obj)
57 {
58     return qdict_get_str(qobject_to_qdict(obj), "token");
59 }
60 
61 static JSONTokenType token_get_type(QObject *obj)
62 {
63     return qdict_get_int(qobject_to_qdict(obj), "type");
64 }
65 
66 /**
67  * Error handler
68  */
69 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
70                                            QObject *token, const char *msg, ...)
71 {
72     va_list ap;
73     char message[1024];
74     va_start(ap, msg);
75     vsnprintf(message, sizeof(message), msg, ap);
76     va_end(ap);
77     if (ctxt->err) {
78         error_free(ctxt->err);
79         ctxt->err = NULL;
80     }
81     error_setg(&ctxt->err, "JSON parse error, %s", message);
82 }
83 
84 /**
85  * String helpers
86  *
87  * These helpers are used to unescape strings.
88  */
/*
 * Encode the 16-bit value @wchar as a NUL-terminated UTF-8 byte sequence in
 * @buffer.  One, two or three bytes are written ahead of the terminator
 * depending on the magnitude of @wchar, so a @buffer_length of 4 suffices
 * for every input; a buffer that is too small trips BUG_ON().
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar <= 0x007F) {
        /* One byte: 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        buffer[0] = wchar & 0x7F;
        buffer[1] = 0;
        return;
    }

    if (wchar <= 0x07FF) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
        return;
    }

    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    BUG_ON(buffer_length < 4);

    buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
    buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
    buffer[2] = 0x80 | (wchar & 0x3F);
    buffer[3] = 0;
}
111 
/*
 * Return the numeric value (0-15) of the hexadecimal digit @ch, accepting
 * both upper- and lower-case letters, or -1 if @ch is not a hex digit.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if ('0' <= ch && ch <= '9') {
        digit = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if ('A' <= ch && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
124 
125 /**
126  * parse_string(): Parse a json string and return a QObject
127  *
128  *  string
129  *      ""
130  *      " chars "
131  *  chars
132  *      char
133  *      char chars
134  *  char
135  *      any-Unicode-character-
136  *          except-"-or-\-or-
137  *          control-character
138  *      \"
139  *      \\
140  *      \/
141  *      \b
142  *      \f
143  *      \n
144  *      \r
145  *      \t
146  *      \u four-hex-digits
147  */
148 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
149 {
150     const char *ptr = token_get_value(token);
151     QString *str;
152     int double_quote = 1;
153 
154     if (*ptr == '"') {
155         double_quote = 1;
156     } else {
157         double_quote = 0;
158     }
159     ptr++;
160 
161     str = qstring_new();
162     while (*ptr &&
163            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
164         if (*ptr == '\\') {
165             ptr++;
166 
167             switch (*ptr) {
168             case '"':
169                 qstring_append(str, "\"");
170                 ptr++;
171                 break;
172             case '\'':
173                 qstring_append(str, "'");
174                 ptr++;
175                 break;
176             case '\\':
177                 qstring_append(str, "\\");
178                 ptr++;
179                 break;
180             case '/':
181                 qstring_append(str, "/");
182                 ptr++;
183                 break;
184             case 'b':
185                 qstring_append(str, "\b");
186                 ptr++;
187                 break;
188             case 'f':
189                 qstring_append(str, "\f");
190                 ptr++;
191                 break;
192             case 'n':
193                 qstring_append(str, "\n");
194                 ptr++;
195                 break;
196             case 'r':
197                 qstring_append(str, "\r");
198                 ptr++;
199                 break;
200             case 't':
201                 qstring_append(str, "\t");
202                 ptr++;
203                 break;
204             case 'u': {
205                 uint16_t unicode_char = 0;
206                 char utf8_char[4];
207                 int i = 0;
208 
209                 ptr++;
210 
211                 for (i = 0; i < 4; i++) {
212                     if (qemu_isxdigit(*ptr)) {
213                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
214                     } else {
215                         parse_error(ctxt, token,
216                                     "invalid hex escape sequence in string");
217                         goto out;
218                     }
219                     ptr++;
220                 }
221 
222                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
223                 qstring_append(str, utf8_char);
224             }   break;
225             default:
226                 parse_error(ctxt, token, "invalid escape sequence in string");
227                 goto out;
228             }
229         } else {
230             char dummy[2];
231 
232             dummy[0] = *ptr++;
233             dummy[1] = 0;
234 
235             qstring_append(str, dummy);
236         }
237     }
238 
239     return str;
240 
241 out:
242     QDECREF(str);
243     return NULL;
244 }
245 
246 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
247 {
248     QObject *token;
249     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
250     token = ctxt->tokens.buf[ctxt->tokens.pos];
251     ctxt->tokens.pos++;
252     return token;
253 }
254 
255 /* Note: parser_context_{peek|pop}_token do not increment the
256  * token object's refcount. In both cases the references will continue
257  * to be tracked and cleaned up in parser_context_free(), so do not
258  * attempt to free the token object.
259  */
260 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
261 {
262     QObject *token;
263     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
264     token = ctxt->tokens.buf[ctxt->tokens.pos];
265     return token;
266 }
267 
268 static JSONParserContext parser_context_save(JSONParserContext *ctxt)
269 {
270     JSONParserContext saved_ctxt = {0};
271     saved_ctxt.tokens.pos = ctxt->tokens.pos;
272     saved_ctxt.tokens.count = ctxt->tokens.count;
273     saved_ctxt.tokens.buf = ctxt->tokens.buf;
274     return saved_ctxt;
275 }
276 
277 static void parser_context_restore(JSONParserContext *ctxt,
278                                    JSONParserContext saved_ctxt)
279 {
280     ctxt->tokens.pos = saved_ctxt.tokens.pos;
281     ctxt->tokens.count = saved_ctxt.tokens.count;
282     ctxt->tokens.buf = saved_ctxt.tokens.buf;
283 }
284 
285 static void tokens_append_from_iter(QObject *obj, void *opaque)
286 {
287     JSONParserContext *ctxt = opaque;
288     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
289     ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
290     qobject_incref(obj);
291 }
292 
293 static JSONParserContext *parser_context_new(QList *tokens)
294 {
295     JSONParserContext *ctxt;
296     size_t count;
297 
298     if (!tokens) {
299         return NULL;
300     }
301 
302     count = qlist_size(tokens);
303     if (count == 0) {
304         return NULL;
305     }
306 
307     ctxt = g_malloc0(sizeof(JSONParserContext));
308     ctxt->tokens.pos = 0;
309     ctxt->tokens.count = count;
310     ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
311     qlist_iter(tokens, tokens_append_from_iter, ctxt);
312     ctxt->tokens.pos = 0;
313 
314     return ctxt;
315 }
316 
317 /* to support error propagation, ctxt->err must be freed separately */
318 static void parser_context_free(JSONParserContext *ctxt)
319 {
320     int i;
321     if (ctxt) {
322         for (i = 0; i < ctxt->tokens.count; i++) {
323             qobject_decref(ctxt->tokens.buf[i]);
324         }
325         g_free(ctxt->tokens.buf);
326         g_free(ctxt);
327     }
328 }
329 
330 /**
331  * Parsing rules
332  */
333 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
334 {
335     QObject *key = NULL, *token = NULL, *value, *peek;
336     JSONParserContext saved_ctxt = parser_context_save(ctxt);
337 
338     peek = parser_context_peek_token(ctxt);
339     if (peek == NULL) {
340         parse_error(ctxt, NULL, "premature EOI");
341         goto out;
342     }
343 
344     key = parse_value(ctxt, ap);
345     if (!key || qobject_type(key) != QTYPE_QSTRING) {
346         parse_error(ctxt, peek, "key is not a string in object");
347         goto out;
348     }
349 
350     token = parser_context_pop_token(ctxt);
351     if (token == NULL) {
352         parse_error(ctxt, NULL, "premature EOI");
353         goto out;
354     }
355 
356     if (token_get_type(token) != JSON_COLON) {
357         parse_error(ctxt, token, "missing : in object pair");
358         goto out;
359     }
360 
361     value = parse_value(ctxt, ap);
362     if (value == NULL) {
363         parse_error(ctxt, token, "Missing value in dict");
364         goto out;
365     }
366 
367     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
368 
369     qobject_decref(key);
370 
371     return 0;
372 
373 out:
374     parser_context_restore(ctxt, saved_ctxt);
375     qobject_decref(key);
376 
377     return -1;
378 }
379 
380 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
381 {
382     QDict *dict = NULL;
383     QObject *token, *peek;
384     JSONParserContext saved_ctxt = parser_context_save(ctxt);
385 
386     token = parser_context_pop_token(ctxt);
387     if (token == NULL) {
388         goto out;
389     }
390 
391     if (token_get_type(token) != JSON_LCURLY) {
392         goto out;
393     }
394 
395     dict = qdict_new();
396 
397     peek = parser_context_peek_token(ctxt);
398     if (peek == NULL) {
399         parse_error(ctxt, NULL, "premature EOI");
400         goto out;
401     }
402 
403     if (token_get_type(peek) != JSON_RCURLY) {
404         if (parse_pair(ctxt, dict, ap) == -1) {
405             goto out;
406         }
407 
408         token = parser_context_pop_token(ctxt);
409         if (token == NULL) {
410             parse_error(ctxt, NULL, "premature EOI");
411             goto out;
412         }
413 
414         while (token_get_type(token) != JSON_RCURLY) {
415             if (token_get_type(token) != JSON_COMMA) {
416                 parse_error(ctxt, token, "expected separator in dict");
417                 goto out;
418             }
419 
420             if (parse_pair(ctxt, dict, ap) == -1) {
421                 goto out;
422             }
423 
424             token = parser_context_pop_token(ctxt);
425             if (token == NULL) {
426                 parse_error(ctxt, NULL, "premature EOI");
427                 goto out;
428             }
429         }
430     } else {
431         (void)parser_context_pop_token(ctxt);
432     }
433 
434     return QOBJECT(dict);
435 
436 out:
437     parser_context_restore(ctxt, saved_ctxt);
438     QDECREF(dict);
439     return NULL;
440 }
441 
442 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
443 {
444     QList *list = NULL;
445     QObject *token, *peek;
446     JSONParserContext saved_ctxt = parser_context_save(ctxt);
447 
448     token = parser_context_pop_token(ctxt);
449     if (token == NULL) {
450         goto out;
451     }
452 
453     if (token_get_type(token) != JSON_LSQUARE) {
454         goto out;
455     }
456 
457     list = qlist_new();
458 
459     peek = parser_context_peek_token(ctxt);
460     if (peek == NULL) {
461         parse_error(ctxt, NULL, "premature EOI");
462         goto out;
463     }
464 
465     if (token_get_type(peek) != JSON_RSQUARE) {
466         QObject *obj;
467 
468         obj = parse_value(ctxt, ap);
469         if (obj == NULL) {
470             parse_error(ctxt, token, "expecting value");
471             goto out;
472         }
473 
474         qlist_append_obj(list, obj);
475 
476         token = parser_context_pop_token(ctxt);
477         if (token == NULL) {
478             parse_error(ctxt, NULL, "premature EOI");
479             goto out;
480         }
481 
482         while (token_get_type(token) != JSON_RSQUARE) {
483             if (token_get_type(token) != JSON_COMMA) {
484                 parse_error(ctxt, token, "expected separator in list");
485                 goto out;
486             }
487 
488             obj = parse_value(ctxt, ap);
489             if (obj == NULL) {
490                 parse_error(ctxt, token, "expecting value");
491                 goto out;
492             }
493 
494             qlist_append_obj(list, obj);
495 
496             token = parser_context_pop_token(ctxt);
497             if (token == NULL) {
498                 parse_error(ctxt, NULL, "premature EOI");
499                 goto out;
500             }
501         }
502     } else {
503         (void)parser_context_pop_token(ctxt);
504     }
505 
506     return QOBJECT(list);
507 
508 out:
509     parser_context_restore(ctxt, saved_ctxt);
510     QDECREF(list);
511     return NULL;
512 }
513 
514 static QObject *parse_keyword(JSONParserContext *ctxt)
515 {
516     QObject *token, *ret;
517     JSONParserContext saved_ctxt = parser_context_save(ctxt);
518     const char *val;
519 
520     token = parser_context_pop_token(ctxt);
521     if (token == NULL) {
522         goto out;
523     }
524 
525     if (token_get_type(token) != JSON_KEYWORD) {
526         goto out;
527     }
528 
529     val = token_get_value(token);
530 
531     if (!strcmp(val, "true")) {
532         ret = QOBJECT(qbool_from_bool(true));
533     } else if (!strcmp(val, "false")) {
534         ret = QOBJECT(qbool_from_bool(false));
535     } else if (!strcmp(val, "null")) {
536         ret = qnull();
537     } else {
538         parse_error(ctxt, token, "invalid keyword '%s'", val);
539         goto out;
540     }
541 
542     return ret;
543 
544 out:
545     parser_context_restore(ctxt, saved_ctxt);
546 
547     return NULL;
548 }
549 
550 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
551 {
552     QObject *token = NULL, *obj;
553     JSONParserContext saved_ctxt = parser_context_save(ctxt);
554     const char *val;
555 
556     if (ap == NULL) {
557         goto out;
558     }
559 
560     token = parser_context_pop_token(ctxt);
561     if (token == NULL) {
562         goto out;
563     }
564 
565     if (token_get_type(token) != JSON_ESCAPE) {
566         goto out;
567     }
568 
569     val = token_get_value(token);
570 
571     if (!strcmp(val, "%p")) {
572         obj = va_arg(*ap, QObject *);
573     } else if (!strcmp(val, "%i")) {
574         obj = QOBJECT(qbool_from_bool(va_arg(*ap, int)));
575     } else if (!strcmp(val, "%d")) {
576         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
577     } else if (!strcmp(val, "%ld")) {
578         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
579     } else if (!strcmp(val, "%lld") ||
580                !strcmp(val, "%I64d")) {
581         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
582     } else if (!strcmp(val, "%s")) {
583         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
584     } else if (!strcmp(val, "%f")) {
585         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
586     } else {
587         goto out;
588     }
589 
590     return obj;
591 
592 out:
593     parser_context_restore(ctxt, saved_ctxt);
594 
595     return NULL;
596 }
597 
598 static QObject *parse_literal(JSONParserContext *ctxt)
599 {
600     QObject *token, *obj;
601     JSONParserContext saved_ctxt = parser_context_save(ctxt);
602 
603     token = parser_context_pop_token(ctxt);
604     if (token == NULL) {
605         goto out;
606     }
607 
608     switch (token_get_type(token)) {
609     case JSON_STRING:
610         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
611         break;
612     case JSON_INTEGER: {
613         /* A possibility exists that this is a whole-valued float where the
614          * fractional part was left out due to being 0 (.0). It's not a big
615          * deal to treat these as ints in the parser, so long as users of the
616          * resulting QObject know to expect a QInt in place of a QFloat in
617          * cases like these.
618          *
619          * However, in some cases these values will overflow/underflow a
620          * QInt/int64 container, thus we should assume these are to be handled
621          * as QFloats/doubles rather than silently changing their values.
622          *
623          * strtoll() indicates these instances by setting errno to ERANGE
624          */
625         int64_t value;
626 
627         errno = 0; /* strtoll doesn't set errno on success */
628         value = strtoll(token_get_value(token), NULL, 10);
629         if (errno != ERANGE) {
630             obj = QOBJECT(qint_from_int(value));
631             break;
632         }
633         /* fall through to JSON_FLOAT */
634     }
635     case JSON_FLOAT:
636         /* FIXME dependent on locale */
637         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
638         break;
639     default:
640         goto out;
641     }
642 
643     return obj;
644 
645 out:
646     parser_context_restore(ctxt, saved_ctxt);
647 
648     return NULL;
649 }
650 
651 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
652 {
653     QObject *obj;
654 
655     obj = parse_object(ctxt, ap);
656     if (obj == NULL) {
657         obj = parse_array(ctxt, ap);
658     }
659     if (obj == NULL) {
660         obj = parse_escape(ctxt, ap);
661     }
662     if (obj == NULL) {
663         obj = parse_keyword(ctxt);
664     }
665     if (obj == NULL) {
666         obj = parse_literal(ctxt);
667     }
668 
669     return obj;
670 }
671 
672 QObject *json_parser_parse(QList *tokens, va_list *ap)
673 {
674     return json_parser_parse_err(tokens, ap, NULL);
675 }
676 
677 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
678 {
679     JSONParserContext *ctxt = parser_context_new(tokens);
680     QObject *result;
681 
682     if (!ctxt) {
683         return NULL;
684     }
685 
686     result = parse_value(ctxt, ap);
687 
688     error_propagate(errp, ctxt->err);
689 
690     parser_context_free(ctxt);
691 
692     return result;
693 }
694