xref: /openbmc/qemu/qobject/json-parser.c (revision 8692aa29798e0f2cb5069f2460bbe19ff538fc71)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25 #include "qapi/qmp/json-streamer.h"
26 
27 typedef struct JSONParserContext
28 {
29     Error *err;
30     JSONToken *current;
31     GQueue *buf;
32 } JSONParserContext;
33 
/* Assert that @cond does NOT hold; expands to assert(!(cond)). */
#define BUG_ON(cond) assert(!(cond))
35 
36 /**
37  * TODO
38  *
39  * 0) make errors meaningful again
40  * 1) add geometry information to tokens
41  * 3) should we return a parsed size?
42  * 4) deal with premature EOI
43  */
44 
45 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
46 
47 /**
48  * Error handler
49  */
50 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
51                                            JSONToken *token, const char *msg, ...)
52 {
53     va_list ap;
54     char message[1024];
55     va_start(ap, msg);
56     vsnprintf(message, sizeof(message), msg, ap);
57     va_end(ap);
58     if (ctxt->err) {
59         error_free(ctxt->err);
60         ctxt->err = NULL;
61     }
62     error_setg(&ctxt->err, "JSON parse error, %s", message);
63 }
64 
65 /**
66  * String helpers
67  *
68  * These helpers are used to unescape strings.
69  */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * Values up to 0x7F take one byte, values up to 0x7FF take two bytes and
 * everything else takes three bytes; one more byte is always written for
 * the terminating NUL.  @buffer_length must be large enough for the
 * sequence plus terminator, which is enforced by assertion.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar <= 0x007F) {
        assert(buffer_length >= 2);

        *out++ = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        assert(buffer_length >= 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        assert(buffer_length >= 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    /* Terminator goes right after the last encoded byte */
    *out = 0;
}
92 
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Returns 0..15 for those, or -1
 * when @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
105 
106 /**
107  * parse_string(): Parse a json string and return a QObject
108  *
109  *  string
110  *      ""
111  *      " chars "
112  *  chars
113  *      char
114  *      char chars
115  *  char
116  *      any-Unicode-character-
117  *          except-"-or-\-or-
118  *          control-character
119  *      \"
120  *      \\
121  *      \/
122  *      \b
123  *      \f
124  *      \n
125  *      \r
126  *      \t
127  *      \u four-hex-digits
128  */
129 static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
130                                          JSONToken *token)
131 {
132     const char *ptr = token->str;
133     QString *str;
134     int double_quote = 1;
135 
136     if (*ptr == '"') {
137         double_quote = 1;
138     } else {
139         double_quote = 0;
140     }
141     ptr++;
142 
143     str = qstring_new();
144     while (*ptr &&
145            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
146         if (*ptr == '\\') {
147             ptr++;
148 
149             switch (*ptr) {
150             case '"':
151                 qstring_append(str, "\"");
152                 ptr++;
153                 break;
154             case '\'':
155                 qstring_append(str, "'");
156                 ptr++;
157                 break;
158             case '\\':
159                 qstring_append(str, "\\");
160                 ptr++;
161                 break;
162             case '/':
163                 qstring_append(str, "/");
164                 ptr++;
165                 break;
166             case 'b':
167                 qstring_append(str, "\b");
168                 ptr++;
169                 break;
170             case 'f':
171                 qstring_append(str, "\f");
172                 ptr++;
173                 break;
174             case 'n':
175                 qstring_append(str, "\n");
176                 ptr++;
177                 break;
178             case 'r':
179                 qstring_append(str, "\r");
180                 ptr++;
181                 break;
182             case 't':
183                 qstring_append(str, "\t");
184                 ptr++;
185                 break;
186             case 'u': {
187                 uint16_t unicode_char = 0;
188                 char utf8_char[4];
189                 int i = 0;
190 
191                 ptr++;
192 
193                 for (i = 0; i < 4; i++) {
194                     if (qemu_isxdigit(*ptr)) {
195                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
196                     } else {
197                         parse_error(ctxt, token,
198                                     "invalid hex escape sequence in string");
199                         goto out;
200                     }
201                     ptr++;
202                 }
203 
204                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
205                 qstring_append(str, utf8_char);
206             }   break;
207             default:
208                 parse_error(ctxt, token, "invalid escape sequence in string");
209                 goto out;
210             }
211         } else {
212             char dummy[2];
213 
214             dummy[0] = *ptr++;
215             dummy[1] = 0;
216 
217             qstring_append(str, dummy);
218         }
219     }
220 
221     return str;
222 
223 out:
224     QDECREF(str);
225     return NULL;
226 }
227 
228 /* Note: the token object returned by parser_context_peek_token or
229  * parser_context_pop_token is deleted as soon as parser_context_pop_token
230  * is called again.
231  */
232 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
233 {
234     g_free(ctxt->current);
235     assert(!g_queue_is_empty(ctxt->buf));
236     ctxt->current = g_queue_pop_head(ctxt->buf);
237     return ctxt->current;
238 }
239 
240 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
241 {
242     assert(!g_queue_is_empty(ctxt->buf));
243     return g_queue_peek_head(ctxt->buf);
244 }
245 
246 static JSONParserContext *parser_context_new(GQueue *tokens)
247 {
248     JSONParserContext *ctxt;
249 
250     if (!tokens) {
251         return NULL;
252     }
253 
254     ctxt = g_malloc0(sizeof(JSONParserContext));
255     ctxt->buf = tokens;
256 
257     return ctxt;
258 }
259 
260 /* to support error propagation, ctxt->err must be freed separately */
261 static void parser_context_free(JSONParserContext *ctxt)
262 {
263     if (ctxt) {
264         while (!g_queue_is_empty(ctxt->buf)) {
265             parser_context_pop_token(ctxt);
266         }
267         g_free(ctxt->current);
268         g_queue_free(ctxt->buf);
269         g_free(ctxt);
270     }
271 }
272 
273 /**
274  * Parsing rules
275  */
276 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
277 {
278     QObject *key = NULL, *value;
279     JSONToken *peek, *token;
280 
281     peek = parser_context_peek_token(ctxt);
282     if (peek == NULL) {
283         parse_error(ctxt, NULL, "premature EOI");
284         goto out;
285     }
286 
287     key = parse_value(ctxt, ap);
288     if (!key || qobject_type(key) != QTYPE_QSTRING) {
289         parse_error(ctxt, peek, "key is not a string in object");
290         goto out;
291     }
292 
293     token = parser_context_pop_token(ctxt);
294     if (token == NULL) {
295         parse_error(ctxt, NULL, "premature EOI");
296         goto out;
297     }
298 
299     if (token->type != JSON_COLON) {
300         parse_error(ctxt, token, "missing : in object pair");
301         goto out;
302     }
303 
304     value = parse_value(ctxt, ap);
305     if (value == NULL) {
306         parse_error(ctxt, token, "Missing value in dict");
307         goto out;
308     }
309 
310     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
311 
312     qobject_decref(key);
313 
314     return 0;
315 
316 out:
317     qobject_decref(key);
318 
319     return -1;
320 }
321 
322 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
323 {
324     QDict *dict = NULL;
325     JSONToken *token, *peek;
326 
327     token = parser_context_pop_token(ctxt);
328     assert(token && token->type == JSON_LCURLY);
329 
330     dict = qdict_new();
331 
332     peek = parser_context_peek_token(ctxt);
333     if (peek == NULL) {
334         parse_error(ctxt, NULL, "premature EOI");
335         goto out;
336     }
337 
338     if (peek->type != JSON_RCURLY) {
339         if (parse_pair(ctxt, dict, ap) == -1) {
340             goto out;
341         }
342 
343         token = parser_context_pop_token(ctxt);
344         if (token == NULL) {
345             parse_error(ctxt, NULL, "premature EOI");
346             goto out;
347         }
348 
349         while (token->type != JSON_RCURLY) {
350             if (token->type != JSON_COMMA) {
351                 parse_error(ctxt, token, "expected separator in dict");
352                 goto out;
353             }
354 
355             if (parse_pair(ctxt, dict, ap) == -1) {
356                 goto out;
357             }
358 
359             token = parser_context_pop_token(ctxt);
360             if (token == NULL) {
361                 parse_error(ctxt, NULL, "premature EOI");
362                 goto out;
363             }
364         }
365     } else {
366         (void)parser_context_pop_token(ctxt);
367     }
368 
369     return QOBJECT(dict);
370 
371 out:
372     QDECREF(dict);
373     return NULL;
374 }
375 
376 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
377 {
378     QList *list = NULL;
379     JSONToken *token, *peek;
380 
381     token = parser_context_pop_token(ctxt);
382     assert(token && token->type == JSON_LSQUARE);
383 
384     list = qlist_new();
385 
386     peek = parser_context_peek_token(ctxt);
387     if (peek == NULL) {
388         parse_error(ctxt, NULL, "premature EOI");
389         goto out;
390     }
391 
392     if (peek->type != JSON_RSQUARE) {
393         QObject *obj;
394 
395         obj = parse_value(ctxt, ap);
396         if (obj == NULL) {
397             parse_error(ctxt, token, "expecting value");
398             goto out;
399         }
400 
401         qlist_append_obj(list, obj);
402 
403         token = parser_context_pop_token(ctxt);
404         if (token == NULL) {
405             parse_error(ctxt, NULL, "premature EOI");
406             goto out;
407         }
408 
409         while (token->type != JSON_RSQUARE) {
410             if (token->type != JSON_COMMA) {
411                 parse_error(ctxt, token, "expected separator in list");
412                 goto out;
413             }
414 
415             obj = parse_value(ctxt, ap);
416             if (obj == NULL) {
417                 parse_error(ctxt, token, "expecting value");
418                 goto out;
419             }
420 
421             qlist_append_obj(list, obj);
422 
423             token = parser_context_pop_token(ctxt);
424             if (token == NULL) {
425                 parse_error(ctxt, NULL, "premature EOI");
426                 goto out;
427             }
428         }
429     } else {
430         (void)parser_context_pop_token(ctxt);
431     }
432 
433     return QOBJECT(list);
434 
435 out:
436     QDECREF(list);
437     return NULL;
438 }
439 
440 static QObject *parse_keyword(JSONParserContext *ctxt)
441 {
442     JSONToken *token;
443 
444     token = parser_context_pop_token(ctxt);
445     assert(token && token->type == JSON_KEYWORD);
446 
447     if (!strcmp(token->str, "true")) {
448         return QOBJECT(qbool_from_bool(true));
449     } else if (!strcmp(token->str, "false")) {
450         return QOBJECT(qbool_from_bool(false));
451     } else if (!strcmp(token->str, "null")) {
452         return qnull();
453     }
454     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
455     return NULL;
456 }
457 
458 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
459 {
460     JSONToken *token;
461 
462     if (ap == NULL) {
463         return NULL;
464     }
465 
466     token = parser_context_pop_token(ctxt);
467     assert(token && token->type == JSON_ESCAPE);
468 
469     if (!strcmp(token->str, "%p")) {
470         return va_arg(*ap, QObject *);
471     } else if (!strcmp(token->str, "%i")) {
472         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
473     } else if (!strcmp(token->str, "%d")) {
474         return QOBJECT(qint_from_int(va_arg(*ap, int)));
475     } else if (!strcmp(token->str, "%ld")) {
476         return QOBJECT(qint_from_int(va_arg(*ap, long)));
477     } else if (!strcmp(token->str, "%lld") ||
478                !strcmp(token->str, "%I64d")) {
479         return QOBJECT(qint_from_int(va_arg(*ap, long long)));
480     } else if (!strcmp(token->str, "%s")) {
481         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
482     } else if (!strcmp(token->str, "%f")) {
483         return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
484     }
485     return NULL;
486 }
487 
488 static QObject *parse_literal(JSONParserContext *ctxt)
489 {
490     JSONToken *token;
491 
492     token = parser_context_pop_token(ctxt);
493     assert(token);
494 
495     switch (token->type) {
496     case JSON_STRING:
497         return QOBJECT(qstring_from_escaped_str(ctxt, token));
498     case JSON_INTEGER: {
499         /* A possibility exists that this is a whole-valued float where the
500          * fractional part was left out due to being 0 (.0). It's not a big
501          * deal to treat these as ints in the parser, so long as users of the
502          * resulting QObject know to expect a QInt in place of a QFloat in
503          * cases like these.
504          *
505          * However, in some cases these values will overflow/underflow a
506          * QInt/int64 container, thus we should assume these are to be handled
507          * as QFloats/doubles rather than silently changing their values.
508          *
509          * strtoll() indicates these instances by setting errno to ERANGE
510          */
511         int64_t value;
512 
513         errno = 0; /* strtoll doesn't set errno on success */
514         value = strtoll(token->str, NULL, 10);
515         if (errno != ERANGE) {
516             return QOBJECT(qint_from_int(value));
517         }
518         /* fall through to JSON_FLOAT */
519     }
520     case JSON_FLOAT:
521         /* FIXME dependent on locale; a pervasive issue in QEMU */
522         /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
523          * but those might be useful extensions beyond JSON */
524         return QOBJECT(qfloat_from_double(strtod(token->str, NULL)));
525     default:
526         abort();
527     }
528 }
529 
530 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
531 {
532     JSONToken *token;
533 
534     token = parser_context_peek_token(ctxt);
535     if (token == NULL) {
536         parse_error(ctxt, NULL, "premature EOI");
537         return NULL;
538     }
539 
540     switch (token->type) {
541     case JSON_LCURLY:
542         return parse_object(ctxt, ap);
543     case JSON_LSQUARE:
544         return parse_array(ctxt, ap);
545     case JSON_ESCAPE:
546         return parse_escape(ctxt, ap);
547     case JSON_INTEGER:
548     case JSON_FLOAT:
549     case JSON_STRING:
550         return parse_literal(ctxt);
551     case JSON_KEYWORD:
552         return parse_keyword(ctxt);
553     default:
554         parse_error(ctxt, token, "expecting value");
555         return NULL;
556     }
557 }
558 
559 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
560 {
561     return json_parser_parse_err(tokens, ap, NULL);
562 }
563 
564 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
565 {
566     JSONParserContext *ctxt = parser_context_new(tokens);
567     QObject *result;
568 
569     if (!ctxt) {
570         return NULL;
571     }
572 
573     result = parse_value(ctxt, ap);
574 
575     error_propagate(errp, ctxt->err);
576 
577     parser_context_free(ctxt);
578 
579     return result;
580 }
581