xref: /openbmc/qemu/qobject/json-parser.c (revision 966f2ec3)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "json-parser-int.h"
26 
27 struct JSONToken {
28     JSONTokenType type;
29     int x;
30     int y;
31     char str[];
32 };
33 
34 typedef struct JSONParserContext
35 {
36     Error *err;
37     JSONToken *current;
38     GQueue *buf;
39     va_list *ap;
40 } JSONParserContext;
41 
/* Assert that @cond does not hold; thin wrapper around assert(). */
#define BUG_ON(cond) assert(!(cond))
43 
44 /**
45  * TODO
46  *
47  * 0) make errors meaningful again
48  * 1) add geometry information to tokens
49  * 3) should we return a parsed size?
50  * 4) deal with premature EOI
51  */
52 
53 static QObject *parse_value(JSONParserContext *ctxt);
54 
55 /**
56  * Error handler
57  */
58 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
59                                            JSONToken *token, const char *msg, ...)
60 {
61     va_list ap;
62     char message[1024];
63 
64     if (ctxt->err) {
65         return;
66     }
67     va_start(ap, msg);
68     vsnprintf(message, sizeof(message), msg, ap);
69     va_end(ap);
70     error_setg(&ctxt->err, "JSON parse error, %s", message);
71 }
72 
/*
 * Convert the four hexadecimal digits at @s to an integer.
 *
 * Returns the value (0..0xFFFF) on success, -1 if any of the four
 * characters is not a hexadecimal digit.  Scanning stops at the first
 * offending character, so a NUL terminator within the first four bytes
 * is never read past.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /* Classify and convert in one step; anything else is an error. */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
95 
96 /**
97  * parse_string(): Parse a JSON string
98  *
99  * From RFC 8259 "The JavaScript Object Notation (JSON) Data
100  * Interchange Format":
101  *
102  *    char = unescaped /
103  *        escape (
104  *            %x22 /          ; "    quotation mark  U+0022
105  *            %x5C /          ; \    reverse solidus U+005C
106  *            %x2F /          ; /    solidus         U+002F
107  *            %x62 /          ; b    backspace       U+0008
108  *            %x66 /          ; f    form feed       U+000C
109  *            %x6E /          ; n    line feed       U+000A
110  *            %x72 /          ; r    carriage return U+000D
111  *            %x74 /          ; t    tab             U+0009
112  *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
113  *    escape = %x5C              ; \
114  *    quotation-mark = %x22      ; "
115  *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
116  *
117  * Extensions over RFC 8259:
118  * - Extra escape sequence in strings:
119  *   0x27 (apostrophe) is recognized after escape, too
120  * - Single-quoted strings:
121  *   Like double-quoted strings, except they're delimited by %x27
122  *   (apostrophe) instead of %x22 (quotation mark), and can't contain
123  *   unescaped apostrophe, but can contain unescaped quotation mark.
124  *
125  * Note:
126  * - Encoding is modified UTF-8.
127  * - Invalid Unicode characters are rejected.
128  * - Control characters \x00..\x1F are rejected by the lexer.
129  */
130 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
131 {
132     const char *ptr = token->str;
133     QString *str;
134     char quote;
135     const char *beg;
136     int cp, trailing;
137     char *end;
138     ssize_t len;
139     char utf8_buf[5];
140 
141     assert(*ptr == '"' || *ptr == '\'');
142     quote = *ptr++;
143     str = qstring_new();
144 
145     while (*ptr != quote) {
146         assert(*ptr);
147         switch (*ptr) {
148         case '\\':
149             beg = ptr++;
150             switch (*ptr++) {
151             case '"':
152                 qstring_append_chr(str, '"');
153                 break;
154             case '\'':
155                 qstring_append_chr(str, '\'');
156                 break;
157             case '\\':
158                 qstring_append_chr(str, '\\');
159                 break;
160             case '/':
161                 qstring_append_chr(str, '/');
162                 break;
163             case 'b':
164                 qstring_append_chr(str, '\b');
165                 break;
166             case 'f':
167                 qstring_append_chr(str, '\f');
168                 break;
169             case 'n':
170                 qstring_append_chr(str, '\n');
171                 break;
172             case 'r':
173                 qstring_append_chr(str, '\r');
174                 break;
175             case 't':
176                 qstring_append_chr(str, '\t');
177                 break;
178             case 'u':
179                 cp = cvt4hex(ptr);
180                 ptr += 4;
181 
182                 /* handle surrogate pairs */
183                 if (cp >= 0xD800 && cp <= 0xDBFF
184                     && ptr[0] == '\\' && ptr[1] == 'u') {
185                     /* leading surrogate followed by \u */
186                     cp = 0x10000 + ((cp & 0x3FF) << 10);
187                     trailing = cvt4hex(ptr + 2);
188                     if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
189                         /* followed by trailing surrogate */
190                         cp |= trailing & 0x3FF;
191                         ptr += 6;
192                     } else {
193                         cp = -1; /* invalid */
194                     }
195                 }
196 
197                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
198                     parse_error(ctxt, token,
199                                 "%.*s is not a valid Unicode character",
200                                 (int)(ptr - beg), beg);
201                     goto out;
202                 }
203                 qstring_append(str, utf8_buf);
204                 break;
205             default:
206                 parse_error(ctxt, token, "invalid escape sequence in string");
207                 goto out;
208             }
209             break;
210         case '%':
211             if (ctxt->ap && ptr[1] != '%') {
212                 parse_error(ctxt, token, "can't interpolate into string");
213                 goto out;
214             }
215             ptr++;
216             /* fall through */
217         default:
218             cp = mod_utf8_codepoint(ptr, 6, &end);
219             if (cp < 0) {
220                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
221                 goto out;
222             }
223             ptr = end;
224             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
225             assert(len >= 0);
226             qstring_append(str, utf8_buf);
227         }
228     }
229 
230     return str;
231 
232 out:
233     qobject_unref(str);
234     return NULL;
235 }
236 
237 /* Note: the token object returned by parser_context_peek_token or
238  * parser_context_pop_token is deleted as soon as parser_context_pop_token
239  * is called again.
240  */
241 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
242 {
243     g_free(ctxt->current);
244     ctxt->current = g_queue_pop_head(ctxt->buf);
245     return ctxt->current;
246 }
247 
248 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
249 {
250     return g_queue_peek_head(ctxt->buf);
251 }
252 
253 /**
254  * Parsing rules
255  */
256 static int parse_pair(JSONParserContext *ctxt, QDict *dict)
257 {
258     QObject *value;
259     QString *key = NULL;
260     JSONToken *peek, *token;
261 
262     peek = parser_context_peek_token(ctxt);
263     if (peek == NULL) {
264         parse_error(ctxt, NULL, "premature EOI");
265         goto out;
266     }
267 
268     key = qobject_to(QString, parse_value(ctxt));
269     if (!key) {
270         parse_error(ctxt, peek, "key is not a string in object");
271         goto out;
272     }
273 
274     token = parser_context_pop_token(ctxt);
275     if (token == NULL) {
276         parse_error(ctxt, NULL, "premature EOI");
277         goto out;
278     }
279 
280     if (token->type != JSON_COLON) {
281         parse_error(ctxt, token, "missing : in object pair");
282         goto out;
283     }
284 
285     value = parse_value(ctxt);
286     if (value == NULL) {
287         parse_error(ctxt, token, "Missing value in dict");
288         goto out;
289     }
290 
291     qdict_put_obj(dict, qstring_get_str(key), value);
292 
293     qobject_unref(key);
294 
295     return 0;
296 
297 out:
298     qobject_unref(key);
299 
300     return -1;
301 }
302 
303 static QObject *parse_object(JSONParserContext *ctxt)
304 {
305     QDict *dict = NULL;
306     JSONToken *token, *peek;
307 
308     token = parser_context_pop_token(ctxt);
309     assert(token && token->type == JSON_LCURLY);
310 
311     dict = qdict_new();
312 
313     peek = parser_context_peek_token(ctxt);
314     if (peek == NULL) {
315         parse_error(ctxt, NULL, "premature EOI");
316         goto out;
317     }
318 
319     if (peek->type != JSON_RCURLY) {
320         if (parse_pair(ctxt, dict) == -1) {
321             goto out;
322         }
323 
324         token = parser_context_pop_token(ctxt);
325         if (token == NULL) {
326             parse_error(ctxt, NULL, "premature EOI");
327             goto out;
328         }
329 
330         while (token->type != JSON_RCURLY) {
331             if (token->type != JSON_COMMA) {
332                 parse_error(ctxt, token, "expected separator in dict");
333                 goto out;
334             }
335 
336             if (parse_pair(ctxt, dict) == -1) {
337                 goto out;
338             }
339 
340             token = parser_context_pop_token(ctxt);
341             if (token == NULL) {
342                 parse_error(ctxt, NULL, "premature EOI");
343                 goto out;
344             }
345         }
346     } else {
347         (void)parser_context_pop_token(ctxt);
348     }
349 
350     return QOBJECT(dict);
351 
352 out:
353     qobject_unref(dict);
354     return NULL;
355 }
356 
357 static QObject *parse_array(JSONParserContext *ctxt)
358 {
359     QList *list = NULL;
360     JSONToken *token, *peek;
361 
362     token = parser_context_pop_token(ctxt);
363     assert(token && token->type == JSON_LSQUARE);
364 
365     list = qlist_new();
366 
367     peek = parser_context_peek_token(ctxt);
368     if (peek == NULL) {
369         parse_error(ctxt, NULL, "premature EOI");
370         goto out;
371     }
372 
373     if (peek->type != JSON_RSQUARE) {
374         QObject *obj;
375 
376         obj = parse_value(ctxt);
377         if (obj == NULL) {
378             parse_error(ctxt, token, "expecting value");
379             goto out;
380         }
381 
382         qlist_append_obj(list, obj);
383 
384         token = parser_context_pop_token(ctxt);
385         if (token == NULL) {
386             parse_error(ctxt, NULL, "premature EOI");
387             goto out;
388         }
389 
390         while (token->type != JSON_RSQUARE) {
391             if (token->type != JSON_COMMA) {
392                 parse_error(ctxt, token, "expected separator in list");
393                 goto out;
394             }
395 
396             obj = parse_value(ctxt);
397             if (obj == NULL) {
398                 parse_error(ctxt, token, "expecting value");
399                 goto out;
400             }
401 
402             qlist_append_obj(list, obj);
403 
404             token = parser_context_pop_token(ctxt);
405             if (token == NULL) {
406                 parse_error(ctxt, NULL, "premature EOI");
407                 goto out;
408             }
409         }
410     } else {
411         (void)parser_context_pop_token(ctxt);
412     }
413 
414     return QOBJECT(list);
415 
416 out:
417     qobject_unref(list);
418     return NULL;
419 }
420 
421 static QObject *parse_keyword(JSONParserContext *ctxt)
422 {
423     JSONToken *token;
424 
425     token = parser_context_pop_token(ctxt);
426     assert(token && token->type == JSON_KEYWORD);
427 
428     if (!strcmp(token->str, "true")) {
429         return QOBJECT(qbool_from_bool(true));
430     } else if (!strcmp(token->str, "false")) {
431         return QOBJECT(qbool_from_bool(false));
432     } else if (!strcmp(token->str, "null")) {
433         return QOBJECT(qnull());
434     }
435     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
436     return NULL;
437 }
438 
439 static QObject *parse_interpolation(JSONParserContext *ctxt)
440 {
441     JSONToken *token;
442 
443     token = parser_context_pop_token(ctxt);
444     assert(token && token->type == JSON_INTERP);
445 
446     if (!strcmp(token->str, "%p")) {
447         return va_arg(*ctxt->ap, QObject *);
448     } else if (!strcmp(token->str, "%i")) {
449         return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
450     } else if (!strcmp(token->str, "%d")) {
451         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
452     } else if (!strcmp(token->str, "%ld")) {
453         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
454     } else if (!strcmp(token->str, "%lld")) {
455         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
456     } else if (!strcmp(token->str, "%" PRId64)) {
457         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
458     } else if (!strcmp(token->str, "%u")) {
459         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
460     } else if (!strcmp(token->str, "%lu")) {
461         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
462     } else if (!strcmp(token->str, "%llu")) {
463         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
464     } else if (!strcmp(token->str, "%" PRIu64)) {
465         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
466     } else if (!strcmp(token->str, "%s")) {
467         return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
468     } else if (!strcmp(token->str, "%f")) {
469         return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
470     }
471     parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
472     return NULL;
473 }
474 
475 static QObject *parse_literal(JSONParserContext *ctxt)
476 {
477     JSONToken *token;
478 
479     token = parser_context_pop_token(ctxt);
480     assert(token);
481 
482     switch (token->type) {
483     case JSON_STRING:
484         return QOBJECT(parse_string(ctxt, token));
485     case JSON_INTEGER: {
486         /*
487          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
488          * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
489          * and qemu_strtou64() fail with ERANGE when it's not
490          * possible.
491          *
492          * qnum_get_int() will then work for any signed 64-bit
493          * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
494          * integer, and qnum_get_double() both for any JSON_INTEGER
495          * and any JSON_FLOAT (with precision loss for integers beyond
496          * 53 bits)
497          */
498         int ret;
499         int64_t value;
500         uint64_t uvalue;
501 
502         ret = qemu_strtoi64(token->str, NULL, 10, &value);
503         if (!ret) {
504             return QOBJECT(qnum_from_int(value));
505         }
506         assert(ret == -ERANGE);
507 
508         if (token->str[0] != '-') {
509             ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
510             if (!ret) {
511                 return QOBJECT(qnum_from_uint(uvalue));
512             }
513             assert(ret == -ERANGE);
514         }
515         /* fall through to JSON_FLOAT */
516     }
517     case JSON_FLOAT:
518         /* FIXME dependent on locale; a pervasive issue in QEMU */
519         /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
520          * but those might be useful extensions beyond JSON */
521         return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
522     default:
523         abort();
524     }
525 }
526 
527 static QObject *parse_value(JSONParserContext *ctxt)
528 {
529     JSONToken *token;
530 
531     token = parser_context_peek_token(ctxt);
532     if (token == NULL) {
533         parse_error(ctxt, NULL, "premature EOI");
534         return NULL;
535     }
536 
537     switch (token->type) {
538     case JSON_LCURLY:
539         return parse_object(ctxt);
540     case JSON_LSQUARE:
541         return parse_array(ctxt);
542     case JSON_INTERP:
543         return parse_interpolation(ctxt);
544     case JSON_INTEGER:
545     case JSON_FLOAT:
546     case JSON_STRING:
547         return parse_literal(ctxt);
548     case JSON_KEYWORD:
549         return parse_keyword(ctxt);
550     default:
551         parse_error(ctxt, token, "expecting value");
552         return NULL;
553     }
554 }
555 
556 JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
557 {
558     JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
559 
560     token->type = type;
561     memcpy(token->str, tokstr->str, tokstr->len);
562     token->str[tokstr->len] = 0;
563     token->x = x;
564     token->y = y;
565     return token;
566 }
567 
568 QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
569 {
570     JSONParserContext ctxt = { .buf = tokens, .ap = ap };
571     QObject *result;
572 
573     result = parse_value(&ctxt);
574     assert(ctxt.err || g_queue_is_empty(ctxt.buf));
575 
576     error_propagate(errp, ctxt.err);
577 
578     while (!g_queue_is_empty(ctxt.buf)) {
579         parser_context_pop_token(&ctxt);
580     }
581     g_free(ctxt.current);
582 
583     return result;
584 }
585