xref: /openbmc/qemu/qobject/json-parser.c (revision 31cf4b97)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "json-parser-int.h"
26 
/*
 * One lexical token, as consumed by the parser.
 *
 * Instances are allocated by json_token(), which sizes the allocation
 * so that the token text fits in the trailing flexible array member.
 */
struct JSONToken {
    JSONTokenType type;     /* kind of token (JSON_STRING, JSON_COMMA, ...) */
    int x;                  /* position value passed to json_token(); exact
                             * meaning is not visible here -- TODO confirm */
    int y;                  /* second position value, see @x */
    char str[];             /* token text, NUL-terminated by json_token() */
};
33 
34 typedef struct JSONParserContext
35 {
36     Error *err;
37     JSONToken *current;
38     GQueue *buf;
39     va_list *ap;
40 } JSONParserContext;
41 
/* Abort via assert() when @cond is true */
#define BUG_ON(cond) assert(!(cond))
43 
44 /**
45  * TODO
46  *
47  * 0) make errors meaningful again
48  * 1) add geometry information to tokens
49  * 3) should we return a parsed size?
50  * 4) deal with premature EOI
51  */
52 
/*
 * Forward declaration: parse_value() and the container parsers
 * (parse_pair(), parse_object(), parse_array()) call each other.
 */
static QObject *parse_value(JSONParserContext *ctxt);
54 
55 /**
56  * Error handler
57  */
58 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
59                                            JSONToken *token, const char *msg, ...)
60 {
61     va_list ap;
62     char message[1024];
63 
64     if (ctxt->err) {
65         return;
66     }
67     va_start(ap, msg);
68     vsnprintf(message, sizeof(message), msg, ap);
69     va_end(ap);
70     error_setg(&ctxt->err, "JSON parse error, %s", message);
71 }
72 
/**
 * cvt4hex(): Decode exactly four hexadecimal digits
 *
 * @s: text to decode.  Characters are examined left to right and
 *     decoding stops at the first one that is not a hexadecimal digit,
 *     so a NUL-terminated string shorter than four characters is never
 *     read past its terminator.
 *
 * Returns the value of the four digits (0 .. 0xFFFF), or -1 if any of
 * the first four characters is not one of 0-9, a-f, A-F.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /* Validation and conversion are one step: anything else fails */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
95 
96 /**
97  * parse_string(): Parse a JSON string
98  *
99  * From RFC 8259 "The JavaScript Object Notation (JSON) Data
100  * Interchange Format":
101  *
102  *    char = unescaped /
103  *        escape (
104  *            %x22 /          ; "    quotation mark  U+0022
105  *            %x5C /          ; \    reverse solidus U+005C
106  *            %x2F /          ; /    solidus         U+002F
107  *            %x62 /          ; b    backspace       U+0008
108  *            %x66 /          ; f    form feed       U+000C
109  *            %x6E /          ; n    line feed       U+000A
110  *            %x72 /          ; r    carriage return U+000D
111  *            %x74 /          ; t    tab             U+0009
112  *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
113  *    escape = %x5C              ; \
114  *    quotation-mark = %x22      ; "
115  *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
116  *
117  * Extensions over RFC 8259:
118  * - Extra escape sequence in strings:
119  *   0x27 (apostrophe) is recognized after escape, too
120  * - Single-quoted strings:
121  *   Like double-quoted strings, except they're delimited by %x27
122  *   (apostrophe) instead of %x22 (quotation mark), and can't contain
123  *   unescaped apostrophe, but can contain unescaped quotation mark.
124  *
125  * Note:
126  * - Encoding is modified UTF-8.
127  * - Invalid Unicode characters are rejected.
128  * - Control characters \x00..\x1F are rejected by the lexer.
129  */
130 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
131 {
132     const char *ptr = token->str;
133     QString *str;
134     char quote;
135     const char *beg;
136     int cp, trailing;
137     char *end;
138     ssize_t len;
139     char utf8_buf[5];
140 
141     assert(*ptr == '"' || *ptr == '\'');
142     quote = *ptr++;
143     str = qstring_new();
144 
145     while (*ptr != quote) {
146         assert(*ptr);
147         switch (*ptr) {
148         case '\\':
149             beg = ptr++;
150             switch (*ptr++) {
151             case '"':
152                 qstring_append_chr(str, '"');
153                 break;
154             case '\'':
155                 qstring_append_chr(str, '\'');
156                 break;
157             case '\\':
158                 qstring_append_chr(str, '\\');
159                 break;
160             case '/':
161                 qstring_append_chr(str, '/');
162                 break;
163             case 'b':
164                 qstring_append_chr(str, '\b');
165                 break;
166             case 'f':
167                 qstring_append_chr(str, '\f');
168                 break;
169             case 'n':
170                 qstring_append_chr(str, '\n');
171                 break;
172             case 'r':
173                 qstring_append_chr(str, '\r');
174                 break;
175             case 't':
176                 qstring_append_chr(str, '\t');
177                 break;
178             case 'u':
179                 cp = cvt4hex(ptr);
180                 ptr += 4;
181 
182                 /* handle surrogate pairs */
183                 if (cp >= 0xD800 && cp <= 0xDBFF
184                     && ptr[0] == '\\' && ptr[1] == 'u') {
185                     /* leading surrogate followed by \u */
186                     cp = 0x10000 + ((cp & 0x3FF) << 10);
187                     trailing = cvt4hex(ptr + 2);
188                     if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
189                         /* followed by trailing surrogate */
190                         cp |= trailing & 0x3FF;
191                         ptr += 6;
192                     } else {
193                         cp = -1; /* invalid */
194                     }
195                 }
196 
197                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
198                     parse_error(ctxt, token,
199                                 "%.*s is not a valid Unicode character",
200                                 (int)(ptr - beg), beg);
201                     goto out;
202                 }
203                 qstring_append(str, utf8_buf);
204                 break;
205             default:
206                 parse_error(ctxt, token, "invalid escape sequence in string");
207                 goto out;
208             }
209             break;
210         case '%':
211             if (ctxt->ap && ptr[1] != '%') {
212                 parse_error(ctxt, token, "can't interpolate into string");
213                 goto out;
214             }
215             ptr++;
216             /* fall through */
217         default:
218             cp = mod_utf8_codepoint(ptr, 6, &end);
219             if (cp < 0) {
220                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
221                 goto out;
222             }
223             ptr = end;
224             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
225             assert(len >= 0);
226             qstring_append(str, utf8_buf);
227         }
228     }
229 
230     return str;
231 
232 out:
233     qobject_unref(str);
234     return NULL;
235 }
236 
237 /* Note: the token object returned by parser_context_peek_token or
238  * parser_context_pop_token is deleted as soon as parser_context_pop_token
239  * is called again.
240  */
241 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
242 {
243     g_free(ctxt->current);
244     ctxt->current = g_queue_pop_head(ctxt->buf);
245     return ctxt->current;
246 }
247 
248 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
249 {
250     return g_queue_peek_head(ctxt->buf);
251 }
252 
253 /**
254  * Parsing rules
255  */
256 static int parse_pair(JSONParserContext *ctxt, QDict *dict)
257 {
258     QObject *value;
259     QString *key = NULL;
260     JSONToken *peek, *token;
261 
262     peek = parser_context_peek_token(ctxt);
263     if (peek == NULL) {
264         parse_error(ctxt, NULL, "premature EOI");
265         goto out;
266     }
267 
268     key = qobject_to(QString, parse_value(ctxt));
269     if (!key) {
270         parse_error(ctxt, peek, "key is not a string in object");
271         goto out;
272     }
273 
274     token = parser_context_pop_token(ctxt);
275     if (token == NULL) {
276         parse_error(ctxt, NULL, "premature EOI");
277         goto out;
278     }
279 
280     if (token->type != JSON_COLON) {
281         parse_error(ctxt, token, "missing : in object pair");
282         goto out;
283     }
284 
285     value = parse_value(ctxt);
286     if (value == NULL) {
287         parse_error(ctxt, token, "Missing value in dict");
288         goto out;
289     }
290 
291     if (qdict_haskey(dict, qstring_get_str(key))) {
292         parse_error(ctxt, token, "duplicate key");
293         goto out;
294     }
295 
296     qdict_put_obj(dict, qstring_get_str(key), value);
297 
298     qobject_unref(key);
299 
300     return 0;
301 
302 out:
303     qobject_unref(key);
304 
305     return -1;
306 }
307 
308 static QObject *parse_object(JSONParserContext *ctxt)
309 {
310     QDict *dict = NULL;
311     JSONToken *token, *peek;
312 
313     token = parser_context_pop_token(ctxt);
314     assert(token && token->type == JSON_LCURLY);
315 
316     dict = qdict_new();
317 
318     peek = parser_context_peek_token(ctxt);
319     if (peek == NULL) {
320         parse_error(ctxt, NULL, "premature EOI");
321         goto out;
322     }
323 
324     if (peek->type != JSON_RCURLY) {
325         if (parse_pair(ctxt, dict) == -1) {
326             goto out;
327         }
328 
329         token = parser_context_pop_token(ctxt);
330         if (token == NULL) {
331             parse_error(ctxt, NULL, "premature EOI");
332             goto out;
333         }
334 
335         while (token->type != JSON_RCURLY) {
336             if (token->type != JSON_COMMA) {
337                 parse_error(ctxt, token, "expected separator in dict");
338                 goto out;
339             }
340 
341             if (parse_pair(ctxt, dict) == -1) {
342                 goto out;
343             }
344 
345             token = parser_context_pop_token(ctxt);
346             if (token == NULL) {
347                 parse_error(ctxt, NULL, "premature EOI");
348                 goto out;
349             }
350         }
351     } else {
352         (void)parser_context_pop_token(ctxt);
353     }
354 
355     return QOBJECT(dict);
356 
357 out:
358     qobject_unref(dict);
359     return NULL;
360 }
361 
362 static QObject *parse_array(JSONParserContext *ctxt)
363 {
364     QList *list = NULL;
365     JSONToken *token, *peek;
366 
367     token = parser_context_pop_token(ctxt);
368     assert(token && token->type == JSON_LSQUARE);
369 
370     list = qlist_new();
371 
372     peek = parser_context_peek_token(ctxt);
373     if (peek == NULL) {
374         parse_error(ctxt, NULL, "premature EOI");
375         goto out;
376     }
377 
378     if (peek->type != JSON_RSQUARE) {
379         QObject *obj;
380 
381         obj = parse_value(ctxt);
382         if (obj == NULL) {
383             parse_error(ctxt, token, "expecting value");
384             goto out;
385         }
386 
387         qlist_append_obj(list, obj);
388 
389         token = parser_context_pop_token(ctxt);
390         if (token == NULL) {
391             parse_error(ctxt, NULL, "premature EOI");
392             goto out;
393         }
394 
395         while (token->type != JSON_RSQUARE) {
396             if (token->type != JSON_COMMA) {
397                 parse_error(ctxt, token, "expected separator in list");
398                 goto out;
399             }
400 
401             obj = parse_value(ctxt);
402             if (obj == NULL) {
403                 parse_error(ctxt, token, "expecting value");
404                 goto out;
405             }
406 
407             qlist_append_obj(list, obj);
408 
409             token = parser_context_pop_token(ctxt);
410             if (token == NULL) {
411                 parse_error(ctxt, NULL, "premature EOI");
412                 goto out;
413             }
414         }
415     } else {
416         (void)parser_context_pop_token(ctxt);
417     }
418 
419     return QOBJECT(list);
420 
421 out:
422     qobject_unref(list);
423     return NULL;
424 }
425 
426 static QObject *parse_keyword(JSONParserContext *ctxt)
427 {
428     JSONToken *token;
429 
430     token = parser_context_pop_token(ctxt);
431     assert(token && token->type == JSON_KEYWORD);
432 
433     if (!strcmp(token->str, "true")) {
434         return QOBJECT(qbool_from_bool(true));
435     } else if (!strcmp(token->str, "false")) {
436         return QOBJECT(qbool_from_bool(false));
437     } else if (!strcmp(token->str, "null")) {
438         return QOBJECT(qnull());
439     }
440     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
441     return NULL;
442 }
443 
444 static QObject *parse_interpolation(JSONParserContext *ctxt)
445 {
446     JSONToken *token;
447 
448     token = parser_context_pop_token(ctxt);
449     assert(token && token->type == JSON_INTERP);
450 
451     if (!strcmp(token->str, "%p")) {
452         return va_arg(*ctxt->ap, QObject *);
453     } else if (!strcmp(token->str, "%i")) {
454         return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
455     } else if (!strcmp(token->str, "%d")) {
456         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
457     } else if (!strcmp(token->str, "%ld")) {
458         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
459     } else if (!strcmp(token->str, "%lld")) {
460         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
461     } else if (!strcmp(token->str, "%" PRId64)) {
462         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
463     } else if (!strcmp(token->str, "%u")) {
464         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
465     } else if (!strcmp(token->str, "%lu")) {
466         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
467     } else if (!strcmp(token->str, "%llu")) {
468         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
469     } else if (!strcmp(token->str, "%" PRIu64)) {
470         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
471     } else if (!strcmp(token->str, "%s")) {
472         return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
473     } else if (!strcmp(token->str, "%f")) {
474         return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
475     }
476     parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
477     return NULL;
478 }
479 
480 static QObject *parse_literal(JSONParserContext *ctxt)
481 {
482     JSONToken *token;
483 
484     token = parser_context_pop_token(ctxt);
485     assert(token);
486 
487     switch (token->type) {
488     case JSON_STRING:
489         return QOBJECT(parse_string(ctxt, token));
490     case JSON_INTEGER: {
491         /*
492          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
493          * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
494          * and qemu_strtou64() fail with ERANGE when it's not
495          * possible.
496          *
497          * qnum_get_int() will then work for any signed 64-bit
498          * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
499          * integer, and qnum_get_double() both for any JSON_INTEGER
500          * and any JSON_FLOAT (with precision loss for integers beyond
501          * 53 bits)
502          */
503         int ret;
504         int64_t value;
505         uint64_t uvalue;
506 
507         ret = qemu_strtoi64(token->str, NULL, 10, &value);
508         if (!ret) {
509             return QOBJECT(qnum_from_int(value));
510         }
511         assert(ret == -ERANGE);
512 
513         if (token->str[0] != '-') {
514             ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
515             if (!ret) {
516                 return QOBJECT(qnum_from_uint(uvalue));
517             }
518             assert(ret == -ERANGE);
519         }
520         /* fall through to JSON_FLOAT */
521     }
522     case JSON_FLOAT:
523         /* FIXME dependent on locale; a pervasive issue in QEMU */
524         /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
525          * but those might be useful extensions beyond JSON */
526         return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
527     default:
528         abort();
529     }
530 }
531 
532 static QObject *parse_value(JSONParserContext *ctxt)
533 {
534     JSONToken *token;
535 
536     token = parser_context_peek_token(ctxt);
537     if (token == NULL) {
538         parse_error(ctxt, NULL, "premature EOI");
539         return NULL;
540     }
541 
542     switch (token->type) {
543     case JSON_LCURLY:
544         return parse_object(ctxt);
545     case JSON_LSQUARE:
546         return parse_array(ctxt);
547     case JSON_INTERP:
548         return parse_interpolation(ctxt);
549     case JSON_INTEGER:
550     case JSON_FLOAT:
551     case JSON_STRING:
552         return parse_literal(ctxt);
553     case JSON_KEYWORD:
554         return parse_keyword(ctxt);
555     default:
556         parse_error(ctxt, token, "expecting value");
557         return NULL;
558     }
559 }
560 
561 JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
562 {
563     JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
564 
565     token->type = type;
566     memcpy(token->str, tokstr->str, tokstr->len);
567     token->str[tokstr->len] = 0;
568     token->x = x;
569     token->y = y;
570     return token;
571 }
572 
573 QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
574 {
575     JSONParserContext ctxt = { .buf = tokens, .ap = ap };
576     QObject *result;
577 
578     result = parse_value(&ctxt);
579     assert(ctxt.err || g_queue_is_empty(ctxt.buf));
580 
581     error_propagate(errp, ctxt.err);
582 
583     while (!g_queue_is_empty(ctxt.buf)) {
584         parser_context_pop_token(&ctxt);
585     }
586     g_free(ctxt.current);
587 
588     return result;
589 }
590