xref: /openbmc/qemu/qobject/json-parser.c (revision 04ddcda6a2387274b3f31a501be3affd172aea3d)
1  /*
2   * JSON Parser
3   *
4   * Copyright IBM, Corp. 2009
5   *
6   * Authors:
7   *  Anthony Liguori   <aliguori@us.ibm.com>
8   *
9   * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10   * See the COPYING.LIB file in the top-level directory.
11   *
12   */
13  
14  #include "qemu/osdep.h"
15  #include "qemu/ctype.h"
16  #include "qemu/cutils.h"
17  #include "qemu/unicode.h"
18  #include "qapi/error.h"
19  #include "qapi/qmp/qbool.h"
20  #include "qapi/qmp/qdict.h"
21  #include "qapi/qmp/qlist.h"
22  #include "qapi/qmp/qnull.h"
23  #include "qapi/qmp/qnum.h"
24  #include "qapi/qmp/qstring.h"
25  #include "json-parser-int.h"
26  
27  struct JSONToken {
28      JSONTokenType type;
29      int x;
30      int y;
31      char str[];
32  };
33  
34  typedef struct JSONParserContext {
35      Error *err;
36      JSONToken *current;
37      GQueue *buf;
38      va_list *ap;
39  } JSONParserContext;
40  
/* Assert that @cond does not hold; expands to assert(!(cond)). */
#define BUG_ON(cond) assert(!(cond))
42  
43  /**
44   * TODO
45   *
46   * 0) make errors meaningful again
47   * 1) add geometry information to tokens
48   * 3) should we return a parsed size?
49   * 4) deal with premature EOI
50   */
51  
52  static QObject *parse_value(JSONParserContext *ctxt);
53  
54  /**
55   * Error handler
56   */
parse_error(JSONParserContext * ctxt,JSONToken * token,const char * msg,...)57  static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
58                                             JSONToken *token, const char *msg, ...)
59  {
60      va_list ap;
61      char message[1024];
62  
63      if (ctxt->err) {
64          return;
65      }
66      va_start(ap, msg);
67      vsnprintf(message, sizeof(message), msg, ap);
68      va_end(ap);
69      error_setg(&ctxt->err, "JSON parse error, %s", message);
70  }
71  
/**
 * cvt4hex(): Convert four hexadecimal digits to their integer value
 *
 * @s: string whose first four characters are examined
 *
 * Upper- and lower-case digits are accepted.  Scanning stops at the
 * first character that is not a hexadecimal digit, so a string shorter
 * than four characters is rejected at its terminating NUL without
 * reading beyond it.
 *
 * Returns: the value in the range 0..0xFFFF, or -1 if any of the four
 * characters is not a hexadecimal digit.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /* Classify and convert in one step; anything else is an error */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
94  
95  /**
96   * parse_string(): Parse a JSON string
97   *
98   * From RFC 8259 "The JavaScript Object Notation (JSON) Data
99   * Interchange Format":
100   *
101   *    char = unescaped /
102   *        escape (
103   *            %x22 /          ; "    quotation mark  U+0022
104   *            %x5C /          ; \    reverse solidus U+005C
105   *            %x2F /          ; /    solidus         U+002F
106   *            %x62 /          ; b    backspace       U+0008
107   *            %x66 /          ; f    form feed       U+000C
108   *            %x6E /          ; n    line feed       U+000A
109   *            %x72 /          ; r    carriage return U+000D
110   *            %x74 /          ; t    tab             U+0009
111   *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
112   *    escape = %x5C              ; \
113   *    quotation-mark = %x22      ; "
114   *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
115   *
116   * Extensions over RFC 8259:
117   * - Extra escape sequence in strings:
118   *   0x27 (apostrophe) is recognized after escape, too
119   * - Single-quoted strings:
120   *   Like double-quoted strings, except they're delimited by %x27
121   *   (apostrophe) instead of %x22 (quotation mark), and can't contain
122   *   unescaped apostrophe, but can contain unescaped quotation mark.
123   *
124   * Note:
125   * - Encoding is modified UTF-8.
126   * - Invalid Unicode characters are rejected.
127   * - Control characters \x00..\x1F are rejected by the lexer.
128   */
parse_string(JSONParserContext * ctxt,JSONToken * token)129  static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
130  {
131      const char *ptr = token->str;
132      GString *str;
133      char quote;
134      const char *beg;
135      int cp, trailing;
136      char *end;
137      ssize_t len;
138      char utf8_buf[5];
139  
140      assert(*ptr == '"' || *ptr == '\'');
141      quote = *ptr++;
142      str = g_string_new(NULL);
143  
144      while (*ptr != quote) {
145          assert(*ptr);
146          switch (*ptr) {
147          case '\\':
148              beg = ptr++;
149              switch (*ptr++) {
150              case '"':
151                  g_string_append_c(str, '"');
152                  break;
153              case '\'':
154                  g_string_append_c(str, '\'');
155                  break;
156              case '\\':
157                  g_string_append_c(str, '\\');
158                  break;
159              case '/':
160                  g_string_append_c(str, '/');
161                  break;
162              case 'b':
163                  g_string_append_c(str, '\b');
164                  break;
165              case 'f':
166                  g_string_append_c(str, '\f');
167                  break;
168              case 'n':
169                  g_string_append_c(str, '\n');
170                  break;
171              case 'r':
172                  g_string_append_c(str, '\r');
173                  break;
174              case 't':
175                  g_string_append_c(str, '\t');
176                  break;
177              case 'u':
178                  cp = cvt4hex(ptr);
179                  ptr += 4;
180  
181                  /* handle surrogate pairs */
182                  if (cp >= 0xD800 && cp <= 0xDBFF
183                      && ptr[0] == '\\' && ptr[1] == 'u') {
184                      /* leading surrogate followed by \u */
185                      cp = 0x10000 + ((cp & 0x3FF) << 10);
186                      trailing = cvt4hex(ptr + 2);
187                      if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
188                          /* followed by trailing surrogate */
189                          cp |= trailing & 0x3FF;
190                          ptr += 6;
191                      } else {
192                          cp = -1; /* invalid */
193                      }
194                  }
195  
196                  if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
197                      parse_error(ctxt, token,
198                                  "%.*s is not a valid Unicode character",
199                                  (int)(ptr - beg), beg);
200                      goto out;
201                  }
202                  g_string_append(str, utf8_buf);
203                  break;
204              default:
205                  parse_error(ctxt, token, "invalid escape sequence in string");
206                  goto out;
207              }
208              break;
209          case '%':
210              if (ctxt->ap) {
211                  if (ptr[1] != '%') {
212                      parse_error(ctxt, token, "can't interpolate into string");
213                      goto out;
214                  }
215                  ptr++;
216              }
217              /* fall through */
218          default:
219              cp = mod_utf8_codepoint(ptr, 6, &end);
220              if (cp < 0) {
221                  parse_error(ctxt, token, "invalid UTF-8 sequence in string");
222                  goto out;
223              }
224              ptr = end;
225              len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
226              assert(len >= 0);
227              g_string_append(str, utf8_buf);
228          }
229      }
230  
231      return qstring_from_gstring(str);
232  
233  out:
234      g_string_free(str, true);
235      return NULL;
236  }
237  
238  /* Note: the token object returned by parser_context_peek_token or
239   * parser_context_pop_token is deleted as soon as parser_context_pop_token
240   * is called again.
241   */
parser_context_pop_token(JSONParserContext * ctxt)242  static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
243  {
244      g_free(ctxt->current);
245      ctxt->current = g_queue_pop_head(ctxt->buf);
246      return ctxt->current;
247  }
248  
parser_context_peek_token(JSONParserContext * ctxt)249  static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
250  {
251      return g_queue_peek_head(ctxt->buf);
252  }
253  
254  /**
255   * Parsing rules
256   */
/**
 * parse_pair(): Parse one "key : value" member and store it in @dict
 *
 * @ctxt: parser context
 * @dict: dictionary receiving the member
 *
 * The key must parse to a QString and must not be present in @dict
 * yet.
 *
 * Returns: 0 on success, -1 after recording an error in @ctxt.  On
 * failure nothing is added to @dict and no object is leaked.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict)
{
    QObject *key_obj = NULL;
    QString *key;
    QObject *value;
    JSONToken *peek, *token;

    peek = parser_context_peek_token(ctxt);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    /* Keep the generic object so it can be released whatever its type */
    key_obj = parse_value(ctxt);
    key = qobject_to(QString, key_obj);
    if (!key) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = parser_context_pop_token(ctxt);
    if (token == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (token->type != JSON_COLON) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    if (qdict_haskey(dict, qstring_get_str(key))) {
        parse_error(ctxt, token, "duplicate key");
        /* The value was parsed but will not be stored: release it */
        qobject_unref(value);
        goto out;
    }

    /* The reference to @value is handed over to @dict here */
    qdict_put_obj(dict, qstring_get_str(key), value);

    qobject_unref(key_obj);
    return 0;

out:
    qobject_unref(key_obj);
    return -1;
}
308  
parse_object(JSONParserContext * ctxt)309  static QObject *parse_object(JSONParserContext *ctxt)
310  {
311      QDict *dict = NULL;
312      JSONToken *token, *peek;
313  
314      token = parser_context_pop_token(ctxt);
315      assert(token && token->type == JSON_LCURLY);
316  
317      dict = qdict_new();
318  
319      peek = parser_context_peek_token(ctxt);
320      if (peek == NULL) {
321          parse_error(ctxt, NULL, "premature EOI");
322          goto out;
323      }
324  
325      if (peek->type != JSON_RCURLY) {
326          if (parse_pair(ctxt, dict) == -1) {
327              goto out;
328          }
329  
330          token = parser_context_pop_token(ctxt);
331          if (token == NULL) {
332              parse_error(ctxt, NULL, "premature EOI");
333              goto out;
334          }
335  
336          while (token->type != JSON_RCURLY) {
337              if (token->type != JSON_COMMA) {
338                  parse_error(ctxt, token, "expected separator in dict");
339                  goto out;
340              }
341  
342              if (parse_pair(ctxt, dict) == -1) {
343                  goto out;
344              }
345  
346              token = parser_context_pop_token(ctxt);
347              if (token == NULL) {
348                  parse_error(ctxt, NULL, "premature EOI");
349                  goto out;
350              }
351          }
352      } else {
353          (void)parser_context_pop_token(ctxt);
354      }
355  
356      return QOBJECT(dict);
357  
358  out:
359      qobject_unref(dict);
360      return NULL;
361  }
362  
parse_array(JSONParserContext * ctxt)363  static QObject *parse_array(JSONParserContext *ctxt)
364  {
365      QList *list = NULL;
366      JSONToken *token, *peek;
367  
368      token = parser_context_pop_token(ctxt);
369      assert(token && token->type == JSON_LSQUARE);
370  
371      list = qlist_new();
372  
373      peek = parser_context_peek_token(ctxt);
374      if (peek == NULL) {
375          parse_error(ctxt, NULL, "premature EOI");
376          goto out;
377      }
378  
379      if (peek->type != JSON_RSQUARE) {
380          QObject *obj;
381  
382          obj = parse_value(ctxt);
383          if (obj == NULL) {
384              parse_error(ctxt, token, "expecting value");
385              goto out;
386          }
387  
388          qlist_append_obj(list, obj);
389  
390          token = parser_context_pop_token(ctxt);
391          if (token == NULL) {
392              parse_error(ctxt, NULL, "premature EOI");
393              goto out;
394          }
395  
396          while (token->type != JSON_RSQUARE) {
397              if (token->type != JSON_COMMA) {
398                  parse_error(ctxt, token, "expected separator in list");
399                  goto out;
400              }
401  
402              obj = parse_value(ctxt);
403              if (obj == NULL) {
404                  parse_error(ctxt, token, "expecting value");
405                  goto out;
406              }
407  
408              qlist_append_obj(list, obj);
409  
410              token = parser_context_pop_token(ctxt);
411              if (token == NULL) {
412                  parse_error(ctxt, NULL, "premature EOI");
413                  goto out;
414              }
415          }
416      } else {
417          (void)parser_context_pop_token(ctxt);
418      }
419  
420      return QOBJECT(list);
421  
422  out:
423      qobject_unref(list);
424      return NULL;
425  }
426  
parse_keyword(JSONParserContext * ctxt)427  static QObject *parse_keyword(JSONParserContext *ctxt)
428  {
429      JSONToken *token;
430  
431      token = parser_context_pop_token(ctxt);
432      assert(token && token->type == JSON_KEYWORD);
433  
434      if (!strcmp(token->str, "true")) {
435          return QOBJECT(qbool_from_bool(true));
436      } else if (!strcmp(token->str, "false")) {
437          return QOBJECT(qbool_from_bool(false));
438      } else if (!strcmp(token->str, "null")) {
439          return QOBJECT(qnull());
440      }
441      parse_error(ctxt, token, "invalid keyword '%s'", token->str);
442      return NULL;
443  }
444  
parse_interpolation(JSONParserContext * ctxt)445  static QObject *parse_interpolation(JSONParserContext *ctxt)
446  {
447      JSONToken *token;
448  
449      token = parser_context_pop_token(ctxt);
450      assert(token && token->type == JSON_INTERP);
451  
452      if (!strcmp(token->str, "%p")) {
453          return va_arg(*ctxt->ap, QObject *);
454      } else if (!strcmp(token->str, "%i")) {
455          return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
456      } else if (!strcmp(token->str, "%d")) {
457          return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
458      } else if (!strcmp(token->str, "%ld")) {
459          return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
460      } else if (!strcmp(token->str, "%lld")) {
461          return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
462      } else if (!strcmp(token->str, "%" PRId64)) {
463          return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
464      } else if (!strcmp(token->str, "%u")) {
465          return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
466      } else if (!strcmp(token->str, "%lu")) {
467          return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
468      } else if (!strcmp(token->str, "%llu")) {
469          return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
470      } else if (!strcmp(token->str, "%" PRIu64)) {
471          return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
472      } else if (!strcmp(token->str, "%s")) {
473          return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
474      } else if (!strcmp(token->str, "%f")) {
475          return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
476      }
477      parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
478      return NULL;
479  }
480  
parse_literal(JSONParserContext * ctxt)481  static QObject *parse_literal(JSONParserContext *ctxt)
482  {
483      JSONToken *token;
484  
485      token = parser_context_pop_token(ctxt);
486      assert(token);
487  
488      switch (token->type) {
489      case JSON_STRING:
490          return QOBJECT(parse_string(ctxt, token));
491      case JSON_INTEGER: {
492          /*
493           * Represent JSON_INTEGER as QNUM_I64 if possible, else as
494           * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
495           * and qemu_strtou64() fail with ERANGE when it's not
496           * possible.
497           *
498           * qnum_get_int() will then work for any signed 64-bit
499           * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
500           * integer, and qnum_get_double() both for any JSON_INTEGER
501           * and any JSON_FLOAT (with precision loss for integers beyond
502           * 53 bits)
503           */
504          int ret;
505          int64_t value;
506          uint64_t uvalue;
507  
508          ret = qemu_strtoi64(token->str, NULL, 10, &value);
509          if (!ret) {
510              return QOBJECT(qnum_from_int(value));
511          }
512          assert(ret == -ERANGE);
513  
514          if (token->str[0] != '-') {
515              ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
516              if (!ret) {
517                  return QOBJECT(qnum_from_uint(uvalue));
518              }
519              assert(ret == -ERANGE);
520          }
521      }
522      /* fall through to JSON_FLOAT */
523      case JSON_FLOAT:
524          /* FIXME dependent on locale; a pervasive issue in QEMU */
525          /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
526           * but those might be useful extensions beyond JSON */
527          return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
528      default:
529          abort();
530      }
531  }
532  
parse_value(JSONParserContext * ctxt)533  static QObject *parse_value(JSONParserContext *ctxt)
534  {
535      JSONToken *token;
536  
537      token = parser_context_peek_token(ctxt);
538      if (token == NULL) {
539          parse_error(ctxt, NULL, "premature EOI");
540          return NULL;
541      }
542  
543      switch (token->type) {
544      case JSON_LCURLY:
545          return parse_object(ctxt);
546      case JSON_LSQUARE:
547          return parse_array(ctxt);
548      case JSON_INTERP:
549          return parse_interpolation(ctxt);
550      case JSON_INTEGER:
551      case JSON_FLOAT:
552      case JSON_STRING:
553          return parse_literal(ctxt);
554      case JSON_KEYWORD:
555          return parse_keyword(ctxt);
556      default:
557          parse_error(ctxt, token, "expecting value");
558          return NULL;
559      }
560  }
561  
json_token(JSONTokenType type,int x,int y,GString * tokstr)562  JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
563  {
564      JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
565  
566      token->type = type;
567      memcpy(token->str, tokstr->str, tokstr->len);
568      token->str[tokstr->len] = 0;
569      token->x = x;
570      token->y = y;
571      return token;
572  }
573  
json_parser_parse(GQueue * tokens,va_list * ap,Error ** errp)574  QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
575  {
576      JSONParserContext ctxt = { .buf = tokens, .ap = ap };
577      QObject *result;
578  
579      result = parse_value(&ctxt);
580      assert(ctxt.err || g_queue_is_empty(ctxt.buf));
581  
582      error_propagate(errp, ctxt.err);
583  
584      while (!g_queue_is_empty(ctxt.buf)) {
585          parser_context_pop_token(&ctxt);
586      }
587      g_free(ctxt.current);
588  
589      return result;
590  }
591