1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 Error *err; 29 struct { 30 QObject **buf; 31 size_t pos; 32 size_t count; 33 } tokens; 34 } JSONParserContext; 35 36 #define BUG_ON(cond) assert(!(cond)) 37 38 /** 39 * TODO 40 * 41 * 0) make errors meaningful again 42 * 1) add geometry information to tokens 43 * 3) should we return a parsed size? 44 * 4) deal with premature EOI 45 */ 46 47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 48 49 /** 50 * Token manipulators 51 * 52 * tokens are dictionaries that contain a type, a string value, and geometry information 53 * about a token identified by the lexer. These are routines that make working with 54 * these objects a bit easier. 55 */ 56 static const char *token_get_value(QObject *obj) 57 { 58 return qdict_get_str(qobject_to_qdict(obj), "token"); 59 } 60 61 static JSONTokenType token_get_type(QObject *obj) 62 { 63 return qdict_get_int(qobject_to_qdict(obj), "type"); 64 } 65 66 static int token_is_escape(QObject *obj, const char *value) 67 { 68 if (token_get_type(obj) != JSON_ESCAPE) { 69 return 0; 70 } 71 72 return (strcmp(token_get_value(obj), value) == 0); 73 } 74 75 /** 76 * Error handler 77 */ 78 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 79 QObject *token, const char *msg, ...) 80 { 81 va_list ap; 82 char message[1024]; 83 va_start(ap, msg); 84 vsnprintf(message, sizeof(message), msg, ap); 85 va_end(ap); 86 if (ctxt->err) { 87 error_free(ctxt->err); 88 ctxt->err = NULL; 89 } 90 error_setg(&ctxt->err, "JSON parse error, %s", message); 91 } 92 93 /** 94 * String helpers 95 * 96 * These helpers are used to unescape strings. 97 */ 98 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 99 { 100 if (wchar <= 0x007F) { 101 BUG_ON(buffer_length < 2); 102 103 buffer[0] = wchar & 0x7F; 104 buffer[1] = 0; 105 } else if (wchar <= 0x07FF) { 106 BUG_ON(buffer_length < 3); 107 108 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 109 buffer[1] = 0x80 | (wchar & 0x3F); 110 buffer[2] = 0; 111 } else { 112 BUG_ON(buffer_length < 4); 113 114 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 115 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 116 buffer[2] = 0x80 | (wchar & 0x3F); 117 buffer[3] = 0; 118 } 119 } 120 121 static int hex2decimal(char ch) 122 { 123 if (ch >= '0' && ch <= '9') { 124 return (ch - '0'); 125 } else if (ch >= 'a' && ch <= 'f') { 126 return 10 + (ch - 'a'); 127 } else if (ch >= 'A' && ch <= 'F') { 128 return 10 + (ch - 'A'); 129 } 130 131 return -1; 132 } 133 134 /** 135 * parse_string(): Parse a json string and return a QObject 136 * 137 * string 138 * "" 139 * " chars " 140 * chars 141 * char 142 * char chars 143 * char 144 * any-Unicode-character- 145 * except-"-or-\-or- 146 * control-character 147 * \" 148 * \\ 149 * \/ 150 * \b 151 * \f 152 * \n 153 * \r 154 * \t 155 * \u four-hex-digits 156 */ 157 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 158 { 159 const char *ptr = token_get_value(token); 160 QString *str; 161 int double_quote = 1; 162 163 if (*ptr == '"') { 164 double_quote = 1; 165 } else { 166 double_quote = 0; 167 } 168 ptr++; 169 170 str = qstring_new(); 171 while (*ptr && 172 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 173 if (*ptr == '\\') { 174 ptr++; 175 176 switch (*ptr) { 177 case '"': 178 qstring_append(str, "\""); 179 ptr++; 180 break; 181 case '\'': 182 qstring_append(str, "'"); 183 ptr++; 184 break; 185 case '\\': 186 qstring_append(str, "\\"); 187 ptr++; 188 break; 189 case '/': 190 qstring_append(str, "/"); 191 ptr++; 192 break; 193 case 'b': 194 qstring_append(str, "\b"); 195 ptr++; 196 break; 197 case 'f': 198 qstring_append(str, "\f"); 199 ptr++; 200 break; 201 case 'n': 202 qstring_append(str, "\n"); 203 ptr++; 204 break; 205 case 'r': 206 qstring_append(str, "\r"); 207 ptr++; 208 break; 209 case 't': 210 qstring_append(str, "\t"); 211 ptr++; 212 break; 213 case 'u': { 214 uint16_t unicode_char = 0; 215 char utf8_char[4]; 216 int i = 0; 217 218 ptr++; 219 220 for (i = 0; i < 4; i++) { 221 if (qemu_isxdigit(*ptr)) { 222 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 223 } else { 224 parse_error(ctxt, token, 225 "invalid hex escape sequence in string"); 226 goto out; 227 } 228 ptr++; 229 } 230 231 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 232 qstring_append(str, utf8_char); 233 } break; 234 default: 235 parse_error(ctxt, token, "invalid escape sequence in string"); 236 goto out; 237 } 238 } else { 239 char dummy[2]; 240 241 dummy[0] = *ptr++; 242 dummy[1] = 0; 243 244 qstring_append(str, dummy); 245 } 246 } 247 248 return str; 249 250 out: 251 QDECREF(str); 252 return NULL; 253 } 254 255 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 256 { 257 QObject *token; 258 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 259 token = ctxt->tokens.buf[ctxt->tokens.pos]; 260 ctxt->tokens.pos++; 261 return token; 262 } 263 264 /* Note: parser_context_{peek|pop}_token do not increment the 265 * token object's refcount. In both cases the references will continue 266 * to be tracked and cleaned up in parser_context_free(), so do not 267 * attempt to free the token object. 268 */ 269 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 270 { 271 QObject *token; 272 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 273 token = ctxt->tokens.buf[ctxt->tokens.pos]; 274 return token; 275 } 276 277 static JSONParserContext parser_context_save(JSONParserContext *ctxt) 278 { 279 JSONParserContext saved_ctxt = {0}; 280 saved_ctxt.tokens.pos = ctxt->tokens.pos; 281 saved_ctxt.tokens.count = ctxt->tokens.count; 282 saved_ctxt.tokens.buf = ctxt->tokens.buf; 283 return saved_ctxt; 284 } 285 286 static void parser_context_restore(JSONParserContext *ctxt, 287 JSONParserContext saved_ctxt) 288 { 289 ctxt->tokens.pos = saved_ctxt.tokens.pos; 290 ctxt->tokens.count = saved_ctxt.tokens.count; 291 ctxt->tokens.buf = saved_ctxt.tokens.buf; 292 } 293 294 static void tokens_append_from_iter(QObject *obj, void *opaque) 295 { 296 JSONParserContext *ctxt = opaque; 297 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 298 ctxt->tokens.buf[ctxt->tokens.pos++] = obj; 299 qobject_incref(obj); 300 } 301 302 static JSONParserContext *parser_context_new(QList *tokens) 303 { 304 JSONParserContext *ctxt; 305 size_t count; 306 307 if (!tokens) { 308 return NULL; 309 } 310 311 count = qlist_size(tokens); 312 if (count == 0) { 313 return NULL; 314 } 315 316 ctxt = g_malloc0(sizeof(JSONParserContext)); 317 ctxt->tokens.pos = 0; 318 ctxt->tokens.count = count; 319 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); 320 qlist_iter(tokens, tokens_append_from_iter, ctxt); 321 ctxt->tokens.pos = 0; 322 323 return ctxt; 324 } 325 326 /* to support error propagation, ctxt->err must be freed separately */ 327 static void parser_context_free(JSONParserContext *ctxt) 328 { 329 int i; 330 if (ctxt) { 331 for (i = 0; i < ctxt->tokens.count; i++) { 332 qobject_decref(ctxt->tokens.buf[i]); 333 } 334 g_free(ctxt->tokens.buf); 335 g_free(ctxt); 336 } 337 } 338 339 /** 340 * Parsing rules 341 */ 342 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 343 { 344 QObject *key = NULL, *token = NULL, *value, *peek; 345 JSONParserContext saved_ctxt = parser_context_save(ctxt); 346 347 peek = parser_context_peek_token(ctxt); 348 if (peek == NULL) { 349 parse_error(ctxt, NULL, "premature EOI"); 350 goto out; 351 } 352 353 key = parse_value(ctxt, ap); 354 if (!key || qobject_type(key) != QTYPE_QSTRING) { 355 parse_error(ctxt, peek, "key is not a string in object"); 356 goto out; 357 } 358 359 token = parser_context_pop_token(ctxt); 360 if (token == NULL) { 361 parse_error(ctxt, NULL, "premature EOI"); 362 goto out; 363 } 364 365 if (token_get_type(token) != JSON_COLON) { 366 parse_error(ctxt, token, "missing : in object pair"); 367 goto out; 368 } 369 370 value = parse_value(ctxt, ap); 371 if (value == NULL) { 372 parse_error(ctxt, token, "Missing value in dict"); 373 goto out; 374 } 375 376 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 377 378 qobject_decref(key); 379 380 return 0; 381 382 out: 383 parser_context_restore(ctxt, saved_ctxt); 384 qobject_decref(key); 385 386 return -1; 387 } 388 389 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 390 { 391 QDict *dict = NULL; 392 QObject *token, *peek; 393 JSONParserContext saved_ctxt = parser_context_save(ctxt); 394 395 token = parser_context_pop_token(ctxt); 396 if (token == NULL) { 397 goto out; 398 } 399 400 if (token_get_type(token) != JSON_LCURLY) { 401 goto out; 402 } 403 404 dict = qdict_new(); 405 406 peek = parser_context_peek_token(ctxt); 407 if (peek == NULL) { 408 parse_error(ctxt, NULL, "premature EOI"); 409 goto out; 410 } 411 412 if (token_get_type(peek) != JSON_RCURLY) { 413 if (parse_pair(ctxt, dict, ap) == -1) { 414 goto out; 415 } 416 417 token = parser_context_pop_token(ctxt); 418 if (token == NULL) { 419 parse_error(ctxt, NULL, "premature EOI"); 420 goto out; 421 } 422 423 while (token_get_type(token) != JSON_RCURLY) { 424 if (token_get_type(token) != JSON_COMMA) { 425 parse_error(ctxt, token, "expected separator in dict"); 426 goto out; 427 } 428 429 if (parse_pair(ctxt, dict, ap) == -1) { 430 goto out; 431 } 432 433 token = parser_context_pop_token(ctxt); 434 if (token == NULL) { 435 parse_error(ctxt, NULL, "premature EOI"); 436 goto out; 437 } 438 } 439 } else { 440 (void)parser_context_pop_token(ctxt); 441 } 442 443 return QOBJECT(dict); 444 445 out: 446 parser_context_restore(ctxt, saved_ctxt); 447 QDECREF(dict); 448 return NULL; 449 } 450 451 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 452 { 453 QList *list = NULL; 454 QObject *token, *peek; 455 JSONParserContext saved_ctxt = parser_context_save(ctxt); 456 457 token = parser_context_pop_token(ctxt); 458 if (token == NULL) { 459 goto out; 460 } 461 462 if (token_get_type(token) != JSON_LSQUARE) { 463 goto out; 464 } 465 466 list = qlist_new(); 467 468 peek = parser_context_peek_token(ctxt); 469 if (peek == NULL) { 470 parse_error(ctxt, NULL, "premature EOI"); 471 goto out; 472 } 473 474 if (token_get_type(peek) != JSON_RSQUARE) { 475 QObject *obj; 476 477 obj = parse_value(ctxt, ap); 478 if (obj == NULL) { 479 parse_error(ctxt, token, "expecting value"); 480 goto out; 481 } 482 483 qlist_append_obj(list, obj); 484 485 token = parser_context_pop_token(ctxt); 486 if (token == NULL) { 487 parse_error(ctxt, NULL, "premature EOI"); 488 goto out; 489 } 490 491 while (token_get_type(token) != JSON_RSQUARE) { 492 if (token_get_type(token) != JSON_COMMA) { 493 parse_error(ctxt, token, "expected separator in list"); 494 goto out; 495 } 496 497 obj = parse_value(ctxt, ap); 498 if (obj == NULL) { 499 parse_error(ctxt, token, "expecting value"); 500 goto out; 501 } 502 503 qlist_append_obj(list, obj); 504 505 token = parser_context_pop_token(ctxt); 506 if (token == NULL) { 507 parse_error(ctxt, NULL, "premature EOI"); 508 goto out; 509 } 510 } 511 } else { 512 (void)parser_context_pop_token(ctxt); 513 } 514 515 return QOBJECT(list); 516 517 out: 518 parser_context_restore(ctxt, saved_ctxt); 519 QDECREF(list); 520 return NULL; 521 } 522 523 static QObject *parse_keyword(JSONParserContext *ctxt) 524 { 525 QObject *token, *ret; 526 JSONParserContext saved_ctxt = parser_context_save(ctxt); 527 const char *val; 528 529 token = parser_context_pop_token(ctxt); 530 if (token == NULL) { 531 goto out; 532 } 533 534 if (token_get_type(token) != JSON_KEYWORD) { 535 goto out; 536 } 537 538 val = token_get_value(token); 539 540 if (!strcmp(val, "true")) { 541 ret = QOBJECT(qbool_from_bool(true)); 542 } else if (!strcmp(val, "false")) { 543 ret = QOBJECT(qbool_from_bool(false)); 544 } else if (!strcmp(val, "null")) { 545 ret = qnull(); 546 } else { 547 parse_error(ctxt, token, "invalid keyword '%s'", val); 548 goto out; 549 } 550 551 return ret; 552 553 out: 554 parser_context_restore(ctxt, saved_ctxt); 555 556 return NULL; 557 } 558 559 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 560 { 561 QObject *token = NULL, *obj; 562 JSONParserContext saved_ctxt = parser_context_save(ctxt); 563 564 if (ap == NULL) { 565 goto out; 566 } 567 568 token = parser_context_pop_token(ctxt); 569 if (token == NULL) { 570 goto out; 571 } 572 573 if (token_is_escape(token, "%p")) { 574 obj = va_arg(*ap, QObject *); 575 } else if (token_is_escape(token, "%i")) { 576 obj = QOBJECT(qbool_from_bool(va_arg(*ap, int))); 577 } else if (token_is_escape(token, "%d")) { 578 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 579 } else if (token_is_escape(token, "%ld")) { 580 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 581 } else if (token_is_escape(token, "%lld") || 582 token_is_escape(token, "%I64d")) { 583 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 584 } else if (token_is_escape(token, "%s")) { 585 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 586 } else if (token_is_escape(token, "%f")) { 587 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 588 } else { 589 goto out; 590 } 591 592 return obj; 593 594 out: 595 parser_context_restore(ctxt, saved_ctxt); 596 597 return NULL; 598 } 599 600 static QObject *parse_literal(JSONParserContext *ctxt) 601 { 602 QObject *token, *obj; 603 JSONParserContext saved_ctxt = parser_context_save(ctxt); 604 605 token = parser_context_pop_token(ctxt); 606 if (token == NULL) { 607 goto out; 608 } 609 610 switch (token_get_type(token)) { 611 case JSON_STRING: 612 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 613 break; 614 case JSON_INTEGER: { 615 /* A possibility exists that this is a whole-valued float where the 616 * fractional part was left out due to being 0 (.0). It's not a big 617 * deal to treat these as ints in the parser, so long as users of the 618 * resulting QObject know to expect a QInt in place of a QFloat in 619 * cases like these. 620 * 621 * However, in some cases these values will overflow/underflow a 622 * QInt/int64 container, thus we should assume these are to be handled 623 * as QFloats/doubles rather than silently changing their values. 624 * 625 * strtoll() indicates these instances by setting errno to ERANGE 626 */ 627 int64_t value; 628 629 errno = 0; /* strtoll doesn't set errno on success */ 630 value = strtoll(token_get_value(token), NULL, 10); 631 if (errno != ERANGE) { 632 obj = QOBJECT(qint_from_int(value)); 633 break; 634 } 635 /* fall through to JSON_FLOAT */ 636 } 637 case JSON_FLOAT: 638 /* FIXME dependent on locale */ 639 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 640 break; 641 default: 642 goto out; 643 } 644 645 return obj; 646 647 out: 648 parser_context_restore(ctxt, saved_ctxt); 649 650 return NULL; 651 } 652 653 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 654 { 655 QObject *obj; 656 657 obj = parse_object(ctxt, ap); 658 if (obj == NULL) { 659 obj = parse_array(ctxt, ap); 660 } 661 if (obj == NULL) { 662 obj = parse_escape(ctxt, ap); 663 } 664 if (obj == NULL) { 665 obj = parse_keyword(ctxt); 666 } 667 if (obj == NULL) { 668 obj = parse_literal(ctxt); 669 } 670 671 return obj; 672 } 673 674 QObject *json_parser_parse(QList *tokens, va_list *ap) 675 { 676 return json_parser_parse_err(tokens, ap, NULL); 677 } 678 679 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) 680 { 681 JSONParserContext *ctxt = parser_context_new(tokens); 682 QObject *result; 683 684 if (!ctxt) { 685 return NULL; 686 } 687 688 result = parse_value(ctxt, ap); 689 690 error_propagate(errp, ctxt->err); 691 692 parser_context_free(ctxt); 693 694 return result; 695 } 696