1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 Error *err; 29 struct { 30 QObject **buf; 31 size_t pos; 32 size_t count; 33 } tokens; 34 } JSONParserContext; 35 36 #define BUG_ON(cond) assert(!(cond)) 37 38 /** 39 * TODO 40 * 41 * 0) make errors meaningful again 42 * 1) add geometry information to tokens 43 * 3) should we return a parsed size? 44 * 4) deal with premature EOI 45 */ 46 47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 48 49 /** 50 * Token manipulators 51 * 52 * tokens are dictionaries that contain a type, a string value, and geometry information 53 * about a token identified by the lexer. These are routines that make working with 54 * these objects a bit easier. 55 */ 56 static const char *token_get_value(QObject *obj) 57 { 58 return qdict_get_str(qobject_to_qdict(obj), "token"); 59 } 60 61 static JSONTokenType token_get_type(QObject *obj) 62 { 63 return qdict_get_int(qobject_to_qdict(obj), "type"); 64 } 65 66 static int token_is_operator(QObject *obj, char op) 67 { 68 const char *val; 69 70 if (token_get_type(obj) != JSON_OPERATOR) { 71 return 0; 72 } 73 74 val = token_get_value(obj); 75 76 return (val[0] == op) && (val[1] == 0); 77 } 78 79 static int token_is_keyword(QObject *obj, const char *value) 80 { 81 if (token_get_type(obj) != JSON_KEYWORD) { 82 return 0; 83 } 84 85 return strcmp(token_get_value(obj), value) == 0; 86 } 87 88 static int token_is_escape(QObject *obj, const char *value) 89 { 90 if (token_get_type(obj) != JSON_ESCAPE) { 91 return 0; 92 } 93 94 return (strcmp(token_get_value(obj), value) == 0); 95 } 96 97 /** 98 * Error handler 99 */ 100 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 101 QObject *token, const char *msg, ...) 102 { 103 va_list ap; 104 char message[1024]; 105 va_start(ap, msg); 106 vsnprintf(message, sizeof(message), msg, ap); 107 va_end(ap); 108 if (ctxt->err) { 109 error_free(ctxt->err); 110 ctxt->err = NULL; 111 } 112 error_setg(&ctxt->err, "JSON parse error, %s", message); 113 } 114 115 /** 116 * String helpers 117 * 118 * These helpers are used to unescape strings. 119 */ 120 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 121 { 122 if (wchar <= 0x007F) { 123 BUG_ON(buffer_length < 2); 124 125 buffer[0] = wchar & 0x7F; 126 buffer[1] = 0; 127 } else if (wchar <= 0x07FF) { 128 BUG_ON(buffer_length < 3); 129 130 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 131 buffer[1] = 0x80 | (wchar & 0x3F); 132 buffer[2] = 0; 133 } else { 134 BUG_ON(buffer_length < 4); 135 136 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 137 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 138 buffer[2] = 0x80 | (wchar & 0x3F); 139 buffer[3] = 0; 140 } 141 } 142 143 static int hex2decimal(char ch) 144 { 145 if (ch >= '0' && ch <= '9') { 146 return (ch - '0'); 147 } else if (ch >= 'a' && ch <= 'f') { 148 return 10 + (ch - 'a'); 149 } else if (ch >= 'A' && ch <= 'F') { 150 return 10 + (ch - 'A'); 151 } 152 153 return -1; 154 } 155 156 /** 157 * parse_string(): Parse a json string and return a QObject 158 * 159 * string 160 * "" 161 * " chars " 162 * chars 163 * char 164 * char chars 165 * char 166 * any-Unicode-character- 167 * except-"-or-\-or- 168 * control-character 169 * \" 170 * \\ 171 * \/ 172 * \b 173 * \f 174 * \n 175 * \r 176 * \t 177 * \u four-hex-digits 178 */ 179 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 180 { 181 const char *ptr = token_get_value(token); 182 QString *str; 183 int double_quote = 1; 184 185 if (*ptr == '"') { 186 double_quote = 1; 187 } else { 188 double_quote = 0; 189 } 190 ptr++; 191 192 str = qstring_new(); 193 while (*ptr && 194 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 195 if (*ptr == '\\') { 196 ptr++; 197 198 switch (*ptr) { 199 case '"': 200 qstring_append(str, "\""); 201 ptr++; 202 break; 203 case '\'': 204 qstring_append(str, "'"); 205 ptr++; 206 break; 207 case '\\': 208 qstring_append(str, "\\"); 209 ptr++; 210 break; 211 case '/': 212 qstring_append(str, "/"); 213 ptr++; 214 break; 215 case 'b': 216 qstring_append(str, "\b"); 217 ptr++; 218 break; 219 case 'f': 220 qstring_append(str, "\f"); 221 ptr++; 222 break; 223 case 'n': 224 qstring_append(str, "\n"); 225 ptr++; 226 break; 227 case 'r': 228 qstring_append(str, "\r"); 229 ptr++; 230 break; 231 case 't': 232 qstring_append(str, "\t"); 233 ptr++; 234 break; 235 case 'u': { 236 uint16_t unicode_char = 0; 237 char utf8_char[4]; 238 int i = 0; 239 240 ptr++; 241 242 for (i = 0; i < 4; i++) { 243 if (qemu_isxdigit(*ptr)) { 244 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 245 } else { 246 parse_error(ctxt, token, 247 "invalid hex escape sequence in string"); 248 goto out; 249 } 250 ptr++; 251 } 252 253 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 254 qstring_append(str, utf8_char); 255 } break; 256 default: 257 parse_error(ctxt, token, "invalid escape sequence in string"); 258 goto out; 259 } 260 } else { 261 char dummy[2]; 262 263 dummy[0] = *ptr++; 264 dummy[1] = 0; 265 266 qstring_append(str, dummy); 267 } 268 } 269 270 return str; 271 272 out: 273 QDECREF(str); 274 return NULL; 275 } 276 277 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 278 { 279 QObject *token; 280 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 281 token = ctxt->tokens.buf[ctxt->tokens.pos]; 282 ctxt->tokens.pos++; 283 return token; 284 } 285 286 /* Note: parser_context_{peek|pop}_token do not increment the 287 * token object's refcount. In both cases the references will continue 288 * to be tracked and cleaned up in parser_context_free(), so do not 289 * attempt to free the token object. 290 */ 291 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 292 { 293 QObject *token; 294 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 295 token = ctxt->tokens.buf[ctxt->tokens.pos]; 296 return token; 297 } 298 299 static JSONParserContext parser_context_save(JSONParserContext *ctxt) 300 { 301 JSONParserContext saved_ctxt = {0}; 302 saved_ctxt.tokens.pos = ctxt->tokens.pos; 303 saved_ctxt.tokens.count = ctxt->tokens.count; 304 saved_ctxt.tokens.buf = ctxt->tokens.buf; 305 return saved_ctxt; 306 } 307 308 static void parser_context_restore(JSONParserContext *ctxt, 309 JSONParserContext saved_ctxt) 310 { 311 ctxt->tokens.pos = saved_ctxt.tokens.pos; 312 ctxt->tokens.count = saved_ctxt.tokens.count; 313 ctxt->tokens.buf = saved_ctxt.tokens.buf; 314 } 315 316 static void tokens_append_from_iter(QObject *obj, void *opaque) 317 { 318 JSONParserContext *ctxt = opaque; 319 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 320 ctxt->tokens.buf[ctxt->tokens.pos++] = obj; 321 qobject_incref(obj); 322 } 323 324 static JSONParserContext *parser_context_new(QList *tokens) 325 { 326 JSONParserContext *ctxt; 327 size_t count; 328 329 if (!tokens) { 330 return NULL; 331 } 332 333 count = qlist_size(tokens); 334 if (count == 0) { 335 return NULL; 336 } 337 338 ctxt = g_malloc0(sizeof(JSONParserContext)); 339 ctxt->tokens.pos = 0; 340 ctxt->tokens.count = count; 341 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); 342 qlist_iter(tokens, tokens_append_from_iter, ctxt); 343 ctxt->tokens.pos = 0; 344 345 return ctxt; 346 } 347 348 /* to support error propagation, ctxt->err must be freed separately */ 349 static void parser_context_free(JSONParserContext *ctxt) 350 { 351 int i; 352 if (ctxt) { 353 for (i = 0; i < ctxt->tokens.count; i++) { 354 qobject_decref(ctxt->tokens.buf[i]); 355 } 356 g_free(ctxt->tokens.buf); 357 g_free(ctxt); 358 } 359 } 360 361 /** 362 * Parsing rules 363 */ 364 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 365 { 366 QObject *key = NULL, *token = NULL, *value, *peek; 367 JSONParserContext saved_ctxt = parser_context_save(ctxt); 368 369 peek = parser_context_peek_token(ctxt); 370 if (peek == NULL) { 371 parse_error(ctxt, NULL, "premature EOI"); 372 goto out; 373 } 374 375 key = parse_value(ctxt, ap); 376 if (!key || qobject_type(key) != QTYPE_QSTRING) { 377 parse_error(ctxt, peek, "key is not a string in object"); 378 goto out; 379 } 380 381 token = parser_context_pop_token(ctxt); 382 if (token == NULL) { 383 parse_error(ctxt, NULL, "premature EOI"); 384 goto out; 385 } 386 387 if (!token_is_operator(token, ':')) { 388 parse_error(ctxt, token, "missing : in object pair"); 389 goto out; 390 } 391 392 value = parse_value(ctxt, ap); 393 if (value == NULL) { 394 parse_error(ctxt, token, "Missing value in dict"); 395 goto out; 396 } 397 398 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 399 400 qobject_decref(key); 401 402 return 0; 403 404 out: 405 parser_context_restore(ctxt, saved_ctxt); 406 qobject_decref(key); 407 408 return -1; 409 } 410 411 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 412 { 413 QDict *dict = NULL; 414 QObject *token, *peek; 415 JSONParserContext saved_ctxt = parser_context_save(ctxt); 416 417 token = parser_context_pop_token(ctxt); 418 if (token == NULL) { 419 goto out; 420 } 421 422 if (!token_is_operator(token, '{')) { 423 goto out; 424 } 425 426 dict = qdict_new(); 427 428 peek = parser_context_peek_token(ctxt); 429 if (peek == NULL) { 430 parse_error(ctxt, NULL, "premature EOI"); 431 goto out; 432 } 433 434 if (!token_is_operator(peek, '}')) { 435 if (parse_pair(ctxt, dict, ap) == -1) { 436 goto out; 437 } 438 439 token = parser_context_pop_token(ctxt); 440 if (token == NULL) { 441 parse_error(ctxt, NULL, "premature EOI"); 442 goto out; 443 } 444 445 while (!token_is_operator(token, '}')) { 446 if (!token_is_operator(token, ',')) { 447 parse_error(ctxt, token, "expected separator in dict"); 448 goto out; 449 } 450 451 if (parse_pair(ctxt, dict, ap) == -1) { 452 goto out; 453 } 454 455 token = parser_context_pop_token(ctxt); 456 if (token == NULL) { 457 parse_error(ctxt, NULL, "premature EOI"); 458 goto out; 459 } 460 } 461 } else { 462 (void)parser_context_pop_token(ctxt); 463 } 464 465 return QOBJECT(dict); 466 467 out: 468 parser_context_restore(ctxt, saved_ctxt); 469 QDECREF(dict); 470 return NULL; 471 } 472 473 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 474 { 475 QList *list = NULL; 476 QObject *token, *peek; 477 JSONParserContext saved_ctxt = parser_context_save(ctxt); 478 479 token = parser_context_pop_token(ctxt); 480 if (token == NULL) { 481 goto out; 482 } 483 484 if (!token_is_operator(token, '[')) { 485 goto out; 486 } 487 488 list = qlist_new(); 489 490 peek = parser_context_peek_token(ctxt); 491 if (peek == NULL) { 492 parse_error(ctxt, NULL, "premature EOI"); 493 goto out; 494 } 495 496 if (!token_is_operator(peek, ']')) { 497 QObject *obj; 498 499 obj = parse_value(ctxt, ap); 500 if (obj == NULL) { 501 parse_error(ctxt, token, "expecting value"); 502 goto out; 503 } 504 505 qlist_append_obj(list, obj); 506 507 token = parser_context_pop_token(ctxt); 508 if (token == NULL) { 509 parse_error(ctxt, NULL, "premature EOI"); 510 goto out; 511 } 512 513 while (!token_is_operator(token, ']')) { 514 if (!token_is_operator(token, ',')) { 515 parse_error(ctxt, token, "expected separator in list"); 516 goto out; 517 } 518 519 obj = parse_value(ctxt, ap); 520 if (obj == NULL) { 521 parse_error(ctxt, token, "expecting value"); 522 goto out; 523 } 524 525 qlist_append_obj(list, obj); 526 527 token = parser_context_pop_token(ctxt); 528 if (token == NULL) { 529 parse_error(ctxt, NULL, "premature EOI"); 530 goto out; 531 } 532 } 533 } else { 534 (void)parser_context_pop_token(ctxt); 535 } 536 537 return QOBJECT(list); 538 539 out: 540 parser_context_restore(ctxt, saved_ctxt); 541 QDECREF(list); 542 return NULL; 543 } 544 545 static QObject *parse_keyword(JSONParserContext *ctxt) 546 { 547 QObject *token, *ret; 548 JSONParserContext saved_ctxt = parser_context_save(ctxt); 549 550 token = parser_context_pop_token(ctxt); 551 if (token == NULL) { 552 goto out; 553 } 554 555 if (token_get_type(token) != JSON_KEYWORD) { 556 goto out; 557 } 558 559 if (token_is_keyword(token, "true")) { 560 ret = QOBJECT(qbool_from_bool(true)); 561 } else if (token_is_keyword(token, "false")) { 562 ret = QOBJECT(qbool_from_bool(false)); 563 } else if (token_is_keyword(token, "null")) { 564 ret = qnull(); 565 } else { 566 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 567 goto out; 568 } 569 570 return ret; 571 572 out: 573 parser_context_restore(ctxt, saved_ctxt); 574 575 return NULL; 576 } 577 578 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 579 { 580 QObject *token = NULL, *obj; 581 JSONParserContext saved_ctxt = parser_context_save(ctxt); 582 583 if (ap == NULL) { 584 goto out; 585 } 586 587 token = parser_context_pop_token(ctxt); 588 if (token == NULL) { 589 goto out; 590 } 591 592 if (token_is_escape(token, "%p")) { 593 obj = va_arg(*ap, QObject *); 594 } else if (token_is_escape(token, "%i")) { 595 obj = QOBJECT(qbool_from_bool(va_arg(*ap, int))); 596 } else if (token_is_escape(token, "%d")) { 597 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 598 } else if (token_is_escape(token, "%ld")) { 599 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 600 } else if (token_is_escape(token, "%lld") || 601 token_is_escape(token, "%I64d")) { 602 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 603 } else if (token_is_escape(token, "%s")) { 604 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 605 } else if (token_is_escape(token, "%f")) { 606 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 607 } else { 608 goto out; 609 } 610 611 return obj; 612 613 out: 614 parser_context_restore(ctxt, saved_ctxt); 615 616 return NULL; 617 } 618 619 static QObject *parse_literal(JSONParserContext *ctxt) 620 { 621 QObject *token, *obj; 622 JSONParserContext saved_ctxt = parser_context_save(ctxt); 623 624 token = parser_context_pop_token(ctxt); 625 if (token == NULL) { 626 goto out; 627 } 628 629 switch (token_get_type(token)) { 630 case JSON_STRING: 631 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 632 break; 633 case JSON_INTEGER: { 634 /* A possibility exists that this is a whole-valued float where the 635 * fractional part was left out due to being 0 (.0). It's not a big 636 * deal to treat these as ints in the parser, so long as users of the 637 * resulting QObject know to expect a QInt in place of a QFloat in 638 * cases like these. 639 * 640 * However, in some cases these values will overflow/underflow a 641 * QInt/int64 container, thus we should assume these are to be handled 642 * as QFloats/doubles rather than silently changing their values. 643 * 644 * strtoll() indicates these instances by setting errno to ERANGE 645 */ 646 int64_t value; 647 648 errno = 0; /* strtoll doesn't set errno on success */ 649 value = strtoll(token_get_value(token), NULL, 10); 650 if (errno != ERANGE) { 651 obj = QOBJECT(qint_from_int(value)); 652 break; 653 } 654 /* fall through to JSON_FLOAT */ 655 } 656 case JSON_FLOAT: 657 /* FIXME dependent on locale */ 658 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 659 break; 660 default: 661 goto out; 662 } 663 664 return obj; 665 666 out: 667 parser_context_restore(ctxt, saved_ctxt); 668 669 return NULL; 670 } 671 672 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 673 { 674 QObject *obj; 675 676 obj = parse_object(ctxt, ap); 677 if (obj == NULL) { 678 obj = parse_array(ctxt, ap); 679 } 680 if (obj == NULL) { 681 obj = parse_escape(ctxt, ap); 682 } 683 if (obj == NULL) { 684 obj = parse_keyword(ctxt); 685 } 686 if (obj == NULL) { 687 obj = parse_literal(ctxt); 688 } 689 690 return obj; 691 } 692 693 QObject *json_parser_parse(QList *tokens, va_list *ap) 694 { 695 return json_parser_parse_err(tokens, ap, NULL); 696 } 697 698 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) 699 { 700 JSONParserContext *ctxt = parser_context_new(tokens); 701 QObject *result; 702 703 if (!ctxt) { 704 return NULL; 705 } 706 707 result = parse_value(ctxt, ap); 708 709 error_propagate(errp, ctxt->err); 710 711 parser_context_free(ctxt); 712 713 return result; 714 } 715