1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 Error *err; 29 struct { 30 QObject **buf; 31 size_t pos; 32 size_t count; 33 } tokens; 34 } JSONParserContext; 35 36 #define BUG_ON(cond) assert(!(cond)) 37 38 /** 39 * TODO 40 * 41 * 0) make errors meaningful again 42 * 1) add geometry information to tokens 43 * 3) should we return a parsed size? 44 * 4) deal with premature EOI 45 */ 46 47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 48 49 /** 50 * Token manipulators 51 * 52 * tokens are dictionaries that contain a type, a string value, and geometry information 53 * about a token identified by the lexer. These are routines that make working with 54 * these objects a bit easier. 55 */ 56 static const char *token_get_value(QObject *obj) 57 { 58 return qdict_get_str(qobject_to_qdict(obj), "token"); 59 } 60 61 static JSONTokenType token_get_type(QObject *obj) 62 { 63 return qdict_get_int(qobject_to_qdict(obj), "type"); 64 } 65 66 static int token_is_keyword(QObject *obj, const char *value) 67 { 68 if (token_get_type(obj) != JSON_KEYWORD) { 69 return 0; 70 } 71 72 return strcmp(token_get_value(obj), value) == 0; 73 } 74 75 static int token_is_escape(QObject *obj, const char *value) 76 { 77 if (token_get_type(obj) != JSON_ESCAPE) { 78 return 0; 79 } 80 81 return (strcmp(token_get_value(obj), value) == 0); 82 } 83 84 /** 85 * Error handler 86 */ 87 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 88 QObject *token, const char *msg, ...) 89 { 90 va_list ap; 91 char message[1024]; 92 va_start(ap, msg); 93 vsnprintf(message, sizeof(message), msg, ap); 94 va_end(ap); 95 if (ctxt->err) { 96 error_free(ctxt->err); 97 ctxt->err = NULL; 98 } 99 error_setg(&ctxt->err, "JSON parse error, %s", message); 100 } 101 102 /** 103 * String helpers 104 * 105 * These helpers are used to unescape strings. 106 */ 107 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 108 { 109 if (wchar <= 0x007F) { 110 BUG_ON(buffer_length < 2); 111 112 buffer[0] = wchar & 0x7F; 113 buffer[1] = 0; 114 } else if (wchar <= 0x07FF) { 115 BUG_ON(buffer_length < 3); 116 117 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 118 buffer[1] = 0x80 | (wchar & 0x3F); 119 buffer[2] = 0; 120 } else { 121 BUG_ON(buffer_length < 4); 122 123 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 124 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 125 buffer[2] = 0x80 | (wchar & 0x3F); 126 buffer[3] = 0; 127 } 128 } 129 130 static int hex2decimal(char ch) 131 { 132 if (ch >= '0' && ch <= '9') { 133 return (ch - '0'); 134 } else if (ch >= 'a' && ch <= 'f') { 135 return 10 + (ch - 'a'); 136 } else if (ch >= 'A' && ch <= 'F') { 137 return 10 + (ch - 'A'); 138 } 139 140 return -1; 141 } 142 143 /** 144 * parse_string(): Parse a json string and return a QObject 145 * 146 * string 147 * "" 148 * " chars " 149 * chars 150 * char 151 * char chars 152 * char 153 * any-Unicode-character- 154 * except-"-or-\-or- 155 * control-character 156 * \" 157 * \\ 158 * \/ 159 * \b 160 * \f 161 * \n 162 * \r 163 * \t 164 * \u four-hex-digits 165 */ 166 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 167 { 168 const char *ptr = token_get_value(token); 169 QString *str; 170 int double_quote = 1; 171 172 if (*ptr == '"') { 173 double_quote = 1; 174 } else { 175 double_quote = 0; 176 } 177 ptr++; 178 179 str = qstring_new(); 180 while (*ptr && 181 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 182 if (*ptr == '\\') { 183 ptr++; 184 185 switch (*ptr) { 186 case '"': 187 qstring_append(str, "\""); 188 ptr++; 189 break; 190 case '\'': 191 qstring_append(str, "'"); 192 ptr++; 193 break; 194 case '\\': 195 qstring_append(str, "\\"); 196 ptr++; 197 break; 198 case '/': 199 qstring_append(str, "/"); 200 ptr++; 201 break; 202 case 'b': 203 qstring_append(str, "\b"); 204 ptr++; 205 break; 206 case 'f': 207 qstring_append(str, "\f"); 208 ptr++; 209 break; 210 case 'n': 211 qstring_append(str, "\n"); 212 ptr++; 213 break; 214 case 'r': 215 qstring_append(str, "\r"); 216 ptr++; 217 break; 218 case 't': 219 qstring_append(str, "\t"); 220 ptr++; 221 break; 222 case 'u': { 223 uint16_t unicode_char = 0; 224 char utf8_char[4]; 225 int i = 0; 226 227 ptr++; 228 229 for (i = 0; i < 4; i++) { 230 if (qemu_isxdigit(*ptr)) { 231 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 232 } else { 233 parse_error(ctxt, token, 234 "invalid hex escape sequence in string"); 235 goto out; 236 } 237 ptr++; 238 } 239 240 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 241 qstring_append(str, utf8_char); 242 } break; 243 default: 244 parse_error(ctxt, token, "invalid escape sequence in string"); 245 goto out; 246 } 247 } else { 248 char dummy[2]; 249 250 dummy[0] = *ptr++; 251 dummy[1] = 0; 252 253 qstring_append(str, dummy); 254 } 255 } 256 257 return str; 258 259 out: 260 QDECREF(str); 261 return NULL; 262 } 263 264 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 265 { 266 QObject *token; 267 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 268 token = ctxt->tokens.buf[ctxt->tokens.pos]; 269 ctxt->tokens.pos++; 270 return token; 271 } 272 273 /* Note: parser_context_{peek|pop}_token do not increment the 274 * token object's refcount. In both cases the references will continue 275 * to be tracked and cleaned up in parser_context_free(), so do not 276 * attempt to free the token object. 277 */ 278 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 279 { 280 QObject *token; 281 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 282 token = ctxt->tokens.buf[ctxt->tokens.pos]; 283 return token; 284 } 285 286 static JSONParserContext parser_context_save(JSONParserContext *ctxt) 287 { 288 JSONParserContext saved_ctxt = {0}; 289 saved_ctxt.tokens.pos = ctxt->tokens.pos; 290 saved_ctxt.tokens.count = ctxt->tokens.count; 291 saved_ctxt.tokens.buf = ctxt->tokens.buf; 292 return saved_ctxt; 293 } 294 295 static void parser_context_restore(JSONParserContext *ctxt, 296 JSONParserContext saved_ctxt) 297 { 298 ctxt->tokens.pos = saved_ctxt.tokens.pos; 299 ctxt->tokens.count = saved_ctxt.tokens.count; 300 ctxt->tokens.buf = saved_ctxt.tokens.buf; 301 } 302 303 static void tokens_append_from_iter(QObject *obj, void *opaque) 304 { 305 JSONParserContext *ctxt = opaque; 306 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 307 ctxt->tokens.buf[ctxt->tokens.pos++] = obj; 308 qobject_incref(obj); 309 } 310 311 static JSONParserContext *parser_context_new(QList *tokens) 312 { 313 JSONParserContext *ctxt; 314 size_t count; 315 316 if (!tokens) { 317 return NULL; 318 } 319 320 count = qlist_size(tokens); 321 if (count == 0) { 322 return NULL; 323 } 324 325 ctxt = g_malloc0(sizeof(JSONParserContext)); 326 ctxt->tokens.pos = 0; 327 ctxt->tokens.count = count; 328 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); 329 qlist_iter(tokens, tokens_append_from_iter, ctxt); 330 ctxt->tokens.pos = 0; 331 332 return ctxt; 333 } 334 335 /* to support error propagation, ctxt->err must be freed separately */ 336 static void parser_context_free(JSONParserContext *ctxt) 337 { 338 int i; 339 if (ctxt) { 340 for (i = 0; i < ctxt->tokens.count; i++) { 341 qobject_decref(ctxt->tokens.buf[i]); 342 } 343 g_free(ctxt->tokens.buf); 344 g_free(ctxt); 345 } 346 } 347 348 /** 349 * Parsing rules 350 */ 351 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 352 { 353 QObject *key = NULL, *token = NULL, *value, *peek; 354 JSONParserContext saved_ctxt = parser_context_save(ctxt); 355 356 peek = parser_context_peek_token(ctxt); 357 if (peek == NULL) { 358 parse_error(ctxt, NULL, "premature EOI"); 359 goto out; 360 } 361 362 key = parse_value(ctxt, ap); 363 if (!key || qobject_type(key) != QTYPE_QSTRING) { 364 parse_error(ctxt, peek, "key is not a string in object"); 365 goto out; 366 } 367 368 token = parser_context_pop_token(ctxt); 369 if (token == NULL) { 370 parse_error(ctxt, NULL, "premature EOI"); 371 goto out; 372 } 373 374 if (token_get_type(token) != JSON_COLON) { 375 parse_error(ctxt, token, "missing : in object pair"); 376 goto out; 377 } 378 379 value = parse_value(ctxt, ap); 380 if (value == NULL) { 381 parse_error(ctxt, token, "Missing value in dict"); 382 goto out; 383 } 384 385 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 386 387 qobject_decref(key); 388 389 return 0; 390 391 out: 392 parser_context_restore(ctxt, saved_ctxt); 393 qobject_decref(key); 394 395 return -1; 396 } 397 398 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 399 { 400 QDict *dict = NULL; 401 QObject *token, *peek; 402 JSONParserContext saved_ctxt = parser_context_save(ctxt); 403 404 token = parser_context_pop_token(ctxt); 405 if (token == NULL) { 406 goto out; 407 } 408 409 if (token_get_type(token) != JSON_LCURLY) { 410 goto out; 411 } 412 413 dict = qdict_new(); 414 415 peek = parser_context_peek_token(ctxt); 416 if (peek == NULL) { 417 parse_error(ctxt, NULL, "premature EOI"); 418 goto out; 419 } 420 421 if (token_get_type(peek) != JSON_RCURLY) { 422 if (parse_pair(ctxt, dict, ap) == -1) { 423 goto out; 424 } 425 426 token = parser_context_pop_token(ctxt); 427 if (token == NULL) { 428 parse_error(ctxt, NULL, "premature EOI"); 429 goto out; 430 } 431 432 while (token_get_type(token) != JSON_RCURLY) { 433 if (token_get_type(token) != JSON_COMMA) { 434 parse_error(ctxt, token, "expected separator in dict"); 435 goto out; 436 } 437 438 if (parse_pair(ctxt, dict, ap) == -1) { 439 goto out; 440 } 441 442 token = parser_context_pop_token(ctxt); 443 if (token == NULL) { 444 parse_error(ctxt, NULL, "premature EOI"); 445 goto out; 446 } 447 } 448 } else { 449 (void)parser_context_pop_token(ctxt); 450 } 451 452 return QOBJECT(dict); 453 454 out: 455 parser_context_restore(ctxt, saved_ctxt); 456 QDECREF(dict); 457 return NULL; 458 } 459 460 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 461 { 462 QList *list = NULL; 463 QObject *token, *peek; 464 JSONParserContext saved_ctxt = parser_context_save(ctxt); 465 466 token = parser_context_pop_token(ctxt); 467 if (token == NULL) { 468 goto out; 469 } 470 471 if (token_get_type(token) != JSON_LSQUARE) { 472 goto out; 473 } 474 475 list = qlist_new(); 476 477 peek = parser_context_peek_token(ctxt); 478 if (peek == NULL) { 479 parse_error(ctxt, NULL, "premature EOI"); 480 goto out; 481 } 482 483 if (token_get_type(peek) != JSON_RSQUARE) { 484 QObject *obj; 485 486 obj = parse_value(ctxt, ap); 487 if (obj == NULL) { 488 parse_error(ctxt, token, "expecting value"); 489 goto out; 490 } 491 492 qlist_append_obj(list, obj); 493 494 token = parser_context_pop_token(ctxt); 495 if (token == NULL) { 496 parse_error(ctxt, NULL, "premature EOI"); 497 goto out; 498 } 499 500 while (token_get_type(token) != JSON_RSQUARE) { 501 if (token_get_type(token) != JSON_COMMA) { 502 parse_error(ctxt, token, "expected separator in list"); 503 goto out; 504 } 505 506 obj = parse_value(ctxt, ap); 507 if (obj == NULL) { 508 parse_error(ctxt, token, "expecting value"); 509 goto out; 510 } 511 512 qlist_append_obj(list, obj); 513 514 token = parser_context_pop_token(ctxt); 515 if (token == NULL) { 516 parse_error(ctxt, NULL, "premature EOI"); 517 goto out; 518 } 519 } 520 } else { 521 (void)parser_context_pop_token(ctxt); 522 } 523 524 return QOBJECT(list); 525 526 out: 527 parser_context_restore(ctxt, saved_ctxt); 528 QDECREF(list); 529 return NULL; 530 } 531 532 static QObject *parse_keyword(JSONParserContext *ctxt) 533 { 534 QObject *token, *ret; 535 JSONParserContext saved_ctxt = parser_context_save(ctxt); 536 537 token = parser_context_pop_token(ctxt); 538 if (token == NULL) { 539 goto out; 540 } 541 542 if (token_get_type(token) != JSON_KEYWORD) { 543 goto out; 544 } 545 546 if (token_is_keyword(token, "true")) { 547 ret = QOBJECT(qbool_from_bool(true)); 548 } else if (token_is_keyword(token, "false")) { 549 ret = QOBJECT(qbool_from_bool(false)); 550 } else if (token_is_keyword(token, "null")) { 551 ret = qnull(); 552 } else { 553 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 554 goto out; 555 } 556 557 return ret; 558 559 out: 560 parser_context_restore(ctxt, saved_ctxt); 561 562 return NULL; 563 } 564 565 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 566 { 567 QObject *token = NULL, *obj; 568 JSONParserContext saved_ctxt = parser_context_save(ctxt); 569 570 if (ap == NULL) { 571 goto out; 572 } 573 574 token = parser_context_pop_token(ctxt); 575 if (token == NULL) { 576 goto out; 577 } 578 579 if (token_is_escape(token, "%p")) { 580 obj = va_arg(*ap, QObject *); 581 } else if (token_is_escape(token, "%i")) { 582 obj = QOBJECT(qbool_from_bool(va_arg(*ap, int))); 583 } else if (token_is_escape(token, "%d")) { 584 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 585 } else if (token_is_escape(token, "%ld")) { 586 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 587 } else if (token_is_escape(token, "%lld") || 588 token_is_escape(token, "%I64d")) { 589 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 590 } else if (token_is_escape(token, "%s")) { 591 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 592 } else if (token_is_escape(token, "%f")) { 593 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 594 } else { 595 goto out; 596 } 597 598 return obj; 599 600 out: 601 parser_context_restore(ctxt, saved_ctxt); 602 603 return NULL; 604 } 605 606 static QObject *parse_literal(JSONParserContext *ctxt) 607 { 608 QObject *token, *obj; 609 JSONParserContext saved_ctxt = parser_context_save(ctxt); 610 611 token = parser_context_pop_token(ctxt); 612 if (token == NULL) { 613 goto out; 614 } 615 616 switch (token_get_type(token)) { 617 case JSON_STRING: 618 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 619 break; 620 case JSON_INTEGER: { 621 /* A possibility exists that this is a whole-valued float where the 622 * fractional part was left out due to being 0 (.0). It's not a big 623 * deal to treat these as ints in the parser, so long as users of the 624 * resulting QObject know to expect a QInt in place of a QFloat in 625 * cases like these. 626 * 627 * However, in some cases these values will overflow/underflow a 628 * QInt/int64 container, thus we should assume these are to be handled 629 * as QFloats/doubles rather than silently changing their values. 630 * 631 * strtoll() indicates these instances by setting errno to ERANGE 632 */ 633 int64_t value; 634 635 errno = 0; /* strtoll doesn't set errno on success */ 636 value = strtoll(token_get_value(token), NULL, 10); 637 if (errno != ERANGE) { 638 obj = QOBJECT(qint_from_int(value)); 639 break; 640 } 641 /* fall through to JSON_FLOAT */ 642 } 643 case JSON_FLOAT: 644 /* FIXME dependent on locale */ 645 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 646 break; 647 default: 648 goto out; 649 } 650 651 return obj; 652 653 out: 654 parser_context_restore(ctxt, saved_ctxt); 655 656 return NULL; 657 } 658 659 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 660 { 661 QObject *obj; 662 663 obj = parse_object(ctxt, ap); 664 if (obj == NULL) { 665 obj = parse_array(ctxt, ap); 666 } 667 if (obj == NULL) { 668 obj = parse_escape(ctxt, ap); 669 } 670 if (obj == NULL) { 671 obj = parse_keyword(ctxt); 672 } 673 if (obj == NULL) { 674 obj = parse_literal(ctxt); 675 } 676 677 return obj; 678 } 679 680 QObject *json_parser_parse(QList *tokens, va_list *ap) 681 { 682 return json_parser_parse_err(tokens, ap, NULL); 683 } 684 685 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) 686 { 687 JSONParserContext *ctxt = parser_context_new(tokens); 688 QObject *result; 689 690 if (!ctxt) { 691 return NULL; 692 } 693 694 result = parse_value(ctxt, ap); 695 696 error_propagate(errp, ctxt->err); 697 698 parser_context_free(ctxt); 699 700 return result; 701 } 702