1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 #include "qapi/qmp/qerror.h" 26 27 typedef struct JSONParserContext 28 { 29 Error *err; 30 struct { 31 QObject **buf; 32 size_t pos; 33 size_t count; 34 } tokens; 35 } JSONParserContext; 36 37 #define BUG_ON(cond) assert(!(cond)) 38 39 /** 40 * TODO 41 * 42 * 0) make errors meaningful again 43 * 1) add geometry information to tokens 44 * 3) should we return a parsed size? 45 * 4) deal with premature EOI 46 */ 47 48 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 49 50 /** 51 * Token manipulators 52 * 53 * tokens are dictionaries that contain a type, a string value, and geometry information 54 * about a token identified by the lexer. These are routines that make working with 55 * these objects a bit easier. 56 */ 57 static const char *token_get_value(QObject *obj) 58 { 59 return qdict_get_str(qobject_to_qdict(obj), "token"); 60 } 61 62 static JSONTokenType token_get_type(QObject *obj) 63 { 64 return qdict_get_int(qobject_to_qdict(obj), "type"); 65 } 66 67 static int token_is_operator(QObject *obj, char op) 68 { 69 const char *val; 70 71 if (token_get_type(obj) != JSON_OPERATOR) { 72 return 0; 73 } 74 75 val = token_get_value(obj); 76 77 return (val[0] == op) && (val[1] == 0); 78 } 79 80 static int token_is_keyword(QObject *obj, const char *value) 81 { 82 if (token_get_type(obj) != JSON_KEYWORD) { 83 return 0; 84 } 85 86 return strcmp(token_get_value(obj), value) == 0; 87 } 88 89 static int token_is_escape(QObject *obj, const char *value) 90 { 91 if (token_get_type(obj) != JSON_ESCAPE) { 92 return 0; 93 } 94 95 return (strcmp(token_get_value(obj), value) == 0); 96 } 97 98 /** 99 * Error handler 100 */ 101 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 102 QObject *token, const char *msg, ...) 103 { 104 va_list ap; 105 char message[1024]; 106 va_start(ap, msg); 107 vsnprintf(message, sizeof(message), msg, ap); 108 va_end(ap); 109 if (ctxt->err) { 110 error_free(ctxt->err); 111 ctxt->err = NULL; 112 } 113 error_setg(&ctxt->err, "JSON parse error, %s", message); 114 } 115 116 /** 117 * String helpers 118 * 119 * These helpers are used to unescape strings. 120 */ 121 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 122 { 123 if (wchar <= 0x007F) { 124 BUG_ON(buffer_length < 2); 125 126 buffer[0] = wchar & 0x7F; 127 buffer[1] = 0; 128 } else if (wchar <= 0x07FF) { 129 BUG_ON(buffer_length < 3); 130 131 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 132 buffer[1] = 0x80 | (wchar & 0x3F); 133 buffer[2] = 0; 134 } else { 135 BUG_ON(buffer_length < 4); 136 137 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 138 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 139 buffer[2] = 0x80 | (wchar & 0x3F); 140 buffer[3] = 0; 141 } 142 } 143 144 static int hex2decimal(char ch) 145 { 146 if (ch >= '0' && ch <= '9') { 147 return (ch - '0'); 148 } else if (ch >= 'a' && ch <= 'f') { 149 return 10 + (ch - 'a'); 150 } else if (ch >= 'A' && ch <= 'F') { 151 return 10 + (ch - 'A'); 152 } 153 154 return -1; 155 } 156 157 /** 158 * parse_string(): Parse a json string and return a QObject 159 * 160 * string 161 * "" 162 * " chars " 163 * chars 164 * char 165 * char chars 166 * char 167 * any-Unicode-character- 168 * except-"-or-\-or- 169 * control-character 170 * \" 171 * \\ 172 * \/ 173 * \b 174 * \f 175 * \n 176 * \r 177 * \t 178 * \u four-hex-digits 179 */ 180 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 181 { 182 const char *ptr = token_get_value(token); 183 QString *str; 184 int double_quote = 1; 185 186 if (*ptr == '"') { 187 double_quote = 1; 188 } else { 189 double_quote = 0; 190 } 191 ptr++; 192 193 str = qstring_new(); 194 while (*ptr && 195 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 196 if (*ptr == '\\') { 197 ptr++; 198 199 switch (*ptr) { 200 case '"': 201 qstring_append(str, "\""); 202 ptr++; 203 break; 204 case '\'': 205 qstring_append(str, "'"); 206 ptr++; 207 break; 208 case '\\': 209 qstring_append(str, "\\"); 210 ptr++; 211 break; 212 case '/': 213 qstring_append(str, "/"); 214 ptr++; 215 break; 216 case 'b': 217 qstring_append(str, "\b"); 218 ptr++; 219 break; 220 case 'f': 221 qstring_append(str, "\f"); 222 ptr++; 223 break; 224 case 'n': 225 qstring_append(str, "\n"); 226 ptr++; 227 break; 228 case 'r': 229 qstring_append(str, "\r"); 230 ptr++; 231 break; 232 case 't': 233 qstring_append(str, "\t"); 234 ptr++; 235 break; 236 case 'u': { 237 uint16_t unicode_char = 0; 238 char utf8_char[4]; 239 int i = 0; 240 241 ptr++; 242 243 for (i = 0; i < 4; i++) { 244 if (qemu_isxdigit(*ptr)) { 245 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 246 } else { 247 parse_error(ctxt, token, 248 "invalid hex escape sequence in string"); 249 goto out; 250 } 251 ptr++; 252 } 253 254 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 255 qstring_append(str, utf8_char); 256 } break; 257 default: 258 parse_error(ctxt, token, "invalid escape sequence in string"); 259 goto out; 260 } 261 } else { 262 char dummy[2]; 263 264 dummy[0] = *ptr++; 265 dummy[1] = 0; 266 267 qstring_append(str, dummy); 268 } 269 } 270 271 return str; 272 273 out: 274 QDECREF(str); 275 return NULL; 276 } 277 278 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 279 { 280 QObject *token; 281 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 282 token = ctxt->tokens.buf[ctxt->tokens.pos]; 283 ctxt->tokens.pos++; 284 return token; 285 } 286 287 /* Note: parser_context_{peek|pop}_token do not increment the 288 * token object's refcount. In both cases the references will continue 289 * to be tracked and cleaned up in parser_context_free(), so do not 290 * attempt to free the token object. 291 */ 292 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 293 { 294 QObject *token; 295 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 296 token = ctxt->tokens.buf[ctxt->tokens.pos]; 297 return token; 298 } 299 300 static JSONParserContext parser_context_save(JSONParserContext *ctxt) 301 { 302 JSONParserContext saved_ctxt = {0}; 303 saved_ctxt.tokens.pos = ctxt->tokens.pos; 304 saved_ctxt.tokens.count = ctxt->tokens.count; 305 saved_ctxt.tokens.buf = ctxt->tokens.buf; 306 return saved_ctxt; 307 } 308 309 static void parser_context_restore(JSONParserContext *ctxt, 310 JSONParserContext saved_ctxt) 311 { 312 ctxt->tokens.pos = saved_ctxt.tokens.pos; 313 ctxt->tokens.count = saved_ctxt.tokens.count; 314 ctxt->tokens.buf = saved_ctxt.tokens.buf; 315 } 316 317 static void tokens_append_from_iter(QObject *obj, void *opaque) 318 { 319 JSONParserContext *ctxt = opaque; 320 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 321 ctxt->tokens.buf[ctxt->tokens.pos++] = obj; 322 qobject_incref(obj); 323 } 324 325 static JSONParserContext *parser_context_new(QList *tokens) 326 { 327 JSONParserContext *ctxt; 328 size_t count; 329 330 if (!tokens) { 331 return NULL; 332 } 333 334 count = qlist_size(tokens); 335 if (count == 0) { 336 return NULL; 337 } 338 339 ctxt = g_malloc0(sizeof(JSONParserContext)); 340 ctxt->tokens.pos = 0; 341 ctxt->tokens.count = count; 342 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); 343 qlist_iter(tokens, tokens_append_from_iter, ctxt); 344 ctxt->tokens.pos = 0; 345 346 return ctxt; 347 } 348 349 /* to support error propagation, ctxt->err must be freed separately */ 350 static void parser_context_free(JSONParserContext *ctxt) 351 { 352 int i; 353 if (ctxt) { 354 for (i = 0; i < ctxt->tokens.count; i++) { 355 qobject_decref(ctxt->tokens.buf[i]); 356 } 357 g_free(ctxt->tokens.buf); 358 g_free(ctxt); 359 } 360 } 361 362 /** 363 * Parsing rules 364 */ 365 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 366 { 367 QObject *key = NULL, *token = NULL, *value, *peek; 368 JSONParserContext saved_ctxt = parser_context_save(ctxt); 369 370 peek = parser_context_peek_token(ctxt); 371 if (peek == NULL) { 372 parse_error(ctxt, NULL, "premature EOI"); 373 goto out; 374 } 375 376 key = parse_value(ctxt, ap); 377 if (!key || qobject_type(key) != QTYPE_QSTRING) { 378 parse_error(ctxt, peek, "key is not a string in object"); 379 goto out; 380 } 381 382 token = parser_context_pop_token(ctxt); 383 if (token == NULL) { 384 parse_error(ctxt, NULL, "premature EOI"); 385 goto out; 386 } 387 388 if (!token_is_operator(token, ':')) { 389 parse_error(ctxt, token, "missing : in object pair"); 390 goto out; 391 } 392 393 value = parse_value(ctxt, ap); 394 if (value == NULL) { 395 parse_error(ctxt, token, "Missing value in dict"); 396 goto out; 397 } 398 399 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 400 401 qobject_decref(key); 402 403 return 0; 404 405 out: 406 parser_context_restore(ctxt, saved_ctxt); 407 qobject_decref(key); 408 409 return -1; 410 } 411 412 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 413 { 414 QDict *dict = NULL; 415 QObject *token, *peek; 416 JSONParserContext saved_ctxt = parser_context_save(ctxt); 417 418 token = parser_context_pop_token(ctxt); 419 if (token == NULL) { 420 goto out; 421 } 422 423 if (!token_is_operator(token, '{')) { 424 goto out; 425 } 426 427 dict = qdict_new(); 428 429 peek = parser_context_peek_token(ctxt); 430 if (peek == NULL) { 431 parse_error(ctxt, NULL, "premature EOI"); 432 goto out; 433 } 434 435 if (!token_is_operator(peek, '}')) { 436 if (parse_pair(ctxt, dict, ap) == -1) { 437 goto out; 438 } 439 440 token = parser_context_pop_token(ctxt); 441 if (token == NULL) { 442 parse_error(ctxt, NULL, "premature EOI"); 443 goto out; 444 } 445 446 while (!token_is_operator(token, '}')) { 447 if (!token_is_operator(token, ',')) { 448 parse_error(ctxt, token, "expected separator in dict"); 449 goto out; 450 } 451 452 if (parse_pair(ctxt, dict, ap) == -1) { 453 goto out; 454 } 455 456 token = parser_context_pop_token(ctxt); 457 if (token == NULL) { 458 parse_error(ctxt, NULL, "premature EOI"); 459 goto out; 460 } 461 } 462 } else { 463 (void)parser_context_pop_token(ctxt); 464 } 465 466 return QOBJECT(dict); 467 468 out: 469 parser_context_restore(ctxt, saved_ctxt); 470 QDECREF(dict); 471 return NULL; 472 } 473 474 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 475 { 476 QList *list = NULL; 477 QObject *token, *peek; 478 JSONParserContext saved_ctxt = parser_context_save(ctxt); 479 480 token = parser_context_pop_token(ctxt); 481 if (token == NULL) { 482 goto out; 483 } 484 485 if (!token_is_operator(token, '[')) { 486 goto out; 487 } 488 489 list = qlist_new(); 490 491 peek = parser_context_peek_token(ctxt); 492 if (peek == NULL) { 493 parse_error(ctxt, NULL, "premature EOI"); 494 goto out; 495 } 496 497 if (!token_is_operator(peek, ']')) { 498 QObject *obj; 499 500 obj = parse_value(ctxt, ap); 501 if (obj == NULL) { 502 parse_error(ctxt, token, "expecting value"); 503 goto out; 504 } 505 506 qlist_append_obj(list, obj); 507 508 token = parser_context_pop_token(ctxt); 509 if (token == NULL) { 510 parse_error(ctxt, NULL, "premature EOI"); 511 goto out; 512 } 513 514 while (!token_is_operator(token, ']')) { 515 if (!token_is_operator(token, ',')) { 516 parse_error(ctxt, token, "expected separator in list"); 517 goto out; 518 } 519 520 obj = parse_value(ctxt, ap); 521 if (obj == NULL) { 522 parse_error(ctxt, token, "expecting value"); 523 goto out; 524 } 525 526 qlist_append_obj(list, obj); 527 528 token = parser_context_pop_token(ctxt); 529 if (token == NULL) { 530 parse_error(ctxt, NULL, "premature EOI"); 531 goto out; 532 } 533 } 534 } else { 535 (void)parser_context_pop_token(ctxt); 536 } 537 538 return QOBJECT(list); 539 540 out: 541 parser_context_restore(ctxt, saved_ctxt); 542 QDECREF(list); 543 return NULL; 544 } 545 546 static QObject *parse_keyword(JSONParserContext *ctxt) 547 { 548 QObject *token, *ret; 549 JSONParserContext saved_ctxt = parser_context_save(ctxt); 550 551 token = parser_context_pop_token(ctxt); 552 if (token == NULL) { 553 goto out; 554 } 555 556 if (token_get_type(token) != JSON_KEYWORD) { 557 goto out; 558 } 559 560 if (token_is_keyword(token, "true")) { 561 ret = QOBJECT(qbool_from_int(true)); 562 } else if (token_is_keyword(token, "false")) { 563 ret = QOBJECT(qbool_from_int(false)); 564 } else { 565 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 566 goto out; 567 } 568 569 return ret; 570 571 out: 572 parser_context_restore(ctxt, saved_ctxt); 573 574 return NULL; 575 } 576 577 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 578 { 579 QObject *token = NULL, *obj; 580 JSONParserContext saved_ctxt = parser_context_save(ctxt); 581 582 if (ap == NULL) { 583 goto out; 584 } 585 586 token = parser_context_pop_token(ctxt); 587 if (token == NULL) { 588 goto out; 589 } 590 591 if (token_is_escape(token, "%p")) { 592 obj = va_arg(*ap, QObject *); 593 } else if (token_is_escape(token, "%i")) { 594 obj = QOBJECT(qbool_from_int(va_arg(*ap, int))); 595 } else if (token_is_escape(token, "%d")) { 596 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 597 } else if (token_is_escape(token, "%ld")) { 598 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 599 } else if (token_is_escape(token, "%lld") || 600 token_is_escape(token, "%I64d")) { 601 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 602 } else if (token_is_escape(token, "%s")) { 603 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 604 } else if (token_is_escape(token, "%f")) { 605 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 606 } else { 607 goto out; 608 } 609 610 return obj; 611 612 out: 613 parser_context_restore(ctxt, saved_ctxt); 614 615 return NULL; 616 } 617 618 static QObject *parse_literal(JSONParserContext *ctxt) 619 { 620 QObject *token, *obj; 621 JSONParserContext saved_ctxt = parser_context_save(ctxt); 622 623 token = parser_context_pop_token(ctxt); 624 if (token == NULL) { 625 goto out; 626 } 627 628 switch (token_get_type(token)) { 629 case JSON_STRING: 630 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 631 break; 632 case JSON_INTEGER: { 633 /* A possibility exists that this is a whole-valued float where the 634 * fractional part was left out due to being 0 (.0). It's not a big 635 * deal to treat these as ints in the parser, so long as users of the 636 * resulting QObject know to expect a QInt in place of a QFloat in 637 * cases like these. 638 * 639 * However, in some cases these values will overflow/underflow a 640 * QInt/int64 container, thus we should assume these are to be handled 641 * as QFloats/doubles rather than silently changing their values. 642 * 643 * strtoll() indicates these instances by setting errno to ERANGE 644 */ 645 int64_t value; 646 647 errno = 0; /* strtoll doesn't set errno on success */ 648 value = strtoll(token_get_value(token), NULL, 10); 649 if (errno != ERANGE) { 650 obj = QOBJECT(qint_from_int(value)); 651 break; 652 } 653 /* fall through to JSON_FLOAT */ 654 } 655 case JSON_FLOAT: 656 /* FIXME dependent on locale */ 657 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 658 break; 659 default: 660 goto out; 661 } 662 663 return obj; 664 665 out: 666 parser_context_restore(ctxt, saved_ctxt); 667 668 return NULL; 669 } 670 671 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 672 { 673 QObject *obj; 674 675 obj = parse_object(ctxt, ap); 676 if (obj == NULL) { 677 obj = parse_array(ctxt, ap); 678 } 679 if (obj == NULL) { 680 obj = parse_escape(ctxt, ap); 681 } 682 if (obj == NULL) { 683 obj = parse_keyword(ctxt); 684 } 685 if (obj == NULL) { 686 obj = parse_literal(ctxt); 687 } 688 689 return obj; 690 } 691 692 QObject *json_parser_parse(QList *tokens, va_list *ap) 693 { 694 return json_parser_parse_err(tokens, ap, NULL); 695 } 696 697 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) 698 { 699 JSONParserContext *ctxt = parser_context_new(tokens); 700 QObject *result; 701 702 if (!ctxt) { 703 return NULL; 704 } 705 706 result = parse_value(ctxt, ap); 707 708 error_propagate(errp, ctxt->err); 709 710 parser_context_free(ctxt); 711 712 return result; 713 } 714