1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 Error *err; 29 QObject *current; 30 GQueue *buf; 31 } JSONParserContext; 32 33 #define BUG_ON(cond) assert(!(cond)) 34 35 /** 36 * TODO 37 * 38 * 0) make errors meaningful again 39 * 1) add geometry information to tokens 40 * 3) should we return a parsed size? 41 * 4) deal with premature EOI 42 */ 43 44 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 45 46 /** 47 * Token manipulators 48 * 49 * tokens are dictionaries that contain a type, a string value, and geometry information 50 * about a token identified by the lexer. These are routines that make working with 51 * these objects a bit easier. 52 */ 53 static const char *token_get_value(QObject *obj) 54 { 55 return qdict_get_str(qobject_to_qdict(obj), "token"); 56 } 57 58 static JSONTokenType token_get_type(QObject *obj) 59 { 60 return qdict_get_int(qobject_to_qdict(obj), "type"); 61 } 62 63 /** 64 * Error handler 65 */ 66 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 67 QObject *token, const char *msg, ...) 68 { 69 va_list ap; 70 char message[1024]; 71 va_start(ap, msg); 72 vsnprintf(message, sizeof(message), msg, ap); 73 va_end(ap); 74 if (ctxt->err) { 75 error_free(ctxt->err); 76 ctxt->err = NULL; 77 } 78 error_setg(&ctxt->err, "JSON parse error, %s", message); 79 } 80 81 /** 82 * String helpers 83 * 84 * These helpers are used to unescape strings. 85 */ 86 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 87 { 88 if (wchar <= 0x007F) { 89 BUG_ON(buffer_length < 2); 90 91 buffer[0] = wchar & 0x7F; 92 buffer[1] = 0; 93 } else if (wchar <= 0x07FF) { 94 BUG_ON(buffer_length < 3); 95 96 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 97 buffer[1] = 0x80 | (wchar & 0x3F); 98 buffer[2] = 0; 99 } else { 100 BUG_ON(buffer_length < 4); 101 102 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 103 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 104 buffer[2] = 0x80 | (wchar & 0x3F); 105 buffer[3] = 0; 106 } 107 } 108 109 static int hex2decimal(char ch) 110 { 111 if (ch >= '0' && ch <= '9') { 112 return (ch - '0'); 113 } else if (ch >= 'a' && ch <= 'f') { 114 return 10 + (ch - 'a'); 115 } else if (ch >= 'A' && ch <= 'F') { 116 return 10 + (ch - 'A'); 117 } 118 119 return -1; 120 } 121 122 /** 123 * parse_string(): Parse a json string and return a QObject 124 * 125 * string 126 * "" 127 * " chars " 128 * chars 129 * char 130 * char chars 131 * char 132 * any-Unicode-character- 133 * except-"-or-\-or- 134 * control-character 135 * \" 136 * \\ 137 * \/ 138 * \b 139 * \f 140 * \n 141 * \r 142 * \t 143 * \u four-hex-digits 144 */ 145 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 146 { 147 const char *ptr = token_get_value(token); 148 QString *str; 149 int double_quote = 1; 150 151 if (*ptr == '"') { 152 double_quote = 1; 153 } else { 154 double_quote = 0; 155 } 156 ptr++; 157 158 str = qstring_new(); 159 while (*ptr && 160 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 161 if (*ptr == '\\') { 162 ptr++; 163 164 switch (*ptr) { 165 case '"': 166 qstring_append(str, "\""); 167 ptr++; 168 break; 169 case '\'': 170 qstring_append(str, "'"); 171 ptr++; 172 break; 173 case '\\': 174 qstring_append(str, "\\"); 175 ptr++; 176 break; 177 case '/': 178 qstring_append(str, "/"); 179 ptr++; 180 break; 181 case 'b': 182 qstring_append(str, "\b"); 183 ptr++; 184 break; 185 case 'f': 186 qstring_append(str, "\f"); 187 ptr++; 188 break; 189 case 'n': 190 qstring_append(str, "\n"); 191 ptr++; 192 break; 193 case 'r': 194 qstring_append(str, "\r"); 195 ptr++; 196 break; 197 case 't': 198 qstring_append(str, "\t"); 199 ptr++; 200 break; 201 case 'u': { 202 uint16_t unicode_char = 0; 203 char utf8_char[4]; 204 int i = 0; 205 206 ptr++; 207 208 for (i = 0; i < 4; i++) { 209 if (qemu_isxdigit(*ptr)) { 210 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 211 } else { 212 parse_error(ctxt, token, 213 "invalid hex escape sequence in string"); 214 goto out; 215 } 216 ptr++; 217 } 218 219 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 220 qstring_append(str, utf8_char); 221 } break; 222 default: 223 parse_error(ctxt, token, "invalid escape sequence in string"); 224 goto out; 225 } 226 } else { 227 char dummy[2]; 228 229 dummy[0] = *ptr++; 230 dummy[1] = 0; 231 232 qstring_append(str, dummy); 233 } 234 } 235 236 return str; 237 238 out: 239 QDECREF(str); 240 return NULL; 241 } 242 243 /* Note: unless the token object returned by parser_context_peek_token 244 * or parser_context_pop_token is explicitly incref'd, it will be 245 * deleted as soon as parser_context_pop_token is called again. 246 */ 247 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 248 { 249 qobject_decref(ctxt->current); 250 assert(!g_queue_is_empty(ctxt->buf)); 251 ctxt->current = g_queue_pop_head(ctxt->buf); 252 return ctxt->current; 253 } 254 255 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 256 { 257 assert(!g_queue_is_empty(ctxt->buf)); 258 return g_queue_peek_head(ctxt->buf); 259 } 260 261 static JSONParserContext *parser_context_new(GQueue *tokens) 262 { 263 JSONParserContext *ctxt; 264 265 if (!tokens) { 266 return NULL; 267 } 268 269 ctxt = g_malloc0(sizeof(JSONParserContext)); 270 ctxt->buf = tokens; 271 272 return ctxt; 273 } 274 275 /* to support error propagation, ctxt->err must be freed separately */ 276 static void parser_context_free(JSONParserContext *ctxt) 277 { 278 if (ctxt) { 279 while (!g_queue_is_empty(ctxt->buf)) { 280 parser_context_pop_token(ctxt); 281 } 282 qobject_decref(ctxt->current); 283 g_queue_free(ctxt->buf); 284 g_free(ctxt); 285 } 286 } 287 288 /** 289 * Parsing rules 290 */ 291 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 292 { 293 QObject *key = NULL, *token = NULL, *value, *peek; 294 295 peek = parser_context_peek_token(ctxt); 296 if (peek == NULL) { 297 parse_error(ctxt, NULL, "premature EOI"); 298 goto out; 299 } 300 301 key = parse_value(ctxt, ap); 302 if (!key || qobject_type(key) != QTYPE_QSTRING) { 303 parse_error(ctxt, peek, "key is not a string in object"); 304 goto out; 305 } 306 307 token = parser_context_pop_token(ctxt); 308 if (token == NULL) { 309 parse_error(ctxt, NULL, "premature EOI"); 310 goto out; 311 } 312 313 if (token_get_type(token) != JSON_COLON) { 314 parse_error(ctxt, token, "missing : in object pair"); 315 goto out; 316 } 317 318 value = parse_value(ctxt, ap); 319 if (value == NULL) { 320 parse_error(ctxt, token, "Missing value in dict"); 321 goto out; 322 } 323 324 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 325 326 qobject_decref(key); 327 328 return 0; 329 330 out: 331 qobject_decref(key); 332 333 return -1; 334 } 335 336 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 337 { 338 QDict *dict = NULL; 339 QObject *token, *peek; 340 341 token = parser_context_pop_token(ctxt); 342 assert(token && token_get_type(token) == JSON_LCURLY); 343 344 dict = qdict_new(); 345 346 peek = parser_context_peek_token(ctxt); 347 if (peek == NULL) { 348 parse_error(ctxt, NULL, "premature EOI"); 349 goto out; 350 } 351 352 if (token_get_type(peek) != JSON_RCURLY) { 353 if (parse_pair(ctxt, dict, ap) == -1) { 354 goto out; 355 } 356 357 token = parser_context_pop_token(ctxt); 358 if (token == NULL) { 359 parse_error(ctxt, NULL, "premature EOI"); 360 goto out; 361 } 362 363 while (token_get_type(token) != JSON_RCURLY) { 364 if (token_get_type(token) != JSON_COMMA) { 365 parse_error(ctxt, token, "expected separator in dict"); 366 goto out; 367 } 368 369 if (parse_pair(ctxt, dict, ap) == -1) { 370 goto out; 371 } 372 373 token = parser_context_pop_token(ctxt); 374 if (token == NULL) { 375 parse_error(ctxt, NULL, "premature EOI"); 376 goto out; 377 } 378 } 379 } else { 380 (void)parser_context_pop_token(ctxt); 381 } 382 383 return QOBJECT(dict); 384 385 out: 386 QDECREF(dict); 387 return NULL; 388 } 389 390 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 391 { 392 QList *list = NULL; 393 QObject *token, *peek; 394 395 token = parser_context_pop_token(ctxt); 396 assert(token && token_get_type(token) == JSON_LSQUARE); 397 398 list = qlist_new(); 399 400 peek = parser_context_peek_token(ctxt); 401 if (peek == NULL) { 402 parse_error(ctxt, NULL, "premature EOI"); 403 goto out; 404 } 405 406 if (token_get_type(peek) != JSON_RSQUARE) { 407 QObject *obj; 408 409 obj = parse_value(ctxt, ap); 410 if (obj == NULL) { 411 parse_error(ctxt, token, "expecting value"); 412 goto out; 413 } 414 415 qlist_append_obj(list, obj); 416 417 token = parser_context_pop_token(ctxt); 418 if (token == NULL) { 419 parse_error(ctxt, NULL, "premature EOI"); 420 goto out; 421 } 422 423 while (token_get_type(token) != JSON_RSQUARE) { 424 if (token_get_type(token) != JSON_COMMA) { 425 parse_error(ctxt, token, "expected separator in list"); 426 goto out; 427 } 428 429 obj = parse_value(ctxt, ap); 430 if (obj == NULL) { 431 parse_error(ctxt, token, "expecting value"); 432 goto out; 433 } 434 435 qlist_append_obj(list, obj); 436 437 token = parser_context_pop_token(ctxt); 438 if (token == NULL) { 439 parse_error(ctxt, NULL, "premature EOI"); 440 goto out; 441 } 442 } 443 } else { 444 (void)parser_context_pop_token(ctxt); 445 } 446 447 return QOBJECT(list); 448 449 out: 450 QDECREF(list); 451 return NULL; 452 } 453 454 static QObject *parse_keyword(JSONParserContext *ctxt) 455 { 456 QObject *token; 457 const char *val; 458 459 token = parser_context_pop_token(ctxt); 460 assert(token && token_get_type(token) == JSON_KEYWORD); 461 val = token_get_value(token); 462 463 if (!strcmp(val, "true")) { 464 return QOBJECT(qbool_from_bool(true)); 465 } else if (!strcmp(val, "false")) { 466 return QOBJECT(qbool_from_bool(false)); 467 } else if (!strcmp(val, "null")) { 468 return qnull(); 469 } 470 parse_error(ctxt, token, "invalid keyword '%s'", val); 471 return NULL; 472 } 473 474 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 475 { 476 QObject *token; 477 const char *val; 478 479 if (ap == NULL) { 480 return NULL; 481 } 482 483 token = parser_context_pop_token(ctxt); 484 assert(token && token_get_type(token) == JSON_ESCAPE); 485 val = token_get_value(token); 486 487 if (!strcmp(val, "%p")) { 488 return va_arg(*ap, QObject *); 489 } else if (!strcmp(val, "%i")) { 490 return QOBJECT(qbool_from_bool(va_arg(*ap, int))); 491 } else if (!strcmp(val, "%d")) { 492 return QOBJECT(qint_from_int(va_arg(*ap, int))); 493 } else if (!strcmp(val, "%ld")) { 494 return QOBJECT(qint_from_int(va_arg(*ap, long))); 495 } else if (!strcmp(val, "%lld") || 496 !strcmp(val, "%I64d")) { 497 return QOBJECT(qint_from_int(va_arg(*ap, long long))); 498 } else if (!strcmp(val, "%s")) { 499 return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 500 } else if (!strcmp(val, "%f")) { 501 return QOBJECT(qfloat_from_double(va_arg(*ap, double))); 502 } 503 return NULL; 504 } 505 506 static QObject *parse_literal(JSONParserContext *ctxt) 507 { 508 QObject *token; 509 510 token = parser_context_pop_token(ctxt); 511 assert(token); 512 513 switch (token_get_type(token)) { 514 case JSON_STRING: 515 return QOBJECT(qstring_from_escaped_str(ctxt, token)); 516 case JSON_INTEGER: { 517 /* A possibility exists that this is a whole-valued float where the 518 * fractional part was left out due to being 0 (.0). It's not a big 519 * deal to treat these as ints in the parser, so long as users of the 520 * resulting QObject know to expect a QInt in place of a QFloat in 521 * cases like these. 522 * 523 * However, in some cases these values will overflow/underflow a 524 * QInt/int64 container, thus we should assume these are to be handled 525 * as QFloats/doubles rather than silently changing their values. 526 * 527 * strtoll() indicates these instances by setting errno to ERANGE 528 */ 529 int64_t value; 530 531 errno = 0; /* strtoll doesn't set errno on success */ 532 value = strtoll(token_get_value(token), NULL, 10); 533 if (errno != ERANGE) { 534 return QOBJECT(qint_from_int(value)); 535 } 536 /* fall through to JSON_FLOAT */ 537 } 538 case JSON_FLOAT: 539 /* FIXME dependent on locale */ 540 return QOBJECT(qfloat_from_double(strtod(token_get_value(token), 541 NULL))); 542 default: 543 abort(); 544 } 545 } 546 547 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 548 { 549 QObject *token; 550 551 token = parser_context_peek_token(ctxt); 552 if (token == NULL) { 553 parse_error(ctxt, NULL, "premature EOI"); 554 return NULL; 555 } 556 557 switch (token_get_type(token)) { 558 case JSON_LCURLY: 559 return parse_object(ctxt, ap); 560 case JSON_LSQUARE: 561 return parse_array(ctxt, ap); 562 case JSON_ESCAPE: 563 return parse_escape(ctxt, ap); 564 case JSON_INTEGER: 565 case JSON_FLOAT: 566 case JSON_STRING: 567 return parse_literal(ctxt); 568 case JSON_KEYWORD: 569 return parse_keyword(ctxt); 570 default: 571 parse_error(ctxt, token, "expecting value"); 572 return NULL; 573 } 574 } 575 576 QObject *json_parser_parse(GQueue *tokens, va_list *ap) 577 { 578 return json_parser_parse_err(tokens, ap, NULL); 579 } 580 581 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) 582 { 583 JSONParserContext *ctxt = parser_context_new(tokens); 584 QObject *result; 585 586 if (!ctxt) { 587 return NULL; 588 } 589 590 result = parse_value(ctxt, ap); 591 592 error_propagate(errp, ctxt->err); 593 594 parser_context_free(ctxt); 595 596 return result; 597 } 598