1a372823aSPaolo Bonzini /* 2a372823aSPaolo Bonzini * JSON lexer 3a372823aSPaolo Bonzini * 4a372823aSPaolo Bonzini * Copyright IBM, Corp. 2009 5a372823aSPaolo Bonzini * 6a372823aSPaolo Bonzini * Authors: 7a372823aSPaolo Bonzini * Anthony Liguori <aliguori@us.ibm.com> 8a372823aSPaolo Bonzini * 9a372823aSPaolo Bonzini * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10a372823aSPaolo Bonzini * See the COPYING.LIB file in the top-level directory. 11a372823aSPaolo Bonzini * 12a372823aSPaolo Bonzini */ 13a372823aSPaolo Bonzini 14f2ad72b3SPeter Maydell #include "qemu/osdep.h" 1586cdf9ecSMarkus Armbruster #include "json-parser-int.h" 16a372823aSPaolo Bonzini 17a372823aSPaolo Bonzini #define MAX_TOKEN_SIZE (64ULL << 20) 18a372823aSPaolo Bonzini 19a372823aSPaolo Bonzini /* 20eddc0a7fSMarkus Armbruster * From RFC 8259 "The JavaScript Object Notation (JSON) Data 21eddc0a7fSMarkus Armbruster * Interchange Format", with [comments in brackets]: 22ff5394adSEric Blake * 23eddc0a7fSMarkus Armbruster * The set of tokens includes six structural characters, strings, 24eddc0a7fSMarkus Armbruster * numbers, and three literal names. 25ff5394adSEric Blake * 26eddc0a7fSMarkus Armbruster * These are the six structural characters: 27ff5394adSEric Blake * 28eddc0a7fSMarkus Armbruster * begin-array = ws %x5B ws ; [ left square bracket 29eddc0a7fSMarkus Armbruster * begin-object = ws %x7B ws ; { left curly bracket 30eddc0a7fSMarkus Armbruster * end-array = ws %x5D ws ; ] right square bracket 31eddc0a7fSMarkus Armbruster * end-object = ws %x7D ws ; } right curly bracket 32eddc0a7fSMarkus Armbruster * name-separator = ws %x3A ws ; : colon 33eddc0a7fSMarkus Armbruster * value-separator = ws %x2C ws ; , comma 34ff5394adSEric Blake * 35eddc0a7fSMarkus Armbruster * Insignificant whitespace is allowed before or after any of the six 36eddc0a7fSMarkus Armbruster * structural characters. 37eddc0a7fSMarkus Armbruster * [This lexer accepts it before or after any token, which is actually 38eddc0a7fSMarkus Armbruster * the same, as the grammar always has structural characters between 39eddc0a7fSMarkus Armbruster * other tokens.] 40ff5394adSEric Blake * 41eddc0a7fSMarkus Armbruster * ws = *( 42eddc0a7fSMarkus Armbruster * %x20 / ; Space 43eddc0a7fSMarkus Armbruster * %x09 / ; Horizontal tab 44eddc0a7fSMarkus Armbruster * %x0A / ; Line feed or New line 45eddc0a7fSMarkus Armbruster * %x0D ) ; Carriage return 46a372823aSPaolo Bonzini * 47eddc0a7fSMarkus Armbruster * [...] three literal names: 48eddc0a7fSMarkus Armbruster * false null true 49eddc0a7fSMarkus Armbruster * [This lexer accepts [a-z]+, and leaves rejecting unknown literal 50eddc0a7fSMarkus Armbruster * names to the parser.] 51eddc0a7fSMarkus Armbruster * 52eddc0a7fSMarkus Armbruster * [Numbers:] 53eddc0a7fSMarkus Armbruster * 54eddc0a7fSMarkus Armbruster * number = [ minus ] int [ frac ] [ exp ] 55eddc0a7fSMarkus Armbruster * decimal-point = %x2E ; . 56eddc0a7fSMarkus Armbruster * digit1-9 = %x31-39 ; 1-9 57eddc0a7fSMarkus Armbruster * e = %x65 / %x45 ; e E 58eddc0a7fSMarkus Armbruster * exp = e [ minus / plus ] 1*DIGIT 59eddc0a7fSMarkus Armbruster * frac = decimal-point 1*DIGIT 60eddc0a7fSMarkus Armbruster * int = zero / ( digit1-9 *DIGIT ) 61eddc0a7fSMarkus Armbruster * minus = %x2D ; - 62eddc0a7fSMarkus Armbruster * plus = %x2B ; + 63eddc0a7fSMarkus Armbruster * zero = %x30 ; 0 64eddc0a7fSMarkus Armbruster * 65eddc0a7fSMarkus Armbruster * [Strings:] 66eddc0a7fSMarkus Armbruster * string = quotation-mark *char quotation-mark 67eddc0a7fSMarkus Armbruster * 68eddc0a7fSMarkus Armbruster * char = unescaped / 69eddc0a7fSMarkus Armbruster * escape ( 70eddc0a7fSMarkus Armbruster * %x22 / ; " quotation mark U+0022 71eddc0a7fSMarkus Armbruster * %x5C / ; \ reverse solidus U+005C 72eddc0a7fSMarkus Armbruster * %x2F / ; / solidus U+002F 73eddc0a7fSMarkus Armbruster * %x62 / ; b backspace U+0008 74eddc0a7fSMarkus Armbruster * %x66 / ; f form feed U+000C 75eddc0a7fSMarkus Armbruster * %x6E / ; n line feed U+000A 76eddc0a7fSMarkus Armbruster * %x72 / ; r carriage return U+000D 77eddc0a7fSMarkus Armbruster * %x74 / ; t tab U+0009 78eddc0a7fSMarkus Armbruster * %x75 4HEXDIG ) ; uXXXX U+XXXX 79eddc0a7fSMarkus Armbruster * escape = %x5C ; \ 80eddc0a7fSMarkus Armbruster * quotation-mark = %x22 ; " 81eddc0a7fSMarkus Armbruster * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF 82b2da4a4dSMarkus Armbruster * [This lexer accepts any non-control character after escape, and 83b2da4a4dSMarkus Armbruster * leaves rejecting invalid ones to the parser.] 84eddc0a7fSMarkus Armbruster * 85eddc0a7fSMarkus Armbruster * 86eddc0a7fSMarkus Armbruster * Extensions over RFC 8259: 87eddc0a7fSMarkus Armbruster * - Extra escape sequence in strings: 88eddc0a7fSMarkus Armbruster * 0x27 (apostrophe) is recognized after escape, too 89eddc0a7fSMarkus Armbruster * - Single-quoted strings: 90eddc0a7fSMarkus Armbruster * Like double-quoted strings, except they're delimited by %x27 91eddc0a7fSMarkus Armbruster * (apostrophe) instead of %x22 (quotation mark), and can't contain 92eddc0a7fSMarkus Armbruster * unescaped apostrophe, but can contain unescaped quotation mark. 932cbd15aaSMarkus Armbruster * - Interpolation, if enabled: 94f7617d45SMarkus Armbruster * The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid 95f7617d45SMarkus Armbruster * ones to the parser. 96eddc0a7fSMarkus Armbruster * 97eddc0a7fSMarkus Armbruster * Note: 984b1c0cd7SMarkus Armbruster * - Input must be encoded in modified UTF-8. 99eddc0a7fSMarkus Armbruster * - Decoding and validating is left to the parser. 100a372823aSPaolo Bonzini */ 101a372823aSPaolo Bonzini 102a372823aSPaolo Bonzini enum json_lexer_state { 103b8d3b1daSMarkus Armbruster IN_ERROR = 0, /* must really be 0, see json_lexer[] */ 104*0f07a5d5SMarkus Armbruster IN_RECOVERY, 105a372823aSPaolo Bonzini IN_DQ_STRING_ESCAPE, 106a372823aSPaolo Bonzini IN_DQ_STRING, 107a372823aSPaolo Bonzini IN_SQ_STRING_ESCAPE, 108a372823aSPaolo Bonzini IN_SQ_STRING, 109a372823aSPaolo Bonzini IN_ZERO, 1104d400661SMarkus Armbruster IN_EXP_DIGITS, 1114d400661SMarkus Armbruster IN_EXP_SIGN, 112a372823aSPaolo Bonzini IN_EXP_E, 113a372823aSPaolo Bonzini IN_MANTISSA, 114a372823aSPaolo Bonzini IN_MANTISSA_DIGITS, 1154d400661SMarkus Armbruster IN_DIGITS, 1164d400661SMarkus Armbruster IN_SIGN, 117a372823aSPaolo Bonzini IN_KEYWORD, 11861030280SMarkus Armbruster IN_INTERP, 119a372823aSPaolo Bonzini IN_WHITESPACE, 120a372823aSPaolo Bonzini IN_START, 1212cbd15aaSMarkus Armbruster IN_START_INTERP, /* must be IN_START + 1 */ 122a372823aSPaolo Bonzini }; 123a372823aSPaolo Bonzini 1242cbd15aaSMarkus Armbruster QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); 125c0ee3afaSMarkus Armbruster QEMU_BUILD_BUG_ON(JSON_MAX >= 0x80); 1262cbd15aaSMarkus Armbruster QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); 127b8d3b1daSMarkus Armbruster 128c0ee3afaSMarkus Armbruster #define LOOKAHEAD 0x80 129c0ee3afaSMarkus Armbruster #define TERMINAL(state) [0 ... 0xFF] = ((state) | LOOKAHEAD) 130a372823aSPaolo Bonzini 131a372823aSPaolo Bonzini static const uint8_t json_lexer[][256] = { 132b8d3b1daSMarkus Armbruster /* Relies on default initialization to IN_ERROR! */ 133b8d3b1daSMarkus Armbruster 134*0f07a5d5SMarkus Armbruster /* error recovery */ 135*0f07a5d5SMarkus Armbruster [IN_RECOVERY] = { 136*0f07a5d5SMarkus Armbruster /* 137*0f07a5d5SMarkus Armbruster * Skip characters until a structural character, an ASCII 138*0f07a5d5SMarkus Armbruster * control character other than '\t', or impossible UTF-8 139*0f07a5d5SMarkus Armbruster * bytes '\xFE', '\xFF'. Structural characters and line 140*0f07a5d5SMarkus Armbruster * endings are promising resynchronization points. Clients 141*0f07a5d5SMarkus Armbruster * may use the others to force the JSON parser into known-good 142*0f07a5d5SMarkus Armbruster * state; see docs/interop/qmp-spec.txt. 143*0f07a5d5SMarkus Armbruster */ 144*0f07a5d5SMarkus Armbruster [0 ... 0x1F] = IN_START | LOOKAHEAD, 145*0f07a5d5SMarkus Armbruster [0x20 ... 0xFD] = IN_RECOVERY, 146*0f07a5d5SMarkus Armbruster [0xFE ... 0xFF] = IN_START | LOOKAHEAD, 147*0f07a5d5SMarkus Armbruster ['\t'] = IN_RECOVERY, 148*0f07a5d5SMarkus Armbruster ['['] = IN_START | LOOKAHEAD, 149*0f07a5d5SMarkus Armbruster [']'] = IN_START | LOOKAHEAD, 150*0f07a5d5SMarkus Armbruster ['{'] = IN_START | LOOKAHEAD, 151*0f07a5d5SMarkus Armbruster ['}'] = IN_START | LOOKAHEAD, 152*0f07a5d5SMarkus Armbruster [':'] = IN_START | LOOKAHEAD, 153*0f07a5d5SMarkus Armbruster [','] = IN_START | LOOKAHEAD, 154*0f07a5d5SMarkus Armbruster }, 155*0f07a5d5SMarkus Armbruster 156a372823aSPaolo Bonzini /* double quote string */ 157a372823aSPaolo Bonzini [IN_DQ_STRING_ESCAPE] = { 158b2da4a4dSMarkus Armbruster [0x20 ... 0xFD] = IN_DQ_STRING, 159a372823aSPaolo Bonzini }, 160a372823aSPaolo Bonzini [IN_DQ_STRING] = { 161de930f45SMarkus Armbruster [0x20 ... 0xFD] = IN_DQ_STRING, 162a372823aSPaolo Bonzini ['\\'] = IN_DQ_STRING_ESCAPE, 163a372823aSPaolo Bonzini ['"'] = JSON_STRING, 164a372823aSPaolo Bonzini }, 165a372823aSPaolo Bonzini 166a372823aSPaolo Bonzini /* single quote string */ 167a372823aSPaolo Bonzini [IN_SQ_STRING_ESCAPE] = { 168b2da4a4dSMarkus Armbruster [0x20 ... 0xFD] = IN_SQ_STRING, 169a372823aSPaolo Bonzini }, 170a372823aSPaolo Bonzini [IN_SQ_STRING] = { 171de930f45SMarkus Armbruster [0x20 ... 0xFD] = IN_SQ_STRING, 172a372823aSPaolo Bonzini ['\\'] = IN_SQ_STRING_ESCAPE, 173a372823aSPaolo Bonzini ['\''] = JSON_STRING, 174a372823aSPaolo Bonzini }, 175a372823aSPaolo Bonzini 176a372823aSPaolo Bonzini /* Zero */ 177a372823aSPaolo Bonzini [IN_ZERO] = { 178a372823aSPaolo Bonzini TERMINAL(JSON_INTEGER), 179a372823aSPaolo Bonzini ['0' ... '9'] = IN_ERROR, 180a372823aSPaolo Bonzini ['.'] = IN_MANTISSA, 181a372823aSPaolo Bonzini }, 182a372823aSPaolo Bonzini 183a372823aSPaolo Bonzini /* Float */ 1844d400661SMarkus Armbruster [IN_EXP_DIGITS] = { 185a372823aSPaolo Bonzini TERMINAL(JSON_FLOAT), 1864d400661SMarkus Armbruster ['0' ... '9'] = IN_EXP_DIGITS, 187a372823aSPaolo Bonzini }, 188a372823aSPaolo Bonzini 1894d400661SMarkus Armbruster [IN_EXP_SIGN] = { 1904d400661SMarkus Armbruster ['0' ... '9'] = IN_EXP_DIGITS, 191a372823aSPaolo Bonzini }, 192a372823aSPaolo Bonzini 193a372823aSPaolo Bonzini [IN_EXP_E] = { 1944d400661SMarkus Armbruster ['-'] = IN_EXP_SIGN, 1954d400661SMarkus Armbruster ['+'] = IN_EXP_SIGN, 1964d400661SMarkus Armbruster ['0' ... '9'] = IN_EXP_DIGITS, 197a372823aSPaolo Bonzini }, 198a372823aSPaolo Bonzini 199a372823aSPaolo Bonzini [IN_MANTISSA_DIGITS] = { 200a372823aSPaolo Bonzini TERMINAL(JSON_FLOAT), 201a372823aSPaolo Bonzini ['0' ... '9'] = IN_MANTISSA_DIGITS, 202a372823aSPaolo Bonzini ['e'] = IN_EXP_E, 203a372823aSPaolo Bonzini ['E'] = IN_EXP_E, 204a372823aSPaolo Bonzini }, 205a372823aSPaolo Bonzini 206a372823aSPaolo Bonzini [IN_MANTISSA] = { 207a372823aSPaolo Bonzini ['0' ... '9'] = IN_MANTISSA_DIGITS, 208a372823aSPaolo Bonzini }, 209a372823aSPaolo Bonzini 210a372823aSPaolo Bonzini /* Number */ 2114d400661SMarkus Armbruster [IN_DIGITS] = { 212a372823aSPaolo Bonzini TERMINAL(JSON_INTEGER), 2134d400661SMarkus Armbruster ['0' ... '9'] = IN_DIGITS, 214a372823aSPaolo Bonzini ['e'] = IN_EXP_E, 215a372823aSPaolo Bonzini ['E'] = IN_EXP_E, 216a372823aSPaolo Bonzini ['.'] = IN_MANTISSA, 217a372823aSPaolo Bonzini }, 218a372823aSPaolo Bonzini 2194d400661SMarkus Armbruster [IN_SIGN] = { 220a372823aSPaolo Bonzini ['0'] = IN_ZERO, 2214d400661SMarkus Armbruster ['1' ... '9'] = IN_DIGITS, 222a372823aSPaolo Bonzini }, 223a372823aSPaolo Bonzini 224a372823aSPaolo Bonzini /* keywords */ 225a372823aSPaolo Bonzini [IN_KEYWORD] = { 226a372823aSPaolo Bonzini TERMINAL(JSON_KEYWORD), 227a372823aSPaolo Bonzini ['a' ... 'z'] = IN_KEYWORD, 228a372823aSPaolo Bonzini }, 229a372823aSPaolo Bonzini 230a372823aSPaolo Bonzini /* whitespace */ 231a372823aSPaolo Bonzini [IN_WHITESPACE] = { 232a372823aSPaolo Bonzini TERMINAL(JSON_SKIP), 233a372823aSPaolo Bonzini [' '] = IN_WHITESPACE, 234a372823aSPaolo Bonzini ['\t'] = IN_WHITESPACE, 235a372823aSPaolo Bonzini ['\r'] = IN_WHITESPACE, 236a372823aSPaolo Bonzini ['\n'] = IN_WHITESPACE, 237a372823aSPaolo Bonzini }, 238a372823aSPaolo Bonzini 23961030280SMarkus Armbruster /* interpolation */ 24061030280SMarkus Armbruster [IN_INTERP] = { 241f7617d45SMarkus Armbruster TERMINAL(JSON_INTERP), 242f7617d45SMarkus Armbruster ['A' ... 'Z'] = IN_INTERP, 243f7617d45SMarkus Armbruster ['a' ... 'z'] = IN_INTERP, 244f7617d45SMarkus Armbruster ['0' ... '9'] = IN_INTERP, 245a372823aSPaolo Bonzini }, 246a372823aSPaolo Bonzini 2472cbd15aaSMarkus Armbruster /* 2482cbd15aaSMarkus Armbruster * Two start states: 2492cbd15aaSMarkus Armbruster * - IN_START recognizes JSON tokens with our string extensions 2502cbd15aaSMarkus Armbruster * - IN_START_INTERP additionally recognizes interpolation. 2512cbd15aaSMarkus Armbruster */ 2522cbd15aaSMarkus Armbruster [IN_START ... IN_START_INTERP] = { 253a372823aSPaolo Bonzini ['"'] = IN_DQ_STRING, 254a372823aSPaolo Bonzini ['\''] = IN_SQ_STRING, 255a372823aSPaolo Bonzini ['0'] = IN_ZERO, 2564d400661SMarkus Armbruster ['1' ... '9'] = IN_DIGITS, 2574d400661SMarkus Armbruster ['-'] = IN_SIGN, 258c5461660SMarkus Armbruster ['{'] = JSON_LCURLY, 259c5461660SMarkus Armbruster ['}'] = JSON_RCURLY, 260c5461660SMarkus Armbruster ['['] = JSON_LSQUARE, 261c5461660SMarkus Armbruster [']'] = JSON_RSQUARE, 262c5461660SMarkus Armbruster [','] = JSON_COMMA, 263c5461660SMarkus Armbruster [':'] = JSON_COLON, 264a372823aSPaolo Bonzini ['a' ... 'z'] = IN_KEYWORD, 265a372823aSPaolo Bonzini [' '] = IN_WHITESPACE, 266a372823aSPaolo Bonzini ['\t'] = IN_WHITESPACE, 267a372823aSPaolo Bonzini ['\r'] = IN_WHITESPACE, 268a372823aSPaolo Bonzini ['\n'] = IN_WHITESPACE, 269a372823aSPaolo Bonzini }, 2702cbd15aaSMarkus Armbruster [IN_START_INTERP]['%'] = IN_INTERP, 271a372823aSPaolo Bonzini }; 272a372823aSPaolo Bonzini 273c0ee3afaSMarkus Armbruster static inline uint8_t next_state(JSONLexer *lexer, char ch, bool flush, 274c0ee3afaSMarkus Armbruster bool *char_consumed) 275c0ee3afaSMarkus Armbruster { 276c0ee3afaSMarkus Armbruster uint8_t next; 277c0ee3afaSMarkus Armbruster 278c0ee3afaSMarkus Armbruster assert(lexer->state <= ARRAY_SIZE(json_lexer)); 279c0ee3afaSMarkus Armbruster next = json_lexer[lexer->state][(uint8_t)ch]; 280c0ee3afaSMarkus Armbruster *char_consumed = !flush && !(next & LOOKAHEAD); 281c0ee3afaSMarkus Armbruster return next & ~LOOKAHEAD; 282c0ee3afaSMarkus Armbruster } 283c0ee3afaSMarkus Armbruster 2842cbd15aaSMarkus Armbruster void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) 285a372823aSPaolo Bonzini { 2862cbd15aaSMarkus Armbruster lexer->start_state = lexer->state = enable_interpolation 2872cbd15aaSMarkus Armbruster ? IN_START_INTERP : IN_START; 288d2ca7c0bSPaolo Bonzini lexer->token = g_string_sized_new(3); 289a372823aSPaolo Bonzini lexer->x = lexer->y = 0; 290a372823aSPaolo Bonzini } 291a372823aSPaolo Bonzini 2927c1e1d54SMarc-André Lureau static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) 293a372823aSPaolo Bonzini { 294852dfa76SMarkus Armbruster int new_state; 295852dfa76SMarkus Armbruster bool char_consumed = false; 296a372823aSPaolo Bonzini 297a372823aSPaolo Bonzini lexer->x++; 298a372823aSPaolo Bonzini if (ch == '\n') { 299a372823aSPaolo Bonzini lexer->x = 0; 300a372823aSPaolo Bonzini lexer->y++; 301a372823aSPaolo Bonzini } 302a372823aSPaolo Bonzini 303852dfa76SMarkus Armbruster while (flush ? lexer->state != lexer->start_state : !char_consumed) { 304c0ee3afaSMarkus Armbruster new_state = next_state(lexer, ch, flush, &char_consumed); 305852dfa76SMarkus Armbruster if (char_consumed) { 306c0ee3afaSMarkus Armbruster assert(!flush); 307d2ca7c0bSPaolo Bonzini g_string_append_c(lexer->token, ch); 308a372823aSPaolo Bonzini } 309a372823aSPaolo Bonzini 310a372823aSPaolo Bonzini switch (new_state) { 311c5461660SMarkus Armbruster case JSON_LCURLY: 312c5461660SMarkus Armbruster case JSON_RCURLY: 313c5461660SMarkus Armbruster case JSON_LSQUARE: 314c5461660SMarkus Armbruster case JSON_RSQUARE: 315c5461660SMarkus Armbruster case JSON_COLON: 316c5461660SMarkus Armbruster case JSON_COMMA: 31761030280SMarkus Armbruster case JSON_INTERP: 318a372823aSPaolo Bonzini case JSON_INTEGER: 319a372823aSPaolo Bonzini case JSON_FLOAT: 320a372823aSPaolo Bonzini case JSON_KEYWORD: 321a372823aSPaolo Bonzini case JSON_STRING: 322037f2440SMarkus Armbruster json_message_process_token(lexer, lexer->token, new_state, 323037f2440SMarkus Armbruster lexer->x, lexer->y); 324a372823aSPaolo Bonzini /* fall through */ 325a372823aSPaolo Bonzini case JSON_SKIP: 326d2ca7c0bSPaolo Bonzini g_string_truncate(lexer->token, 0); 327*0f07a5d5SMarkus Armbruster /* fall through */ 328*0f07a5d5SMarkus Armbruster case IN_START: 3292cbd15aaSMarkus Armbruster new_state = lexer->start_state; 330a372823aSPaolo Bonzini break; 331a372823aSPaolo Bonzini case IN_ERROR: 332037f2440SMarkus Armbruster json_message_process_token(lexer, lexer->token, JSON_ERROR, 333037f2440SMarkus Armbruster lexer->x, lexer->y); 334*0f07a5d5SMarkus Armbruster new_state = IN_RECOVERY; 335*0f07a5d5SMarkus Armbruster /* fall through */ 336*0f07a5d5SMarkus Armbruster case IN_RECOVERY: 337d2ca7c0bSPaolo Bonzini g_string_truncate(lexer->token, 0); 338*0f07a5d5SMarkus Armbruster break; 339a372823aSPaolo Bonzini default: 340a372823aSPaolo Bonzini break; 341a372823aSPaolo Bonzini } 342a372823aSPaolo Bonzini lexer->state = new_state; 343852dfa76SMarkus Armbruster } 344a372823aSPaolo Bonzini 345a372823aSPaolo Bonzini /* Do not let a single token grow to an arbitrarily large size, 346a372823aSPaolo Bonzini * this is a security consideration. 347a372823aSPaolo Bonzini */ 348d2ca7c0bSPaolo Bonzini if (lexer->token->len > MAX_TOKEN_SIZE) { 349037f2440SMarkus Armbruster json_message_process_token(lexer, lexer->token, lexer->state, 350037f2440SMarkus Armbruster lexer->x, lexer->y); 351d2ca7c0bSPaolo Bonzini g_string_truncate(lexer->token, 0); 3522cbd15aaSMarkus Armbruster lexer->state = lexer->start_state; 353a372823aSPaolo Bonzini } 354a372823aSPaolo Bonzini } 355a372823aSPaolo Bonzini 3567c1e1d54SMarc-André Lureau void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) 357a372823aSPaolo Bonzini { 358a372823aSPaolo Bonzini size_t i; 359a372823aSPaolo Bonzini 360a372823aSPaolo Bonzini for (i = 0; i < size; i++) { 3617c1e1d54SMarc-André Lureau json_lexer_feed_char(lexer, buffer[i], false); 362a372823aSPaolo Bonzini } 363a372823aSPaolo Bonzini } 364a372823aSPaolo Bonzini 3657c1e1d54SMarc-André Lureau void json_lexer_flush(JSONLexer *lexer) 366a372823aSPaolo Bonzini { 3677c1e1d54SMarc-André Lureau json_lexer_feed_char(lexer, 0, true); 368852dfa76SMarkus Armbruster assert(lexer->state == lexer->start_state); 369f9277915SMarkus Armbruster json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT, 370f9277915SMarkus Armbruster lexer->x, lexer->y); 371a372823aSPaolo Bonzini } 372a372823aSPaolo Bonzini 373a372823aSPaolo Bonzini void json_lexer_destroy(JSONLexer *lexer) 374a372823aSPaolo Bonzini { 375d2ca7c0bSPaolo Bonzini g_string_free(lexer->token, true); 376a372823aSPaolo Bonzini } 377