xref: /openbmc/qemu/qobject/json-lexer.c (revision a372823a14461c454feaa86373bd672fd518847a)
1*a372823aSPaolo Bonzini /*
2*a372823aSPaolo Bonzini  * JSON lexer
3*a372823aSPaolo Bonzini  *
4*a372823aSPaolo Bonzini  * Copyright IBM, Corp. 2009
5*a372823aSPaolo Bonzini  *
6*a372823aSPaolo Bonzini  * Authors:
7*a372823aSPaolo Bonzini  *  Anthony Liguori   <aliguori@us.ibm.com>
8*a372823aSPaolo Bonzini  *
9*a372823aSPaolo Bonzini  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10*a372823aSPaolo Bonzini  * See the COPYING.LIB file in the top-level directory.
11*a372823aSPaolo Bonzini  *
12*a372823aSPaolo Bonzini  */
13*a372823aSPaolo Bonzini 
14*a372823aSPaolo Bonzini #include "qapi/qmp/qstring.h"
15*a372823aSPaolo Bonzini #include "qapi/qmp/qlist.h"
16*a372823aSPaolo Bonzini #include "qapi/qmp/qdict.h"
17*a372823aSPaolo Bonzini #include "qapi/qmp/qint.h"
18*a372823aSPaolo Bonzini #include "qemu-common.h"
19*a372823aSPaolo Bonzini #include "qapi/qmp/json-lexer.h"
20*a372823aSPaolo Bonzini 
/*
 * Largest token length tolerated while lexing (64 MiB).  Once the token
 * being accumulated grows past this, the lexer emits it as-is and starts
 * over, so hostile input cannot make a single token grow without bound.
 */
#define MAX_TOKEN_SIZE (64ULL * 1024 * 1024)
22*a372823aSPaolo Bonzini 
/*
 * Tokens recognized by the lexer, as informal regular expressions:
 *
 * "([^\\"]|\\["'\\/bfnrt]|\\u[0-9a-fA-F]{4})*"
 * '([^\\']|\\["'\\/bfnrt]|\\u[0-9a-fA-F]{4})*'
 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
 * [{}\[\],:]
 * [a-z]+
 *
 * Note: an exponent directly after a lone 0 (e.g. 0e5) is not accepted;
 * the exponent must follow a nonzero integer part or a fraction.
 */
31*a372823aSPaolo Bonzini 
/*
 * Internal lexer states.  The numeric values index the first dimension of
 * the transition table, so the order of the enumerators must not change.
 * IN_ERROR has to stay 0: table entries that are not initialized
 * explicitly are zero and therefore mean "error".
 */
enum json_lexer_state {
    IN_ERROR = 0,

    /* double-quoted string, including \uXXXX hex digit countdown */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string, including \uXXXX hex digit countdown */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* keywords */
    IN_KEYWORD,

    /* '%' interpolation escapes */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_I,
    IN_ESCAPE_I6,
    IN_ESCAPE_I64,

    /* whitespace and the initial state */
    IN_WHITESPACE,
    IN_START,
};
64*a372823aSPaolo Bonzini 
/*
 * Row initializer for states that end a token on lookahead: every input
 * byte maps to the terminal STATE unless a later designator in the same
 * row overrides it.  The range covers all 256 byte values so that a
 * complete token followed by a byte >= 0x80 is still emitted; the
 * offending byte is then lexed again from the start state, where it is
 * rejected on its own.
 */
#define TERMINAL(state) [0 ... 0xFF] = (state)

/* Return whether TERMINAL is a terminal state and the transition to it
   from OLD_STATE required lookahead.  This happens whenever the table
   below uses the TERMINAL macro.  Only column 0 of the row is inspected:
   no row overrides the entry for byte 0 after using TERMINAL.  */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
            (json_lexer[(old_state)][0] == (terminal))
72*a372823aSPaolo Bonzini 
73*a372823aSPaolo Bonzini static const uint8_t json_lexer[][256] =  {
74*a372823aSPaolo Bonzini     /* double quote string */
75*a372823aSPaolo Bonzini     [IN_DQ_UCODE3] = {
76*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DQ_STRING,
77*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_DQ_STRING,
78*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_DQ_STRING,
79*a372823aSPaolo Bonzini     },
80*a372823aSPaolo Bonzini     [IN_DQ_UCODE2] = {
81*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DQ_UCODE3,
82*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_DQ_UCODE3,
83*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_DQ_UCODE3,
84*a372823aSPaolo Bonzini     },
85*a372823aSPaolo Bonzini     [IN_DQ_UCODE1] = {
86*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DQ_UCODE2,
87*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_DQ_UCODE2,
88*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_DQ_UCODE2,
89*a372823aSPaolo Bonzini     },
90*a372823aSPaolo Bonzini     [IN_DQ_UCODE0] = {
91*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DQ_UCODE1,
92*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_DQ_UCODE1,
93*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_DQ_UCODE1,
94*a372823aSPaolo Bonzini     },
95*a372823aSPaolo Bonzini     [IN_DQ_STRING_ESCAPE] = {
96*a372823aSPaolo Bonzini         ['b'] = IN_DQ_STRING,
97*a372823aSPaolo Bonzini         ['f'] =  IN_DQ_STRING,
98*a372823aSPaolo Bonzini         ['n'] =  IN_DQ_STRING,
99*a372823aSPaolo Bonzini         ['r'] =  IN_DQ_STRING,
100*a372823aSPaolo Bonzini         ['t'] =  IN_DQ_STRING,
101*a372823aSPaolo Bonzini         ['/'] = IN_DQ_STRING,
102*a372823aSPaolo Bonzini         ['\\'] = IN_DQ_STRING,
103*a372823aSPaolo Bonzini         ['\''] = IN_DQ_STRING,
104*a372823aSPaolo Bonzini         ['\"'] = IN_DQ_STRING,
105*a372823aSPaolo Bonzini         ['u'] = IN_DQ_UCODE0,
106*a372823aSPaolo Bonzini     },
107*a372823aSPaolo Bonzini     [IN_DQ_STRING] = {
108*a372823aSPaolo Bonzini         [1 ... 0xBF] = IN_DQ_STRING,
109*a372823aSPaolo Bonzini         [0xC2 ... 0xF4] = IN_DQ_STRING,
110*a372823aSPaolo Bonzini         ['\\'] = IN_DQ_STRING_ESCAPE,
111*a372823aSPaolo Bonzini         ['"'] = JSON_STRING,
112*a372823aSPaolo Bonzini     },
113*a372823aSPaolo Bonzini 
114*a372823aSPaolo Bonzini     /* single quote string */
115*a372823aSPaolo Bonzini     [IN_SQ_UCODE3] = {
116*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_SQ_STRING,
117*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_SQ_STRING,
118*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_SQ_STRING,
119*a372823aSPaolo Bonzini     },
120*a372823aSPaolo Bonzini     [IN_SQ_UCODE2] = {
121*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_SQ_UCODE3,
122*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_SQ_UCODE3,
123*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_SQ_UCODE3,
124*a372823aSPaolo Bonzini     },
125*a372823aSPaolo Bonzini     [IN_SQ_UCODE1] = {
126*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_SQ_UCODE2,
127*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_SQ_UCODE2,
128*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_SQ_UCODE2,
129*a372823aSPaolo Bonzini     },
130*a372823aSPaolo Bonzini     [IN_SQ_UCODE0] = {
131*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_SQ_UCODE1,
132*a372823aSPaolo Bonzini         ['a' ... 'f'] = IN_SQ_UCODE1,
133*a372823aSPaolo Bonzini         ['A' ... 'F'] = IN_SQ_UCODE1,
134*a372823aSPaolo Bonzini     },
135*a372823aSPaolo Bonzini     [IN_SQ_STRING_ESCAPE] = {
136*a372823aSPaolo Bonzini         ['b'] = IN_SQ_STRING,
137*a372823aSPaolo Bonzini         ['f'] =  IN_SQ_STRING,
138*a372823aSPaolo Bonzini         ['n'] =  IN_SQ_STRING,
139*a372823aSPaolo Bonzini         ['r'] =  IN_SQ_STRING,
140*a372823aSPaolo Bonzini         ['t'] =  IN_SQ_STRING,
141*a372823aSPaolo Bonzini         ['/'] = IN_DQ_STRING,
142*a372823aSPaolo Bonzini         ['\\'] = IN_DQ_STRING,
143*a372823aSPaolo Bonzini         ['\''] = IN_SQ_STRING,
144*a372823aSPaolo Bonzini         ['\"'] = IN_SQ_STRING,
145*a372823aSPaolo Bonzini         ['u'] = IN_SQ_UCODE0,
146*a372823aSPaolo Bonzini     },
147*a372823aSPaolo Bonzini     [IN_SQ_STRING] = {
148*a372823aSPaolo Bonzini         [1 ... 0xBF] = IN_SQ_STRING,
149*a372823aSPaolo Bonzini         [0xC2 ... 0xF4] = IN_SQ_STRING,
150*a372823aSPaolo Bonzini         ['\\'] = IN_SQ_STRING_ESCAPE,
151*a372823aSPaolo Bonzini         ['\''] = JSON_STRING,
152*a372823aSPaolo Bonzini     },
153*a372823aSPaolo Bonzini 
154*a372823aSPaolo Bonzini     /* Zero */
155*a372823aSPaolo Bonzini     [IN_ZERO] = {
156*a372823aSPaolo Bonzini         TERMINAL(JSON_INTEGER),
157*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_ERROR,
158*a372823aSPaolo Bonzini         ['.'] = IN_MANTISSA,
159*a372823aSPaolo Bonzini     },
160*a372823aSPaolo Bonzini 
161*a372823aSPaolo Bonzini     /* Float */
162*a372823aSPaolo Bonzini     [IN_DIGITS] = {
163*a372823aSPaolo Bonzini         TERMINAL(JSON_FLOAT),
164*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DIGITS,
165*a372823aSPaolo Bonzini     },
166*a372823aSPaolo Bonzini 
167*a372823aSPaolo Bonzini     [IN_DIGIT] = {
168*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DIGITS,
169*a372823aSPaolo Bonzini     },
170*a372823aSPaolo Bonzini 
171*a372823aSPaolo Bonzini     [IN_EXP_E] = {
172*a372823aSPaolo Bonzini         ['-'] = IN_DIGIT,
173*a372823aSPaolo Bonzini         ['+'] = IN_DIGIT,
174*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_DIGITS,
175*a372823aSPaolo Bonzini     },
176*a372823aSPaolo Bonzini 
177*a372823aSPaolo Bonzini     [IN_MANTISSA_DIGITS] = {
178*a372823aSPaolo Bonzini         TERMINAL(JSON_FLOAT),
179*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_MANTISSA_DIGITS,
180*a372823aSPaolo Bonzini         ['e'] = IN_EXP_E,
181*a372823aSPaolo Bonzini         ['E'] = IN_EXP_E,
182*a372823aSPaolo Bonzini     },
183*a372823aSPaolo Bonzini 
184*a372823aSPaolo Bonzini     [IN_MANTISSA] = {
185*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_MANTISSA_DIGITS,
186*a372823aSPaolo Bonzini     },
187*a372823aSPaolo Bonzini 
188*a372823aSPaolo Bonzini     /* Number */
189*a372823aSPaolo Bonzini     [IN_NONZERO_NUMBER] = {
190*a372823aSPaolo Bonzini         TERMINAL(JSON_INTEGER),
191*a372823aSPaolo Bonzini         ['0' ... '9'] = IN_NONZERO_NUMBER,
192*a372823aSPaolo Bonzini         ['e'] = IN_EXP_E,
193*a372823aSPaolo Bonzini         ['E'] = IN_EXP_E,
194*a372823aSPaolo Bonzini         ['.'] = IN_MANTISSA,
195*a372823aSPaolo Bonzini     },
196*a372823aSPaolo Bonzini 
197*a372823aSPaolo Bonzini     [IN_NEG_NONZERO_NUMBER] = {
198*a372823aSPaolo Bonzini         ['0'] = IN_ZERO,
199*a372823aSPaolo Bonzini         ['1' ... '9'] = IN_NONZERO_NUMBER,
200*a372823aSPaolo Bonzini     },
201*a372823aSPaolo Bonzini 
202*a372823aSPaolo Bonzini     /* keywords */
203*a372823aSPaolo Bonzini     [IN_KEYWORD] = {
204*a372823aSPaolo Bonzini         TERMINAL(JSON_KEYWORD),
205*a372823aSPaolo Bonzini         ['a' ... 'z'] = IN_KEYWORD,
206*a372823aSPaolo Bonzini     },
207*a372823aSPaolo Bonzini 
208*a372823aSPaolo Bonzini     /* whitespace */
209*a372823aSPaolo Bonzini     [IN_WHITESPACE] = {
210*a372823aSPaolo Bonzini         TERMINAL(JSON_SKIP),
211*a372823aSPaolo Bonzini         [' '] = IN_WHITESPACE,
212*a372823aSPaolo Bonzini         ['\t'] = IN_WHITESPACE,
213*a372823aSPaolo Bonzini         ['\r'] = IN_WHITESPACE,
214*a372823aSPaolo Bonzini         ['\n'] = IN_WHITESPACE,
215*a372823aSPaolo Bonzini     },
216*a372823aSPaolo Bonzini 
217*a372823aSPaolo Bonzini     /* escape */
218*a372823aSPaolo Bonzini     [IN_ESCAPE_LL] = {
219*a372823aSPaolo Bonzini         ['d'] = JSON_ESCAPE,
220*a372823aSPaolo Bonzini     },
221*a372823aSPaolo Bonzini 
222*a372823aSPaolo Bonzini     [IN_ESCAPE_L] = {
223*a372823aSPaolo Bonzini         ['d'] = JSON_ESCAPE,
224*a372823aSPaolo Bonzini         ['l'] = IN_ESCAPE_LL,
225*a372823aSPaolo Bonzini     },
226*a372823aSPaolo Bonzini 
227*a372823aSPaolo Bonzini     [IN_ESCAPE_I64] = {
228*a372823aSPaolo Bonzini         ['d'] = JSON_ESCAPE,
229*a372823aSPaolo Bonzini     },
230*a372823aSPaolo Bonzini 
231*a372823aSPaolo Bonzini     [IN_ESCAPE_I6] = {
232*a372823aSPaolo Bonzini         ['4'] = IN_ESCAPE_I64,
233*a372823aSPaolo Bonzini     },
234*a372823aSPaolo Bonzini 
235*a372823aSPaolo Bonzini     [IN_ESCAPE_I] = {
236*a372823aSPaolo Bonzini         ['6'] = IN_ESCAPE_I6,
237*a372823aSPaolo Bonzini     },
238*a372823aSPaolo Bonzini 
239*a372823aSPaolo Bonzini     [IN_ESCAPE] = {
240*a372823aSPaolo Bonzini         ['d'] = JSON_ESCAPE,
241*a372823aSPaolo Bonzini         ['i'] = JSON_ESCAPE,
242*a372823aSPaolo Bonzini         ['p'] = JSON_ESCAPE,
243*a372823aSPaolo Bonzini         ['s'] = JSON_ESCAPE,
244*a372823aSPaolo Bonzini         ['f'] = JSON_ESCAPE,
245*a372823aSPaolo Bonzini         ['l'] = IN_ESCAPE_L,
246*a372823aSPaolo Bonzini         ['I'] = IN_ESCAPE_I,
247*a372823aSPaolo Bonzini     },
248*a372823aSPaolo Bonzini 
249*a372823aSPaolo Bonzini     /* top level rule */
250*a372823aSPaolo Bonzini     [IN_START] = {
251*a372823aSPaolo Bonzini         ['"'] = IN_DQ_STRING,
252*a372823aSPaolo Bonzini         ['\''] = IN_SQ_STRING,
253*a372823aSPaolo Bonzini         ['0'] = IN_ZERO,
254*a372823aSPaolo Bonzini         ['1' ... '9'] = IN_NONZERO_NUMBER,
255*a372823aSPaolo Bonzini         ['-'] = IN_NEG_NONZERO_NUMBER,
256*a372823aSPaolo Bonzini         ['{'] = JSON_OPERATOR,
257*a372823aSPaolo Bonzini         ['}'] = JSON_OPERATOR,
258*a372823aSPaolo Bonzini         ['['] = JSON_OPERATOR,
259*a372823aSPaolo Bonzini         [']'] = JSON_OPERATOR,
260*a372823aSPaolo Bonzini         [','] = JSON_OPERATOR,
261*a372823aSPaolo Bonzini         [':'] = JSON_OPERATOR,
262*a372823aSPaolo Bonzini         ['a' ... 'z'] = IN_KEYWORD,
263*a372823aSPaolo Bonzini         ['%'] = IN_ESCAPE,
264*a372823aSPaolo Bonzini         [' '] = IN_WHITESPACE,
265*a372823aSPaolo Bonzini         ['\t'] = IN_WHITESPACE,
266*a372823aSPaolo Bonzini         ['\r'] = IN_WHITESPACE,
267*a372823aSPaolo Bonzini         ['\n'] = IN_WHITESPACE,
268*a372823aSPaolo Bonzini     },
269*a372823aSPaolo Bonzini };
270*a372823aSPaolo Bonzini 
271*a372823aSPaolo Bonzini void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
272*a372823aSPaolo Bonzini {
273*a372823aSPaolo Bonzini     lexer->emit = func;
274*a372823aSPaolo Bonzini     lexer->state = IN_START;
275*a372823aSPaolo Bonzini     lexer->token = qstring_new();
276*a372823aSPaolo Bonzini     lexer->x = lexer->y = 0;
277*a372823aSPaolo Bonzini }
278*a372823aSPaolo Bonzini 
279*a372823aSPaolo Bonzini static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
280*a372823aSPaolo Bonzini {
281*a372823aSPaolo Bonzini     int char_consumed, new_state;
282*a372823aSPaolo Bonzini 
283*a372823aSPaolo Bonzini     lexer->x++;
284*a372823aSPaolo Bonzini     if (ch == '\n') {
285*a372823aSPaolo Bonzini         lexer->x = 0;
286*a372823aSPaolo Bonzini         lexer->y++;
287*a372823aSPaolo Bonzini     }
288*a372823aSPaolo Bonzini 
289*a372823aSPaolo Bonzini     do {
290*a372823aSPaolo Bonzini         new_state = json_lexer[lexer->state][(uint8_t)ch];
291*a372823aSPaolo Bonzini         char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
292*a372823aSPaolo Bonzini         if (char_consumed) {
293*a372823aSPaolo Bonzini             qstring_append_chr(lexer->token, ch);
294*a372823aSPaolo Bonzini         }
295*a372823aSPaolo Bonzini 
296*a372823aSPaolo Bonzini         switch (new_state) {
297*a372823aSPaolo Bonzini         case JSON_OPERATOR:
298*a372823aSPaolo Bonzini         case JSON_ESCAPE:
299*a372823aSPaolo Bonzini         case JSON_INTEGER:
300*a372823aSPaolo Bonzini         case JSON_FLOAT:
301*a372823aSPaolo Bonzini         case JSON_KEYWORD:
302*a372823aSPaolo Bonzini         case JSON_STRING:
303*a372823aSPaolo Bonzini             lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
304*a372823aSPaolo Bonzini             /* fall through */
305*a372823aSPaolo Bonzini         case JSON_SKIP:
306*a372823aSPaolo Bonzini             QDECREF(lexer->token);
307*a372823aSPaolo Bonzini             lexer->token = qstring_new();
308*a372823aSPaolo Bonzini             new_state = IN_START;
309*a372823aSPaolo Bonzini             break;
310*a372823aSPaolo Bonzini         case IN_ERROR:
311*a372823aSPaolo Bonzini             /* XXX: To avoid having previous bad input leaving the parser in an
312*a372823aSPaolo Bonzini              * unresponsive state where we consume unpredictable amounts of
313*a372823aSPaolo Bonzini              * subsequent "good" input, percolate this error state up to the
314*a372823aSPaolo Bonzini              * tokenizer/parser by forcing a NULL object to be emitted, then
315*a372823aSPaolo Bonzini              * reset state.
316*a372823aSPaolo Bonzini              *
317*a372823aSPaolo Bonzini              * Also note that this handling is required for reliable channel
318*a372823aSPaolo Bonzini              * negotiation between QMP and the guest agent, since chr(0xFF)
319*a372823aSPaolo Bonzini              * is placed at the beginning of certain events to ensure proper
320*a372823aSPaolo Bonzini              * delivery when the channel is in an unknown state. chr(0xFF) is
321*a372823aSPaolo Bonzini              * never a valid ASCII/UTF-8 sequence, so this should reliably
322*a372823aSPaolo Bonzini              * induce an error/flush state.
323*a372823aSPaolo Bonzini              */
324*a372823aSPaolo Bonzini             lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
325*a372823aSPaolo Bonzini             QDECREF(lexer->token);
326*a372823aSPaolo Bonzini             lexer->token = qstring_new();
327*a372823aSPaolo Bonzini             new_state = IN_START;
328*a372823aSPaolo Bonzini             lexer->state = new_state;
329*a372823aSPaolo Bonzini             return 0;
330*a372823aSPaolo Bonzini         default:
331*a372823aSPaolo Bonzini             break;
332*a372823aSPaolo Bonzini         }
333*a372823aSPaolo Bonzini         lexer->state = new_state;
334*a372823aSPaolo Bonzini     } while (!char_consumed && !flush);
335*a372823aSPaolo Bonzini 
336*a372823aSPaolo Bonzini     /* Do not let a single token grow to an arbitrarily large size,
337*a372823aSPaolo Bonzini      * this is a security consideration.
338*a372823aSPaolo Bonzini      */
339*a372823aSPaolo Bonzini     if (lexer->token->length > MAX_TOKEN_SIZE) {
340*a372823aSPaolo Bonzini         lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
341*a372823aSPaolo Bonzini         QDECREF(lexer->token);
342*a372823aSPaolo Bonzini         lexer->token = qstring_new();
343*a372823aSPaolo Bonzini         lexer->state = IN_START;
344*a372823aSPaolo Bonzini     }
345*a372823aSPaolo Bonzini 
346*a372823aSPaolo Bonzini     return 0;
347*a372823aSPaolo Bonzini }
348*a372823aSPaolo Bonzini 
349*a372823aSPaolo Bonzini int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
350*a372823aSPaolo Bonzini {
351*a372823aSPaolo Bonzini     size_t i;
352*a372823aSPaolo Bonzini 
353*a372823aSPaolo Bonzini     for (i = 0; i < size; i++) {
354*a372823aSPaolo Bonzini         int err;
355*a372823aSPaolo Bonzini 
356*a372823aSPaolo Bonzini         err = json_lexer_feed_char(lexer, buffer[i], false);
357*a372823aSPaolo Bonzini         if (err < 0) {
358*a372823aSPaolo Bonzini             return err;
359*a372823aSPaolo Bonzini         }
360*a372823aSPaolo Bonzini     }
361*a372823aSPaolo Bonzini 
362*a372823aSPaolo Bonzini     return 0;
363*a372823aSPaolo Bonzini }
364*a372823aSPaolo Bonzini 
365*a372823aSPaolo Bonzini int json_lexer_flush(JSONLexer *lexer)
366*a372823aSPaolo Bonzini {
367*a372823aSPaolo Bonzini     return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
368*a372823aSPaolo Bonzini }
369*a372823aSPaolo Bonzini 
370*a372823aSPaolo Bonzini void json_lexer_destroy(JSONLexer *lexer)
371*a372823aSPaolo Bonzini {
372*a372823aSPaolo Bonzini     QDECREF(lexer->token);
373*a372823aSPaolo Bonzini }
374