json-lexer.c (340db1ed82f8ced40a3e778c08963005369e2926) | json-lexer.c (eddc0a7f0ad84edd0f8dd27d4a70a305ccd7bc5f) |
---|---|
1/* 2 * JSON lexer 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * --- 4 unchanged lines hidden (view full) --- 13 14#include "qemu/osdep.h" 15#include "qemu-common.h" 16#include "qapi/qmp/json-lexer.h" 17 18#define MAX_TOKEN_SIZE (64ULL << 20) 19 20/* | 1/* 2 * JSON lexer 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * --- 4 unchanged lines hidden (view full) --- 13 14#include "qemu/osdep.h" 15#include "qemu-common.h" 16#include "qapi/qmp/json-lexer.h" 17 18#define MAX_TOKEN_SIZE (64ULL << 20) 19 20/* |
21 * Required by JSON (RFC 7159): | 21 * From RFC 8259 "The JavaScript Object Notation (JSON) Data 22 * Interchange Format", with [comments in brackets]: |
22 * | 23 * |
23 * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*\" 24 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)? 25 * [{}\[\],:] 26 * [a-z]+ # covers null, true, false | 24 * The set of tokens includes six structural characters, strings, 25 * numbers, and three literal names. |
27 * | 26 * |
28 * Extension of '' strings: | 27 * These are the six structural characters: |
29 * | 28 * |
30 * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*' | 29 * begin-array = ws %x5B ws ; [ left square bracket 30 * begin-object = ws %x7B ws ; { left curly bracket 31 * end-array = ws %x5D ws ; ] right square bracket 32 * end-object = ws %x7D ws ; } right curly bracket 33 * name-separator = ws %x3A ws ; : colon 34 * value-separator = ws %x2C ws ; , comma |
31 * | 35 * |
32 * Extension for vararg handling in JSON construction: | 36 * Insignificant whitespace is allowed before or after any of the six 37 * structural characters. 38 * [This lexer accepts it before or after any token, which is actually 39 * the same, as the grammar always has structural characters between 40 * other tokens.] |
33 * | 41 * |
34 * %((l|ll|I64)?d|[ipsf]) | 42 * ws = *( 43 * %x20 / ; Space 44 * %x09 / ; Horizontal tab 45 * %x0A / ; Line feed or New line 46 * %x0D ) ; Carriage return |
35 * | 47 * |
48 * [...] three literal names: 49 * false null true 50 * [This lexer accepts [a-z]+, and leaves rejecting unknown literal 51 * names to the parser.] 52 * 53 * [Numbers:] 54 * 55 * number = [ minus ] int [ frac ] [ exp ] 56 * decimal-point = %x2E ; . 57 * digit1-9 = %x31-39 ; 1-9 58 * e = %x65 / %x45 ; e E 59 * exp = e [ minus / plus ] 1*DIGIT 60 * frac = decimal-point 1*DIGIT 61 * int = zero / ( digit1-9 *DIGIT ) 62 * minus = %x2D ; - 63 * plus = %x2B ; + 64 * zero = %x30 ; 0 65 * 66 * [Strings:] 67 * string = quotation-mark *char quotation-mark 68 * 69 * char = unescaped / 70 * escape ( 71 * %x22 / ; " quotation mark U+0022 72 * %x5C / ; \ reverse solidus U+005C 73 * %x2F / ; / solidus U+002F 74 * %x62 / ; b backspace U+0008 75 * %x66 / ; f form feed U+000C 76 * %x6E / ; n line feed U+000A 77 * %x72 / ; r carriage return U+000D 78 * %x74 / ; t tab U+0009 79 * %x75 4HEXDIG ) ; uXXXX U+XXXX 80 * escape = %x5C ; \ 81 * quotation-mark = %x22 ; " 82 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF 83 * 84 * 85 * Extensions over RFC 8259: 86 * - Extra escape sequence in strings: 87 * 0x27 (apostrophe) is recognized after escape, too 88 * - Single-quoted strings: 89 * Like double-quoted strings, except they're delimited by %x27 90 * (apostrophe) instead of %x22 (quotation mark), and can't contain 91 * unescaped apostrophe, but can contain unescaped quotation mark. 92 * - Interpolation: 93 * interpolation = %((l|ll|I64)?[du]|[ipsf]) 94 * 95 * Note: 96 * - Input must be encoded in UTF-8. 97 * - Decoding and validating is left to the parser. |
|
36 */ 37 38enum json_lexer_state { 39 IN_ERROR = 0, /* must really be 0, see json_lexer[] */ 40 IN_DQ_UCODE3, 41 IN_DQ_UCODE2, 42 IN_DQ_UCODE1, 43 IN_DQ_UCODE0, --- 347 unchanged lines hidden --- | 98 */ 99 100enum json_lexer_state { 101 IN_ERROR = 0, /* must really be 0, see json_lexer[] */ 102 IN_DQ_UCODE3, 103 IN_DQ_UCODE2, 104 IN_DQ_UCODE1, 105 IN_DQ_UCODE0, --- 347 unchanged lines hidden --- |