xref: /openbmc/qemu/scripts/qapi/parser.py (revision cd87c14cde5db42a2f13bfdbba1f3cbeb347a411)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import (
21    Dict,
22    List,
23    Optional,
24    Set,
25    Union,
26)
27
28from .common import must_match
29from .error import QAPISemError, QAPISourceError
30from .source import QAPISourceInfo
31
32
# Return value alias for get_expr(): a parsed schema value, which is
# either a list, an object (dict with str keys), a string, or a bool.
_ExprValue = Union[List[object], Dict[str, object], str, bool]
35
36
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The reported column is computed from the parser's lexer state: it
    is the 1-based column of ``parser.pos`` within the line starting
    at ``parser.line_pos``, with tab stops every 8 columns.

    :param parser: The parser whose current position is reported.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Jump to the column following the next tab stop
                # (columns 9, 17, 25, ...).  The previous expression,
                # (col + 7) % 8 + 1, merely wrapped the column into
                # 1..8 and never advanced it.
                col += 8 - (col - 1) % 8
            else:
                col += 1
        super().__init__(parser.info, msg, col)
47
48
49class QAPISchemaParser:
50    """
51    Parse QAPI schema source.
52
53    Parse a JSON-esque schema file and process directives.  See
54    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
55    Grammatical validation is handled later by `expr.check_exprs()`.
56
57    :param fname: Source file name.
58    :param previously_included:
59        The absolute names of previously included source files,
60        if being invoked from another parser.
61    :param incl_info:
62       `QAPISourceInfo` belonging to the parent module.
63       ``None`` implies this is the root module.
64
65    :ivar exprs: Resulting parsed expressions.
66    :ivar docs: Resulting parsed documentation blocks.
67
68    :raise OSError: For problems reading the root schema document.
69    :raise QAPIError: For errors in the schema source.
70    """
71    def __init__(self,
72                 fname: str,
73                 previously_included: Optional[Set[str]] = None,
74                 incl_info: Optional[QAPISourceInfo] = None):
75        self._fname = fname
76        self._included = previously_included or set()
77        self._included.add(os.path.abspath(self._fname))
78        self.src = ''
79
80        # Lexer state (see `accept` for details):
81        self.info = QAPISourceInfo(self._fname, incl_info)
82        self.tok: Union[None, str] = None
83        self.pos = 0
84        self.cursor = 0
85        self.val: Optional[Union[bool, str]] = None
86        self.line_pos = 0
87
88        # Parser output:
89        self.exprs: List[Dict[str, object]] = []
90        self.docs: List[QAPIDoc] = []
91
92        # Showtime!
93        self._parse()
94
95    def _parse(self) -> None:
96        """
97        Parse the QAPI schema document.
98
99        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
100        """
101        cur_doc = None
102
103        # May raise OSError; allow the caller to handle it.
104        with open(self._fname, 'r', encoding='utf-8') as fp:
105            self.src = fp.read()
106        if self.src == '' or self.src[-1] != '\n':
107            self.src += '\n'
108
109        # Prime the lexer:
110        self.accept()
111
112        # Parse until done:
113        while self.tok is not None:
114            info = self.info
115            if self.tok == '#':
116                self.reject_expr_doc(cur_doc)
117                for cur_doc in self.get_doc(info):
118                    self.docs.append(cur_doc)
119                continue
120
121            expr = self.get_expr()
122            if not isinstance(expr, dict):
123                raise QAPISemError(
124                    info, "top-level expression must be an object")
125
126            if 'include' in expr:
127                self.reject_expr_doc(cur_doc)
128                if len(expr) != 1:
129                    raise QAPISemError(info, "invalid 'include' directive")
130                include = expr['include']
131                if not isinstance(include, str):
132                    raise QAPISemError(info,
133                                       "value of 'include' must be a string")
134                incl_fname = os.path.join(os.path.dirname(self._fname),
135                                          include)
136                self.exprs.append({'expr': {'include': incl_fname},
137                                   'info': info})
138                exprs_include = self._include(include, info, incl_fname,
139                                              self._included)
140                if exprs_include:
141                    self.exprs.extend(exprs_include.exprs)
142                    self.docs.extend(exprs_include.docs)
143            elif "pragma" in expr:
144                self.reject_expr_doc(cur_doc)
145                if len(expr) != 1:
146                    raise QAPISemError(info, "invalid 'pragma' directive")
147                pragma = expr['pragma']
148                if not isinstance(pragma, dict):
149                    raise QAPISemError(
150                        info, "value of 'pragma' must be an object")
151                for name, value in pragma.items():
152                    self._pragma(name, value, info)
153            else:
154                expr_elem = {'expr': expr,
155                             'info': info}
156                if cur_doc:
157                    if not cur_doc.symbol:
158                        raise QAPISemError(
159                            cur_doc.info, "definition documentation required")
160                    expr_elem['doc'] = cur_doc
161                self.exprs.append(expr_elem)
162            cur_doc = None
163        self.reject_expr_doc(cur_doc)
164
165    @staticmethod
166    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
167        if doc and doc.symbol:
168            raise QAPISemError(
169                doc.info,
170                "documentation for '%s' is not followed by the definition"
171                % doc.symbol)
172
173    @staticmethod
174    def _include(include: str,
175                 info: QAPISourceInfo,
176                 incl_fname: str,
177                 previously_included: Set[str]
178                 ) -> Optional['QAPISchemaParser']:
179        incl_abs_fname = os.path.abspath(incl_fname)
180        # catch inclusion cycle
181        inf: Optional[QAPISourceInfo] = info
182        while inf:
183            if incl_abs_fname == os.path.abspath(inf.fname):
184                raise QAPISemError(info, "inclusion loop for %s" % include)
185            inf = inf.parent
186
187        # skip multiple include of the same file
188        if incl_abs_fname in previously_included:
189            return None
190
191        try:
192            return QAPISchemaParser(incl_fname, previously_included, info)
193        except OSError as err:
194            raise QAPISemError(
195                info,
196                f"can't read include file '{incl_fname}': {err.strerror}"
197            ) from err
198
199    @staticmethod
200    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
201
202        def check_list_str(name: str, value: object) -> List[str]:
203            if (not isinstance(value, list) or
204                    any(not isinstance(elt, str) for elt in value)):
205                raise QAPISemError(
206                    info,
207                    "pragma %s must be a list of strings" % name)
208            return value
209
210        pragma = info.pragma
211
212        if name == 'doc-required':
213            if not isinstance(value, bool):
214                raise QAPISemError(info,
215                                   "pragma 'doc-required' must be boolean")
216            pragma.doc_required = value
217        elif name == 'command-name-exceptions':
218            pragma.command_name_exceptions = check_list_str(name, value)
219        elif name == 'command-returns-exceptions':
220            pragma.command_returns_exceptions = check_list_str(name, value)
221        elif name == 'member-name-exceptions':
222            pragma.member_name_exceptions = check_list_str(name, value)
223        else:
224            raise QAPISemError(info, "unknown pragma '%s'" % name)
225
226    def accept(self, skip_comment: bool = True) -> None:
227        """
228        Read and store the next token.
229
230        :param skip_comment:
231            When false, return COMMENT tokens ("#").
232            This is used when reading documentation blocks.
233
234        :return:
235            None.  Several instance attributes are updated instead:
236
237            - ``.tok`` represents the token type.  See below for values.
238            - ``.info`` describes the token's source location.
239            - ``.val`` is the token's value, if any.  See below.
240            - ``.pos`` is the buffer index of the first character of
241              the token.
242
243        * Single-character tokens:
244
245            These are "{", "}", ":", ",", "[", and "]".
246            ``.tok`` holds the single character and ``.val`` is None.
247
248        * Multi-character tokens:
249
250          * COMMENT:
251
252            This token is not normally returned by the lexer, but it can
253            be when ``skip_comment`` is False.  ``.tok`` is "#", and
254            ``.val`` is a string including all chars until end-of-line,
255            including the "#" itself.
256
257          * STRING:
258
259            ``.tok`` is "'", the single quote.  ``.val`` contains the
260            string, excluding the surrounding quotes.
261
262          * TRUE and FALSE:
263
264            ``.tok`` is either "t" or "f", ``.val`` will be the
265            corresponding bool value.
266
267          * EOF:
268
269            ``.tok`` and ``.val`` will both be None at EOF.
270        """
271        while True:
272            self.tok = self.src[self.cursor]
273            self.pos = self.cursor
274            self.cursor += 1
275            self.val = None
276
277            if self.tok == '#':
278                if self.src[self.cursor] == '#':
279                    # Start of doc comment
280                    skip_comment = False
281                self.cursor = self.src.find('\n', self.cursor)
282                if not skip_comment:
283                    self.val = self.src[self.pos:self.cursor]
284                    return
285            elif self.tok in '{}:,[]':
286                return
287            elif self.tok == "'":
288                # Note: we accept only printable ASCII
289                string = ''
290                esc = False
291                while True:
292                    ch = self.src[self.cursor]
293                    self.cursor += 1
294                    if ch == '\n':
295                        raise QAPIParseError(self, "missing terminating \"'\"")
296                    if esc:
297                        # Note: we recognize only \\ because we have
298                        # no use for funny characters in strings
299                        if ch != '\\':
300                            raise QAPIParseError(self,
301                                                 "unknown escape \\%s" % ch)
302                        esc = False
303                    elif ch == '\\':
304                        esc = True
305                        continue
306                    elif ch == "'":
307                        self.val = string
308                        return
309                    if ord(ch) < 32 or ord(ch) >= 127:
310                        raise QAPIParseError(
311                            self, "funny character in string")
312                    string += ch
313            elif self.src.startswith('true', self.pos):
314                self.val = True
315                self.cursor += 3
316                return
317            elif self.src.startswith('false', self.pos):
318                self.val = False
319                self.cursor += 4
320                return
321            elif self.tok == '\n':
322                if self.cursor == len(self.src):
323                    self.tok = None
324                    return
325                self.info = self.info.next_line()
326                self.line_pos = self.cursor
327            elif not self.tok.isspace():
328                # Show up to next structural, whitespace or quote
329                # character
330                match = must_match('[^[\\]{}:,\\s\'"]+',
331                                   self.src[self.cursor-1:])
332                raise QAPIParseError(self, "stray '%s'" % match.group(0))
333
334    def get_members(self) -> Dict[str, object]:
335        expr: Dict[str, object] = OrderedDict()
336        if self.tok == '}':
337            self.accept()
338            return expr
339        if self.tok != "'":
340            raise QAPIParseError(self, "expected string or '}'")
341        while True:
342            key = self.val
343            assert isinstance(key, str)  # Guaranteed by tok == "'"
344
345            self.accept()
346            if self.tok != ':':
347                raise QAPIParseError(self, "expected ':'")
348            self.accept()
349            if key in expr:
350                raise QAPIParseError(self, "duplicate key '%s'" % key)
351            expr[key] = self.get_expr()
352            if self.tok == '}':
353                self.accept()
354                return expr
355            if self.tok != ',':
356                raise QAPIParseError(self, "expected ',' or '}'")
357            self.accept()
358            if self.tok != "'":
359                raise QAPIParseError(self, "expected string")
360
361    def get_values(self) -> List[object]:
362        expr: List[object] = []
363        if self.tok == ']':
364            self.accept()
365            return expr
366        if self.tok not in tuple("{['tf"):
367            raise QAPIParseError(
368                self, "expected '{', '[', ']', string, or boolean")
369        while True:
370            expr.append(self.get_expr())
371            if self.tok == ']':
372                self.accept()
373                return expr
374            if self.tok != ',':
375                raise QAPIParseError(self, "expected ',' or ']'")
376            self.accept()
377
378    def get_expr(self) -> _ExprValue:
379        expr: _ExprValue
380        if self.tok == '{':
381            self.accept()
382            expr = self.get_members()
383        elif self.tok == '[':
384            self.accept()
385            expr = self.get_values()
386        elif self.tok in tuple("'tf"):
387            assert isinstance(self.val, (str, bool))
388            expr = self.val
389            self.accept()
390        else:
391            raise QAPIParseError(
392                self, "expected '{', '[', string, or boolean")
393        return expr
394
395    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
396        if self.val != '##':
397            raise QAPIParseError(
398                self, "junk after '##' at start of documentation comment")
399
400        docs = []
401        cur_doc = QAPIDoc(self, info)
402        self.accept(False)
403        while self.tok == '#':
404            assert isinstance(self.val, str)
405            if self.val.startswith('##'):
406                # End of doc comment
407                if self.val != '##':
408                    raise QAPIParseError(
409                        self,
410                        "junk after '##' at end of documentation comment")
411                cur_doc.end_comment()
412                docs.append(cur_doc)
413                self.accept()
414                return docs
415            if self.val.startswith('# ='):
416                if cur_doc.symbol:
417                    raise QAPIParseError(
418                        self,
419                        "unexpected '=' markup in definition documentation")
420                if cur_doc.body.text:
421                    cur_doc.end_comment()
422                    docs.append(cur_doc)
423                    cur_doc = QAPIDoc(self, info)
424            cur_doc.append(self.val)
425            self.accept(False)
426
427        raise QAPIParseError(self, "documentation comment must end with '##'")
428
429
430class QAPIDoc:
431    """
432    A documentation comment block, either definition or free-form
433
434    Definition documentation blocks consist of
435
436    * a body section: one line naming the definition, followed by an
437      overview (any number of lines)
438
439    * argument sections: a description of each argument (for commands
440      and events) or member (for structs, unions and alternates)
441
442    * features sections: a description of each feature flag
443
444    * additional (non-argument) sections, possibly tagged
445
446    Free-form documentation blocks consist only of a body section.
447    """
448
449    class Section:
450        def __init__(self, parser, name=None, indent=0):
451            # parser, for error messages about indentation
452            self._parser = parser
453            # optional section name (argument/member or section name)
454            self.name = name
455            self.text = ''
456            # the expected indent level of the text of this section
457            self._indent = indent
458
459        def append(self, line):
460            # Strip leading spaces corresponding to the expected indent level
461            # Blank lines are always OK.
462            if line:
463                indent = must_match(r'\s*', line).end()
464                if indent < self._indent:
465                    raise QAPIParseError(
466                        self._parser,
467                        "unexpected de-indent (expected at least %d spaces)" %
468                        self._indent)
469                line = line[self._indent:]
470
471            self.text += line.rstrip() + '\n'
472
473    class ArgSection(Section):
474        def __init__(self, parser, name, indent=0):
475            super().__init__(parser, name, indent)
476            self.member = None
477
478        def connect(self, member):
479            self.member = member
480
481    def __init__(self, parser, info):
482        # self._parser is used to report errors with QAPIParseError.  The
483        # resulting error position depends on the state of the parser.
484        # It happens to be the beginning of the comment.  More or less
485        # servicable, but action at a distance.
486        self._parser = parser
487        self.info = info
488        self.symbol = None
489        self.body = QAPIDoc.Section(parser)
490        # dict mapping parameter name to ArgSection
491        self.args = OrderedDict()
492        self.features = OrderedDict()
493        # a list of Section
494        self.sections = []
495        # the current section
496        self._section = self.body
497        self._append_line = self._append_body_line
498
499    def has_section(self, name):
500        """Return True if we have a section with this name."""
501        for i in self.sections:
502            if i.name == name:
503                return True
504        return False
505
506    def append(self, line):
507        """
508        Parse a comment line and add it to the documentation.
509
510        The way that the line is dealt with depends on which part of
511        the documentation we're parsing right now:
512        * The body section: ._append_line is ._append_body_line
513        * An argument section: ._append_line is ._append_args_line
514        * A features section: ._append_line is ._append_features_line
515        * An additional section: ._append_line is ._append_various_line
516        """
517        line = line[1:]
518        if not line:
519            self._append_freeform(line)
520            return
521
522        if line[0] != ' ':
523            raise QAPIParseError(self._parser, "missing space after #")
524        line = line[1:]
525        self._append_line(line)
526
    def end_comment(self):
        """Finish the documentation block by ending the current section."""
        self._end_section()
529
530    @staticmethod
531    def _is_section_tag(name):
532        return name in ('Returns:', 'Since:',
533                        # those are often singular or plural
534                        'Note:', 'Notes:',
535                        'Example:', 'Examples:',
536                        'TODO:')
537
538    def _append_body_line(self, line):
539        """
540        Process a line of documentation text in the body section.
541
542        If this a symbol line and it is the section's first line, this
543        is a definition documentation block for that symbol.
544
545        If it's a definition documentation block, another symbol line
546        begins the argument section for the argument named by it, and
547        a section tag begins an additional section.  Start that
548        section and append the line to it.
549
550        Else, append the line to the current section.
551        """
552        name = line.split(' ', 1)[0]
553        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
554        # recognized, and get silently treated as ordinary text
555        if not self.symbol and not self.body.text and line.startswith('@'):
556            if not line.endswith(':'):
557                raise QAPIParseError(self._parser, "line should end with ':'")
558            self.symbol = line[1:-1]
559            # Invalid names are not checked here, but the name provided MUST
560            # match the following definition, which *is* validated in expr.py.
561            if not self.symbol:
562                raise QAPIParseError(
563                    self._parser, "name required after '@'")
564        elif self.symbol:
565            # This is a definition documentation block
566            if name.startswith('@') and name.endswith(':'):
567                self._append_line = self._append_args_line
568                self._append_args_line(line)
569            elif line == 'Features:':
570                self._append_line = self._append_features_line
571            elif self._is_section_tag(name):
572                self._append_line = self._append_various_line
573                self._append_various_line(line)
574            else:
575                self._append_freeform(line)
576        else:
577            # This is a free-form documentation block
578            self._append_freeform(line)
579
580    def _append_args_line(self, line):
581        """
582        Process a line of documentation text in an argument section.
583
584        A symbol line begins the next argument section, a section tag
585        section or a non-indented line after a blank line begins an
586        additional section.  Start that section and append the line to
587        it.
588
589        Else, append the line to the current section.
590
591        """
592        name = line.split(' ', 1)[0]
593
594        if name.startswith('@') and name.endswith(':'):
595            # If line is "@arg:   first line of description", find
596            # the index of 'f', which is the indent we expect for any
597            # following lines.  We then remove the leading "@arg:"
598            # from line and replace it with spaces so that 'f' has the
599            # same index as it did in the original line and can be
600            # handled the same way we will handle following lines.
601            indent = must_match(r'@\S*:\s*', line).end()
602            line = line[indent:]
603            if not line:
604                # Line was just the "@arg:" header; following lines
605                # are not indented
606                indent = 0
607            else:
608                line = ' ' * indent + line
609            self._start_args_section(name[1:-1], indent)
610        elif self._is_section_tag(name):
611            self._append_line = self._append_various_line
612            self._append_various_line(line)
613            return
614        elif (self._section.text.endswith('\n\n')
615              and line and not line[0].isspace()):
616            if line == 'Features:':
617                self._append_line = self._append_features_line
618            else:
619                self._start_section()
620                self._append_line = self._append_various_line
621                self._append_various_line(line)
622            return
623
624        self._append_freeform(line)
625
626    def _append_features_line(self, line):
627        name = line.split(' ', 1)[0]
628
629        if name.startswith('@') and name.endswith(':'):
630            # If line is "@arg:   first line of description", find
631            # the index of 'f', which is the indent we expect for any
632            # following lines.  We then remove the leading "@arg:"
633            # from line and replace it with spaces so that 'f' has the
634            # same index as it did in the original line and can be
635            # handled the same way we will handle following lines.
636            indent = must_match(r'@\S*:\s*', line).end()
637            line = line[indent:]
638            if not line:
639                # Line was just the "@arg:" header; following lines
640                # are not indented
641                indent = 0
642            else:
643                line = ' ' * indent + line
644            self._start_features_section(name[1:-1], indent)
645        elif self._is_section_tag(name):
646            self._append_line = self._append_various_line
647            self._append_various_line(line)
648            return
649        elif (self._section.text.endswith('\n\n')
650              and line and not line[0].isspace()):
651            self._start_section()
652            self._append_line = self._append_various_line
653            self._append_various_line(line)
654            return
655
656        self._append_freeform(line)
657
658    def _append_various_line(self, line):
659        """
660        Process a line of documentation text in an additional section.
661
662        A symbol line is an error.
663
664        A section tag begins an additional section.  Start that
665        section and append the line to it.
666
667        Else, append the line to the current section.
668        """
669        name = line.split(' ', 1)[0]
670
671        if name.startswith('@') and name.endswith(':'):
672            raise QAPIParseError(self._parser,
673                                 "'%s' can't follow '%s' section"
674                                 % (name, self.sections[0].name))
675        if self._is_section_tag(name):
676            # If line is "Section:   first line of description", find
677            # the index of 'f', which is the indent we expect for any
678            # following lines.  We then remove the leading "Section:"
679            # from line and replace it with spaces so that 'f' has the
680            # same index as it did in the original line and can be
681            # handled the same way we will handle following lines.
682            indent = must_match(r'\S*:\s*', line).end()
683            line = line[indent:]
684            if not line:
685                # Line was just the "Section:" header; following lines
686                # are not indented
687                indent = 0
688            else:
689                line = ' ' * indent + line
690            self._start_section(name[:-1], indent)
691
692        self._append_freeform(line)
693
694    def _start_symbol_section(self, symbols_dict, name, indent):
695        # FIXME invalid names other than the empty string aren't flagged
696        if not name:
697            raise QAPIParseError(self._parser, "invalid parameter name")
698        if name in symbols_dict:
699            raise QAPIParseError(self._parser,
700                                 "'%s' parameter name duplicated" % name)
701        assert not self.sections
702        self._end_section()
703        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
704        symbols_dict[name] = self._section
705
706    def _start_args_section(self, name, indent):
707        self._start_symbol_section(self.args, name, indent)
708
709    def _start_features_section(self, name, indent):
710        self._start_symbol_section(self.features, name, indent)
711
712    def _start_section(self, name=None, indent=0):
713        if name in ('Returns', 'Since') and self.has_section(name):
714            raise QAPIParseError(self._parser,
715                                 "duplicated '%s' section" % name)
716        self._end_section()
717        self._section = QAPIDoc.Section(self._parser, name, indent)
718        self.sections.append(self._section)
719
720    def _end_section(self):
721        if self._section:
722            text = self._section.text = self._section.text.strip()
723            if self._section.name and (not text or text.isspace()):
724                raise QAPIParseError(
725                    self._parser,
726                    "empty doc section '%s'" % self._section.name)
727            self._section = None
728
729    def _append_freeform(self, line):
730        match = re.match(r'(@\S+:)', line)
731        if match:
732            raise QAPIParseError(self._parser,
733                                 "'%s' not allowed in free-form documentation"
734                                 % match.group(1))
735        self._section.append(line)
736
737    def connect_member(self, member):
738        if member.name not in self.args:
739            # Undocumented TODO outlaw
740            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
741                                                        member.name)
742        self.args[member.name].connect(member)
743
744    def connect_feature(self, feature):
745        if feature.name not in self.features:
746            raise QAPISemError(feature.info,
747                               "feature '%s' lacks documentation"
748                               % feature.name)
749        self.features[feature.name].connect(feature)
750
751    def check_expr(self, expr):
752        if self.has_section('Returns') and 'command' not in expr:
753            raise QAPISemError(self.info,
754                               "'Returns:' is only valid for commands")
755
756    def check(self):
757
758        def check_args_section(args, what):
759            bogus = [name for name, section in args.items()
760                     if not section.member]
761            if bogus:
762                raise QAPISemError(
763                    self.info,
764                    "documented %s%s '%s' %s not exist" % (
765                        what,
766                        "s" if len(bogus) > 1 else "",
767                        "', '".join(bogus),
768                        "do" if len(bogus) > 1 else "does"
769                    ))
770
771        check_args_section(self.args, 'member')
772        check_args_section(self.features, 'feature')
773