xref: /openbmc/qemu/scripts/qapi/parser.py (revision 2e2097b4)
1# -*- coding: utf-8 -*-
3# QAPI schema parser
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
17from collections import OrderedDict
18import os
19import re
20from typing import (
22    Dict,
23    List,
24    Mapping,
25    Optional,
26    Set,
27    Union,
30from .common import must_match
31from .error import QAPISemError, QAPISourceError
32from .source import QAPISourceInfo
36    # pylint: disable=cyclic-import
37    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
38    from .schema import QAPISchemaFeature, QAPISchemaMember
# Return value alias for get_expr(): a parsed value is a list (from
# get_values()), a dict (from get_members()), a string, or a bool.
_ExprValue = Union[List[object], Dict[str, object], str, bool]
class QAPIExpression(Dict[str, object]):
    """
    A parsed top-level expression: a dict that also carries metadata.

    :param data: The expression's key/value pairs; copied into the dict.
    :param info: Source location, stored as ``.info``.
    :param doc: Associated documentation block, stored as ``.doc``.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        # Metadata lives in instance attributes, not in the dict itself.
        self.doc: Optional['QAPIDoc'] = doc
        self.info = info
        super().__init__(data)
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error is located at the parser's current token.  The column is
    computed from the text between the start of the current line
    (``parser.line_pos``) and the token (``parser.pos``).  Columns are
    counted from 1, with tab stops every 8 columns.

    :param parser: The parser whose state locates the error.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 9, 17, 25, ...)
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
68class QAPISchemaParser:
69    """
70    Parse QAPI schema source.
72    Parse a JSON-esque schema file and process directives.  See
73    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
74    Grammatical validation is handled later by `expr.check_exprs()`.
76    :param fname: Source file name.
77    :param previously_included:
78        The absolute names of previously included source files,
79        if being invoked from another parser.
80    :param incl_info:
81       `QAPISourceInfo` belonging to the parent module.
82       ``None`` implies this is the root module.
84    :ivar exprs: Resulting parsed expressions.
85    :ivar docs: Resulting parsed documentation blocks.
87    :raise OSError: For problems reading the root schema document.
88    :raise QAPIError: For errors in the schema source.
89    """
90    def __init__(self,
91                 fname: str,
92                 previously_included: Optional[Set[str]] = None,
93                 incl_info: Optional[QAPISourceInfo] = None):
94        self._fname = fname
95        self._included = previously_included or set()
96        self._included.add(os.path.abspath(self._fname))
97        self.src = ''
99        # Lexer state (see `accept` for details):
100        self.info = QAPISourceInfo(self._fname, incl_info)
101        self.tok: Union[None, str] = None
102        self.pos = 0
103        self.cursor = 0
104        self.val: Optional[Union[bool, str]] = None
105        self.line_pos = 0
107        # Parser output:
108        self.exprs: List[QAPIExpression] = []
109        self.docs: List[QAPIDoc] = []
111        # Showtime!
112        self._parse()
114    def _parse(self) -> None:
115        """
116        Parse the QAPI schema document.
118        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
119        """
120        cur_doc = None
122        # May raise OSError; allow the caller to handle it.
123        with open(self._fname, 'r', encoding='utf-8') as fp:
124            self.src = fp.read()
125        if self.src == '' or self.src[-1] != '\n':
126            self.src += '\n'
128        # Prime the lexer:
129        self.accept()
131        # Parse until done:
132        while self.tok is not None:
133            info = self.info
134            if self.tok == '#':
135                self.reject_expr_doc(cur_doc)
136                for cur_doc in self.get_doc(info):
137                    self.docs.append(cur_doc)
138                continue
140            expr = self.get_expr()
141            if not isinstance(expr, dict):
142                raise QAPISemError(
143                    info, "top-level expression must be an object")
145            if 'include' in expr:
146                self.reject_expr_doc(cur_doc)
147                if len(expr) != 1:
148                    raise QAPISemError(info, "invalid 'include' directive")
149                include = expr['include']
150                if not isinstance(include, str):
151                    raise QAPISemError(info,
152                                       "value of 'include' must be a string")
153                incl_fname = os.path.join(os.path.dirname(self._fname),
154                                          include)
155                self._add_expr(OrderedDict({'include': incl_fname}), info)
156                exprs_include = self._include(include, info, incl_fname,
157                                              self._included)
158                if exprs_include:
159                    self.exprs.extend(exprs_include.exprs)
160                    self.docs.extend(exprs_include.docs)
161            elif "pragma" in expr:
162                self.reject_expr_doc(cur_doc)
163                if len(expr) != 1:
164                    raise QAPISemError(info, "invalid 'pragma' directive")
165                pragma = expr['pragma']
166                if not isinstance(pragma, dict):
167                    raise QAPISemError(
168                        info, "value of 'pragma' must be an object")
169                for name, value in pragma.items():
170                    self._pragma(name, value, info)
171            else:
172                if cur_doc and not cur_doc.symbol:
173                    raise QAPISemError(
174                        cur_doc.info, "definition documentation required")
175                self._add_expr(expr, info, cur_doc)
176            cur_doc = None
177        self.reject_expr_doc(cur_doc)
179    def _add_expr(self, expr: Mapping[str, object],
180                  info: QAPISourceInfo,
181                  doc: Optional['QAPIDoc'] = None) -> None:
182        self.exprs.append(QAPIExpression(expr, info, doc))
184    @staticmethod
185    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
186        if doc and doc.symbol:
187            raise QAPISemError(
188                doc.info,
189                "documentation for '%s' is not followed by the definition"
190                % doc.symbol)
192    @staticmethod
193    def _include(include: str,
194                 info: QAPISourceInfo,
195                 incl_fname: str,
196                 previously_included: Set[str]
197                 ) -> Optional['QAPISchemaParser']:
198        incl_abs_fname = os.path.abspath(incl_fname)
199        # catch inclusion cycle
200        inf: Optional[QAPISourceInfo] = info
201        while inf:
202            if incl_abs_fname == os.path.abspath(inf.fname):
203                raise QAPISemError(info, "inclusion loop for %s" % include)
204            inf = inf.parent
206        # skip multiple include of the same file
207        if incl_abs_fname in previously_included:
208            return None
210        try:
211            return QAPISchemaParser(incl_fname, previously_included, info)
212        except OSError as err:
213            raise QAPISemError(
214                info,
215                f"can't read include file '{incl_fname}': {err.strerror}"
216            ) from err
218    @staticmethod
219    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
221        def check_list_str(name: str, value: object) -> List[str]:
222            if (not isinstance(value, list) or
223                    any(not isinstance(elt, str) for elt in value)):
224                raise QAPISemError(
225                    info,
226                    "pragma %s must be a list of strings" % name)
227            return value
229        pragma = info.pragma
231        if name == 'doc-required':
232            if not isinstance(value, bool):
233                raise QAPISemError(info,
234                                   "pragma 'doc-required' must be boolean")
235            pragma.doc_required = value
236        elif name == 'command-name-exceptions':
237            pragma.command_name_exceptions = check_list_str(name, value)
238        elif name == 'command-returns-exceptions':
239            pragma.command_returns_exceptions = check_list_str(name, value)
240        elif name == 'member-name-exceptions':
241            pragma.member_name_exceptions = check_list_str(name, value)
242        else:
243            raise QAPISemError(info, "unknown pragma '%s'" % name)
    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.
            A comment starting with "##" is returned regardless.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.
            - ``.cursor`` is the buffer index where reading continues.
            - ``.line_pos`` is the buffer index of the first character
              of the current line.

        :raise QAPIParseError:
            For an unterminated string, a bad escape or character in a
            string, or a stray character outside of any token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            # Tentatively treat the next character as the token; the
            # branches below either return it or loop to skip it.
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Stop at the newline; it is lexed by the next iteration
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # .tok is already "t"; skip the rest of the keyword
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # .tok is already "f"; skip the rest of the keyword
                self.cursor += 4
                return
            elif self.tok == '\n':
                # A newline in the buffer's last position means EOF
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\']+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
353    def get_members(self) -> Dict[str, object]:
354        expr: Dict[str, object] = OrderedDict()
355        if self.tok == '}':
356            self.accept()
357            return expr
358        if self.tok != "'":
359            raise QAPIParseError(self, "expected string or '}'")
360        while True:
361            key = self.val
362            assert isinstance(key, str)  # Guaranteed by tok == "'"
364            self.accept()
365            if self.tok != ':':
366                raise QAPIParseError(self, "expected ':'")
367            self.accept()
368            if key in expr:
369                raise QAPIParseError(self, "duplicate key '%s'" % key)
370            expr[key] = self.get_expr()
371            if self.tok == '}':
372                self.accept()
373                return expr
374            if self.tok != ',':
375                raise QAPIParseError(self, "expected ',' or '}'")
376            self.accept()
377            if self.tok != "'":
378                raise QAPIParseError(self, "expected string")
380    def get_values(self) -> List[object]:
381        expr: List[object] = []
382        if self.tok == ']':
383            self.accept()
384            return expr
385        if self.tok not in tuple("{['tf"):
386            raise QAPIParseError(
387                self, "expected '{', '[', ']', string, or boolean")
388        while True:
389            expr.append(self.get_expr())
390            if self.tok == ']':
391                self.accept()
392                return expr
393            if self.tok != ',':
394                raise QAPIParseError(self, "expected ',' or ']'")
395            self.accept()
397    def get_expr(self) -> _ExprValue:
398        expr: _ExprValue
399        if self.tok == '{':
400            self.accept()
401            expr = self.get_members()
402        elif self.tok == '[':
403            self.accept()
404            expr = self.get_values()
405        elif self.tok in tuple("'tf"):
406            assert isinstance(self.val, (str, bool))
407            expr = self.val
408            self.accept()
409        else:
410            raise QAPIParseError(
411                self, "expected '{', '[', string, or boolean")
412        return expr
414    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
415        if self.val != '##':
416            raise QAPIParseError(
417                self, "junk after '##' at start of documentation comment")
419        docs = []
420        cur_doc = QAPIDoc(self, info)
421        self.accept(False)
422        while self.tok == '#':
423            assert isinstance(self.val, str)
424            if self.val.startswith('##'):
425                # End of doc comment
426                if self.val != '##':
427                    raise QAPIParseError(
428                        self,
429                        "junk after '##' at end of documentation comment")
430                cur_doc.end_comment()
431                docs.append(cur_doc)
432                self.accept()
433                return docs
434            if self.val.startswith('# ='):
435                if cur_doc.symbol:
436                    raise QAPIParseError(
437                        self,
438                        "unexpected '=' markup in definition documentation")
439                if cur_doc.body.text:
440                    cur_doc.end_comment()
441                    docs.append(cur_doc)
442                    cur_doc = QAPIDoc(self, info)
443            cur_doc.append(self.val)
444            self.accept(False)
446        raise QAPIParseError(self, "documentation comment must end with '##'")
449class QAPIDoc:
450    """
451    A documentation comment block, either definition or free-form
453    Definition documentation blocks consist of
455    * a body section: one line naming the definition, followed by an
456      overview (any number of lines)
458    * argument sections: a description of each argument (for commands
459      and events) or member (for structs, unions and alternates)
461    * features sections: a description of each feature flag
463    * additional (non-argument) sections, possibly tagged
465    Free-form documentation blocks consist only of a body section.
466    """
468    class Section:
469        # pylint: disable=too-few-public-methods
470        def __init__(self, parser: QAPISchemaParser,
471                     name: Optional[str] = None):
472            # parser, for error messages about indentation
473            self._parser = parser
474            # optional section name (argument/member or section name)
475            self.name = name
476            # section text without section name
477            self.text = ''
478            # indentation to strip (None means indeterminate)
479            self._indent = None if self.name else 0
481        def append(self, line: str) -> None:
482            line = line.rstrip()
484            if line:
485                indent = must_match(r'\s*', line).end()
486                if self._indent is None:
487                    # indeterminate indentation
488                    if self.text != '':
489                        # non-blank, non-first line determines indentation
490                        self._indent = indent
491                elif indent < self._indent:
492                    raise QAPIParseError(
493                        self._parser,
494                        "unexpected de-indent (expected at least %d spaces)" %
495                        self._indent)
496                line = line[self._indent:]
498            self.text += line + '\n'
500    class ArgSection(Section):
501        def __init__(self, parser: QAPISchemaParser,
502                     name: str):
503            super().__init__(parser, name)
504            self.member: Optional['QAPISchemaMember'] = None
506        def connect(self, member: 'QAPISchemaMember') -> None:
507            self.member = member
    class NullSection(Section):
        """
        Immutable dummy section for use at the end of a doc block.

        end_comment() switches to a section of this class, so that any
        later attempt to append text trips the assertion below.
        """
        # pylint: disable=too-few-public-methods
        def append(self, line: str) -> None:
            assert False, "Text appended after end_comment() called."
517    def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
518        # self._parser is used to report errors with QAPIParseError.  The
519        # resulting error position depends on the state of the parser.
520        # It happens to be the beginning of the comment.  More or less
521        # servicable, but action at a distance.
522        self._parser = parser
523        self.info = info
524        self.symbol: Optional[str] = None
525        self.body = QAPIDoc.Section(parser)
526        # dicts mapping parameter/feature names to their ArgSection
527        self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
528        self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
529        self.sections: List[QAPIDoc.Section] = []
530        # the current section
531        self._section = self.body
532        self._append_line = self._append_body_line
534    def has_section(self, name: str) -> bool:
535        """Return True if we have a section with this name."""
536        for i in self.sections:
537            if i.name == name:
538                return True
539        return False
541    def append(self, line: str) -> None:
542        """
543        Parse a comment line and add it to the documentation.
545        The way that the line is dealt with depends on which part of
546        the documentation we're parsing right now:
547        * The body section: ._append_line is ._append_body_line
548        * An argument section: ._append_line is ._append_args_line
549        * A features section: ._append_line is ._append_features_line
550        * An additional section: ._append_line is ._append_various_line
551        """
552        line = line[1:]
553        if not line:
554            self._append_freeform(line)
555            return
557        if line[0] != ' ':
558            raise QAPIParseError(self._parser, "missing space after #")
559        line = line[1:]
560        self._append_line(line)
562    def end_comment(self) -> None:
563        self._switch_section(QAPIDoc.NullSection(self._parser))
565    @staticmethod
566    def _match_at_name_colon(string: str):
567        return re.match(r'@([^:]*): *', string)
569    @staticmethod
570    def _match_section_tag(string: str):
571        return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string)
573    def _append_body_line(self, line: str) -> None:
574        """
575        Process a line of documentation text in the body section.
577        If this a symbol line and it is the section's first line, this
578        is a definition documentation block for that symbol.
580        If it's a definition documentation block, another symbol line
581        begins the argument section for the argument named by it, and
582        a section tag begins an additional section.  Start that
583        section and append the line to it.
585        Else, append the line to the current section.
586        """
587        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
588        # recognized, and get silently treated as ordinary text
589        if not self.symbol and not self.body.text and line.startswith('@'):
590            if not line.endswith(':'):
591                raise QAPIParseError(self._parser, "line should end with ':'")
592            self.symbol = line[1:-1]
593            # Invalid names are not checked here, but the name provided MUST
594            # match the following definition, which *is* validated in expr.py.
595            if not self.symbol:
596                raise QAPIParseError(
597                    self._parser, "name required after '@'")
598        elif self.symbol:
599            # This is a definition documentation block
600            if self._match_at_name_colon(line):
601                self._append_line = self._append_args_line
602                self._append_args_line(line)
603            elif line == 'Features:':
604                self._append_line = self._append_features_line
605            elif self._match_section_tag(line):
606                self._append_line = self._append_various_line
607                self._append_various_line(line)
608            else:
609                self._append_freeform(line)
610        else:
611            # This is a free-form documentation block
612            self._append_freeform(line)
614    def _append_args_line(self, line: str) -> None:
615        """
616        Process a line of documentation text in an argument section.
618        A symbol line begins the next argument section, a section tag
619        section or a non-indented line after a blank line begins an
620        additional section.  Start that section and append the line to
621        it.
623        Else, append the line to the current section.
625        """
626        match = self._match_at_name_colon(line)
627        if match:
628            line = line[match.end():]
629            self._start_args_section(match.group(1))
630        elif self._match_section_tag(line):
631            self._append_line = self._append_various_line
632            self._append_various_line(line)
633            return
634        elif (self._section.text.endswith('\n\n')
635              and line and not line[0].isspace()):
636            if line == 'Features:':
637                self._append_line = self._append_features_line
638            else:
639                self._start_section()
640                self._append_line = self._append_various_line
641                self._append_various_line(line)
642            return
644        self._append_freeform(line)
646    def _append_features_line(self, line: str) -> None:
647        match = self._match_at_name_colon(line)
648        if match:
649            line = line[match.end():]
650            self._start_features_section(match.group(1))
651        elif self._match_section_tag(line):
652            self._append_line = self._append_various_line
653            self._append_various_line(line)
654            return
655        elif (self._section.text.endswith('\n\n')
656              and line and not line[0].isspace()):
657            self._start_section()
658            self._append_line = self._append_various_line
659            self._append_various_line(line)
660            return
662        self._append_freeform(line)
664    def _append_various_line(self, line: str) -> None:
665        """
666        Process a line of documentation text in an additional section.
668        A symbol line is an error.
670        A section tag begins an additional section.  Start that
671        section and append the line to it.
673        Else, append the line to the current section.
674        """
675        match = self._match_at_name_colon(line)
676        if match:
677            raise QAPIParseError(self._parser,
678                                 "description of '@%s:' follows a section"
679                                 % match.group(1))
680        match = self._match_section_tag(line)
681        if match:
682            line = line[match.end():]
683            self._start_section(match.group(1))
685        self._append_freeform(line)
687    def _start_symbol_section(
688            self,
689            symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
690            name: str) -> None:
691        # FIXME invalid names other than the empty string aren't flagged
692        if not name:
693            raise QAPIParseError(self._parser, "invalid parameter name")
694        if name in symbols_dict:
695            raise QAPIParseError(self._parser,
696                                 "'%s' parameter name duplicated" % name)
697        assert not self.sections
698        new_section = QAPIDoc.ArgSection(self._parser, name)
699        self._switch_section(new_section)
700        symbols_dict[name] = new_section
702    def _start_args_section(self, name: str) -> None:
703        self._start_symbol_section(self.args, name)
705    def _start_features_section(self, name: str) -> None:
706        self._start_symbol_section(self.features, name)
708    def _start_section(self, name: Optional[str] = None) -> None:
709        if name in ('Returns', 'Since') and self.has_section(name):
710            raise QAPIParseError(self._parser,
711                                 "duplicated '%s' section" % name)
712        new_section = QAPIDoc.Section(self._parser, name)
713        self._switch_section(new_section)
714        self.sections.append(new_section)
716    def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
717        text = self._section.text = self._section.text.strip('\n')
719        # Only the 'body' section is allowed to have an empty body.
720        # All other sections, including anonymous ones, must have text.
721        if self._section != self.body and not text:
722            # We do not create anonymous sections unless there is
723            # something to put in them; this is a parser bug.
724            assert self._section.name
725            raise QAPIParseError(
726                self._parser,
727                "empty doc section '%s'" % self._section.name)
729        self._section = new_section
731    def _append_freeform(self, line: str) -> None:
732        match = re.match(r'(@\S+:)', line)
733        if match:
734            raise QAPIParseError(self._parser,
735                                 "'%s' not allowed in free-form documentation"
736                                 % match.group(1))
737        self._section.append(line)
739    def connect_member(self, member: 'QAPISchemaMember') -> None:
740        if member.name not in self.args:
741            # Undocumented TODO outlaw
742            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
743                                                        member.name)
744        self.args[member.name].connect(member)
746    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
747        if feature.name not in self.features:
748            raise QAPISemError(feature.info,
749                               "feature '%s' lacks documentation"
750                               % feature.name)
751        self.features[feature.name].connect(feature)
753    def check_expr(self, expr: QAPIExpression) -> None:
754        if self.has_section('Returns') and 'command' not in expr:
755            raise QAPISemError(self.info,
756                               "'Returns:' is only valid for commands")
758    def check(self) -> None:
760        def check_args_section(
761                args: Dict[str, QAPIDoc.ArgSection], what: str
762        ) -> None:
763            bogus = [name for name, section in args.items()
764                     if not section.member]
765            if bogus:
766                raise QAPISemError(
767                    self.info,
768                    "documented %s%s '%s' %s not exist" % (
769                        what,
770                        "s" if len(bogus) > 1 else "",
771                        "', '".join(bogus),
772                        "do" if len(bogus) > 1 else "does"
773                    ))
775        check_args_section(self.args, 'member')
776        check_args_section(self.features, 'feature')