xref: /openbmc/qemu/scripts/qapi/parser.py (revision 2f95279a)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import (
21    TYPE_CHECKING,
22    Any,
23    Dict,
24    List,
25    Mapping,
26    Match,
27    Optional,
28    Set,
29    Union,
30)
31
32from .common import must_match
33from .error import QAPISemError, QAPISourceError
34from .source import QAPISourceInfo
35
36
37if TYPE_CHECKING:
38    # pylint: disable=cyclic-import
39    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
40    from .schema import QAPISchemaFeature, QAPISchemaMember
41
42
43# Return value alias for get_expr().
44_ExprValue = Union[List[object], Dict[str, object], str, bool]
45
46
class QAPIExpression(Dict[str, Any]):
    """
    A parsed top-level expression together with its provenance.

    Behaves like a plain dict holding the expression's members, and
    additionally carries the expression's source location and the
    documentation block attached to it, if any.

    :param data: The expression's members.
    :param info: Source location of the expression.
    :param doc: Documentation block for the expression, if any.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        # where the expression was found
        self.info = info
        # documentation attached to the expression, None if absent
        self.doc: Optional['QAPIDoc'] = doc
        super().__init__(data)
56
57
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error location is taken from the parser's current lexer state:
    ``parser.info`` supplies the source location, and the column is
    computed from the text between the start of the current line
    (``parser.line_pos``) and the start of the current token
    (``parser.pos``).

    :param parser: The parser that detected the error.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 1, 9, 17, ...).
                # A modulo here would merely wrap the column around
                # instead of moving it forward.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
68
69
70class QAPISchemaParser:
71    """
72    Parse QAPI schema source.
73
74    Parse a JSON-esque schema file and process directives.  See
75    qapi-code-gen.rst section "Schema Syntax" for the exact syntax.
76    Grammatical validation is handled later by `expr.check_exprs()`.
77
78    :param fname: Source file name.
79    :param previously_included:
80        The absolute names of previously included source files,
81        if being invoked from another parser.
82    :param incl_info:
83       `QAPISourceInfo` belonging to the parent module.
84       ``None`` implies this is the root module.
85
86    :ivar exprs: Resulting parsed expressions.
87    :ivar docs: Resulting parsed documentation blocks.
88
89    :raise OSError: For problems reading the root schema document.
90    :raise QAPIError: For errors in the schema source.
91    """
92    def __init__(self,
93                 fname: str,
94                 previously_included: Optional[Set[str]] = None,
95                 incl_info: Optional[QAPISourceInfo] = None):
96        self._fname = fname
97        self._included = previously_included or set()
98        self._included.add(os.path.abspath(self._fname))
99        self.src = ''
100
101        # Lexer state (see `accept` for details):
102        self.info = QAPISourceInfo(self._fname, incl_info)
103        self.tok: Union[None, str] = None
104        self.pos = 0
105        self.cursor = 0
106        self.val: Optional[Union[bool, str]] = None
107        self.line_pos = 0
108
109        # Parser output:
110        self.exprs: List[QAPIExpression] = []
111        self.docs: List[QAPIDoc] = []
112
113        # Showtime!
114        self._parse()
115
116    def _parse(self) -> None:
117        """
118        Parse the QAPI schema document.
119
120        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
121        """
122        cur_doc = None
123
124        # May raise OSError; allow the caller to handle it.
125        with open(self._fname, 'r', encoding='utf-8') as fp:
126            self.src = fp.read()
127        if self.src == '' or self.src[-1] != '\n':
128            self.src += '\n'
129
130        # Prime the lexer:
131        self.accept()
132
133        # Parse until done:
134        while self.tok is not None:
135            info = self.info
136            if self.tok == '#':
137                self.reject_expr_doc(cur_doc)
138                cur_doc = self.get_doc()
139                self.docs.append(cur_doc)
140                continue
141
142            expr = self.get_expr()
143            if not isinstance(expr, dict):
144                raise QAPISemError(
145                    info, "top-level expression must be an object")
146
147            if 'include' in expr:
148                self.reject_expr_doc(cur_doc)
149                if len(expr) != 1:
150                    raise QAPISemError(info, "invalid 'include' directive")
151                include = expr['include']
152                if not isinstance(include, str):
153                    raise QAPISemError(info,
154                                       "value of 'include' must be a string")
155                incl_fname = os.path.join(os.path.dirname(self._fname),
156                                          include)
157                self._add_expr(OrderedDict({'include': incl_fname}), info)
158                exprs_include = self._include(include, info, incl_fname,
159                                              self._included)
160                if exprs_include:
161                    self.exprs.extend(exprs_include.exprs)
162                    self.docs.extend(exprs_include.docs)
163            elif "pragma" in expr:
164                self.reject_expr_doc(cur_doc)
165                if len(expr) != 1:
166                    raise QAPISemError(info, "invalid 'pragma' directive")
167                pragma = expr['pragma']
168                if not isinstance(pragma, dict):
169                    raise QAPISemError(
170                        info, "value of 'pragma' must be an object")
171                for name, value in pragma.items():
172                    self._pragma(name, value, info)
173            else:
174                if cur_doc and not cur_doc.symbol:
175                    raise QAPISemError(
176                        cur_doc.info, "definition documentation required")
177                self._add_expr(expr, info, cur_doc)
178            cur_doc = None
179        self.reject_expr_doc(cur_doc)
180
181    def _add_expr(self, expr: Mapping[str, object],
182                  info: QAPISourceInfo,
183                  doc: Optional['QAPIDoc'] = None) -> None:
184        self.exprs.append(QAPIExpression(expr, info, doc))
185
186    @staticmethod
187    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
188        if doc and doc.symbol:
189            raise QAPISemError(
190                doc.info,
191                "documentation for '%s' is not followed by the definition"
192                % doc.symbol)
193
194    @staticmethod
195    def _include(include: str,
196                 info: QAPISourceInfo,
197                 incl_fname: str,
198                 previously_included: Set[str]
199                 ) -> Optional['QAPISchemaParser']:
200        incl_abs_fname = os.path.abspath(incl_fname)
201        # catch inclusion cycle
202        inf: Optional[QAPISourceInfo] = info
203        while inf:
204            if incl_abs_fname == os.path.abspath(inf.fname):
205                raise QAPISemError(info, "inclusion loop for %s" % include)
206            inf = inf.parent
207
208        # skip multiple include of the same file
209        if incl_abs_fname in previously_included:
210            return None
211
212        try:
213            return QAPISchemaParser(incl_fname, previously_included, info)
214        except OSError as err:
215            raise QAPISemError(
216                info,
217                f"can't read include file '{incl_fname}': {err.strerror}"
218            ) from err
219
220    @staticmethod
221    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
222
223        def check_list_str(name: str, value: object) -> List[str]:
224            if (not isinstance(value, list) or
225                    any(not isinstance(elt, str) for elt in value)):
226                raise QAPISemError(
227                    info,
228                    "pragma %s must be a list of strings" % name)
229            return value
230
231        pragma = info.pragma
232
233        if name == 'doc-required':
234            if not isinstance(value, bool):
235                raise QAPISemError(info,
236                                   "pragma 'doc-required' must be boolean")
237            pragma.doc_required = value
238        elif name == 'command-name-exceptions':
239            pragma.command_name_exceptions = check_list_str(name, value)
240        elif name == 'command-returns-exceptions':
241            pragma.command_returns_exceptions = check_list_str(name, value)
242        elif name == 'documentation-exceptions':
243            pragma.documentation_exceptions = check_list_str(name, value)
244        elif name == 'member-name-exceptions':
245            pragma.member_name_exceptions = check_list_str(name, value)
246        else:
247            raise QAPISemError(info, "unknown pragma '%s'" % name)
248
249    def accept(self, skip_comment: bool = True) -> None:
250        """
251        Read and store the next token.
252
253        :param skip_comment:
254            When false, return COMMENT tokens ("#").
255            This is used when reading documentation blocks.
256
257        :return:
258            None.  Several instance attributes are updated instead:
259
260            - ``.tok`` represents the token type.  See below for values.
261            - ``.info`` describes the token's source location.
262            - ``.val`` is the token's value, if any.  See below.
263            - ``.pos`` is the buffer index of the first character of
264              the token.
265
266        * Single-character tokens:
267
268            These are "{", "}", ":", ",", "[", and "]".
269            ``.tok`` holds the single character and ``.val`` is None.
270
271        * Multi-character tokens:
272
273          * COMMENT:
274
275            This token is not normally returned by the lexer, but it can
276            be when ``skip_comment`` is False.  ``.tok`` is "#", and
277            ``.val`` is a string including all chars until end-of-line,
278            including the "#" itself.
279
280          * STRING:
281
282            ``.tok`` is "'", the single quote.  ``.val`` contains the
283            string, excluding the surrounding quotes.
284
285          * TRUE and FALSE:
286
287            ``.tok`` is either "t" or "f", ``.val`` will be the
288            corresponding bool value.
289
290          * EOF:
291
292            ``.tok`` and ``.val`` will both be None at EOF.
293        """
294        while True:
295            self.tok = self.src[self.cursor]
296            self.pos = self.cursor
297            self.cursor += 1
298            self.val = None
299
300            if self.tok == '#':
301                if self.src[self.cursor] == '#':
302                    # Start of doc comment
303                    skip_comment = False
304                self.cursor = self.src.find('\n', self.cursor)
305                if not skip_comment:
306                    self.val = self.src[self.pos:self.cursor]
307                    return
308            elif self.tok in '{}:,[]':
309                return
310            elif self.tok == "'":
311                # Note: we accept only printable ASCII
312                string = ''
313                esc = False
314                while True:
315                    ch = self.src[self.cursor]
316                    self.cursor += 1
317                    if ch == '\n':
318                        raise QAPIParseError(self, "missing terminating \"'\"")
319                    if esc:
320                        # Note: we recognize only \\ because we have
321                        # no use for funny characters in strings
322                        if ch != '\\':
323                            raise QAPIParseError(self,
324                                                 "unknown escape \\%s" % ch)
325                        esc = False
326                    elif ch == '\\':
327                        esc = True
328                        continue
329                    elif ch == "'":
330                        self.val = string
331                        return
332                    if ord(ch) < 32 or ord(ch) >= 127:
333                        raise QAPIParseError(
334                            self, "funny character in string")
335                    string += ch
336            elif self.src.startswith('true', self.pos):
337                self.val = True
338                self.cursor += 3
339                return
340            elif self.src.startswith('false', self.pos):
341                self.val = False
342                self.cursor += 4
343                return
344            elif self.tok == '\n':
345                if self.cursor == len(self.src):
346                    self.tok = None
347                    return
348                self.info = self.info.next_line()
349                self.line_pos = self.cursor
350            elif not self.tok.isspace():
351                # Show up to next structural, whitespace or quote
352                # character
353                match = must_match('[^[\\]{}:,\\s\']+',
354                                   self.src[self.cursor-1:])
355                raise QAPIParseError(self, "stray '%s'" % match.group(0))
356
357    def get_members(self) -> Dict[str, object]:
358        expr: Dict[str, object] = OrderedDict()
359        if self.tok == '}':
360            self.accept()
361            return expr
362        if self.tok != "'":
363            raise QAPIParseError(self, "expected string or '}'")
364        while True:
365            key = self.val
366            assert isinstance(key, str)  # Guaranteed by tok == "'"
367
368            self.accept()
369            if self.tok != ':':
370                raise QAPIParseError(self, "expected ':'")
371            self.accept()
372            if key in expr:
373                raise QAPIParseError(self, "duplicate key '%s'" % key)
374            expr[key] = self.get_expr()
375            if self.tok == '}':
376                self.accept()
377                return expr
378            if self.tok != ',':
379                raise QAPIParseError(self, "expected ',' or '}'")
380            self.accept()
381            if self.tok != "'":
382                raise QAPIParseError(self, "expected string")
383
384    def get_values(self) -> List[object]:
385        expr: List[object] = []
386        if self.tok == ']':
387            self.accept()
388            return expr
389        if self.tok not in tuple("{['tf"):
390            raise QAPIParseError(
391                self, "expected '{', '[', ']', string, or boolean")
392        while True:
393            expr.append(self.get_expr())
394            if self.tok == ']':
395                self.accept()
396                return expr
397            if self.tok != ',':
398                raise QAPIParseError(self, "expected ',' or ']'")
399            self.accept()
400
401    def get_expr(self) -> _ExprValue:
402        expr: _ExprValue
403        if self.tok == '{':
404            self.accept()
405            expr = self.get_members()
406        elif self.tok == '[':
407            self.accept()
408            expr = self.get_values()
409        elif self.tok in tuple("'tf"):
410            assert isinstance(self.val, (str, bool))
411            expr = self.val
412            self.accept()
413        else:
414            raise QAPIParseError(
415                self, "expected '{', '[', string, or boolean")
416        return expr
417
418    def get_doc_line(self) -> Optional[str]:
419        if self.tok != '#':
420            raise QAPIParseError(
421                self, "documentation comment must end with '##'")
422        assert isinstance(self.val, str)
423        if self.val.startswith('##'):
424            # End of doc comment
425            if self.val != '##':
426                raise QAPIParseError(
427                    self, "junk after '##' at end of documentation comment")
428            return None
429        if self.val == '#':
430            return ''
431        if self.val[1] != ' ':
432            raise QAPIParseError(self, "missing space after #")
433        return self.val[2:].rstrip()
434
435    @staticmethod
436    def _match_at_name_colon(string: str) -> Optional[Match[str]]:
437        return re.match(r'@([^:]*): *', string)
438
439    def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
440        self.accept(False)
441        line = self.get_doc_line()
442        while line == '':
443            doc.append_line(line)
444            self.accept(False)
445            line = self.get_doc_line()
446        if line is None:
447            return line
448        indent = must_match(r'\s*', line).end()
449        if not indent:
450            return line
451        doc.append_line(line[indent:])
452        prev_line_blank = False
453        while True:
454            self.accept(False)
455            line = self.get_doc_line()
456            if line is None:
457                return line
458            if self._match_at_name_colon(line):
459                return line
460            cur_indent = must_match(r'\s*', line).end()
461            if line != '' and cur_indent < indent:
462                if prev_line_blank:
463                    return line
464                raise QAPIParseError(
465                    self,
466                    "unexpected de-indent (expected at least %d spaces)" %
467                    indent)
468            doc.append_line(line[indent:])
469            prev_line_blank = True
470
471    def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
472        while True:
473            self.accept(False)
474            line = self.get_doc_line()
475            if line is None:
476                return line
477            if line == '':
478                return line
479            doc.append_line(line)
480
481    def get_doc(self) -> 'QAPIDoc':
482        if self.val != '##':
483            raise QAPIParseError(
484                self, "junk after '##' at start of documentation comment")
485        info = self.info
486        self.accept(False)
487        line = self.get_doc_line()
488        if line is not None and line.startswith('@'):
489            # Definition documentation
490            if not line.endswith(':'):
491                raise QAPIParseError(self, "line should end with ':'")
492            # Invalid names are not checked here, but the name
493            # provided *must* match the following definition,
494            # which *is* validated in expr.py.
495            symbol = line[1:-1]
496            if not symbol:
497                raise QAPIParseError(self, "name required after '@'")
498            doc = QAPIDoc(info, symbol)
499            self.accept(False)
500            line = self.get_doc_line()
501            no_more_args = False
502
503            while line is not None:
504                # Blank lines
505                while line == '':
506                    self.accept(False)
507                    line = self.get_doc_line()
508                if line is None:
509                    break
510                # Non-blank line, first of a section
511                if line == 'Features:':
512                    if doc.features:
513                        raise QAPIParseError(
514                            self, "duplicated 'Features:' line")
515                    self.accept(False)
516                    line = self.get_doc_line()
517                    while line == '':
518                        self.accept(False)
519                        line = self.get_doc_line()
520                    while (line is not None
521                           and (match := self._match_at_name_colon(line))):
522                        doc.new_feature(self.info, match.group(1))
523                        text = line[match.end():]
524                        if text:
525                            doc.append_line(text)
526                        line = self.get_doc_indented(doc)
527                    if not doc.features:
528                        raise QAPIParseError(
529                            self, 'feature descriptions expected')
530                    no_more_args = True
531                elif match := self._match_at_name_colon(line):
532                    # description
533                    if no_more_args:
534                        raise QAPIParseError(
535                            self,
536                            "description of '@%s:' follows a section"
537                            % match.group(1))
538                    while (line is not None
539                           and (match := self._match_at_name_colon(line))):
540                        doc.new_argument(self.info, match.group(1))
541                        text = line[match.end():]
542                        if text:
543                            doc.append_line(text)
544                        line = self.get_doc_indented(doc)
545                    no_more_args = True
546                elif match := re.match(
547                        r'(Returns|Errors|Since|Notes?|Examples?|TODO): *',
548                        line):
549                    # tagged section
550                    doc.new_tagged_section(self.info, match.group(1))
551                    text = line[match.end():]
552                    if text:
553                        doc.append_line(text)
554                    line = self.get_doc_indented(doc)
555                    no_more_args = True
556                elif line.startswith('='):
557                    raise QAPIParseError(
558                        self,
559                        "unexpected '=' markup in definition documentation")
560                else:
561                    # tag-less paragraph
562                    doc.ensure_untagged_section(self.info)
563                    doc.append_line(line)
564                    line = self.get_doc_paragraph(doc)
565        else:
566            # Free-form documentation
567            doc = QAPIDoc(info)
568            doc.ensure_untagged_section(self.info)
569            first = True
570            while line is not None:
571                if match := self._match_at_name_colon(line):
572                    raise QAPIParseError(
573                        self,
574                        "'@%s:' not allowed in free-form documentation"
575                        % match.group(1))
576                if line.startswith('='):
577                    if not first:
578                        raise QAPIParseError(
579                            self,
580                            "'=' heading must come first in a comment block")
581                doc.append_line(line)
582                self.accept(False)
583                line = self.get_doc_line()
584                first = False
585
586        self.accept(False)
587        doc.end()
588        return doc
589
590
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    A definition documentation block is made of

    * a body section: the line naming the definition, then an overview
      of any length

    * argument sections, one per described argument (commands and
      events) or member (structs, unions and alternates)

    * features sections, one per described feature flag

    * additional (non-argument) sections, possibly tagged

    A free-form documentation block has just the body section.
    """

    class Section:
        """A run of documentation text with an optional tag."""
        # pylint: disable=too-few-public-methods
        def __init__(self, info: QAPISourceInfo,
                     tag: Optional[str] = None):
            # where the section begins in the source
            self.info = info
            # the section's tag ('Returns', '@name', ...), if any
            self.tag = tag
            # the section's text, tag not included
            self.text = ''

        def append_line(self, line: str) -> None:
            """Append ``line`` and a newline to the section's text."""
            self.text = self.text + line + '\n'

    class ArgSection(Section):
        """A section describing one argument, member or feature."""
        def __init__(self, info: QAPISourceInfo, tag: str):
            super().__init__(info, tag)
            # the schema member described; set by connect()
            self.member: Optional['QAPISchemaMember'] = None

        def connect(self, member: 'QAPISchemaMember') -> None:
            """Record ``member`` as the one this section describes."""
            self.member = member

    def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
        body = QAPIDoc.Section(info)
        # info points to the doc comment block's first line
        self.info = info
        # definition doc's symbol, None for free-form doc
        self.symbol: Optional[str] = symbol
        # the sections in textual order
        self.all_sections: List[QAPIDoc.Section] = [body]
        # the body section
        self.body: Optional[QAPIDoc.Section] = body
        # dicts mapping parameter/feature names to their description
        self.args: Dict[str, QAPIDoc.ArgSection] = {}
        self.features: Dict[str, QAPIDoc.ArgSection] = {}
        # a command's "Returns" and "Errors" section
        self.returns: Optional[QAPIDoc.Section] = None
        self.errors: Optional[QAPIDoc.Section] = None
        # "Since" section
        self.since: Optional[QAPIDoc.Section] = None
        # sections other than .body, .args, .features
        self.sections: List[QAPIDoc.Section] = []

    def end(self) -> None:
        """
        Finish the block: strip leading and trailing newlines from
        every section's text, and reject tagged sections without text.
        """
        for section in self.all_sections:
            stripped = section.text.strip('\n')
            section.text = stripped
            if section.tag is None or stripped:
                continue
            raise QAPISemError(
                section.info, "text required after '%s:'" % section.tag)

    def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
        """
        Make the current section an untagged one.

        When the last section is untagged already, a newline is added
        to its text.  Else a new untagged section is started.
        """
        last = self.all_sections[-1] if self.all_sections else None
        if last is not None and not last.tag:
            # extend current section
            last.text += '\n'
            return
        # start new section
        fresh = self.Section(info)
        self.sections.append(fresh)
        self.all_sections.append(fresh)

    def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None:
        """
        Start a new section tagged ``tag``.

        :raise QAPISemError:
            For a second 'Returns', 'Errors' or 'Since' section.
        """
        section = self.Section(info, tag)
        # tags permitted just once, mapped to the attribute that
        # remembers their section
        once_only = {
            'Returns': 'returns',
            'Errors': 'errors',
            'Since': 'since',
        }
        attr = once_only.get(tag)
        if attr is not None:
            if getattr(self, attr):
                raise QAPISemError(
                    info, "duplicated '%s' section" % tag)
            setattr(self, attr, section)
        self.sections.append(section)
        self.all_sections.append(section)

    def _new_description(self, info: QAPISourceInfo, name: str,
                         desc: Dict[str, ArgSection]) -> None:
        """
        Start a new description section for ``name``, and record it
        in ``desc``.

        :raise QAPISemError: For an empty or duplicated name.
        """
        if not name:
            raise QAPISemError(info, "invalid parameter name")
        if name in desc:
            raise QAPISemError(info, "'%s' parameter name duplicated" % name)
        described = self.ArgSection(info, '@' + name)
        self.all_sections.append(described)
        desc[name] = described

    def new_argument(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of argument or member ``name``."""
        self._new_description(info, name, self.args)

    def new_feature(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of feature ``name``."""
        self._new_description(info, name, self.features)

    def append_line(self, line: str) -> None:
        """Append ``line`` to the current, i.e. last, section."""
        current = self.all_sections[-1]
        current.append_line(line)

    def connect_member(self, member: 'QAPISchemaMember') -> None:
        """
        Connect ``member`` to its description.

        A member lacking a description gets an empty one when the
        block's symbol is listed in the documentation exceptions
        pragma.

        :raise QAPISemError:
            When the member lacks a description and isn't exempt.
        """
        name = member.name
        if name not in self.args:
            assert member.info
            exempt = member.info.pragma.documentation_exceptions
            if self.symbol not in exempt:
                raise QAPISemError(member.info,
                                   "%s '%s' lacks documentation"
                                   % (member.role, member.name))
            self.args[name] = QAPIDoc.ArgSection(self.info, '@' + name)
        self.args[name].connect(member)

    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
        """
        Connect ``feature`` to its description.

        :raise QAPISemError: When the feature lacks a description.
        """
        name = feature.name
        if name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[name].connect(feature)

    def check_expr(self, expr: QAPIExpression) -> None:
        """
        Check the 'Returns' and 'Errors' sections against ``expr``.

        :raise QAPISemError:
            When such a section is present, but doesn't fit ``expr``.
        """
        if 'command' not in expr:
            if self.returns:
                raise QAPISemError(
                    self.returns.info,
                    "'Returns' section is only valid for commands")
            if self.errors:
                raise QAPISemError(
                    self.errors.info,
                    "'Errors' section is only valid for commands")
        elif self.returns and 'returns' not in expr:
            raise QAPISemError(
                self.returns.info,
                "'Returns' section, but command doesn't return anything")

    def check(self) -> None:
        """
        Check every description has been connected to a member.

        :raise QAPISemError:
            When members or features are described that were never
            connected.
        """
        tables = ((self.args, 'member'), (self.features, 'feature'))
        for table, what in tables:
            bogus = [name for name, section in table.items()
                     if not section.member]
            if not bogus:
                continue
            several = len(bogus) > 1
            raise QAPISemError(
                table[bogus[0]].info,
                "documented %s%s '%s' %s not exist" % (
                    what,
                    "s" if several else "",
                    "', '".join(bogus),
                    "do" if several else "does"
                ))
761