xref: /openbmc/qemu/scripts/qapi/parser.py (revision 810aff8f29dedbf4568f36462d2bfc3ef47f11e8)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import (
21    Dict,
22    List,
23    Optional,
24    Set,
25    Union,
26)
27
28from .common import must_match
29from .error import QAPISemError, QAPISourceError
30from .source import QAPISourceInfo
31
32
33# Return value alias for get_expr().
34_ExprValue = Union[List[object], Dict[str, object], str, bool]
35
36
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error location is derived from the parser's current state: its
    source info, plus a 1-based column computed from the text between
    the start of the current line and the start of the current token.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        # Walk the text that precedes the token on its line to find
        # the column at which the token is displayed.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # A tab moves to the next tab stop.  With 8-wide tabs
                # and 1-based columns the stops are 1, 9, 17, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
47
48
49class QAPISchemaParser:
50
51    def __init__(self,
52                 fname: str,
53                 previously_included: Optional[Set[str]] = None,
54                 incl_info: Optional[QAPISourceInfo] = None):
55        self._fname = fname
56        self._included = previously_included or set()
57        self._included.add(os.path.abspath(self._fname))
58        self.src = ''
59
60        # Lexer state (see `accept` for details):
61        self.info = QAPISourceInfo(self._fname, incl_info)
62        self.tok: Union[None, str] = None
63        self.pos = 0
64        self.cursor = 0
65        self.val: Optional[Union[bool, str]] = None
66        self.line_pos = 0
67
68        # Parser output:
69        self.exprs: List[Dict[str, object]] = []
70        self.docs: List[QAPIDoc] = []
71
72        # Showtime!
73        self._parse()
74
75    def _parse(self) -> None:
76        cur_doc = None
77
78        # May raise OSError; allow the caller to handle it.
79        with open(self._fname, 'r', encoding='utf-8') as fp:
80            self.src = fp.read()
81        if self.src == '' or self.src[-1] != '\n':
82            self.src += '\n'
83
84        # Prime the lexer:
85        self.accept()
86
87        # Parse until done:
88        while self.tok is not None:
89            info = self.info
90            if self.tok == '#':
91                self.reject_expr_doc(cur_doc)
92                for cur_doc in self.get_doc(info):
93                    self.docs.append(cur_doc)
94                continue
95
96            expr = self.get_expr()
97            if not isinstance(expr, dict):
98                raise QAPISemError(
99                    info, "top-level expression must be an object")
100
101            if 'include' in expr:
102                self.reject_expr_doc(cur_doc)
103                if len(expr) != 1:
104                    raise QAPISemError(info, "invalid 'include' directive")
105                include = expr['include']
106                if not isinstance(include, str):
107                    raise QAPISemError(info,
108                                       "value of 'include' must be a string")
109                incl_fname = os.path.join(os.path.dirname(self._fname),
110                                          include)
111                self.exprs.append({'expr': {'include': incl_fname},
112                                   'info': info})
113                exprs_include = self._include(include, info, incl_fname,
114                                              self._included)
115                if exprs_include:
116                    self.exprs.extend(exprs_include.exprs)
117                    self.docs.extend(exprs_include.docs)
118            elif "pragma" in expr:
119                self.reject_expr_doc(cur_doc)
120                if len(expr) != 1:
121                    raise QAPISemError(info, "invalid 'pragma' directive")
122                pragma = expr['pragma']
123                if not isinstance(pragma, dict):
124                    raise QAPISemError(
125                        info, "value of 'pragma' must be an object")
126                for name, value in pragma.items():
127                    self._pragma(name, value, info)
128            else:
129                expr_elem = {'expr': expr,
130                             'info': info}
131                if cur_doc:
132                    if not cur_doc.symbol:
133                        raise QAPISemError(
134                            cur_doc.info, "definition documentation required")
135                    expr_elem['doc'] = cur_doc
136                self.exprs.append(expr_elem)
137            cur_doc = None
138        self.reject_expr_doc(cur_doc)
139
140    @staticmethod
141    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
142        if doc and doc.symbol:
143            raise QAPISemError(
144                doc.info,
145                "documentation for '%s' is not followed by the definition"
146                % doc.symbol)
147
148    @staticmethod
149    def _include(include: str,
150                 info: QAPISourceInfo,
151                 incl_fname: str,
152                 previously_included: Set[str]
153                 ) -> Optional['QAPISchemaParser']:
154        incl_abs_fname = os.path.abspath(incl_fname)
155        # catch inclusion cycle
156        inf: Optional[QAPISourceInfo] = info
157        while inf:
158            if incl_abs_fname == os.path.abspath(inf.fname):
159                raise QAPISemError(info, "inclusion loop for %s" % include)
160            inf = inf.parent
161
162        # skip multiple include of the same file
163        if incl_abs_fname in previously_included:
164            return None
165
166        try:
167            return QAPISchemaParser(incl_fname, previously_included, info)
168        except OSError as err:
169            raise QAPISemError(
170                info,
171                f"can't read include file '{incl_fname}': {err.strerror}"
172            ) from err
173
174    @staticmethod
175    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
176
177        def check_list_str(name: str, value: object) -> List[str]:
178            if (not isinstance(value, list) or
179                    any([not isinstance(elt, str) for elt in value])):
180                raise QAPISemError(
181                    info,
182                    "pragma %s must be a list of strings" % name)
183            return value
184
185        pragma = info.pragma
186
187        if name == 'doc-required':
188            if not isinstance(value, bool):
189                raise QAPISemError(info,
190                                   "pragma 'doc-required' must be boolean")
191            pragma.doc_required = value
192        elif name == 'command-name-exceptions':
193            pragma.command_name_exceptions = check_list_str(name, value)
194        elif name == 'command-returns-exceptions':
195            pragma.command_returns_exceptions = check_list_str(name, value)
196        elif name == 'member-name-exceptions':
197            pragma.member_name_exceptions = check_list_str(name, value)
198        else:
199            raise QAPISemError(info, "unknown pragma '%s'" % name)
200
    def accept(self, skip_comment: bool = True) -> None:
        """
        Read the next token from self.src and store it in the lexer state.

        skip_comment: when true, ordinary comments are skipped; when
            false, a comment is returned as a '#' token.  A comment
            starting with '##' is returned either way.

        On return the lexer state is:

        self.tok: the first character of the token: '#' (comment), one
            of '{}:,[]', "'" (string), 't' (true), 'f' (false), or
            None at the end of the input.
        self.val: the comment text without its newline for '#', the
            string's contents for "'", True or False for 't' / 'f',
            else None.
        self.pos: index in self.src of the token's first character.
        self.cursor: index in self.src where lexing resumes.
        self.line_pos: index in self.src of the start of the current
            line; updated whenever a newline other than the final one
            is consumed, together with self.info.

        Raises QAPIParseError for an unterminated string, an escape
        other than a doubled backslash, a character outside printable
        ASCII in a string, or a stray character.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Move to the newline ending the comment; the newline
                # itself is lexed by the next iteration or call
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The 't' is already consumed; skip the 'rue'
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # The 'f' is already consumed; skip the 'alse'
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # That was the last character: end of input
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
264
265    def get_members(self) -> Dict[str, object]:
266        expr: Dict[str, object] = OrderedDict()
267        if self.tok == '}':
268            self.accept()
269            return expr
270        if self.tok != "'":
271            raise QAPIParseError(self, "expected string or '}'")
272        while True:
273            key = self.val
274            assert isinstance(key, str)  # Guaranteed by tok == "'"
275
276            self.accept()
277            if self.tok != ':':
278                raise QAPIParseError(self, "expected ':'")
279            self.accept()
280            if key in expr:
281                raise QAPIParseError(self, "duplicate key '%s'" % key)
282            expr[key] = self.get_expr()
283            if self.tok == '}':
284                self.accept()
285                return expr
286            if self.tok != ',':
287                raise QAPIParseError(self, "expected ',' or '}'")
288            self.accept()
289            if self.tok != "'":
290                raise QAPIParseError(self, "expected string")
291
292    def get_values(self) -> List[object]:
293        expr: List[object] = []
294        if self.tok == ']':
295            self.accept()
296            return expr
297        if self.tok not in tuple("{['tf"):
298            raise QAPIParseError(
299                self, "expected '{', '[', ']', string, or boolean")
300        while True:
301            expr.append(self.get_expr())
302            if self.tok == ']':
303                self.accept()
304                return expr
305            if self.tok != ',':
306                raise QAPIParseError(self, "expected ',' or ']'")
307            self.accept()
308
309    def get_expr(self) -> _ExprValue:
310        expr: _ExprValue
311        if self.tok == '{':
312            self.accept()
313            expr = self.get_members()
314        elif self.tok == '[':
315            self.accept()
316            expr = self.get_values()
317        elif self.tok in tuple("'tf"):
318            assert isinstance(self.val, (str, bool))
319            expr = self.val
320            self.accept()
321        else:
322            raise QAPIParseError(
323                self, "expected '{', '[', string, or boolean")
324        return expr
325
326    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
327        if self.val != '##':
328            raise QAPIParseError(
329                self, "junk after '##' at start of documentation comment")
330
331        docs = []
332        cur_doc = QAPIDoc(self, info)
333        self.accept(False)
334        while self.tok == '#':
335            assert isinstance(self.val, str)
336            if self.val.startswith('##'):
337                # End of doc comment
338                if self.val != '##':
339                    raise QAPIParseError(
340                        self,
341                        "junk after '##' at end of documentation comment")
342                cur_doc.end_comment()
343                docs.append(cur_doc)
344                self.accept()
345                return docs
346            if self.val.startswith('# ='):
347                if cur_doc.symbol:
348                    raise QAPIParseError(
349                        self,
350                        "unexpected '=' markup in definition documentation")
351                if cur_doc.body.text:
352                    cur_doc.end_comment()
353                    docs.append(cur_doc)
354                    cur_doc = QAPIDoc(self, info)
355            cur_doc.append(self.val)
356            self.accept(False)
357
358        raise QAPIParseError(self, "documentation comment must end with '##'")
359
360
361class QAPIDoc:
362    """
363    A documentation comment block, either definition or free-form
364
365    Definition documentation blocks consist of
366
367    * a body section: one line naming the definition, followed by an
368      overview (any number of lines)
369
370    * argument sections: a description of each argument (for commands
371      and events) or member (for structs, unions and alternates)
372
373    * features sections: a description of each feature flag
374
375    * additional (non-argument) sections, possibly tagged
376
377    Free-form documentation blocks consist only of a body section.
378    """
379
380    class Section:
381        def __init__(self, parser, name=None, indent=0):
382            # parser, for error messages about indentation
383            self._parser = parser
384            # optional section name (argument/member or section name)
385            self.name = name
386            self.text = ''
387            # the expected indent level of the text of this section
388            self._indent = indent
389
390        def append(self, line):
391            # Strip leading spaces corresponding to the expected indent level
392            # Blank lines are always OK.
393            if line:
394                indent = must_match(r'\s*', line).end()
395                if indent < self._indent:
396                    raise QAPIParseError(
397                        self._parser,
398                        "unexpected de-indent (expected at least %d spaces)" %
399                        self._indent)
400                line = line[self._indent:]
401
402            self.text += line.rstrip() + '\n'
403
    class ArgSection(Section):
        """A named Section that can be connected to the object it documents."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the object this section documents; None until connect()
            self.member = None

        def connect(self, member):
            """Record @member as the object this section documents."""
            self.member = member
411
412    def __init__(self, parser, info):
413        # self._parser is used to report errors with QAPIParseError.  The
414        # resulting error position depends on the state of the parser.
415        # It happens to be the beginning of the comment.  More or less
416        # servicable, but action at a distance.
417        self._parser = parser
418        self.info = info
419        self.symbol = None
420        self.body = QAPIDoc.Section(parser)
421        # dict mapping parameter name to ArgSection
422        self.args = OrderedDict()
423        self.features = OrderedDict()
424        # a list of Section
425        self.sections = []
426        # the current section
427        self._section = self.body
428        self._append_line = self._append_body_line
429
430    def has_section(self, name):
431        """Return True if we have a section with this name."""
432        for i in self.sections:
433            if i.name == name:
434                return True
435        return False
436
437    def append(self, line):
438        """
439        Parse a comment line and add it to the documentation.
440
441        The way that the line is dealt with depends on which part of
442        the documentation we're parsing right now:
443        * The body section: ._append_line is ._append_body_line
444        * An argument section: ._append_line is ._append_args_line
445        * A features section: ._append_line is ._append_features_line
446        * An additional section: ._append_line is ._append_various_line
447        """
448        line = line[1:]
449        if not line:
450            self._append_freeform(line)
451            return
452
453        if line[0] != ' ':
454            raise QAPIParseError(self._parser, "missing space after #")
455        line = line[1:]
456        self._append_line(line)
457
    def end_comment(self):
        """Finish the documentation block by ending its current section."""
        self._end_section()
460
461    @staticmethod
462    def _is_section_tag(name):
463        return name in ('Returns:', 'Since:',
464                        # those are often singular or plural
465                        'Note:', 'Notes:',
466                        'Example:', 'Examples:',
467                        'TODO:')
468
469    def _append_body_line(self, line):
470        """
471        Process a line of documentation text in the body section.
472
473        If this a symbol line and it is the section's first line, this
474        is a definition documentation block for that symbol.
475
476        If it's a definition documentation block, another symbol line
477        begins the argument section for the argument named by it, and
478        a section tag begins an additional section.  Start that
479        section and append the line to it.
480
481        Else, append the line to the current section.
482        """
483        name = line.split(' ', 1)[0]
484        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
485        # recognized, and get silently treated as ordinary text
486        if not self.symbol and not self.body.text and line.startswith('@'):
487            if not line.endswith(':'):
488                raise QAPIParseError(self._parser, "line should end with ':'")
489            self.symbol = line[1:-1]
490            # FIXME invalid names other than the empty string aren't flagged
491            if not self.symbol:
492                raise QAPIParseError(self._parser, "invalid name")
493        elif self.symbol:
494            # This is a definition documentation block
495            if name.startswith('@') and name.endswith(':'):
496                self._append_line = self._append_args_line
497                self._append_args_line(line)
498            elif line == 'Features:':
499                self._append_line = self._append_features_line
500            elif self._is_section_tag(name):
501                self._append_line = self._append_various_line
502                self._append_various_line(line)
503            else:
504                self._append_freeform(line)
505        else:
506            # This is a free-form documentation block
507            self._append_freeform(line)
508
509    def _append_args_line(self, line):
510        """
511        Process a line of documentation text in an argument section.
512
513        A symbol line begins the next argument section, a section tag
514        section or a non-indented line after a blank line begins an
515        additional section.  Start that section and append the line to
516        it.
517
518        Else, append the line to the current section.
519
520        """
521        name = line.split(' ', 1)[0]
522
523        if name.startswith('@') and name.endswith(':'):
524            # If line is "@arg:   first line of description", find
525            # the index of 'f', which is the indent we expect for any
526            # following lines.  We then remove the leading "@arg:"
527            # from line and replace it with spaces so that 'f' has the
528            # same index as it did in the original line and can be
529            # handled the same way we will handle following lines.
530            indent = must_match(r'@\S*:\s*', line).end()
531            line = line[indent:]
532            if not line:
533                # Line was just the "@arg:" header; following lines
534                # are not indented
535                indent = 0
536            else:
537                line = ' ' * indent + line
538            self._start_args_section(name[1:-1], indent)
539        elif self._is_section_tag(name):
540            self._append_line = self._append_various_line
541            self._append_various_line(line)
542            return
543        elif (self._section.text.endswith('\n\n')
544              and line and not line[0].isspace()):
545            if line == 'Features:':
546                self._append_line = self._append_features_line
547            else:
548                self._start_section()
549                self._append_line = self._append_various_line
550                self._append_various_line(line)
551            return
552
553        self._append_freeform(line)
554
555    def _append_features_line(self, line):
556        name = line.split(' ', 1)[0]
557
558        if name.startswith('@') and name.endswith(':'):
559            # If line is "@arg:   first line of description", find
560            # the index of 'f', which is the indent we expect for any
561            # following lines.  We then remove the leading "@arg:"
562            # from line and replace it with spaces so that 'f' has the
563            # same index as it did in the original line and can be
564            # handled the same way we will handle following lines.
565            indent = must_match(r'@\S*:\s*', line).end()
566            line = line[indent:]
567            if not line:
568                # Line was just the "@arg:" header; following lines
569                # are not indented
570                indent = 0
571            else:
572                line = ' ' * indent + line
573            self._start_features_section(name[1:-1], indent)
574        elif self._is_section_tag(name):
575            self._append_line = self._append_various_line
576            self._append_various_line(line)
577            return
578        elif (self._section.text.endswith('\n\n')
579              and line and not line[0].isspace()):
580            self._start_section()
581            self._append_line = self._append_various_line
582            self._append_various_line(line)
583            return
584
585        self._append_freeform(line)
586
587    def _append_various_line(self, line):
588        """
589        Process a line of documentation text in an additional section.
590
591        A symbol line is an error.
592
593        A section tag begins an additional section.  Start that
594        section and append the line to it.
595
596        Else, append the line to the current section.
597        """
598        name = line.split(' ', 1)[0]
599
600        if name.startswith('@') and name.endswith(':'):
601            raise QAPIParseError(self._parser,
602                                 "'%s' can't follow '%s' section"
603                                 % (name, self.sections[0].name))
604        if self._is_section_tag(name):
605            # If line is "Section:   first line of description", find
606            # the index of 'f', which is the indent we expect for any
607            # following lines.  We then remove the leading "Section:"
608            # from line and replace it with spaces so that 'f' has the
609            # same index as it did in the original line and can be
610            # handled the same way we will handle following lines.
611            indent = must_match(r'\S*:\s*', line).end()
612            line = line[indent:]
613            if not line:
614                # Line was just the "Section:" header; following lines
615                # are not indented
616                indent = 0
617            else:
618                line = ' ' * indent + line
619            self._start_section(name[:-1], indent)
620
621        self._append_freeform(line)
622
623    def _start_symbol_section(self, symbols_dict, name, indent):
624        # FIXME invalid names other than the empty string aren't flagged
625        if not name:
626            raise QAPIParseError(self._parser, "invalid parameter name")
627        if name in symbols_dict:
628            raise QAPIParseError(self._parser,
629                                 "'%s' parameter name duplicated" % name)
630        assert not self.sections
631        self._end_section()
632        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
633        symbols_dict[name] = self._section
634
    def _start_args_section(self, name, indent):
        """Begin the section for argument @name, recorded in self.args."""
        self._start_symbol_section(self.args, name, indent)
637
    def _start_features_section(self, name, indent):
        """Begin the section for feature @name, recorded in self.features."""
        self._start_symbol_section(self.features, name, indent)
640
641    def _start_section(self, name=None, indent=0):
642        if name in ('Returns', 'Since') and self.has_section(name):
643            raise QAPIParseError(self._parser,
644                                 "duplicated '%s' section" % name)
645        self._end_section()
646        self._section = QAPIDoc.Section(self._parser, name, indent)
647        self.sections.append(self._section)
648
649    def _end_section(self):
650        if self._section:
651            text = self._section.text = self._section.text.strip()
652            if self._section.name and (not text or text.isspace()):
653                raise QAPIParseError(
654                    self._parser,
655                    "empty doc section '%s'" % self._section.name)
656            self._section = None
657
658    def _append_freeform(self, line):
659        match = re.match(r'(@\S+:)', line)
660        if match:
661            raise QAPIParseError(self._parser,
662                                 "'%s' not allowed in free-form documentation"
663                                 % match.group(1))
664        self._section.append(line)
665
666    def connect_member(self, member):
667        if member.name not in self.args:
668            # Undocumented TODO outlaw
669            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
670                                                        member.name)
671        self.args[member.name].connect(member)
672
673    def connect_feature(self, feature):
674        if feature.name not in self.features:
675            raise QAPISemError(feature.info,
676                               "feature '%s' lacks documentation"
677                               % feature.name)
678        self.features[feature.name].connect(feature)
679
680    def check_expr(self, expr):
681        if self.has_section('Returns') and 'command' not in expr:
682            raise QAPISemError(self.info,
683                               "'Returns:' is only valid for commands")
684
685    def check(self):
686
687        def check_args_section(args, info, what):
688            bogus = [name for name, section in args.items()
689                     if not section.member]
690            if bogus:
691                raise QAPISemError(
692                    self.info,
693                    "documented member%s '%s' %s not exist"
694                    % ("s" if len(bogus) > 1 else "",
695                       "', '".join(bogus),
696                       "do" if len(bogus) > 1 else "does"))
697
698        check_args_section(self.args, self.info, 'members')
699        check_args_section(self.features, self.info, 'features')
700