xref: /openbmc/qemu/scripts/qapi/parser.py (revision 03386200b90c68953e217baedd3716cdee9ed169)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import List
21
22from .common import must_match
23from .error import QAPISemError, QAPISourceError
24from .source import QAPISourceInfo
25
26
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        """
        Report @msg at the position of @parser's current token.

        The column is computed from the source text between the start
        of the current line (parser.line_pos) and the start of the
        current token (parser.pos).  Columns are 1-based, and a tab
        advances to the next tab stop (columns 1, 9, 17, ...).
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Round up to the next multiple-of-8 tab stop.  A plain
                # modulo here would wrap the column around instead of
                # advancing it.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
37
38
39class QAPISchemaParser:
40
41    def __init__(self, fname, previously_included=None, incl_info=None):
42        self._fname = fname
43        self._included = previously_included or set()
44        self._included.add(os.path.abspath(self._fname))
45        self.src = ''
46
47        # Lexer state (see `accept` for details):
48        self.info = QAPISourceInfo(self._fname, incl_info)
49        self.tok = None
50        self.pos = 0
51        self.cursor = 0
52        self.val = None
53        self.line_pos = 0
54
55        # Parser output:
56        self.exprs = []
57        self.docs = []
58
59        # Showtime!
60        self._parse()
61
    def _parse(self):
        """
        Read the schema file and parse it into self.exprs and self.docs.

        Every top-level expression must be an object:

        * An 'include' directive is recorded in self.exprs, the named
          file is parsed recursively, and its expressions and docs are
          appended to ours.

        * A 'pragma' directive is applied with _pragma().

        * Anything else is appended to self.exprs together with its
          source info and, if one precedes it, its definition
          documentation.

        Documentation comments are collected in self.docs.

        May raise OSError if the file can't be read, and QAPISemError
        or QAPIParseError on invalid input.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            # Remember where this expression or comment starts
            info = self.info
            if self.tok == '#':
                # A new doc comment; the previous one must not be
                # definition documentation still waiting for its
                # definition.
                self.reject_expr_doc(cur_doc)
                # cur_doc stays bound to the last block after the loop;
                # that is the one the next expression may pick up.
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Included file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                # _include() returns None for a file already included
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    # Only definition documentation (one that names a
                    # symbol) may directly precede a definition
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            # The doc comment, if any, has been consumed
            cur_doc = None
        # Definition documentation at end of file lacks its definition
        self.reject_expr_doc(cur_doc)
126
127    @staticmethod
128    def reject_expr_doc(doc):
129        if doc and doc.symbol:
130            raise QAPISemError(
131                doc.info,
132                "documentation for '%s' is not followed by the definition"
133                % doc.symbol)
134
135    @staticmethod
136    def _include(include, info, incl_fname, previously_included):
137        incl_abs_fname = os.path.abspath(incl_fname)
138        # catch inclusion cycle
139        inf = info
140        while inf:
141            if incl_abs_fname == os.path.abspath(inf.fname):
142                raise QAPISemError(info, "inclusion loop for %s" % include)
143            inf = inf.parent
144
145        # skip multiple include of the same file
146        if incl_abs_fname in previously_included:
147            return None
148
149        try:
150            return QAPISchemaParser(incl_fname, previously_included, info)
151        except OSError as err:
152            raise QAPISemError(
153                info,
154                f"can't read include file '{incl_fname}': {err.strerror}"
155            ) from err
156
157    @staticmethod
158    def _pragma(name, value, info):
159
160        def check_list_str(name, value) -> List[str]:
161            if (not isinstance(value, list) or
162                    any([not isinstance(elt, str) for elt in value])):
163                raise QAPISemError(
164                    info,
165                    "pragma %s must be a list of strings" % name)
166            return value
167
168        pragma = info.pragma
169
170        if name == 'doc-required':
171            if not isinstance(value, bool):
172                raise QAPISemError(info,
173                                   "pragma 'doc-required' must be boolean")
174            pragma.doc_required = value
175        elif name == 'command-name-exceptions':
176            pragma.command_name_exceptions = check_list_str(name, value)
177        elif name == 'command-returns-exceptions':
178            pragma.command_returns_exceptions = check_list_str(name, value)
179        elif name == 'member-name-exceptions':
180            pragma.member_name_exceptions = check_list_str(name, value)
181        else:
182            raise QAPISemError(info, "unknown pragma '%s'" % name)
183
    def accept(self, skip_comment=True):
        """
        Read the next lexical token and store it in the lexer state.

        On return, self.tok and self.val describe the token:

        * '#': a comment; self.val is its text up to (not including)
          the newline.  Comments are returned only when @skip_comment
          is false, or when they start with '##' (documentation
          comment); other comments are skipped.

        * one of '{', '}', ':', ',', '[', ']': self.val is None.

        * "'": a string; self.val is its value without the quotes.

        * 't': the literal true; self.val is True.

        * 'f': the literal false; self.val is False.

        * None: end of input; self.val is None.

        self.pos is the index in self.src where the token starts,
        self.cursor the index of the next character to read, and
        self.line_pos the index where the current line starts.
        self.info is advanced with next_line() on every newline that
        isn't the last character of the input.

        Raise QAPIParseError on invalid input.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline; the next iteration
                # (or the next call) takes care of it
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The 't' has been consumed already; skip 'rue'
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # The 'f' has been consumed already; skip 'alse'
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # The final newline marks the end of input
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
247
248    def get_members(self):
249        expr = OrderedDict()
250        if self.tok == '}':
251            self.accept()
252            return expr
253        if self.tok != "'":
254            raise QAPIParseError(self, "expected string or '}'")
255        while True:
256            key = self.val
257            assert isinstance(key, str)  # Guaranteed by tok == "'"
258
259            self.accept()
260            if self.tok != ':':
261                raise QAPIParseError(self, "expected ':'")
262            self.accept()
263            if key in expr:
264                raise QAPIParseError(self, "duplicate key '%s'" % key)
265            expr[key] = self.get_expr()
266            if self.tok == '}':
267                self.accept()
268                return expr
269            if self.tok != ',':
270                raise QAPIParseError(self, "expected ',' or '}'")
271            self.accept()
272            if self.tok != "'":
273                raise QAPIParseError(self, "expected string")
274
275    def get_values(self):
276        expr = []
277        if self.tok == ']':
278            self.accept()
279            return expr
280        if self.tok not in tuple("{['tf"):
281            raise QAPIParseError(
282                self, "expected '{', '[', ']', string, or boolean")
283        while True:
284            expr.append(self.get_expr())
285            if self.tok == ']':
286                self.accept()
287                return expr
288            if self.tok != ',':
289                raise QAPIParseError(self, "expected ',' or ']'")
290            self.accept()
291
292    def get_expr(self):
293        if self.tok == '{':
294            self.accept()
295            expr = self.get_members()
296        elif self.tok == '[':
297            self.accept()
298            expr = self.get_values()
299        elif self.tok in tuple("'tf"):
300            assert isinstance(self.val, (str, bool))
301            expr = self.val
302            self.accept()
303        else:
304            raise QAPIParseError(
305                self, "expected '{', '[', string, or boolean")
306        return expr
307
    def get_doc(self, info):
        """
        Parse a documentation comment starting at the current token.

        The current token must be the '##' line opening the comment.
        The comment extends to the next '##' line.  @info is the source
        info recorded in the resulting QAPIDoc objects.

        Return a list of QAPIDoc.  It normally has one element; in a
        comment without a symbol, a '# =' line splits off a new QAPIDoc
        once the current one has body text.

        Raise QAPIParseError on malformed documentation.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        # Don't skip ordinary comments: they are the doc's lines
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    # Finish the current doc and start a new one for
                    # the text from this line on
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        # Ran into something that isn't a comment before the closing '##'
        raise QAPIParseError(self, "documentation comment must end with '##'")
341
342
343class QAPIDoc:
344    """
345    A documentation comment block, either definition or free-form
346
347    Definition documentation blocks consist of
348
349    * a body section: one line naming the definition, followed by an
350      overview (any number of lines)
351
352    * argument sections: a description of each argument (for commands
353      and events) or member (for structs, unions and alternates)
354
355    * features sections: a description of each feature flag
356
357    * additional (non-argument) sections, possibly tagged
358
359    Free-form documentation blocks consist only of a body section.
360    """
361
362    class Section:
363        def __init__(self, parser, name=None, indent=0):
364            # parser, for error messages about indentation
365            self._parser = parser
366            # optional section name (argument/member or section name)
367            self.name = name
368            self.text = ''
369            # the expected indent level of the text of this section
370            self._indent = indent
371
372        def append(self, line):
373            # Strip leading spaces corresponding to the expected indent level
374            # Blank lines are always OK.
375            if line:
376                indent = must_match(r'\s*', line).end()
377                if indent < self._indent:
378                    raise QAPIParseError(
379                        self._parser,
380                        "unexpected de-indent (expected at least %d spaces)" %
381                        self._indent)
382                line = line[self._indent:]
383
384            self.text += line.rstrip() + '\n'
385
    class ArgSection(Section):
        """A named section that can be connected to a schema member."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the object this section documents; set by connect()
            self.member = None

        def connect(self, member):
            """Record @member as the object this section documents."""
            self.member = member
393
    def __init__(self, parser, info):
        """
        Create an empty documentation block.

        @parser is used for error reporting only.  @info is the source
        info stored in self.info.
        """
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        # name of the documented definition; None for free-form docs
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        # the method handling the next line; see append()
        self._append_line = self._append_body_line
411
412    def has_section(self, name):
413        """Return True if we have a section with this name."""
414        for i in self.sections:
415            if i.name == name:
416                return True
417        return False
418
419    def append(self, line):
420        """
421        Parse a comment line and add it to the documentation.
422
423        The way that the line is dealt with depends on which part of
424        the documentation we're parsing right now:
425        * The body section: ._append_line is ._append_body_line
426        * An argument section: ._append_line is ._append_args_line
427        * A features section: ._append_line is ._append_features_line
428        * An additional section: ._append_line is ._append_various_line
429        """
430        line = line[1:]
431        if not line:
432            self._append_freeform(line)
433            return
434
435        if line[0] != ' ':
436            raise QAPIParseError(self._parser, "missing space after #")
437        line = line[1:]
438        self._append_line(line)
439
    def end_comment(self):
        """Finish the documentation block by ending its current section."""
        self._end_section()
442
443    @staticmethod
444    def _is_section_tag(name):
445        return name in ('Returns:', 'Since:',
446                        # those are often singular or plural
447                        'Note:', 'Notes:',
448                        'Example:', 'Examples:',
449                        'TODO:')
450
    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this is a symbol line and it is the section's first line,
        this is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, a
        'Features:' line begins the features part, and a section tag
        begins an additional section.  Switch to the matching line
        handler and, except for 'Features:', pass the line on to it.

        Else, append the line to the current section.
        """
        # First word of the line, for recognizing '@name:' and tags
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            # Strip the leading '@' and the trailing ':'
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                # The line itself is not added to any section
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)
490
    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section.  A section tag
        begins an additional section.  After a blank line, a
        non-indented line begins the features part if it is
        'Features:', and an untagged additional section otherwise.
        Start that section and, except for 'Features:', append the
        line to it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            # Non-indented text after a blank line ends the argument
            # sections
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)
536
    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section.  A section tag,
        or a non-indented line after a blank line, begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            # Non-indented text after a blank line ends the feature
            # sections and starts an untagged additional section
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)
568
    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # Note: the message names the first additional section,
            # not necessarily the current one
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            # The section is named after the tag without its ':'
            self._start_section(name[:-1], indent)

        self._append_freeform(line)
604
605    def _start_symbol_section(self, symbols_dict, name, indent):
606        # FIXME invalid names other than the empty string aren't flagged
607        if not name:
608            raise QAPIParseError(self._parser, "invalid parameter name")
609        if name in symbols_dict:
610            raise QAPIParseError(self._parser,
611                                 "'%s' parameter name duplicated" % name)
612        assert not self.sections
613        self._end_section()
614        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
615        symbols_dict[name] = self._section
616
    def _start_args_section(self, name, indent):
        """Begin the section documenting argument or member @name."""
        self._start_symbol_section(self.args, name, indent)
619
    def _start_features_section(self, name, indent):
        """Begin the section documenting feature @name."""
        self._start_symbol_section(self.features, name, indent)
622
623    def _start_section(self, name=None, indent=0):
624        if name in ('Returns', 'Since') and self.has_section(name):
625            raise QAPIParseError(self._parser,
626                                 "duplicated '%s' section" % name)
627        self._end_section()
628        self._section = QAPIDoc.Section(self._parser, name, indent)
629        self.sections.append(self._section)
630
631    def _end_section(self):
632        if self._section:
633            text = self._section.text = self._section.text.strip()
634            if self._section.name and (not text or text.isspace()):
635                raise QAPIParseError(
636                    self._parser,
637                    "empty doc section '%s'" % self._section.name)
638            self._section = None
639
640    def _append_freeform(self, line):
641        match = re.match(r'(@\S+:)', line)
642        if match:
643            raise QAPIParseError(self._parser,
644                                 "'%s' not allowed in free-form documentation"
645                                 % match.group(1))
646        self._section.append(line)
647
648    def connect_member(self, member):
649        if member.name not in self.args:
650            # Undocumented TODO outlaw
651            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
652                                                        member.name)
653        self.args[member.name].connect(member)
654
655    def connect_feature(self, feature):
656        if feature.name not in self.features:
657            raise QAPISemError(feature.info,
658                               "feature '%s' lacks documentation"
659                               % feature.name)
660        self.features[feature.name].connect(feature)
661
662    def check_expr(self, expr):
663        if self.has_section('Returns') and 'command' not in expr:
664            raise QAPISemError(self.info,
665                               "'Returns:' is only valid for commands")
666
667    def check(self):
668
669        def check_args_section(args, info, what):
670            bogus = [name for name, section in args.items()
671                     if not section.member]
672            if bogus:
673                raise QAPISemError(
674                    self.info,
675                    "documented member%s '%s' %s not exist"
676                    % ("s" if len(bogus) > 1 else "",
677                       "', '".join(bogus),
678                       "do" if len(bogus) > 1 else "does"))
679
680        check_args_section(self.args, self.info, 'members')
681        check_args_section(self.features, self.info, 'features')
682