xref: /openbmc/qemu/scripts/qapi/parser.py (revision 43b1be65f07c57ef2a4a6012e263677cf812c7e1)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        """
        Create an error positioned at the parser's current token.

        The column is derived from the source text between the start
        of the current line (parser.line_pos) and the start of the
        current token (parser.pos).  Columns are 1-based, and a tab
        advances to the next multiple-of-8 tab stop.

        :param parser: the QAPISchemaParser that hit the error; its
                       .src, .line_pos, .pos and .info are read.
        :param msg: the error message.
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Jump to the next tab stop (columns 1, 9, 17, ...).
                # Note: "(col + 7) % 8 + 1" would leave col unchanged
                # for col <= 8 and wrap around beyond that.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
class QAPISchemaParser:
    """
    Parser for a single QAPI schema file.

    Constructing an instance reads and parses the file right away,
    recursing into files named by 'include' directives.  The results
    are available afterwards as:

    * .exprs: a list of dicts, each with keys 'expr' (the parsed
      top-level expression), 'info' (its source location) and,
      when the expression is preceded by definition documentation,
      'doc' (the QAPIDoc)

    * .docs: a list of all QAPIDoc blocks, in source order
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        """
        Parse the schema file @fname.

        :param fname: name of the file to parse.
        :param previously_included: set of absolute file names that
            have been included already; this file's absolute name is
            added to it.
        :param incl_info: source location of the 'include' directive
            that pulled in this file, if any.

        May raise OSError when the file can't be read, and
        QAPISemError / QAPIParseError on malformed input.
        """
        self._fname = fname
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok = None
        self.pos = 0
        self.cursor = 0
        self.val = None
        self.line_pos = 0

        # Parser output:
        self.exprs = []
        self.docs = []

        # Showtime!
        self._parse()

    def _parse(self):
        """
        Read the file and parse it into .exprs and .docs.

        Handles the top-level structure: documentation comments,
        'include' and 'pragma' directives, and ordinary definitions.
        A definition documentation block must be immediately followed
        by the definition it documents.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # After the loop, cur_doc is the last block of the
                # comment; it may document the following expression.
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Included file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """
        Raise QAPISemError if @doc is definition documentation.

        Called where a pending documentation block can't be attached
        to a definition.  Free-form blocks (no symbol) and None are
        fine.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include, info, incl_fname, previously_included):
        """
        Parse the included file @incl_fname.

        :param include: the file name as written in the directive,
            used in error messages.
        :param info: source location of the 'include' directive.
        :param incl_fname: the file name to open.
        :param previously_included: set of absolute names of files
            included so far.
        :return: the QAPISchemaParser for the file, or None when the
            file has been included before.

        Raises QAPISemError on an inclusion loop or when the file
        can't be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _check_pragma_list_of_str(name, value, info):
        """Raise QAPISemError unless @value is a list of strings."""
        if (not isinstance(value, list)
                or any(not isinstance(elt, str) for elt in value)):
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)

    def _pragma(self, name, value, info):
        """
        Validate pragma @name and store @value in info.pragma.

        Raises QAPISemError for an unknown pragma or a value of the
        wrong type.
        """
        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            info.pragma.doc_required = value
        elif name == 'command-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_name_exceptions = value
        elif name == 'command-returns-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_returns_exceptions = value
        elif name == 'member-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.member_name_exceptions = value
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next lexical token and update the lexer state.

        On return:

        * .tok is the token's first character, or None at end of
          input: '#' for a comment, one of '{}:,[]' for a structural
          character, "'" for a string, 't' for true, 'f' for false
        * .val is the token's value: the comment text (without the
          newline), the string's contents, True, False, or None
        * .pos is the index in .src where the token starts
        * .cursor is the index in .src right after the token
        * .line_pos is the index in .src where the current line starts
        * .info is the source location of the current line

        :param skip_comment: when true, ordinary comments are skipped;
            comments starting with '##' are returned regardless.

        Raises QAPIParseError on lexical errors.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or single
                # quote character.  The double quote must not be
                # excluded here: it is not a token of its own, so a
                # stray one has to match, or else match is None.
                match = re.match('[^[\\]{}:,\\s\']+',
                                 self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self):
        """
        Parse the members of an object; the '{' has been consumed.

        :return: an OrderedDict mapping member names to their values.

        Consumes the closing '}'.  Raises QAPIParseError on syntax
        errors and duplicate keys.
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse the elements of a list; the '[' has been consumed.

        :return: a list of the parsed values.

        Consumes the closing ']'.  Raises QAPIParseError on syntax
        errors, including end of input.
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        # Compare against a tuple rather than a string: .tok is None
        # at end of input, and "None in str" raises TypeError.
        if self.tok not in ('{', '[', "'", 't', 'f'):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self):
        """
        Parse one expression starting at the current token.

        :return: an OrderedDict for an object, a list for a list, a
            str for a string, or a bool for true / false.

        Raises QAPIParseError on syntax errors, including end of
        input.
        """
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        # A tuple, not a string, so that .tok being None (end of
        # input) falls through to the error below
        elif self.tok in ("'", 't', 'f'):
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment; the current token is its '##'.

        A comment may yield several documentation blocks: in
        free-form documentation, a line starting with '=' markup
        begins a new block when the current one has body text.

        :param info: source location of the start of the comment.
        :return: a list of QAPIDoc, in source order.

        Raises QAPIParseError when the comment is malformed or not
        terminated by '##'.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
337
338
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A run of documentation text, optionally named."""

        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """
            Append @line to the section's text.

            The expected indent is removed from the front of the
            line, trailing whitespace is stripped, and a newline is
            added.  Raises QAPIParseError when a non-empty line is
            indented less than expected.
            """
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = re.match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """A section documenting a named argument, member or feature."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the schema entity this section documents, set by connect()
            self.member = None

        def connect(self, member):
            """Record @member as the entity this section documents."""
            self.member = member

    def __init__(self, parser, info):
        """
        Create an empty documentation block.

        :param parser: the parser, used for reporting errors.
        :param info: source location of the documentation comment.
        """
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        # name of the documented definition; None for free-form blocks
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        # the handler for the next line; see append()
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line

        @line is the comment line including its leading '#'.  Raises
        QAPIParseError when the '#' is followed by anything other
        than a space or the end of the line.
        """
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the block by ending the current section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        """Return True if @name is one of the recognized section tags."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this is a symbol line and it is the section's first line,
        this is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        or a non-indented line after a blank line begins an
        additional section, and a 'Features:' line after a blank line
        begins the features.  Start that section and append the line
        to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section, a section tag
        or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line
        to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        End the current section and start an ArgSection named @name.

        The new section is recorded in @symbols_dict.  Raises
        QAPIParseError when @name is empty or already present there.
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        # Symbol sections must precede all additional sections
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        """Start the section documenting argument or member @name."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the section documenting feature @name."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        End the current section and start an additional section.

        Raises QAPIParseError when a 'Returns' or 'Since' section
        exists already.
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """
        Finish the current section, if any.

        Strips leading and trailing whitespace from its text.  Raises
        QAPIParseError when a named section ends up empty.
        """
        if self._section:
            text = self._section.text = self._section.text.strip()
            if self._section.name and not text:
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """
        Append @line to the current section as ordinary text.

        Raises QAPIParseError when the line starts with something
        that looks like a symbol line ('@name:').
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect @member to its argument section.

        An undocumented member gets an empty section created for it.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect @feature to its feature section.

        Raises QAPISemError when the feature is undocumented.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """Raise QAPISemError if 'Returns:' documents a non-command."""
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that all documented members and features exist.

        Raises QAPISemError naming the argument or feature sections
        that were never connected to a schema entity.
        """

        def check_args_section(args, what):
            # Sections without a connected member document something
            # that isn't there
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')
678