xref: /openbmc/qemu/scripts/qapi/parser.py (revision 234dce2c2d93cfff7433c0fd244ef207c7eace2b)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error location is taken from the parser's lexer state: the
    source info is ``parser.info``, and the column is computed from
    the text between the start of the current line
    (``parser.line_pos``) and the start of the current token
    (``parser.pos``).
    """
    def __init__(self, parser, msg):
        """
        :param parser: The parser whose lexer state locates the error.
        :param msg: The error message.
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 1, 9, 17, ...).
                # A tab always moves forward by at least one column.
                col += 8 - (col - 1) % 8
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
37class QAPISchemaParser:
38
39    def __init__(self, fname, previously_included=None, incl_info=None):
40        self._fname = fname
41        self._included = previously_included or set()
42        self._included.add(os.path.abspath(self._fname))
43        self.src = ''
44
45        # Lexer state (see `accept` for details):
46        self.info = QAPISourceInfo(self._fname, incl_info)
47        self.tok = None
48        self.pos = 0
49        self.cursor = 0
50        self.val = None
51        self.line_pos = 0
52
53        # Parser output:
54        self.exprs = []
55        self.docs = []
56
57        # Showtime!
58        self._parse()
59
60    def _parse(self):
61        cur_doc = None
62
63        # May raise OSError; allow the caller to handle it.
64        with open(self._fname, 'r', encoding='utf-8') as fp:
65            self.src = fp.read()
66        if self.src == '' or self.src[-1] != '\n':
67            self.src += '\n'
68
69        # Prime the lexer:
70        self.accept()
71
72        # Parse until done:
73        while self.tok is not None:
74            info = self.info
75            if self.tok == '#':
76                self.reject_expr_doc(cur_doc)
77                for cur_doc in self.get_doc(info):
78                    self.docs.append(cur_doc)
79                continue
80
81            expr = self.get_expr()
82            if not isinstance(expr, dict):
83                raise QAPISemError(
84                    info, "top-level expression must be an object")
85
86            if 'include' in expr:
87                self.reject_expr_doc(cur_doc)
88                if len(expr) != 1:
89                    raise QAPISemError(info, "invalid 'include' directive")
90                include = expr['include']
91                if not isinstance(include, str):
92                    raise QAPISemError(info,
93                                       "value of 'include' must be a string")
94                incl_fname = os.path.join(os.path.dirname(self._fname),
95                                          include)
96                self.exprs.append({'expr': {'include': incl_fname},
97                                   'info': info})
98                exprs_include = self._include(include, info, incl_fname,
99                                              self._included)
100                if exprs_include:
101                    self.exprs.extend(exprs_include.exprs)
102                    self.docs.extend(exprs_include.docs)
103            elif "pragma" in expr:
104                self.reject_expr_doc(cur_doc)
105                if len(expr) != 1:
106                    raise QAPISemError(info, "invalid 'pragma' directive")
107                pragma = expr['pragma']
108                if not isinstance(pragma, dict):
109                    raise QAPISemError(
110                        info, "value of 'pragma' must be an object")
111                for name, value in pragma.items():
112                    self._pragma(name, value, info)
113            else:
114                expr_elem = {'expr': expr,
115                             'info': info}
116                if cur_doc:
117                    if not cur_doc.symbol:
118                        raise QAPISemError(
119                            cur_doc.info, "definition documentation required")
120                    expr_elem['doc'] = cur_doc
121                self.exprs.append(expr_elem)
122            cur_doc = None
123        self.reject_expr_doc(cur_doc)
124
125    @staticmethod
126    def reject_expr_doc(doc):
127        if doc and doc.symbol:
128            raise QAPISemError(
129                doc.info,
130                "documentation for '%s' is not followed by the definition"
131                % doc.symbol)
132
133    def _include(self, include, info, incl_fname, previously_included):
134        incl_abs_fname = os.path.abspath(incl_fname)
135        # catch inclusion cycle
136        inf = info
137        while inf:
138            if incl_abs_fname == os.path.abspath(inf.fname):
139                raise QAPISemError(info, "inclusion loop for %s" % include)
140            inf = inf.parent
141
142        # skip multiple include of the same file
143        if incl_abs_fname in previously_included:
144            return None
145
146        try:
147            return QAPISchemaParser(incl_fname, previously_included, info)
148        except OSError as err:
149            raise QAPISemError(
150                info,
151                f"can't read include file '{incl_fname}': {err.strerror}"
152            ) from err
153
154    def _check_pragma_list_of_str(self, name, value, info):
155        if (not isinstance(value, list)
156                or any([not isinstance(elt, str) for elt in value])):
157            raise QAPISemError(
158                info,
159                "pragma %s must be a list of strings" % name)
160
161    def _pragma(self, name, value, info):
162        if name == 'doc-required':
163            if not isinstance(value, bool):
164                raise QAPISemError(info,
165                                   "pragma 'doc-required' must be boolean")
166            info.pragma.doc_required = value
167        elif name == 'command-name-exceptions':
168            self._check_pragma_list_of_str(name, value, info)
169            info.pragma.command_name_exceptions = value
170        elif name == 'command-returns-exceptions':
171            self._check_pragma_list_of_str(name, value, info)
172            info.pragma.command_returns_exceptions = value
173        elif name == 'member-name-exceptions':
174            self._check_pragma_list_of_str(name, value, info)
175            info.pragma.member_name_exceptions = value
176        else:
177            raise QAPISemError(info, "unknown pragma '%s'" % name)
178
179    def accept(self, skip_comment=True):
180        while True:
181            self.tok = self.src[self.cursor]
182            self.pos = self.cursor
183            self.cursor += 1
184            self.val = None
185
186            if self.tok == '#':
187                if self.src[self.cursor] == '#':
188                    # Start of doc comment
189                    skip_comment = False
190                self.cursor = self.src.find('\n', self.cursor)
191                if not skip_comment:
192                    self.val = self.src[self.pos:self.cursor]
193                    return
194            elif self.tok in '{}:,[]':
195                return
196            elif self.tok == "'":
197                # Note: we accept only printable ASCII
198                string = ''
199                esc = False
200                while True:
201                    ch = self.src[self.cursor]
202                    self.cursor += 1
203                    if ch == '\n':
204                        raise QAPIParseError(self, "missing terminating \"'\"")
205                    if esc:
206                        # Note: we recognize only \\ because we have
207                        # no use for funny characters in strings
208                        if ch != '\\':
209                            raise QAPIParseError(self,
210                                                 "unknown escape \\%s" % ch)
211                        esc = False
212                    elif ch == '\\':
213                        esc = True
214                        continue
215                    elif ch == "'":
216                        self.val = string
217                        return
218                    if ord(ch) < 32 or ord(ch) >= 127:
219                        raise QAPIParseError(
220                            self, "funny character in string")
221                    string += ch
222            elif self.src.startswith('true', self.pos):
223                self.val = True
224                self.cursor += 3
225                return
226            elif self.src.startswith('false', self.pos):
227                self.val = False
228                self.cursor += 4
229                return
230            elif self.tok == '\n':
231                if self.cursor == len(self.src):
232                    self.tok = None
233                    return
234                self.info = self.info.next_line()
235                self.line_pos = self.cursor
236            elif not self.tok.isspace():
237                # Show up to next structural, whitespace or quote
238                # character
239                match = re.match('[^[\\]{}:,\\s\'"]+',
240                                 self.src[self.cursor-1:])
241                raise QAPIParseError(self, "stray '%s'" % match.group(0))
242
243    def get_members(self):
244        expr = OrderedDict()
245        if self.tok == '}':
246            self.accept()
247            return expr
248        if self.tok != "'":
249            raise QAPIParseError(self, "expected string or '}'")
250        while True:
251            key = self.val
252            assert isinstance(key, str)  # Guaranteed by tok == "'"
253
254            self.accept()
255            if self.tok != ':':
256                raise QAPIParseError(self, "expected ':'")
257            self.accept()
258            if key in expr:
259                raise QAPIParseError(self, "duplicate key '%s'" % key)
260            expr[key] = self.get_expr()
261            if self.tok == '}':
262                self.accept()
263                return expr
264            if self.tok != ',':
265                raise QAPIParseError(self, "expected ',' or '}'")
266            self.accept()
267            if self.tok != "'":
268                raise QAPIParseError(self, "expected string")
269
270    def get_values(self):
271        expr = []
272        if self.tok == ']':
273            self.accept()
274            return expr
275        if self.tok not in "{['tf":
276            raise QAPIParseError(
277                self, "expected '{', '[', ']', string, or boolean")
278        while True:
279            expr.append(self.get_expr())
280            if self.tok == ']':
281                self.accept()
282                return expr
283            if self.tok != ',':
284                raise QAPIParseError(self, "expected ',' or ']'")
285            self.accept()
286
287    def get_expr(self):
288        if self.tok == '{':
289            self.accept()
290            expr = self.get_members()
291        elif self.tok == '[':
292            self.accept()
293            expr = self.get_values()
294        elif self.tok in "'tf":
295            expr = self.val
296            self.accept()
297        else:
298            raise QAPIParseError(
299                self, "expected '{', '[', string, or boolean")
300        return expr
301
302    def get_doc(self, info):
303        if self.val != '##':
304            raise QAPIParseError(
305                self, "junk after '##' at start of documentation comment")
306
307        docs = []
308        cur_doc = QAPIDoc(self, info)
309        self.accept(False)
310        while self.tok == '#':
311            assert isinstance(self.val, str)
312            if self.val.startswith('##'):
313                # End of doc comment
314                if self.val != '##':
315                    raise QAPIParseError(
316                        self,
317                        "junk after '##' at end of documentation comment")
318                cur_doc.end_comment()
319                docs.append(cur_doc)
320                self.accept()
321                return docs
322            if self.val.startswith('# ='):
323                if cur_doc.symbol:
324                    raise QAPIParseError(
325                        self,
326                        "unexpected '=' markup in definition documentation")
327                if cur_doc.body.text:
328                    cur_doc.end_comment()
329                    docs.append(cur_doc)
330                    cur_doc = QAPIDoc(self, info)
331            cur_doc.append(self.val)
332            self.accept(False)
333
334        raise QAPIParseError(self, "documentation comment must end with '##'")
335
336
337class QAPIDoc:
338    """
339    A documentation comment block, either definition or free-form
340
341    Definition documentation blocks consist of
342
343    * a body section: one line naming the definition, followed by an
344      overview (any number of lines)
345
346    * argument sections: a description of each argument (for commands
347      and events) or member (for structs, unions and alternates)
348
349    * features sections: a description of each feature flag
350
351    * additional (non-argument) sections, possibly tagged
352
353    Free-form documentation blocks consist only of a body section.
354    """
355
356    class Section:
357        def __init__(self, parser, name=None, indent=0):
358            # parser, for error messages about indentation
359            self._parser = parser
360            # optional section name (argument/member or section name)
361            self.name = name
362            self.text = ''
363            # the expected indent level of the text of this section
364            self._indent = indent
365
366        def append(self, line):
367            # Strip leading spaces corresponding to the expected indent level
368            # Blank lines are always OK.
369            if line:
370                indent = re.match(r'\s*', line).end()
371                if indent < self._indent:
372                    raise QAPIParseError(
373                        self._parser,
374                        "unexpected de-indent (expected at least %d spaces)" %
375                        self._indent)
376                line = line[self._indent:]
377
378            self.text += line.rstrip() + '\n'
379
380    class ArgSection(Section):
381        def __init__(self, parser, name, indent=0):
382            super().__init__(parser, name, indent)
383            self.member = None
384
385        def connect(self, member):
386            self.member = member
387
388    def __init__(self, parser, info):
389        # self._parser is used to report errors with QAPIParseError.  The
390        # resulting error position depends on the state of the parser.
391        # It happens to be the beginning of the comment.  More or less
392        # servicable, but action at a distance.
393        self._parser = parser
394        self.info = info
395        self.symbol = None
396        self.body = QAPIDoc.Section(parser)
397        # dict mapping parameter name to ArgSection
398        self.args = OrderedDict()
399        self.features = OrderedDict()
400        # a list of Section
401        self.sections = []
402        # the current section
403        self._section = self.body
404        self._append_line = self._append_body_line
405
406    def has_section(self, name):
407        """Return True if we have a section with this name."""
408        for i in self.sections:
409            if i.name == name:
410                return True
411        return False
412
413    def append(self, line):
414        """
415        Parse a comment line and add it to the documentation.
416
417        The way that the line is dealt with depends on which part of
418        the documentation we're parsing right now:
419        * The body section: ._append_line is ._append_body_line
420        * An argument section: ._append_line is ._append_args_line
421        * A features section: ._append_line is ._append_features_line
422        * An additional section: ._append_line is ._append_various_line
423        """
424        line = line[1:]
425        if not line:
426            self._append_freeform(line)
427            return
428
429        if line[0] != ' ':
430            raise QAPIParseError(self._parser, "missing space after #")
431        line = line[1:]
432        self._append_line(line)
433
    def end_comment(self):
        """Finish the documentation block by ending the current section."""
        self._end_section()
436
437    @staticmethod
438    def _is_section_tag(name):
439        return name in ('Returns:', 'Since:',
440                        # those are often singular or plural
441                        'Note:', 'Notes:',
442                        'Example:', 'Examples:',
443                        'TODO:')
444
445    def _append_body_line(self, line):
446        """
447        Process a line of documentation text in the body section.
448
449        If this a symbol line and it is the section's first line, this
450        is a definition documentation block for that symbol.
451
452        If it's a definition documentation block, another symbol line
453        begins the argument section for the argument named by it, and
454        a section tag begins an additional section.  Start that
455        section and append the line to it.
456
457        Else, append the line to the current section.
458        """
459        name = line.split(' ', 1)[0]
460        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
461        # recognized, and get silently treated as ordinary text
462        if not self.symbol and not self.body.text and line.startswith('@'):
463            if not line.endswith(':'):
464                raise QAPIParseError(self._parser, "line should end with ':'")
465            self.symbol = line[1:-1]
466            # FIXME invalid names other than the empty string aren't flagged
467            if not self.symbol:
468                raise QAPIParseError(self._parser, "invalid name")
469        elif self.symbol:
470            # This is a definition documentation block
471            if name.startswith('@') and name.endswith(':'):
472                self._append_line = self._append_args_line
473                self._append_args_line(line)
474            elif line == 'Features:':
475                self._append_line = self._append_features_line
476            elif self._is_section_tag(name):
477                self._append_line = self._append_various_line
478                self._append_various_line(line)
479            else:
480                self._append_freeform(line)
481        else:
482            # This is a free-form documentation block
483            self._append_freeform(line)
484
485    def _append_args_line(self, line):
486        """
487        Process a line of documentation text in an argument section.
488
489        A symbol line begins the next argument section, a section tag
490        section or a non-indented line after a blank line begins an
491        additional section.  Start that section and append the line to
492        it.
493
494        Else, append the line to the current section.
495
496        """
497        name = line.split(' ', 1)[0]
498
499        if name.startswith('@') and name.endswith(':'):
500            # If line is "@arg:   first line of description", find
501            # the index of 'f', which is the indent we expect for any
502            # following lines.  We then remove the leading "@arg:"
503            # from line and replace it with spaces so that 'f' has the
504            # same index as it did in the original line and can be
505            # handled the same way we will handle following lines.
506            indent = re.match(r'@\S*:\s*', line).end()
507            line = line[indent:]
508            if not line:
509                # Line was just the "@arg:" header; following lines
510                # are not indented
511                indent = 0
512            else:
513                line = ' ' * indent + line
514            self._start_args_section(name[1:-1], indent)
515        elif self._is_section_tag(name):
516            self._append_line = self._append_various_line
517            self._append_various_line(line)
518            return
519        elif (self._section.text.endswith('\n\n')
520              and line and not line[0].isspace()):
521            if line == 'Features:':
522                self._append_line = self._append_features_line
523            else:
524                self._start_section()
525                self._append_line = self._append_various_line
526                self._append_various_line(line)
527            return
528
529        self._append_freeform(line)
530
531    def _append_features_line(self, line):
532        name = line.split(' ', 1)[0]
533
534        if name.startswith('@') and name.endswith(':'):
535            # If line is "@arg:   first line of description", find
536            # the index of 'f', which is the indent we expect for any
537            # following lines.  We then remove the leading "@arg:"
538            # from line and replace it with spaces so that 'f' has the
539            # same index as it did in the original line and can be
540            # handled the same way we will handle following lines.
541            indent = re.match(r'@\S*:\s*', line).end()
542            line = line[indent:]
543            if not line:
544                # Line was just the "@arg:" header; following lines
545                # are not indented
546                indent = 0
547            else:
548                line = ' ' * indent + line
549            self._start_features_section(name[1:-1], indent)
550        elif self._is_section_tag(name):
551            self._append_line = self._append_various_line
552            self._append_various_line(line)
553            return
554        elif (self._section.text.endswith('\n\n')
555              and line and not line[0].isspace()):
556            self._start_section()
557            self._append_line = self._append_various_line
558            self._append_various_line(line)
559            return
560
561        self._append_freeform(line)
562
563    def _append_various_line(self, line):
564        """
565        Process a line of documentation text in an additional section.
566
567        A symbol line is an error.
568
569        A section tag begins an additional section.  Start that
570        section and append the line to it.
571
572        Else, append the line to the current section.
573        """
574        name = line.split(' ', 1)[0]
575
576        if name.startswith('@') and name.endswith(':'):
577            raise QAPIParseError(self._parser,
578                                 "'%s' can't follow '%s' section"
579                                 % (name, self.sections[0].name))
580        if self._is_section_tag(name):
581            # If line is "Section:   first line of description", find
582            # the index of 'f', which is the indent we expect for any
583            # following lines.  We then remove the leading "Section:"
584            # from line and replace it with spaces so that 'f' has the
585            # same index as it did in the original line and can be
586            # handled the same way we will handle following lines.
587            indent = re.match(r'\S*:\s*', line).end()
588            line = line[indent:]
589            if not line:
590                # Line was just the "Section:" header; following lines
591                # are not indented
592                indent = 0
593            else:
594                line = ' ' * indent + line
595            self._start_section(name[:-1], indent)
596
597        self._append_freeform(line)
598
599    def _start_symbol_section(self, symbols_dict, name, indent):
600        # FIXME invalid names other than the empty string aren't flagged
601        if not name:
602            raise QAPIParseError(self._parser, "invalid parameter name")
603        if name in symbols_dict:
604            raise QAPIParseError(self._parser,
605                                 "'%s' parameter name duplicated" % name)
606        assert not self.sections
607        self._end_section()
608        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
609        symbols_dict[name] = self._section
610
    def _start_args_section(self, name, indent):
        """Start the section for argument ``name``, recorded in .args."""
        self._start_symbol_section(self.args, name, indent)
613
    def _start_features_section(self, name, indent):
        """Start the section for feature ``name``, recorded in .features."""
        self._start_symbol_section(self.features, name, indent)
616
617    def _start_section(self, name=None, indent=0):
618        if name in ('Returns', 'Since') and self.has_section(name):
619            raise QAPIParseError(self._parser,
620                                 "duplicated '%s' section" % name)
621        self._end_section()
622        self._section = QAPIDoc.Section(self._parser, name, indent)
623        self.sections.append(self._section)
624
625    def _end_section(self):
626        if self._section:
627            text = self._section.text = self._section.text.strip()
628            if self._section.name and (not text or text.isspace()):
629                raise QAPIParseError(
630                    self._parser,
631                    "empty doc section '%s'" % self._section.name)
632            self._section = None
633
634    def _append_freeform(self, line):
635        match = re.match(r'(@\S+:)', line)
636        if match:
637            raise QAPIParseError(self._parser,
638                                 "'%s' not allowed in free-form documentation"
639                                 % match.group(1))
640        self._section.append(line)
641
642    def connect_member(self, member):
643        if member.name not in self.args:
644            # Undocumented TODO outlaw
645            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
646                                                        member.name)
647        self.args[member.name].connect(member)
648
649    def connect_feature(self, feature):
650        if feature.name not in self.features:
651            raise QAPISemError(feature.info,
652                               "feature '%s' lacks documentation"
653                               % feature.name)
654        self.features[feature.name].connect(feature)
655
656    def check_expr(self, expr):
657        if self.has_section('Returns') and 'command' not in expr:
658            raise QAPISemError(self.info,
659                               "'Returns:' is only valid for commands")
660
661    def check(self):
662
663        def check_args_section(args, info, what):
664            bogus = [name for name, section in args.items()
665                     if not section.member]
666            if bogus:
667                raise QAPISemError(
668                    self.info,
669                    "documented member%s '%s' %s not exist"
670                    % ("s" if len(bogus) > 1 else "",
671                       "', '".join(bogus),
672                       "do" if len(bogus) > 1 else "does"))
673
674        check_args_section(self.args, self.info, 'members')
675        check_args_section(self.features, self.info, 'features')
676