xref: /openbmc/qemu/scripts/qapi/parser.py (revision 7c610ce6a9950a49148fc3d37ed353958ca8d776)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors.

    The reported column is computed from the parser's lexer state: the
    source text between ``parser.line_pos`` and ``parser.pos`` is
    scanned, and each tab advances the column to the next tab stop.
    """
    def __init__(self, parser, msg):
        # Columns are 1-based.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 1, 9, 17, ...).
                # The former "(col + 7) % 8 + 1" never advanced: it
                # left columns 1..8 unchanged and wrapped larger ones
                # back into that range.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
37class QAPISchemaParser:
38
39    def __init__(self, fname, previously_included=None, incl_info=None):
40        self._fname = fname
41        self._included = previously_included or set()
42        self._included.add(os.path.abspath(self._fname))
43        self.src = ''
44
45        # Lexer state (see `accept` for details):
46        self.info = QAPISourceInfo(self._fname, incl_info)
47        self.tok = None
48        self.pos = 0
49        self.cursor = 0
50        self.val = None
51        self.line_pos = 0
52
53        # Parser output:
54        self.exprs = []
55        self.docs = []
56
57        # Showtime!
58        self._parse()
59
60    def _parse(self):
61        cur_doc = None
62
63        # May raise OSError; allow the caller to handle it.
64        with open(self._fname, 'r', encoding='utf-8') as fp:
65            self.src = fp.read()
66        if self.src == '' or self.src[-1] != '\n':
67            self.src += '\n'
68
69        # Prime the lexer:
70        self.accept()
71
72        # Parse until done:
73        while self.tok is not None:
74            info = self.info
75            if self.tok == '#':
76                self.reject_expr_doc(cur_doc)
77                for cur_doc in self.get_doc(info):
78                    self.docs.append(cur_doc)
79                continue
80
81            expr = self.get_expr(False)
82            if 'include' in expr:
83                self.reject_expr_doc(cur_doc)
84                if len(expr) != 1:
85                    raise QAPISemError(info, "invalid 'include' directive")
86                include = expr['include']
87                if not isinstance(include, str):
88                    raise QAPISemError(info,
89                                       "value of 'include' must be a string")
90                incl_fname = os.path.join(os.path.dirname(self._fname),
91                                          include)
92                self.exprs.append({'expr': {'include': incl_fname},
93                                   'info': info})
94                exprs_include = self._include(include, info, incl_fname,
95                                              self._included)
96                if exprs_include:
97                    self.exprs.extend(exprs_include.exprs)
98                    self.docs.extend(exprs_include.docs)
99            elif "pragma" in expr:
100                self.reject_expr_doc(cur_doc)
101                if len(expr) != 1:
102                    raise QAPISemError(info, "invalid 'pragma' directive")
103                pragma = expr['pragma']
104                if not isinstance(pragma, dict):
105                    raise QAPISemError(
106                        info, "value of 'pragma' must be an object")
107                for name, value in pragma.items():
108                    self._pragma(name, value, info)
109            else:
110                expr_elem = {'expr': expr,
111                             'info': info}
112                if cur_doc:
113                    if not cur_doc.symbol:
114                        raise QAPISemError(
115                            cur_doc.info, "definition documentation required")
116                    expr_elem['doc'] = cur_doc
117                self.exprs.append(expr_elem)
118            cur_doc = None
119        self.reject_expr_doc(cur_doc)
120
121    @staticmethod
122    def reject_expr_doc(doc):
123        if doc and doc.symbol:
124            raise QAPISemError(
125                doc.info,
126                "documentation for '%s' is not followed by the definition"
127                % doc.symbol)
128
129    def _include(self, include, info, incl_fname, previously_included):
130        incl_abs_fname = os.path.abspath(incl_fname)
131        # catch inclusion cycle
132        inf = info
133        while inf:
134            if incl_abs_fname == os.path.abspath(inf.fname):
135                raise QAPISemError(info, "inclusion loop for %s" % include)
136            inf = inf.parent
137
138        # skip multiple include of the same file
139        if incl_abs_fname in previously_included:
140            return None
141
142        try:
143            return QAPISchemaParser(incl_fname, previously_included, info)
144        except OSError as err:
145            raise QAPISemError(
146                info,
147                f"can't read include file '{incl_fname}': {err.strerror}"
148            ) from err
149
150    def _check_pragma_list_of_str(self, name, value, info):
151        if (not isinstance(value, list)
152                or any([not isinstance(elt, str) for elt in value])):
153            raise QAPISemError(
154                info,
155                "pragma %s must be a list of strings" % name)
156
157    def _pragma(self, name, value, info):
158        if name == 'doc-required':
159            if not isinstance(value, bool):
160                raise QAPISemError(info,
161                                   "pragma 'doc-required' must be boolean")
162            info.pragma.doc_required = value
163        elif name == 'command-name-exceptions':
164            self._check_pragma_list_of_str(name, value, info)
165            info.pragma.command_name_exceptions = value
166        elif name == 'command-returns-exceptions':
167            self._check_pragma_list_of_str(name, value, info)
168            info.pragma.command_returns_exceptions = value
169        elif name == 'member-name-exceptions':
170            self._check_pragma_list_of_str(name, value, info)
171            info.pragma.member_name_exceptions = value
172        else:
173            raise QAPISemError(info, "unknown pragma '%s'" % name)
174
175    def accept(self, skip_comment=True):
176        while True:
177            self.tok = self.src[self.cursor]
178            self.pos = self.cursor
179            self.cursor += 1
180            self.val = None
181
182            if self.tok == '#':
183                if self.src[self.cursor] == '#':
184                    # Start of doc comment
185                    skip_comment = False
186                self.cursor = self.src.find('\n', self.cursor)
187                if not skip_comment:
188                    self.val = self.src[self.pos:self.cursor]
189                    return
190            elif self.tok in '{}:,[]':
191                return
192            elif self.tok == "'":
193                # Note: we accept only printable ASCII
194                string = ''
195                esc = False
196                while True:
197                    ch = self.src[self.cursor]
198                    self.cursor += 1
199                    if ch == '\n':
200                        raise QAPIParseError(self, "missing terminating \"'\"")
201                    if esc:
202                        # Note: we recognize only \\ because we have
203                        # no use for funny characters in strings
204                        if ch != '\\':
205                            raise QAPIParseError(self,
206                                                 "unknown escape \\%s" % ch)
207                        esc = False
208                    elif ch == '\\':
209                        esc = True
210                        continue
211                    elif ch == "'":
212                        self.val = string
213                        return
214                    if ord(ch) < 32 or ord(ch) >= 127:
215                        raise QAPIParseError(
216                            self, "funny character in string")
217                    string += ch
218            elif self.src.startswith('true', self.pos):
219                self.val = True
220                self.cursor += 3
221                return
222            elif self.src.startswith('false', self.pos):
223                self.val = False
224                self.cursor += 4
225                return
226            elif self.tok == '\n':
227                if self.cursor == len(self.src):
228                    self.tok = None
229                    return
230                self.info = self.info.next_line()
231                self.line_pos = self.cursor
232            elif not self.tok.isspace():
233                # Show up to next structural, whitespace or quote
234                # character
235                match = re.match('[^[\\]{}:,\\s\'"]+',
236                                 self.src[self.cursor-1:])
237                raise QAPIParseError(self, "stray '%s'" % match.group(0))
238
239    def get_members(self):
240        expr = OrderedDict()
241        if self.tok == '}':
242            self.accept()
243            return expr
244        if self.tok != "'":
245            raise QAPIParseError(self, "expected string or '}'")
246        while True:
247            key = self.val
248            self.accept()
249            if self.tok != ':':
250                raise QAPIParseError(self, "expected ':'")
251            self.accept()
252            if key in expr:
253                raise QAPIParseError(self, "duplicate key '%s'" % key)
254            expr[key] = self.get_expr(True)
255            if self.tok == '}':
256                self.accept()
257                return expr
258            if self.tok != ',':
259                raise QAPIParseError(self, "expected ',' or '}'")
260            self.accept()
261            if self.tok != "'":
262                raise QAPIParseError(self, "expected string")
263
264    def get_values(self):
265        expr = []
266        if self.tok == ']':
267            self.accept()
268            return expr
269        if self.tok not in "{['tf":
270            raise QAPIParseError(
271                self, "expected '{', '[', ']', string, or boolean")
272        while True:
273            expr.append(self.get_expr(True))
274            if self.tok == ']':
275                self.accept()
276                return expr
277            if self.tok != ',':
278                raise QAPIParseError(self, "expected ',' or ']'")
279            self.accept()
280
281    def get_expr(self, nested):
282        if self.tok != '{' and not nested:
283            raise QAPIParseError(self, "expected '{'")
284        if self.tok == '{':
285            self.accept()
286            expr = self.get_members()
287        elif self.tok == '[':
288            self.accept()
289            expr = self.get_values()
290        elif self.tok in "'tf":
291            expr = self.val
292            self.accept()
293        else:
294            raise QAPIParseError(
295                self, "expected '{', '[', string, or boolean")
296        return expr
297
298    def get_doc(self, info):
299        if self.val != '##':
300            raise QAPIParseError(
301                self, "junk after '##' at start of documentation comment")
302
303        docs = []
304        cur_doc = QAPIDoc(self, info)
305        self.accept(False)
306        while self.tok == '#':
307            assert isinstance(self.val, str)
308            if self.val.startswith('##'):
309                # End of doc comment
310                if self.val != '##':
311                    raise QAPIParseError(
312                        self,
313                        "junk after '##' at end of documentation comment")
314                cur_doc.end_comment()
315                docs.append(cur_doc)
316                self.accept()
317                return docs
318            if self.val.startswith('# ='):
319                if cur_doc.symbol:
320                    raise QAPIParseError(
321                        self,
322                        "unexpected '=' markup in definition documentation")
323                if cur_doc.body.text:
324                    cur_doc.end_comment()
325                    docs.append(cur_doc)
326                    cur_doc = QAPIDoc(self, info)
327            cur_doc.append(self.val)
328            self.accept(False)
329
330        raise QAPIParseError(self, "documentation comment must end with '##'")
331
332
333class QAPIDoc:
334    """
335    A documentation comment block, either definition or free-form
336
337    Definition documentation blocks consist of
338
339    * a body section: one line naming the definition, followed by an
340      overview (any number of lines)
341
342    * argument sections: a description of each argument (for commands
343      and events) or member (for structs, unions and alternates)
344
345    * features sections: a description of each feature flag
346
347    * additional (non-argument) sections, possibly tagged
348
349    Free-form documentation blocks consist only of a body section.
350    """
351
352    class Section:
353        def __init__(self, parser, name=None, indent=0):
354            # parser, for error messages about indentation
355            self._parser = parser
356            # optional section name (argument/member or section name)
357            self.name = name
358            self.text = ''
359            # the expected indent level of the text of this section
360            self._indent = indent
361
362        def append(self, line):
363            # Strip leading spaces corresponding to the expected indent level
364            # Blank lines are always OK.
365            if line:
366                indent = re.match(r'\s*', line).end()
367                if indent < self._indent:
368                    raise QAPIParseError(
369                        self._parser,
370                        "unexpected de-indent (expected at least %d spaces)" %
371                        self._indent)
372                line = line[self._indent:]
373
374            self.text += line.rstrip() + '\n'
375
376    class ArgSection(Section):
377        def __init__(self, parser, name, indent=0):
378            super().__init__(parser, name, indent)
379            self.member = None
380
381        def connect(self, member):
382            self.member = member
383
384    def __init__(self, parser, info):
385        # self._parser is used to report errors with QAPIParseError.  The
386        # resulting error position depends on the state of the parser.
387        # It happens to be the beginning of the comment.  More or less
388        # servicable, but action at a distance.
389        self._parser = parser
390        self.info = info
391        self.symbol = None
392        self.body = QAPIDoc.Section(parser)
393        # dict mapping parameter name to ArgSection
394        self.args = OrderedDict()
395        self.features = OrderedDict()
396        # a list of Section
397        self.sections = []
398        # the current section
399        self._section = self.body
400        self._append_line = self._append_body_line
401
402    def has_section(self, name):
403        """Return True if we have a section with this name."""
404        for i in self.sections:
405            if i.name == name:
406                return True
407        return False
408
409    def append(self, line):
410        """
411        Parse a comment line and add it to the documentation.
412
413        The way that the line is dealt with depends on which part of
414        the documentation we're parsing right now:
415        * The body section: ._append_line is ._append_body_line
416        * An argument section: ._append_line is ._append_args_line
417        * A features section: ._append_line is ._append_features_line
418        * An additional section: ._append_line is ._append_various_line
419        """
420        line = line[1:]
421        if not line:
422            self._append_freeform(line)
423            return
424
425        if line[0] != ' ':
426            raise QAPIParseError(self._parser, "missing space after #")
427        line = line[1:]
428        self._append_line(line)
429
    def end_comment(self):
        """Finish the documentation block by ending the current section."""
        self._end_section()
432
433    @staticmethod
434    def _is_section_tag(name):
435        return name in ('Returns:', 'Since:',
436                        # those are often singular or plural
437                        'Note:', 'Notes:',
438                        'Example:', 'Examples:',
439                        'TODO:')
440
441    def _append_body_line(self, line):
442        """
443        Process a line of documentation text in the body section.
444
445        If this a symbol line and it is the section's first line, this
446        is a definition documentation block for that symbol.
447
448        If it's a definition documentation block, another symbol line
449        begins the argument section for the argument named by it, and
450        a section tag begins an additional section.  Start that
451        section and append the line to it.
452
453        Else, append the line to the current section.
454        """
455        name = line.split(' ', 1)[0]
456        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
457        # recognized, and get silently treated as ordinary text
458        if not self.symbol and not self.body.text and line.startswith('@'):
459            if not line.endswith(':'):
460                raise QAPIParseError(self._parser, "line should end with ':'")
461            self.symbol = line[1:-1]
462            # FIXME invalid names other than the empty string aren't flagged
463            if not self.symbol:
464                raise QAPIParseError(self._parser, "invalid name")
465        elif self.symbol:
466            # This is a definition documentation block
467            if name.startswith('@') and name.endswith(':'):
468                self._append_line = self._append_args_line
469                self._append_args_line(line)
470            elif line == 'Features:':
471                self._append_line = self._append_features_line
472            elif self._is_section_tag(name):
473                self._append_line = self._append_various_line
474                self._append_various_line(line)
475            else:
476                self._append_freeform(line)
477        else:
478            # This is a free-form documentation block
479            self._append_freeform(line)
480
481    def _append_args_line(self, line):
482        """
483        Process a line of documentation text in an argument section.
484
485        A symbol line begins the next argument section, a section tag
486        section or a non-indented line after a blank line begins an
487        additional section.  Start that section and append the line to
488        it.
489
490        Else, append the line to the current section.
491
492        """
493        name = line.split(' ', 1)[0]
494
495        if name.startswith('@') and name.endswith(':'):
496            # If line is "@arg:   first line of description", find
497            # the index of 'f', which is the indent we expect for any
498            # following lines.  We then remove the leading "@arg:"
499            # from line and replace it with spaces so that 'f' has the
500            # same index as it did in the original line and can be
501            # handled the same way we will handle following lines.
502            indent = re.match(r'@\S*:\s*', line).end()
503            line = line[indent:]
504            if not line:
505                # Line was just the "@arg:" header; following lines
506                # are not indented
507                indent = 0
508            else:
509                line = ' ' * indent + line
510            self._start_args_section(name[1:-1], indent)
511        elif self._is_section_tag(name):
512            self._append_line = self._append_various_line
513            self._append_various_line(line)
514            return
515        elif (self._section.text.endswith('\n\n')
516              and line and not line[0].isspace()):
517            if line == 'Features:':
518                self._append_line = self._append_features_line
519            else:
520                self._start_section()
521                self._append_line = self._append_various_line
522                self._append_various_line(line)
523            return
524
525        self._append_freeform(line)
526
527    def _append_features_line(self, line):
528        name = line.split(' ', 1)[0]
529
530        if name.startswith('@') and name.endswith(':'):
531            # If line is "@arg:   first line of description", find
532            # the index of 'f', which is the indent we expect for any
533            # following lines.  We then remove the leading "@arg:"
534            # from line and replace it with spaces so that 'f' has the
535            # same index as it did in the original line and can be
536            # handled the same way we will handle following lines.
537            indent = re.match(r'@\S*:\s*', line).end()
538            line = line[indent:]
539            if not line:
540                # Line was just the "@arg:" header; following lines
541                # are not indented
542                indent = 0
543            else:
544                line = ' ' * indent + line
545            self._start_features_section(name[1:-1], indent)
546        elif self._is_section_tag(name):
547            self._append_line = self._append_various_line
548            self._append_various_line(line)
549            return
550        elif (self._section.text.endswith('\n\n')
551              and line and not line[0].isspace()):
552            self._start_section()
553            self._append_line = self._append_various_line
554            self._append_various_line(line)
555            return
556
557        self._append_freeform(line)
558
559    def _append_various_line(self, line):
560        """
561        Process a line of documentation text in an additional section.
562
563        A symbol line is an error.
564
565        A section tag begins an additional section.  Start that
566        section and append the line to it.
567
568        Else, append the line to the current section.
569        """
570        name = line.split(' ', 1)[0]
571
572        if name.startswith('@') and name.endswith(':'):
573            raise QAPIParseError(self._parser,
574                                 "'%s' can't follow '%s' section"
575                                 % (name, self.sections[0].name))
576        if self._is_section_tag(name):
577            # If line is "Section:   first line of description", find
578            # the index of 'f', which is the indent we expect for any
579            # following lines.  We then remove the leading "Section:"
580            # from line and replace it with spaces so that 'f' has the
581            # same index as it did in the original line and can be
582            # handled the same way we will handle following lines.
583            indent = re.match(r'\S*:\s*', line).end()
584            line = line[indent:]
585            if not line:
586                # Line was just the "Section:" header; following lines
587                # are not indented
588                indent = 0
589            else:
590                line = ' ' * indent + line
591            self._start_section(name[:-1], indent)
592
593        self._append_freeform(line)
594
595    def _start_symbol_section(self, symbols_dict, name, indent):
596        # FIXME invalid names other than the empty string aren't flagged
597        if not name:
598            raise QAPIParseError(self._parser, "invalid parameter name")
599        if name in symbols_dict:
600            raise QAPIParseError(self._parser,
601                                 "'%s' parameter name duplicated" % name)
602        assert not self.sections
603        self._end_section()
604        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
605        symbols_dict[name] = self._section
606
    def _start_args_section(self, name, indent):
        """Start the section for argument @name, recorded in self.args."""
        self._start_symbol_section(self.args, name, indent)
609
    def _start_features_section(self, name, indent):
        """Start the section for feature @name, recorded in self.features."""
        self._start_symbol_section(self.features, name, indent)
612
613    def _start_section(self, name=None, indent=0):
614        if name in ('Returns', 'Since') and self.has_section(name):
615            raise QAPIParseError(self._parser,
616                                 "duplicated '%s' section" % name)
617        self._end_section()
618        self._section = QAPIDoc.Section(self._parser, name, indent)
619        self.sections.append(self._section)
620
621    def _end_section(self):
622        if self._section:
623            text = self._section.text = self._section.text.strip()
624            if self._section.name and (not text or text.isspace()):
625                raise QAPIParseError(
626                    self._parser,
627                    "empty doc section '%s'" % self._section.name)
628            self._section = None
629
630    def _append_freeform(self, line):
631        match = re.match(r'(@\S+:)', line)
632        if match:
633            raise QAPIParseError(self._parser,
634                                 "'%s' not allowed in free-form documentation"
635                                 % match.group(1))
636        self._section.append(line)
637
638    def connect_member(self, member):
639        if member.name not in self.args:
640            # Undocumented TODO outlaw
641            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
642                                                        member.name)
643        self.args[member.name].connect(member)
644
645    def connect_feature(self, feature):
646        if feature.name not in self.features:
647            raise QAPISemError(feature.info,
648                               "feature '%s' lacks documentation"
649                               % feature.name)
650        self.features[feature.name].connect(feature)
651
652    def check_expr(self, expr):
653        if self.has_section('Returns') and 'command' not in expr:
654            raise QAPISemError(self.info,
655                               "'Returns:' is only valid for commands")
656
657    def check(self):
658
659        def check_args_section(args, info, what):
660            bogus = [name for name, section in args.items()
661                     if not section.member]
662            if bogus:
663                raise QAPISemError(
664                    self.info,
665                    "documented member%s '%s' %s not exist"
666                    % ("s" if len(bogus) > 1 else "",
667                       "', '".join(bogus),
668                       "do" if len(bogus) > 1 else "does"))
669
670        check_args_section(self.args, self.info, 'members')
671        check_args_section(self.features, self.info, 'features')
672