xref: /openbmc/qemu/scripts/qapi/parser.py (revision 16ff40acc9c1fc871c2c835b3b20e374d6daed98)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors.

    The error location is taken from the parser's lexer state: the
    source info is ``parser.info`` and the column is derived from the
    text between the start of the current line (``parser.line_pos``)
    and the start of the current token (``parser.pos``).
    """
    def __init__(self, parser, msg):
        """
        :param parser: the parser whose ``src``, ``line_pos``, ``pos``
                       and ``info`` attributes locate the error
        :param msg: the error message
        """
        # Columns are 1-based.  A tab advances to the next tab stop
        # (columns 1, 9, 17, ...); any other character advances by one.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
class QAPISchemaParser:
    """
    Parser for one QAPI schema file and the files it includes.

    Constructing an instance reads and parses the file immediately.
    Afterwards, the results are available as:

    * ``exprs``: a list of dicts with keys 'expr' (the parsed
      expression), 'info' (its source location) and, when a
      documentation comment precedes the expression, 'doc'

    * ``docs``: a list of the QAPIDoc blocks, in the order encountered

    The expressions and docs of an included file are spliced in at the
    point of the 'include' directive.
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        """
        :param fname: name of the schema file to parse
        :param previously_included: set of absolute file names already
            parsed; shared with nested parsers so that a file is only
            included once
        :param incl_info: source information of the 'include'
            directive that caused this file to be parsed, if any
        :raise OSError: when the file can't be opened or read
        """
        self._fname = fname
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok = None
        self.pos = 0
        self.cursor = 0
        self.val = None
        self.line_pos = 0

        # Parser output:
        self.exprs = []
        self.docs = []

        # Showtime!
        self._parse()

    def _parse(self):
        """
        Read the schema file and parse it into `exprs` and `docs`.

        Handles documentation comments, 'include' and 'pragma'
        directives, and ordinary top-level expressions.  A definition
        documentation block is attached to the expression that
        immediately follows it.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr(False)
            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """
        Raise QAPISemError if DOC is a definition documentation block.

        Used where the documentation is not followed by the
        definition it documents.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    def _include(self, include, info, incl_fname, previously_included):
        """
        Parse the included file INCL_FNAME.

        :param include: the file name as written in the directive
        :param info: source information of the 'include' directive
        :param incl_fname: the file name to open
        :param previously_included: set of absolute file names already
            parsed
        :return: the parser for the included file, or None when the
            file has been included before
        :raise QAPISemError: on an inclusion loop, or when the file
            can't be read
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    def _check_pragma_list_of_str(self, name, value, info):
        """Raise QAPISemError unless VALUE is a list of strings."""
        if (not isinstance(value, list)
                or any(not isinstance(elt, str) for elt in value)):
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)

    def _pragma(self, name, value, info):
        """
        Validate pragma NAME's VALUE and store it in ``info.pragma``.

        :raise QAPISemError: on an unknown pragma or a value of the
            wrong type
        """
        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            info.pragma.doc_required = value
        elif name == 'command-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_name_exceptions = value
        elif name == 'command-returns-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_returns_exceptions = value
        elif name == 'member-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.member_name_exceptions = value
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next lexical token and update the lexer state.

        On return:

        * ``self.tok`` is the first character of the token, or None at
          the end of input
        * ``self.val`` is the token's value: the string's contents for
          a string token ("'"), True or False for a boolean token
          ('t' or 'f'), the comment text for a comment token ('#'),
          and None for any other token
        * ``self.pos`` is the index of the token's first character in
          ``self.src``
        * ``self.cursor`` is the index of the first character after
          the token
        * ``self.info`` and ``self.line_pos`` describe the current line

        :param skip_comment: when true, skip ordinary comments.
            Comments starting with '##' are returned as tokens
            regardless.
        :raise QAPIParseError: on a lexical error
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # The source always ends with a newline, so this
                # can't fail to find one.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or single
                # quote character.  A double quote must not be a stop
                # character here: it can be the stray token itself,
                # and then the pattern would match nothing.
                match = re.match('[^[\\]{}:,\\s\']+',
                                 self.src[self.cursor-1:])
                stray = match.group(0) if match else self.tok
                raise QAPIParseError(self, "stray '%s'" % stray)

    def get_members(self):
        """
        Parse the members of an object; the '{' has been consumed.

        :return: an OrderedDict mapping member names to their values
        :raise QAPIParseError: on a syntax error or a duplicate key
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr(True)
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse the elements of an array; the '[' has been consumed.

        :return: a list of the element values
        :raise QAPIParseError: on a syntax error
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        # self.tok is None at end of input.  Test membership in a
        # tuple, because 'None in "..."' is a TypeError.
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr(True))
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self, nested):
        """
        Parse an expression starting at the current token.

        :param nested: when false, the expression must be an object;
            when true, it may also be an array, a string or a boolean
        :return: an OrderedDict, a list, a str or a bool
        :raise QAPIParseError: on a syntax error
        """
        if self.tok != '{' and not nested:
            raise QAPIParseError(self, "expected '{'")
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        # self.tok is None at end of input.  Test membership in a
        # tuple, because 'None in "..."' is a TypeError.
        elif self.tok in tuple("'tf"):
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment; the current token is its first
        line.

        A line starting with '# =' begins a new documentation block
        when the current block's body already has text, so one comment
        can yield several blocks.

        :param info: source information for the resulting blocks
        :return: a list of QAPIDoc
        :raise QAPIParseError: on malformed documentation
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
330
331
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional name plus text."""

        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """
            Append LINE to the section's text.

            The section's expected indentation is removed from the
            start of the line, and trailing whitespace from its end.

            :raise QAPIParseError: when a non-empty line is indented
                less than expected
            """
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = re.match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """A section describing an argument, member or feature."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the object this section documents; set by connect()
            self.member = None

        def connect(self, member):
            """Record MEMBER as the object this section documents."""
            self.member = member

    def __init__(self, parser, info):
        """
        :param parser: the parser, used to report errors
        :param info: source information for this block
        """
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        # the method handling the next line; see append()
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the block: end the current section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        """Return True if NAME is a tag starting an additional section."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this a symbol line and it is the section's first line, this
        is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section, a section tag
        or a non-indented line after a blank line begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        End the current section and start an ArgSection named NAME.

        The new section is recorded in SYMBOLS_DICT under NAME.

        :raise QAPIParseError: when NAME is empty or already in
            SYMBOLS_DICT
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        """Start the argument section for argument NAME."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the feature section for feature NAME."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        End the current section and start an additional section.

        :raise QAPIParseError: on a second 'Returns' or 'Since'
            section
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """
        End the current section, if any, stripping its text.

        :raise QAPIParseError: when a named section has no text
        """
        if self._section:
            text = self._section.text = self._section.text.strip()
            if self._section.name and not text:
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """
        Append LINE to the current section.

        :raise QAPIParseError: when the line starts with a symbol
            ('@name:'), which is not allowed here
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect MEMBER to the argument section documenting it.

        An undocumented member gets an empty section created for it.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect FEATURE to the feature section documenting it.

        :raise QAPISemError: when the feature is not documented
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """Raise QAPISemError on a 'Returns' section for a non-command."""
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that every documented member and feature exists.

        A section whose ``member`` was never set by connect() documents
        something that does not exist.

        :raise QAPISemError: naming the offending members or features
        """

        def check_args_section(args, what):
            # WHAT is the singular noun used in the error message
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')
671