xref: /openbmc/qemu/scripts/qapi/parser.py (revision e0e8a0ac2e60fdebd7ff0f831250c849f22af35d)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .common import must_match
22from .error import QAPISemError, QAPISourceError
23from .source import QAPISourceInfo
24
25
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        """
        Report @msg at the position of @parser's current token.

        The column is derived from the text between the start of the
        current line (parser.line_pos) and the current token
        (parser.pos).  It is passed to QAPISourceError together with
        parser.info.
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 1, 9, 17, ...).
                # The former "(col + 7) % 8 + 1" mapped column 1 to 1
                # and column 9 back to 1, i.e. it never advanced.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
36
37
38class QAPISchemaParser:
39
40    def __init__(self, fname, previously_included=None, incl_info=None):
41        self._fname = fname
42        self._included = previously_included or set()
43        self._included.add(os.path.abspath(self._fname))
44        self.src = ''
45
46        # Lexer state (see `accept` for details):
47        self.info = QAPISourceInfo(self._fname, incl_info)
48        self.tok = None
49        self.pos = 0
50        self.cursor = 0
51        self.val = None
52        self.line_pos = 0
53
54        # Parser output:
55        self.exprs = []
56        self.docs = []
57
58        # Showtime!
59        self._parse()
60
61    def _parse(self):
62        cur_doc = None
63
64        # May raise OSError; allow the caller to handle it.
65        with open(self._fname, 'r', encoding='utf-8') as fp:
66            self.src = fp.read()
67        if self.src == '' or self.src[-1] != '\n':
68            self.src += '\n'
69
70        # Prime the lexer:
71        self.accept()
72
73        # Parse until done:
74        while self.tok is not None:
75            info = self.info
76            if self.tok == '#':
77                self.reject_expr_doc(cur_doc)
78                for cur_doc in self.get_doc(info):
79                    self.docs.append(cur_doc)
80                continue
81
82            expr = self.get_expr()
83            if not isinstance(expr, dict):
84                raise QAPISemError(
85                    info, "top-level expression must be an object")
86
87            if 'include' in expr:
88                self.reject_expr_doc(cur_doc)
89                if len(expr) != 1:
90                    raise QAPISemError(info, "invalid 'include' directive")
91                include = expr['include']
92                if not isinstance(include, str):
93                    raise QAPISemError(info,
94                                       "value of 'include' must be a string")
95                incl_fname = os.path.join(os.path.dirname(self._fname),
96                                          include)
97                self.exprs.append({'expr': {'include': incl_fname},
98                                   'info': info})
99                exprs_include = self._include(include, info, incl_fname,
100                                              self._included)
101                if exprs_include:
102                    self.exprs.extend(exprs_include.exprs)
103                    self.docs.extend(exprs_include.docs)
104            elif "pragma" in expr:
105                self.reject_expr_doc(cur_doc)
106                if len(expr) != 1:
107                    raise QAPISemError(info, "invalid 'pragma' directive")
108                pragma = expr['pragma']
109                if not isinstance(pragma, dict):
110                    raise QAPISemError(
111                        info, "value of 'pragma' must be an object")
112                for name, value in pragma.items():
113                    self._pragma(name, value, info)
114            else:
115                expr_elem = {'expr': expr,
116                             'info': info}
117                if cur_doc:
118                    if not cur_doc.symbol:
119                        raise QAPISemError(
120                            cur_doc.info, "definition documentation required")
121                    expr_elem['doc'] = cur_doc
122                self.exprs.append(expr_elem)
123            cur_doc = None
124        self.reject_expr_doc(cur_doc)
125
126    @staticmethod
127    def reject_expr_doc(doc):
128        if doc and doc.symbol:
129            raise QAPISemError(
130                doc.info,
131                "documentation for '%s' is not followed by the definition"
132                % doc.symbol)
133
134    @staticmethod
135    def _include(include, info, incl_fname, previously_included):
136        incl_abs_fname = os.path.abspath(incl_fname)
137        # catch inclusion cycle
138        inf = info
139        while inf:
140            if incl_abs_fname == os.path.abspath(inf.fname):
141                raise QAPISemError(info, "inclusion loop for %s" % include)
142            inf = inf.parent
143
144        # skip multiple include of the same file
145        if incl_abs_fname in previously_included:
146            return None
147
148        try:
149            return QAPISchemaParser(incl_fname, previously_included, info)
150        except OSError as err:
151            raise QAPISemError(
152                info,
153                f"can't read include file '{incl_fname}': {err.strerror}"
154            ) from err
155
156    @staticmethod
157    def _check_pragma_list_of_str(name, value, info):
158        if (not isinstance(value, list)
159                or any([not isinstance(elt, str) for elt in value])):
160            raise QAPISemError(
161                info,
162                "pragma %s must be a list of strings" % name)
163
164    def _pragma(self, name, value, info):
165        if name == 'doc-required':
166            if not isinstance(value, bool):
167                raise QAPISemError(info,
168                                   "pragma 'doc-required' must be boolean")
169            info.pragma.doc_required = value
170        elif name == 'command-name-exceptions':
171            self._check_pragma_list_of_str(name, value, info)
172            info.pragma.command_name_exceptions = value
173        elif name == 'command-returns-exceptions':
174            self._check_pragma_list_of_str(name, value, info)
175            info.pragma.command_returns_exceptions = value
176        elif name == 'member-name-exceptions':
177            self._check_pragma_list_of_str(name, value, info)
178            info.pragma.member_name_exceptions = value
179        else:
180            raise QAPISemError(info, "unknown pragma '%s'" % name)
181
182    def accept(self, skip_comment=True):
183        while True:
184            self.tok = self.src[self.cursor]
185            self.pos = self.cursor
186            self.cursor += 1
187            self.val = None
188
189            if self.tok == '#':
190                if self.src[self.cursor] == '#':
191                    # Start of doc comment
192                    skip_comment = False
193                self.cursor = self.src.find('\n', self.cursor)
194                if not skip_comment:
195                    self.val = self.src[self.pos:self.cursor]
196                    return
197            elif self.tok in '{}:,[]':
198                return
199            elif self.tok == "'":
200                # Note: we accept only printable ASCII
201                string = ''
202                esc = False
203                while True:
204                    ch = self.src[self.cursor]
205                    self.cursor += 1
206                    if ch == '\n':
207                        raise QAPIParseError(self, "missing terminating \"'\"")
208                    if esc:
209                        # Note: we recognize only \\ because we have
210                        # no use for funny characters in strings
211                        if ch != '\\':
212                            raise QAPIParseError(self,
213                                                 "unknown escape \\%s" % ch)
214                        esc = False
215                    elif ch == '\\':
216                        esc = True
217                        continue
218                    elif ch == "'":
219                        self.val = string
220                        return
221                    if ord(ch) < 32 or ord(ch) >= 127:
222                        raise QAPIParseError(
223                            self, "funny character in string")
224                    string += ch
225            elif self.src.startswith('true', self.pos):
226                self.val = True
227                self.cursor += 3
228                return
229            elif self.src.startswith('false', self.pos):
230                self.val = False
231                self.cursor += 4
232                return
233            elif self.tok == '\n':
234                if self.cursor == len(self.src):
235                    self.tok = None
236                    return
237                self.info = self.info.next_line()
238                self.line_pos = self.cursor
239            elif not self.tok.isspace():
240                # Show up to next structural, whitespace or quote
241                # character
242                match = must_match('[^[\\]{}:,\\s\'"]+',
243                                   self.src[self.cursor-1:])
244                raise QAPIParseError(self, "stray '%s'" % match.group(0))
245
246    def get_members(self):
247        expr = OrderedDict()
248        if self.tok == '}':
249            self.accept()
250            return expr
251        if self.tok != "'":
252            raise QAPIParseError(self, "expected string or '}'")
253        while True:
254            key = self.val
255            assert isinstance(key, str)  # Guaranteed by tok == "'"
256
257            self.accept()
258            if self.tok != ':':
259                raise QAPIParseError(self, "expected ':'")
260            self.accept()
261            if key in expr:
262                raise QAPIParseError(self, "duplicate key '%s'" % key)
263            expr[key] = self.get_expr()
264            if self.tok == '}':
265                self.accept()
266                return expr
267            if self.tok != ',':
268                raise QAPIParseError(self, "expected ',' or '}'")
269            self.accept()
270            if self.tok != "'":
271                raise QAPIParseError(self, "expected string")
272
273    def get_values(self):
274        expr = []
275        if self.tok == ']':
276            self.accept()
277            return expr
278        if self.tok not in "{['tf":
279            raise QAPIParseError(
280                self, "expected '{', '[', ']', string, or boolean")
281        while True:
282            expr.append(self.get_expr())
283            if self.tok == ']':
284                self.accept()
285                return expr
286            if self.tok != ',':
287                raise QAPIParseError(self, "expected ',' or ']'")
288            self.accept()
289
290    def get_expr(self):
291        if self.tok == '{':
292            self.accept()
293            expr = self.get_members()
294        elif self.tok == '[':
295            self.accept()
296            expr = self.get_values()
297        elif self.tok in "'tf":
298            expr = self.val
299            self.accept()
300        else:
301            raise QAPIParseError(
302                self, "expected '{', '[', string, or boolean")
303        return expr
304
305    def get_doc(self, info):
306        if self.val != '##':
307            raise QAPIParseError(
308                self, "junk after '##' at start of documentation comment")
309
310        docs = []
311        cur_doc = QAPIDoc(self, info)
312        self.accept(False)
313        while self.tok == '#':
314            assert isinstance(self.val, str)
315            if self.val.startswith('##'):
316                # End of doc comment
317                if self.val != '##':
318                    raise QAPIParseError(
319                        self,
320                        "junk after '##' at end of documentation comment")
321                cur_doc.end_comment()
322                docs.append(cur_doc)
323                self.accept()
324                return docs
325            if self.val.startswith('# ='):
326                if cur_doc.symbol:
327                    raise QAPIParseError(
328                        self,
329                        "unexpected '=' markup in definition documentation")
330                if cur_doc.body.text:
331                    cur_doc.end_comment()
332                    docs.append(cur_doc)
333                    cur_doc = QAPIDoc(self, info)
334            cur_doc.append(self.val)
335            self.accept(False)
336
337        raise QAPIParseError(self, "documentation comment must end with '##'")
338
339
340class QAPIDoc:
341    """
342    A documentation comment block, either definition or free-form
343
344    Definition documentation blocks consist of
345
346    * a body section: one line naming the definition, followed by an
347      overview (any number of lines)
348
349    * argument sections: a description of each argument (for commands
350      and events) or member (for structs, unions and alternates)
351
352    * features sections: a description of each feature flag
353
354    * additional (non-argument) sections, possibly tagged
355
356    Free-form documentation blocks consist only of a body section.
357    """
358
359    class Section:
360        def __init__(self, parser, name=None, indent=0):
361            # parser, for error messages about indentation
362            self._parser = parser
363            # optional section name (argument/member or section name)
364            self.name = name
365            self.text = ''
366            # the expected indent level of the text of this section
367            self._indent = indent
368
369        def append(self, line):
370            # Strip leading spaces corresponding to the expected indent level
371            # Blank lines are always OK.
372            if line:
373                indent = must_match(r'\s*', line).end()
374                if indent < self._indent:
375                    raise QAPIParseError(
376                        self._parser,
377                        "unexpected de-indent (expected at least %d spaces)" %
378                        self._indent)
379                line = line[self._indent:]
380
381            self.text += line.rstrip() + '\n'
382
383    class ArgSection(Section):
384        def __init__(self, parser, name, indent=0):
385            super().__init__(parser, name, indent)
386            self.member = None
387
388        def connect(self, member):
389            self.member = member
390
391    def __init__(self, parser, info):
392        # self._parser is used to report errors with QAPIParseError.  The
393        # resulting error position depends on the state of the parser.
394        # It happens to be the beginning of the comment.  More or less
395        # servicable, but action at a distance.
396        self._parser = parser
397        self.info = info
398        self.symbol = None
399        self.body = QAPIDoc.Section(parser)
400        # dict mapping parameter name to ArgSection
401        self.args = OrderedDict()
402        self.features = OrderedDict()
403        # a list of Section
404        self.sections = []
405        # the current section
406        self._section = self.body
407        self._append_line = self._append_body_line
408
409    def has_section(self, name):
410        """Return True if we have a section with this name."""
411        for i in self.sections:
412            if i.name == name:
413                return True
414        return False
415
416    def append(self, line):
417        """
418        Parse a comment line and add it to the documentation.
419
420        The way that the line is dealt with depends on which part of
421        the documentation we're parsing right now:
422        * The body section: ._append_line is ._append_body_line
423        * An argument section: ._append_line is ._append_args_line
424        * A features section: ._append_line is ._append_features_line
425        * An additional section: ._append_line is ._append_various_line
426        """
427        line = line[1:]
428        if not line:
429            self._append_freeform(line)
430            return
431
432        if line[0] != ' ':
433            raise QAPIParseError(self._parser, "missing space after #")
434        line = line[1:]
435        self._append_line(line)
436
    def end_comment(self):
        """Finish the comment block by ending the current section."""
        self._end_section()
439
440    @staticmethod
441    def _is_section_tag(name):
442        return name in ('Returns:', 'Since:',
443                        # those are often singular or plural
444                        'Note:', 'Notes:',
445                        'Example:', 'Examples:',
446                        'TODO:')
447
448    def _append_body_line(self, line):
449        """
450        Process a line of documentation text in the body section.
451
452        If this a symbol line and it is the section's first line, this
453        is a definition documentation block for that symbol.
454
455        If it's a definition documentation block, another symbol line
456        begins the argument section for the argument named by it, and
457        a section tag begins an additional section.  Start that
458        section and append the line to it.
459
460        Else, append the line to the current section.
461        """
462        name = line.split(' ', 1)[0]
463        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
464        # recognized, and get silently treated as ordinary text
465        if not self.symbol and not self.body.text and line.startswith('@'):
466            if not line.endswith(':'):
467                raise QAPIParseError(self._parser, "line should end with ':'")
468            self.symbol = line[1:-1]
469            # FIXME invalid names other than the empty string aren't flagged
470            if not self.symbol:
471                raise QAPIParseError(self._parser, "invalid name")
472        elif self.symbol:
473            # This is a definition documentation block
474            if name.startswith('@') and name.endswith(':'):
475                self._append_line = self._append_args_line
476                self._append_args_line(line)
477            elif line == 'Features:':
478                self._append_line = self._append_features_line
479            elif self._is_section_tag(name):
480                self._append_line = self._append_various_line
481                self._append_various_line(line)
482            else:
483                self._append_freeform(line)
484        else:
485            # This is a free-form documentation block
486            self._append_freeform(line)
487
488    def _append_args_line(self, line):
489        """
490        Process a line of documentation text in an argument section.
491
492        A symbol line begins the next argument section, a section tag
493        section or a non-indented line after a blank line begins an
494        additional section.  Start that section and append the line to
495        it.
496
497        Else, append the line to the current section.
498
499        """
500        name = line.split(' ', 1)[0]
501
502        if name.startswith('@') and name.endswith(':'):
503            # If line is "@arg:   first line of description", find
504            # the index of 'f', which is the indent we expect for any
505            # following lines.  We then remove the leading "@arg:"
506            # from line and replace it with spaces so that 'f' has the
507            # same index as it did in the original line and can be
508            # handled the same way we will handle following lines.
509            indent = must_match(r'@\S*:\s*', line).end()
510            line = line[indent:]
511            if not line:
512                # Line was just the "@arg:" header; following lines
513                # are not indented
514                indent = 0
515            else:
516                line = ' ' * indent + line
517            self._start_args_section(name[1:-1], indent)
518        elif self._is_section_tag(name):
519            self._append_line = self._append_various_line
520            self._append_various_line(line)
521            return
522        elif (self._section.text.endswith('\n\n')
523              and line and not line[0].isspace()):
524            if line == 'Features:':
525                self._append_line = self._append_features_line
526            else:
527                self._start_section()
528                self._append_line = self._append_various_line
529                self._append_various_line(line)
530            return
531
532        self._append_freeform(line)
533
534    def _append_features_line(self, line):
535        name = line.split(' ', 1)[0]
536
537        if name.startswith('@') and name.endswith(':'):
538            # If line is "@arg:   first line of description", find
539            # the index of 'f', which is the indent we expect for any
540            # following lines.  We then remove the leading "@arg:"
541            # from line and replace it with spaces so that 'f' has the
542            # same index as it did in the original line and can be
543            # handled the same way we will handle following lines.
544            indent = must_match(r'@\S*:\s*', line).end()
545            line = line[indent:]
546            if not line:
547                # Line was just the "@arg:" header; following lines
548                # are not indented
549                indent = 0
550            else:
551                line = ' ' * indent + line
552            self._start_features_section(name[1:-1], indent)
553        elif self._is_section_tag(name):
554            self._append_line = self._append_various_line
555            self._append_various_line(line)
556            return
557        elif (self._section.text.endswith('\n\n')
558              and line and not line[0].isspace()):
559            self._start_section()
560            self._append_line = self._append_various_line
561            self._append_various_line(line)
562            return
563
564        self._append_freeform(line)
565
566    def _append_various_line(self, line):
567        """
568        Process a line of documentation text in an additional section.
569
570        A symbol line is an error.
571
572        A section tag begins an additional section.  Start that
573        section and append the line to it.
574
575        Else, append the line to the current section.
576        """
577        name = line.split(' ', 1)[0]
578
579        if name.startswith('@') and name.endswith(':'):
580            raise QAPIParseError(self._parser,
581                                 "'%s' can't follow '%s' section"
582                                 % (name, self.sections[0].name))
583        if self._is_section_tag(name):
584            # If line is "Section:   first line of description", find
585            # the index of 'f', which is the indent we expect for any
586            # following lines.  We then remove the leading "Section:"
587            # from line and replace it with spaces so that 'f' has the
588            # same index as it did in the original line and can be
589            # handled the same way we will handle following lines.
590            indent = must_match(r'\S*:\s*', line).end()
591            line = line[indent:]
592            if not line:
593                # Line was just the "Section:" header; following lines
594                # are not indented
595                indent = 0
596            else:
597                line = ' ' * indent + line
598            self._start_section(name[:-1], indent)
599
600        self._append_freeform(line)
601
602    def _start_symbol_section(self, symbols_dict, name, indent):
603        # FIXME invalid names other than the empty string aren't flagged
604        if not name:
605            raise QAPIParseError(self._parser, "invalid parameter name")
606        if name in symbols_dict:
607            raise QAPIParseError(self._parser,
608                                 "'%s' parameter name duplicated" % name)
609        assert not self.sections
610        self._end_section()
611        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
612        symbols_dict[name] = self._section
613
    def _start_args_section(self, name, indent):
        """Begin the section for argument @name, recorded in self.args."""
        self._start_symbol_section(self.args, name, indent)
616
    def _start_features_section(self, name, indent):
        """Begin the section for feature @name, recorded in self.features."""
        self._start_symbol_section(self.features, name, indent)
619
620    def _start_section(self, name=None, indent=0):
621        if name in ('Returns', 'Since') and self.has_section(name):
622            raise QAPIParseError(self._parser,
623                                 "duplicated '%s' section" % name)
624        self._end_section()
625        self._section = QAPIDoc.Section(self._parser, name, indent)
626        self.sections.append(self._section)
627
628    def _end_section(self):
629        if self._section:
630            text = self._section.text = self._section.text.strip()
631            if self._section.name and (not text or text.isspace()):
632                raise QAPIParseError(
633                    self._parser,
634                    "empty doc section '%s'" % self._section.name)
635            self._section = None
636
637    def _append_freeform(self, line):
638        match = re.match(r'(@\S+:)', line)
639        if match:
640            raise QAPIParseError(self._parser,
641                                 "'%s' not allowed in free-form documentation"
642                                 % match.group(1))
643        self._section.append(line)
644
645    def connect_member(self, member):
646        if member.name not in self.args:
647            # Undocumented TODO outlaw
648            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
649                                                        member.name)
650        self.args[member.name].connect(member)
651
652    def connect_feature(self, feature):
653        if feature.name not in self.features:
654            raise QAPISemError(feature.info,
655                               "feature '%s' lacks documentation"
656                               % feature.name)
657        self.features[feature.name].connect(feature)
658
659    def check_expr(self, expr):
660        if self.has_section('Returns') and 'command' not in expr:
661            raise QAPISemError(self.info,
662                               "'Returns:' is only valid for commands")
663
664    def check(self):
665
666        def check_args_section(args, info, what):
667            bogus = [name for name, section in args.items()
668                     if not section.member]
669            if bogus:
670                raise QAPISemError(
671                    self.info,
672                    "documented member%s '%s' %s not exist"
673                    % ("s" if len(bogus) > 1 else "",
674                       "', '".join(bogus),
675                       "do" if len(bogus) > 1 else "does"))
676
677        check_args_section(self.args, self.info, 'members')
678        check_args_section(self.features, self.info, 'features')
679