xref: /openbmc/qemu/scripts/qapi/parser.py (revision c256263f3df0eaf9011405cdaee354380beb6dc5)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .common import must_match
22from .error import QAPISemError, QAPISourceError
23from .source import QAPISourceInfo
24
25
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error location is taken from the lexer state of @parser at the
    time the error is created: ``parser.info`` supplies the source
    location, and the column is computed from the text between
    ``parser.line_pos`` (start of the current line) and ``parser.pos``
    (start of the current token).
    """
    def __init__(self, parser, msg):
        # Columns are 1-based.  Every character advances by one column,
        # except a tab, which advances to the next tab stop (columns
        # 1, 9, 17, ...).
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Round up to the next multiple of 8, then convert back
                # to a 1-based column.  The previous formula used '%'
                # and therefore wrapped the column back into 1..8.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
36
37
38class QAPISchemaParser:
39
40    def __init__(self, fname, previously_included=None, incl_info=None):
41        self._fname = fname
42        self._included = previously_included or set()
43        self._included.add(os.path.abspath(self._fname))
44        self.src = ''
45
46        # Lexer state (see `accept` for details):
47        self.info = QAPISourceInfo(self._fname, incl_info)
48        self.tok = None
49        self.pos = 0
50        self.cursor = 0
51        self.val = None
52        self.line_pos = 0
53
54        # Parser output:
55        self.exprs = []
56        self.docs = []
57
58        # Showtime!
59        self._parse()
60
61    def _parse(self):
62        cur_doc = None
63
64        # May raise OSError; allow the caller to handle it.
65        with open(self._fname, 'r', encoding='utf-8') as fp:
66            self.src = fp.read()
67        if self.src == '' or self.src[-1] != '\n':
68            self.src += '\n'
69
70        # Prime the lexer:
71        self.accept()
72
73        # Parse until done:
74        while self.tok is not None:
75            info = self.info
76            if self.tok == '#':
77                self.reject_expr_doc(cur_doc)
78                for cur_doc in self.get_doc(info):
79                    self.docs.append(cur_doc)
80                continue
81
82            expr = self.get_expr()
83            if not isinstance(expr, dict):
84                raise QAPISemError(
85                    info, "top-level expression must be an object")
86
87            if 'include' in expr:
88                self.reject_expr_doc(cur_doc)
89                if len(expr) != 1:
90                    raise QAPISemError(info, "invalid 'include' directive")
91                include = expr['include']
92                if not isinstance(include, str):
93                    raise QAPISemError(info,
94                                       "value of 'include' must be a string")
95                incl_fname = os.path.join(os.path.dirname(self._fname),
96                                          include)
97                self.exprs.append({'expr': {'include': incl_fname},
98                                   'info': info})
99                exprs_include = self._include(include, info, incl_fname,
100                                              self._included)
101                if exprs_include:
102                    self.exprs.extend(exprs_include.exprs)
103                    self.docs.extend(exprs_include.docs)
104            elif "pragma" in expr:
105                self.reject_expr_doc(cur_doc)
106                if len(expr) != 1:
107                    raise QAPISemError(info, "invalid 'pragma' directive")
108                pragma = expr['pragma']
109                if not isinstance(pragma, dict):
110                    raise QAPISemError(
111                        info, "value of 'pragma' must be an object")
112                for name, value in pragma.items():
113                    self._pragma(name, value, info)
114            else:
115                expr_elem = {'expr': expr,
116                             'info': info}
117                if cur_doc:
118                    if not cur_doc.symbol:
119                        raise QAPISemError(
120                            cur_doc.info, "definition documentation required")
121                    expr_elem['doc'] = cur_doc
122                self.exprs.append(expr_elem)
123            cur_doc = None
124        self.reject_expr_doc(cur_doc)
125
126    @staticmethod
127    def reject_expr_doc(doc):
128        if doc and doc.symbol:
129            raise QAPISemError(
130                doc.info,
131                "documentation for '%s' is not followed by the definition"
132                % doc.symbol)
133
134    @staticmethod
135    def _include(include, info, incl_fname, previously_included):
136        incl_abs_fname = os.path.abspath(incl_fname)
137        # catch inclusion cycle
138        inf = info
139        while inf:
140            if incl_abs_fname == os.path.abspath(inf.fname):
141                raise QAPISemError(info, "inclusion loop for %s" % include)
142            inf = inf.parent
143
144        # skip multiple include of the same file
145        if incl_abs_fname in previously_included:
146            return None
147
148        try:
149            return QAPISchemaParser(incl_fname, previously_included, info)
150        except OSError as err:
151            raise QAPISemError(
152                info,
153                f"can't read include file '{incl_fname}': {err.strerror}"
154            ) from err
155
156    @staticmethod
157    def _check_pragma_list_of_str(name, value, info):
158        if (not isinstance(value, list)
159                or any([not isinstance(elt, str) for elt in value])):
160            raise QAPISemError(
161                info,
162                "pragma %s must be a list of strings" % name)
163
164    def _pragma(self, name, value, info):
165        if name == 'doc-required':
166            if not isinstance(value, bool):
167                raise QAPISemError(info,
168                                   "pragma 'doc-required' must be boolean")
169            info.pragma.doc_required = value
170        elif name == 'command-name-exceptions':
171            self._check_pragma_list_of_str(name, value, info)
172            info.pragma.command_name_exceptions = value
173        elif name == 'command-returns-exceptions':
174            self._check_pragma_list_of_str(name, value, info)
175            info.pragma.command_returns_exceptions = value
176        elif name == 'member-name-exceptions':
177            self._check_pragma_list_of_str(name, value, info)
178            info.pragma.member_name_exceptions = value
179        else:
180            raise QAPISemError(info, "unknown pragma '%s'" % name)
181
    def accept(self, skip_comment=True):
        """
        Read the next lexical token and update the lexer state.

        With @skip_comment true (the default), ordinary comments are
        skipped.  A comment starting with '##' is returned as a token
        regardless of @skip_comment.

        On return, the lexer state is:

        * self.tok: the token's first character, or None at the end of
          the source:
          '#'          a comment; self.val is the comment text, without
                       the line's terminating newline
          '{', '}', ':', ',', '[', ']'
                       the structural character itself; self.val is None
          "'"          a string; self.val is its contents, with the
                       escape sequence for backslash replaced by a
                       single backslash
          't', 'f'     the literal true or false; self.val is True or
                       False
        * self.pos: index in self.src of the token's first character
        * self.cursor: index in self.src of the character following
          the token
        * self.line_pos: index in self.src of the start of the current
          line
        * self.info: source location; advanced with .next_line() for
          each newline consumed, except for the final one

        Whitespace between tokens is skipped.

        Raise QAPIParseError for an unterminated string, an unknown
        escape sequence, a character outside printable ASCII in a
        string, or any other stray text.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Move to the newline ending the comment; the newline
                # itself is handled by the next loop iteration or call.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character has been consumed already
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # The first character has been consumed already
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # The final newline marks the end of the source
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
245
246    def get_members(self):
247        expr = OrderedDict()
248        if self.tok == '}':
249            self.accept()
250            return expr
251        if self.tok != "'":
252            raise QAPIParseError(self, "expected string or '}'")
253        while True:
254            key = self.val
255            assert isinstance(key, str)  # Guaranteed by tok == "'"
256
257            self.accept()
258            if self.tok != ':':
259                raise QAPIParseError(self, "expected ':'")
260            self.accept()
261            if key in expr:
262                raise QAPIParseError(self, "duplicate key '%s'" % key)
263            expr[key] = self.get_expr()
264            if self.tok == '}':
265                self.accept()
266                return expr
267            if self.tok != ',':
268                raise QAPIParseError(self, "expected ',' or '}'")
269            self.accept()
270            if self.tok != "'":
271                raise QAPIParseError(self, "expected string")
272
273    def get_values(self):
274        expr = []
275        if self.tok == ']':
276            self.accept()
277            return expr
278        if self.tok not in tuple("{['tf"):
279            raise QAPIParseError(
280                self, "expected '{', '[', ']', string, or boolean")
281        while True:
282            expr.append(self.get_expr())
283            if self.tok == ']':
284                self.accept()
285                return expr
286            if self.tok != ',':
287                raise QAPIParseError(self, "expected ',' or ']'")
288            self.accept()
289
290    def get_expr(self):
291        if self.tok == '{':
292            self.accept()
293            expr = self.get_members()
294        elif self.tok == '[':
295            self.accept()
296            expr = self.get_values()
297        elif self.tok in tuple("'tf"):
298            assert isinstance(self.val, (str, bool))
299            expr = self.val
300            self.accept()
301        else:
302            raise QAPIParseError(
303                self, "expected '{', '[', string, or boolean")
304        return expr
305
306    def get_doc(self, info):
307        if self.val != '##':
308            raise QAPIParseError(
309                self, "junk after '##' at start of documentation comment")
310
311        docs = []
312        cur_doc = QAPIDoc(self, info)
313        self.accept(False)
314        while self.tok == '#':
315            assert isinstance(self.val, str)
316            if self.val.startswith('##'):
317                # End of doc comment
318                if self.val != '##':
319                    raise QAPIParseError(
320                        self,
321                        "junk after '##' at end of documentation comment")
322                cur_doc.end_comment()
323                docs.append(cur_doc)
324                self.accept()
325                return docs
326            if self.val.startswith('# ='):
327                if cur_doc.symbol:
328                    raise QAPIParseError(
329                        self,
330                        "unexpected '=' markup in definition documentation")
331                if cur_doc.body.text:
332                    cur_doc.end_comment()
333                    docs.append(cur_doc)
334                    cur_doc = QAPIDoc(self, info)
335            cur_doc.append(self.val)
336            self.accept(False)
337
338        raise QAPIParseError(self, "documentation comment must end with '##'")
339
340
341class QAPIDoc:
342    """
343    A documentation comment block, either definition or free-form
344
345    Definition documentation blocks consist of
346
347    * a body section: one line naming the definition, followed by an
348      overview (any number of lines)
349
350    * argument sections: a description of each argument (for commands
351      and events) or member (for structs, unions and alternates)
352
353    * features sections: a description of each feature flag
354
355    * additional (non-argument) sections, possibly tagged
356
357    Free-form documentation blocks consist only of a body section.
358    """
359
360    class Section:
361        def __init__(self, parser, name=None, indent=0):
362            # parser, for error messages about indentation
363            self._parser = parser
364            # optional section name (argument/member or section name)
365            self.name = name
366            self.text = ''
367            # the expected indent level of the text of this section
368            self._indent = indent
369
370        def append(self, line):
371            # Strip leading spaces corresponding to the expected indent level
372            # Blank lines are always OK.
373            if line:
374                indent = must_match(r'\s*', line).end()
375                if indent < self._indent:
376                    raise QAPIParseError(
377                        self._parser,
378                        "unexpected de-indent (expected at least %d spaces)" %
379                        self._indent)
380                line = line[self._indent:]
381
382            self.text += line.rstrip() + '\n'
383
384    class ArgSection(Section):
385        def __init__(self, parser, name, indent=0):
386            super().__init__(parser, name, indent)
387            self.member = None
388
389        def connect(self, member):
390            self.member = member
391
392    def __init__(self, parser, info):
393        # self._parser is used to report errors with QAPIParseError.  The
394        # resulting error position depends on the state of the parser.
395        # It happens to be the beginning of the comment.  More or less
396        # servicable, but action at a distance.
397        self._parser = parser
398        self.info = info
399        self.symbol = None
400        self.body = QAPIDoc.Section(parser)
401        # dict mapping parameter name to ArgSection
402        self.args = OrderedDict()
403        self.features = OrderedDict()
404        # a list of Section
405        self.sections = []
406        # the current section
407        self._section = self.body
408        self._append_line = self._append_body_line
409
410    def has_section(self, name):
411        """Return True if we have a section with this name."""
412        for i in self.sections:
413            if i.name == name:
414                return True
415        return False
416
417    def append(self, line):
418        """
419        Parse a comment line and add it to the documentation.
420
421        The way that the line is dealt with depends on which part of
422        the documentation we're parsing right now:
423        * The body section: ._append_line is ._append_body_line
424        * An argument section: ._append_line is ._append_args_line
425        * A features section: ._append_line is ._append_features_line
426        * An additional section: ._append_line is ._append_various_line
427        """
428        line = line[1:]
429        if not line:
430            self._append_freeform(line)
431            return
432
433        if line[0] != ' ':
434            raise QAPIParseError(self._parser, "missing space after #")
435        line = line[1:]
436        self._append_line(line)
437
438    def end_comment(self):
439        self._end_section()
440
441    @staticmethod
442    def _is_section_tag(name):
443        return name in ('Returns:', 'Since:',
444                        # those are often singular or plural
445                        'Note:', 'Notes:',
446                        'Example:', 'Examples:',
447                        'TODO:')
448
449    def _append_body_line(self, line):
450        """
451        Process a line of documentation text in the body section.
452
453        If this a symbol line and it is the section's first line, this
454        is a definition documentation block for that symbol.
455
456        If it's a definition documentation block, another symbol line
457        begins the argument section for the argument named by it, and
458        a section tag begins an additional section.  Start that
459        section and append the line to it.
460
461        Else, append the line to the current section.
462        """
463        name = line.split(' ', 1)[0]
464        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
465        # recognized, and get silently treated as ordinary text
466        if not self.symbol and not self.body.text and line.startswith('@'):
467            if not line.endswith(':'):
468                raise QAPIParseError(self._parser, "line should end with ':'")
469            self.symbol = line[1:-1]
470            # FIXME invalid names other than the empty string aren't flagged
471            if not self.symbol:
472                raise QAPIParseError(self._parser, "invalid name")
473        elif self.symbol:
474            # This is a definition documentation block
475            if name.startswith('@') and name.endswith(':'):
476                self._append_line = self._append_args_line
477                self._append_args_line(line)
478            elif line == 'Features:':
479                self._append_line = self._append_features_line
480            elif self._is_section_tag(name):
481                self._append_line = self._append_various_line
482                self._append_various_line(line)
483            else:
484                self._append_freeform(line)
485        else:
486            # This is a free-form documentation block
487            self._append_freeform(line)
488
489    def _append_args_line(self, line):
490        """
491        Process a line of documentation text in an argument section.
492
493        A symbol line begins the next argument section, a section tag
494        section or a non-indented line after a blank line begins an
495        additional section.  Start that section and append the line to
496        it.
497
498        Else, append the line to the current section.
499
500        """
501        name = line.split(' ', 1)[0]
502
503        if name.startswith('@') and name.endswith(':'):
504            # If line is "@arg:   first line of description", find
505            # the index of 'f', which is the indent we expect for any
506            # following lines.  We then remove the leading "@arg:"
507            # from line and replace it with spaces so that 'f' has the
508            # same index as it did in the original line and can be
509            # handled the same way we will handle following lines.
510            indent = must_match(r'@\S*:\s*', line).end()
511            line = line[indent:]
512            if not line:
513                # Line was just the "@arg:" header; following lines
514                # are not indented
515                indent = 0
516            else:
517                line = ' ' * indent + line
518            self._start_args_section(name[1:-1], indent)
519        elif self._is_section_tag(name):
520            self._append_line = self._append_various_line
521            self._append_various_line(line)
522            return
523        elif (self._section.text.endswith('\n\n')
524              and line and not line[0].isspace()):
525            if line == 'Features:':
526                self._append_line = self._append_features_line
527            else:
528                self._start_section()
529                self._append_line = self._append_various_line
530                self._append_various_line(line)
531            return
532
533        self._append_freeform(line)
534
535    def _append_features_line(self, line):
536        name = line.split(' ', 1)[0]
537
538        if name.startswith('@') and name.endswith(':'):
539            # If line is "@arg:   first line of description", find
540            # the index of 'f', which is the indent we expect for any
541            # following lines.  We then remove the leading "@arg:"
542            # from line and replace it with spaces so that 'f' has the
543            # same index as it did in the original line and can be
544            # handled the same way we will handle following lines.
545            indent = must_match(r'@\S*:\s*', line).end()
546            line = line[indent:]
547            if not line:
548                # Line was just the "@arg:" header; following lines
549                # are not indented
550                indent = 0
551            else:
552                line = ' ' * indent + line
553            self._start_features_section(name[1:-1], indent)
554        elif self._is_section_tag(name):
555            self._append_line = self._append_various_line
556            self._append_various_line(line)
557            return
558        elif (self._section.text.endswith('\n\n')
559              and line and not line[0].isspace()):
560            self._start_section()
561            self._append_line = self._append_various_line
562            self._append_various_line(line)
563            return
564
565        self._append_freeform(line)
566
567    def _append_various_line(self, line):
568        """
569        Process a line of documentation text in an additional section.
570
571        A symbol line is an error.
572
573        A section tag begins an additional section.  Start that
574        section and append the line to it.
575
576        Else, append the line to the current section.
577        """
578        name = line.split(' ', 1)[0]
579
580        if name.startswith('@') and name.endswith(':'):
581            raise QAPIParseError(self._parser,
582                                 "'%s' can't follow '%s' section"
583                                 % (name, self.sections[0].name))
584        if self._is_section_tag(name):
585            # If line is "Section:   first line of description", find
586            # the index of 'f', which is the indent we expect for any
587            # following lines.  We then remove the leading "Section:"
588            # from line and replace it with spaces so that 'f' has the
589            # same index as it did in the original line and can be
590            # handled the same way we will handle following lines.
591            indent = must_match(r'\S*:\s*', line).end()
592            line = line[indent:]
593            if not line:
594                # Line was just the "Section:" header; following lines
595                # are not indented
596                indent = 0
597            else:
598                line = ' ' * indent + line
599            self._start_section(name[:-1], indent)
600
601        self._append_freeform(line)
602
603    def _start_symbol_section(self, symbols_dict, name, indent):
604        # FIXME invalid names other than the empty string aren't flagged
605        if not name:
606            raise QAPIParseError(self._parser, "invalid parameter name")
607        if name in symbols_dict:
608            raise QAPIParseError(self._parser,
609                                 "'%s' parameter name duplicated" % name)
610        assert not self.sections
611        self._end_section()
612        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
613        symbols_dict[name] = self._section
614
615    def _start_args_section(self, name, indent):
616        self._start_symbol_section(self.args, name, indent)
617
618    def _start_features_section(self, name, indent):
619        self._start_symbol_section(self.features, name, indent)
620
621    def _start_section(self, name=None, indent=0):
622        if name in ('Returns', 'Since') and self.has_section(name):
623            raise QAPIParseError(self._parser,
624                                 "duplicated '%s' section" % name)
625        self._end_section()
626        self._section = QAPIDoc.Section(self._parser, name, indent)
627        self.sections.append(self._section)
628
629    def _end_section(self):
630        if self._section:
631            text = self._section.text = self._section.text.strip()
632            if self._section.name and (not text or text.isspace()):
633                raise QAPIParseError(
634                    self._parser,
635                    "empty doc section '%s'" % self._section.name)
636            self._section = None
637
638    def _append_freeform(self, line):
639        match = re.match(r'(@\S+:)', line)
640        if match:
641            raise QAPIParseError(self._parser,
642                                 "'%s' not allowed in free-form documentation"
643                                 % match.group(1))
644        self._section.append(line)
645
646    def connect_member(self, member):
647        if member.name not in self.args:
648            # Undocumented TODO outlaw
649            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
650                                                        member.name)
651        self.args[member.name].connect(member)
652
653    def connect_feature(self, feature):
654        if feature.name not in self.features:
655            raise QAPISemError(feature.info,
656                               "feature '%s' lacks documentation"
657                               % feature.name)
658        self.features[feature.name].connect(feature)
659
660    def check_expr(self, expr):
661        if self.has_section('Returns') and 'command' not in expr:
662            raise QAPISemError(self.info,
663                               "'Returns:' is only valid for commands")
664
665    def check(self):
666
667        def check_args_section(args, info, what):
668            bogus = [name for name, section in args.items()
669                     if not section.member]
670            if bogus:
671                raise QAPISemError(
672                    self.info,
673                    "documented member%s '%s' %s not exist"
674                    % ("s" if len(bogus) > 1 else "",
675                       "', '".join(bogus),
676                       "do" if len(bogus) > 1 else "does"))
677
678        check_args_section(self.args, self.info, 'members')
679        check_args_section(self.features, self.info, 'features')
680