xref: /openbmc/qemu/scripts/qapi/parser.py (revision b86df374784897c58b965939c9913c2a6c590426)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPIParseError, QAPISemError
22from .source import QAPISourceInfo
23
24
class QAPISchemaParser:
    """
    Parser for a QAPI schema source file.

    All parsing happens in the constructor.  Afterwards,

    * .exprs is a list of dicts, one per top-level expression, with
      keys 'expr' (the parsed expression), 'info' (its source
      location) and, when a definition documentation block precedes
      the expression, 'doc' (a QAPIDoc)

    * .docs is a list of all documentation blocks (QAPIDoc)

    Expressions and documentation of included files are spliced in
    right after the include directive that pulled them in.
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        """
        Read and parse schema file @fname.

        @previously_included is the set of absolute names of the files
        parsed so far.  It is updated in place and handed down to the
        parsers of included files, so that each file gets included
        only once.

        @incl_info is the source location of the include directive
        that pulled in @fname, or None for the main schema file.

        Raise QAPISemError when the file can't be read or a directive
        or documentation block is misused, and QAPIParseError on
        syntax errors.
        """
        # Test for None, not truthiness: an empty set passed by the
        # caller must be updated in place, not silently replaced
        if previously_included is None:
            previously_included = set()
        previously_included.add(os.path.abspath(fname))

        try:
            # The context manager closes the file even if read() fails
            with open(fname, 'r', encoding='utf-8') as fp:
                self.src = fp.read()
        except IOError as e:
            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
                               "can't read %s file '%s': %s"
                               % ("include" if incl_info else "schema",
                                  fname,
                                  e.strerror)) from e

        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'
        self.cursor = 0
        self.info = QAPISourceInfo(fname, 1, incl_info)
        self.line_pos = 0
        self.exprs = []
        self.docs = []
        self.accept()
        # Documentation block waiting for its definition, if any
        cur_doc = None

        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # A comment may hold several documentation blocks;
                # only the last one can document the next expression
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr(False)
            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Included file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              previously_included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """
        Raise QAPISemError if @doc is definition documentation.

        Called where no definition follows a documentation block.
        None and free-form documentation (no symbol) are fine.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    def _include(self, include, info, incl_fname, previously_included):
        """
        Parse included file @incl_fname.

        @include is the file name as written in the directive (used in
        error messages), @info the location of the directive.

        Return the QAPISchemaParser for the file, or None if the file
        is in @previously_included.  Raise QAPISemError if the file is
        already being parsed further up the include chain.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        return QAPISchemaParser(incl_fname, previously_included, info)

    def _check_pragma_list_of_str(self, name, value, info):
        """Raise QAPISemError unless @value is a list of strings."""
        if (not isinstance(value, list)
                or any(not isinstance(elt, str) for elt in value)):
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)

    def _pragma(self, name, value, info):
        """
        Apply pragma @name with value @value to @info.pragma.

        Raise QAPISemError for an unknown pragma or a value of the
        wrong type.
        """
        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            info.pragma.doc_required = value
        elif name == 'command-returns-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_returns_exceptions = value
        elif name == 'member-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.member_name_exceptions = value
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next token into .tok, with its value in .val.

        .tok is the token's first character, or None at end of input:

        * '#': a comment, .val is its text up to the end of the line.
          Ordinary comments are skipped unless @skip_comment is false;
          comments starting with '##' are never skipped.
        * one of '{}:,[]': a structural character, .val is None
        * "'": a string, .val is its contents
        * 't' or 'f': a boolean, .val is True or False

        .pos is the token's offset in .src, and .cursor the offset
        right after it.  .info and .line_pos track the current line.

        Raise QAPIParseError on anything else.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Stop at the newline; the next iteration consumes it
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or single
                # quote character.  A double quote must not be
                # excluded here: it is not a token, so a stray one
                # has to match, or else there is no match to report.
                match = re.match('[^[\\]{}:,\\s\']+',
                                 self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self):
        """
        Parse the members of an object, up to and including the '}'.

        The opening '{' must have been consumed already.  Return an
        OrderedDict mapping member names to parsed values.  Raise
        QAPIParseError on syntax errors and duplicate keys.
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr(True)
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse the elements of an array, up to and including the ']'.

        The opening '[' must have been consumed already.  Return the
        list of parsed values.  Raise QAPIParseError on syntax errors.
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        # .tok is None at end of input.  Test membership in a tuple:
        # "None in <str>" would raise TypeError.
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr(True))
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self, nested):
        """
        Parse the expression starting at the current token.

        A top-level expression (@nested is false) must be an object.
        A nested one may also be an array, string or boolean.

        Return an OrderedDict, list, str or bool.  Raise
        QAPIParseError on syntax errors.
        """
        if self.tok != '{' and not nested:
            raise QAPIParseError(self, "expected '{'")
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        # Tuple rather than str, because .tok is None at end of input
        elif self.tok in tuple("'tf"):
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment starting at the current token.

        The current token must be the opening '##' line.  Parsing
        ends after the closing '##' line.  A '# =' line starts a new
        block when the current block's body has text, so one comment
        can yield several blocks.

        Return the list of QAPIDoc parsed, all with location @info.
        Raise QAPIParseError on malformed comments.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
299
300
301class QAPIDoc:
302    """
303    A documentation comment block, either definition or free-form
304
305    Definition documentation blocks consist of
306
307    * a body section: one line naming the definition, followed by an
308      overview (any number of lines)
309
310    * argument sections: a description of each argument (for commands
311      and events) or member (for structs, unions and alternates)
312
313    * features sections: a description of each feature flag
314
315    * additional (non-argument) sections, possibly tagged
316
317    Free-form documentation blocks consist only of a body section.
318    """
319
320    class Section:
321        def __init__(self, parser, name=None, indent=0):
322            # parser, for error messages about indentation
323            self._parser = parser
324            # optional section name (argument/member or section name)
325            self.name = name
326            self.text = ''
327            # the expected indent level of the text of this section
328            self._indent = indent
329
330        def append(self, line):
331            # Strip leading spaces corresponding to the expected indent level
332            # Blank lines are always OK.
333            if line:
334                indent = re.match(r'\s*', line).end()
335                if indent < self._indent:
336                    raise QAPIParseError(
337                        self._parser,
338                        "unexpected de-indent (expected at least %d spaces)" %
339                        self._indent)
340                line = line[self._indent:]
341
342            self.text += line.rstrip() + '\n'
343
344    class ArgSection(Section):
345        def __init__(self, parser, name, indent=0):
346            super().__init__(parser, name, indent)
347            self.member = None
348
349        def connect(self, member):
350            self.member = member
351
352    def __init__(self, parser, info):
353        # self._parser is used to report errors with QAPIParseError.  The
354        # resulting error position depends on the state of the parser.
355        # It happens to be the beginning of the comment.  More or less
356        # servicable, but action at a distance.
357        self._parser = parser
358        self.info = info
359        self.symbol = None
360        self.body = QAPIDoc.Section(parser)
361        # dict mapping parameter name to ArgSection
362        self.args = OrderedDict()
363        self.features = OrderedDict()
364        # a list of Section
365        self.sections = []
366        # the current section
367        self._section = self.body
368        self._append_line = self._append_body_line
369
370    def has_section(self, name):
371        """Return True if we have a section with this name."""
372        for i in self.sections:
373            if i.name == name:
374                return True
375        return False
376
377    def append(self, line):
378        """
379        Parse a comment line and add it to the documentation.
380
381        The way that the line is dealt with depends on which part of
382        the documentation we're parsing right now:
383        * The body section: ._append_line is ._append_body_line
384        * An argument section: ._append_line is ._append_args_line
385        * A features section: ._append_line is ._append_features_line
386        * An additional section: ._append_line is ._append_various_line
387        """
388        line = line[1:]
389        if not line:
390            self._append_freeform(line)
391            return
392
393        if line[0] != ' ':
394            raise QAPIParseError(self._parser, "missing space after #")
395        line = line[1:]
396        self._append_line(line)
397
398    def end_comment(self):
399        self._end_section()
400
401    @staticmethod
402    def _is_section_tag(name):
403        return name in ('Returns:', 'Since:',
404                        # those are often singular or plural
405                        'Note:', 'Notes:',
406                        'Example:', 'Examples:',
407                        'TODO:')
408
409    def _append_body_line(self, line):
410        """
411        Process a line of documentation text in the body section.
412
413        If this a symbol line and it is the section's first line, this
414        is a definition documentation block for that symbol.
415
416        If it's a definition documentation block, another symbol line
417        begins the argument section for the argument named by it, and
418        a section tag begins an additional section.  Start that
419        section and append the line to it.
420
421        Else, append the line to the current section.
422        """
423        name = line.split(' ', 1)[0]
424        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
425        # recognized, and get silently treated as ordinary text
426        if not self.symbol and not self.body.text and line.startswith('@'):
427            if not line.endswith(':'):
428                raise QAPIParseError(self._parser, "line should end with ':'")
429            self.symbol = line[1:-1]
430            # FIXME invalid names other than the empty string aren't flagged
431            if not self.symbol:
432                raise QAPIParseError(self._parser, "invalid name")
433        elif self.symbol:
434            # This is a definition documentation block
435            if name.startswith('@') and name.endswith(':'):
436                self._append_line = self._append_args_line
437                self._append_args_line(line)
438            elif line == 'Features:':
439                self._append_line = self._append_features_line
440            elif self._is_section_tag(name):
441                self._append_line = self._append_various_line
442                self._append_various_line(line)
443            else:
444                self._append_freeform(line)
445        else:
446            # This is a free-form documentation block
447            self._append_freeform(line)
448
449    def _append_args_line(self, line):
450        """
451        Process a line of documentation text in an argument section.
452
453        A symbol line begins the next argument section, a section tag
454        section or a non-indented line after a blank line begins an
455        additional section.  Start that section and append the line to
456        it.
457
458        Else, append the line to the current section.
459
460        """
461        name = line.split(' ', 1)[0]
462
463        if name.startswith('@') and name.endswith(':'):
464            # If line is "@arg:   first line of description", find
465            # the index of 'f', which is the indent we expect for any
466            # following lines.  We then remove the leading "@arg:"
467            # from line and replace it with spaces so that 'f' has the
468            # same index as it did in the original line and can be
469            # handled the same way we will handle following lines.
470            indent = re.match(r'@\S*:\s*', line).end()
471            line = line[indent:]
472            if not line:
473                # Line was just the "@arg:" header; following lines
474                # are not indented
475                indent = 0
476            else:
477                line = ' ' * indent + line
478            self._start_args_section(name[1:-1], indent)
479        elif self._is_section_tag(name):
480            self._append_line = self._append_various_line
481            self._append_various_line(line)
482            return
483        elif (self._section.text.endswith('\n\n')
484              and line and not line[0].isspace()):
485            if line == 'Features:':
486                self._append_line = self._append_features_line
487            else:
488                self._start_section()
489                self._append_line = self._append_various_line
490                self._append_various_line(line)
491            return
492
493        self._append_freeform(line)
494
495    def _append_features_line(self, line):
496        name = line.split(' ', 1)[0]
497
498        if name.startswith('@') and name.endswith(':'):
499            # If line is "@arg:   first line of description", find
500            # the index of 'f', which is the indent we expect for any
501            # following lines.  We then remove the leading "@arg:"
502            # from line and replace it with spaces so that 'f' has the
503            # same index as it did in the original line and can be
504            # handled the same way we will handle following lines.
505            indent = re.match(r'@\S*:\s*', line).end()
506            line = line[indent:]
507            if not line:
508                # Line was just the "@arg:" header; following lines
509                # are not indented
510                indent = 0
511            else:
512                line = ' ' * indent + line
513            self._start_features_section(name[1:-1], indent)
514        elif self._is_section_tag(name):
515            self._append_line = self._append_various_line
516            self._append_various_line(line)
517            return
518        elif (self._section.text.endswith('\n\n')
519              and line and not line[0].isspace()):
520            self._start_section()
521            self._append_line = self._append_various_line
522            self._append_various_line(line)
523            return
524
525        self._append_freeform(line)
526
527    def _append_various_line(self, line):
528        """
529        Process a line of documentation text in an additional section.
530
531        A symbol line is an error.
532
533        A section tag begins an additional section.  Start that
534        section and append the line to it.
535
536        Else, append the line to the current section.
537        """
538        name = line.split(' ', 1)[0]
539
540        if name.startswith('@') and name.endswith(':'):
541            raise QAPIParseError(self._parser,
542                                 "'%s' can't follow '%s' section"
543                                 % (name, self.sections[0].name))
544        if self._is_section_tag(name):
545            # If line is "Section:   first line of description", find
546            # the index of 'f', which is the indent we expect for any
547            # following lines.  We then remove the leading "Section:"
548            # from line and replace it with spaces so that 'f' has the
549            # same index as it did in the original line and can be
550            # handled the same way we will handle following lines.
551            indent = re.match(r'\S*:\s*', line).end()
552            line = line[indent:]
553            if not line:
554                # Line was just the "Section:" header; following lines
555                # are not indented
556                indent = 0
557            else:
558                line = ' ' * indent + line
559            self._start_section(name[:-1], indent)
560
561        self._append_freeform(line)
562
563    def _start_symbol_section(self, symbols_dict, name, indent):
564        # FIXME invalid names other than the empty string aren't flagged
565        if not name:
566            raise QAPIParseError(self._parser, "invalid parameter name")
567        if name in symbols_dict:
568            raise QAPIParseError(self._parser,
569                                 "'%s' parameter name duplicated" % name)
570        assert not self.sections
571        self._end_section()
572        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
573        symbols_dict[name] = self._section
574
575    def _start_args_section(self, name, indent):
576        self._start_symbol_section(self.args, name, indent)
577
578    def _start_features_section(self, name, indent):
579        self._start_symbol_section(self.features, name, indent)
580
581    def _start_section(self, name=None, indent=0):
582        if name in ('Returns', 'Since') and self.has_section(name):
583            raise QAPIParseError(self._parser,
584                                 "duplicated '%s' section" % name)
585        self._end_section()
586        self._section = QAPIDoc.Section(self._parser, name, indent)
587        self.sections.append(self._section)
588
589    def _end_section(self):
590        if self._section:
591            text = self._section.text = self._section.text.strip()
592            if self._section.name and (not text or text.isspace()):
593                raise QAPIParseError(
594                    self._parser,
595                    "empty doc section '%s'" % self._section.name)
596            self._section = None
597
598    def _append_freeform(self, line):
599        match = re.match(r'(@\S+:)', line)
600        if match:
601            raise QAPIParseError(self._parser,
602                                 "'%s' not allowed in free-form documentation"
603                                 % match.group(1))
604        self._section.append(line)
605
606    def connect_member(self, member):
607        if member.name not in self.args:
608            # Undocumented TODO outlaw
609            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
610                                                        member.name)
611        self.args[member.name].connect(member)
612
613    def connect_feature(self, feature):
614        if feature.name not in self.features:
615            raise QAPISemError(feature.info,
616                               "feature '%s' lacks documentation"
617                               % feature.name)
618        self.features[feature.name].connect(feature)
619
620    def check_expr(self, expr):
621        if self.has_section('Returns') and 'command' not in expr:
622            raise QAPISemError(self.info,
623                               "'Returns:' is only valid for commands")
624
625    def check(self):
626
627        def check_args_section(args, info, what):
628            bogus = [name for name, section in args.items()
629                     if not section.member]
630            if bogus:
631                raise QAPISemError(
632                    self.info,
633                    "documented member%s '%s' %s not exist"
634                    % ("s" if len(bogus) > 1 else "",
635                       "', '".join(bogus),
636                       "do" if len(bogus) > 1 else "does"))
637
638        check_args_section(self.args, self.info, 'members')
639        check_args_section(self.features, self.info, 'features')
640