xref: /openbmc/qemu/scripts/qapi/parser.py (revision 3539d84d)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPIParseError, QAPISemError
22from .source import QAPISourceInfo
23
24
25class QAPISchemaParser:
26
27    def __init__(self, fname, previously_included=None, incl_info=None):
28        previously_included = previously_included or set()
29        previously_included.add(os.path.abspath(fname))
30
31        try:
32            fp = open(fname, 'r', encoding='utf-8')
33            self.src = fp.read()
34        except IOError as e:
35            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
36                               "can't read %s file '%s': %s"
37                               % ("include" if incl_info else "schema",
38                                  fname,
39                                  e.strerror))
40
41        if self.src == '' or self.src[-1] != '\n':
42            self.src += '\n'
43        self.cursor = 0
44        self.info = QAPISourceInfo(fname, 1, incl_info)
45        self.line_pos = 0
46        self.exprs = []
47        self.docs = []
48        self.accept()
49        cur_doc = None
50
51        while self.tok is not None:
52            info = self.info
53            if self.tok == '#':
54                self.reject_expr_doc(cur_doc)
55                for cur_doc in self.get_doc(info):
56                    self.docs.append(cur_doc)
57                continue
58
59            expr = self.get_expr(False)
60            if 'include' in expr:
61                self.reject_expr_doc(cur_doc)
62                if len(expr) != 1:
63                    raise QAPISemError(info, "invalid 'include' directive")
64                include = expr['include']
65                if not isinstance(include, str):
66                    raise QAPISemError(info,
67                                       "value of 'include' must be a string")
68                incl_fname = os.path.join(os.path.dirname(fname),
69                                          include)
70                self.exprs.append({'expr': {'include': incl_fname},
71                                   'info': info})
72                exprs_include = self._include(include, info, incl_fname,
73                                              previously_included)
74                if exprs_include:
75                    self.exprs.extend(exprs_include.exprs)
76                    self.docs.extend(exprs_include.docs)
77            elif "pragma" in expr:
78                self.reject_expr_doc(cur_doc)
79                if len(expr) != 1:
80                    raise QAPISemError(info, "invalid 'pragma' directive")
81                pragma = expr['pragma']
82                if not isinstance(pragma, dict):
83                    raise QAPISemError(
84                        info, "value of 'pragma' must be an object")
85                for name, value in pragma.items():
86                    self._pragma(name, value, info)
87            else:
88                expr_elem = {'expr': expr,
89                             'info': info}
90                if cur_doc:
91                    if not cur_doc.symbol:
92                        raise QAPISemError(
93                            cur_doc.info, "definition documentation required")
94                    expr_elem['doc'] = cur_doc
95                self.exprs.append(expr_elem)
96            cur_doc = None
97        self.reject_expr_doc(cur_doc)
98
99    @staticmethod
100    def reject_expr_doc(doc):
101        if doc and doc.symbol:
102            raise QAPISemError(
103                doc.info,
104                "documentation for '%s' is not followed by the definition"
105                % doc.symbol)
106
107    def _include(self, include, info, incl_fname, previously_included):
108        incl_abs_fname = os.path.abspath(incl_fname)
109        # catch inclusion cycle
110        inf = info
111        while inf:
112            if incl_abs_fname == os.path.abspath(inf.fname):
113                raise QAPISemError(info, "inclusion loop for %s" % include)
114            inf = inf.parent
115
116        # skip multiple include of the same file
117        if incl_abs_fname in previously_included:
118            return None
119
120        return QAPISchemaParser(incl_fname, previously_included, info)
121
122    def _check_pragma_list_of_str(self, name, value, info):
123        if (not isinstance(value, list)
124                or any([not isinstance(elt, str) for elt in value])):
125            raise QAPISemError(
126                info,
127                "pragma %s must be a list of strings" % name)
128
129    def _pragma(self, name, value, info):
130        if name == 'doc-required':
131            if not isinstance(value, bool):
132                raise QAPISemError(info,
133                                   "pragma 'doc-required' must be boolean")
134            info.pragma.doc_required = value
135        elif name == 'command-name-exceptions':
136            self._check_pragma_list_of_str(name, value, info)
137            info.pragma.command_name_exceptions = value
138        elif name == 'command-returns-exceptions':
139            self._check_pragma_list_of_str(name, value, info)
140            info.pragma.command_returns_exceptions = value
141        elif name == 'member-name-exceptions':
142            self._check_pragma_list_of_str(name, value, info)
143            info.pragma.member_name_exceptions = value
144        else:
145            raise QAPISemError(info, "unknown pragma '%s'" % name)
146
147    def accept(self, skip_comment=True):
148        while True:
149            self.tok = self.src[self.cursor]
150            self.pos = self.cursor
151            self.cursor += 1
152            self.val = None
153
154            if self.tok == '#':
155                if self.src[self.cursor] == '#':
156                    # Start of doc comment
157                    skip_comment = False
158                self.cursor = self.src.find('\n', self.cursor)
159                if not skip_comment:
160                    self.val = self.src[self.pos:self.cursor]
161                    return
162            elif self.tok in '{}:,[]':
163                return
164            elif self.tok == "'":
165                # Note: we accept only printable ASCII
166                string = ''
167                esc = False
168                while True:
169                    ch = self.src[self.cursor]
170                    self.cursor += 1
171                    if ch == '\n':
172                        raise QAPIParseError(self, "missing terminating \"'\"")
173                    if esc:
174                        # Note: we recognize only \\ because we have
175                        # no use for funny characters in strings
176                        if ch != '\\':
177                            raise QAPIParseError(self,
178                                                 "unknown escape \\%s" % ch)
179                        esc = False
180                    elif ch == '\\':
181                        esc = True
182                        continue
183                    elif ch == "'":
184                        self.val = string
185                        return
186                    if ord(ch) < 32 or ord(ch) >= 127:
187                        raise QAPIParseError(
188                            self, "funny character in string")
189                    string += ch
190            elif self.src.startswith('true', self.pos):
191                self.val = True
192                self.cursor += 3
193                return
194            elif self.src.startswith('false', self.pos):
195                self.val = False
196                self.cursor += 4
197                return
198            elif self.tok == '\n':
199                if self.cursor == len(self.src):
200                    self.tok = None
201                    return
202                self.info = self.info.next_line()
203                self.line_pos = self.cursor
204            elif not self.tok.isspace():
205                # Show up to next structural, whitespace or quote
206                # character
207                match = re.match('[^[\\]{}:,\\s\'"]+',
208                                 self.src[self.cursor-1:])
209                raise QAPIParseError(self, "stray '%s'" % match.group(0))
210
211    def get_members(self):
212        expr = OrderedDict()
213        if self.tok == '}':
214            self.accept()
215            return expr
216        if self.tok != "'":
217            raise QAPIParseError(self, "expected string or '}'")
218        while True:
219            key = self.val
220            self.accept()
221            if self.tok != ':':
222                raise QAPIParseError(self, "expected ':'")
223            self.accept()
224            if key in expr:
225                raise QAPIParseError(self, "duplicate key '%s'" % key)
226            expr[key] = self.get_expr(True)
227            if self.tok == '}':
228                self.accept()
229                return expr
230            if self.tok != ',':
231                raise QAPIParseError(self, "expected ',' or '}'")
232            self.accept()
233            if self.tok != "'":
234                raise QAPIParseError(self, "expected string")
235
236    def get_values(self):
237        expr = []
238        if self.tok == ']':
239            self.accept()
240            return expr
241        if self.tok not in "{['tf":
242            raise QAPIParseError(
243                self, "expected '{', '[', ']', string, or boolean")
244        while True:
245            expr.append(self.get_expr(True))
246            if self.tok == ']':
247                self.accept()
248                return expr
249            if self.tok != ',':
250                raise QAPIParseError(self, "expected ',' or ']'")
251            self.accept()
252
253    def get_expr(self, nested):
254        if self.tok != '{' and not nested:
255            raise QAPIParseError(self, "expected '{'")
256        if self.tok == '{':
257            self.accept()
258            expr = self.get_members()
259        elif self.tok == '[':
260            self.accept()
261            expr = self.get_values()
262        elif self.tok in "'tf":
263            expr = self.val
264            self.accept()
265        else:
266            raise QAPIParseError(
267                self, "expected '{', '[', string, or boolean")
268        return expr
269
270    def get_doc(self, info):
271        if self.val != '##':
272            raise QAPIParseError(
273                self, "junk after '##' at start of documentation comment")
274
275        docs = []
276        cur_doc = QAPIDoc(self, info)
277        self.accept(False)
278        while self.tok == '#':
279            if self.val.startswith('##'):
280                # End of doc comment
281                if self.val != '##':
282                    raise QAPIParseError(
283                        self,
284                        "junk after '##' at end of documentation comment")
285                cur_doc.end_comment()
286                docs.append(cur_doc)
287                self.accept()
288                return docs
289            if self.val.startswith('# ='):
290                if cur_doc.symbol:
291                    raise QAPIParseError(
292                        self,
293                        "unexpected '=' markup in definition documentation")
294                if cur_doc.body.text:
295                    cur_doc.end_comment()
296                    docs.append(cur_doc)
297                    cur_doc = QAPIDoc(self, info)
298            cur_doc.append(self.val)
299            self.accept(False)
300
301        raise QAPIParseError(self, "documentation comment must end with '##'")
302
303
304class QAPIDoc:
305    """
306    A documentation comment block, either definition or free-form
307
308    Definition documentation blocks consist of
309
310    * a body section: one line naming the definition, followed by an
311      overview (any number of lines)
312
313    * argument sections: a description of each argument (for commands
314      and events) or member (for structs, unions and alternates)
315
316    * features sections: a description of each feature flag
317
318    * additional (non-argument) sections, possibly tagged
319
320    Free-form documentation blocks consist only of a body section.
321    """
322
323    class Section:
324        def __init__(self, parser, name=None, indent=0):
325            # parser, for error messages about indentation
326            self._parser = parser
327            # optional section name (argument/member or section name)
328            self.name = name
329            self.text = ''
330            # the expected indent level of the text of this section
331            self._indent = indent
332
333        def append(self, line):
334            # Strip leading spaces corresponding to the expected indent level
335            # Blank lines are always OK.
336            if line:
337                indent = re.match(r'\s*', line).end()
338                if indent < self._indent:
339                    raise QAPIParseError(
340                        self._parser,
341                        "unexpected de-indent (expected at least %d spaces)" %
342                        self._indent)
343                line = line[self._indent:]
344
345            self.text += line.rstrip() + '\n'
346
    class ArgSection(Section):
        """A Section that can be connected to the object it documents."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the object passed to connect(); None until then
            self.member = None

        def connect(self, member):
            """Record @member as the object this section documents."""
            self.member = member
354
355    def __init__(self, parser, info):
356        # self._parser is used to report errors with QAPIParseError.  The
357        # resulting error position depends on the state of the parser.
358        # It happens to be the beginning of the comment.  More or less
359        # servicable, but action at a distance.
360        self._parser = parser
361        self.info = info
362        self.symbol = None
363        self.body = QAPIDoc.Section(parser)
364        # dict mapping parameter name to ArgSection
365        self.args = OrderedDict()
366        self.features = OrderedDict()
367        # a list of Section
368        self.sections = []
369        # the current section
370        self._section = self.body
371        self._append_line = self._append_body_line
372
373    def has_section(self, name):
374        """Return True if we have a section with this name."""
375        for i in self.sections:
376            if i.name == name:
377                return True
378        return False
379
380    def append(self, line):
381        """
382        Parse a comment line and add it to the documentation.
383
384        The way that the line is dealt with depends on which part of
385        the documentation we're parsing right now:
386        * The body section: ._append_line is ._append_body_line
387        * An argument section: ._append_line is ._append_args_line
388        * A features section: ._append_line is ._append_features_line
389        * An additional section: ._append_line is ._append_various_line
390        """
391        line = line[1:]
392        if not line:
393            self._append_freeform(line)
394            return
395
396        if line[0] != ' ':
397            raise QAPIParseError(self._parser, "missing space after #")
398        line = line[1:]
399        self._append_line(line)
400
    def end_comment(self):
        """Finish the block by closing the section currently open."""
        self._end_section()
403
404    @staticmethod
405    def _is_section_tag(name):
406        return name in ('Returns:', 'Since:',
407                        # those are often singular or plural
408                        'Note:', 'Notes:',
409                        'Example:', 'Examples:',
410                        'TODO:')
411
412    def _append_body_line(self, line):
413        """
414        Process a line of documentation text in the body section.
415
416        If this a symbol line and it is the section's first line, this
417        is a definition documentation block for that symbol.
418
419        If it's a definition documentation block, another symbol line
420        begins the argument section for the argument named by it, and
421        a section tag begins an additional section.  Start that
422        section and append the line to it.
423
424        Else, append the line to the current section.
425        """
426        name = line.split(' ', 1)[0]
427        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
428        # recognized, and get silently treated as ordinary text
429        if not self.symbol and not self.body.text and line.startswith('@'):
430            if not line.endswith(':'):
431                raise QAPIParseError(self._parser, "line should end with ':'")
432            self.symbol = line[1:-1]
433            # FIXME invalid names other than the empty string aren't flagged
434            if not self.symbol:
435                raise QAPIParseError(self._parser, "invalid name")
436        elif self.symbol:
437            # This is a definition documentation block
438            if name.startswith('@') and name.endswith(':'):
439                self._append_line = self._append_args_line
440                self._append_args_line(line)
441            elif line == 'Features:':
442                self._append_line = self._append_features_line
443            elif self._is_section_tag(name):
444                self._append_line = self._append_various_line
445                self._append_various_line(line)
446            else:
447                self._append_freeform(line)
448        else:
449            # This is a free-form documentation block
450            self._append_freeform(line)
451
452    def _append_args_line(self, line):
453        """
454        Process a line of documentation text in an argument section.
455
456        A symbol line begins the next argument section, a section tag
457        section or a non-indented line after a blank line begins an
458        additional section.  Start that section and append the line to
459        it.
460
461        Else, append the line to the current section.
462
463        """
464        name = line.split(' ', 1)[0]
465
466        if name.startswith('@') and name.endswith(':'):
467            # If line is "@arg:   first line of description", find
468            # the index of 'f', which is the indent we expect for any
469            # following lines.  We then remove the leading "@arg:"
470            # from line and replace it with spaces so that 'f' has the
471            # same index as it did in the original line and can be
472            # handled the same way we will handle following lines.
473            indent = re.match(r'@\S*:\s*', line).end()
474            line = line[indent:]
475            if not line:
476                # Line was just the "@arg:" header; following lines
477                # are not indented
478                indent = 0
479            else:
480                line = ' ' * indent + line
481            self._start_args_section(name[1:-1], indent)
482        elif self._is_section_tag(name):
483            self._append_line = self._append_various_line
484            self._append_various_line(line)
485            return
486        elif (self._section.text.endswith('\n\n')
487              and line and not line[0].isspace()):
488            if line == 'Features:':
489                self._append_line = self._append_features_line
490            else:
491                self._start_section()
492                self._append_line = self._append_various_line
493                self._append_various_line(line)
494            return
495
496        self._append_freeform(line)
497
498    def _append_features_line(self, line):
499        name = line.split(' ', 1)[0]
500
501        if name.startswith('@') and name.endswith(':'):
502            # If line is "@arg:   first line of description", find
503            # the index of 'f', which is the indent we expect for any
504            # following lines.  We then remove the leading "@arg:"
505            # from line and replace it with spaces so that 'f' has the
506            # same index as it did in the original line and can be
507            # handled the same way we will handle following lines.
508            indent = re.match(r'@\S*:\s*', line).end()
509            line = line[indent:]
510            if not line:
511                # Line was just the "@arg:" header; following lines
512                # are not indented
513                indent = 0
514            else:
515                line = ' ' * indent + line
516            self._start_features_section(name[1:-1], indent)
517        elif self._is_section_tag(name):
518            self._append_line = self._append_various_line
519            self._append_various_line(line)
520            return
521        elif (self._section.text.endswith('\n\n')
522              and line and not line[0].isspace()):
523            self._start_section()
524            self._append_line = self._append_various_line
525            self._append_various_line(line)
526            return
527
528        self._append_freeform(line)
529
530    def _append_various_line(self, line):
531        """
532        Process a line of documentation text in an additional section.
533
534        A symbol line is an error.
535
536        A section tag begins an additional section.  Start that
537        section and append the line to it.
538
539        Else, append the line to the current section.
540        """
541        name = line.split(' ', 1)[0]
542
543        if name.startswith('@') and name.endswith(':'):
544            raise QAPIParseError(self._parser,
545                                 "'%s' can't follow '%s' section"
546                                 % (name, self.sections[0].name))
547        if self._is_section_tag(name):
548            # If line is "Section:   first line of description", find
549            # the index of 'f', which is the indent we expect for any
550            # following lines.  We then remove the leading "Section:"
551            # from line and replace it with spaces so that 'f' has the
552            # same index as it did in the original line and can be
553            # handled the same way we will handle following lines.
554            indent = re.match(r'\S*:\s*', line).end()
555            line = line[indent:]
556            if not line:
557                # Line was just the "Section:" header; following lines
558                # are not indented
559                indent = 0
560            else:
561                line = ' ' * indent + line
562            self._start_section(name[:-1], indent)
563
564        self._append_freeform(line)
565
566    def _start_symbol_section(self, symbols_dict, name, indent):
567        # FIXME invalid names other than the empty string aren't flagged
568        if not name:
569            raise QAPIParseError(self._parser, "invalid parameter name")
570        if name in symbols_dict:
571            raise QAPIParseError(self._parser,
572                                 "'%s' parameter name duplicated" % name)
573        assert not self.sections
574        self._end_section()
575        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
576        symbols_dict[name] = self._section
577
    def _start_args_section(self, name, indent):
        """Start the argument section for @name, stored in self.args."""
        self._start_symbol_section(self.args, name, indent)
580
    def _start_features_section(self, name, indent):
        """Start the feature section for @name, stored in self.features."""
        self._start_symbol_section(self.features, name, indent)
583
584    def _start_section(self, name=None, indent=0):
585        if name in ('Returns', 'Since') and self.has_section(name):
586            raise QAPIParseError(self._parser,
587                                 "duplicated '%s' section" % name)
588        self._end_section()
589        self._section = QAPIDoc.Section(self._parser, name, indent)
590        self.sections.append(self._section)
591
592    def _end_section(self):
593        if self._section:
594            text = self._section.text = self._section.text.strip()
595            if self._section.name and (not text or text.isspace()):
596                raise QAPIParseError(
597                    self._parser,
598                    "empty doc section '%s'" % self._section.name)
599            self._section = None
600
601    def _append_freeform(self, line):
602        match = re.match(r'(@\S+:)', line)
603        if match:
604            raise QAPIParseError(self._parser,
605                                 "'%s' not allowed in free-form documentation"
606                                 % match.group(1))
607        self._section.append(line)
608
609    def connect_member(self, member):
610        if member.name not in self.args:
611            # Undocumented TODO outlaw
612            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
613                                                        member.name)
614        self.args[member.name].connect(member)
615
616    def connect_feature(self, feature):
617        if feature.name not in self.features:
618            raise QAPISemError(feature.info,
619                               "feature '%s' lacks documentation"
620                               % feature.name)
621        self.features[feature.name].connect(feature)
622
623    def check_expr(self, expr):
624        if self.has_section('Returns') and 'command' not in expr:
625            raise QAPISemError(self.info,
626                               "'Returns:' is only valid for commands")
627
628    def check(self):
629
630        def check_args_section(args, info, what):
631            bogus = [name for name, section in args.items()
632                     if not section.member]
633            if bogus:
634                raise QAPISemError(
635                    self.info,
636                    "documented member%s '%s' %s not exist"
637                    % ("s" if len(bogus) > 1 else "",
638                       "', '".join(bogus),
639                       "do" if len(bogus) > 1 else "does"))
640
641        check_args_section(self.args, self.info, 'members')
642        check_args_section(self.features, self.info, 'features')
643