xref: /openbmc/qemu/scripts/qapi/parser.py (revision 9cd0205d553bc27a66454782dfc5d7e8d2324e34)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        """
        Build the error from the parser's current lexer position.

        :param parser: the parser reporting the error; its `src`,
            `line_pos`, `pos` and `info` attributes are read
        :param msg: the error message
        """
        # Compute the 1-based column of parser.pos within the current
        # line, expanding tabs to tab stops every 8 columns.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Jump to the next tab stop: column 9, 17, 25, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
class QAPISchemaParser:
    """
    Parse a QAPI schema file into expressions and documentation blocks.

    All the work happens in the constructor.  Afterwards, `exprs` is a
    list of dicts with keys 'expr', 'info' and (when a definition is
    documented) 'doc', and `docs` is the list of documentation blocks.
    Expressions and documentation of included files are merged in at
    the point of inclusion.
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        """
        Parse the schema file @fname.

        :param fname: name of the schema file to parse
        :param previously_included: set of absolute names of files
            parsed so far; the absolute name of @fname is added to it.
            A fresh set is used when this is None or empty.
        :param incl_info: source info passed on to QAPISourceInfo as
            the parent of this file's info (the location of the
            'include' directive when called from `_include`)
        :raise OSError: when @fname can't be opened or read
        :raise QAPIParseError: on syntax errors
        :raise QAPISemError: on invalid top-level expressions
        """
        self._fname = fname
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok = None
        self.pos = 0
        self.cursor = 0
        self.val = None
        self.line_pos = 0

        # Parser output:
        self.exprs = []
        self.docs = []

        # Showtime!
        self._parse()

    def _parse(self):
        """
        Read the schema file and fill in `self.exprs` and `self.docs`.

        Documentation comments are collected into `self.docs`.  An
        'include' directive is recorded in `self.exprs` and the
        included file's output is appended.  A 'pragma' directive is
        applied to the source info.  Any other top-level object is
        recorded in `self.exprs`, together with the documentation block
        immediately preceding it, if any.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # cur_doc ends up being the last block of the comment
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """
        Complain when @doc is definition documentation.

        Called where a documentation block can't be attached to a
        definition: before another comment, a directive, or end of file.

        :raise QAPISemError: when @doc is not None and names a symbol
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    def _include(self, include, info, incl_fname, previously_included):
        """
        Parse the file named by an 'include' directive.

        :param include: the file name as written in the directive
        :param info: source info of the directive
        :param incl_fname: the file name to open
        :param previously_included: set of absolute names of files
            parsed so far
        :return: the parser for the included file, or None when the
            file was included before
        :raise QAPISemError: on an inclusion loop, or when the file
            can't be read
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    def _check_pragma_list_of_str(self, name, value, info):
        """
        Check that the value of pragma @name is a list of strings.

        :raise QAPISemError: when @value is anything else
        """
        if (not isinstance(value, list)
                or any(not isinstance(elt, str) for elt in value)):
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)

    def _pragma(self, name, value, info):
        """
        Apply pragma @name with @value to `info.pragma`.

        :raise QAPISemError: when @name is unknown or @value has the
            wrong type
        """
        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            info.pragma.doc_required = value
        elif name == 'command-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_name_exceptions = value
        elif name == 'command-returns-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_returns_exceptions = value
        elif name == 'member-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.member_name_exceptions = value
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next token and store it in the lexer state.

        On return:

        * `self.tok` is the token's first character, or None at end of
          input: '#' for a comment, one of '{}:,[]' for structural
          characters, "'" for a string, 't' or 'f' for a boolean
        * `self.val` is the comment text for '#', the string value for
          "'", True or False for a boolean, and None otherwise
        * `self.pos` is the index of the token's first character in
          `self.src`, and `self.cursor` the index where lexing resumes
        * `self.info` and `self.line_pos` describe the current line

        :param skip_comment: when true, skip over comments other than
            documentation comments (the ones starting with '##')
        :raise QAPIParseError: on malformed input
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # The source ends with a newline, so this always finds one
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = re.match('[^[\\]{}:,\\s\'"]+',
                                 self.src[self.cursor-1:])
                # A stray '"' is excluded by the pattern, so there is
                # no match; report just the offending character then.
                stray = match.group(0) if match else self.tok
                raise QAPIParseError(self, "stray '%s'" % stray)

    def get_members(self):
        """
        Parse the members of an object; the opening '{' was consumed.

        :return: an OrderedDict mapping member names to values
        :raise QAPIParseError: on syntax errors and duplicate keys
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse the elements of a list; the opening '[' was consumed.

        :return: a list of the element values
        :raise QAPIParseError: on syntax errors
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        # self.tok is None at end of input; test against a tuple
        # because 'None in str' raises TypeError.
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self):
        """
        Parse one expression starting at the current token.

        :return: an OrderedDict for an object, a list for a list, a
            str for a string, a bool for a boolean
        :raise QAPIParseError: on syntax errors
        """
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        # self.tok is None at end of input; test against a tuple
        # because 'None in str' raises TypeError.
        elif self.tok in tuple("'tf"):
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment; the current token is its '##'.

        A line starting with '# =' begins a new documentation block
        when the current block has body text already.

        :param info: source info of the start of the comment
        :return: the list of QAPIDoc blocks found in the comment
        :raise QAPIParseError: on malformed documentation comments
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
333
334
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional name plus text."""

        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """
            Append @line to the section's text.

            The expected indentation is removed from the front of the
            line, and trailing whitespace is stripped.

            :raise QAPIParseError: when a non-empty line is indented
                less than expected
            """
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = re.match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """A section describing an argument, member or feature."""

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # set by connect(); stays None for bogus documentation
            self.member = None

        def connect(self, member):
            """Record @member as the thing this section documents."""
            self.member = member

    def __init__(self, parser, info):
        """
        Create an empty documentation block.

        :param parser: the parser, used for reporting errors
        :param info: source info of the documentation comment
        """
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line

        :param line: the comment line, including its leading '#'
        :raise QAPIParseError: on malformed documentation
        """
        # Drop the leading '#'
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the documentation block by ending its last section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        """Return True if @name is a recognized section tag."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this is a symbol line and it is the section's first line,
        this is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section.  A section
        tag, or a non-indented line after a blank line, begins an
        additional section; a 'Features:' line after a blank line
        begins the features sections instead.  Start that section and
        append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section.  A section tag,
        or a non-indented line after a blank line, begins an
        additional section.  Start that section and append the line
        to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        End the current section and start an ArgSection named @name.

        :param symbols_dict: the dict to register the new section in,
            either `self.args` or `self.features`
        :param name: the name of the documented argument or feature
        :param indent: expected indentation of the section's text
        :raise QAPIParseError: when @name is empty or duplicated
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        """Start the argument section for argument @name."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the feature section for feature @name."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        End the current section and start an additional section.

        :param name: the section's tag without the colon, or None for
            an untagged section
        :param indent: expected indentation of the section's text
        :raise QAPIParseError: on a second 'Returns' or 'Since' section
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """
        End the current section, if any, stripping its text.

        :raise QAPIParseError: when a named section has no text
        """
        if self._section:
            text = self._section.text = self._section.text.strip()
            if self._section.name and not text:
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """
        Append @line to the current section as ordinary text.

        :raise QAPIParseError: when the line starts with a symbol
            ('@name:'), which is not allowed in ordinary text
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect @member to its argument section.

        An empty argument section is created for an undocumented
        member.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect @feature to its feature section.

        :raise QAPISemError: when the feature is not documented
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """
        Check the documentation against the expression it documents.

        :raise QAPISemError: when there is a 'Returns' section but
            @expr has no 'command' key
        """
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that everything documented has been connected.

        :raise QAPISemError: when an argument or feature section has
            no member or feature connected to it
        """

        def check_args_section(args, what):
            # Sections nothing was connected to document bogus names
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')
674