# -*- coding: utf-8 -*-
#
# QAPI schema parser
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2019 Red Hat Inc.
#
# Authors:
#  Anthony Liguori <aliguori@us.ibm.com>
#  Markus Armbruster <armbru@redhat.com>
#  Marc-André Lureau <marcandre.lureau@redhat.com>
#  Kevin Wolf <kwolf@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.

from collections import OrderedDict
import os
import re
from typing import (
    Dict,
    List,
    Optional,
    Set,
    Union,
)

from .common import must_match
from .error import QAPISemError, QAPISourceError
from .source import QAPISourceInfo


# Return value alias for get_expr().
_ExprValue = Union[List[object], Dict[str, object], str, bool]


class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        """
        Build an error located at the parser's current token.

        The column is computed from the text between the start of the
        current line (``parser.line_pos``) and the start of the current
        token (``parser.pos``).  Columns are 1-based.

        :param parser: The parser whose position is being reported.
        :param msg: The error message.
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop.  Tab stops are every
                # 8 columns, i.e. at 1-based columns 1, 9, 17, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)


class QAPISchemaParser:
    """
    Parse QAPI schema source.

    Parse a JSON-esque schema file and process directives.  See
    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
    Grammatical validation is handled later by `expr.check_exprs()`.

    :param fname: Source file name.
    :param previously_included:
        The absolute names of previously included source files,
        if being invoked from another parser.
    :param incl_info:
       `QAPISourceInfo` belonging to the parent module.
       ``None`` implies this is the root module.

    :ivar exprs: Resulting parsed expressions.
    :ivar docs: Resulting parsed documentation blocks.

    :raise OSError: For problems reading the root schema document.
    :raise QAPIError: For errors in the schema source.
    """
    def __init__(self,
                 fname: str,
                 previously_included: Optional[Set[str]] = None,
                 incl_info: Optional[QAPISourceInfo] = None):
        self._fname = fname
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok: Optional[str] = None
        self.pos = 0
        self.cursor = 0
        self.val: Optional[Union[bool, str]] = None
        self.line_pos = 0

        # Parser output:
        self.exprs: List[Dict[str, object]] = []
        self.docs: List[QAPIDoc] = []

        # Showtime!
        self._parse()

    def _parse(self) -> None:
        """
        Parse the QAPI schema document.

        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the buffer ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # cur_doc is left bound to the last block returned, so
                # that it can be attached to the expression that follows.
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Included file names are relative to the including file.
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
        """
        Reject a definition documentation block lacking its definition.

        :param doc: The pending documentation block, if any.
        :raise QAPISemError: When ``doc`` names a symbol.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include: str,
                 info: QAPISourceInfo,
                 incl_fname: str,
                 previously_included: Set[str]
                 ) -> Optional['QAPISchemaParser']:
        """
        Parse an included schema file.

        :param include: The file name as written in the directive.
        :param info: Source location of the 'include' directive.
        :param incl_fname: The file name to actually open.
        :param previously_included:
            Absolute names of the files included so far.

        :return:
            The parser for the included file, or None when the file
            is already in ``previously_included``.
        :raise QAPISemError:
            For an inclusion loop, or when the file can't be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf: Optional[QAPISourceInfo] = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
        """
        Validate one pragma setting and store it in ``info.pragma``.

        :param name: The pragma's name.
        :param value: The pragma's value, as parsed.
        :param info: Source location of the 'pragma' directive.
        :raise QAPISemError:
            For an unknown pragma or a value of the wrong type.
        """

        def check_list_str(name: str, value: object) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline ending the comment.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character was consumed above already.
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self) -> Dict[str, object]:
        """
        Parse the members of an object.

        The current token is the one following the opening "{".  On
        return, the closing "}" has been consumed.

        :return: The members, in source order.
        :raise QAPIParseError:
            For a syntax error or a duplicate member name.
        """
        expr: Dict[str, object] = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self) -> List[object]:
        """
        Parse the elements of an array.

        The current token is the one following the opening "[".  On
        return, the closing "]" has been consumed.

        :return: The elements, in source order.
        :raise QAPIParseError: For a syntax error.
        """
        expr: List[object] = []
        if self.tok == ']':
            self.accept()
            return expr
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self) -> _ExprValue:
        """
        Parse one expression starting at the current token.

        :return:
            A dict for an object, a list for an array, or the str or
            bool value of a string or boolean token.
        :raise QAPIParseError: For a syntax error.
        """
        expr: _ExprValue
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
        """
        Parse a documentation comment.

        The current token is the COMMENT token starting the comment.
        A line starting with "# =" begins a new block when the current
        block's body already has text, so one comment may yield
        several blocks.

        :param info: Source location passed on to each `QAPIDoc`.
        :return: The documentation blocks found; at least one.
        :raise QAPIParseError: For a malformed documentation comment.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")


class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional name plus text."""
        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """Append a line of text, stripped of the expected indent."""
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = must_match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """A section documenting an argument, member or feature."""
        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the object this section documents; set by connect()
            self.member = None

        def connect(self, member):
            """Record the object this section documents."""
            self.member = member

    class NullSection(Section):
        """
        Immutable dummy section for use at the end of a doc block.
        """
        def append(self, line):
            assert False, "Text appended after end_comment() called."

    def __init__(self, parser, info):
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'.
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """
        Finish the block.

        Switches to a `NullSection`, which checks that the last section
        is not empty and rejects any further text.
        """
        self._switch_section(QAPIDoc.NullSection(self._parser))

    @staticmethod
    def _is_section_tag(name):
        """Return True if ``name`` is one of the recognized section tags."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this a symbol line and it is the section's first line, this
        is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # Invalid names are not checked here, but the name provided MUST
            # match the following definition, which *is* validated in expr.py.
            if not self.symbol:
                raise QAPIParseError(
                    self._parser, "name required after '@'")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section.  A section tag,
        or a non-indented line after a blank line, begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        Start a new `ArgSection` and register it in ``symbols_dict``.

        Raises `QAPIParseError` for an empty or duplicated name.
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        new_section = QAPIDoc.ArgSection(self._parser, name, indent)
        self._switch_section(new_section)
        symbols_dict[name] = new_section

    def _start_args_section(self, name, indent):
        """Start the argument section for ``name``."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the feature section for ``name``."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        Start a new additional section, anonymous when ``name`` is None.

        Raises `QAPIParseError` for a second 'Returns' or 'Since' section.
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        new_section = QAPIDoc.Section(self._parser, name, indent)
        self._switch_section(new_section)
        self.sections.append(new_section)

    def _switch_section(self, new_section):
        """
        Finish the current section and make ``new_section`` current.

        The finished section's text is stripped of surrounding
        whitespace.  Raises `QAPIParseError` if a section other than
        the body ends up empty.
        """
        text = self._section.text = self._section.text.strip()

        # Only the 'body' section is allowed to have an empty body.
        # All other sections, including anonymous ones, must have text.
        if self._section != self.body and not text:
            # We do not create anonymous sections unless there is
            # something to put in them; this is a parser bug.
            assert self._section.name
            raise QAPIParseError(
                self._parser,
                "empty doc section '%s'" % self._section.name)

        self._section = new_section

    def _append_freeform(self, line):
        """
        Append ordinary text to the current section.

        Raises `QAPIParseError` if the line starts with a symbol tag.
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect ``member`` to the argument section documenting it.

        An empty section is created for an undocumented member.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect ``feature`` to the feature section documenting it.

        Raises `QAPISemError` for an undocumented feature.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """Reject a 'Returns' section when ``expr`` is not a command."""
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that every documented member and feature is connected.

        Raises `QAPISemError` naming the sections that have no member
        connected to them.
        """

        def check_args_section(args, what):
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist" % (
                        what,
                        "s" if len(bogus) > 1 else "",
                        "', '".join(bogus),
                        "do" if len(bogus) > 1 else "does"
                    ))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')