# -*- coding: utf-8 -*-
#
# QAPI schema parser
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2019 Red Hat Inc.
#
# Authors:
#  Anthony Liguori <aliguori@us.ibm.com>
#  Markus Armbruster <armbru@redhat.com>
#  Marc-André Lureau <marcandre.lureau@redhat.com>
#  Kevin Wolf <kwolf@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.

from collections import OrderedDict
import os
import re
from typing import (
    Dict,
    List,
    Optional,
    Set,
    Union,
)

from .common import must_match
from .error import QAPISemError, QAPISourceError
from .source import QAPISourceInfo


# Return value alias for get_expr().
_ExprValue = Union[List[object], Dict[str, object], str, bool]


class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        """
        Build an error located at the parser's current token.

        :param parser: Parser whose ``.info``, ``.src``, ``.line_pos``
                       and ``.pos`` describe the error location.
        :param msg: Human-readable error message.
        """
        # Compute the 1-based column of the current token by walking
        # the text between the start of the line and the token.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop; tab stops are every 8
                # columns, i.e. at columns 1, 9, 17, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)


class QAPISchemaParser:
    """
    Parse QAPI schema source.

    Parse a JSON-esque schema file and process directives.  See
    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
    Grammatical validation is handled later by `expr.check_exprs()`.

    :param fname: Source file name.
    :param previously_included:
        The absolute names of previously included source files,
        if being invoked from another parser.
    :param incl_info:
       `QAPISourceInfo` belonging to the parent module.
       ``None`` implies this is the root module.

    :ivar exprs: Resulting parsed expressions.
    :ivar docs: Resulting parsed documentation blocks.

    :raise OSError: For problems reading the root schema document.
    :raise QAPIError: For errors in the schema source.
    """
    def __init__(self,
                 fname: str,
                 previously_included: Optional[Set[str]] = None,
                 incl_info: Optional[QAPISourceInfo] = None):
        self._fname = fname
        self._included = previously_included or set()
        # Record this file so nested includes of it are skipped
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok: Optional[str] = None
        self.pos = 0
        self.cursor = 0
        self.val: Optional[Union[bool, str]] = None
        self.line_pos = 0

        # Parser output:
        self.exprs: List[Dict[str, object]] = []
        self.docs: List[QAPIDoc] = []

        # Showtime!
        self._parse()

    def _parse(self) -> None:
        """
        Parse the QAPI schema document.

        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # cur_doc ends up being the last block returned; it is
                # attached to the next definition, if any
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include paths are relative to the including file
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
        """
        Reject a pending definition documentation block.

        :param doc: The documentation block not yet attached to an
                    expression, or None.
        :raise QAPISemError: If ``doc`` names a symbol, i.e. it is
                             definition documentation.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include: str,
                 info: QAPISourceInfo,
                 incl_fname: str,
                 previously_included: Set[str]
                 ) -> Optional['QAPISchemaParser']:
        """
        Parse an included schema file.

        :param include: The include file name as written in the schema.
        :param info: Source location of the 'include' directive.
        :param incl_fname: Path of the file to include.
        :param previously_included:
            Absolute names of the files included so far.

        :return: The parser for the included file, or None if the file
                 was included before.
        :raise QAPISemError: On an inclusion loop, or when the file
                             cannot be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf: Optional[QAPISourceInfo] = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
        """
        Validate one pragma setting and store it in ``info.pragma``.

        :param name: The pragma's name.
        :param value: The pragma's value as parsed from the schema.
        :param info: Source location of the 'pragma' directive.

        :raise QAPISemError: If the pragma is unknown or its value has
                             the wrong type.
        """

        def check_list_str(name: str, value: object) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline ending the comment
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character was consumed above already
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # The final newline marks EOF
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self) -> Dict[str, object]:
        """
        Parse the members of an object, up to and including the "}".

        The opening "{" must have been consumed already.

        :return: The members, in source order.
        :raise QAPIParseError: On a syntax error or a duplicate key.
        """
        expr: Dict[str, object] = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self) -> List[object]:
        """
        Parse the elements of an array, up to and including the "]".

        The opening "[" must have been consumed already.

        :return: The array's elements.
        :raise QAPIParseError: On a syntax error.
        """
        expr: List[object] = []
        if self.tok == ']':
            self.accept()
            return expr
        # tuple() because self.tok may be None, which cannot be tested
        # for membership in a str
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self) -> _ExprValue:
        """
        Parse one expression: an object, an array, a string or a boolean.

        :return: The parsed value.
        :raise QAPIParseError: If the current token cannot start an
                               expression.
        """
        expr: _ExprValue
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
        """
        Parse a documentation comment.

        The current token must be the comment's opening line.

        :param info: Source location of the start of the comment.

        :return: The documentation blocks found in the comment.  A line
                 starting with "# =" begins a new block when the
                 current block's body has text.
        :raise QAPIParseError: If the comment is malformed.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")


class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional name plus text."""
        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """
            Append a line of text, minus the section's expected indent.

            Raise QAPIParseError if a non-blank line is indented less
            than expected.
            """
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = must_match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """A section documenting one argument, member or feature."""
        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # The schema entity documented here; set by connect()
            self.member = None

        def connect(self, member):
            """Associate this section with the entity it documents."""
            self.member = member

    def __init__(self, parser, info):
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        # name of the definition documented; None for free-form blocks
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        # the line handler for the part being parsed; see append()
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        for i in self.sections:
            if i.name == name:
                return True
        return False

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the block by closing its current section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        """Return True if name is one of the recognized section tags."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this a symbol line and it is the section's first line, this
        is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section, a section tag
        or a non-indented line after a blank line begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = must_match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        Close the current section and start an ArgSection for name.

        The new section is recorded in symbols_dict.  Raise
        QAPIParseError if name is empty or already in symbols_dict.
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        """Start the section documenting argument or member name."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the section documenting feature name."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        Close the current section and start an additional section.

        Raise QAPIParseError if a 'Returns' or 'Since' section is
        started a second time.
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """
        Close the current section, if any, stripping its text.

        Raise QAPIParseError if a named section has no text.
        """
        if self._section:
            # Stripped text is either empty or has non-space characters
            text = self._section.text = self._section.text.strip()
            if self._section.name and not text:
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """
        Append a line of ordinary text to the current section.

        Raise QAPIParseError if the line starts with a '@name:' tag.
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect a member to its argument section.

        An empty section is created for a member that has none.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect a feature to its feature section.

        Raise QAPISemError if the feature is not documented.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """Raise QAPISemError on 'Returns:' in a non-command's doc."""
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that all documented members and features exist.

        Raise QAPISemError if an argument or feature section was never
        connected to a schema entity.
        """

        def check_args_section(args, what):
            # Sections still lacking a member document nothing real
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')