# -*- coding: utf-8 -*-
#
# QAPI schema parser
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2019 Red Hat Inc.
#
# Authors:
#  Anthony Liguori <aliguori@us.ibm.com>
#  Markus Armbruster <armbru@redhat.com>
#  Marc-André Lureau <marcandre.lureau@redhat.com>
#  Kevin Wolf <kwolf@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.

from collections import OrderedDict
import os
import re
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Mapping,
    Match,
    Optional,
    Set,
    Union,
)

from .common import must_match
from .error import QAPISemError, QAPISourceError
from .source import QAPISourceInfo


if TYPE_CHECKING:
    # pylint: disable=cyclic-import
    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
    from .schema import QAPISchemaFeature, QAPISchemaMember


# Return value alias for get_expr().
_ExprValue = Union[List[object], Dict[str, object], str, bool]


class QAPIExpression(Dict[str, Any]):
    """
    A parsed top-level expression.

    Behaves as a dict of the expression's members, and additionally
    carries the source location and the associated doc comment, if any.

    :param data: The expression's members.
    :param info: Source location of the expression.
    :param doc: Documentation block attached to the expression, if any.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        super().__init__(data)
        self.info = info
        self.doc: Optional['QAPIDoc'] = doc


class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        # Compute the 1-based column of the current token by walking
        # the text between the start of the line and the token.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 9, 17, 25, ...).
                # The former "(col + 7) % 8 + 1" never advanced at all.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)


class QAPISchemaParser:
    """
    Parse QAPI schema source.

    Parse a JSON-esque schema file and process directives.  See
    qapi-code-gen.rst section "Schema Syntax" for the exact syntax.
    Grammatical validation is handled later by `expr.check_exprs()`.

    :param fname: Source file name.
    :param previously_included:
        The absolute names of previously included source files,
        if being invoked from another parser.
    :param incl_info:
       `QAPISourceInfo` belonging to the parent module.
       ``None`` implies this is the root module.

    :ivar exprs: Resulting parsed expressions.
    :ivar docs: Resulting parsed documentation blocks.

    :raise OSError: For problems reading the root schema document.
    :raise QAPIError: For errors in the schema source.
    """
    def __init__(self,
                 fname: str,
                 previously_included: Optional[Set[str]] = None,
                 incl_info: Optional[QAPISourceInfo] = None):
        self._fname = fname
        # Note: a non-empty set passed by the caller is shared and
        # updated in place, so nested parsers see each other's includes.
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok: Union[None, str] = None
        self.pos = 0
        self.cursor = 0
        self.val: Optional[Union[bool, str]] = None
        self.line_pos = 0

        # Parser output:
        self.exprs: List[QAPIExpression] = []
        self.docs: List[QAPIDoc] = []

        # Showtime!
        self._parse()

    def _parse(self) -> None:
        """
        Parse the QAPI schema document.

        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the buffer ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                cur_doc = self.get_doc()
                self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include paths are relative to the including file.
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self._add_expr(OrderedDict({'include': incl_fname}), info)
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                if cur_doc and not cur_doc.symbol:
                    raise QAPISemError(
                        cur_doc.info, "definition documentation required")
                self._add_expr(expr, info, cur_doc)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    def _add_expr(self, expr: Mapping[str, object],
                  info: QAPISourceInfo,
                  doc: Optional['QAPIDoc'] = None) -> None:
        """Wrap @expr in a `QAPIExpression` and append it to ``.exprs``."""
        self.exprs.append(QAPIExpression(expr, info, doc))

    @staticmethod
    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
        """
        Reject a pending definition doc block.

        :raise QAPISemError: When @doc is a block that names a symbol.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include: str,
                 info: QAPISourceInfo,
                 incl_fname: str,
                 previously_included: Set[str]
                 ) -> Optional['QAPISchemaParser']:
        """
        Parse an included file.

        :param include: The include name as written in the schema.
        :param info: Source location of the include directive.
        :param incl_fname: File name to open for the include.
        :param previously_included:
            Absolute names of files included so far.

        :return:
            The parser for the included file, or None when the file
            was already included.

        :raise QAPISemError:
            On an inclusion loop, or when the file can't be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf: Optional[QAPISourceInfo] = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
        """
        Validate pragma @name and store @value in ``info.pragma``.

        :raise QAPISemError:
            When @name is unknown or @value has the wrong type.
        """

        def check_list_str(name: str, value: object) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'documentation-exceptions':
            pragma.documentation_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline ending the comment
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character was consumed above; skip the rest
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\']+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self) -> Dict[str, object]:
        """
        Parse the members of an object, after its opening "{".

        :return: The members, in source order.
        :raise QAPIParseError: On a syntax error or a duplicate key.
        """
        expr: Dict[str, object] = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self) -> List[object]:
        """
        Parse the elements of an array, after its opening "[".

        :return: The elements, in source order.
        :raise QAPIParseError: On a syntax error.
        """
        expr: List[object] = []
        if self.tok == ']':
            self.accept()
            return expr
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self) -> _ExprValue:
        """
        Parse one value starting at the current token.

        :return: A dict, list, str or bool, depending on the token.
        :raise QAPIParseError: When the token cannot start a value.
        """
        expr: _ExprValue
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc_line(self) -> Optional[str]:
        """
        Extract the text of the current doc comment line.

        :return:
            None for the closing "##", '' for a bare "#", else the
            text after "# " with trailing whitespace removed.
        :raise QAPIParseError:
            When the current token is not a comment, or the comment
            line is malformed.
        """
        if self.tok != '#':
            raise QAPIParseError(
                self, "documentation comment must end with '##'")
        assert isinstance(self.val, str)
        if self.val.startswith('##'):
            # End of doc comment
            if self.val != '##':
                raise QAPIParseError(
                    self, "junk after '##' at end of documentation comment")
            return None
        if self.val == '#':
            return ''
        if self.val[1] != ' ':
            raise QAPIParseError(self, "missing space after #")
        return self.val[2:].rstrip()

    @staticmethod
    def _match_at_name_colon(string: str) -> Optional[Match[str]]:
        """Match "@NAME:" plus trailing spaces at the start of @string."""
        return re.match(r'@([^:]*): *', string)

    def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
        """
        Read the indented continuation lines of the current section.

        Lines are appended to @doc with the indentation of the first
        non-blank line stripped.

        :return:
            The first line that does not belong to the section, or
            None at the end of the doc comment.
        :raise QAPIParseError: On an unexpected de-indent.
        """
        self.accept(False)
        line = self.get_doc_line()
        while line == '':
            doc.append_line(line)
            self.accept(False)
            line = self.get_doc_line()
        if line is None:
            return line
        indent = must_match(r'\s*', line).end()
        if not indent:
            return line
        doc.append_line(line[indent:])
        prev_line_blank = False
        while True:
            self.accept(False)
            line = self.get_doc_line()
            if line is None:
                return line
            if self._match_at_name_colon(line):
                return line
            cur_indent = must_match(r'\s*', line).end()
            if line != '' and cur_indent < indent:
                if prev_line_blank:
                    return line
                raise QAPIParseError(
                    self,
                    "unexpected de-indent (expected at least %d spaces)" %
                    indent)
            doc.append_line(line[indent:])
            # NOTE(review): set unconditionally, so only a de-indent on
            # the line right after the first one is rejected -- confirm
            # this is intended rather than "line == ''".
            prev_line_blank = True

    def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
        """
        Append lines to @doc up to the next blank line.

        :return: '' at a blank line, None at the end of the doc comment.
        """
        while True:
            self.accept(False)
            line = self.get_doc_line()
            if line is None:
                return line
            if line == '':
                return line
            doc.append_line(line)

    def get_doc(self) -> 'QAPIDoc':
        """
        Parse a documentation comment block.

        The current token must be the comment that opens the block.
        A block whose first line starts with "@" is definition
        documentation; anything else is free-form documentation.

        :return: The parsed documentation block.
        :raise QAPIParseError: On malformed documentation.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")
        info = self.info
        self.accept(False)
        line = self.get_doc_line()
        if line is not None and line.startswith('@'):
            # Definition documentation
            if not line.endswith(':'):
                raise QAPIParseError(self, "line should end with ':'")
            # Invalid names are not checked here, but the name
            # provided *must* match the following definition,
            # which *is* validated in expr.py.
            symbol = line[1:-1]
            if not symbol:
                raise QAPIParseError(self, "name required after '@'")
            doc = QAPIDoc(info, symbol)
            self.accept(False)
            line = self.get_doc_line()
            no_more_args = False

            while line is not None:
                # Blank lines
                while line == '':
                    self.accept(False)
                    line = self.get_doc_line()
                if line is None:
                    break
                # Non-blank line, first of a section
                if line == 'Features:':
                    if doc.features:
                        raise QAPIParseError(
                            self, "duplicated 'Features:' line")
                    self.accept(False)
                    line = self.get_doc_line()
                    while line == '':
                        self.accept(False)
                        line = self.get_doc_line()
                    while (line is not None
                           and (match := self._match_at_name_colon(line))):
                        doc.new_feature(self.info, match.group(1))
                        text = line[match.end():]
                        if text:
                            doc.append_line(text)
                        line = self.get_doc_indented(doc)
                    if not doc.features:
                        raise QAPIParseError(
                            self, 'feature descriptions expected')
                    no_more_args = True
                elif match := self._match_at_name_colon(line):
                    # description
                    if no_more_args:
                        raise QAPIParseError(
                            self,
                            "description of '@%s:' follows a section"
                            % match.group(1))
                    while (line is not None
                           and (match := self._match_at_name_colon(line))):
                        doc.new_argument(self.info, match.group(1))
                        text = line[match.end():]
                        if text:
                            doc.append_line(text)
                        line = self.get_doc_indented(doc)
                    no_more_args = True
                elif match := re.match(
                        r'(Returns|Errors|Since|Notes?|Examples?|TODO): *',
                        line):
                    # tagged section
                    doc.new_tagged_section(self.info, match.group(1))
                    text = line[match.end():]
                    if text:
                        doc.append_line(text)
                    line = self.get_doc_indented(doc)
                    no_more_args = True
                elif line.startswith('='):
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                else:
                    # tag-less paragraph
                    doc.ensure_untagged_section(self.info)
                    doc.append_line(line)
                    line = self.get_doc_paragraph(doc)
        else:
            # Free-form documentation
            doc = QAPIDoc(info)
            doc.ensure_untagged_section(self.info)
            first = True
            while line is not None:
                if match := self._match_at_name_colon(line):
                    raise QAPIParseError(
                        self,
                        "'@%s:' not allowed in free-form documentation"
                        % match.group(1))
                if line.startswith('='):
                    if not first:
                        raise QAPIParseError(
                            self,
                            "'=' heading must come first in a comment block")
                doc.append_line(line)
                self.accept(False)
                line = self.get_doc_line()
                first = False

        # Move past the closing '##'
        self.accept(False)
        doc.end()
        return doc


class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A run of documentation text, optionally introduced by a tag."""
        # pylint: disable=too-few-public-methods
        def __init__(self, info: QAPISourceInfo,
                     tag: Optional[str] = None):
            # section source info, i.e. where it begins
            self.info = info
            # section tag, if any ('Returns', '@name', ...)
            self.tag = tag
            # section text without tag
            self.text = ''

        def append_line(self, line: str) -> None:
            """Append @line plus a newline to the section text."""
            self.text += line + '\n'

    class ArgSection(Section):
        """A section describing one argument, member or feature."""
        def __init__(self, info: QAPISourceInfo, tag: str):
            super().__init__(info, tag)
            self.member: Optional['QAPISchemaMember'] = None

        def connect(self, member: 'QAPISchemaMember') -> None:
            """Record @member as the one this section describes."""
            self.member = member

    def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
        # info points to the doc comment block's first line
        self.info = info
        # definition doc's symbol, None for free-form doc
        self.symbol: Optional[str] = symbol
        # the sections in textual order
        self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(info)]
        # the body section
        self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
        # dicts mapping parameter/feature names to their description
        self.args: Dict[str, QAPIDoc.ArgSection] = {}
        self.features: Dict[str, QAPIDoc.ArgSection] = {}
        # a command's "Returns" and "Errors" section
        self.returns: Optional[QAPIDoc.Section] = None
        self.errors: Optional[QAPIDoc.Section] = None
        # "Since" section
        self.since: Optional[QAPIDoc.Section] = None
        # sections other than .body, .args, .features
        self.sections: List[QAPIDoc.Section] = []

    def end(self) -> None:
        """
        Finish the block: trim newlines around each section's text.

        :raise QAPISemError: When a tagged section has no text.
        """
        for section in self.all_sections:
            section.text = section.text.strip('\n')
            if section.tag is not None and section.text == '':
                raise QAPISemError(
                    section.info, "text required after '%s:'" % section.tag)

    def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
        """Make the current section untagged, starting one if needed."""
        if self.all_sections and not self.all_sections[-1].tag:
            # extend current section
            self.all_sections[-1].text += '\n'
            return
        # start new section
        section = self.Section(info)
        self.sections.append(section)
        self.all_sections.append(section)

    def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None:
        """
        Start a new section tagged @tag.

        :raise QAPISemError:
            On a second 'Returns', 'Errors' or 'Since' section.
        """
        section = self.Section(info, tag)
        if tag == 'Returns':
            if self.returns:
                raise QAPISemError(
                    info, "duplicated '%s' section" % tag)
            self.returns = section
        elif tag == 'Errors':
            if self.errors:
                raise QAPISemError(
                    info, "duplicated '%s' section" % tag)
            self.errors = section
        elif tag == 'Since':
            if self.since:
                raise QAPISemError(
                    info, "duplicated '%s' section" % tag)
            self.since = section
        self.sections.append(section)
        self.all_sections.append(section)

    def _new_description(self, info: QAPISourceInfo, name: str,
                         desc: Dict[str, ArgSection]) -> None:
        """
        Start a new '@name' section and register it in @desc.

        :raise QAPISemError: When @name is empty or already in @desc.
        """
        if not name:
            raise QAPISemError(info, "invalid parameter name")
        if name in desc:
            raise QAPISemError(info, "'%s' parameter name duplicated" % name)
        section = self.ArgSection(info, '@' + name)
        self.all_sections.append(section)
        desc[name] = section

    def new_argument(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of argument @name."""
        self._new_description(info, name, self.args)

    def new_feature(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of feature @name."""
        self._new_description(info, name, self.features)

    def append_line(self, line: str) -> None:
        """Append @line to the most recently started section."""
        self.all_sections[-1].append_line(line)

    def connect_member(self, member: 'QAPISchemaMember') -> None:
        """
        Connect @member to its description.

        An undocumented member gets an empty description when the
        block's symbol is listed in the documentation-exceptions
        pragma.

        :raise QAPISemError:
            When @member is undocumented and not excepted.
        """
        if member.name not in self.args:
            assert member.info
            if self.symbol not in member.info.pragma.documentation_exceptions:
                raise QAPISemError(member.info,
                                   "%s '%s' lacks documentation"
                                   % (member.role, member.name))
            self.args[member.name] = QAPIDoc.ArgSection(
                self.info, '@' + member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
        """
        Connect @feature to its description.

        :raise QAPISemError: When @feature is undocumented.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr: QAPIExpression) -> None:
        """
        Check the sections are appropriate for the kind of @expr.

        :raise QAPISemError:
            On a 'Returns' or 'Errors' section that doesn't fit @expr.
        """
        if 'command' in expr:
            if self.returns and 'returns' not in expr:
                raise QAPISemError(
                    self.returns.info,
                    "'Returns' section, but command doesn't return anything")
        else:
            if self.returns:
                raise QAPISemError(
                    self.returns.info,
                    "'Returns' section is only valid for commands")
            if self.errors:
                raise QAPISemError(
                    self.errors.info,
                    "'Errors' section is only valid for commands")

    def check(self) -> None:
        """
        Check every description got connected to a member or feature.

        :raise QAPISemError: When a description was never connected.
        """

        def check_args_section(
                args: Dict[str, QAPIDoc.ArgSection], what: str
        ) -> None:
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    args[bogus[0]].info,
                    "documented %s%s '%s' %s not exist" % (
                        what,
                        "s" if len(bogus) > 1 else "",
                        "', '".join(bogus),
                        "do" if len(bogus) > 1 else "does"
                    ))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')