# -*- coding: utf-8 -*-
#
# QAPI schema parser
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2019 Red Hat Inc.
#
# Authors:
#  Anthony Liguori <aliguori@us.ibm.com>
#  Markus Armbruster <armbru@redhat.com>
#  Marc-André Lureau <marcandre.lureau@redhat.com>
#  Kevin Wolf <kwolf@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.

from collections import OrderedDict
import os
import re
from typing import (
    TYPE_CHECKING,
    Dict,
    List,
    Mapping,
    Match,
    Optional,
    Set,
    Union,
)

from .common import must_match
from .error import QAPISemError, QAPISourceError
from .source import QAPISourceInfo


if TYPE_CHECKING:
    # pylint: disable=cyclic-import
    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
    from .schema import QAPISchemaFeature, QAPISchemaMember


# Return value alias for get_expr().
_ExprValue = Union[List[object], Dict[str, object], str, bool]


class QAPIExpression(Dict[str, object]):
    """
    A parsed top-level expression.

    This is a dict of the expression's members that additionally
    carries the source location (``.info``) and the documentation
    block attached to it (``.doc``, possibly None).
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        super().__init__(data)
        self.info = info
        self.doc: Optional['QAPIDoc'] = doc


class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        # Compute the 1-based display column of the current token by
        # walking the text between the start of the line and the token.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop; tab stops are at
                # columns 1, 9, 17, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)


class QAPISchemaParser:
    """
    Parse QAPI schema source.

    Parse a JSON-esque schema file and process directives.  See
    qapi-code-gen.rst section "Schema Syntax" for the exact syntax.
    Grammatical validation is handled later by `expr.check_exprs()`.

    :param fname: Source file name.
    :param previously_included:
        The absolute names of previously included source files,
        if being invoked from another parser.
    :param incl_info:
       `QAPISourceInfo` belonging to the parent module.
       ``None`` implies this is the root module.

    :ivar exprs: Resulting parsed expressions.
    :ivar docs: Resulting parsed documentation blocks.

    :raise OSError: For problems reading the root schema document.
    :raise QAPIError: For errors in the schema source.
    """
    def __init__(self,
                 fname: str,
                 previously_included: Optional[Set[str]] = None,
                 incl_info: Optional[QAPISourceInfo] = None):
        self._fname = fname
        # Note: a non-empty set passed by the caller is shared, not
        # copied, so files included by this parser become visible to
        # the including parser as well.
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok: Union[None, str] = None
        self.pos = 0
        self.cursor = 0
        self.val: Optional[Union[bool, str]] = None
        self.line_pos = 0

        # Parser output:
        self.exprs: List[QAPIExpression] = []
        self.docs: List[QAPIDoc] = []

        # Showtime!
        self._parse()

    def _parse(self) -> None:
        """
        Parse the QAPI schema document.

        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            # Remember where this top-level item starts; self.info
            # moves on as tokens are consumed.
            info = self.info
            if self.tok == '#':
                # A new doc block starts; a pending definition doc
                # must have been followed by its definition.
                self.reject_expr_doc(cur_doc)
                cur_doc = self.get_doc()
                self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include paths are relative to the including file.
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self._add_expr(OrderedDict({'include': incl_fname}), info)
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                # A definition: a preceding doc block, if any, must be
                # definition documentation (i.e. name a symbol).
                if cur_doc and not cur_doc.symbol:
                    raise QAPISemError(
                        cur_doc.info, "definition documentation required")
                self._add_expr(expr, info, cur_doc)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    def _add_expr(self, expr: Mapping[str, object],
                  info: QAPISourceInfo,
                  doc: Optional['QAPIDoc'] = None) -> None:
        """Wrap @expr in a `QAPIExpression` and append it to ``.exprs``."""
        self.exprs.append(QAPIExpression(expr, info, doc))

    @staticmethod
    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
        """
        Reject a dangling definition documentation block.

        :param doc: The pending documentation block, if any.

        :raise QAPISemError:
            When @doc is definition documentation (has a symbol).
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include: str,
                 info: QAPISourceInfo,
                 incl_fname: str,
                 previously_included: Set[str]
                 ) -> Optional['QAPISchemaParser']:
        """
        Parse an included schema file.

        :param include: The file name as written in the directive.
        :param info: Source location of the include directive.
        :param incl_fname: The file name to actually open.
        :param previously_included:
            Absolute names of the files included so far.

        :return:
            The parser for the included file, or None when the file
            was already included before.

        :raise QAPISemError:
            On an inclusion loop, or when the file cannot be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf: Optional[QAPISourceInfo] = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
        """
        Apply one pragma setting to ``info.pragma``.

        :param name: The pragma's name.
        :param value: The pragma's value, as parsed.
        :param info: Source location of the pragma directive.

        :raise QAPISemError:
            When @name is unknown or @value has the wrong type.
        """

        def check_list_str(name: str, value: object) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'documentation-exceptions':
            pragma.documentation_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Skip to end of line; the newline itself is handled
                # by the next loop iteration (or the next call).
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character was consumed above; skip 'rue'.
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # The first character was consumed above; skip 'alse'.
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # Final newline: end of input
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\']+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self) -> Dict[str, object]:
        """
        Parse the members of an object, up to and including "}".

        The current token must be the one following the opening "{".

        :return: The members, in source order.

        :raise QAPIParseError:
            On a key that isn't a string, a missing ":", a duplicate
            key, or a missing "," or "}".
        """
        expr: Dict[str, object] = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self) -> List[object]:
        """
        Parse the elements of an array, up to and including "]".

        The current token must be the one following the opening "[".

        :return: The elements, in source order.

        :raise QAPIParseError:
            When the first token can't start a value, or on a missing
            "," or "]".
        """
        expr: List[object] = []
        if self.tok == ']':
            self.accept()
            return expr
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self) -> _ExprValue:
        """
        Parse one value starting at the current token.

        :return:
            A dict for an object, a list for an array, or the str or
            bool value of a scalar token.

        :raise QAPIParseError: When the current token can't start a value.
        """
        expr: _ExprValue
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc_line(self) -> Optional[str]:
        """
        Extract the text of the current documentation comment line.

        The current token must be a COMMENT token.

        :return:
            None at the end of the doc comment ("##"), "" for a bare
            "#", else the text following "# " with trailing whitespace
            stripped.

        :raise QAPIParseError:
            When the current token isn't a comment, on junk after
            the terminating "##", or on a missing space after "#".
        """
        if self.tok != '#':
            raise QAPIParseError(
                self, "documentation comment must end with '##'")
        assert isinstance(self.val, str)
        if self.val.startswith('##'):
            # End of doc comment
            if self.val != '##':
                raise QAPIParseError(
                    self, "junk after '##' at end of documentation comment")
            return None
        if self.val == '#':
            return ''
        if self.val[1] != ' ':
            raise QAPIParseError(self, "missing space after #")
        return self.val[2:].rstrip()

    @staticmethod
    def _match_at_name_colon(string: str) -> Optional[Match[str]]:
        """Match "@NAME:" plus trailing spaces at the start of @string."""
        return re.match(r'@([^:]*): *', string)

    def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
        """
        Read the indented continuation lines of @doc's current section.

        Leading blank lines are appended to @doc.  The indentation of
        the first non-blank line then sets the indentation that is
        stripped from all lines appended afterwards.

        :param doc: The documentation block to append lines to.

        :return:
            The first line that is not part of the section, or None
            at the end of the doc comment.

        :raise QAPIParseError: On an unexpected de-indent.
        """
        self.accept(False)
        line = self.get_doc_line()
        while line == '':
            doc.append_line(line)
            self.accept(False)
            line = self.get_doc_line()
        if line is None:
            return line
        indent = must_match(r'\s*', line).end()
        if not indent:
            # Not indented: the line belongs to whatever comes next
            return line
        doc.append_line(line[indent:])
        prev_line_blank = False
        while True:
            self.accept(False)
            line = self.get_doc_line()
            if line is None:
                return line
            if self._match_at_name_colon(line):
                return line
            cur_indent = must_match(r'\s*', line).end()
            if line != '' and cur_indent < indent:
                if prev_line_blank:
                    return line
                raise QAPIParseError(
                    self,
                    "unexpected de-indent (expected at least %d spaces)" %
                    indent)
            doc.append_line(line[indent:])
            # NOTE(review): this is set unconditionally, so the
            # de-indent error above can only trigger on the line right
            # after the first indented one.  Possibly meant to track
            # whether the line just appended was blank -- confirm
            # before changing, since that would reject more input.
            prev_line_blank = True

    def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
        """
        Append lines to @doc up to the next blank line.

        :param doc: The documentation block to append lines to.

        :return:
            "" when stopped by a blank line, None at the end of the
            doc comment.
        """
        while True:
            self.accept(False)
            line = self.get_doc_line()
            if line is None:
                return line
            if line == '':
                return line
            doc.append_line(line)

    def get_doc(self) -> 'QAPIDoc':
        """
        Parse a documentation comment block.

        The current token must be the COMMENT token starting the
        block.  A block whose first line starts with "@" is definition
        documentation; anything else is free-form documentation.

        :return: The parsed documentation block.

        :raise QAPIParseError: On malformed documentation.
        :raise QAPISemError:
            May be raised by the `QAPIDoc` methods used to build the
            block.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")
        info = self.info
        self.accept(False)
        line = self.get_doc_line()
        if line is not None and line.startswith('@'):
            # Definition documentation
            if not line.endswith(':'):
                raise QAPIParseError(self, "line should end with ':'")
            # Invalid names are not checked here, but the name
            # provided *must* match the following definition,
            # which *is* validated in expr.py.
            symbol = line[1:-1]
            if not symbol:
                raise QAPIParseError(self, "name required after '@'")
            doc = QAPIDoc(info, symbol)
            self.accept(False)
            line = self.get_doc_line()
            # Set once any argument, feature or tagged section has
            # been seen; argument descriptions must not follow.
            no_more_args = False

            while line is not None:
                # Blank lines
                while line == '':
                    self.accept(False)
                    line = self.get_doc_line()
                if line is None:
                    break
                # Non-blank line, first of a section
                if line == 'Features:':
                    if doc.features:
                        raise QAPIParseError(
                            self, "duplicated 'Features:' line")
                    self.accept(False)
                    line = self.get_doc_line()
                    while line == '':
                        self.accept(False)
                        line = self.get_doc_line()
                    while (line is not None
                           and (match := self._match_at_name_colon(line))):
                        doc.new_feature(self.info, match.group(1))
                        text = line[match.end():]
                        if text:
                            doc.append_line(text)
                        line = self.get_doc_indented(doc)
                    if not doc.features:
                        raise QAPIParseError(
                            self, 'feature descriptions expected')
                    no_more_args = True
                elif match := self._match_at_name_colon(line):
                    # description
                    if no_more_args:
                        raise QAPIParseError(
                            self,
                            "description of '@%s:' follows a section"
                            % match.group(1))
                    while (line is not None
                           and (match := self._match_at_name_colon(line))):
                        doc.new_argument(self.info, match.group(1))
                        text = line[match.end():]
                        if text:
                            doc.append_line(text)
                        line = self.get_doc_indented(doc)
                    no_more_args = True
                elif match := re.match(
                        r'(Returns|Since|Notes?|Examples?|TODO): *',
                        line):
                    # tagged section
                    doc.new_tagged_section(self.info, match.group(1))
                    text = line[match.end():]
                    if text:
                        doc.append_line(text)
                    line = self.get_doc_indented(doc)
                    no_more_args = True
                elif line.startswith('='):
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                else:
                    # tag-less paragraph
                    doc.ensure_untagged_section(self.info)
                    doc.append_line(line)
                    line = self.get_doc_paragraph(doc)
        else:
            # Free-form documentation
            doc = QAPIDoc(info)
            doc.ensure_untagged_section(self.info)
            first = True
            while line is not None:
                if match := self._match_at_name_colon(line):
                    raise QAPIParseError(
                        self,
                        "'@%s:' not allowed in free-form documentation"
                        % match.group(1))
                if line.startswith('='):
                    if not first:
                        raise QAPIParseError(
                            self,
                            "'=' heading must come first in a comment block")
                doc.append_line(line)
                self.accept(False)
                line = self.get_doc_line()
                first = False

        # Move past the terminating '##'
        self.accept(False)
        doc.end()
        return doc


class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional tag plus text."""
        def __init__(self, info: QAPISourceInfo,
                     tag: Optional[str] = None):
            # section source info, i.e. where it begins
            self.info = info
            # section tag, if any ('Returns', '@name', ...)
            self.tag = tag
            # section text without tag
            self.text = ''

        def append_line(self, line: str) -> None:
            """Append @line and a newline to the section text."""
            self.text += line + '\n'

    class ArgSection(Section):
        """A section describing an argument, member or feature."""
        def __init__(self, info: QAPISourceInfo, tag: str):
            super().__init__(info, tag)
            # the schema member this section documents, once connected
            self.member: Optional['QAPISchemaMember'] = None

        def connect(self, member: 'QAPISchemaMember') -> None:
            """Record @member as the entity this section documents."""
            self.member = member

    def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
        # info points to the doc comment block's first line
        self.info = info
        # definition doc's symbol, None for free-form doc
        self.symbol: Optional[str] = symbol
        # the sections in textual order
        self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(info)]
        # the body section
        self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
        # dicts mapping parameter/feature names to their description
        self.args: Dict[str, QAPIDoc.ArgSection] = {}
        self.features: Dict[str, QAPIDoc.ArgSection] = {}
        # sections other than .body, .args, .features
        self.sections: List[QAPIDoc.Section] = []

    def end(self) -> None:
        """
        Finish the block once all its lines have been appended.

        Strips leading and trailing newlines from every section's text.

        :raise QAPISemError: When a tagged section has no text.
        """
        for section in self.all_sections:
            section.text = section.text.strip('\n')
            if section.tag is not None and section.text == '':
                raise QAPISemError(
                    section.info, "text required after '%s:'" % section.tag)

    def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
        """
        Make the current section an untagged one.

        When the last section is untagged already, extend it with a
        blank line.  Else start a new untagged section at @info.
        """
        if self.all_sections and not self.all_sections[-1].tag:
            # extend current section
            self.all_sections[-1].text += '\n'
            return
        # start new section
        section = self.Section(info)
        self.sections.append(section)
        self.all_sections.append(section)

    def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None:
        """
        Start a new section tagged @tag.

        :raise QAPISemError:
            On a second 'Returns' or 'Since' section.
        """
        if tag in ('Returns', 'Since'):
            for section in self.all_sections:
                if isinstance(section, self.ArgSection):
                    continue
                if section.tag == tag:
                    raise QAPISemError(
                        info, "duplicated '%s' section" % tag)
        section = self.Section(info, tag)
        self.sections.append(section)
        self.all_sections.append(section)

    def _new_description(self, info: QAPISourceInfo, name: str,
                         desc: Dict[str, ArgSection]) -> None:
        """
        Start a new `ArgSection` for @name and register it in @desc.

        :raise QAPISemError: When @name is empty or already in @desc.
        """
        if not name:
            raise QAPISemError(info, "invalid parameter name")
        if name in desc:
            raise QAPISemError(info, "'%s' parameter name duplicated" % name)
        section = self.ArgSection(info, '@' + name)
        self.all_sections.append(section)
        desc[name] = section

    def new_argument(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of argument or member @name."""
        self._new_description(info, name, self.args)

    def new_feature(self, info: QAPISourceInfo, name: str) -> None:
        """Start the description of feature @name."""
        self._new_description(info, name, self.features)

    def append_line(self, line: str) -> None:
        """Append @line to the most recently started section."""
        self.all_sections[-1].append_line(line)

    def connect_member(self, member: 'QAPISchemaMember') -> None:
        """
        Connect @member to the section describing it.

        When @member is undocumented and this block's symbol is listed
        in pragma documentation-exceptions, an empty description is
        created for it.  That description is stored in ``.args`` only,
        not in ``.all_sections``.

        :raise QAPISemError:
            When @member is undocumented and no exception applies.
        """
        if member.name not in self.args:
            if self.symbol not in member.info.pragma.documentation_exceptions:
                raise QAPISemError(member.info,
                                   "%s '%s' lacks documentation"
                                   % (member.role, member.name))
            self.args[member.name] = QAPIDoc.ArgSection(
                self.info, '@' + member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
        """
        Connect @feature to the section describing it.

        :raise QAPISemError: When @feature is undocumented.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr: QAPIExpression) -> None:
        """
        Check this block against the expression @expr it documents.

        :raise QAPISemError:
            When there is a 'Returns' section but @expr is not a
            command.
        """
        if 'command' not in expr:
            sec = next((sec for sec in self.sections
                        if sec.tag == 'Returns'),
                       None)
            if sec:
                raise QAPISemError(sec.info,
                                   "'Returns:' is only valid for commands")

    def check(self) -> None:
        """
        Check that every description was connected to a schema entity.

        :raise QAPISemError:
            When a documented member or feature does not exist.
        """

        def check_args_section(
                args: Dict[str, QAPIDoc.ArgSection], what: str
        ) -> None:
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    args[bogus[0]].info,
                    "documented %s%s '%s' %s not exist" % (
                        what,
                        "s" if len(bogus) > 1 else "",
                        "', '".join(bogus),
                        "do" if len(bogus) > 1 else "does"
                    ))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')