# -*- coding: utf-8 -*-
#
# QAPI schema parser
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2019 Red Hat Inc.
#
# Authors:
#  Anthony Liguori <aliguori@us.ibm.com>
#  Markus Armbruster <armbru@redhat.com>
#  Marc-André Lureau <marcandre.lureau@redhat.com>
#  Kevin Wolf <kwolf@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.

from collections import OrderedDict
import os
import re
from typing import List

from .common import must_match
from .error import QAPISemError, QAPISourceError
from .source import QAPISourceInfo


class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        # Compute the 1-based column of parser.pos within the current
        # line, expanding tabs to the next multiple-of-8 tab stop.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 9, 17, 25, ...)
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)


class QAPISchemaParser:
    """
    Parse a QAPI schema file, recursing into the files it includes.

    Parsing happens in the constructor.  Afterwards, .exprs holds a
    list of dicts with keys 'expr', 'info' and optionally 'doc', and
    .docs holds the list of documentation blocks (QAPIDoc).
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        self._fname = fname
        self._included = previously_included or set()
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok = None
        self.pos = 0
        self.cursor = 0
        self.val = None
        self.line_pos = 0

        # Parser output:
        self.exprs = []
        self.docs = []

        # Showtime!
        self._parse()

    def _parse(self):
        """
        Read the schema file and parse it into .exprs and .docs.

        Raises OSError when the file can't be read, and QAPISemError
        or QAPIParseError when its contents are invalid.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                # cur_doc ends up bound to the last block returned
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """Raise QAPISemError if @doc is definition documentation."""
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include, info, incl_fname, previously_included):
        """
        Parse the included file @incl_fname.

        Return the parser for the included file, or None when the file
        was already included.  Raise QAPISemError on an inclusion loop
        or when the file can't be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name, value, info):
        """
        Record pragma @name with @value in info.pragma.

        Raise QAPISemError when the pragma is unknown or its value has
        the wrong type.
        """

        def check_list_str(name, value) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next lexeme and update the lexer state.

        On return:
        * .tok is the token's first character: '#' for a comment, one
          of '{}:,[]', "'" for a string, 't' or 'f' for a boolean, or
          None at end of input.
        * .val is the comment text for '#', the string value for "'",
          True or False for booleans, and None otherwise.
        * .pos is the token's start offset in .src, .cursor the offset
          just past it.
        * .info and .line_pos track the current line.

        Ordinary comments are skipped when @skip_comment is true;
        comments starting with '##' are always returned.

        Raise QAPIParseError on a malformed string or a stray
        character.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # .src always ends with '\n', so find() can't fail
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self):
        """
        Parse object members up to and including the closing '}'.

        The opening '{' must already have been consumed.  Return an
        OrderedDict mapping keys to parsed values.
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse array elements up to and including the closing ']'.

        The opening '[' must already have been consumed.  Return the
        list of parsed values.
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self):
        """
        Parse one expression starting at the current token.

        Return an OrderedDict for an object, a list for an array, and
        a str or bool for a string or boolean.
        """
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment starting at the current token.

        Return the list of QAPIDoc blocks it contains; a '# =' line
        after non-empty body text starts a new block.  Raise
        QAPIParseError when the comment is malformed.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")


class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = must_match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the schema member this section documents, set by connect()
            self.member = None

        def connect(self, member):
            self.member = member

    def __init__(self, parser, info):
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the block by closing the current section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    @staticmethod
    def _split_header(pattern, line):
        """
        Blank out the header matching @pattern at the start of @line.

        If line is "@arg:   first line of description", find the index
        of 'f', which is the indent we expect for any following lines.
        Replace the header with spaces so that 'f' keeps its index and
        the line can be handled like the following lines.

        Return (indent, line).  When the line consists of just the
        header, following lines are not indented: return (0, '').
        """
        indent = must_match(pattern, line).end()
        rest = line[indent:]
        if not rest:
            return 0, rest
        return indent, ' ' * indent + rest

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this a symbol line and it is the section's first line, this
        is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            indent, line = self._split_header(r'@\S*:\s*', line)
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section, a section tag
        or a non-indented line after a blank line begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            indent, line = self._split_header(r'@\S*:\s*', line)
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            indent, line = self._split_header(r'\S*:\s*', line)
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """Start an ArgSection for @name and record it in @symbols_dict."""
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """Start an additional section, optionally tagged with @name."""
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """Strip the current section's text and close the section."""
        if self._section:
            text = self._section.text = self._section.text.strip()
            # A named section must have some text
            if self._section.name and not text:
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """Append @line to the current section; reject '@name:' lines."""
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """Connect @member to its argument section, creating it if needed."""
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """Connect @feature to its section; it must be documented."""
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """Raise QAPISemError if 'Returns:' documents a non-command."""
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Raise QAPISemError if a documented member or feature was never
        connected, i.e. does not exist in the definition.
        """

        def check_args_section(args, what):
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')