#!/usr/bin/env python3
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#

#
# Generate a decoding tree from a specification file.
# See the syntax and semantics in docs/devel/decodetree.rst.
#

import io
import os
import re
import sys
import getopt

# Instruction geometry; main() overrides these from -w/--insnwidth
# and --varinsnwidth.
insnwidth = 32
bitop_width = 32
insnmask = 0xffffffff
variablewidth = False

# Everything parsed so far, keyed by name (allpatterns is a flat list).
fields = {}
arguments = {}
formats = {}
allpatterns = []
anyextern = False

# Set by --test-for-error: error exits then report status 0.
testforerror = False

# Naming and scope of the generated C, plus the input/output files.
translate_prefix = 'trans'
translate_scope = 'static '
input_file = ''
output_file = None
output_fd = None
insntype = 'uint32_t'
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'


def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    Any partially written output file is closed and removed first.
    The exit status is 0 when testforerror is set, else 1.
    """
    global output_file
    global output_fd

    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        # Do not try to remove e.g. -o /dev/null
        if not output_file.startswith("/dev"):
            try:
                os.remove(output_file)
            except PermissionError:
                pass
    # sys.exit rather than the bare 'exit' helper, which is only
    # installed by the site module.
    sys.exit(0 if testforerror else 1)
# end error_with_file


def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    error_with_file(input_file, lineno, *args)
# end error


def output(*args):
    """Write each string in ARGS to the output file."""
    global output_fd
    for a in args:
        output_fd.write(a)


def output_autogen():
    """Write the "autogenerated" banner comment to the output file."""
    output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')


def str_indent(c):
    """Return a string with C spaces"""
    return ' ' * c


def str_fields(fields):
    """Return a string uniquely identifying FIELDS"""
    return '_'.join(sorted(fields.keys()))


def whex(val):
    """Return a hex string for val padded for insnwidth"""
    global insnwidth
    return f'0x{val:0{insnwidth // 4}x}'


def whexC(val):
    """Return a hex string for val padded for insnwidth,
       and with the proper suffix for a C constant."""
    suffix = ''
    if val >= 0x100000000:
        suffix = 'ull'
    elif val >= 0x80000000:
        suffix = 'u'
    return whex(val) + suffix


def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK"""
    global insnwidth

    i = 1 << (insnwidth - 1)
    # A blank is emitted after bits 24, 16 and 8.
    space = 0x01010100
    r = ''
    while i != 0:
        if i & mask:
            if i & bits:
                r += '1'
            else:
                r += '0'
        else:
            r += '.'
        if i & space:
            r += ' '
        i >>= 1
    return r


def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Note that X == 0 also passes this test; the only caller in view
    (is_contiguous) never passes 0.
    """
    return (x & (x - 1)) == 0


def ctz(x):
    """Return the number of times 2 factors into X."""
    assert x != 0
    r = 0
    while ((x >> r) & 1) == 0:
        r += 1
    return r


def is_contiguous(bits):
    """Return the position of the lowest set bit of BITS if its set
       bits form a single contiguous run, otherwise (or for 0) -1."""
    if bits == 0:
        return -1
    shift = ctz(bits)
    if is_pow2((bits >> shift) + 1):
        return shift
    else:
        return -1


def eq_fields_for_args(flds_a, arg):
    """Return true if argument set ARG has exactly the field names
       of FLDS_A and only default ('int') types."""
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(k in arg.fields for k in flds_a)


def eq_fields_for_fmts(flds_a, flds_b):
    """Return true if FLDS_A and FLDS_B hold the same names, each
       mapped to fields of the same class that compare equal."""
    if len(flds_a) != len(flds_b):
        return False
    for k, a in flds_a.items():
        if k not in flds_b:
            return False
        b = flds_b[k]
        if a.__class__ != b.__class__ or a != b:
            return False
    return True


class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        if self.sign:
            s = 's'
        else:
            s = ''
        return str(self.pos) + ':' + s + str(self.len)

    def str_extract(self):
        """Return the C expression extracting this field from insn."""
        global bitop_width
        s = 's' if self.sign else ''
        return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field


class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression concatenating all sub-fields."""
        global bitop_width
        ret = '0'
        pos = 0
        # The last sub-field supplies the low bits; each earlier one
        # is deposited above what has been accumulated so far.
        for f in reversed(self.subs):
            ext = f.str_extract()
            if pos == 0:
                ret = ext
            else:
                ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
            pos += f.len
        return ret

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        for a, b in zip(self.subs, other.subs):
            if a.__class__ != b.__class__ or a != b:
                return True
        return False

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField


class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        return str(self.value)

    # NOTE: Python 3 never calls __cmp__, so == and != on ConstField
    # objects compare by identity.
    def __cmp__(self, other):
        return self.value - other.value
# end ConstField


class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        self.mask = base.mask
        self.sign = base.sign
        self.base = base
        self.func = func

    def __str__(self):
        return self.func + '(' + str(self.base) + ')'

    def str_extract(self):
        return self.func + '(ctx, ' + self.base.str_extract() + ')'

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField


class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        self.mask = 0
        self.sign = 0
        self.func = func

    def __str__(self):
        return self.func

    def str_extract(self):
        return self.func + '(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField


class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, types, extern):
        self.name = nm
        self.extern = extern
        self.fields = flds
        self.types = types

    def __str__(self):
        return self.name + ' ' + str(self.fields)

    def struct_name(self):
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef for this argument set unless it is extern."""
        # NOTE(review): runs of spaces inside string literals may have
        # been lost before this file was recovered -- confirm the
        # indentation of emitted C against a pristine copy.
        if not self.extern:
            output('typedef struct {\n')
            for (n, t) in zip(self.fields, self.types):
                output(f' {t} {n};\n')
            output('} ', self.struct_name(), ';\n\n')
# end Arguments


class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        self.name = name
        self.file = input_file
        self.lineno = lineno
        self.base = base
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)

    def str1(self, i):
        return str_indent(i) + self.__str__()
# end General


class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        global decode_function
        return decode_function + '_extract_' + self.name

    def output_extract(self):
        """Emit the C function filling the argument struct from insn."""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for n, f in self.fields.items():
            output(' a->', n, ' = ', f.str_extract(), ';\n')
        output('}\n\n')
# end Format


class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the arg typedef and translate-function prototype."""
        global translate_scope
        global translate_prefix
        output('typedef ', self.base.base.struct_name(),
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C that extracts arguments (unless already
           EXTRACTED) and calls the translate function."""
        global translate_prefix
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', arg, ', insn);\n')
        for n, f in self.fields.items():
            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
        output(ind, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ')) return true;\n')

    # Normal patterns do not have children.
    def build_tree(self):
        return

    def prop_masks(self):
        return

    def prop_format(self):
        return

    def prop_width(self):
        return

# end Pattern


class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute the fixed bits/mask and undefined mask shared by
           all children and store them on this group."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Widen fixedmask until all fixedbits match
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                repeat = False

        # If the mask was whittled down to nothing, fixedbits still
        # holds bits computed under the previous, wider mask.  Drop
        # them so that fixedbits never has bits outside fixedmask.
        if fixedmask == 0:
            fixedbits = 0

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Propagate the common width of all children; error out if
           they differ."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by a test of whichever
           of its fixed bits are not already matched by OUTERMASK."""
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
                output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)

    def build_tree(self):
        if not self.pats:
            error_with_file(self.file, self.lineno, 'empty pattern group')
        super().build_tree()

#end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        self.fixedmask = fm
        self.thismask = tm
        self.subs = []
        self.base = None

    def str1(self, i):
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The common format, when one was propagated, lives in
        # self.base; there is no 'format' attribute on this class.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over the bits in thismask, one case per
           entry of subs."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that distinguishes PATS by the fixed bits
           they all share beyond OUTERMASK, recursing as needed."""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern


def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO"""
    global fields
    global insnwidth

    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for t in toks:
        if re.match('^!function=', t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')
            func = func[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            subtoks = t.split(':s')
            sign = True
        elif re.fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            subtoks = t.split(':')
            sign = False
        else:
            error(lineno, f'invalid field token "{t}"')
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, f'field {t} too large')
        f = Field(sign, po, le)
        subs.append(f)
        width += le

    if width > insnwidth:
        error(lineno, 'field too large')
    if len(subs) == 0:
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(subs) == 1:
            f = subs[0]
        else:
            mask = 0
            for s in subs:
                if mask & s.mask:
                    error(lineno, 'field components overlap')
                mask |= s.mask
            f = MultiField(subs, mask)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field


def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO"""
    global arguments
    global re_C_ident
    global anyextern

    flds = []
    types = []
    extern = False
    for n in toks:
        if re.fullmatch('!extern', n):
            extern = True
            anyextern = True
            continue
        if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
            (n, t) = n.split(':')
        elif re.fullmatch(re_C_ident, n):
            t = 'int'
        else:
            error(lineno, f'invalid argument set token "{n}"')
        if n in flds:
            error(lineno, f'duplicate argument "{n}"')
        flds.append(n)
        types.append(t)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, flds, types, extern)
# end parse_arguments


def lookup_field(lineno, name):
    """Return the previously defined field NAME, or report an error."""
    global fields
    if name in fields:
        return fields[name]
    error(lineno, 'undefined field', name)


def add_field(lineno, flds, new_name, f):
    """Add field F to FLDS as NEW_NAME (an error if already present)
       and return FLDS."""
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds


def add_field_byname(lineno, flds, new_name, old_name):
    """Add the defined field OLD_NAME to FLDS under NEW_NAME."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))


def infer_argument_set(flds):
    """Return an argument set matching the names in FLDS, creating
       and registering a new one if none exists."""
    global arguments
    global decode_function

    for arg in arguments.values():
        if eq_fields_for_args(flds, arg):
            return arg

    name = decode_function + str(len(arguments))
    arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    arguments[name] = arg
    return arg


def infer_format(arg, fieldmask, flds, width):
    """Return (format, constant fields) for a pattern without an
       explicit format, reusing a matching format or creating one."""
    global arguments
    global formats
    global decode_function

    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        # Test the instance's type; comparing the instance against the
        # class object with 'is' can never be true.
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        if not eq_fields_for_fmts(flds, fmt.fields):
            continue
        return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format


def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format from TOKS at LINENO

    With PARENT_PAT of None the definition is registered as a format;
    otherwise it is a pattern appended to PARENT_PAT.
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, f'field {fname} exceeds insnwidth')
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, f'invalid token "{t}"')
        width += shift

    if variablewidth and width < insnwidth and width % 8 == 0:
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction.
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, f'definition has {width} bits')

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, f'field {f} not in argument set {arg.name}')
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, f'field {f} not in argument set {arg.name}')
            if f in fmt.fields.keys():
                error(lineno, f'field {f} set by format and pattern')
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, f'field {f} not initialized')
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask ',
              f'({whex(fieldmask)} & {whex(fixedmask)})')
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask ',
              f'({whex(fieldmask)} & {whex(undefmask)})')
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask ',
              f'({whex(fixedmask)} & {whex(undefmask)})')
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified ',
                  f'({whex(allbits ^ insnmask)})')
# end parse_generic


def parse_file(f, parent_pat):
    """Parse all of the patterns within a file"""
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Read all of the lines of the file. Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack means an unmatched close brace;
            # do not swallow anything else.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, f'invalid token "{name}"')
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file


class SizeTree:
    """Class representing a node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        ind = str_indent(i)
        r = ind + whex(self.mask) + ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over the bits in mask, one case per entry
           of subs, loading more instruction bytes first if needed."""
        ind = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.mask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.mask), ') {\n')
        for b, s in sorted(self.subs):
            innermask = outermask | self.mask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
        output(ind, '}\n')
        output(ind, 'return insn;\n')
# end SizeTree

class SizeLeaf:
    """Class representing a leaf node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        return str_indent(i) + whex(self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit C that loads any remaining bytes and returns insn."""
        global decode_function
        ind = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width
        output(ind, 'return insn;\n')
# end SizeLeaf


def build_size_tree(pats, width, outerbits, outermask):
    """Return a SizeTree/SizeLeaf that separates PATS by width, using
       the fixed bits of the byte ending at bit position WIDTH."""
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    innermask = 0xff << (insnwidth - width)
    innermask &= ~outermask
    minwidth = None
    onewidth = True
    for i in pats:
        innermask &= i.fixedmask
        if minwidth is None:
            minwidth = i.width
        elif minwidth != i.width:
            onewidth = False
            if minwidth < i.width:
                minwidth = i.width

    if onewidth:
        return SizeLeaf(innermask, minwidth)

    if innermask == 0:
        if width < minwidth:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = []
        for p in pats:
            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    # Sort each pattern into the bin selected by the mask.
    bins = {}
    for i in pats:
        bins.setdefault(i.fixedbits & innermask, []).append(i)

    fullmask = outermask | innermask
    lens = sorted(bins.keys())
    if len(lens) == 1:
        b = lens[0]
        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)

    r = SizeTree(innermask, width)
    for b, l in bins.items():
        s = build_size_tree(l, width, b | outerbits, fullmask)
        r.subs.append((b, s))
    return r
# end build_size_tree


def prop_size(tree):
    """Propagate minimum widths up the decode size tree"""

    if isinstance(tree, SizeTree):
        minwidth = None
        for (b, s) in tree.subs:
            width = prop_size(s)
            if minwidth is None or minwidth > width:
                minwidth = width
        assert minwidth >= tree.width
        tree.width = minwidth
    else:
        minwidth = tree.width
    return minwidth
# end prop_size


def main():
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global bitop_width
    global variablewidth
    global anyextern
    global testforerror

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=', 'test-for-error']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth == 64:
                insntype = 'uint64_t'
                insnmask = 0xffffffffffffffff
                bitop_width = 64
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        elif o == '--test-for-error':
            testforerror = True
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        f = open(filename, 'rt', encoding='utf-8')
        parse_file(f, toppat)
        f.close()

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree. For toppat, we must
    # insist that decode begins from naught.
1354 for i in toppat.pats: 1355 i.prop_masks() 1356 1357 toppat.build_tree() 1358 toppat.prop_format() 1359 1360 if variablewidth: 1361 for i in toppat.pats: 1362 i.prop_width() 1363 stree = build_size_tree(toppat.pats, 8, 0, 0) 1364 prop_size(stree) 1365 1366 if output_file: 1367 output_fd = open(output_file, 'wt', encoding='utf-8') 1368 else: 1369 output_fd = io.TextIOWrapper(sys.stdout.buffer, 1370 encoding=sys.stdout.encoding, 1371 errors="ignore") 1372 1373 output_autogen() 1374 for n in sorted(arguments.keys()): 1375 f = arguments[n] 1376 f.output_def() 1377 1378 # A single translate function can be invoked for different patterns. 1379 # Make sure that the argument sets are the same, and declare the 1380 # function only once. 1381 # 1382 # If we're sharing formats, we're likely also sharing trans_* functions, 1383 # but we can't tell which ones. Prevent issues from the compiler by 1384 # suppressing redundant declaration warnings. 1385 if anyextern: 1386 output("#pragma GCC diagnostic push\n", 1387 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", 1388 "#ifdef __clang__\n" 1389 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", 1390 "#endif\n\n") 1391 1392 out_pats = {} 1393 for i in allpatterns: 1394 if i.name in out_pats: 1395 p = out_pats[i.name] 1396 if i.base.base != p.base.base: 1397 error(0, i.name, ' has conflicting argument sets') 1398 else: 1399 i.output_decl() 1400 out_pats[i.name] = i 1401 output('\n') 1402 1403 if anyextern: 1404 output("#pragma GCC diagnostic pop\n\n") 1405 1406 for n in sorted(formats.keys()): 1407 f = formats[n] 1408 f.output_extract() 1409 1410 output(decode_scope, 'bool ', decode_function, 1411 '(DisasContext *ctx, ', insntype, ' insn)\n{\n') 1412 1413 i4 = str_indent(4) 1414 1415 if len(allpatterns) != 0: 1416 output(i4, 'union {\n') 1417 for n in sorted(arguments.keys()): 1418 f = arguments[n] 1419 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') 1420 output(i4, '} u;\n\n') 1421 
toppat.output_code(4, False, 0, 0) 1422 1423 output(i4, 'return false;\n') 1424 output('}\n') 1425 1426 if variablewidth: 1427 output('\n', decode_scope, insntype, ' ', decode_function, 1428 '_load(DisasContext *ctx)\n{\n', 1429 ' ', insntype, ' insn = 0;\n\n') 1430 stree.output_code(4, 0, 0, 0) 1431 output('}\n') 1432 1433 if output_file: 1434 output_fd.close() 1435 exit(1 if testforerror else 0) 1436# end main 1437 1438 1439if __name__ == '__main__': 1440 main() 1441