1#!/usr/bin/env python3 2# Copyright (c) 2018 Linaro Limited 3# 4# This library is free software; you can redistribute it and/or 5# modify it under the terms of the GNU Lesser General Public 6# License as published by the Free Software Foundation; either 7# version 2.1 of the License, or (at your option) any later version. 8# 9# This library is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12# Lesser General Public License for more details. 13# 14# You should have received a copy of the GNU Lesser General Public 15# License along with this library; if not, see <http://www.gnu.org/licenses/>. 16# 17 18# 19# Generate a decoding tree from a specification file. 20# See the syntax and semantics in docs/devel/decodetree.rst. 21# 22 23import io 24import os 25import re 26import sys 27import getopt 28 29insnwidth = 32 30bitop_width = 32 31insnmask = 0xffffffff 32variablewidth = False 33fields = {} 34arguments = {} 35formats = {} 36allpatterns = [] 37anyextern = False 38 39translate_prefix = 'trans' 40translate_scope = 'static ' 41input_file = '' 42output_file = None 43output_fd = None 44insntype = 'uint32_t' 45decode_function = 'decode' 46 47# An identifier for C. 48re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' 49 50# Identifiers for Arguments, Fields, Formats and Patterns. 51re_arg_ident = '&[a-zA-Z0-9_]*' 52re_fld_ident = '%[a-zA-Z0-9_]*' 53re_fmt_ident = '@[a-zA-Z0-9_]*' 54re_pat_ident = '[a-zA-Z0-9_]*' 55 56def error_with_file(file, lineno, *args): 57 """Print an error message from file:line and args and exit.""" 58 global output_file 59 global output_fd 60 61 prefix = '' 62 if file: 63 prefix += f'{file}:' 64 if lineno: 65 prefix += f'{lineno}:' 66 if prefix: 67 prefix += ' ' 68 print(prefix, end='error: ', file=sys.stderr) 69 print(*args, file=sys.stderr) 70 71 if output_file and output_fd: 72 output_fd.close() 73 os.remove(output_file) 74 exit(1) 75# end error_with_file 76 77 78def error(lineno, *args): 79 error_with_file(input_file, lineno, *args) 80# end error 81 82 83def output(*args): 84 global output_fd 85 for a in args: 86 output_fd.write(a) 87 88 89def output_autogen(): 90 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') 91 92 93def str_indent(c): 94 """Return a string with C spaces""" 95 return ' ' * c 96 97 98def str_fields(fields): 99 """Return a string uniquely identifying FIELDS""" 100 r = '' 101 for n in sorted(fields.keys()): 102 r += '_' + n 103 return r[1:] 104 105 106def whex(val): 107 """Return a hex string for val padded for insnwidth""" 108 global insnwidth 109 return f'0x{val:0{insnwidth // 4}x}' 110 111 112def whexC(val): 113 """Return a hex string for val padded for insnwidth, 114 and with the proper suffix for a C constant.""" 115 suffix = '' 116 if val >= 0x100000000: 117 suffix = 'ull' 118 elif val >= 0x80000000: 119 suffix = 'u' 120 return whex(val) + suffix 121 122 123def str_match_bits(bits, mask): 124 """Return a string pretty-printing BITS/MASK""" 125 global insnwidth 126 127 i = 1 << (insnwidth - 1) 128 space = 0x01010100 129 r = '' 130 while i != 0: 131 if i & mask: 132 if i & bits: 133 r += '1' 134 else: 135 r += '0' 136 else: 137 r += '.' 138 if i & space: 139 r += ' ' 140 i >>= 1 141 return r 142 143 144def is_pow2(x): 145 """Return true iff X is equal to a power of 2.""" 146 return (x & (x - 1)) == 0 147 148 149def ctz(x): 150 """Return the number of times 2 factors into X.""" 151 assert x != 0 152 r = 0 153 while ((x >> r) & 1) == 0: 154 r += 1 155 return r 156 157 158def is_contiguous(bits): 159 if bits == 0: 160 return -1 161 shift = ctz(bits) 162 if is_pow2((bits >> shift) + 1): 163 return shift 164 else: 165 return -1 166 167 168def eq_fields_for_args(flds_a, arg): 169 if len(flds_a) != len(arg.fields): 170 return False 171 # Only allow inference on default types 172 for t in arg.types: 173 if t != 'int': 174 return False 175 for k, a in flds_a.items(): 176 if k not in arg.fields: 177 return False 178 return True 179 180 181def eq_fields_for_fmts(flds_a, flds_b): 182 if len(flds_a) != len(flds_b): 183 return False 184 for k, a in flds_a.items(): 185 if k not in flds_b: 186 return False 187 b = flds_b[k] 188 if a.__class__ != b.__class__ or a != b: 189 return False 190 return True 191 192 193class Field: 194 """Class representing a simple instruction field""" 195 def __init__(self, sign, pos, len): 196 self.sign = sign 197 self.pos = pos 198 self.len = len 199 self.mask = ((1 << len) - 1) << pos 200 201 def __str__(self): 202 if self.sign: 203 s = 's' 204 else: 205 s = '' 206 return str(self.pos) + ':' + s + str(self.len) 207 208 def str_extract(self): 209 global bitop_width 210 s = 's' if self.sign else '' 211 return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' 212 213 def __eq__(self, other): 214 return self.sign == other.sign and self.mask == other.mask 215 216 def __ne__(self, other): 217 return not self.__eq__(other) 218# end Field 219 220 221class MultiField: 222 """Class representing a compound instruction field""" 223 def __init__(self, subs, mask): 224 self.subs = subs 225 self.sign = subs[0].sign 226 self.mask = mask 227 228 def __str__(self): 229 return str(self.subs) 230 231 def str_extract(self): 232 global bitop_width 233 ret = '0' 234 pos = 0 235 for f in reversed(self.subs): 236 ext = f.str_extract() 237 if pos == 0: 238 ret = ext 239 else: 240 ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})' 241 pos += f.len 242 return ret 243 244 def __ne__(self, other): 245 if len(self.subs) != len(other.subs): 246 return True 247 for a, b in zip(self.subs, other.subs): 248 if a.__class__ != b.__class__ or a != b: 249 return True 250 return False 251 252 def __eq__(self, other): 253 return not self.__ne__(other) 254# end MultiField 255 256 257class ConstField: 258 """Class representing an argument field with constant value""" 259 def __init__(self, value): 260 self.value = value 261 self.mask = 0 262 self.sign = value < 0 263 264 def __str__(self): 265 return str(self.value) 266 267 def str_extract(self): 268 return str(self.value) 269 270 def __cmp__(self, other): 271 return self.value - other.value 272# end ConstField 273 274 275class FunctionField: 276 """Class representing a field passed through a function""" 277 def __init__(self, func, base): 278 self.mask = base.mask 279 self.sign = base.sign 280 self.base = base 281 self.func = func 282 283 def __str__(self): 284 return self.func + '(' + str(self.base) + ')' 285 286 def str_extract(self): 287 return self.func + '(ctx, ' + self.base.str_extract() + ')' 288 289 def __eq__(self, other): 290 return self.func == other.func and self.base == other.base 291 292 def __ne__(self, other): 293 return not self.__eq__(other) 294# end FunctionField 295 296 297class ParameterField: 298 """Class representing a pseudo-field read from a function""" 299 def __init__(self, func): 300 self.mask = 0 301 self.sign = 0 302 self.func = func 303 304 def __str__(self): 305 return self.func 306 307 def str_extract(self): 308 return self.func + '(ctx)' 309 310 def __eq__(self, other): 311 return self.func == other.func 312 313 def __ne__(self, other): 314 return not self.__eq__(other) 315# end ParameterField 316 317 318class Arguments: 319 """Class representing the extracted fields of a format""" 320 def __init__(self, nm, flds, types, extern): 321 self.name = nm 322 self.extern = extern 323 self.fields = flds 324 self.types = types 325 326 def __str__(self): 327 return self.name + ' ' + str(self.fields) 328 329 def struct_name(self): 330 return 'arg_' + self.name 331 332 def output_def(self): 333 if not self.extern: 334 output('typedef struct {\n') 335 for (n, t) in zip(self.fields, self.types): 336 output(f' {t} {n};\n') 337 output('} ', self.struct_name(), ';\n\n') 338# end Arguments 339 340 341class General: 342 """Common code between instruction formats and instruction patterns""" 343 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): 344 self.name = name 345 self.file = input_file 346 self.lineno = lineno 347 self.base = base 348 self.fixedbits = fixb 349 self.fixedmask = fixm 350 self.undefmask = udfm 351 self.fieldmask = fldm 352 self.fields = flds 353 self.width = w 354 355 def __str__(self): 356 return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) 357 358 def str1(self, i): 359 return str_indent(i) + self.__str__() 360# end General 361 362 363class Format(General): 364 """Class representing an instruction format""" 365 366 def extract_name(self): 367 global decode_function 368 return decode_function + '_extract_' + self.name 369 370 def output_extract(self): 371 output('static void ', self.extract_name(), '(DisasContext *ctx, ', 372 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') 373 for n, f in self.fields.items(): 374 output(' a->', n, ' = ', f.str_extract(), ';\n') 375 output('}\n\n') 376# end Format 377 378 379class Pattern(General): 380 """Class representing an instruction pattern""" 381 382 def output_decl(self): 383 global translate_scope 384 global translate_prefix 385 output('typedef ', self.base.base.struct_name(), 386 ' arg_', self.name, ';\n') 387 output(translate_scope, 'bool ', translate_prefix, '_', self.name, 388 '(DisasContext *ctx, arg_', self.name, ' *a);\n') 389 390 def output_code(self, i, extracted, outerbits, outermask): 391 global translate_prefix 392 ind = str_indent(i) 393 arg = self.base.base.name 394 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') 395 if not extracted: 396 output(ind, self.base.extract_name(), 397 '(ctx, &u.f_', arg, ', insn);\n') 398 for n, f in self.fields.items(): 399 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') 400 output(ind, 'if (', translate_prefix, '_', self.name, 401 '(ctx, &u.f_', arg, ')) return true;\n') 402 403 # Normal patterns do not have children. 404 def build_tree(self): 405 return 406 def prop_masks(self): 407 return 408 def prop_format(self): 409 return 410 def prop_width(self): 411 return 412 413# end Pattern 414 415 416class MultiPattern(General): 417 """Class representing a set of instruction patterns""" 418 419 def __init__(self, lineno): 420 self.file = input_file 421 self.lineno = lineno 422 self.pats = [] 423 self.base = None 424 self.fixedbits = 0 425 self.fixedmask = 0 426 self.undefmask = 0 427 self.width = None 428 429 def __str__(self): 430 r = 'group' 431 if self.fixedbits is not None: 432 r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) 433 return r 434 435 def output_decl(self): 436 for p in self.pats: 437 p.output_decl() 438 439 def prop_masks(self): 440 global insnmask 441 442 fixedmask = insnmask 443 undefmask = insnmask 444 445 # Collect fixedmask/undefmask for all of the children. 446 for p in self.pats: 447 p.prop_masks() 448 fixedmask &= p.fixedmask 449 undefmask &= p.undefmask 450 451 # Widen fixedmask until all fixedbits match 452 repeat = True 453 fixedbits = 0 454 while repeat and fixedmask != 0: 455 fixedbits = None 456 for p in self.pats: 457 thisbits = p.fixedbits & fixedmask 458 if fixedbits is None: 459 fixedbits = thisbits 460 elif fixedbits != thisbits: 461 fixedmask &= ~(fixedbits ^ thisbits) 462 break 463 else: 464 repeat = False 465 466 self.fixedbits = fixedbits 467 self.fixedmask = fixedmask 468 self.undefmask = undefmask 469 470 def build_tree(self): 471 for p in self.pats: 472 p.build_tree() 473 474 def prop_format(self): 475 for p in self.pats: 476 p.build_tree() 477 478 def prop_width(self): 479 width = None 480 for p in self.pats: 481 p.prop_width() 482 if width is None: 483 width = p.width 484 elif width != p.width: 485 error_with_file(self.file, self.lineno, 486 'width mismatch in patterns within braces') 487 self.width = width 488 489# end MultiPattern 490 491 492class IncMultiPattern(MultiPattern): 493 """Class representing an overlapping set of instruction patterns""" 494 495 def output_code(self, i, extracted, outerbits, outermask): 496 global translate_prefix 497 ind = str_indent(i) 498 for p in self.pats: 499 if outermask != p.fixedmask: 500 innermask = p.fixedmask & ~outermask 501 innerbits = p.fixedbits & ~outermask 502 output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') 503 output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') 504 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) 505 output(ind, '}\n') 506 else: 507 p.output_code(i, extracted, p.fixedbits, p.fixedmask) 508#end IncMultiPattern 509 510 511class Tree: 512 """Class representing a node in a decode tree""" 513 514 def __init__(self, fm, tm): 515 self.fixedmask = fm 516 self.thismask = tm 517 self.subs = [] 518 self.base = None 519 520 def str1(self, i): 521 ind = str_indent(i) 522 r = ind + whex(self.fixedmask) 523 if self.format: 524 r += ' ' + self.format.name 525 r += ' [\n' 526 for (b, s) in self.subs: 527 r += ind + f' {whex(b)}:\n' 528 r += s.str1(i + 4) + '\n' 529 r += ind + ']' 530 return r 531 532 def __str__(self): 533 return self.str1(0) 534 535 def output_code(self, i, extracted, outerbits, outermask): 536 ind = str_indent(i) 537 538 # If we identified all nodes below have the same format, 539 # extract the fields now. 540 if not extracted and self.base: 541 output(ind, self.base.extract_name(), 542 '(ctx, &u.f_', self.base.base.name, ', insn);\n') 543 extracted = True 544 545 # Attempt to aid the compiler in producing compact switch statements. 546 # If the bits in the mask are contiguous, extract them. 547 sh = is_contiguous(self.thismask) 548 if sh > 0: 549 # Propagate SH down into the local functions. 550 def str_switch(b, sh=sh): 551 return f'(insn >> {sh}) & {b >> sh:#x}' 552 553 def str_case(b, sh=sh): 554 return hex(b >> sh) 555 else: 556 def str_switch(b): 557 return f'insn & {whexC(b)}' 558 559 def str_case(b): 560 return whexC(b) 561 562 output(ind, 'switch (', str_switch(self.thismask), ') {\n') 563 for b, s in sorted(self.subs): 564 assert (self.thismask & ~s.fixedmask) == 0 565 innermask = outermask | self.thismask 566 innerbits = outerbits | b 567 output(ind, 'case ', str_case(b), ':\n') 568 output(ind, ' /* ', 569 str_match_bits(innerbits, innermask), ' */\n') 570 s.output_code(i + 4, extracted, innerbits, innermask) 571 output(ind, ' break;\n') 572 output(ind, '}\n') 573# end Tree 574 575 576class ExcMultiPattern(MultiPattern): 577 """Class representing a non-overlapping set of instruction patterns""" 578 579 def output_code(self, i, extracted, outerbits, outermask): 580 # Defer everything to our decomposed Tree node 581 self.tree.output_code(i, extracted, outerbits, outermask) 582 583 @staticmethod 584 def __build_tree(pats, outerbits, outermask): 585 # Find the intersection of all remaining fixedmask. 586 innermask = ~outermask & insnmask 587 for i in pats: 588 innermask &= i.fixedmask 589 590 if innermask == 0: 591 # Edge condition: One pattern covers the entire insnmask 592 if len(pats) == 1: 593 t = Tree(outermask, innermask) 594 t.subs.append((0, pats[0])) 595 return t 596 597 text = 'overlapping patterns:' 598 for p in pats: 599 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) 600 error_with_file(pats[0].file, pats[0].lineno, text) 601 602 fullmask = outermask | innermask 603 604 # Sort each element of pats into the bin selected by the mask. 605 bins = {} 606 for i in pats: 607 fb = i.fixedbits & innermask 608 if fb in bins: 609 bins[fb].append(i) 610 else: 611 bins[fb] = [i] 612 613 # We must recurse if any bin has more than one element or if 614 # the single element in the bin has not been fully matched. 615 t = Tree(fullmask, innermask) 616 617 for b, l in bins.items(): 618 s = l[0] 619 if len(l) > 1 or s.fixedmask & ~fullmask != 0: 620 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) 621 t.subs.append((b, s)) 622 623 return t 624 625 def build_tree(self): 626 super().prop_format() 627 self.tree = self.__build_tree(self.pats, self.fixedbits, 628 self.fixedmask) 629 630 @staticmethod 631 def __prop_format(tree): 632 """Propagate Format objects into the decode tree""" 633 634 # Depth first search. 635 for (b, s) in tree.subs: 636 if isinstance(s, Tree): 637 ExcMultiPattern.__prop_format(s) 638 639 # If all entries in SUBS have the same format, then 640 # propagate that into the tree. 641 f = None 642 for (b, s) in tree.subs: 643 if f is None: 644 f = s.base 645 if f is None: 646 return 647 if f is not s.base: 648 return 649 tree.base = f 650 651 def prop_format(self): 652 super().prop_format() 653 self.__prop_format(self.tree) 654 655# end ExcMultiPattern 656 657 658def parse_field(lineno, name, toks): 659 """Parse one instruction field from TOKS at LINENO""" 660 global fields 661 global insnwidth 662 663 # A "simple" field will have only one entry; 664 # a "multifield" will have several. 665 subs = [] 666 width = 0 667 func = None 668 for t in toks: 669 if re.match('^!function=', t): 670 if func: 671 error(lineno, 'duplicate function') 672 func = t.split('=') 673 func = func[1] 674 continue 675 676 if re.fullmatch('[0-9]+:s[0-9]+', t): 677 # Signed field extract 678 subtoks = t.split(':s') 679 sign = True 680 elif re.fullmatch('[0-9]+:[0-9]+', t): 681 # Unsigned field extract 682 subtoks = t.split(':') 683 sign = False 684 else: 685 error(lineno, f'invalid field token "{t}"') 686 po = int(subtoks[0]) 687 le = int(subtoks[1]) 688 if po + le > insnwidth: 689 error(lineno, f'field {t} too large') 690 f = Field(sign, po, le) 691 subs.append(f) 692 width += le 693 694 if width > insnwidth: 695 error(lineno, 'field too large') 696 if len(subs) == 0: 697 if func: 698 f = ParameterField(func) 699 else: 700 error(lineno, 'field with no value') 701 else: 702 if len(subs) == 1: 703 f = subs[0] 704 else: 705 mask = 0 706 for s in subs: 707 if mask & s.mask: 708 error(lineno, 'field components overlap') 709 mask |= s.mask 710 f = MultiField(subs, mask) 711 if func: 712 f = FunctionField(func, f) 713 714 if name in fields: 715 error(lineno, 'duplicate field', name) 716 fields[name] = f 717# end parse_field 718 719 720def parse_arguments(lineno, name, toks): 721 """Parse one argument set from TOKS at LINENO""" 722 global arguments 723 global re_C_ident 724 global anyextern 725 726 flds = [] 727 types = [] 728 extern = False 729 for n in toks: 730 if re.fullmatch('!extern', n): 731 extern = True 732 anyextern = True 733 continue 734 if re.fullmatch(re_C_ident + ':' + re_C_ident, n): 735 (n, t) = n.split(':') 736 elif re.fullmatch(re_C_ident, n): 737 t = 'int' 738 else: 739 error(lineno, f'invalid argument set token "{n}"') 740 if n in flds: 741 error(lineno, f'duplicate argument "{n}"') 742 flds.append(n) 743 types.append(t) 744 745 if name in arguments: 746 error(lineno, 'duplicate argument set', name) 747 arguments[name] = Arguments(name, flds, types, extern) 748# end parse_arguments 749 750 751def lookup_field(lineno, name): 752 global fields 753 if name in fields: 754 return fields[name] 755 error(lineno, 'undefined field', name) 756 757 758def add_field(lineno, flds, new_name, f): 759 if new_name in flds: 760 error(lineno, 'duplicate field', new_name) 761 flds[new_name] = f 762 return flds 763 764 765def add_field_byname(lineno, flds, new_name, old_name): 766 return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) 767 768 769def infer_argument_set(flds): 770 global arguments 771 global decode_function 772 773 for arg in arguments.values(): 774 if eq_fields_for_args(flds, arg): 775 return arg 776 777 name = decode_function + str(len(arguments)) 778 arg = Arguments(name, flds.keys(), ['int'] * len(flds), False) 779 arguments[name] = arg 780 return arg 781 782 783def infer_format(arg, fieldmask, flds, width): 784 global arguments 785 global formats 786 global decode_function 787 788 const_flds = {} 789 var_flds = {} 790 for n, c in flds.items(): 791 if c is ConstField: 792 const_flds[n] = c 793 else: 794 var_flds[n] = c 795 796 # Look for an existing format with the same argument set and fields 797 for fmt in formats.values(): 798 if arg and fmt.base != arg: 799 continue 800 if fieldmask != fmt.fieldmask: 801 continue 802 if width != fmt.width: 803 continue 804 if not eq_fields_for_fmts(flds, fmt.fields): 805 continue 806 return (fmt, const_flds) 807 808 name = decode_function + '_Fmt_' + str(len(formats)) 809 if not arg: 810 arg = infer_argument_set(flds) 811 812 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) 813 formats[name] = fmt 814 815 return (fmt, const_flds) 816# end infer_format 817 818 819def parse_generic(lineno, parent_pat, name, toks): 820 """Parse one instruction format from TOKS at LINENO""" 821 global fields 822 global arguments 823 global formats 824 global allpatterns 825 global re_arg_ident 826 global re_fld_ident 827 global re_fmt_ident 828 global re_C_ident 829 global insnwidth 830 global insnmask 831 global variablewidth 832 833 is_format = parent_pat is None 834 835 fixedmask = 0 836 fixedbits = 0 837 undefmask = 0 838 width = 0 839 flds = {} 840 arg = None 841 fmt = None 842 for t in toks: 843 # '&Foo' gives a format an explicit argument set. 844 if re.fullmatch(re_arg_ident, t): 845 tt = t[1:] 846 if arg: 847 error(lineno, 'multiple argument sets') 848 if tt in arguments: 849 arg = arguments[tt] 850 else: 851 error(lineno, 'undefined argument set', t) 852 continue 853 854 # '@Foo' gives a pattern an explicit format. 855 if re.fullmatch(re_fmt_ident, t): 856 tt = t[1:] 857 if fmt: 858 error(lineno, 'multiple formats') 859 if tt in formats: 860 fmt = formats[tt] 861 else: 862 error(lineno, 'undefined format', t) 863 continue 864 865 # '%Foo' imports a field. 866 if re.fullmatch(re_fld_ident, t): 867 tt = t[1:] 868 flds = add_field_byname(lineno, flds, tt, tt) 869 continue 870 871 # 'Foo=%Bar' imports a field with a different name. 872 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t): 873 (fname, iname) = t.split('=%') 874 flds = add_field_byname(lineno, flds, fname, iname) 875 continue 876 877 # 'Foo=number' sets an argument field to a constant value 878 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t): 879 (fname, value) = t.split('=') 880 value = int(value) 881 flds = add_field(lineno, flds, fname, ConstField(value)) 882 continue 883 884 # Pattern of 0s, 1s, dots and dashes indicate required zeros, 885 # required ones, or dont-cares. 886 if re.fullmatch('[01.-]+', t): 887 shift = len(t) 888 fms = t.replace('0', '1') 889 fms = fms.replace('.', '0') 890 fms = fms.replace('-', '0') 891 fbs = t.replace('.', '0') 892 fbs = fbs.replace('-', '0') 893 ubm = t.replace('1', '0') 894 ubm = ubm.replace('.', '0') 895 ubm = ubm.replace('-', '1') 896 fms = int(fms, 2) 897 fbs = int(fbs, 2) 898 ubm = int(ubm, 2) 899 fixedbits = (fixedbits << shift) | fbs 900 fixedmask = (fixedmask << shift) | fms 901 undefmask = (undefmask << shift) | ubm 902 # Otherwise, fieldname:fieldwidth 903 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t): 904 (fname, flen) = t.split(':') 905 sign = False 906 if flen[0] == 's': 907 sign = True 908 flen = flen[1:] 909 shift = int(flen, 10) 910 if shift + width > insnwidth: 911 error(lineno, f'field {fname} exceeds insnwidth') 912 f = Field(sign, insnwidth - width - shift, shift) 913 flds = add_field(lineno, flds, fname, f) 914 fixedbits <<= shift 915 fixedmask <<= shift 916 undefmask <<= shift 917 else: 918 error(lineno, f'invalid token "{t}"') 919 width += shift 920 921 if variablewidth and width < insnwidth and width % 8 == 0: 922 shift = insnwidth - width 923 fixedbits <<= shift 924 fixedmask <<= shift 925 undefmask <<= shift 926 undefmask |= (1 << shift) - 1 927 928 # We should have filled in all of the bits of the instruction. 929 elif not (is_format and width == 0) and width != insnwidth: 930 error(lineno, f'definition has {width} bits') 931 932 # Do not check for fields overlapping fields; one valid usage 933 # is to be able to duplicate fields via import. 934 fieldmask = 0 935 for f in flds.values(): 936 fieldmask |= f.mask 937 938 # Fix up what we've parsed to match either a format or a pattern. 939 if is_format: 940 # Formats cannot reference formats. 941 if fmt: 942 error(lineno, 'format referencing format') 943 # If an argument set is given, then there should be no fields 944 # without a place to store it. 945 if arg: 946 for f in flds.keys(): 947 if f not in arg.fields: 948 error(lineno, f'field {f} not in argument set {arg.name}') 949 else: 950 arg = infer_argument_set(flds) 951 if name in formats: 952 error(lineno, 'duplicate format name', name) 953 fmt = Format(name, lineno, arg, fixedbits, fixedmask, 954 undefmask, fieldmask, flds, width) 955 formats[name] = fmt 956 else: 957 # Patterns can reference a format ... 958 if fmt: 959 # ... but not an argument simultaneously 960 if arg: 961 error(lineno, 'pattern specifies both format and argument set') 962 if fixedmask & fmt.fixedmask: 963 error(lineno, 'pattern fixed bits overlap format fixed bits') 964 if width != fmt.width: 965 error(lineno, 'pattern uses format of different width') 966 fieldmask |= fmt.fieldmask 967 fixedbits |= fmt.fixedbits 968 fixedmask |= fmt.fixedmask 969 undefmask |= fmt.undefmask 970 else: 971 (fmt, flds) = infer_format(arg, fieldmask, flds, width) 972 arg = fmt.base 973 for f in flds.keys(): 974 if f not in arg.fields: 975 error(lineno, f'field {f} not in argument set {arg.name}') 976 if f in fmt.fields.keys(): 977 error(lineno, f'field {f} set by format and pattern') 978 for f in arg.fields: 979 if f not in flds.keys() and f not in fmt.fields.keys(): 980 error(lineno, f'field {f} not initialized') 981 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, 982 undefmask, fieldmask, flds, width) 983 parent_pat.pats.append(pat) 984 allpatterns.append(pat) 985 986 # Validate the masks that we have assembled. 987 if fieldmask & fixedmask: 988 error(lineno, 'fieldmask overlaps fixedmask ', 989 f'({whex(fieldmask)} & {whex(fixedmask)})') 990 if fieldmask & undefmask: 991 error(lineno, 'fieldmask overlaps undefmask ', 992 f'({whex(fieldmask)} & {whex(undefmask)})') 993 if fixedmask & undefmask: 994 error(lineno, 'fixedmask overlaps undefmask ', 995 f'({whex(fixedmask)} & {whex(undefmask)})') 996 if not is_format: 997 allbits = fieldmask | fixedmask | undefmask 998 if allbits != insnmask: 999 error(lineno, 'bits left unspecified ', 1000 f'({whex(allbits ^ insnmask)})') 1001# end parse_general 1002 1003 1004def parse_file(f, parent_pat): 1005 """Parse all of the patterns within a file""" 1006 global re_arg_ident 1007 global re_fld_ident 1008 global re_fmt_ident 1009 global re_pat_ident 1010 1011 # Read all of the lines of the file. Concatenate lines 1012 # ending in backslash; discard empty lines and comments. 1013 toks = [] 1014 lineno = 0 1015 nesting = 0 1016 nesting_pats = [] 1017 1018 for line in f: 1019 lineno += 1 1020 1021 # Expand and strip spaces, to find indent. 1022 line = line.rstrip() 1023 line = line.expandtabs() 1024 len1 = len(line) 1025 line = line.lstrip() 1026 len2 = len(line) 1027 1028 # Discard comments 1029 end = line.find('#') 1030 if end >= 0: 1031 line = line[:end] 1032 1033 t = line.split() 1034 if len(toks) != 0: 1035 # Next line after continuation 1036 toks.extend(t) 1037 else: 1038 # Allow completely blank lines. 1039 if len1 == 0: 1040 continue 1041 indent = len1 - len2 1042 # Empty line due to comment. 1043 if len(t) == 0: 1044 # Indentation must be correct, even for comment lines. 1045 if indent != nesting: 1046 error(lineno, 'indentation ', indent, ' != ', nesting) 1047 continue 1048 start_lineno = lineno 1049 toks = t 1050 1051 # Continuation? 1052 if toks[-1] == '\\': 1053 toks.pop() 1054 continue 1055 1056 name = toks[0] 1057 del toks[0] 1058 1059 # End nesting? 1060 if name == '}' or name == ']': 1061 if len(toks) != 0: 1062 error(start_lineno, 'extra tokens after close brace') 1063 1064 # Make sure { } and [ ] nest properly. 1065 if (name == '}') != isinstance(parent_pat, IncMultiPattern): 1066 error(lineno, 'mismatched close brace') 1067 1068 try: 1069 parent_pat = nesting_pats.pop() 1070 except: 1071 error(lineno, 'extra close brace') 1072 1073 nesting -= 2 1074 if indent != nesting: 1075 error(lineno, 'indentation ', indent, ' != ', nesting) 1076 1077 toks = [] 1078 continue 1079 1080 # Everything else should have current indentation. 1081 if indent != nesting: 1082 error(start_lineno, 'indentation ', indent, ' != ', nesting) 1083 1084 # Start nesting? 1085 if name == '{' or name == '[': 1086 if len(toks) != 0: 1087 error(start_lineno, 'extra tokens after open brace') 1088 1089 if name == '{': 1090 nested_pat = IncMultiPattern(start_lineno) 1091 else: 1092 nested_pat = ExcMultiPattern(start_lineno) 1093 parent_pat.pats.append(nested_pat) 1094 nesting_pats.append(parent_pat) 1095 parent_pat = nested_pat 1096 1097 nesting += 2 1098 toks = [] 1099 continue 1100 1101 # Determine the type of object needing to be parsed. 1102 if re.fullmatch(re_fld_ident, name): 1103 parse_field(start_lineno, name[1:], toks) 1104 elif re.fullmatch(re_arg_ident, name): 1105 parse_arguments(start_lineno, name[1:], toks) 1106 elif re.fullmatch(re_fmt_ident, name): 1107 parse_generic(start_lineno, None, name[1:], toks) 1108 elif re.fullmatch(re_pat_ident, name): 1109 parse_generic(start_lineno, parent_pat, name, toks) 1110 else: 1111 error(lineno, f'invalid token "{name}"') 1112 toks = [] 1113 1114 if nesting != 0: 1115 error(lineno, 'missing close brace') 1116# end parse_file 1117 1118 1119class SizeTree: 1120 """Class representing a node in a size decode tree""" 1121 1122 def __init__(self, m, w): 1123 self.mask = m 1124 self.subs = [] 1125 self.base = None 1126 self.width = w 1127 1128 def str1(self, i): 1129 ind = str_indent(i) 1130 r = ind + whex(self.mask) + ' [\n' 1131 for (b, s) in self.subs: 1132 r += ind + f' {whex(b)}:\n' 1133 r += s.str1(i + 4) + '\n' 1134 r += ind + ']' 1135 return r 1136 1137 def __str__(self): 1138 return self.str1(0) 1139 1140 def output_code(self, i, extracted, outerbits, outermask): 1141 ind = str_indent(i) 1142 1143 # If we need to load more bytes to test, do so now. 1144 if extracted < self.width: 1145 output(ind, f'insn = {decode_function}_load_bytes', 1146 f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') 1147 extracted = self.width 1148 1149 # Attempt to aid the compiler in producing compact switch statements. 1150 # If the bits in the mask are contiguous, extract them. 1151 sh = is_contiguous(self.mask) 1152 if sh > 0: 1153 # Propagate SH down into the local functions. 1154 def str_switch(b, sh=sh): 1155 return f'(insn >> {sh}) & {b >> sh:#x}' 1156 1157 def str_case(b, sh=sh): 1158 return hex(b >> sh) 1159 else: 1160 def str_switch(b): 1161 return f'insn & {whexC(b)}' 1162 1163 def str_case(b): 1164 return whexC(b) 1165 1166 output(ind, 'switch (', str_switch(self.mask), ') {\n') 1167 for b, s in sorted(self.subs): 1168 innermask = outermask | self.mask 1169 innerbits = outerbits | b 1170 output(ind, 'case ', str_case(b), ':\n') 1171 output(ind, ' /* ', 1172 str_match_bits(innerbits, innermask), ' */\n') 1173 s.output_code(i + 4, extracted, innerbits, innermask) 1174 output(ind, '}\n') 1175 output(ind, 'return insn;\n') 1176# end SizeTree 1177 1178class SizeLeaf: 1179 """Class representing a leaf node in a size decode tree""" 1180 1181 def __init__(self, m, w): 1182 self.mask = m 1183 self.width = w 1184 1185 def str1(self, i): 1186 return str_indent(i) + whex(self.mask) 1187 1188 def __str__(self): 1189 return self.str1(0) 1190 1191 def output_code(self, i, extracted, outerbits, outermask): 1192 global decode_function 1193 ind = str_indent(i) 1194 1195 # If we need to load more bytes, do so now. 1196 if extracted < self.width: 1197 output(ind, f'insn = {decode_function}_load_bytes', 1198 f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') 1199 extracted = self.width 1200 output(ind, 'return insn;\n') 1201# end SizeLeaf 1202 1203 1204def build_size_tree(pats, width, outerbits, outermask): 1205 global insnwidth 1206 1207 # Collect the mask of bits that are fixed in this width 1208 innermask = 0xff << (insnwidth - width) 1209 innermask &= ~outermask 1210 minwidth = None 1211 onewidth = True 1212 for i in pats: 1213 innermask &= i.fixedmask 1214 if minwidth is None: 1215 minwidth = i.width 1216 elif minwidth != i.width: 1217 onewidth = False; 1218 if minwidth < i.width: 1219 minwidth = i.width 1220 1221 if onewidth: 1222 return SizeLeaf(innermask, minwidth) 1223 1224 if innermask == 0: 1225 if width < minwidth: 1226 return build_size_tree(pats, width + 8, outerbits, outermask) 1227 1228 pnames = [] 1229 for p in pats: 1230 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) 1231 error_with_file(pats[0].file, pats[0].lineno, 1232 f'overlapping patterns size {width}:', pnames) 1233 1234 bins = {} 1235 for i in pats: 1236 fb = i.fixedbits & innermask 1237 if fb in bins: 1238 bins[fb].append(i) 1239 else: 1240 bins[fb] = [i] 1241 1242 fullmask = outermask | innermask 1243 lens = sorted(bins.keys()) 1244 if len(lens) == 1: 1245 b = lens[0] 1246 return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) 1247 1248 r = SizeTree(innermask, width) 1249 for b, l in bins.items(): 1250 s = build_size_tree(l, width, b | outerbits, fullmask) 1251 r.subs.append((b, s)) 1252 return r 1253# end build_size_tree 1254 1255 1256def prop_size(tree): 1257 """Propagate minimum widths up the decode size tree""" 1258 1259 if isinstance(tree, SizeTree): 1260 min = None 1261 for (b, s) in tree.subs: 1262 width = prop_size(s) 1263 if min is None or min > width: 1264 min = width 1265 assert min >= tree.width 1266 tree.width = min 1267 else: 1268 min = tree.width 1269 return min 1270# end prop_size 1271 1272 1273def main(): 1274 global arguments 1275 global formats 1276 global allpatterns 1277 global translate_scope 1278 global translate_prefix 1279 global output_fd 1280 global output_file 1281 global input_file 1282 global insnwidth 1283 global insntype 1284 global insnmask 1285 global decode_function 1286 global bitop_width 1287 global variablewidth 1288 global anyextern 1289 1290 decode_scope = 'static ' 1291 1292 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', 1293 'static-decode=', 'varinsnwidth='] 1294 try: 1295 (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) 1296 except getopt.GetoptError as err: 1297 error(0, err) 1298 for o, a in opts: 1299 if o in ('-o', '--output'): 1300 output_file = a 1301 elif o == '--decode': 1302 decode_function = a 1303 decode_scope = '' 1304 elif o == '--static-decode': 1305 decode_function = a 1306 elif o == '--translate': 1307 translate_prefix = a 1308 translate_scope = '' 1309 elif o in ('-w', '--insnwidth', '--varinsnwidth'): 1310 if o == '--varinsnwidth': 1311 variablewidth = True 1312 insnwidth = int(a) 1313 if insnwidth == 16: 1314 insntype = 'uint16_t' 1315 insnmask = 0xffff 1316 elif insnwidth == 64: 1317 insntype = 'uint64_t' 1318 insnmask = 0xffffffffffffffff 1319 bitop_width = 64 1320 elif insnwidth != 32: 1321 error(0, 'cannot handle insns of width', insnwidth) 1322 else: 1323 assert False, 'unhandled option' 1324 1325 if len(args) < 1: 1326 error(0, 'missing input file') 1327 1328 toppat = ExcMultiPattern(0) 1329 1330 for filename in args: 1331 input_file = filename 1332 f = open(filename, 'rt', encoding='utf-8') 1333 parse_file(f, toppat) 1334 f.close() 1335 1336 # We do not want to compute masks for toppat, because those masks 1337 # are used as a starting point for build_tree. For toppat, we must 1338 # insist that decode begins from naught. 1339 for i in toppat.pats: 1340 i.prop_masks() 1341 1342 toppat.build_tree() 1343 toppat.prop_format() 1344 1345 if variablewidth: 1346 for i in toppat.pats: 1347 i.prop_width() 1348 stree = build_size_tree(toppat.pats, 8, 0, 0) 1349 prop_size(stree) 1350 1351 if output_file: 1352 output_fd = open(output_file, 'wt', encoding='utf-8') 1353 else: 1354 output_fd = io.TextIOWrapper(sys.stdout.buffer, 1355 encoding=sys.stdout.encoding, 1356 errors="ignore") 1357 1358 output_autogen() 1359 for n in sorted(arguments.keys()): 1360 f = arguments[n] 1361 f.output_def() 1362 1363 # A single translate function can be invoked for different patterns. 1364 # Make sure that the argument sets are the same, and declare the 1365 # function only once. 1366 # 1367 # If we're sharing formats, we're likely also sharing trans_* functions, 1368 # but we can't tell which ones. Prevent issues from the compiler by 1369 # suppressing redundant declaration warnings. 1370 if anyextern: 1371 output("#pragma GCC diagnostic push\n", 1372 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", 1373 "#ifdef __clang__\n" 1374 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", 1375 "#endif\n\n") 1376 1377 out_pats = {} 1378 for i in allpatterns: 1379 if i.name in out_pats: 1380 p = out_pats[i.name] 1381 if i.base.base != p.base.base: 1382 error(0, i.name, ' has conflicting argument sets') 1383 else: 1384 i.output_decl() 1385 out_pats[i.name] = i 1386 output('\n') 1387 1388 if anyextern: 1389 output("#pragma GCC diagnostic pop\n\n") 1390 1391 for n in sorted(formats.keys()): 1392 f = formats[n] 1393 f.output_extract() 1394 1395 output(decode_scope, 'bool ', decode_function, 1396 '(DisasContext *ctx, ', insntype, ' insn)\n{\n') 1397 1398 i4 = str_indent(4) 1399 1400 if len(allpatterns) != 0: 1401 output(i4, 'union {\n') 1402 for n in sorted(arguments.keys()): 1403 f = arguments[n] 1404 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') 1405 output(i4, '} u;\n\n') 1406 toppat.output_code(4, False, 0, 0) 1407 1408 output(i4, 'return false;\n') 1409 output('}\n') 1410 1411 if variablewidth: 1412 output('\n', decode_scope, insntype, ' ', decode_function, 1413 '_load(DisasContext *ctx)\n{\n', 1414 ' ', insntype, ' insn = 0;\n\n') 1415 stree.output_code(4, 0, 0, 0) 1416 output('}\n') 1417 1418 if output_file: 1419 output_fd.close() 1420# end main 1421 1422 1423if __name__ == '__main__': 1424 main() 1425