1#!/usr/bin/env python3 2# Copyright (c) 2018 Linaro Limited 3# 4# This library is free software; you can redistribute it and/or 5# modify it under the terms of the GNU Lesser General Public 6# License as published by the Free Software Foundation; either 7# version 2 of the License, or (at your option) any later version. 8# 9# This library is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12# Lesser General Public License for more details. 13# 14# You should have received a copy of the GNU Lesser General Public 15# License along with this library; if not, see <http://www.gnu.org/licenses/>. 16# 17 18# 19# Generate a decoding tree from a specification file. 20# See the syntax and semantics in docs/devel/decodetree.rst. 21# 22 23import os 24import re 25import sys 26import getopt 27 28insnwidth = 32 29insnmask = 0xffffffff 30variablewidth = False 31fields = {} 32arguments = {} 33formats = {} 34allpatterns = [] 35anyextern = False 36 37translate_prefix = 'trans' 38translate_scope = 'static ' 39input_file = '' 40output_file = None 41output_fd = None 42insntype = 'uint32_t' 43decode_function = 'decode' 44 45re_ident = '[a-zA-Z][a-zA-Z0-9_]*' 46 47 48def error_with_file(file, lineno, *args): 49 """Print an error message from file:line and args and exit.""" 50 global output_file 51 global output_fd 52 53 prefix = '' 54 if file: 55 prefix += '{0}:'.format(file) 56 if lineno: 57 prefix += '{0}:'.format(lineno) 58 if prefix: 59 prefix += ' ' 60 print(prefix, end='error: ', file=sys.stderr) 61 print(*args, file=sys.stderr) 62 63 if output_file and output_fd: 64 output_fd.close() 65 os.remove(output_file) 66 exit(1) 67# end error_with_file 68 69 70def error(lineno, *args): 71 error_with_file(input_file, lineno, *args) 72# end error 73 74 75def output(*args): 76 global output_fd 77 for a in args: 78 output_fd.write(a) 79 80 81def output_autogen(): 82 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') 83 84 85def str_indent(c): 86 """Return a string with C spaces""" 87 return ' ' * c 88 89 90def str_fields(fields): 91 """Return a string uniquely identifing FIELDS""" 92 r = '' 93 for n in sorted(fields.keys()): 94 r += '_' + n 95 return r[1:] 96 97 98def str_match_bits(bits, mask): 99 """Return a string pretty-printing BITS/MASK""" 100 global insnwidth 101 102 i = 1 << (insnwidth - 1) 103 space = 0x01010100 104 r = '' 105 while i != 0: 106 if i & mask: 107 if i & bits: 108 r += '1' 109 else: 110 r += '0' 111 else: 112 r += '.' 113 if i & space: 114 r += ' ' 115 i >>= 1 116 return r 117 118 119def is_pow2(x): 120 """Return true iff X is equal to a power of 2.""" 121 return (x & (x - 1)) == 0 122 123 124def ctz(x): 125 """Return the number of times 2 factors into X.""" 126 assert x != 0 127 r = 0 128 while ((x >> r) & 1) == 0: 129 r += 1 130 return r 131 132 133def is_contiguous(bits): 134 if bits == 0: 135 return -1 136 shift = ctz(bits) 137 if is_pow2((bits >> shift) + 1): 138 return shift 139 else: 140 return -1 141 142 143def eq_fields_for_args(flds_a, flds_b): 144 if len(flds_a) != len(flds_b): 145 return False 146 for k, a in flds_a.items(): 147 if k not in flds_b: 148 return False 149 return True 150 151 152def eq_fields_for_fmts(flds_a, flds_b): 153 if len(flds_a) != len(flds_b): 154 return False 155 for k, a in flds_a.items(): 156 if k not in flds_b: 157 return False 158 b = flds_b[k] 159 if a.__class__ != b.__class__ or a != b: 160 return False 161 return True 162 163 164class Field: 165 """Class representing a simple instruction field""" 166 def __init__(self, sign, pos, len): 167 self.sign = sign 168 self.pos = pos 169 self.len = len 170 self.mask = ((1 << len) - 1) << pos 171 172 def __str__(self): 173 if self.sign: 174 s = 's' 175 else: 176 s = '' 177 return str(self.pos) + ':' + s + str(self.len) 178 179 def str_extract(self): 180 if self.sign: 181 extr = 'sextract32' 182 else: 183 extr = 'extract32' 184 return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len) 185 186 def __eq__(self, other): 187 return self.sign == other.sign and self.mask == other.mask 188 189 def __ne__(self, other): 190 return not self.__eq__(other) 191# end Field 192 193 194class MultiField: 195 """Class representing a compound instruction field""" 196 def __init__(self, subs, mask): 197 self.subs = subs 198 self.sign = subs[0].sign 199 self.mask = mask 200 201 def __str__(self): 202 return str(self.subs) 203 204 def str_extract(self): 205 ret = '0' 206 pos = 0 207 for f in reversed(self.subs): 208 if pos == 0: 209 ret = f.str_extract() 210 else: 211 ret = 'deposit32({0}, {1}, {2}, {3})' \ 212 .format(ret, pos, 32 - pos, f.str_extract()) 213 pos += f.len 214 return ret 215 216 def __ne__(self, other): 217 if len(self.subs) != len(other.subs): 218 return True 219 for a, b in zip(self.subs, other.subs): 220 if a.__class__ != b.__class__ or a != b: 221 return True 222 return False 223 224 def __eq__(self, other): 225 return not self.__ne__(other) 226# end MultiField 227 228 229class ConstField: 230 """Class representing an argument field with constant value""" 231 def __init__(self, value): 232 self.value = value 233 self.mask = 0 234 self.sign = value < 0 235 236 def __str__(self): 237 return str(self.value) 238 239 def str_extract(self): 240 return str(self.value) 241 242 def __cmp__(self, other): 243 return self.value - other.value 244# end ConstField 245 246 247class FunctionField: 248 """Class representing a field passed through a function""" 249 def __init__(self, func, base): 250 self.mask = base.mask 251 self.sign = base.sign 252 self.base = base 253 self.func = func 254 255 def __str__(self): 256 return self.func + '(' + str(self.base) + ')' 257 258 def str_extract(self): 259 return self.func + '(ctx, ' + self.base.str_extract() + ')' 260 261 def __eq__(self, other): 262 return self.func == other.func and self.base == other.base 263 264 def __ne__(self, other): 265 return not self.__eq__(other) 266# end FunctionField 267 268 269class ParameterField: 270 """Class representing a pseudo-field read from a function""" 271 def __init__(self, func): 272 self.mask = 0 273 self.sign = 0 274 self.func = func 275 276 def __str__(self): 277 return self.func 278 279 def str_extract(self): 280 return self.func + '(ctx)' 281 282 def __eq__(self, other): 283 return self.func == other.func 284 285 def __ne__(self, other): 286 return not self.__eq__(other) 287# end ParameterField 288 289 290class Arguments: 291 """Class representing the extracted fields of a format""" 292 def __init__(self, nm, flds, extern): 293 self.name = nm 294 self.extern = extern 295 self.fields = sorted(flds) 296 297 def __str__(self): 298 return self.name + ' ' + str(self.fields) 299 300 def struct_name(self): 301 return 'arg_' + self.name 302 303 def output_def(self): 304 if not self.extern: 305 output('typedef struct {\n') 306 for n in self.fields: 307 output(' int ', n, ';\n') 308 output('} ', self.struct_name(), ';\n\n') 309# end Arguments 310 311 312class General: 313 """Common code between instruction formats and instruction patterns""" 314 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): 315 self.name = name 316 self.file = input_file 317 self.lineno = lineno 318 self.base = base 319 self.fixedbits = fixb 320 self.fixedmask = fixm 321 self.undefmask = udfm 322 self.fieldmask = fldm 323 self.fields = flds 324 self.width = w 325 326 def __str__(self): 327 return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) 328 329 def str1(self, i): 330 return str_indent(i) + self.__str__() 331# end General 332 333 334class Format(General): 335 """Class representing an instruction format""" 336 337 def extract_name(self): 338 global decode_function 339 return decode_function + '_extract_' + self.name 340 341 def output_extract(self): 342 output('static void ', self.extract_name(), '(DisasContext *ctx, ', 343 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') 344 for n, f in self.fields.items(): 345 output(' a->', n, ' = ', f.str_extract(), ';\n') 346 output('}\n\n') 347# end Format 348 349 350class Pattern(General): 351 """Class representing an instruction pattern""" 352 353 def output_decl(self): 354 global translate_scope 355 global translate_prefix 356 output('typedef ', self.base.base.struct_name(), 357 ' arg_', self.name, ';\n') 358 output(translate_scope, 'bool ', translate_prefix, '_', self.name, 359 '(DisasContext *ctx, arg_', self.name, ' *a);\n') 360 361 def output_code(self, i, extracted, outerbits, outermask): 362 global translate_prefix 363 ind = str_indent(i) 364 arg = self.base.base.name 365 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') 366 if not extracted: 367 output(ind, self.base.extract_name(), 368 '(ctx, &u.f_', arg, ', insn);\n') 369 for n, f in self.fields.items(): 370 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') 371 output(ind, 'if (', translate_prefix, '_', self.name, 372 '(ctx, &u.f_', arg, ')) return true;\n') 373 374 # Normal patterns do not have children. 375 def build_tree(self): 376 return 377 def prop_masks(self): 378 return 379 def prop_format(self): 380 return 381 def prop_width(self): 382 return 383 384# end Pattern 385 386 387class MultiPattern(General): 388 """Class representing a set of instruction patterns""" 389 390 def __init__(self, lineno): 391 self.file = input_file 392 self.lineno = lineno 393 self.pats = [] 394 self.base = None 395 self.fixedbits = 0 396 self.fixedmask = 0 397 self.undefmask = 0 398 self.width = None 399 400 def __str__(self): 401 r = 'group' 402 if self.fixedbits is not None: 403 r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) 404 return r 405 406 def output_decl(self): 407 for p in self.pats: 408 p.output_decl() 409 410 def prop_masks(self): 411 global insnmask 412 413 fixedmask = insnmask 414 undefmask = insnmask 415 416 # Collect fixedmask/undefmask for all of the children. 417 for p in self.pats: 418 p.prop_masks() 419 fixedmask &= p.fixedmask 420 undefmask &= p.undefmask 421 422 # Widen fixedmask until all fixedbits match 423 repeat = True 424 fixedbits = 0 425 while repeat and fixedmask != 0: 426 fixedbits = None 427 for p in self.pats: 428 thisbits = p.fixedbits & fixedmask 429 if fixedbits is None: 430 fixedbits = thisbits 431 elif fixedbits != thisbits: 432 fixedmask &= ~(fixedbits ^ thisbits) 433 break 434 else: 435 repeat = False 436 437 self.fixedbits = fixedbits 438 self.fixedmask = fixedmask 439 self.undefmask = undefmask 440 441 def build_tree(self): 442 for p in self.pats: 443 p.build_tree() 444 445 def prop_format(self): 446 for p in self.pats: 447 p.build_tree() 448 449 def prop_width(self): 450 width = None 451 for p in self.pats: 452 p.prop_width() 453 if width is None: 454 width = p.width 455 elif width != p.width: 456 error_with_file(self.file, self.lineno, 457 'width mismatch in patterns within braces') 458 self.width = width 459 460# end MultiPattern 461 462 463class IncMultiPattern(MultiPattern): 464 """Class representing an overlapping set of instruction patterns""" 465 466 def output_code(self, i, extracted, outerbits, outermask): 467 global translate_prefix 468 ind = str_indent(i) 469 for p in self.pats: 470 if outermask != p.fixedmask: 471 innermask = p.fixedmask & ~outermask 472 innerbits = p.fixedbits & ~outermask 473 output(ind, 'if ((insn & ', 474 '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits), 475 ') {\n') 476 output(ind, ' /* ', 477 str_match_bits(p.fixedbits, p.fixedmask), ' */\n') 478 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) 479 output(ind, '}\n') 480 else: 481 p.output_code(i, extracted, p.fixedbits, p.fixedmask) 482#end IncMultiPattern 483 484 485class Tree: 486 """Class representing a node in a decode tree""" 487 488 def __init__(self, fm, tm): 489 self.fixedmask = fm 490 self.thismask = tm 491 self.subs = [] 492 self.base = None 493 494 def str1(self, i): 495 ind = str_indent(i) 496 r = '{0}{1:08x}'.format(ind, self.fixedmask) 497 if self.format: 498 r += ' ' + self.format.name 499 r += ' [\n' 500 for (b, s) in self.subs: 501 r += '{0} {1:08x}:\n'.format(ind, b) 502 r += s.str1(i + 4) + '\n' 503 r += ind + ']' 504 return r 505 506 def __str__(self): 507 return self.str1(0) 508 509 def output_code(self, i, extracted, outerbits, outermask): 510 ind = str_indent(i) 511 512 # If we identified all nodes below have the same format, 513 # extract the fields now. 514 if not extracted and self.base: 515 output(ind, self.base.extract_name(), 516 '(ctx, &u.f_', self.base.base.name, ', insn);\n') 517 extracted = True 518 519 # Attempt to aid the compiler in producing compact switch statements. 520 # If the bits in the mask are contiguous, extract them. 521 sh = is_contiguous(self.thismask) 522 if sh > 0: 523 # Propagate SH down into the local functions. 524 def str_switch(b, sh=sh): 525 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 526 527 def str_case(b, sh=sh): 528 return '0x{0:x}'.format(b >> sh) 529 else: 530 def str_switch(b): 531 return 'insn & 0x{0:08x}'.format(b) 532 533 def str_case(b): 534 return '0x{0:08x}'.format(b) 535 536 output(ind, 'switch (', str_switch(self.thismask), ') {\n') 537 for b, s in sorted(self.subs): 538 assert (self.thismask & ~s.fixedmask) == 0 539 innermask = outermask | self.thismask 540 innerbits = outerbits | b 541 output(ind, 'case ', str_case(b), ':\n') 542 output(ind, ' /* ', 543 str_match_bits(innerbits, innermask), ' */\n') 544 s.output_code(i + 4, extracted, innerbits, innermask) 545 output(ind, ' return false;\n') 546 output(ind, '}\n') 547# end Tree 548 549 550class ExcMultiPattern(MultiPattern): 551 """Class representing a non-overlapping set of instruction patterns""" 552 553 def output_code(self, i, extracted, outerbits, outermask): 554 # Defer everything to our decomposed Tree node 555 self.tree.output_code(i, extracted, outerbits, outermask) 556 557 @staticmethod 558 def __build_tree(pats, outerbits, outermask): 559 # Find the intersection of all remaining fixedmask. 560 innermask = ~outermask & insnmask 561 for i in pats: 562 innermask &= i.fixedmask 563 564 if innermask == 0: 565 # Edge condition: One pattern covers the entire insnmask 566 if len(pats) == 1: 567 t = Tree(outermask, innermask) 568 t.subs.append((0, pats[0])) 569 return t 570 571 text = 'overlapping patterns:' 572 for p in pats: 573 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) 574 error_with_file(pats[0].file, pats[0].lineno, text) 575 576 fullmask = outermask | innermask 577 578 # Sort each element of pats into the bin selected by the mask. 579 bins = {} 580 for i in pats: 581 fb = i.fixedbits & innermask 582 if fb in bins: 583 bins[fb].append(i) 584 else: 585 bins[fb] = [i] 586 587 # We must recurse if any bin has more than one element or if 588 # the single element in the bin has not been fully matched. 589 t = Tree(fullmask, innermask) 590 591 for b, l in bins.items(): 592 s = l[0] 593 if len(l) > 1 or s.fixedmask & ~fullmask != 0: 594 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) 595 t.subs.append((b, s)) 596 597 return t 598 599 def build_tree(self): 600 super().prop_format() 601 self.tree = self.__build_tree(self.pats, self.fixedbits, 602 self.fixedmask) 603 604 @staticmethod 605 def __prop_format(tree): 606 """Propagate Format objects into the decode tree""" 607 608 # Depth first search. 609 for (b, s) in tree.subs: 610 if isinstance(s, Tree): 611 ExcMultiPattern.__prop_format(s) 612 613 # If all entries in SUBS have the same format, then 614 # propagate that into the tree. 615 f = None 616 for (b, s) in tree.subs: 617 if f is None: 618 f = s.base 619 if f is None: 620 return 621 if f is not s.base: 622 return 623 tree.base = f 624 625 def prop_format(self): 626 super().prop_format() 627 self.__prop_format(self.tree) 628 629# end ExcMultiPattern 630 631 632def parse_field(lineno, name, toks): 633 """Parse one instruction field from TOKS at LINENO""" 634 global fields 635 global re_ident 636 global insnwidth 637 638 # A "simple" field will have only one entry; 639 # a "multifield" will have several. 640 subs = [] 641 width = 0 642 func = None 643 for t in toks: 644 if re.fullmatch('!function=' + re_ident, t): 645 if func: 646 error(lineno, 'duplicate function') 647 func = t.split('=') 648 func = func[1] 649 continue 650 651 if re.fullmatch('[0-9]+:s[0-9]+', t): 652 # Signed field extract 653 subtoks = t.split(':s') 654 sign = True 655 elif re.fullmatch('[0-9]+:[0-9]+', t): 656 # Unsigned field extract 657 subtoks = t.split(':') 658 sign = False 659 else: 660 error(lineno, 'invalid field token "{0}"'.format(t)) 661 po = int(subtoks[0]) 662 le = int(subtoks[1]) 663 if po + le > insnwidth: 664 error(lineno, 'field {0} too large'.format(t)) 665 f = Field(sign, po, le) 666 subs.append(f) 667 width += le 668 669 if width > insnwidth: 670 error(lineno, 'field too large') 671 if len(subs) == 0: 672 if func: 673 f = ParameterField(func) 674 else: 675 error(lineno, 'field with no value') 676 else: 677 if len(subs) == 1: 678 f = subs[0] 679 else: 680 mask = 0 681 for s in subs: 682 if mask & s.mask: 683 error(lineno, 'field components overlap') 684 mask |= s.mask 685 f = MultiField(subs, mask) 686 if func: 687 f = FunctionField(func, f) 688 689 if name in fields: 690 error(lineno, 'duplicate field', name) 691 fields[name] = f 692# end parse_field 693 694 695def parse_arguments(lineno, name, toks): 696 """Parse one argument set from TOKS at LINENO""" 697 global arguments 698 global re_ident 699 global anyextern 700 701 flds = [] 702 extern = False 703 for t in toks: 704 if re.fullmatch('!extern', t): 705 extern = True 706 anyextern = True 707 continue 708 if not re.fullmatch(re_ident, t): 709 error(lineno, 'invalid argument set token "{0}"'.format(t)) 710 if t in flds: 711 error(lineno, 'duplicate argument "{0}"'.format(t)) 712 flds.append(t) 713 714 if name in arguments: 715 error(lineno, 'duplicate argument set', name) 716 arguments[name] = Arguments(name, flds, extern) 717# end parse_arguments 718 719 720def lookup_field(lineno, name): 721 global fields 722 if name in fields: 723 return fields[name] 724 error(lineno, 'undefined field', name) 725 726 727def add_field(lineno, flds, new_name, f): 728 if new_name in flds: 729 error(lineno, 'duplicate field', new_name) 730 flds[new_name] = f 731 return flds 732 733 734def add_field_byname(lineno, flds, new_name, old_name): 735 return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) 736 737 738def infer_argument_set(flds): 739 global arguments 740 global decode_function 741 742 for arg in arguments.values(): 743 if eq_fields_for_args(flds, arg.fields): 744 return arg 745 746 name = decode_function + str(len(arguments)) 747 arg = Arguments(name, flds.keys(), False) 748 arguments[name] = arg 749 return arg 750 751 752def infer_format(arg, fieldmask, flds, width): 753 global arguments 754 global formats 755 global decode_function 756 757 const_flds = {} 758 var_flds = {} 759 for n, c in flds.items(): 760 if c is ConstField: 761 const_flds[n] = c 762 else: 763 var_flds[n] = c 764 765 # Look for an existing format with the same argument set and fields 766 for fmt in formats.values(): 767 if arg and fmt.base != arg: 768 continue 769 if fieldmask != fmt.fieldmask: 770 continue 771 if width != fmt.width: 772 continue 773 if not eq_fields_for_fmts(flds, fmt.fields): 774 continue 775 return (fmt, const_flds) 776 777 name = decode_function + '_Fmt_' + str(len(formats)) 778 if not arg: 779 arg = infer_argument_set(flds) 780 781 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) 782 formats[name] = fmt 783 784 return (fmt, const_flds) 785# end infer_format 786 787 788def parse_generic(lineno, parent_pat, name, toks): 789 """Parse one instruction format from TOKS at LINENO""" 790 global fields 791 global arguments 792 global formats 793 global allpatterns 794 global re_ident 795 global insnwidth 796 global insnmask 797 global variablewidth 798 799 is_format = parent_pat is None 800 801 fixedmask = 0 802 fixedbits = 0 803 undefmask = 0 804 width = 0 805 flds = {} 806 arg = None 807 fmt = None 808 for t in toks: 809 # '&Foo' gives a format an explcit argument set. 810 if t[0] == '&': 811 tt = t[1:] 812 if arg: 813 error(lineno, 'multiple argument sets') 814 if tt in arguments: 815 arg = arguments[tt] 816 else: 817 error(lineno, 'undefined argument set', t) 818 continue 819 820 # '@Foo' gives a pattern an explicit format. 821 if t[0] == '@': 822 tt = t[1:] 823 if fmt: 824 error(lineno, 'multiple formats') 825 if tt in formats: 826 fmt = formats[tt] 827 else: 828 error(lineno, 'undefined format', t) 829 continue 830 831 # '%Foo' imports a field. 832 if t[0] == '%': 833 tt = t[1:] 834 flds = add_field_byname(lineno, flds, tt, tt) 835 continue 836 837 # 'Foo=%Bar' imports a field with a different name. 838 if re.fullmatch(re_ident + '=%' + re_ident, t): 839 (fname, iname) = t.split('=%') 840 flds = add_field_byname(lineno, flds, fname, iname) 841 continue 842 843 # 'Foo=number' sets an argument field to a constant value 844 if re.fullmatch(re_ident + '=[+-]?[0-9]+', t): 845 (fname, value) = t.split('=') 846 value = int(value) 847 flds = add_field(lineno, flds, fname, ConstField(value)) 848 continue 849 850 # Pattern of 0s, 1s, dots and dashes indicate required zeros, 851 # required ones, or dont-cares. 852 if re.fullmatch('[01.-]+', t): 853 shift = len(t) 854 fms = t.replace('0', '1') 855 fms = fms.replace('.', '0') 856 fms = fms.replace('-', '0') 857 fbs = t.replace('.', '0') 858 fbs = fbs.replace('-', '0') 859 ubm = t.replace('1', '0') 860 ubm = ubm.replace('.', '0') 861 ubm = ubm.replace('-', '1') 862 fms = int(fms, 2) 863 fbs = int(fbs, 2) 864 ubm = int(ubm, 2) 865 fixedbits = (fixedbits << shift) | fbs 866 fixedmask = (fixedmask << shift) | fms 867 undefmask = (undefmask << shift) | ubm 868 # Otherwise, fieldname:fieldwidth 869 elif re.fullmatch(re_ident + ':s?[0-9]+', t): 870 (fname, flen) = t.split(':') 871 sign = False 872 if flen[0] == 's': 873 sign = True 874 flen = flen[1:] 875 shift = int(flen, 10) 876 if shift + width > insnwidth: 877 error(lineno, 'field {0} exceeds insnwidth'.format(fname)) 878 f = Field(sign, insnwidth - width - shift, shift) 879 flds = add_field(lineno, flds, fname, f) 880 fixedbits <<= shift 881 fixedmask <<= shift 882 undefmask <<= shift 883 else: 884 error(lineno, 'invalid token "{0}"'.format(t)) 885 width += shift 886 887 if variablewidth and width < insnwidth and width % 8 == 0: 888 shift = insnwidth - width 889 fixedbits <<= shift 890 fixedmask <<= shift 891 undefmask <<= shift 892 undefmask |= (1 << shift) - 1 893 894 # We should have filled in all of the bits of the instruction. 895 elif not (is_format and width == 0) and width != insnwidth: 896 error(lineno, 'definition has {0} bits'.format(width)) 897 898 # Do not check for fields overlaping fields; one valid usage 899 # is to be able to duplicate fields via import. 900 fieldmask = 0 901 for f in flds.values(): 902 fieldmask |= f.mask 903 904 # Fix up what we've parsed to match either a format or a pattern. 905 if is_format: 906 # Formats cannot reference formats. 907 if fmt: 908 error(lineno, 'format referencing format') 909 # If an argument set is given, then there should be no fields 910 # without a place to store it. 911 if arg: 912 for f in flds.keys(): 913 if f not in arg.fields: 914 error(lineno, 'field {0} not in argument set {1}' 915 .format(f, arg.name)) 916 else: 917 arg = infer_argument_set(flds) 918 if name in formats: 919 error(lineno, 'duplicate format name', name) 920 fmt = Format(name, lineno, arg, fixedbits, fixedmask, 921 undefmask, fieldmask, flds, width) 922 formats[name] = fmt 923 else: 924 # Patterns can reference a format ... 925 if fmt: 926 # ... but not an argument simultaneously 927 if arg: 928 error(lineno, 'pattern specifies both format and argument set') 929 if fixedmask & fmt.fixedmask: 930 error(lineno, 'pattern fixed bits overlap format fixed bits') 931 if width != fmt.width: 932 error(lineno, 'pattern uses format of different width') 933 fieldmask |= fmt.fieldmask 934 fixedbits |= fmt.fixedbits 935 fixedmask |= fmt.fixedmask 936 undefmask |= fmt.undefmask 937 else: 938 (fmt, flds) = infer_format(arg, fieldmask, flds, width) 939 arg = fmt.base 940 for f in flds.keys(): 941 if f not in arg.fields: 942 error(lineno, 'field {0} not in argument set {1}' 943 .format(f, arg.name)) 944 if f in fmt.fields.keys(): 945 error(lineno, 'field {0} set by format and pattern'.format(f)) 946 for f in arg.fields: 947 if f not in flds.keys() and f not in fmt.fields.keys(): 948 error(lineno, 'field {0} not initialized'.format(f)) 949 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, 950 undefmask, fieldmask, flds, width) 951 parent_pat.pats.append(pat) 952 allpatterns.append(pat) 953 954 # Validate the masks that we have assembled. 955 if fieldmask & fixedmask: 956 error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})' 957 .format(fieldmask, fixedmask)) 958 if fieldmask & undefmask: 959 error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 960 .format(fieldmask, undefmask)) 961 if fixedmask & undefmask: 962 error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 963 .format(fixedmask, undefmask)) 964 if not is_format: 965 allbits = fieldmask | fixedmask | undefmask 966 if allbits != insnmask: 967 error(lineno, 'bits left unspecified (0x{0:08x})' 968 .format(allbits ^ insnmask)) 969# end parse_general 970 971 972def parse_file(f, parent_pat): 973 """Parse all of the patterns within a file""" 974 975 # Read all of the lines of the file. Concatenate lines 976 # ending in backslash; discard empty lines and comments. 977 toks = [] 978 lineno = 0 979 nesting = 0 980 nesting_pats = [] 981 982 for line in f: 983 lineno += 1 984 985 # Expand and strip spaces, to find indent. 986 line = line.rstrip() 987 line = line.expandtabs() 988 len1 = len(line) 989 line = line.lstrip() 990 len2 = len(line) 991 992 # Discard comments 993 end = line.find('#') 994 if end >= 0: 995 line = line[:end] 996 997 t = line.split() 998 if len(toks) != 0: 999 # Next line after continuation 1000 toks.extend(t) 1001 else: 1002 # Allow completely blank lines. 1003 if len1 == 0: 1004 continue 1005 indent = len1 - len2 1006 # Empty line due to comment. 1007 if len(t) == 0: 1008 # Indentation must be correct, even for comment lines. 1009 if indent != nesting: 1010 error(lineno, 'indentation ', indent, ' != ', nesting) 1011 continue 1012 start_lineno = lineno 1013 toks = t 1014 1015 # Continuation? 1016 if toks[-1] == '\\': 1017 toks.pop() 1018 continue 1019 1020 name = toks[0] 1021 del toks[0] 1022 1023 # End nesting? 1024 if name == '}' or name == ']': 1025 if len(toks) != 0: 1026 error(start_lineno, 'extra tokens after close brace') 1027 1028 # Make sure { } and [ ] nest properly. 1029 if (name == '}') != isinstance(parent_pat, IncMultiPattern): 1030 error(lineno, 'mismatched close brace') 1031 1032 try: 1033 parent_pat = nesting_pats.pop() 1034 except: 1035 error(lineno, 'extra close brace') 1036 1037 nesting -= 2 1038 if indent != nesting: 1039 error(lineno, 'indentation ', indent, ' != ', nesting) 1040 1041 toks = [] 1042 continue 1043 1044 # Everything else should have current indentation. 1045 if indent != nesting: 1046 error(start_lineno, 'indentation ', indent, ' != ', nesting) 1047 1048 # Start nesting? 1049 if name == '{' or name == '[': 1050 if len(toks) != 0: 1051 error(start_lineno, 'extra tokens after open brace') 1052 1053 if name == '{': 1054 nested_pat = IncMultiPattern(start_lineno) 1055 else: 1056 nested_pat = ExcMultiPattern(start_lineno) 1057 parent_pat.pats.append(nested_pat) 1058 nesting_pats.append(parent_pat) 1059 parent_pat = nested_pat 1060 1061 nesting += 2 1062 toks = [] 1063 continue 1064 1065 # Determine the type of object needing to be parsed. 1066 if name[0] == '%': 1067 parse_field(start_lineno, name[1:], toks) 1068 elif name[0] == '&': 1069 parse_arguments(start_lineno, name[1:], toks) 1070 elif name[0] == '@': 1071 parse_generic(start_lineno, None, name[1:], toks) 1072 else: 1073 parse_generic(start_lineno, parent_pat, name, toks) 1074 toks = [] 1075 1076 if nesting != 0: 1077 error(lineno, 'missing close brace') 1078# end parse_file 1079 1080 1081class SizeTree: 1082 """Class representing a node in a size decode tree""" 1083 1084 def __init__(self, m, w): 1085 self.mask = m 1086 self.subs = [] 1087 self.base = None 1088 self.width = w 1089 1090 def str1(self, i): 1091 ind = str_indent(i) 1092 r = '{0}{1:08x}'.format(ind, self.mask) 1093 r += ' [\n' 1094 for (b, s) in self.subs: 1095 r += '{0} {1:08x}:\n'.format(ind, b) 1096 r += s.str1(i + 4) + '\n' 1097 r += ind + ']' 1098 return r 1099 1100 def __str__(self): 1101 return self.str1(0) 1102 1103 def output_code(self, i, extracted, outerbits, outermask): 1104 ind = str_indent(i) 1105 1106 # If we need to load more bytes to test, do so now. 1107 if extracted < self.width: 1108 output(ind, 'insn = ', decode_function, 1109 '_load_bytes(ctx, insn, {0}, {1});\n' 1110 .format(extracted // 8, self.width // 8)); 1111 extracted = self.width 1112 1113 # Attempt to aid the compiler in producing compact switch statements. 1114 # If the bits in the mask are contiguous, extract them. 1115 sh = is_contiguous(self.mask) 1116 if sh > 0: 1117 # Propagate SH down into the local functions. 1118 def str_switch(b, sh=sh): 1119 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 1120 1121 def str_case(b, sh=sh): 1122 return '0x{0:x}'.format(b >> sh) 1123 else: 1124 def str_switch(b): 1125 return 'insn & 0x{0:08x}'.format(b) 1126 1127 def str_case(b): 1128 return '0x{0:08x}'.format(b) 1129 1130 output(ind, 'switch (', str_switch(self.mask), ') {\n') 1131 for b, s in sorted(self.subs): 1132 innermask = outermask | self.mask 1133 innerbits = outerbits | b 1134 output(ind, 'case ', str_case(b), ':\n') 1135 output(ind, ' /* ', 1136 str_match_bits(innerbits, innermask), ' */\n') 1137 s.output_code(i + 4, extracted, innerbits, innermask) 1138 output(ind, '}\n') 1139 output(ind, 'return insn;\n') 1140# end SizeTree 1141 1142class SizeLeaf: 1143 """Class representing a leaf node in a size decode tree""" 1144 1145 def __init__(self, m, w): 1146 self.mask = m 1147 self.width = w 1148 1149 def str1(self, i): 1150 ind = str_indent(i) 1151 return '{0}{1:08x}'.format(ind, self.mask) 1152 1153 def __str__(self): 1154 return self.str1(0) 1155 1156 def output_code(self, i, extracted, outerbits, outermask): 1157 global decode_function 1158 ind = str_indent(i) 1159 1160 # If we need to load more bytes, do so now. 1161 if extracted < self.width: 1162 output(ind, 'insn = ', decode_function, 1163 '_load_bytes(ctx, insn, {0}, {1});\n' 1164 .format(extracted // 8, self.width // 8)); 1165 extracted = self.width 1166 output(ind, 'return insn;\n') 1167# end SizeLeaf 1168 1169 1170def build_size_tree(pats, width, outerbits, outermask): 1171 global insnwidth 1172 1173 # Collect the mask of bits that are fixed in this width 1174 innermask = 0xff << (insnwidth - width) 1175 innermask &= ~outermask 1176 minwidth = None 1177 onewidth = True 1178 for i in pats: 1179 innermask &= i.fixedmask 1180 if minwidth is None: 1181 minwidth = i.width 1182 elif minwidth != i.width: 1183 onewidth = False; 1184 if minwidth < i.width: 1185 minwidth = i.width 1186 1187 if onewidth: 1188 return SizeLeaf(innermask, minwidth) 1189 1190 if innermask == 0: 1191 if width < minwidth: 1192 return build_size_tree(pats, width + 8, outerbits, outermask) 1193 1194 pnames = [] 1195 for p in pats: 1196 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) 1197 error_with_file(pats[0].file, pats[0].lineno, 1198 'overlapping patterns size {0}:'.format(width), pnames) 1199 1200 bins = {} 1201 for i in pats: 1202 fb = i.fixedbits & innermask 1203 if fb in bins: 1204 bins[fb].append(i) 1205 else: 1206 bins[fb] = [i] 1207 1208 fullmask = outermask | innermask 1209 lens = sorted(bins.keys()) 1210 if len(lens) == 1: 1211 b = lens[0] 1212 return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) 1213 1214 r = SizeTree(innermask, width) 1215 for b, l in bins.items(): 1216 s = build_size_tree(l, width, b | outerbits, fullmask) 1217 r.subs.append((b, s)) 1218 return r 1219# end build_size_tree 1220 1221 1222def prop_size(tree): 1223 """Propagate minimum widths up the decode size tree""" 1224 1225 if isinstance(tree, SizeTree): 1226 min = None 1227 for (b, s) in tree.subs: 1228 width = prop_size(s) 1229 if min is None or min > width: 1230 min = width 1231 assert min >= tree.width 1232 tree.width = min 1233 else: 1234 min = tree.width 1235 return min 1236# end prop_size 1237 1238 1239def main(): 1240 global arguments 1241 global formats 1242 global allpatterns 1243 global translate_scope 1244 global translate_prefix 1245 global output_fd 1246 global output_file 1247 global input_file 1248 global insnwidth 1249 global insntype 1250 global insnmask 1251 global decode_function 1252 global variablewidth 1253 global anyextern 1254 1255 decode_scope = 'static ' 1256 1257 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', 1258 'static-decode=', 'varinsnwidth='] 1259 try: 1260 (opts, args) = getopt.getopt(sys.argv[1:], 'o:vw:', long_opts) 1261 except getopt.GetoptError as err: 1262 error(0, err) 1263 for o, a in opts: 1264 if o in ('-o', '--output'): 1265 output_file = a 1266 elif o == '--decode': 1267 decode_function = a 1268 decode_scope = '' 1269 elif o == '--static-decode': 1270 decode_function = a 1271 elif o == '--translate': 1272 translate_prefix = a 1273 translate_scope = '' 1274 elif o in ('-w', '--insnwidth', '--varinsnwidth'): 1275 if o == '--varinsnwidth': 1276 variablewidth = True 1277 insnwidth = int(a) 1278 if insnwidth == 16: 1279 insntype = 'uint16_t' 1280 insnmask = 0xffff 1281 elif insnwidth != 32: 1282 error(0, 'cannot handle insns of width', insnwidth) 1283 else: 1284 assert False, 'unhandled option' 1285 1286 if len(args) < 1: 1287 error(0, 'missing input file') 1288 1289 toppat = ExcMultiPattern(0) 1290 1291 for filename in args: 1292 input_file = filename 1293 f = open(filename, 'r') 1294 parse_file(f, toppat) 1295 f.close() 1296 1297 # We do not want to compute masks for toppat, because those masks 1298 # are used as a starting point for build_tree. For toppat, we must 1299 # insist that decode begins from naught. 1300 for i in toppat.pats: 1301 i.prop_masks() 1302 1303 toppat.build_tree() 1304 toppat.prop_format() 1305 1306 if variablewidth: 1307 for i in toppat.pats: 1308 i.prop_width() 1309 stree = build_size_tree(toppat.pats, 8, 0, 0) 1310 prop_size(stree) 1311 1312 if output_file: 1313 output_fd = open(output_file, 'w') 1314 else: 1315 output_fd = sys.stdout 1316 1317 output_autogen() 1318 for n in sorted(arguments.keys()): 1319 f = arguments[n] 1320 f.output_def() 1321 1322 # A single translate function can be invoked for different patterns. 1323 # Make sure that the argument sets are the same, and declare the 1324 # function only once. 1325 # 1326 # If we're sharing formats, we're likely also sharing trans_* functions, 1327 # but we can't tell which ones. Prevent issues from the compiler by 1328 # suppressing redundant declaration warnings. 1329 if anyextern: 1330 output("#pragma GCC diagnostic push\n", 1331 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", 1332 "#ifdef __clang__\n" 1333 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", 1334 "#endif\n\n") 1335 1336 out_pats = {} 1337 for i in allpatterns: 1338 if i.name in out_pats: 1339 p = out_pats[i.name] 1340 if i.base.base != p.base.base: 1341 error(0, i.name, ' has conflicting argument sets') 1342 else: 1343 i.output_decl() 1344 out_pats[i.name] = i 1345 output('\n') 1346 1347 if anyextern: 1348 output("#pragma GCC diagnostic pop\n\n") 1349 1350 for n in sorted(formats.keys()): 1351 f = formats[n] 1352 f.output_extract() 1353 1354 output(decode_scope, 'bool ', decode_function, 1355 '(DisasContext *ctx, ', insntype, ' insn)\n{\n') 1356 1357 i4 = str_indent(4) 1358 1359 if len(allpatterns) != 0: 1360 output(i4, 'union {\n') 1361 for n in sorted(arguments.keys()): 1362 f = arguments[n] 1363 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') 1364 output(i4, '} u;\n\n') 1365 toppat.output_code(4, False, 0, 0) 1366 1367 output(i4, 'return false;\n') 1368 output('}\n') 1369 1370 if variablewidth: 1371 output('\n', decode_scope, insntype, ' ', decode_function, 1372 '_load(DisasContext *ctx)\n{\n', 1373 ' ', insntype, ' insn = 0;\n\n') 1374 stree.output_code(4, 0, 0, 0) 1375 output('}\n') 1376 1377 if output_file: 1378 output_fd.close() 1379# end main 1380 1381 1382if __name__ == '__main__': 1383 main() 1384