1#!/usr/bin/env python3 2# Copyright (c) 2018 Linaro Limited 3# 4# This library is free software; you can redistribute it and/or 5# modify it under the terms of the GNU Lesser General Public 6# License as published by the Free Software Foundation; either 7# version 2.1 of the License, or (at your option) any later version. 8# 9# This library is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12# Lesser General Public License for more details. 13# 14# You should have received a copy of the GNU Lesser General Public 15# License along with this library; if not, see <http://www.gnu.org/licenses/>. 16# 17 18# 19# Generate a decoding tree from a specification file. 20# See the syntax and semantics in docs/devel/decodetree.rst. 21# 22 23import io 24import os 25import re 26import sys 27import getopt 28 29insnwidth = 32 30insnmask = 0xffffffff 31variablewidth = False 32fields = {} 33arguments = {} 34formats = {} 35allpatterns = [] 36anyextern = False 37 38translate_prefix = 'trans' 39translate_scope = 'static ' 40input_file = '' 41output_file = None 42output_fd = None 43insntype = 'uint32_t' 44decode_function = 'decode' 45 46# An identifier for C. 47re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' 48 49# Identifiers for Arguments, Fields, Formats and Patterns. 50re_arg_ident = '&[a-zA-Z0-9_]*' 51re_fld_ident = '%[a-zA-Z0-9_]*' 52re_fmt_ident = '@[a-zA-Z0-9_]*' 53re_pat_ident = '[a-zA-Z0-9_]*' 54 55def error_with_file(file, lineno, *args): 56 """Print an error message from file:line and args and exit.""" 57 global output_file 58 global output_fd 59 60 prefix = '' 61 if file: 62 prefix += '{0}:'.format(file) 63 if lineno: 64 prefix += '{0}:'.format(lineno) 65 if prefix: 66 prefix += ' ' 67 print(prefix, end='error: ', file=sys.stderr) 68 print(*args, file=sys.stderr) 69 70 if output_file and output_fd: 71 output_fd.close() 72 os.remove(output_file) 73 exit(1) 74# end error_with_file 75 76 77def error(lineno, *args): 78 error_with_file(input_file, lineno, *args) 79# end error 80 81 82def output(*args): 83 global output_fd 84 for a in args: 85 output_fd.write(a) 86 87 88def output_autogen(): 89 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') 90 91 92def str_indent(c): 93 """Return a string with C spaces""" 94 return ' ' * c 95 96 97def str_fields(fields): 98 """Return a string uniquely identifying FIELDS""" 99 r = '' 100 for n in sorted(fields.keys()): 101 r += '_' + n 102 return r[1:] 103 104 105def str_match_bits(bits, mask): 106 """Return a string pretty-printing BITS/MASK""" 107 global insnwidth 108 109 i = 1 << (insnwidth - 1) 110 space = 0x01010100 111 r = '' 112 while i != 0: 113 if i & mask: 114 if i & bits: 115 r += '1' 116 else: 117 r += '0' 118 else: 119 r += '.' 120 if i & space: 121 r += ' ' 122 i >>= 1 123 return r 124 125 126def is_pow2(x): 127 """Return true iff X is equal to a power of 2.""" 128 return (x & (x - 1)) == 0 129 130 131def ctz(x): 132 """Return the number of times 2 factors into X.""" 133 assert x != 0 134 r = 0 135 while ((x >> r) & 1) == 0: 136 r += 1 137 return r 138 139 140def is_contiguous(bits): 141 if bits == 0: 142 return -1 143 shift = ctz(bits) 144 if is_pow2((bits >> shift) + 1): 145 return shift 146 else: 147 return -1 148 149 150def eq_fields_for_args(flds_a, flds_b): 151 if len(flds_a) != len(flds_b): 152 return False 153 for k, a in flds_a.items(): 154 if k not in flds_b: 155 return False 156 return True 157 158 159def eq_fields_for_fmts(flds_a, flds_b): 160 if len(flds_a) != len(flds_b): 161 return False 162 for k, a in flds_a.items(): 163 if k not in flds_b: 164 return False 165 b = flds_b[k] 166 if a.__class__ != b.__class__ or a != b: 167 return False 168 return True 169 170 171class Field: 172 """Class representing a simple instruction field""" 173 def __init__(self, sign, pos, len): 174 self.sign = sign 175 self.pos = pos 176 self.len = len 177 self.mask = ((1 << len) - 1) << pos 178 179 def __str__(self): 180 if self.sign: 181 s = 's' 182 else: 183 s = '' 184 return str(self.pos) + ':' + s + str(self.len) 185 186 def str_extract(self): 187 if self.sign: 188 extr = 'sextract32' 189 else: 190 extr = 'extract32' 191 return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len) 192 193 def __eq__(self, other): 194 return self.sign == other.sign and self.mask == other.mask 195 196 def __ne__(self, other): 197 return not self.__eq__(other) 198# end Field 199 200 201class MultiField: 202 """Class representing a compound instruction field""" 203 def __init__(self, subs, mask): 204 self.subs = subs 205 self.sign = subs[0].sign 206 self.mask = mask 207 208 def __str__(self): 209 return str(self.subs) 210 211 def str_extract(self): 212 ret = '0' 213 pos = 0 214 for f in reversed(self.subs): 215 if pos == 0: 216 ret = f.str_extract() 217 else: 218 ret = 'deposit32({0}, {1}, {2}, {3})' \ 219 .format(ret, pos, 32 - pos, f.str_extract()) 220 pos += f.len 221 return ret 222 223 def __ne__(self, other): 224 if len(self.subs) != len(other.subs): 225 return True 226 for a, b in zip(self.subs, other.subs): 227 if a.__class__ != b.__class__ or a != b: 228 return True 229 return False 230 231 def __eq__(self, other): 232 return not self.__ne__(other) 233# end MultiField 234 235 236class ConstField: 237 """Class representing an argument field with constant value""" 238 def __init__(self, value): 239 self.value = value 240 self.mask = 0 241 self.sign = value < 0 242 243 def __str__(self): 244 return str(self.value) 245 246 def str_extract(self): 247 return str(self.value) 248 249 def __cmp__(self, other): 250 return self.value - other.value 251# end ConstField 252 253 254class FunctionField: 255 """Class representing a field passed through a function""" 256 def __init__(self, func, base): 257 self.mask = base.mask 258 self.sign = base.sign 259 self.base = base 260 self.func = func 261 262 def __str__(self): 263 return self.func + '(' + str(self.base) + ')' 264 265 def str_extract(self): 266 return self.func + '(ctx, ' + self.base.str_extract() + ')' 267 268 def __eq__(self, other): 269 return self.func == other.func and self.base == other.base 270 271 def __ne__(self, other): 272 return not self.__eq__(other) 273# end FunctionField 274 275 276class ParameterField: 277 """Class representing a pseudo-field read from a function""" 278 def __init__(self, func): 279 self.mask = 0 280 self.sign = 0 281 self.func = func 282 283 def __str__(self): 284 return self.func 285 286 def str_extract(self): 287 return self.func + '(ctx)' 288 289 def __eq__(self, other): 290 return self.func == other.func 291 292 def __ne__(self, other): 293 return not self.__eq__(other) 294# end ParameterField 295 296 297class Arguments: 298 """Class representing the extracted fields of a format""" 299 def __init__(self, nm, flds, extern): 300 self.name = nm 301 self.extern = extern 302 self.fields = sorted(flds) 303 304 def __str__(self): 305 return self.name + ' ' + str(self.fields) 306 307 def struct_name(self): 308 return 'arg_' + self.name 309 310 def output_def(self): 311 if not self.extern: 312 output('typedef struct {\n') 313 for n in self.fields: 314 output(' int ', n, ';\n') 315 output('} ', self.struct_name(), ';\n\n') 316# end Arguments 317 318 319class General: 320 """Common code between instruction formats and instruction patterns""" 321 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): 322 self.name = name 323 self.file = input_file 324 self.lineno = lineno 325 self.base = base 326 self.fixedbits = fixb 327 self.fixedmask = fixm 328 self.undefmask = udfm 329 self.fieldmask = fldm 330 self.fields = flds 331 self.width = w 332 333 def __str__(self): 334 return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) 335 336 def str1(self, i): 337 return str_indent(i) + self.__str__() 338# end General 339 340 341class Format(General): 342 """Class representing an instruction format""" 343 344 def extract_name(self): 345 global decode_function 346 return decode_function + '_extract_' + self.name 347 348 def output_extract(self): 349 output('static void ', self.extract_name(), '(DisasContext *ctx, ', 350 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') 351 for n, f in self.fields.items(): 352 output(' a->', n, ' = ', f.str_extract(), ';\n') 353 output('}\n\n') 354# end Format 355 356 357class Pattern(General): 358 """Class representing an instruction pattern""" 359 360 def output_decl(self): 361 global translate_scope 362 global translate_prefix 363 output('typedef ', self.base.base.struct_name(), 364 ' arg_', self.name, ';\n') 365 output(translate_scope, 'bool ', translate_prefix, '_', self.name, 366 '(DisasContext *ctx, arg_', self.name, ' *a);\n') 367 368 def output_code(self, i, extracted, outerbits, outermask): 369 global translate_prefix 370 ind = str_indent(i) 371 arg = self.base.base.name 372 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') 373 if not extracted: 374 output(ind, self.base.extract_name(), 375 '(ctx, &u.f_', arg, ', insn);\n') 376 for n, f in self.fields.items(): 377 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') 378 output(ind, 'if (', translate_prefix, '_', self.name, 379 '(ctx, &u.f_', arg, ')) return true;\n') 380 381 # Normal patterns do not have children. 382 def build_tree(self): 383 return 384 def prop_masks(self): 385 return 386 def prop_format(self): 387 return 388 def prop_width(self): 389 return 390 391# end Pattern 392 393 394class MultiPattern(General): 395 """Class representing a set of instruction patterns""" 396 397 def __init__(self, lineno): 398 self.file = input_file 399 self.lineno = lineno 400 self.pats = [] 401 self.base = None 402 self.fixedbits = 0 403 self.fixedmask = 0 404 self.undefmask = 0 405 self.width = None 406 407 def __str__(self): 408 r = 'group' 409 if self.fixedbits is not None: 410 r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) 411 return r 412 413 def output_decl(self): 414 for p in self.pats: 415 p.output_decl() 416 417 def prop_masks(self): 418 global insnmask 419 420 fixedmask = insnmask 421 undefmask = insnmask 422 423 # Collect fixedmask/undefmask for all of the children. 424 for p in self.pats: 425 p.prop_masks() 426 fixedmask &= p.fixedmask 427 undefmask &= p.undefmask 428 429 # Widen fixedmask until all fixedbits match 430 repeat = True 431 fixedbits = 0 432 while repeat and fixedmask != 0: 433 fixedbits = None 434 for p in self.pats: 435 thisbits = p.fixedbits & fixedmask 436 if fixedbits is None: 437 fixedbits = thisbits 438 elif fixedbits != thisbits: 439 fixedmask &= ~(fixedbits ^ thisbits) 440 break 441 else: 442 repeat = False 443 444 self.fixedbits = fixedbits 445 self.fixedmask = fixedmask 446 self.undefmask = undefmask 447 448 def build_tree(self): 449 for p in self.pats: 450 p.build_tree() 451 452 def prop_format(self): 453 for p in self.pats: 454 p.build_tree() 455 456 def prop_width(self): 457 width = None 458 for p in self.pats: 459 p.prop_width() 460 if width is None: 461 width = p.width 462 elif width != p.width: 463 error_with_file(self.file, self.lineno, 464 'width mismatch in patterns within braces') 465 self.width = width 466 467# end MultiPattern 468 469 470class IncMultiPattern(MultiPattern): 471 """Class representing an overlapping set of instruction patterns""" 472 473 def output_code(self, i, extracted, outerbits, outermask): 474 global translate_prefix 475 ind = str_indent(i) 476 for p in self.pats: 477 if outermask != p.fixedmask: 478 innermask = p.fixedmask & ~outermask 479 innerbits = p.fixedbits & ~outermask 480 output(ind, 'if ((insn & ', 481 '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits), 482 ') {\n') 483 output(ind, ' /* ', 484 str_match_bits(p.fixedbits, p.fixedmask), ' */\n') 485 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) 486 output(ind, '}\n') 487 else: 488 p.output_code(i, extracted, p.fixedbits, p.fixedmask) 489#end IncMultiPattern 490 491 492class Tree: 493 """Class representing a node in a decode tree""" 494 495 def __init__(self, fm, tm): 496 self.fixedmask = fm 497 self.thismask = tm 498 self.subs = [] 499 self.base = None 500 501 def str1(self, i): 502 ind = str_indent(i) 503 r = '{0}{1:08x}'.format(ind, self.fixedmask) 504 if self.format: 505 r += ' ' + self.format.name 506 r += ' [\n' 507 for (b, s) in self.subs: 508 r += '{0} {1:08x}:\n'.format(ind, b) 509 r += s.str1(i + 4) + '\n' 510 r += ind + ']' 511 return r 512 513 def __str__(self): 514 return self.str1(0) 515 516 def output_code(self, i, extracted, outerbits, outermask): 517 ind = str_indent(i) 518 519 # If we identified all nodes below have the same format, 520 # extract the fields now. 521 if not extracted and self.base: 522 output(ind, self.base.extract_name(), 523 '(ctx, &u.f_', self.base.base.name, ', insn);\n') 524 extracted = True 525 526 # Attempt to aid the compiler in producing compact switch statements. 527 # If the bits in the mask are contiguous, extract them. 528 sh = is_contiguous(self.thismask) 529 if sh > 0: 530 # Propagate SH down into the local functions. 531 def str_switch(b, sh=sh): 532 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 533 534 def str_case(b, sh=sh): 535 return '0x{0:x}'.format(b >> sh) 536 else: 537 def str_switch(b): 538 return 'insn & 0x{0:08x}'.format(b) 539 540 def str_case(b): 541 return '0x{0:08x}'.format(b) 542 543 output(ind, 'switch (', str_switch(self.thismask), ') {\n') 544 for b, s in sorted(self.subs): 545 assert (self.thismask & ~s.fixedmask) == 0 546 innermask = outermask | self.thismask 547 innerbits = outerbits | b 548 output(ind, 'case ', str_case(b), ':\n') 549 output(ind, ' /* ', 550 str_match_bits(innerbits, innermask), ' */\n') 551 s.output_code(i + 4, extracted, innerbits, innermask) 552 output(ind, ' break;\n') 553 output(ind, '}\n') 554# end Tree 555 556 557class ExcMultiPattern(MultiPattern): 558 """Class representing a non-overlapping set of instruction patterns""" 559 560 def output_code(self, i, extracted, outerbits, outermask): 561 # Defer everything to our decomposed Tree node 562 self.tree.output_code(i, extracted, outerbits, outermask) 563 564 @staticmethod 565 def __build_tree(pats, outerbits, outermask): 566 # Find the intersection of all remaining fixedmask. 567 innermask = ~outermask & insnmask 568 for i in pats: 569 innermask &= i.fixedmask 570 571 if innermask == 0: 572 # Edge condition: One pattern covers the entire insnmask 573 if len(pats) == 1: 574 t = Tree(outermask, innermask) 575 t.subs.append((0, pats[0])) 576 return t 577 578 text = 'overlapping patterns:' 579 for p in pats: 580 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) 581 error_with_file(pats[0].file, pats[0].lineno, text) 582 583 fullmask = outermask | innermask 584 585 # Sort each element of pats into the bin selected by the mask. 586 bins = {} 587 for i in pats: 588 fb = i.fixedbits & innermask 589 if fb in bins: 590 bins[fb].append(i) 591 else: 592 bins[fb] = [i] 593 594 # We must recurse if any bin has more than one element or if 595 # the single element in the bin has not been fully matched. 596 t = Tree(fullmask, innermask) 597 598 for b, l in bins.items(): 599 s = l[0] 600 if len(l) > 1 or s.fixedmask & ~fullmask != 0: 601 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) 602 t.subs.append((b, s)) 603 604 return t 605 606 def build_tree(self): 607 super().prop_format() 608 self.tree = self.__build_tree(self.pats, self.fixedbits, 609 self.fixedmask) 610 611 @staticmethod 612 def __prop_format(tree): 613 """Propagate Format objects into the decode tree""" 614 615 # Depth first search. 616 for (b, s) in tree.subs: 617 if isinstance(s, Tree): 618 ExcMultiPattern.__prop_format(s) 619 620 # If all entries in SUBS have the same format, then 621 # propagate that into the tree. 622 f = None 623 for (b, s) in tree.subs: 624 if f is None: 625 f = s.base 626 if f is None: 627 return 628 if f is not s.base: 629 return 630 tree.base = f 631 632 def prop_format(self): 633 super().prop_format() 634 self.__prop_format(self.tree) 635 636# end ExcMultiPattern 637 638 639def parse_field(lineno, name, toks): 640 """Parse one instruction field from TOKS at LINENO""" 641 global fields 642 global insnwidth 643 644 # A "simple" field will have only one entry; 645 # a "multifield" will have several. 646 subs = [] 647 width = 0 648 func = None 649 for t in toks: 650 if re.match('^!function=', t): 651 if func: 652 error(lineno, 'duplicate function') 653 func = t.split('=') 654 func = func[1] 655 continue 656 657 if re.fullmatch('[0-9]+:s[0-9]+', t): 658 # Signed field extract 659 subtoks = t.split(':s') 660 sign = True 661 elif re.fullmatch('[0-9]+:[0-9]+', t): 662 # Unsigned field extract 663 subtoks = t.split(':') 664 sign = False 665 else: 666 error(lineno, 'invalid field token "{0}"'.format(t)) 667 po = int(subtoks[0]) 668 le = int(subtoks[1]) 669 if po + le > insnwidth: 670 error(lineno, 'field {0} too large'.format(t)) 671 f = Field(sign, po, le) 672 subs.append(f) 673 width += le 674 675 if width > insnwidth: 676 error(lineno, 'field too large') 677 if len(subs) == 0: 678 if func: 679 f = ParameterField(func) 680 else: 681 error(lineno, 'field with no value') 682 else: 683 if len(subs) == 1: 684 f = subs[0] 685 else: 686 mask = 0 687 for s in subs: 688 if mask & s.mask: 689 error(lineno, 'field components overlap') 690 mask |= s.mask 691 f = MultiField(subs, mask) 692 if func: 693 f = FunctionField(func, f) 694 695 if name in fields: 696 error(lineno, 'duplicate field', name) 697 fields[name] = f 698# end parse_field 699 700 701def parse_arguments(lineno, name, toks): 702 """Parse one argument set from TOKS at LINENO""" 703 global arguments 704 global re_C_ident 705 global anyextern 706 707 flds = [] 708 extern = False 709 for t in toks: 710 if re.fullmatch('!extern', t): 711 extern = True 712 anyextern = True 713 continue 714 if not re.fullmatch(re_C_ident, t): 715 error(lineno, 'invalid argument set token "{0}"'.format(t)) 716 if t in flds: 717 error(lineno, 'duplicate argument "{0}"'.format(t)) 718 flds.append(t) 719 720 if name in arguments: 721 error(lineno, 'duplicate argument set', name) 722 arguments[name] = Arguments(name, flds, extern) 723# end parse_arguments 724 725 726def lookup_field(lineno, name): 727 global fields 728 if name in fields: 729 return fields[name] 730 error(lineno, 'undefined field', name) 731 732 733def add_field(lineno, flds, new_name, f): 734 if new_name in flds: 735 error(lineno, 'duplicate field', new_name) 736 flds[new_name] = f 737 return flds 738 739 740def add_field_byname(lineno, flds, new_name, old_name): 741 return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) 742 743 744def infer_argument_set(flds): 745 global arguments 746 global decode_function 747 748 for arg in arguments.values(): 749 if eq_fields_for_args(flds, arg.fields): 750 return arg 751 752 name = decode_function + str(len(arguments)) 753 arg = Arguments(name, flds.keys(), False) 754 arguments[name] = arg 755 return arg 756 757 758def infer_format(arg, fieldmask, flds, width): 759 global arguments 760 global formats 761 global decode_function 762 763 const_flds = {} 764 var_flds = {} 765 for n, c in flds.items(): 766 if c is ConstField: 767 const_flds[n] = c 768 else: 769 var_flds[n] = c 770 771 # Look for an existing format with the same argument set and fields 772 for fmt in formats.values(): 773 if arg and fmt.base != arg: 774 continue 775 if fieldmask != fmt.fieldmask: 776 continue 777 if width != fmt.width: 778 continue 779 if not eq_fields_for_fmts(flds, fmt.fields): 780 continue 781 return (fmt, const_flds) 782 783 name = decode_function + '_Fmt_' + str(len(formats)) 784 if not arg: 785 arg = infer_argument_set(flds) 786 787 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) 788 formats[name] = fmt 789 790 return (fmt, const_flds) 791# end infer_format 792 793 794def parse_generic(lineno, parent_pat, name, toks): 795 """Parse one instruction format from TOKS at LINENO""" 796 global fields 797 global arguments 798 global formats 799 global allpatterns 800 global re_arg_ident 801 global re_fld_ident 802 global re_fmt_ident 803 global re_C_ident 804 global insnwidth 805 global insnmask 806 global variablewidth 807 808 is_format = parent_pat is None 809 810 fixedmask = 0 811 fixedbits = 0 812 undefmask = 0 813 width = 0 814 flds = {} 815 arg = None 816 fmt = None 817 for t in toks: 818 # '&Foo' gives a format an explicit argument set. 819 if re.fullmatch(re_arg_ident, t): 820 tt = t[1:] 821 if arg: 822 error(lineno, 'multiple argument sets') 823 if tt in arguments: 824 arg = arguments[tt] 825 else: 826 error(lineno, 'undefined argument set', t) 827 continue 828 829 # '@Foo' gives a pattern an explicit format. 830 if re.fullmatch(re_fmt_ident, t): 831 tt = t[1:] 832 if fmt: 833 error(lineno, 'multiple formats') 834 if tt in formats: 835 fmt = formats[tt] 836 else: 837 error(lineno, 'undefined format', t) 838 continue 839 840 # '%Foo' imports a field. 841 if re.fullmatch(re_fld_ident, t): 842 tt = t[1:] 843 flds = add_field_byname(lineno, flds, tt, tt) 844 continue 845 846 # 'Foo=%Bar' imports a field with a different name. 847 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t): 848 (fname, iname) = t.split('=%') 849 flds = add_field_byname(lineno, flds, fname, iname) 850 continue 851 852 # 'Foo=number' sets an argument field to a constant value 853 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t): 854 (fname, value) = t.split('=') 855 value = int(value) 856 flds = add_field(lineno, flds, fname, ConstField(value)) 857 continue 858 859 # Pattern of 0s, 1s, dots and dashes indicate required zeros, 860 # required ones, or dont-cares. 861 if re.fullmatch('[01.-]+', t): 862 shift = len(t) 863 fms = t.replace('0', '1') 864 fms = fms.replace('.', '0') 865 fms = fms.replace('-', '0') 866 fbs = t.replace('.', '0') 867 fbs = fbs.replace('-', '0') 868 ubm = t.replace('1', '0') 869 ubm = ubm.replace('.', '0') 870 ubm = ubm.replace('-', '1') 871 fms = int(fms, 2) 872 fbs = int(fbs, 2) 873 ubm = int(ubm, 2) 874 fixedbits = (fixedbits << shift) | fbs 875 fixedmask = (fixedmask << shift) | fms 876 undefmask = (undefmask << shift) | ubm 877 # Otherwise, fieldname:fieldwidth 878 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t): 879 (fname, flen) = t.split(':') 880 sign = False 881 if flen[0] == 's': 882 sign = True 883 flen = flen[1:] 884 shift = int(flen, 10) 885 if shift + width > insnwidth: 886 error(lineno, 'field {0} exceeds insnwidth'.format(fname)) 887 f = Field(sign, insnwidth - width - shift, shift) 888 flds = add_field(lineno, flds, fname, f) 889 fixedbits <<= shift 890 fixedmask <<= shift 891 undefmask <<= shift 892 else: 893 error(lineno, 'invalid token "{0}"'.format(t)) 894 width += shift 895 896 if variablewidth and width < insnwidth and width % 8 == 0: 897 shift = insnwidth - width 898 fixedbits <<= shift 899 fixedmask <<= shift 900 undefmask <<= shift 901 undefmask |= (1 << shift) - 1 902 903 # We should have filled in all of the bits of the instruction. 904 elif not (is_format and width == 0) and width != insnwidth: 905 error(lineno, 'definition has {0} bits'.format(width)) 906 907 # Do not check for fields overlapping fields; one valid usage 908 # is to be able to duplicate fields via import. 909 fieldmask = 0 910 for f in flds.values(): 911 fieldmask |= f.mask 912 913 # Fix up what we've parsed to match either a format or a pattern. 914 if is_format: 915 # Formats cannot reference formats. 916 if fmt: 917 error(lineno, 'format referencing format') 918 # If an argument set is given, then there should be no fields 919 # without a place to store it. 920 if arg: 921 for f in flds.keys(): 922 if f not in arg.fields: 923 error(lineno, 'field {0} not in argument set {1}' 924 .format(f, arg.name)) 925 else: 926 arg = infer_argument_set(flds) 927 if name in formats: 928 error(lineno, 'duplicate format name', name) 929 fmt = Format(name, lineno, arg, fixedbits, fixedmask, 930 undefmask, fieldmask, flds, width) 931 formats[name] = fmt 932 else: 933 # Patterns can reference a format ... 934 if fmt: 935 # ... but not an argument simultaneously 936 if arg: 937 error(lineno, 'pattern specifies both format and argument set') 938 if fixedmask & fmt.fixedmask: 939 error(lineno, 'pattern fixed bits overlap format fixed bits') 940 if width != fmt.width: 941 error(lineno, 'pattern uses format of different width') 942 fieldmask |= fmt.fieldmask 943 fixedbits |= fmt.fixedbits 944 fixedmask |= fmt.fixedmask 945 undefmask |= fmt.undefmask 946 else: 947 (fmt, flds) = infer_format(arg, fieldmask, flds, width) 948 arg = fmt.base 949 for f in flds.keys(): 950 if f not in arg.fields: 951 error(lineno, 'field {0} not in argument set {1}' 952 .format(f, arg.name)) 953 if f in fmt.fields.keys(): 954 error(lineno, 'field {0} set by format and pattern'.format(f)) 955 for f in arg.fields: 956 if f not in flds.keys() and f not in fmt.fields.keys(): 957 error(lineno, 'field {0} not initialized'.format(f)) 958 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, 959 undefmask, fieldmask, flds, width) 960 parent_pat.pats.append(pat) 961 allpatterns.append(pat) 962 963 # Validate the masks that we have assembled. 964 if fieldmask & fixedmask: 965 error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})' 966 .format(fieldmask, fixedmask)) 967 if fieldmask & undefmask: 968 error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 969 .format(fieldmask, undefmask)) 970 if fixedmask & undefmask: 971 error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 972 .format(fixedmask, undefmask)) 973 if not is_format: 974 allbits = fieldmask | fixedmask | undefmask 975 if allbits != insnmask: 976 error(lineno, 'bits left unspecified (0x{0:08x})' 977 .format(allbits ^ insnmask)) 978# end parse_general 979 980 981def parse_file(f, parent_pat): 982 """Parse all of the patterns within a file""" 983 global re_arg_ident 984 global re_fld_ident 985 global re_fmt_ident 986 global re_pat_ident 987 988 # Read all of the lines of the file. Concatenate lines 989 # ending in backslash; discard empty lines and comments. 990 toks = [] 991 lineno = 0 992 nesting = 0 993 nesting_pats = [] 994 995 for line in f: 996 lineno += 1 997 998 # Expand and strip spaces, to find indent. 999 line = line.rstrip() 1000 line = line.expandtabs() 1001 len1 = len(line) 1002 line = line.lstrip() 1003 len2 = len(line) 1004 1005 # Discard comments 1006 end = line.find('#') 1007 if end >= 0: 1008 line = line[:end] 1009 1010 t = line.split() 1011 if len(toks) != 0: 1012 # Next line after continuation 1013 toks.extend(t) 1014 else: 1015 # Allow completely blank lines. 1016 if len1 == 0: 1017 continue 1018 indent = len1 - len2 1019 # Empty line due to comment. 1020 if len(t) == 0: 1021 # Indentation must be correct, even for comment lines. 1022 if indent != nesting: 1023 error(lineno, 'indentation ', indent, ' != ', nesting) 1024 continue 1025 start_lineno = lineno 1026 toks = t 1027 1028 # Continuation? 1029 if toks[-1] == '\\': 1030 toks.pop() 1031 continue 1032 1033 name = toks[0] 1034 del toks[0] 1035 1036 # End nesting? 1037 if name == '}' or name == ']': 1038 if len(toks) != 0: 1039 error(start_lineno, 'extra tokens after close brace') 1040 1041 # Make sure { } and [ ] nest properly. 1042 if (name == '}') != isinstance(parent_pat, IncMultiPattern): 1043 error(lineno, 'mismatched close brace') 1044 1045 try: 1046 parent_pat = nesting_pats.pop() 1047 except: 1048 error(lineno, 'extra close brace') 1049 1050 nesting -= 2 1051 if indent != nesting: 1052 error(lineno, 'indentation ', indent, ' != ', nesting) 1053 1054 toks = [] 1055 continue 1056 1057 # Everything else should have current indentation. 1058 if indent != nesting: 1059 error(start_lineno, 'indentation ', indent, ' != ', nesting) 1060 1061 # Start nesting? 1062 if name == '{' or name == '[': 1063 if len(toks) != 0: 1064 error(start_lineno, 'extra tokens after open brace') 1065 1066 if name == '{': 1067 nested_pat = IncMultiPattern(start_lineno) 1068 else: 1069 nested_pat = ExcMultiPattern(start_lineno) 1070 parent_pat.pats.append(nested_pat) 1071 nesting_pats.append(parent_pat) 1072 parent_pat = nested_pat 1073 1074 nesting += 2 1075 toks = [] 1076 continue 1077 1078 # Determine the type of object needing to be parsed. 1079 if re.fullmatch(re_fld_ident, name): 1080 parse_field(start_lineno, name[1:], toks) 1081 elif re.fullmatch(re_arg_ident, name): 1082 parse_arguments(start_lineno, name[1:], toks) 1083 elif re.fullmatch(re_fmt_ident, name): 1084 parse_generic(start_lineno, None, name[1:], toks) 1085 elif re.fullmatch(re_pat_ident, name): 1086 parse_generic(start_lineno, parent_pat, name, toks) 1087 else: 1088 error(lineno, 'invalid token "{0}"'.format(name)) 1089 toks = [] 1090 1091 if nesting != 0: 1092 error(lineno, 'missing close brace') 1093# end parse_file 1094 1095 1096class SizeTree: 1097 """Class representing a node in a size decode tree""" 1098 1099 def __init__(self, m, w): 1100 self.mask = m 1101 self.subs = [] 1102 self.base = None 1103 self.width = w 1104 1105 def str1(self, i): 1106 ind = str_indent(i) 1107 r = '{0}{1:08x}'.format(ind, self.mask) 1108 r += ' [\n' 1109 for (b, s) in self.subs: 1110 r += '{0} {1:08x}:\n'.format(ind, b) 1111 r += s.str1(i + 4) + '\n' 1112 r += ind + ']' 1113 return r 1114 1115 def __str__(self): 1116 return self.str1(0) 1117 1118 def output_code(self, i, extracted, outerbits, outermask): 1119 ind = str_indent(i) 1120 1121 # If we need to load more bytes to test, do so now. 1122 if extracted < self.width: 1123 output(ind, 'insn = ', decode_function, 1124 '_load_bytes(ctx, insn, {0}, {1});\n' 1125 .format(extracted // 8, self.width // 8)); 1126 extracted = self.width 1127 1128 # Attempt to aid the compiler in producing compact switch statements. 1129 # If the bits in the mask are contiguous, extract them. 1130 sh = is_contiguous(self.mask) 1131 if sh > 0: 1132 # Propagate SH down into the local functions. 1133 def str_switch(b, sh=sh): 1134 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 1135 1136 def str_case(b, sh=sh): 1137 return '0x{0:x}'.format(b >> sh) 1138 else: 1139 def str_switch(b): 1140 return 'insn & 0x{0:08x}'.format(b) 1141 1142 def str_case(b): 1143 return '0x{0:08x}'.format(b) 1144 1145 output(ind, 'switch (', str_switch(self.mask), ') {\n') 1146 for b, s in sorted(self.subs): 1147 innermask = outermask | self.mask 1148 innerbits = outerbits | b 1149 output(ind, 'case ', str_case(b), ':\n') 1150 output(ind, ' /* ', 1151 str_match_bits(innerbits, innermask), ' */\n') 1152 s.output_code(i + 4, extracted, innerbits, innermask) 1153 output(ind, '}\n') 1154 output(ind, 'return insn;\n') 1155# end SizeTree 1156 1157class SizeLeaf: 1158 """Class representing a leaf node in a size decode tree""" 1159 1160 def __init__(self, m, w): 1161 self.mask = m 1162 self.width = w 1163 1164 def str1(self, i): 1165 ind = str_indent(i) 1166 return '{0}{1:08x}'.format(ind, self.mask) 1167 1168 def __str__(self): 1169 return self.str1(0) 1170 1171 def output_code(self, i, extracted, outerbits, outermask): 1172 global decode_function 1173 ind = str_indent(i) 1174 1175 # If we need to load more bytes, do so now. 1176 if extracted < self.width: 1177 output(ind, 'insn = ', decode_function, 1178 '_load_bytes(ctx, insn, {0}, {1});\n' 1179 .format(extracted // 8, self.width // 8)); 1180 extracted = self.width 1181 output(ind, 'return insn;\n') 1182# end SizeLeaf 1183 1184 1185def build_size_tree(pats, width, outerbits, outermask): 1186 global insnwidth 1187 1188 # Collect the mask of bits that are fixed in this width 1189 innermask = 0xff << (insnwidth - width) 1190 innermask &= ~outermask 1191 minwidth = None 1192 onewidth = True 1193 for i in pats: 1194 innermask &= i.fixedmask 1195 if minwidth is None: 1196 minwidth = i.width 1197 elif minwidth != i.width: 1198 onewidth = False; 1199 if minwidth < i.width: 1200 minwidth = i.width 1201 1202 if onewidth: 1203 return SizeLeaf(innermask, minwidth) 1204 1205 if innermask == 0: 1206 if width < minwidth: 1207 return build_size_tree(pats, width + 8, outerbits, outermask) 1208 1209 pnames = [] 1210 for p in pats: 1211 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) 1212 error_with_file(pats[0].file, pats[0].lineno, 1213 'overlapping patterns size {0}:'.format(width), pnames) 1214 1215 bins = {} 1216 for i in pats: 1217 fb = i.fixedbits & innermask 1218 if fb in bins: 1219 bins[fb].append(i) 1220 else: 1221 bins[fb] = [i] 1222 1223 fullmask = outermask | innermask 1224 lens = sorted(bins.keys()) 1225 if len(lens) == 1: 1226 b = lens[0] 1227 return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) 1228 1229 r = SizeTree(innermask, width) 1230 for b, l in bins.items(): 1231 s = build_size_tree(l, width, b | outerbits, fullmask) 1232 r.subs.append((b, s)) 1233 return r 1234# end build_size_tree 1235 1236 1237def prop_size(tree): 1238 """Propagate minimum widths up the decode size tree""" 1239 1240 if isinstance(tree, SizeTree): 1241 min = None 1242 for (b, s) in tree.subs: 1243 width = prop_size(s) 1244 if min is None or min > width: 1245 min = width 1246 assert min >= tree.width 1247 tree.width = min 1248 else: 1249 min = tree.width 1250 return min 1251# end prop_size 1252 1253 1254def main(): 1255 global arguments 1256 global formats 1257 global allpatterns 1258 global translate_scope 1259 global translate_prefix 1260 global output_fd 1261 global output_file 1262 global input_file 1263 global insnwidth 1264 global insntype 1265 global insnmask 1266 global decode_function 1267 global variablewidth 1268 global anyextern 1269 1270 decode_scope = 'static ' 1271 1272 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', 1273 'static-decode=', 'varinsnwidth='] 1274 try: 1275 (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) 1276 except getopt.GetoptError as err: 1277 error(0, err) 1278 for o, a in opts: 1279 if o in ('-o', '--output'): 1280 output_file = a 1281 elif o == '--decode': 1282 decode_function = a 1283 decode_scope = '' 1284 elif o == '--static-decode': 1285 decode_function = a 1286 elif o == '--translate': 1287 translate_prefix = a 1288 translate_scope = '' 1289 elif o in ('-w', '--insnwidth', '--varinsnwidth'): 1290 if o == '--varinsnwidth': 1291 variablewidth = True 1292 insnwidth = int(a) 1293 if insnwidth == 16: 1294 insntype = 'uint16_t' 1295 insnmask = 0xffff 1296 elif insnwidth != 32: 1297 error(0, 'cannot handle insns of width', insnwidth) 1298 else: 1299 assert False, 'unhandled option' 1300 1301 if len(args) < 1: 1302 error(0, 'missing input file') 1303 1304 toppat = ExcMultiPattern(0) 1305 1306 for filename in args: 1307 input_file = filename 1308 f = open(filename, 'rt', encoding='utf-8') 1309 parse_file(f, toppat) 1310 f.close() 1311 1312 # We do not want to compute masks for toppat, because those masks 1313 # are used as a starting point for build_tree. For toppat, we must 1314 # insist that decode begins from naught. 1315 for i in toppat.pats: 1316 i.prop_masks() 1317 1318 toppat.build_tree() 1319 toppat.prop_format() 1320 1321 if variablewidth: 1322 for i in toppat.pats: 1323 i.prop_width() 1324 stree = build_size_tree(toppat.pats, 8, 0, 0) 1325 prop_size(stree) 1326 1327 if output_file: 1328 output_fd = open(output_file, 'wt', encoding='utf-8') 1329 else: 1330 output_fd = io.TextIOWrapper(sys.stdout.buffer, 1331 encoding=sys.stdout.encoding, 1332 errors="ignore") 1333 1334 output_autogen() 1335 for n in sorted(arguments.keys()): 1336 f = arguments[n] 1337 f.output_def() 1338 1339 # A single translate function can be invoked for different patterns. 1340 # Make sure that the argument sets are the same, and declare the 1341 # function only once. 1342 # 1343 # If we're sharing formats, we're likely also sharing trans_* functions, 1344 # but we can't tell which ones. Prevent issues from the compiler by 1345 # suppressing redundant declaration warnings. 1346 if anyextern: 1347 output("#pragma GCC diagnostic push\n", 1348 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", 1349 "#ifdef __clang__\n" 1350 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", 1351 "#endif\n\n") 1352 1353 out_pats = {} 1354 for i in allpatterns: 1355 if i.name in out_pats: 1356 p = out_pats[i.name] 1357 if i.base.base != p.base.base: 1358 error(0, i.name, ' has conflicting argument sets') 1359 else: 1360 i.output_decl() 1361 out_pats[i.name] = i 1362 output('\n') 1363 1364 if anyextern: 1365 output("#pragma GCC diagnostic pop\n\n") 1366 1367 for n in sorted(formats.keys()): 1368 f = formats[n] 1369 f.output_extract() 1370 1371 output(decode_scope, 'bool ', decode_function, 1372 '(DisasContext *ctx, ', insntype, ' insn)\n{\n') 1373 1374 i4 = str_indent(4) 1375 1376 if len(allpatterns) != 0: 1377 output(i4, 'union {\n') 1378 for n in sorted(arguments.keys()): 1379 f = arguments[n] 1380 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') 1381 output(i4, '} u;\n\n') 1382 toppat.output_code(4, False, 0, 0) 1383 1384 output(i4, 'return false;\n') 1385 output('}\n') 1386 1387 if variablewidth: 1388 output('\n', decode_scope, insntype, ' ', decode_function, 1389 '_load(DisasContext *ctx)\n{\n', 1390 ' ', insntype, ' insn = 0;\n\n') 1391 stree.output_code(4, 0, 0, 0) 1392 output('}\n') 1393 1394 if output_file: 1395 output_fd.close() 1396# end main 1397 1398 1399if __name__ == '__main__': 1400 main() 1401