1#!/usr/bin/env python3 2# Copyright (c) 2018 Linaro Limited 3# 4# This library is free software; you can redistribute it and/or 5# modify it under the terms of the GNU Lesser General Public 6# License as published by the Free Software Foundation; either 7# version 2 of the License, or (at your option) any later version. 8# 9# This library is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12# Lesser General Public License for more details. 13# 14# You should have received a copy of the GNU Lesser General Public 15# License along with this library; if not, see <http://www.gnu.org/licenses/>. 16# 17 18# 19# Generate a decoding tree from a specification file. 20# See the syntax and semantics in docs/devel/decodetree.rst. 21# 22 23import os 24import re 25import sys 26import getopt 27 28insnwidth = 32 29insnmask = 0xffffffff 30variablewidth = False 31fields = {} 32arguments = {} 33formats = {} 34allpatterns = [] 35anyextern = False 36 37translate_prefix = 'trans' 38translate_scope = 'static ' 39input_file = '' 40output_file = None 41output_fd = None 42insntype = 'uint32_t' 43decode_function = 'decode' 44 45# An identifier for C. 46re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' 47 48# Identifiers for Arguments, Fields, Formats and Patterns. 49re_arg_ident = '&[a-zA-Z0-9_]*' 50re_fld_ident = '%[a-zA-Z0-9_]*' 51re_fmt_ident = '@[a-zA-Z0-9_]*' 52re_pat_ident = '[a-zA-Z0-9_]*' 53 54def error_with_file(file, lineno, *args): 55 """Print an error message from file:line and args and exit.""" 56 global output_file 57 global output_fd 58 59 prefix = '' 60 if file: 61 prefix += '{0}:'.format(file) 62 if lineno: 63 prefix += '{0}:'.format(lineno) 64 if prefix: 65 prefix += ' ' 66 print(prefix, end='error: ', file=sys.stderr) 67 print(*args, file=sys.stderr) 68 69 if output_file and output_fd: 70 output_fd.close() 71 os.remove(output_file) 72 exit(1) 73# end error_with_file 74 75 76def error(lineno, *args): 77 error_with_file(input_file, lineno, *args) 78# end error 79 80 81def output(*args): 82 global output_fd 83 for a in args: 84 output_fd.write(a) 85 86 87def output_autogen(): 88 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') 89 90 91def str_indent(c): 92 """Return a string with C spaces""" 93 return ' ' * c 94 95 96def str_fields(fields): 97 """Return a string uniquely identifing FIELDS""" 98 r = '' 99 for n in sorted(fields.keys()): 100 r += '_' + n 101 return r[1:] 102 103 104def str_match_bits(bits, mask): 105 """Return a string pretty-printing BITS/MASK""" 106 global insnwidth 107 108 i = 1 << (insnwidth - 1) 109 space = 0x01010100 110 r = '' 111 while i != 0: 112 if i & mask: 113 if i & bits: 114 r += '1' 115 else: 116 r += '0' 117 else: 118 r += '.' 119 if i & space: 120 r += ' ' 121 i >>= 1 122 return r 123 124 125def is_pow2(x): 126 """Return true iff X is equal to a power of 2.""" 127 return (x & (x - 1)) == 0 128 129 130def ctz(x): 131 """Return the number of times 2 factors into X.""" 132 assert x != 0 133 r = 0 134 while ((x >> r) & 1) == 0: 135 r += 1 136 return r 137 138 139def is_contiguous(bits): 140 if bits == 0: 141 return -1 142 shift = ctz(bits) 143 if is_pow2((bits >> shift) + 1): 144 return shift 145 else: 146 return -1 147 148 149def eq_fields_for_args(flds_a, flds_b): 150 if len(flds_a) != len(flds_b): 151 return False 152 for k, a in flds_a.items(): 153 if k not in flds_b: 154 return False 155 return True 156 157 158def eq_fields_for_fmts(flds_a, flds_b): 159 if len(flds_a) != len(flds_b): 160 return False 161 for k, a in flds_a.items(): 162 if k not in flds_b: 163 return False 164 b = flds_b[k] 165 if a.__class__ != b.__class__ or a != b: 166 return False 167 return True 168 169 170class Field: 171 """Class representing a simple instruction field""" 172 def __init__(self, sign, pos, len): 173 self.sign = sign 174 self.pos = pos 175 self.len = len 176 self.mask = ((1 << len) - 1) << pos 177 178 def __str__(self): 179 if self.sign: 180 s = 's' 181 else: 182 s = '' 183 return str(self.pos) + ':' + s + str(self.len) 184 185 def str_extract(self): 186 if self.sign: 187 extr = 'sextract32' 188 else: 189 extr = 'extract32' 190 return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len) 191 192 def __eq__(self, other): 193 return self.sign == other.sign and self.mask == other.mask 194 195 def __ne__(self, other): 196 return not self.__eq__(other) 197# end Field 198 199 200class MultiField: 201 """Class representing a compound instruction field""" 202 def __init__(self, subs, mask): 203 self.subs = subs 204 self.sign = subs[0].sign 205 self.mask = mask 206 207 def __str__(self): 208 return str(self.subs) 209 210 def str_extract(self): 211 ret = '0' 212 pos = 0 213 for f in reversed(self.subs): 214 if pos == 0: 215 ret = f.str_extract() 216 else: 217 ret = 'deposit32({0}, {1}, {2}, {3})' \ 218 .format(ret, pos, 32 - pos, f.str_extract()) 219 pos += f.len 220 return ret 221 222 def __ne__(self, other): 223 if len(self.subs) != len(other.subs): 224 return True 225 for a, b in zip(self.subs, other.subs): 226 if a.__class__ != b.__class__ or a != b: 227 return True 228 return False 229 230 def __eq__(self, other): 231 return not self.__ne__(other) 232# end MultiField 233 234 235class ConstField: 236 """Class representing an argument field with constant value""" 237 def __init__(self, value): 238 self.value = value 239 self.mask = 0 240 self.sign = value < 0 241 242 def __str__(self): 243 return str(self.value) 244 245 def str_extract(self): 246 return str(self.value) 247 248 def __cmp__(self, other): 249 return self.value - other.value 250# end ConstField 251 252 253class FunctionField: 254 """Class representing a field passed through a function""" 255 def __init__(self, func, base): 256 self.mask = base.mask 257 self.sign = base.sign 258 self.base = base 259 self.func = func 260 261 def __str__(self): 262 return self.func + '(' + str(self.base) + ')' 263 264 def str_extract(self): 265 return self.func + '(ctx, ' + self.base.str_extract() + ')' 266 267 def __eq__(self, other): 268 return self.func == other.func and self.base == other.base 269 270 def __ne__(self, other): 271 return not self.__eq__(other) 272# end FunctionField 273 274 275class ParameterField: 276 """Class representing a pseudo-field read from a function""" 277 def __init__(self, func): 278 self.mask = 0 279 self.sign = 0 280 self.func = func 281 282 def __str__(self): 283 return self.func 284 285 def str_extract(self): 286 return self.func + '(ctx)' 287 288 def __eq__(self, other): 289 return self.func == other.func 290 291 def __ne__(self, other): 292 return not self.__eq__(other) 293# end ParameterField 294 295 296class Arguments: 297 """Class representing the extracted fields of a format""" 298 def __init__(self, nm, flds, extern): 299 self.name = nm 300 self.extern = extern 301 self.fields = sorted(flds) 302 303 def __str__(self): 304 return self.name + ' ' + str(self.fields) 305 306 def struct_name(self): 307 return 'arg_' + self.name 308 309 def output_def(self): 310 if not self.extern: 311 output('typedef struct {\n') 312 for n in self.fields: 313 output(' int ', n, ';\n') 314 output('} ', self.struct_name(), ';\n\n') 315# end Arguments 316 317 318class General: 319 """Common code between instruction formats and instruction patterns""" 320 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): 321 self.name = name 322 self.file = input_file 323 self.lineno = lineno 324 self.base = base 325 self.fixedbits = fixb 326 self.fixedmask = fixm 327 self.undefmask = udfm 328 self.fieldmask = fldm 329 self.fields = flds 330 self.width = w 331 332 def __str__(self): 333 return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) 334 335 def str1(self, i): 336 return str_indent(i) + self.__str__() 337# end General 338 339 340class Format(General): 341 """Class representing an instruction format""" 342 343 def extract_name(self): 344 global decode_function 345 return decode_function + '_extract_' + self.name 346 347 def output_extract(self): 348 output('static void ', self.extract_name(), '(DisasContext *ctx, ', 349 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') 350 for n, f in self.fields.items(): 351 output(' a->', n, ' = ', f.str_extract(), ';\n') 352 output('}\n\n') 353# end Format 354 355 356class Pattern(General): 357 """Class representing an instruction pattern""" 358 359 def output_decl(self): 360 global translate_scope 361 global translate_prefix 362 output('typedef ', self.base.base.struct_name(), 363 ' arg_', self.name, ';\n') 364 output(translate_scope, 'bool ', translate_prefix, '_', self.name, 365 '(DisasContext *ctx, arg_', self.name, ' *a);\n') 366 367 def output_code(self, i, extracted, outerbits, outermask): 368 global translate_prefix 369 ind = str_indent(i) 370 arg = self.base.base.name 371 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') 372 if not extracted: 373 output(ind, self.base.extract_name(), 374 '(ctx, &u.f_', arg, ', insn);\n') 375 for n, f in self.fields.items(): 376 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') 377 output(ind, 'if (', translate_prefix, '_', self.name, 378 '(ctx, &u.f_', arg, ')) return true;\n') 379 380 # Normal patterns do not have children. 381 def build_tree(self): 382 return 383 def prop_masks(self): 384 return 385 def prop_format(self): 386 return 387 def prop_width(self): 388 return 389 390# end Pattern 391 392 393class MultiPattern(General): 394 """Class representing a set of instruction patterns""" 395 396 def __init__(self, lineno): 397 self.file = input_file 398 self.lineno = lineno 399 self.pats = [] 400 self.base = None 401 self.fixedbits = 0 402 self.fixedmask = 0 403 self.undefmask = 0 404 self.width = None 405 406 def __str__(self): 407 r = 'group' 408 if self.fixedbits is not None: 409 r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) 410 return r 411 412 def output_decl(self): 413 for p in self.pats: 414 p.output_decl() 415 416 def prop_masks(self): 417 global insnmask 418 419 fixedmask = insnmask 420 undefmask = insnmask 421 422 # Collect fixedmask/undefmask for all of the children. 423 for p in self.pats: 424 p.prop_masks() 425 fixedmask &= p.fixedmask 426 undefmask &= p.undefmask 427 428 # Widen fixedmask until all fixedbits match 429 repeat = True 430 fixedbits = 0 431 while repeat and fixedmask != 0: 432 fixedbits = None 433 for p in self.pats: 434 thisbits = p.fixedbits & fixedmask 435 if fixedbits is None: 436 fixedbits = thisbits 437 elif fixedbits != thisbits: 438 fixedmask &= ~(fixedbits ^ thisbits) 439 break 440 else: 441 repeat = False 442 443 self.fixedbits = fixedbits 444 self.fixedmask = fixedmask 445 self.undefmask = undefmask 446 447 def build_tree(self): 448 for p in self.pats: 449 p.build_tree() 450 451 def prop_format(self): 452 for p in self.pats: 453 p.build_tree() 454 455 def prop_width(self): 456 width = None 457 for p in self.pats: 458 p.prop_width() 459 if width is None: 460 width = p.width 461 elif width != p.width: 462 error_with_file(self.file, self.lineno, 463 'width mismatch in patterns within braces') 464 self.width = width 465 466# end MultiPattern 467 468 469class IncMultiPattern(MultiPattern): 470 """Class representing an overlapping set of instruction patterns""" 471 472 def output_code(self, i, extracted, outerbits, outermask): 473 global translate_prefix 474 ind = str_indent(i) 475 for p in self.pats: 476 if outermask != p.fixedmask: 477 innermask = p.fixedmask & ~outermask 478 innerbits = p.fixedbits & ~outermask 479 output(ind, 'if ((insn & ', 480 '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits), 481 ') {\n') 482 output(ind, ' /* ', 483 str_match_bits(p.fixedbits, p.fixedmask), ' */\n') 484 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) 485 output(ind, '}\n') 486 else: 487 p.output_code(i, extracted, p.fixedbits, p.fixedmask) 488#end IncMultiPattern 489 490 491class Tree: 492 """Class representing a node in a decode tree""" 493 494 def __init__(self, fm, tm): 495 self.fixedmask = fm 496 self.thismask = tm 497 self.subs = [] 498 self.base = None 499 500 def str1(self, i): 501 ind = str_indent(i) 502 r = '{0}{1:08x}'.format(ind, self.fixedmask) 503 if self.format: 504 r += ' ' + self.format.name 505 r += ' [\n' 506 for (b, s) in self.subs: 507 r += '{0} {1:08x}:\n'.format(ind, b) 508 r += s.str1(i + 4) + '\n' 509 r += ind + ']' 510 return r 511 512 def __str__(self): 513 return self.str1(0) 514 515 def output_code(self, i, extracted, outerbits, outermask): 516 ind = str_indent(i) 517 518 # If we identified all nodes below have the same format, 519 # extract the fields now. 520 if not extracted and self.base: 521 output(ind, self.base.extract_name(), 522 '(ctx, &u.f_', self.base.base.name, ', insn);\n') 523 extracted = True 524 525 # Attempt to aid the compiler in producing compact switch statements. 526 # If the bits in the mask are contiguous, extract them. 527 sh = is_contiguous(self.thismask) 528 if sh > 0: 529 # Propagate SH down into the local functions. 530 def str_switch(b, sh=sh): 531 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 532 533 def str_case(b, sh=sh): 534 return '0x{0:x}'.format(b >> sh) 535 else: 536 def str_switch(b): 537 return 'insn & 0x{0:08x}'.format(b) 538 539 def str_case(b): 540 return '0x{0:08x}'.format(b) 541 542 output(ind, 'switch (', str_switch(self.thismask), ') {\n') 543 for b, s in sorted(self.subs): 544 assert (self.thismask & ~s.fixedmask) == 0 545 innermask = outermask | self.thismask 546 innerbits = outerbits | b 547 output(ind, 'case ', str_case(b), ':\n') 548 output(ind, ' /* ', 549 str_match_bits(innerbits, innermask), ' */\n') 550 s.output_code(i + 4, extracted, innerbits, innermask) 551 output(ind, ' return false;\n') 552 output(ind, '}\n') 553# end Tree 554 555 556class ExcMultiPattern(MultiPattern): 557 """Class representing a non-overlapping set of instruction patterns""" 558 559 def output_code(self, i, extracted, outerbits, outermask): 560 # Defer everything to our decomposed Tree node 561 self.tree.output_code(i, extracted, outerbits, outermask) 562 563 @staticmethod 564 def __build_tree(pats, outerbits, outermask): 565 # Find the intersection of all remaining fixedmask. 566 innermask = ~outermask & insnmask 567 for i in pats: 568 innermask &= i.fixedmask 569 570 if innermask == 0: 571 # Edge condition: One pattern covers the entire insnmask 572 if len(pats) == 1: 573 t = Tree(outermask, innermask) 574 t.subs.append((0, pats[0])) 575 return t 576 577 text = 'overlapping patterns:' 578 for p in pats: 579 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) 580 error_with_file(pats[0].file, pats[0].lineno, text) 581 582 fullmask = outermask | innermask 583 584 # Sort each element of pats into the bin selected by the mask. 585 bins = {} 586 for i in pats: 587 fb = i.fixedbits & innermask 588 if fb in bins: 589 bins[fb].append(i) 590 else: 591 bins[fb] = [i] 592 593 # We must recurse if any bin has more than one element or if 594 # the single element in the bin has not been fully matched. 595 t = Tree(fullmask, innermask) 596 597 for b, l in bins.items(): 598 s = l[0] 599 if len(l) > 1 or s.fixedmask & ~fullmask != 0: 600 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) 601 t.subs.append((b, s)) 602 603 return t 604 605 def build_tree(self): 606 super().prop_format() 607 self.tree = self.__build_tree(self.pats, self.fixedbits, 608 self.fixedmask) 609 610 @staticmethod 611 def __prop_format(tree): 612 """Propagate Format objects into the decode tree""" 613 614 # Depth first search. 615 for (b, s) in tree.subs: 616 if isinstance(s, Tree): 617 ExcMultiPattern.__prop_format(s) 618 619 # If all entries in SUBS have the same format, then 620 # propagate that into the tree. 621 f = None 622 for (b, s) in tree.subs: 623 if f is None: 624 f = s.base 625 if f is None: 626 return 627 if f is not s.base: 628 return 629 tree.base = f 630 631 def prop_format(self): 632 super().prop_format() 633 self.__prop_format(self.tree) 634 635# end ExcMultiPattern 636 637 638def parse_field(lineno, name, toks): 639 """Parse one instruction field from TOKS at LINENO""" 640 global fields 641 global insnwidth 642 643 # A "simple" field will have only one entry; 644 # a "multifield" will have several. 645 subs = [] 646 width = 0 647 func = None 648 for t in toks: 649 if re.match('^!function=', t): 650 if func: 651 error(lineno, 'duplicate function') 652 func = t.split('=') 653 func = func[1] 654 continue 655 656 if re.fullmatch('[0-9]+:s[0-9]+', t): 657 # Signed field extract 658 subtoks = t.split(':s') 659 sign = True 660 elif re.fullmatch('[0-9]+:[0-9]+', t): 661 # Unsigned field extract 662 subtoks = t.split(':') 663 sign = False 664 else: 665 error(lineno, 'invalid field token "{0}"'.format(t)) 666 po = int(subtoks[0]) 667 le = int(subtoks[1]) 668 if po + le > insnwidth: 669 error(lineno, 'field {0} too large'.format(t)) 670 f = Field(sign, po, le) 671 subs.append(f) 672 width += le 673 674 if width > insnwidth: 675 error(lineno, 'field too large') 676 if len(subs) == 0: 677 if func: 678 f = ParameterField(func) 679 else: 680 error(lineno, 'field with no value') 681 else: 682 if len(subs) == 1: 683 f = subs[0] 684 else: 685 mask = 0 686 for s in subs: 687 if mask & s.mask: 688 error(lineno, 'field components overlap') 689 mask |= s.mask 690 f = MultiField(subs, mask) 691 if func: 692 f = FunctionField(func, f) 693 694 if name in fields: 695 error(lineno, 'duplicate field', name) 696 fields[name] = f 697# end parse_field 698 699 700def parse_arguments(lineno, name, toks): 701 """Parse one argument set from TOKS at LINENO""" 702 global arguments 703 global re_C_ident 704 global anyextern 705 706 flds = [] 707 extern = False 708 for t in toks: 709 if re.fullmatch('!extern', t): 710 extern = True 711 anyextern = True 712 continue 713 if not re.fullmatch(re_C_ident, t): 714 error(lineno, 'invalid argument set token "{0}"'.format(t)) 715 if t in flds: 716 error(lineno, 'duplicate argument "{0}"'.format(t)) 717 flds.append(t) 718 719 if name in arguments: 720 error(lineno, 'duplicate argument set', name) 721 arguments[name] = Arguments(name, flds, extern) 722# end parse_arguments 723 724 725def lookup_field(lineno, name): 726 global fields 727 if name in fields: 728 return fields[name] 729 error(lineno, 'undefined field', name) 730 731 732def add_field(lineno, flds, new_name, f): 733 if new_name in flds: 734 error(lineno, 'duplicate field', new_name) 735 flds[new_name] = f 736 return flds 737 738 739def add_field_byname(lineno, flds, new_name, old_name): 740 return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) 741 742 743def infer_argument_set(flds): 744 global arguments 745 global decode_function 746 747 for arg in arguments.values(): 748 if eq_fields_for_args(flds, arg.fields): 749 return arg 750 751 name = decode_function + str(len(arguments)) 752 arg = Arguments(name, flds.keys(), False) 753 arguments[name] = arg 754 return arg 755 756 757def infer_format(arg, fieldmask, flds, width): 758 global arguments 759 global formats 760 global decode_function 761 762 const_flds = {} 763 var_flds = {} 764 for n, c in flds.items(): 765 if c is ConstField: 766 const_flds[n] = c 767 else: 768 var_flds[n] = c 769 770 # Look for an existing format with the same argument set and fields 771 for fmt in formats.values(): 772 if arg and fmt.base != arg: 773 continue 774 if fieldmask != fmt.fieldmask: 775 continue 776 if width != fmt.width: 777 continue 778 if not eq_fields_for_fmts(flds, fmt.fields): 779 continue 780 return (fmt, const_flds) 781 782 name = decode_function + '_Fmt_' + str(len(formats)) 783 if not arg: 784 arg = infer_argument_set(flds) 785 786 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) 787 formats[name] = fmt 788 789 return (fmt, const_flds) 790# end infer_format 791 792 793def parse_generic(lineno, parent_pat, name, toks): 794 """Parse one instruction format from TOKS at LINENO""" 795 global fields 796 global arguments 797 global formats 798 global allpatterns 799 global re_arg_ident 800 global re_fld_ident 801 global re_fmt_ident 802 global re_C_ident 803 global insnwidth 804 global insnmask 805 global variablewidth 806 807 is_format = parent_pat is None 808 809 fixedmask = 0 810 fixedbits = 0 811 undefmask = 0 812 width = 0 813 flds = {} 814 arg = None 815 fmt = None 816 for t in toks: 817 # '&Foo' gives a format an explcit argument set. 818 if re.fullmatch(re_arg_ident, t): 819 tt = t[1:] 820 if arg: 821 error(lineno, 'multiple argument sets') 822 if tt in arguments: 823 arg = arguments[tt] 824 else: 825 error(lineno, 'undefined argument set', t) 826 continue 827 828 # '@Foo' gives a pattern an explicit format. 829 if re.fullmatch(re_fmt_ident, t): 830 tt = t[1:] 831 if fmt: 832 error(lineno, 'multiple formats') 833 if tt in formats: 834 fmt = formats[tt] 835 else: 836 error(lineno, 'undefined format', t) 837 continue 838 839 # '%Foo' imports a field. 840 if re.fullmatch(re_fld_ident, t): 841 tt = t[1:] 842 flds = add_field_byname(lineno, flds, tt, tt) 843 continue 844 845 # 'Foo=%Bar' imports a field with a different name. 846 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t): 847 (fname, iname) = t.split('=%') 848 flds = add_field_byname(lineno, flds, fname, iname) 849 continue 850 851 # 'Foo=number' sets an argument field to a constant value 852 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t): 853 (fname, value) = t.split('=') 854 value = int(value) 855 flds = add_field(lineno, flds, fname, ConstField(value)) 856 continue 857 858 # Pattern of 0s, 1s, dots and dashes indicate required zeros, 859 # required ones, or dont-cares. 860 if re.fullmatch('[01.-]+', t): 861 shift = len(t) 862 fms = t.replace('0', '1') 863 fms = fms.replace('.', '0') 864 fms = fms.replace('-', '0') 865 fbs = t.replace('.', '0') 866 fbs = fbs.replace('-', '0') 867 ubm = t.replace('1', '0') 868 ubm = ubm.replace('.', '0') 869 ubm = ubm.replace('-', '1') 870 fms = int(fms, 2) 871 fbs = int(fbs, 2) 872 ubm = int(ubm, 2) 873 fixedbits = (fixedbits << shift) | fbs 874 fixedmask = (fixedmask << shift) | fms 875 undefmask = (undefmask << shift) | ubm 876 # Otherwise, fieldname:fieldwidth 877 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t): 878 (fname, flen) = t.split(':') 879 sign = False 880 if flen[0] == 's': 881 sign = True 882 flen = flen[1:] 883 shift = int(flen, 10) 884 if shift + width > insnwidth: 885 error(lineno, 'field {0} exceeds insnwidth'.format(fname)) 886 f = Field(sign, insnwidth - width - shift, shift) 887 flds = add_field(lineno, flds, fname, f) 888 fixedbits <<= shift 889 fixedmask <<= shift 890 undefmask <<= shift 891 else: 892 error(lineno, 'invalid token "{0}"'.format(t)) 893 width += shift 894 895 if variablewidth and width < insnwidth and width % 8 == 0: 896 shift = insnwidth - width 897 fixedbits <<= shift 898 fixedmask <<= shift 899 undefmask <<= shift 900 undefmask |= (1 << shift) - 1 901 902 # We should have filled in all of the bits of the instruction. 903 elif not (is_format and width == 0) and width != insnwidth: 904 error(lineno, 'definition has {0} bits'.format(width)) 905 906 # Do not check for fields overlaping fields; one valid usage 907 # is to be able to duplicate fields via import. 908 fieldmask = 0 909 for f in flds.values(): 910 fieldmask |= f.mask 911 912 # Fix up what we've parsed to match either a format or a pattern. 913 if is_format: 914 # Formats cannot reference formats. 915 if fmt: 916 error(lineno, 'format referencing format') 917 # If an argument set is given, then there should be no fields 918 # without a place to store it. 919 if arg: 920 for f in flds.keys(): 921 if f not in arg.fields: 922 error(lineno, 'field {0} not in argument set {1}' 923 .format(f, arg.name)) 924 else: 925 arg = infer_argument_set(flds) 926 if name in formats: 927 error(lineno, 'duplicate format name', name) 928 fmt = Format(name, lineno, arg, fixedbits, fixedmask, 929 undefmask, fieldmask, flds, width) 930 formats[name] = fmt 931 else: 932 # Patterns can reference a format ... 933 if fmt: 934 # ... but not an argument simultaneously 935 if arg: 936 error(lineno, 'pattern specifies both format and argument set') 937 if fixedmask & fmt.fixedmask: 938 error(lineno, 'pattern fixed bits overlap format fixed bits') 939 if width != fmt.width: 940 error(lineno, 'pattern uses format of different width') 941 fieldmask |= fmt.fieldmask 942 fixedbits |= fmt.fixedbits 943 fixedmask |= fmt.fixedmask 944 undefmask |= fmt.undefmask 945 else: 946 (fmt, flds) = infer_format(arg, fieldmask, flds, width) 947 arg = fmt.base 948 for f in flds.keys(): 949 if f not in arg.fields: 950 error(lineno, 'field {0} not in argument set {1}' 951 .format(f, arg.name)) 952 if f in fmt.fields.keys(): 953 error(lineno, 'field {0} set by format and pattern'.format(f)) 954 for f in arg.fields: 955 if f not in flds.keys() and f not in fmt.fields.keys(): 956 error(lineno, 'field {0} not initialized'.format(f)) 957 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, 958 undefmask, fieldmask, flds, width) 959 parent_pat.pats.append(pat) 960 allpatterns.append(pat) 961 962 # Validate the masks that we have assembled. 963 if fieldmask & fixedmask: 964 error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})' 965 .format(fieldmask, fixedmask)) 966 if fieldmask & undefmask: 967 error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 968 .format(fieldmask, undefmask)) 969 if fixedmask & undefmask: 970 error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' 971 .format(fixedmask, undefmask)) 972 if not is_format: 973 allbits = fieldmask | fixedmask | undefmask 974 if allbits != insnmask: 975 error(lineno, 'bits left unspecified (0x{0:08x})' 976 .format(allbits ^ insnmask)) 977# end parse_general 978 979 980def parse_file(f, parent_pat): 981 """Parse all of the patterns within a file""" 982 global re_arg_ident 983 global re_fld_ident 984 global re_fmt_ident 985 global re_pat_ident 986 987 # Read all of the lines of the file. Concatenate lines 988 # ending in backslash; discard empty lines and comments. 989 toks = [] 990 lineno = 0 991 nesting = 0 992 nesting_pats = [] 993 994 for line in f: 995 lineno += 1 996 997 # Expand and strip spaces, to find indent. 998 line = line.rstrip() 999 line = line.expandtabs() 1000 len1 = len(line) 1001 line = line.lstrip() 1002 len2 = len(line) 1003 1004 # Discard comments 1005 end = line.find('#') 1006 if end >= 0: 1007 line = line[:end] 1008 1009 t = line.split() 1010 if len(toks) != 0: 1011 # Next line after continuation 1012 toks.extend(t) 1013 else: 1014 # Allow completely blank lines. 1015 if len1 == 0: 1016 continue 1017 indent = len1 - len2 1018 # Empty line due to comment. 1019 if len(t) == 0: 1020 # Indentation must be correct, even for comment lines. 1021 if indent != nesting: 1022 error(lineno, 'indentation ', indent, ' != ', nesting) 1023 continue 1024 start_lineno = lineno 1025 toks = t 1026 1027 # Continuation? 1028 if toks[-1] == '\\': 1029 toks.pop() 1030 continue 1031 1032 name = toks[0] 1033 del toks[0] 1034 1035 # End nesting? 1036 if name == '}' or name == ']': 1037 if len(toks) != 0: 1038 error(start_lineno, 'extra tokens after close brace') 1039 1040 # Make sure { } and [ ] nest properly. 1041 if (name == '}') != isinstance(parent_pat, IncMultiPattern): 1042 error(lineno, 'mismatched close brace') 1043 1044 try: 1045 parent_pat = nesting_pats.pop() 1046 except: 1047 error(lineno, 'extra close brace') 1048 1049 nesting -= 2 1050 if indent != nesting: 1051 error(lineno, 'indentation ', indent, ' != ', nesting) 1052 1053 toks = [] 1054 continue 1055 1056 # Everything else should have current indentation. 1057 if indent != nesting: 1058 error(start_lineno, 'indentation ', indent, ' != ', nesting) 1059 1060 # Start nesting? 1061 if name == '{' or name == '[': 1062 if len(toks) != 0: 1063 error(start_lineno, 'extra tokens after open brace') 1064 1065 if name == '{': 1066 nested_pat = IncMultiPattern(start_lineno) 1067 else: 1068 nested_pat = ExcMultiPattern(start_lineno) 1069 parent_pat.pats.append(nested_pat) 1070 nesting_pats.append(parent_pat) 1071 parent_pat = nested_pat 1072 1073 nesting += 2 1074 toks = [] 1075 continue 1076 1077 # Determine the type of object needing to be parsed. 1078 if re.fullmatch(re_fld_ident, name): 1079 parse_field(start_lineno, name[1:], toks) 1080 elif re.fullmatch(re_arg_ident, name): 1081 parse_arguments(start_lineno, name[1:], toks) 1082 elif re.fullmatch(re_fmt_ident, name): 1083 parse_generic(start_lineno, None, name[1:], toks) 1084 elif re.fullmatch(re_pat_ident, name): 1085 parse_generic(start_lineno, parent_pat, name, toks) 1086 else: 1087 error(lineno, 'invalid token "{0}"'.format(name)) 1088 toks = [] 1089 1090 if nesting != 0: 1091 error(lineno, 'missing close brace') 1092# end parse_file 1093 1094 1095class SizeTree: 1096 """Class representing a node in a size decode tree""" 1097 1098 def __init__(self, m, w): 1099 self.mask = m 1100 self.subs = [] 1101 self.base = None 1102 self.width = w 1103 1104 def str1(self, i): 1105 ind = str_indent(i) 1106 r = '{0}{1:08x}'.format(ind, self.mask) 1107 r += ' [\n' 1108 for (b, s) in self.subs: 1109 r += '{0} {1:08x}:\n'.format(ind, b) 1110 r += s.str1(i + 4) + '\n' 1111 r += ind + ']' 1112 return r 1113 1114 def __str__(self): 1115 return self.str1(0) 1116 1117 def output_code(self, i, extracted, outerbits, outermask): 1118 ind = str_indent(i) 1119 1120 # If we need to load more bytes to test, do so now. 1121 if extracted < self.width: 1122 output(ind, 'insn = ', decode_function, 1123 '_load_bytes(ctx, insn, {0}, {1});\n' 1124 .format(extracted // 8, self.width // 8)); 1125 extracted = self.width 1126 1127 # Attempt to aid the compiler in producing compact switch statements. 1128 # If the bits in the mask are contiguous, extract them. 1129 sh = is_contiguous(self.mask) 1130 if sh > 0: 1131 # Propagate SH down into the local functions. 1132 def str_switch(b, sh=sh): 1133 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) 1134 1135 def str_case(b, sh=sh): 1136 return '0x{0:x}'.format(b >> sh) 1137 else: 1138 def str_switch(b): 1139 return 'insn & 0x{0:08x}'.format(b) 1140 1141 def str_case(b): 1142 return '0x{0:08x}'.format(b) 1143 1144 output(ind, 'switch (', str_switch(self.mask), ') {\n') 1145 for b, s in sorted(self.subs): 1146 innermask = outermask | self.mask 1147 innerbits = outerbits | b 1148 output(ind, 'case ', str_case(b), ':\n') 1149 output(ind, ' /* ', 1150 str_match_bits(innerbits, innermask), ' */\n') 1151 s.output_code(i + 4, extracted, innerbits, innermask) 1152 output(ind, '}\n') 1153 output(ind, 'return insn;\n') 1154# end SizeTree 1155 1156class SizeLeaf: 1157 """Class representing a leaf node in a size decode tree""" 1158 1159 def __init__(self, m, w): 1160 self.mask = m 1161 self.width = w 1162 1163 def str1(self, i): 1164 ind = str_indent(i) 1165 return '{0}{1:08x}'.format(ind, self.mask) 1166 1167 def __str__(self): 1168 return self.str1(0) 1169 1170 def output_code(self, i, extracted, outerbits, outermask): 1171 global decode_function 1172 ind = str_indent(i) 1173 1174 # If we need to load more bytes, do so now. 1175 if extracted < self.width: 1176 output(ind, 'insn = ', decode_function, 1177 '_load_bytes(ctx, insn, {0}, {1});\n' 1178 .format(extracted // 8, self.width // 8)); 1179 extracted = self.width 1180 output(ind, 'return insn;\n') 1181# end SizeLeaf 1182 1183 1184def build_size_tree(pats, width, outerbits, outermask): 1185 global insnwidth 1186 1187 # Collect the mask of bits that are fixed in this width 1188 innermask = 0xff << (insnwidth - width) 1189 innermask &= ~outermask 1190 minwidth = None 1191 onewidth = True 1192 for i in pats: 1193 innermask &= i.fixedmask 1194 if minwidth is None: 1195 minwidth = i.width 1196 elif minwidth != i.width: 1197 onewidth = False; 1198 if minwidth < i.width: 1199 minwidth = i.width 1200 1201 if onewidth: 1202 return SizeLeaf(innermask, minwidth) 1203 1204 if innermask == 0: 1205 if width < minwidth: 1206 return build_size_tree(pats, width + 8, outerbits, outermask) 1207 1208 pnames = [] 1209 for p in pats: 1210 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) 1211 error_with_file(pats[0].file, pats[0].lineno, 1212 'overlapping patterns size {0}:'.format(width), pnames) 1213 1214 bins = {} 1215 for i in pats: 1216 fb = i.fixedbits & innermask 1217 if fb in bins: 1218 bins[fb].append(i) 1219 else: 1220 bins[fb] = [i] 1221 1222 fullmask = outermask | innermask 1223 lens = sorted(bins.keys()) 1224 if len(lens) == 1: 1225 b = lens[0] 1226 return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) 1227 1228 r = SizeTree(innermask, width) 1229 for b, l in bins.items(): 1230 s = build_size_tree(l, width, b | outerbits, fullmask) 1231 r.subs.append((b, s)) 1232 return r 1233# end build_size_tree 1234 1235 1236def prop_size(tree): 1237 """Propagate minimum widths up the decode size tree""" 1238 1239 if isinstance(tree, SizeTree): 1240 min = None 1241 for (b, s) in tree.subs: 1242 width = prop_size(s) 1243 if min is None or min > width: 1244 min = width 1245 assert min >= tree.width 1246 tree.width = min 1247 else: 1248 min = tree.width 1249 return min 1250# end prop_size 1251 1252 1253def main(): 1254 global arguments 1255 global formats 1256 global allpatterns 1257 global translate_scope 1258 global translate_prefix 1259 global output_fd 1260 global output_file 1261 global input_file 1262 global insnwidth 1263 global insntype 1264 global insnmask 1265 global decode_function 1266 global variablewidth 1267 global anyextern 1268 1269 decode_scope = 'static ' 1270 1271 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', 1272 'static-decode=', 'varinsnwidth='] 1273 try: 1274 (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) 1275 except getopt.GetoptError as err: 1276 error(0, err) 1277 for o, a in opts: 1278 if o in ('-o', '--output'): 1279 output_file = a 1280 elif o == '--decode': 1281 decode_function = a 1282 decode_scope = '' 1283 elif o == '--static-decode': 1284 decode_function = a 1285 elif o == '--translate': 1286 translate_prefix = a 1287 translate_scope = '' 1288 elif o in ('-w', '--insnwidth', '--varinsnwidth'): 1289 if o == '--varinsnwidth': 1290 variablewidth = True 1291 insnwidth = int(a) 1292 if insnwidth == 16: 1293 insntype = 'uint16_t' 1294 insnmask = 0xffff 1295 elif insnwidth != 32: 1296 error(0, 'cannot handle insns of width', insnwidth) 1297 else: 1298 assert False, 'unhandled option' 1299 1300 if len(args) < 1: 1301 error(0, 'missing input file') 1302 1303 toppat = ExcMultiPattern(0) 1304 1305 for filename in args: 1306 input_file = filename 1307 f = open(filename, 'r') 1308 parse_file(f, toppat) 1309 f.close() 1310 1311 # We do not want to compute masks for toppat, because those masks 1312 # are used as a starting point for build_tree. For toppat, we must 1313 # insist that decode begins from naught. 1314 for i in toppat.pats: 1315 i.prop_masks() 1316 1317 toppat.build_tree() 1318 toppat.prop_format() 1319 1320 if variablewidth: 1321 for i in toppat.pats: 1322 i.prop_width() 1323 stree = build_size_tree(toppat.pats, 8, 0, 0) 1324 prop_size(stree) 1325 1326 if output_file: 1327 output_fd = open(output_file, 'w') 1328 else: 1329 output_fd = sys.stdout 1330 1331 output_autogen() 1332 for n in sorted(arguments.keys()): 1333 f = arguments[n] 1334 f.output_def() 1335 1336 # A single translate function can be invoked for different patterns. 1337 # Make sure that the argument sets are the same, and declare the 1338 # function only once. 1339 # 1340 # If we're sharing formats, we're likely also sharing trans_* functions, 1341 # but we can't tell which ones. Prevent issues from the compiler by 1342 # suppressing redundant declaration warnings. 1343 if anyextern: 1344 output("#pragma GCC diagnostic push\n", 1345 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", 1346 "#ifdef __clang__\n" 1347 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", 1348 "#endif\n\n") 1349 1350 out_pats = {} 1351 for i in allpatterns: 1352 if i.name in out_pats: 1353 p = out_pats[i.name] 1354 if i.base.base != p.base.base: 1355 error(0, i.name, ' has conflicting argument sets') 1356 else: 1357 i.output_decl() 1358 out_pats[i.name] = i 1359 output('\n') 1360 1361 if anyextern: 1362 output("#pragma GCC diagnostic pop\n\n") 1363 1364 for n in sorted(formats.keys()): 1365 f = formats[n] 1366 f.output_extract() 1367 1368 output(decode_scope, 'bool ', decode_function, 1369 '(DisasContext *ctx, ', insntype, ' insn)\n{\n') 1370 1371 i4 = str_indent(4) 1372 1373 if len(allpatterns) != 0: 1374 output(i4, 'union {\n') 1375 for n in sorted(arguments.keys()): 1376 f = arguments[n] 1377 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') 1378 output(i4, '} u;\n\n') 1379 toppat.output_code(4, False, 0, 0) 1380 1381 output(i4, 'return false;\n') 1382 output('}\n') 1383 1384 if variablewidth: 1385 output('\n', decode_scope, insntype, ' ', decode_function, 1386 '_load(DisasContext *ctx)\n{\n', 1387 ' ', insntype, ' insn = 0;\n\n') 1388 stree.output_code(4, 0, 0, 0) 1389 output('}\n') 1390 1391 if output_file: 1392 output_fd.close() 1393# end main 1394 1395 1396if __name__ == '__main__': 1397 main() 1398