#!/usr/bin/env python
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#

#
# Generate a decoding tree from a specification file.
#
# The tree is built from instruction "patterns".  A pattern may represent
# a single architectural instruction or a group of same, depending on what
# is convenient for further processing.
#
# Each pattern has "fixedbits" & "fixedmask", the combination of which
# describes the condition under which the pattern is matched:
#
#   (insn & fixedmask) == fixedbits
#
# Each pattern may have "fields", which are extracted from the insn and
# passed along to the translator.  Examples of such are registers,
# immediates, and sub-opcodes.
#
# In support of patterns, one may declare fields, argument sets, and
# formats, each of which may be re-used to simplify further definitions.
#
# *** Field syntax:
#
# field_def     := '%' identifier ( unnamed_field )+ ( !function=identifier )?
# unnamed_field := number ':' ( 's' )? number
#
# For unnamed_field, the first number is the least-significant bit position of
# the field and the second number is the length of the field.  If the 's' is
# present, the field is considered signed.  If multiple unnamed_fields are
# present, they are concatenated.  In this way one can define disjoint fields.
#
# If !function is specified, the concatenated result is passed through the
# named function, taking and returning an integral value.
#
# FIXME: the fields of the structure into which this result will be stored
# is restricted to "int".  Which means that we cannot expand 64-bit items.
#
# Field examples:
#
#   %disp   0:s16          -- sextract(i, 0, 16)
#   %imm9   16:6 10:3      -- extract(i, 16, 6) << 3 | extract(i, 10, 3)
#   %disp12 0:s1 1:1 2:10  -- sextract(i, 0, 1) << 11
#                             | extract(i, 1, 1) << 10
#                             | extract(i, 2, 10)
#   %shimm8 5:s8 13:1 !function=expand_shimm8
#                          -- expand_shimm8(sextract(i, 5, 8) << 1
#                                           | extract(i, 13, 1))
#
# *** Argument set syntax:
#
# args_def    := '&' identifier ( args_elt )+ ( !extern )?
# args_elt    := identifier
#
# Each args_elt defines an argument within the argument set.
# Each argument set will be rendered as a C structure "arg_$name"
# with each of the fields being one of the member arguments.
#
# If !extern is specified, the backing structure is assumed to
# have been already declared, typically via a second decoder.
#
# Argument set examples:
#
#   &reg3       ra rb rc
#   &loadstore  reg base offset
#
# *** Format syntax:
#
# fmt_def      := '@' identifier ( fmt_elt )+
# fmt_elt      := fixedbit_elt | field_elt | field_ref | args_ref
# fixedbit_elt := [01.-]+
# field_elt    := identifier ':' 's'? number
# field_ref    := '%' identifier | identifier '=' '%' identifier
# args_ref     := '&' identifier
#
# Defining a format is a handy way to avoid replicating groups of fields
# across many instruction patterns.
#
# A fixedbit_elt describes a contiguous sequence of bits that must
# be 1, 0, [.-] for don't care.  The difference between '.' and '-'
# is that '.' means that the bit will be covered with a field or a
# final [01] from the pattern, and '-' means that the bit is really
# ignored by the cpu and will not be specified.
#
# A field_elt describes a simple field only given a width; the position of
# the field is implied by its position with respect to other fixedbit_elt
# and field_elt.
#
# If any fixedbit_elt or field_elt appear then all bits must be defined.
# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that.
#
# A field_ref incorporates a field by reference.  This is the only way to
# add a complex field to a format.  A field may be renamed in the process
# via assignment to another identifier.  This is intended to allow the
# same argument set be used with disjoint named fields.
#
# A single args_ref may specify an argument set to use for the format.
# The set of fields in the format must be a subset of the arguments in
# the argument set.  If an argument set is not specified, one will be
# inferred from the set of fields.
#
# It is recommended, but not required, that all field_ref and args_ref
# appear at the end of the line, not interleaving with fixedbit_elt or
# field_elt.
#
# Format examples:
#
#   @opr    ...... ra:5 rb:5 ... 0 ....... rc:5
#   @opi    ...... ra:5 lit:8    1 ....... rc:5
#
# *** Pattern syntax:
#
# pat_def      := identifier ( pat_elt )+
# pat_elt      := fixedbit_elt | field_elt | field_ref
#               | args_ref | fmt_ref | const_elt
# fmt_ref      := '@' identifier
# const_elt    := identifier '=' number
#
# The fixedbit_elt and field_elt specifiers are unchanged from formats.
# A pattern that does not specify a named format will have one inferred
# from a referenced argument set (if present) and the set of fields.
#
# A const_elt allows an argument to be set to a constant value.  This may
# come in handy when fields overlap between patterns and one has to
# include the values in the fixedbit_elt instead.
#
# The decoder will call a translator function for each pattern matched.
#
# Pattern examples:
#
#   addl_r   010000 ..... ..... .... 0000000 ..... @opr
#   addl_i   010000 ..... ..... .... 0000000 ..... @opi
#
# which will, in part, invoke
#
#   trans_addl_r(ctx, &arg_opr)
# and
#   trans_addl_i(ctx, &arg_opi)
#

import os
import re
import sys
import getopt

# Global parser/generator state.  The width-related values may be
# overridden from the command line in main().
insnwidth = 32
insnmask = 0xffffffff
fields = {}
arguments = {}
formats = {}
patterns = []

translate_prefix = 'trans'
translate_scope = 'static '
input_file = ''
output_file = None
output_fd = None
insntype = 'uint32_t'
decode_function = 'decode'

re_ident = '[a-zA-Z][a-zA-Z0-9_]*'


def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each omitted from the prefix when false (empty
    string / 0).  Every element of ARGS is converted with str() and
    the results are joined with single spaces.  If an output file has
    already been opened it is closed and removed, so that no partial
    output is left behind.  Exits the process with status 1.
    """
    prefix = ''
    if file:
        prefix += '{0}:'.format(file)
    if lineno:
        prefix += '{0}:'.format(lineno)
    if prefix:
        prefix += ' '
    words = ['error:'] + [str(a) for a in args]
    sys.stderr.write(prefix + ' '.join(words) + '\n')

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    sys.exit(1)


def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    # Forward the arguments individually; passing the tuple itself would
    # print its repr instead of the message.
    error_with_file(input_file, lineno, *args)


def output(*args):
    """Write each string in ARGS, in order, to the output file."""
    for a in args:
        output_fd.write(a)


if hasattr(re, 'fullmatch'):
    re_fullmatch = re.fullmatch
else:
    def re_fullmatch(pat, string):
        """Fallback for interpreters without re.fullmatch (before 3.4)."""
        # Group the pattern so that alternations are anchored as a whole,
        # and use \Z because '$' also matches before a trailing newline.
        return re.match('(?:' + pat + r')\Z', string)


def output_autogen():
    """Emit the "autogenerated file" banner comment."""
    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')


def str_indent(c):
    """Return a string with C spaces"""
    return ' ' * c


def str_fields(fields):
    """Return a string uniquely identifying FIELDS"""
    return '_'.join(sorted(fields))


def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK

    Bits are printed most-significant first: '0' or '1' where MASK is
    set, '.' elsewhere, with a space between bytes.
    """
    i = 1 << (insnwidth - 1)
    space = 0x01010100
    r = ''
    while i != 0:
        if i & mask:
            r += '1' if i & bits else '0'
        else:
            r += '.'
        if i & space:
            r += ' '
        i >>= 1
    return r


def is_pow2(x):
    """Return true iff X is equal to a power of 2."""
    return x > 0 and (x & (x - 1)) == 0


def ctz(x):
    """Return the number of times 2 factors into X.

    Raises ValueError for X == 0, for which the count is unbounded.
    """
    if x == 0:
        raise ValueError('ctz of zero is undefined')
    r = 0
    while ((x >> r) & 1) == 0:
        r += 1
    return r


def is_contiguous(bits):
    """Return the position of the lowest set bit of BITS if all of the
    set bits form one contiguous run, else -1 (also for BITS == 0)."""
    if bits == 0:
        return -1
    shift = ctz(bits)
    if is_pow2((bits >> shift) + 1):
        return shift
    return -1


def eq_fields_for_args(flds_a, flds_b):
    """Return true iff FLDS_A and FLDS_B contain the same field names.

    FLDS_A is a dict; FLDS_B may be any sized container of names.
    """
    if len(flds_a) != len(flds_b):
        return False
    return all(k in flds_b for k in flds_a)


def eq_fields_for_fmts(flds_a, flds_b):
    """Return true iff the dicts FLDS_A and FLDS_B have the same names
    mapped to fields of the same class and equal value."""
    if len(flds_a) != len(flds_b):
        return False
    for k, a in flds_a.items():
        if k not in flds_b:
            return False
        b = flds_b[k]
        if a.__class__ != b.__class__ or a != b:
            return False
    return True


class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        if self.sign:
            s = 's'
        else:
            s = ''
        return str(self.pos) + ':' + s + str(self.len)

    def str_extract(self):
        """Return the C expression extracting this field from 'insn'."""
        if self.sign:
            extr = 'sextract32'
        else:
            extr = 'extract32'
        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)

    def __eq__(self, other):
        # Two fields are the same only if signedness, position and
        # length all agree.
        return (isinstance(other, Field)
                and self.sign == other.sign
                and self.pos == other.pos
                and self.len == other.len)

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field


class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression concatenating all sub-fields.

        The last sub-field supplies the least significant bits; each
        earlier one is deposited above what has been built so far.
        """
        ret = '0'
        pos = 0
        for f in reversed(self.subs):
            if pos == 0:
                ret = f.str_extract()
            else:
                ret = 'deposit32({0}, {1}, {2}, {3})' \
                      .format(ret, pos, 32 - pos, f.str_extract())
            pos += f.len
        return ret

    def __eq__(self, other):
        if not isinstance(other, MultiField):
            return False
        if len(self.subs) != len(other.subs):
            return False
        return all(a == b for a, b in zip(self.subs, other.subs))

    def __ne__(self, other):
        return not self.__eq__(other)
# end MultiField


class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the constant as a C expression."""
        return str(self.value)

    def __eq__(self, other):
        # Python 3 ignores __cmp__, so equality must be spelled out or
        # two equal constants would only compare equal by identity.
        return isinstance(other, ConstField) and self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)

    def __cmp__(self, other):
        # Only consulted by Python 2, for ordering comparisons.
        return self.value - other.value
# end ConstField


class FunctionField:
    """Class representing a field passed through an expander"""
    def __init__(self, func, base):
        self.mask = base.mask
        self.sign = base.sign
        self.base = base
        self.func = func

    def __str__(self):
        return self.func + '(' + str(self.base) + ')'

    def str_extract(self):
        """Return the C expression FUNC(<extraction of the base field>)."""
        return self.func + '(' + self.base.str_extract() + ')'

    def __eq__(self, other):
        return (isinstance(other, FunctionField)
                and self.func == other.func
                and self.base == other.base)

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField


class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, extern):
        self.name = nm
        self.extern = extern
        self.fields = sorted(flds)

    def __str__(self):
        return self.name + ' ' + str(self.fields)

    def struct_name(self):
        """Return the name of the C structure for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef, unless the set was declared !extern."""
        if not self.extern:
            output('typedef struct {\n')
            for n in self.fields:
                output('    int ', n, ';\n')
            output('} ', self.struct_name(), ';\n\n')
# end Arguments


class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds):
        self.name = name
        # Remember where the definition came from, for diagnostics.
        self.file = input_file
        self.lineno = lineno
        self.base = base
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds

    def __str__(self):
        r = self.name
        if self.base:
            r = r + ' ' + self.base.name
        else:
            r = r + ' ' + str(self.fields)
        r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def str1(self, i):
        """Return the string form indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General


class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the C extraction function."""
        return 'extract_' + self.name

    def output_extract(self):
        """Emit the C function filling the argument struct from insn."""
        output('static void ', self.extract_name(), '(',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for n, f in self.fields.items():
            output('    a->', n, ' = ', f.str_extract(), ';\n')
        output('}\n\n')
# end Format


class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the arg_NAME typedef and the translator prototype."""
        output('typedef ', self.base.base.struct_name(),
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the leaf code calling the translator, indented by I.

        EXTRACTED tells whether the format's fields have already been
        extracted by an enclosing tree node.  OUTERBITS and OUTERMASK
        are accepted for symmetry with Tree.output_code and are unused.
        """
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n')
        # Fields private to the pattern are set after those of the format.
        for n, f in self.fields.items():
            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
        output(ind, 'return ', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ');\n')
# end Pattern


def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO"""
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for t in toks:
        if re_fullmatch('!function=' + re_ident, t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')
            func = func[1]
            continue

        if re_fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            subtoks = t.split(':s')
            sign = True
        elif re_fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            subtoks = t.split(':')
            sign = False
        else:
            error(lineno, 'invalid field token "{0}"'.format(t))
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, 'field {0} too large'.format(t))
        f = Field(sign, po, le)
        subs.append(f)
        width += le

    if width > insnwidth:
        error(lineno, 'field too large')
    if len(subs) == 1:
        f = subs[0]
    else:
        mask = 0
        for s in subs:
            if mask & s.mask:
                error(lineno, 'field components overlap')
            mask |= s.mask
        f = MultiField(subs, mask)
    if func:
        f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field


def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO"""
    flds = []
    extern = False
    for t in toks:
        if re_fullmatch('!extern', t):
            extern = True
            continue
        if not re_fullmatch(re_ident, t):
            error(lineno, 'invalid argument set token "{0}"'.format(t))
        if t in flds:
            error(lineno, 'duplicate argument "{0}"'.format(t))
        flds.append(t)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, flds, extern)
# end parse_arguments


def lookup_field(lineno, name):
    """Return the global field NAME, or report an error at LINENO."""
    if name in fields:
        return fields[name]
    error(lineno, 'undefined field', name)


def add_field(lineno, flds, new_name, f):
    """Add field F to the dict FLDS as NEW_NAME and return FLDS.

    Reports an error at LINENO if NEW_NAME is already present.
    """
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds


def add_field_byname(lineno, flds, new_name, old_name):
    """Add the global field OLD_NAME to FLDS under the name NEW_NAME."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))


def infer_argument_set(flds):
    """Return an argument set whose members are exactly the names in
    FLDS, creating and registering a new one if none exists."""
    for arg in arguments.values():
        if eq_fields_for_args(flds, arg.fields):
            return arg

    name = decode_function + str(len(arguments))
    arg = Arguments(name, flds.keys(), False)
    arguments[name] = arg
    return arg


def infer_format(arg, fieldmask, flds):
    """Find or create a format for a pattern that named none.

    ARG is the pattern's explicit argument set, or None.  FIELDMASK is
    the union of the masks of FLDS, the pattern's fields by name.

    Returns a pair (format, pattern_fields), where pattern_fields are
    those members of FLDS that the pattern must still set itself,
    i.e. the constants that the chosen format does not already supply.
    """
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg:
            if fmt.base != arg:
                continue
        elif not eq_fields_for_args(flds, fmt.base.fields):
            # Without an explicit argument set, only accept a format
            # whose set is the one that would be inferred below;
            # anything larger would leave members uninitialized.
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if eq_fields_for_fmts(flds, fmt.fields):
            # The format already provides everything, constants included.
            return (fmt, {})
        if eq_fields_for_fmts(var_flds, fmt.fields):
            return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format


def parse_generic(lineno, is_format, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    If IS_FORMAT, the result is registered in the global 'formats'
    under NAME; otherwise a Pattern is appended to 'patterns'.
    """
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if t[0] == '&':
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if t[0] == '@':
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if t[0] == '%':
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re_fullmatch(re_ident + '=%' + re_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re_fullmatch(re_ident + '=[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re_fullmatch('[01.-]+', t):
            shift = len(t)
            # fms: bits that are fixed; fbs: their required values;
            # ubm: bits explicitly ignored ('-').
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re_fullmatch(re_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            # Diagnose overflow here: a negative bit position would
            # otherwise raise ValueError while computing the field mask.
            if width + shift > insnwidth:
                error(lineno, 'field {0} exceeds insn width'.format(fname))
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        width += shift

    # We should have filled in all of the bits of the instruction.
    if not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            (fmt, flds) = infer_format(arg, fieldmask, flds)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds)
        patterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic


def parse_file(f):
    """Parse all of the patterns within a file"""

    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    for line in f:
        lineno += 1

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        elif len(t) == 0:
            # Empty line
            continue
        else:
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        if len(toks) < 2:
            error(lineno, 'short line')

        name = toks[0]
        del toks[0]

        # Determine the type of object needing to be parsed.
        if name[0] == '%':
            parse_field(lineno, name[1:], toks)
        elif name[0] == '&':
            parse_arguments(lineno, name[1:], toks)
        elif name[0] == '@':
            parse_generic(lineno, True, name[1:], toks)
        else:
            parse_generic(lineno, False, name, toks)
        toks = []
# end parse_file


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # fixedmask: every bit decided at or above this node;
        # thismask: the bits this node itself switches on.
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, Tree-or-Pattern) pairs.
        self.subs = []
        # Format common to everything below, set by prop_format().
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of the subtree, indented by I."""
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0}  {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement for this node, indented by I.

        EXTRACTED tells whether an enclosing node already emitted the
        field extraction.  OUTERBITS/OUTERMASK describe the bits already
        matched by enclosing nodes; they are only used for comments.
        """
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(&u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        # Order by the case value only; the nodes are not comparable.
        for b, s in sorted(self.subs, key=lambda sub: sub[0]):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
        output(ind, '}\n')
        output(ind, 'return false;\n')
# end Tree


def build_tree(pats, outerbits, outermask):
    """Return the decode Tree distinguishing the patterns in PATS.

    OUTERBITS/OUTERMASK are the bits already matched on the way here.
    Reports an error if the patterns cannot be told apart.
    """
    # Find the intersection of all remaining fixedmask.
    # Clamp to the insn width so the mask is never a negative number.
    innermask = ~outermask & insnmask
    for i in pats:
        innermask &= i.fixedmask

    if innermask == 0:
        pnames = []
        for p in pats:
            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns:', pnames)

    fullmask = outermask | innermask

    # Sort each element of pats into the bin selected by the mask.
    bins = {}
    for i in pats:
        fb = i.fixedbits & innermask
        if fb in bins:
            bins[fb].append(i)
        else:
            bins[fb] = [i]

    # We must recurse if any bin has more than one element or if
    # the single element in the bin has not been fully matched.
    t = Tree(fullmask, innermask)

    for b, l in bins.items():
        s = l[0]
        if len(l) > 1 or s.fixedmask & ~fullmask != 0:
            s = build_tree(l, b | outerbits, fullmask)
        t.subs.append((b, s))

    return t
# end build_tree


def prop_format(tree):
    """Propagate Format objects into the decode tree"""

    # Depth first search.
    for (b, s) in tree.subs:
        if isinstance(s, Tree):
            prop_format(s)

    # If all entries in SUBS have the same format, then
    # propagate that into the tree.
    f = None
    for (b, s) in tree.subs:
        if f is None:
            f = s.base
            if f is None:
                return
        if f is not s.base:
            return
    tree.base = f
# end prop_format


def main():
    """Parse the command line and the input files; write the decoder."""
    global arguments
    global formats
    global patterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
    try:
        (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth'):
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')
    for filename in args:
        input_file = filename
        # Make sure the input is closed even if parsing bails out.
        with open(filename, 'r') as f:
            parse_file(f)

    t = build_tree(patterns, 0, 0)
    prop_format(t)

    if output_file:
        output_fd = open(output_file, 'w')
    else:
        output_fd = sys.stdout

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    out_pats = {}
    for i in patterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)
    output(i4, 'union {\n')
    for n in sorted(arguments.keys()):
        f = arguments[n]
        output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
    output(i4, '} u;\n\n')

    t.output_code(4, False, 0, 0)

    output('}\n')

    if output_file:
        output_fd.close()
# end main


if __name__ == '__main__':
    main()