xref: /openbmc/qemu/scripts/decodetree.py (revision 4a9b31b8)
1#!/usr/bin/env python
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20#
21# The tree is built from instruction "patterns".  A pattern may represent
22# a single architectural instruction or a group of same, depending on what
23# is convenient for further processing.
24#
25# Each pattern has "fixedbits" & "fixedmask", the combination of which
26# describes the condition under which the pattern is matched:
27#
28#   (insn & fixedmask) == fixedbits
29#
30# Each pattern may have "fields", which are extracted from the insn and
31# passed along to the translator.  Examples of such are registers,
32# immediates, and sub-opcodes.
33#
34# In support of patterns, one may declare fields, argument sets, and
35# formats, each of which may be re-used to simplify further definitions.
36#
37# *** Field syntax:
38#
39# field_def     := '%' identifier ( unnamed_field )+ ( !function=identifier )?
40# unnamed_field := number ':' ( 's' )? number
41#
42# For unnamed_field, the first number is the least-significant bit position of
43# the field and the second number is the length of the field.  If the 's' is
44# present, the field is considered signed.  If multiple unnamed_fields are
45# present, they are concatenated.  In this way one can define disjoint fields.
46#
47# If !function is specified, the concatenated result is passed through the
48# named function, taking and returning an integral value.
49#
50# FIXME: the fields of the structure into which this result will be stored
51# are restricted to "int", which means that we cannot expand 64-bit items.
52#
53# Field examples:
54#
55#   %disp   0:s16          -- sextract(i, 0, 16)
56#   %imm9   16:6 10:3      -- extract(i, 16, 6) << 3 | extract(i, 10, 3)
57#   %disp12 0:s1 1:1 2:10  -- sextract(i, 0, 1) << 11
58#                             | extract(i, 1, 1) << 10
59#                             | extract(i, 2, 10)
60#   %shimm8 5:s8 13:1 !function=expand_shimm8
61#                          -- expand_shimm8(sextract(i, 5, 8) << 1
62#                                           | extract(i, 13, 1))
63#
64# *** Argument set syntax:
65#
66# args_def    := '&' identifier ( args_elt )+
67# args_elt    := identifier
68#
69# Each args_elt defines an argument within the argument set.
70# Each argument set will be rendered as a C structure "arg_$name"
71# with each of the fields being one of the member arguments.
72#
73# Argument set examples:
74#
75#   &reg3       ra rb rc
76#   &loadstore  reg base offset
77#
78# *** Format syntax:
79#
80# fmt_def      := '@' identifier ( fmt_elt )+
81# fmt_elt      := fixedbit_elt | field_elt | field_ref | args_ref
82# fixedbit_elt := [01.-]+
83# field_elt    := identifier ':' 's'? number
84# field_ref    := '%' identifier | identifier '=' '%' identifier
85# args_ref     := '&' identifier
86#
87# Defining a format is a handy way to avoid replicating groups of fields
88# across many instruction patterns.
89#
90# A fixedbit_elt describes a contiguous sequence of bits that must
91# be 1, 0, or [.-] for don't care.  The difference between '.' and '-'
92# is that '.' means that the bit will be covered with a field or a
93# final [01] from the pattern, and '-' means that the bit is really
94# ignored by the cpu and will not be specified.
95#
96# A field_elt describes a simple field only given a width; the position of
97# the field is implied by its position with respect to other fixedbit_elt
98# and field_elt.
99#
100# If any fixedbit_elt or field_elt appear then all bits must be defined.
101# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that.
102#
103# A field_ref incorporates a field by reference.  This is the only way to
104# add a complex field to a format.  A field may be renamed in the process
105# via assignment to another identifier.  This is intended to allow the
106# same argument set to be used with disjoint named fields.
107#
108# A single args_ref may specify an argument set to use for the format.
109# The set of fields in the format must be a subset of the arguments in
110# the argument set.  If an argument set is not specified, one will be
111# inferred from the set of fields.
112#
113# It is recommended, but not required, that all field_ref and args_ref
114# appear at the end of the line, not interleaving with fixedbit_elt or
115# field_elt.
116#
117# Format examples:
118#
119#   @opr    ...... ra:5 rb:5 ... 0 ....... rc:5
120#   @opi    ...... ra:5 lit:8    1 ....... rc:5
121#
122# *** Pattern syntax:
123#
124# pat_def      := identifier ( pat_elt )+
125# pat_elt      := fixedbit_elt | field_elt | field_ref
126#               | args_ref | fmt_ref | const_elt
127# fmt_ref      := '@' identifier
128# const_elt    := identifier '=' number
129#
130# The fixedbit_elt and field_elt specifiers are unchanged from formats.
131# A pattern that does not specify a named format will have one inferred
132# from a referenced argument set (if present) and the set of fields.
133#
134# A const_elt allows an argument to be set to a constant value.  This may
135# come in handy when fields overlap between patterns and one has to
136# include the values in the fixedbit_elt instead.
137#
138# The decoder will call a translator function for each pattern matched.
139#
140# Pattern examples:
141#
142#   addl_r   010000 ..... ..... .... 0000000 ..... @opr
143#   addl_i   010000 ..... ..... .... 0000000 ..... @opi
144#
145# which will, in part, invoke
146#
147#   trans_addl_r(ctx, &arg_opr, insn)
148# and
149#   trans_addl_i(ctx, &arg_opi, insn)
150#
151
152import os
153import re
154import sys
155import getopt
156
# Instruction geometry.  main() overrides these for -w/--insnwidth.
insnwidth = 32
insnmask = (1 << insnwidth) - 1
insntype = 'uint{0}_t'.format(insnwidth)

# Everything parsed from the input file, keyed by name where applicable.
fields = {}
arguments = {}
formats = {}
patterns = []

# Naming and linkage of the generated translator declarations.
translate_prefix = 'trans'
translate_scope = 'static '

# Input/output bookkeeping; filled in by main().
input_file = ''
output_file = None
output_fd = None

# Regular expression fragment matching an identifier.
re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
172
173
def error(lineno, *args):
    """Print an error message from file:line and args and exit.

    LINENO is the input line the problem was found on; pass 0 when no
    line applies.  ARGS are converted with str() and joined with single
    spaces after the 'error:' prefix.

    If an output file has already been opened it is closed and removed,
    so that a failed run does not leave partial output behind.  The
    process then terminates with exit status 1; this function does not
    return.
    """
    if lineno:
        prefix = '{0}:{1}: error:'.format(input_file, lineno)
    elif input_file:
        prefix = '{0}: error:'.format(input_file)
    else:
        prefix = 'error:'
    sys.stderr.write(' '.join([prefix] + [str(a) for a in args]) + '\n')

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # sys.exit() rather than the bare exit() helper: the latter is only
    # installed by the 'site' module and is absent under 'python -S'.
    sys.exit(1)
193
194
def output(*args):
    """Write each string in ARGS, in order, to the current output stream."""
    global output_fd
    output_fd.writelines(args)
199
200
# re.fullmatch only exists on Python 3; provide an equivalent otherwise.
if sys.version_info >= (3, 0):
    re_fullmatch = re.fullmatch
else:
    def re_fullmatch(pat, str):
        """Return a match object iff PAT matches the whole of STR."""
        # Group the pattern so that a top-level '|' cannot escape the
        # anchoring, and use \Z because '$' also matches just before a
        # trailing newline.  re.match already anchors at the start.
        return re.match('(?:' + pat + r')\Z', str)
206
207
def output_autogen():
    """Emit the C comment marking the output as machine generated."""
    banner = '/* This file is autogenerated by scripts/decodetree.py.  */\n\n'
    output(banner)
210
211
def str_indent(c):
    """Return a string consisting of C space characters."""
    return ''.ljust(c)
215
216
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted field names joined with underscores.
    """
    return '_'.join(sorted(fields.keys()))
223
224
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    Bits are listed most significant first.  A bit outside MASK is shown
    as '.', otherwise as '0' or '1' according to BITS.  A space is
    inserted after bits 24, 16 and 8 to separate bytes.
    """
    global insnwidth

    byte_breaks = 0x01010100
    chars = []
    bit = 1 << (insnwidth - 1)
    while bit:
        if not bit & mask:
            chars.append('.')
        elif bit & bits:
            chars.append('1')
        else:
            chars.append('0')
        if bit & byte_breaks:
            chars.append(' ')
        bit >>= 1
    return ''.join(chars)
244
245
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero is not a power of two, although it passes the x & (x - 1)
    test on its own; it is rejected explicitly.
    """
    return x > 0 and (x & (x - 1)) == 0
249
250
def ctz(x):
    """Return the number of times 2 factors into X.

    This is the index of the least significant set bit of X.
    Raises ValueError for X == 0, which has no set bit; a bit-by-bit
    scan would never terminate for that input.
    """
    if x == 0:
        raise ValueError('ctz() is undefined for 0')
    # x & -x isolates the lowest set bit; its bit_length is index + 1.
    return (x & -x).bit_length() - 1
257
258
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one run, else -1.

    The shift is the position of the lowest set bit.  After shifting the
    run down to bit 0, adding one yields a power of two exactly when the
    set bits were contiguous.
    """
    shift = ctz(bits)
    return shift if is_pow2((bits >> shift) + 1) else -1
265
266
def eq_fields_for_args(flds_a, flds_b):
    """Return True iff FLDS_A and FLDS_B name the same set of fields.

    Only the names are compared; the field definitions are ignored.
    """
    same_size = len(flds_a) == len(flds_b)
    return same_size and all(name in flds_b for name in flds_a)
274
275
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True iff FLDS_A and FLDS_B hold identical field definitions.

    Both mappings must have the same names, and for every name the two
    values must be of the same class and must not compare unequal.
    """
    if len(flds_a) != len(flds_b):
        return False
    for name in flds_a:
        if name not in flds_b:
            return False
        lhs = flds_a[name]
        rhs = flds_b[name]
        if not (lhs.__class__ == rhs.__class__ and not (lhs != rhs)):
            return False
    return True
286
287
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        # sign: true if the extracted value is to be sign-extended
        # pos:  bit position of the least significant bit of the field
        # len:  width of the field in bits
        self.sign = sign
        self.pos = pos
        self.len = len
        # Mask of the instruction bits that the field occupies.
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        """Return the field in specification syntax, e.g. '0:s16'."""
        s = 's' if self.sign else ''
        return str(self.pos) + ':' + s + str(self.len)

    def str_extract(self):
        """Return the C expression extracting the field from 'insn'."""
        if self.sign:
            extr = 'sextract32'
        else:
            extr = 'extract32'
        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)

    def __eq__(self, other):
        # Two fields are the same when they cover the same bits with the
        # same signedness; the mask encodes both position and length.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
316
317
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        # subs: the component fields, most significant first
        # mask: the union of the instruction bits covered by SUBS
        self.subs = subs
        self.mask = mask
        # Signedness of the whole is that of the most significant part.
        self.sign = subs[0].sign

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression concatenating all sub-fields."""
        expr = '0'
        shift = 0
        # Build from the least significant component upwards; each later
        # component is deposited above the bits accumulated so far.
        for sub in self.subs[::-1]:
            piece = sub.str_extract()
            if shift == 0:
                expr = piece
            else:
                expr = 'deposit32({0}, {1}, {2}, {3})' \
                       .format(expr, shift, 32 - shift, piece)
            shift += sub.len
        return expr

    def __eq__(self, other):
        if len(self.subs) != len(other.subs):
            return False
        for mine, theirs in zip(self.subs, other.subs):
            if mine.__class__ != theirs.__class__ or mine != theirs:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)
# end MultiField
351
352
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for the field: the constant itself."""
        return str(self.value)

    # Python 3 never calls __cmp__, so without explicit rich comparisons
    # two equal constants would compare by identity and appear unequal.
    def __eq__(self, other):
        return self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)

    # Retained for Python 2 callers that order constants.
    def __cmp__(self, other):
        return self.value - other.value
# end ConstField
369
370
class FunctionField:
    """Class representing a field passed through an expander"""
    def __init__(self, func, base):
        # func: name of the C function applied to the extracted value
        # base: the underlying field object whose value is transformed
        self.func = func
        self.base = base
        # Bit coverage and signedness are inherited from the base field.
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return ''.join([self.func, '(', str(self.base), ')'])

    def str_extract(self):
        """Return the C expression: FUNC applied to the base extraction."""
        return ''.join([self.func, '(', self.base.str_extract(), ')'])

    def __eq__(self, other):
        if self.func != other.func:
            return False
        return self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
391
392
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds):
        self.name = nm
        # Member names are kept sorted so the emitted struct layout does
        # not depend on the order they were given in.
        self.fields = sorted(flds)

    def __str__(self):
        return '{0} {1}'.format(self.name, str(self.fields))

    def struct_name(self):
        """Return the name of the C structure for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef of the structure, one int per member."""
        pieces = ['typedef struct {\n']
        for member in self.fields:
            pieces.extend(['    int ', member, ';\n'])
        pieces.extend(['} ', self.struct_name(), ';\n\n'])
        output(*pieces)
# end Arguments
411
412
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds):
        self.name = name
        self.lineno = lineno
        # For a Format this is its Arguments; for a Pattern, its Format.
        self.base = base
        self.fixedbits, self.fixedmask = fixb, fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds

    def __str__(self):
        # name, then the base's name (or the raw fields when there is no
        # base), then the fixed bit pattern.
        parts = [self.name]
        if self.base:
            parts.append(self.base.name)
        else:
            parts.append(str(self.fields))
        parts.append(str_match_bits(self.fixedbits, self.fixedmask))
        return ' '.join(parts)

    def str1(self, i):
        """Return the string form indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General
437
438
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the C function extracting this format."""
        return 'extract_{0}'.format(self.name)

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        struct = self.base.struct_name()
        output('static void ', self.extract_name(), '(',
               struct, ' *a, ', insntype, ' insn)\n{\n')
        for member, fld in self.fields.items():
            expr = fld.str_extract()
            output('    a->', member, ' = ', expr, ';\n')
        output('}\n\n')
# end Format
452
453
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translator prototype."""
        global translate_scope
        global translate_prefix
        arg_type = 'arg_' + self.name
        output('typedef ', self.base.base.struct_name(),
               ' ', arg_type, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, ', arg_type,
               ' *a, ', insntype, ' insn);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code run once this pattern has been matched.

        I is the indentation.  EXTRACTED tells whether the format's
        fields were already extracted by an enclosing tree node.
        OUTERBITS and OUTERMASK are accepted for interface parity with
        Tree.output_code and are not used here.
        """
        global translate_prefix
        pad = str_indent(i)
        slot = 'u.f_' + self.base.base.name
        output(pad, '/* line ', str(self.lineno), ' */\n')
        if not extracted:
            output(pad, self.base.extract_name(), '(&', slot, ', insn);\n')
        # Fields owned by the pattern itself are assigned after the
        # format's extraction.
        for member, fld in self.fields.items():
            output(pad, slot, '.', member, ' = ', fld.str_extract(), ';\n')
        output(pad, 'return ', translate_prefix, '_', self.name,
               '(ctx, &', slot, ', insn);\n')
# end Pattern
478
479
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO.

    NAME is the field name without the leading '%'.  Each token is
    either an unnamed_field ('pos:len' or 'pos:slen') or a single
    '!function=ident'.  The resulting Field, MultiField or FunctionField
    is recorded in the global 'fields' table.  Any problem is reported
    through error(), which terminates the program.
    """
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for t in toks:
        if re_fullmatch('!function=' + re_ident, t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=', 1)[1]
            continue

        if re_fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            subtoks = t.split(':s')
            sign = True
        elif re_fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            subtoks = t.split(':')
            sign = False
        else:
            error(lineno, 'invalid field token "{0}"'.format(t))
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, 'field {0} too large'.format(t))
        subs.append(Field(sign, po, le))
        width += le

    # The grammar requires at least one unnamed_field; without this check
    # a line holding only '!function=...' would fail with an IndexError
    # inside MultiField instead of a diagnostic.
    if not subs:
        error(lineno, 'field {0} has no bit ranges'.format(name))
    if width > insnwidth:
        error(lineno, 'field too large')
    if len(subs) == 1:
        f = subs[0]
    else:
        mask = 0
        for s in subs:
            if mask & s.mask:
                error(lineno, 'field components overlap')
            mask |= s.mask
        f = MultiField(subs, mask)
    if func:
        f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
535
536
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO.

    NAME is the set's name without the leading '&'; every token must be
    a distinct identifier.  The new Arguments object is stored in the
    global 'arguments' table.
    """
    global arguments
    global re_ident

    members = []
    for tok in toks:
        if re_fullmatch(re_ident, tok) is None:
            error(lineno, 'invalid argument set token "{0}"'.format(tok))
        if members.count(tok):
            error(lineno, 'duplicate argument "{0}"'.format(tok))
        members.append(tok)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members)
# end parse_arguments
554
555
def lookup_field(lineno, name):
    """Return the declared field NAME, or report it as undefined."""
    global fields
    try:
        return fields[name]
    except KeyError:
        error(lineno, 'undefined field', name)
561
562
def add_field(lineno, flds, new_name, f):
    """Store field F under NEW_NAME in FLDS and return FLDS.

    A name that is already present is reported through error().
    """
    duplicate = new_name in flds
    if duplicate:
        error(lineno, 'duplicate field', new_name)
    flds.update({new_name: f})
    return flds
568
569
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the declared field OLD_NAME to FLDS under the name NEW_NAME."""
    imported = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, imported)
572
573
def infer_argument_set(flds):
    """Return an argument set whose members are exactly the names in FLDS.

    An existing set with the same member names is reused; otherwise a
    new one is created, named by the current number of sets, and
    registered in the global 'arguments' table.
    """
    global arguments

    existing = next((cand for cand in arguments.values()
                     if eq_fields_for_args(flds, cand.fields)), None)
    if existing is not None:
        return existing

    fresh_name = '{0}'.format(len(arguments))
    fresh = Arguments(fresh_name, flds.keys())
    arguments[fresh_name] = fresh
    return fresh
585
586
def infer_format(arg, fieldmask, flds):
    """Find or create a Format for a pattern that names none.

    ARG is the pattern's explicit argument set, or None.  FIELDMASK is
    the union of the masks of FLDS, the fields given on the pattern
    line.

    Returns a pair (fmt, const_flds): the Format holding the fields that
    are extracted from the instruction, and the constant-valued fields,
    which remain with the pattern.
    """
    # Split constants from extracted fields.  This must be an isinstance
    # test: comparing a field object to the ConstField class with 'is'
    # is never true.
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        # The format supplies only the extracted fields ...
        if not eq_fields_for_fmts(var_flds, fmt.fields):
            continue
        # ... and its argument set must have a member for every field of
        # the pattern, constants included, and no others.
        if not eq_fields_for_args(flds, fmt.base.fields):
            continue
        return (fmt, const_flds)

    name = 'Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
618
619
def parse_generic(lineno, is_format, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO.

    IS_FORMAT selects between a format definition ('@name ...') and a
    pattern definition; NAME is the identifier without any sigil.

    A format is recorded in the global 'formats' table, a pattern is
    appended to the global 'patterns' list.  Any problem is reported
    through error(), which terminates the program.
    """
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if t[0] == '&':
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if t[0] == '@':
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if t[0] == '%':
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re_fullmatch(re_ident + '=%' + re_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value.
        # An optional sign is accepted; ConstField records it.
        if re_fullmatch(re_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re_fullmatch('[01.-]+', t):
            shift = len(t)
            # fms: bits with a required value; fbs: the required values;
            # ubm: bits explicitly ignored ('-').
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re_fullmatch(re_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            # The field position is implied by the bits consumed so far.
            # Diagnose an overrun here; otherwise the position would be
            # negative and Field() would fail on a negative shift count.
            if width + shift > insnwidth:
                error(lineno, 'field {0} extends past {1} bits'
                              .format(fname, insnwidth))
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        width += shift

    # We should have filled in all of the bits of the instruction.
    # A format made only of references may leave the width at zero.
    if not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            (fmt, flds) = infer_format(arg, fieldmask, flds)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds)
        patterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic
787
788
def parse_file(f):
    """Parse all of the definitions within the open file F.

    Comments start at '#' and run to the end of the line.  A line whose
    last token is a backslash is continued on the next line.  The first
    token of each logical line decides which parser handles it.
    """
    pending = []
    for lineno, raw in enumerate(f, 1):
        # Strip the comment, then tokenize on whitespace.
        words = raw.split('#', 1)[0].split()

        if pending:
            # Next line after continuation
            pending.extend(words)
        elif words:
            pending = words
        else:
            # Empty line
            continue

        # Continuation?
        if pending[-1] == '\\':
            pending.pop()
            continue

        if len(pending) < 2:
            error(lineno, 'short line')

        name = pending[0]
        rest = pending[1:]
        pending = []

        # Determine the type of object needing to be parsed.
        sigil = name[0]
        if sigil == '%':
            parse_field(lineno, name[1:], rest)
        elif sigil == '&':
            parse_arguments(lineno, name[1:], rest)
        elif sigil == '@':
            parse_generic(lineno, True, name[1:], rest)
        else:
            parse_generic(lineno, False, name, rest)
# end parse_file
836
837
class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # fm: all instruction bits tested at or above this node
        # tm: the bits tested by this node's own switch
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs; a child is a Tree or a Pattern.
        self.subs = []
        # Format shared by everything below, set by prop_format().
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of the subtree indented by I."""
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        # The shared format lives in self.base; there is no 'format'
        # attribute on this class.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0}  {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement decoding this node.

        I is the indentation.  EXTRACTED tells whether the fields were
        already extracted by an enclosing node.  OUTERBITS/OUTERMASK
        describe the bits matched on the way down to this node and are
        used for the comment emitted with each case.
        """
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(&u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        # Order by the case value only, so that the child nodes
        # themselves are never compared.
        for b, s in sorted(self.subs, key=lambda sub: sub[0]):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
        output(ind, '}\n')
        output(ind, 'return false;\n')
# end Tree
901
902
def build_tree(pats, outerbits, outermask):
    """Return the decode Tree distinguishing the patterns in PATS.

    OUTERMASK holds the bits already tested by enclosing nodes and
    OUTERBITS the values they matched.  If PATS share no further fixed
    bit, they cannot be told apart and are reported as overlapping.
    """
    # Find the intersection of all remaining fixedmask.
    innermask = ~outermask
    for pat in pats:
        innermask &= pat.fixedmask

    if innermask == 0:
        pnames = [p.name + ':' + str(p.lineno) for p in pats]
        error(pats[0].lineno, 'overlapping patterns:', pnames)

    fullmask = outermask | innermask

    # Sort each element of pats into the bin selected by the mask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    # We must recurse if any bin has more than one element or if
    # the single element in the bin has not been fully matched.
    node = Tree(fullmask, innermask)

    for bits, members in bins.items():
        first = members[0]
        fully_matched = (first.fixedmask & ~fullmask) == 0
        if len(members) == 1 and fully_matched:
            child = first
        else:
            child = build_tree(members, bits | outerbits, fullmask)
        node.subs.append((bits, child))

    return node
# end build_tree
938
939
def prop_format(tree):
    """Propagate Format objects into the decode tree.

    After the call, tree.base is the format shared by every direct child
    of TREE, provided all children have one and it is the same object;
    otherwise tree.base is left untouched.
    """
    # Depth first search.
    for (_, child) in tree.subs:
        if isinstance(child, Tree):
            prop_format(child)

    # If all entries in SUBS have the same format, then
    # propagate that into the tree.
    bases = [child.base for (_, child) in tree.subs]
    shared = bases[0] if bases else None
    if bases:
        if shared is None:
            return
        if any(b is not shared for b in bases):
            return
    tree.base = shared
# end prop_format
960
961
def main():
    """Command line entry point.

    Usage: decodetree.py [options] input_file

      -o, --output=FILE     write the generated C code to FILE
                            instead of standard output
      -w, --insnwidth=N     instruction width in bits, 16 or 32
      --decode=NAME         name of the generated decode function,
                            which is then no longer declared static
      --translate=PREFIX    prefix of the translator functions, which
                            are then no longer declared static
    """
    global arguments
    global formats
    global patterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask

    decode_function = 'decode'
    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
    try:
        (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth'):
            # Report a non-numeric width like any other usage error
            # rather than with a traceback.
            try:
                insnwidth = int(a)
            except ValueError:
                error(0, 'invalid insn width', a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')
    input_file = args[0]
    # The context manager closes the input even if parsing raises.
    with open(input_file, 'r') as f:
        parse_file(f)

    t = build_tree(patterns, 0, 0)
    prop_format(t)

    # The output is opened only once parsing and tree building have
    # finished.
    if output_file:
        output_fd = open(output_file, 'w')
    else:
        output_fd = sys.stdout

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    out_pats = {}
    for i in patterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    # All argument structs share storage in one union local to the
    # decode function.
    i4 = str_indent(4)
    output(i4, 'union {\n')
    for n in sorted(arguments.keys()):
        f = arguments[n]
        output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
    output(i4, '} u;\n\n')

    t.output_code(4, False, 0, 0)

    output('}\n')

    if output_file:
        output_fd.close()
# end main


if __name__ == '__main__':
    main()
1061