xref: /openbmc/qemu/scripts/decodetree.py (revision 7e62609353b88d9aeee9715b534588af351075af)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import io
24import os
25import re
26import sys
27import getopt
28
# Width of one instruction in bits.
insnwidth = 32
# Width suffix used in the names of the generated extract/deposit calls.
bitop_width = 32
# Mask covering every bit of an instruction.
insnmask = 0xffffffff
# When true, a definition may be narrower than insnwidth (whole bytes only).
variablewidth = False
# Parsed fields, argument sets and formats, each keyed by name.
fields = {}
arguments = {}
formats = {}
# Every Pattern parsed so far.
allpatterns = []
# Set once any argument set is declared with !extern.
anyextern = False
# When true, reporting an error exits with status 0 instead of 1.
testforerror = False

# Name prefix and storage class of the translator function declarations.
translate_prefix = 'trans'
translate_scope = 'static '
# Name of the input file, used in error messages and generated comments.
input_file = ''
# Path and open file object of the generated output.
output_file = None
output_fd = None
# C type of the 'insn' parameter in generated functions.
insntype = 'uint32_t'
# Name prefix for generated extract functions, formats and argument sets.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
56
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each included in the message prefix only when
    truthy.  ARGS are printed after 'error: ', separated by spaces.

    If an output file is open it is closed and, unless its path starts
    with "/dev", removed.  The process then exits with status 0 when
    testforerror is set and 1 otherwise; this function never returns.
    """
    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        # Do not try to remove e.g. -o /dev/null
        if not output_file.startswith("/dev"):
            try:
                os.remove(output_file)
            except OSError:
                # Removal is best effort; the diagnostic above is what
                # matters, so do not replace it with a traceback.
                pass
    # sys.exit is always available, unlike the exit() helper that the
    # site module installs for interactive use.
    sys.exit(0 if testforerror else 1)
# end error_with_file
82
83
def error(lineno, *args):
    """Report ARGS as an error at LINENO of the current input file."""
    error_with_file(input_file, lineno, *args)
# end error
87
88
def output(*args):
    """Write each string of ARGS, in order, to the output file."""
    emit = output_fd.write
    for text in args:
        emit(text)
93
94
def output_autogen():
    """Emit the comment that marks the output file as generated."""
    banner = '/* This file is autogenerated by scripts/decodetree.py.  */\n\n'
    output(banner)
97
98
def str_indent(c):
    """Return a string made of C space characters."""
    return c * ' '
102
103
def str_fields(fields):
    """Return the sorted names of FIELDS joined with underscores."""
    return '_'.join(sorted(fields.keys()))
110
111
def whex(val):
    """Return VAL as '0x' plus hex digits, zero-padded for insnwidth."""
    digits = insnwidth // 4
    return '0x' + format(val, f'0{digits}x')
116
117
def whexC(val):
    """Return VAL as a hex C constant padded for insnwidth.

    Values that need more than 32 bits get an 'ull' suffix; values
    with bit 31 set get a 'u' suffix; smaller values get none.
    """
    text = whex(val)
    if val >= 1 << 32:
        return text + 'ull'
    if val >= 1 << 31:
        return text + 'u'
    return text
127
128
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK, top bit first.

    A bit outside MASK is shown as '.'; a bit inside MASK is shown as
    the '0' or '1' it has in BITS.  A space follows bits 24, 16 and 8.
    """
    group_ends = 0x01010100
    chars = []
    probe = 1 << (insnwidth - 1)
    while probe != 0:
        if not probe & mask:
            chars.append('.')
        elif probe & bits:
            chars.append('1')
        else:
            chars.append('0')
        if probe & group_ends:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
148
149
def is_pow2(x):
    """Return true if X has at most one bit set.

    That is, X is a power of 2; note that 0 is accepted as well.
    """
    return not (x & (x - 1))
153
154
def ctz(x):
    """Return the number of trailing zero bits of X, which must be nonzero."""
    assert x != 0
    # x & -x isolates the lowest set bit; its position is the answer.
    lowest = x & -x
    return lowest.bit_length() - 1
162
163
def is_contiguous(bits):
    """Return the position of the lowest set bit of BITS if its set bits
    form one unbroken run, and -1 otherwise (including for BITS == 0)."""
    if bits == 0:
        return -1
    shift = ctz(bits)
    # A run of ones shifted down to bit 0 becomes a power of 2 when
    # one is added to it.
    run_plus_one = (bits >> shift) + 1
    return shift if is_pow2(run_plus_one) else -1
172
173
def eq_fields_for_args(flds_a, arg):
    """Return True if argument set ARG can hold exactly the fields FLDS_A.

    ARG must have the same number of fields as FLDS_A, every one of its
    types must be the default 'int', and every name in FLDS_A must be
    one of its fields.
    """
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(name in arg.fields for name in flds_a.keys())
185
186
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B hold the same names, and each
    name maps to fields of the same class that compare equal."""
    def same(name, lhs):
        if name not in flds_b:
            return False
        rhs = flds_b[name]
        return not (lhs.__class__ != rhs.__class__ or lhs != rhs)

    if len(flds_a) != len(flds_b):
        return False
    return all(same(name, lhs) for name, lhs in flds_a.items())
197
198
class Field:
    """A simple instruction field: LEN bits starting at bit POS"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        # LEN one-bits, moved up to the field's position.
        low_ones = (1 << len) - 1
        self.mask = low_ones << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return ''.join((str(self.pos), ':', marker, str(self.len)))

    def str_extract(self):
        """Return the C expression that extracts this field from insn."""
        kind = 'sextract' if self.sign else 'extract'
        return f'{kind}{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self == other
# end Field
225
226
class MultiField:
    """Class representing a compound instruction field

    SUBS lists the component fields, most significant first, and MASK
    is the union of their masks.  The sign of the compound field is
    that of its first component.
    """
    def __init__(self, subs, mask):
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        # str() of a list uses repr() on its elements, which for the
        # component fields would print object addresses; format each
        # component with str() instead.
        return '[' + ', '.join(str(s) for s in self.subs) + ']'

    def str_extract(self):
        """Return the C expression assembling the value from its parts."""
        ret = '0'
        pos = 0
        # Start from the least significant component and deposit each
        # more significant one above the bits gathered so far.
        for f in reversed(self.subs):
            ext = f.str_extract()
            if pos == 0:
                ret = ext
            else:
                ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
            pos += f.len
        return ret

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        for a, b in zip(self.subs, other.subs):
            if a.__class__ != b.__class__ or a != b:
                return True
        return False

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
261
262
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no bits of the instruction.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the constant itself."""
        return str(self.value)

    # Python 3 ignores __cmp__, so equality must be spelled out for two
    # constants with the same value to compare equal.
    def __eq__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    # Defining __eq__ disables the default hash; keep instances hashable
    # in a way that agrees with equality.
    def __hash__(self):
        return hash(self.value)
# end ConstField
279
280
class FunctionField:
    """A field whose extracted value is passed through a C function"""
    def __init__(self, func, base):
        self.func = func
        self.base = base
        # Mask and sign are those of the field being wrapped.
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return ''.join((self.func, '(', str(self.base), ')'))

    def str_extract(self):
        """Return the C call applying the function to the base field."""
        inner = self.base.str_extract()
        return ''.join((self.func, '(ctx, ', inner, ')'))

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self == other
# end FunctionField
301
302
class ParameterField:
    """A pseudo-field whose value comes from calling a C function"""
    def __init__(self, func):
        self.func = func
        # No instruction bits are involved.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value."""
        return f'{self.func}(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self == other
# end ParameterField
322
323
class Arguments:
    """A named argument set: field names paired with their C types"""
    def __init__(self, nm, flds, types, extern):
        self.name = nm
        self.fields = flds
        self.types = types
        self.extern = extern

    def __str__(self):
        return ' '.join((self.name, str(self.fields)))

    def struct_name(self):
        """Return the name of the C struct for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C struct typedef, unless the set is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for field, ctype in zip(self.fields, self.types):
            output(f'    {ctype} {field};\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
345
346
class General:
    """State shared by instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the definition came from.
        self.name = name
        self.file = input_file
        self.lineno = lineno
        # The argument set (for a format) or the format (for a pattern).
        self.base = base
        # Bit-level description of the definition.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        return ' '.join((self.name,
                         str_match_bits(self.fixedbits, self.fixedmask)))

    def str1(self, i):
        """Return the string form of this object indented by I spaces."""
        return str_indent(i) + str(self)
# end General
367
368
class Format(General):
    """An instruction format: a reusable set of field extractions"""

    def extract_name(self):
        """Return the name of the C function extracting this format."""
        return '_'.join((decode_function, 'extract', self.name))

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        struct = self.base.struct_name()
        output(f'static void {self.extract_name()}(DisasContext *ctx, '
               f'{struct} *a, {insntype} insn)\n{{\n')
        for field_name, field in self.fields.items():
            output(f'    a->{field_name} = {field.str_extract()};\n')
        output('}\n\n')
# end Format
383
384
class Pattern(General):
    """A single instruction pattern, decoded through a Format"""

    def output_decl(self):
        """Emit the argument typedef and translator prototype."""
        struct = self.base.base.struct_name()
        output(f'typedef {struct} arg_{self.name};\n')
        output(f'{translate_scope}bool {translate_prefix}_{self.name}'
               f'(DisasContext *ctx, arg_{self.name} *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit, indented by I, the C code that tries this pattern.

        The format's extract call is emitted only when EXTRACTED is
        false.  OUTERBITS and OUTERMASK are accepted but not used.
        """
        ind = str_indent(i)
        target = f'u.f_{self.base.base.name}'
        output(f'{ind}/* {self.file}:{self.lineno} */\n')
        if not extracted:
            output(f'{ind}{self.base.extract_name()}(ctx, &{target}, insn);\n')
        # Fields owned by the pattern itself are assigned after the
        # format's extraction.
        for field_name, field in self.fields.items():
            output(f'{ind}{target}.{field_name} = {field.str_extract()};\n')
        output(f'{ind}if ({translate_prefix}_{self.name}'
               f'(ctx, &{target})) return true;\n')

    # Normal patterns do not have children, so the tree-wide passes
    # have nothing to do here.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
420
421
class MultiPattern(General):
    """A group of instruction patterns handled as one unit"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.base = None
        self.width = None
        self.pats = []
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0

    def __str__(self):
        text = 'group'
        if self.fixedbits is not None:
            text += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return text

    def output_decl(self):
        """Emit the declarations of every member."""
        for member in self.pats:
            member.output_decl()

    def prop_masks(self):
        """Compute the fixed bits/mask and undef mask common to all members."""
        fixedmask = undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for member in self.pats:
            member.prop_masks()
            fixedmask &= member.fixedmask
            undefmask &= member.undefmask

        # Drop bits from fixedmask until every member agrees on the
        # value of the bits that remain.
        fixedbits = 0
        while fixedmask != 0:
            fixedbits = None
            disagree = False
            for member in self.pats:
                bits = member.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = bits
                elif bits != fixedbits:
                    fixedmask &= ~(fixedbits ^ bits)
                    disagree = True
                    break
            if not disagree:
                break

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode tree of every member."""
        for member in self.pats:
            member.build_tree()

    def prop_format(self):
        """Propagate formats within every member."""
        for member in self.pats:
            member.prop_format()

    def prop_width(self):
        """Compute the members' widths and require them all to agree."""
        common = None
        for member in self.pats:
            member.prop_width()
            if common is None:
                common = member.width
            elif common != member.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = common

# end MultiPattern
496
497
class IncMultiPattern(MultiPattern):
    """A set of instruction patterns that are allowed to overlap"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit, indented by I, the code trying each member in order."""
        ind = str_indent(i)
        for member in self.pats:
            if outermask == member.fixedmask:
                # Nothing left to test; try the member directly.
                member.output_code(i, extracted,
                                   member.fixedbits, member.fixedmask)
                continue
            # Test only the bits the enclosing code has not yet matched.
            innermask = member.fixedmask & ~outermask
            innerbits = member.fixedbits & ~outermask
            output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
            output(ind, f'    /* {str_match_bits(member.fixedbits, member.fixedmask)} */\n')
            member.output_code(i + 4, extracted,
                               member.fixedbits, member.fixedmask)
            output(ind, '}\n')

    def build_tree(self):
        """Reject an empty group, then build the members' trees."""
        if not self.pats:
            error_with_file(self.file, self.lineno, 'empty pattern group')
        super().build_tree()

#end IncMultiPattern
521
522
class Tree:
    """Class representing a node in a decode tree

    FIXEDMASK holds all bits matched at or above this node and THISMASK
    the bits this node switches on.  SUBS is a list of (bits, child)
    pairs; BASE is the format shared by all children, or None.
    """

    def __init__(self, fm, tm):
        self.fixedmask = fm
        self.thismask = tm
        self.subs = []
        self.base = None

    def str1(self, i):
        """Return a multi-line description of the node indented by I."""
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The shared format is kept in self.base; no 'format' attribute
        # is ever set on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f'  {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit, indented by I, the C switch statement for this node.

        EXTRACTED says whether the format's fields have already been
        extracted; OUTERBITS/OUTERMASK describe the bits matched so far.
        """
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, '    break;\n')
        output(ind, '}\n')
# end Tree
586
587
class ExcMultiPattern(MultiPattern):
    """A set of instruction patterns that must not overlap"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the decoder by delegating to the decomposed Tree node."""
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree distinguishing PATS by their fixed bits.

        OUTERBITS/OUTERMASK are the bits already matched by the caller.
        """
        # Bits fixed in every remaining pattern and not matched so far.
        innermask = ~outermask & insnmask
        for p in pats:
            innermask &= p.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                leaf = Tree(outermask, innermask)
                leaf.subs.append((0, pats[0]))
                return leaf

            # Several patterns and nothing left to tell them apart.
            lines = ['overlapping patterns:']
            lines.extend(f'{p.file}:{p.lineno}: {p}' for p in pats)
            error_with_file(pats[0].file, pats[0].lineno, '\n'.join(lines))

        fullmask = outermask | innermask

        # Group the patterns by the value of their bits under innermask.
        bins = {}
        for p in pats:
            bins.setdefault(p.fixedbits & innermask, []).append(p)

        node = Tree(fullmask, innermask)
        for b, members in bins.items():
            first = members[0]
            if len(members) == 1 and first.fixedmask & ~fullmask == 0:
                # A lone pattern with no fixed bits left to test.
                sub = first
            else:
                # Either several candidates remain, or the only one
                # still has fixed bits that were not matched.
                sub = ExcMultiPattern.__build_tree(members, b | outerbits,
                                                   fullmask)
            node.subs.append((b, sub))

        return node

    def build_tree(self):
        """Build the members' trees, then the tree for this group."""
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first: settle the sub-trees before this node.
        for _, child in tree.subs:
            if isinstance(child, Tree):
                ExcMultiPattern.__prop_format(child)

        # Only when every entry of SUBS shares one format is that
        # format recorded on the tree itself.
        common = None
        for _, child in tree.subs:
            if common is None:
                common = child.base
            if common is None or common is not child.base:
                return
        tree.base = common

    def prop_format(self):
        """Propagate formats in the members, then in this group's tree."""
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
668
669
def parse_field(lineno, name, toks):
    """Parse the definition of field NAME from TOKS at LINENO.

    Each token is either 'pos:len', 'pos:slen' (signed) or
    '!function=func'.  The resulting field object is stored in the
    global fields table; any problem is reported through error().
    """
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    parts = []
    total = 0
    func = None
    for tok in toks:
        if tok.startswith('!function='):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        # Position, optional 's' for a signed extract, then length.
        m = re.fullmatch('([0-9]+):(s?)([0-9]+)', tok)
        if not m:
            error(lineno, f'invalid field token "{tok}"')
        sign = m.group(2) == 's'
        po = int(m.group(1))
        le = int(m.group(3))
        if po + le > insnwidth:
            error(lineno, f'field {tok} too large')
        parts.append(Field(sign, po, le))
        total += le

    if total > insnwidth:
        error(lineno, 'field too large')

    if not parts:
        # With no bits at all, only a function can supply the value.
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(parts) == 1:
            f = parts[0]
        else:
            mask = 0
            for part in parts:
                if mask & part.mask:
                    error(lineno, 'field components overlap')
                mask |= part.mask
            f = MultiField(parts, mask)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
730
731
def parse_arguments(lineno, name, toks):
    """Parse the definition of argument set NAME from TOKS at LINENO.

    Each token is '!extern', a field name, or 'name:type'; a field
    without a type is an 'int'.  The result is stored in the global
    arguments table.
    """
    global anyextern

    member = re.compile(f'({re_C_ident})(?::({re_C_ident}))?')
    flds = []
    types = []
    extern = False
    for tok in toks:
        if tok == '!extern':
            extern = anyextern = True
            continue
        m = member.fullmatch(tok)
        if not m:
            error(lineno, f'invalid argument set token "{tok}"')
        field = m.group(1)
        ctype = m.group(2) or 'int'
        if field in flds:
            error(lineno, f'duplicate argument "{field}"')
        flds.append(field)
        types.append(ctype)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, flds, types, extern)
# end parse_arguments
761
762
def lookup_field(lineno, name):
    """Return the field called NAME, reporting an error if undefined."""
    try:
        return fields[name]
    except KeyError:
        error(lineno, 'undefined field', name)
768
769
def add_field(lineno, flds, new_name, f):
    """Store F in FLDS under NEW_NAME and return FLDS.

    A name already present in FLDS is reported as an error.
    """
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds.update({new_name: f})
    return flds
775
776
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the defined field OLD_NAME to FLDS under NEW_NAME; return FLDS."""
    source = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, source)
779
780
def infer_argument_set(flds):
    """Return an argument set able to hold FLDS.

    An existing set accepted by eq_fields_for_args is reused; otherwise
    a new non-extern set of 'int' fields is created and registered.
    """
    existing = next((candidate for candidate in arguments.values()
                     if eq_fields_for_args(flds, candidate)), None)
    if existing is not None:
        return existing

    name = f'{decode_function}{len(arguments)}'
    created = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    arguments[name] = created
    return created
793
794
def infer_format(arg, fieldmask, flds, width):
    """Find or create a format for a pattern that did not name one.

    ARG is the pattern's explicit argument set or None, FIELDMASK the
    union of its field masks, FLDS its fields by name and WIDTH its
    width in bits.

    Returns (fmt, const_flds): the format, which extracts the fields
    taken from the instruction, and the constant fields, which the
    pattern must assign itself.
    """
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        # Test the instance against the class; comparing the two by
        # identity can never succeed.
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    names = set(flds)
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        # The format supplies only the extracted fields; the constants
        # are returned separately for the pattern to assign.
        if not eq_fields_for_fmts(var_flds, fmt.fields):
            continue
        # The format's argument set must hold exactly the fields of
        # this pattern, or the caller would find a field without a
        # place to store it, or one that is never initialized.
        if set(fmt.base.fields) != names:
            continue
        return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
829
830
831def parse_generic(lineno, parent_pat, name, toks):
832    """Parse one instruction format from TOKS at LINENO"""
833    global fields
834    global arguments
835    global formats
836    global allpatterns
837    global re_arg_ident
838    global re_fld_ident
839    global re_fmt_ident
840    global re_C_ident
841    global insnwidth
842    global insnmask
843    global variablewidth
844
845    is_format = parent_pat is None
846
847    fixedmask = 0
848    fixedbits = 0
849    undefmask = 0
850    width = 0
851    flds = {}
852    arg = None
853    fmt = None
854    for t in toks:
855        # '&Foo' gives a format an explicit argument set.
856        if re.fullmatch(re_arg_ident, t):
857            tt = t[1:]
858            if arg:
859                error(lineno, 'multiple argument sets')
860            if tt in arguments:
861                arg = arguments[tt]
862            else:
863                error(lineno, 'undefined argument set', t)
864            continue
865
866        # '@Foo' gives a pattern an explicit format.
867        if re.fullmatch(re_fmt_ident, t):
868            tt = t[1:]
869            if fmt:
870                error(lineno, 'multiple formats')
871            if tt in formats:
872                fmt = formats[tt]
873            else:
874                error(lineno, 'undefined format', t)
875            continue
876
877        # '%Foo' imports a field.
878        if re.fullmatch(re_fld_ident, t):
879            tt = t[1:]
880            flds = add_field_byname(lineno, flds, tt, tt)
881            continue
882
883        # 'Foo=%Bar' imports a field with a different name.
884        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
885            (fname, iname) = t.split('=%')
886            flds = add_field_byname(lineno, flds, fname, iname)
887            continue
888
889        # 'Foo=number' sets an argument field to a constant value
890        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
891            (fname, value) = t.split('=')
892            value = int(value)
893            flds = add_field(lineno, flds, fname, ConstField(value))
894            continue
895
896        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
897        # required ones, or dont-cares.
898        if re.fullmatch('[01.-]+', t):
899            shift = len(t)
900            fms = t.replace('0', '1')
901            fms = fms.replace('.', '0')
902            fms = fms.replace('-', '0')
903            fbs = t.replace('.', '0')
904            fbs = fbs.replace('-', '0')
905            ubm = t.replace('1', '0')
906            ubm = ubm.replace('.', '0')
907            ubm = ubm.replace('-', '1')
908            fms = int(fms, 2)
909            fbs = int(fbs, 2)
910            ubm = int(ubm, 2)
911            fixedbits = (fixedbits << shift) | fbs
912            fixedmask = (fixedmask << shift) | fms
913            undefmask = (undefmask << shift) | ubm
914        # Otherwise, fieldname:fieldwidth
915        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
916            (fname, flen) = t.split(':')
917            sign = False
918            if flen[0] == 's':
919                sign = True
920                flen = flen[1:]
921            shift = int(flen, 10)
922            if shift + width > insnwidth:
923                error(lineno, f'field {fname} exceeds insnwidth')
924            f = Field(sign, insnwidth - width - shift, shift)
925            flds = add_field(lineno, flds, fname, f)
926            fixedbits <<= shift
927            fixedmask <<= shift
928            undefmask <<= shift
929        else:
930            error(lineno, f'invalid token "{t}"')
931        width += shift
932
933    if variablewidth and width < insnwidth and width % 8 == 0:
934        shift = insnwidth - width
935        fixedbits <<= shift
936        fixedmask <<= shift
937        undefmask <<= shift
938        undefmask |= (1 << shift) - 1
939
940    # We should have filled in all of the bits of the instruction.
941    elif not (is_format and width == 0) and width != insnwidth:
942        error(lineno, f'definition has {width} bits')
943
944    # Do not check for fields overlapping fields; one valid usage
945    # is to be able to duplicate fields via import.
946    fieldmask = 0
947    for f in flds.values():
948        fieldmask |= f.mask
949
950    # Fix up what we've parsed to match either a format or a pattern.
951    if is_format:
952        # Formats cannot reference formats.
953        if fmt:
954            error(lineno, 'format referencing format')
955        # If an argument set is given, then there should be no fields
956        # without a place to store it.
957        if arg:
958            for f in flds.keys():
959                if f not in arg.fields:
960                    error(lineno, f'field {f} not in argument set {arg.name}')
961        else:
962            arg = infer_argument_set(flds)
963        if name in formats:
964            error(lineno, 'duplicate format name', name)
965        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
966                     undefmask, fieldmask, flds, width)
967        formats[name] = fmt
968    else:
969        # Patterns can reference a format ...
970        if fmt:
971            # ... but not an argument simultaneously
972            if arg:
973                error(lineno, 'pattern specifies both format and argument set')
974            if fixedmask & fmt.fixedmask:
975                error(lineno, 'pattern fixed bits overlap format fixed bits')
976            if width != fmt.width:
977                error(lineno, 'pattern uses format of different width')
978            fieldmask |= fmt.fieldmask
979            fixedbits |= fmt.fixedbits
980            fixedmask |= fmt.fixedmask
981            undefmask |= fmt.undefmask
982        else:
983            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
984        arg = fmt.base
985        for f in flds.keys():
986            if f not in arg.fields:
987                error(lineno, f'field {f} not in argument set {arg.name}')
988            if f in fmt.fields.keys():
989                error(lineno, f'field {f} set by format and pattern')
990        for f in arg.fields:
991            if f not in flds.keys() and f not in fmt.fields.keys():
992                error(lineno, f'field {f} not initialized')
993        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
994                      undefmask, fieldmask, flds, width)
995        parent_pat.pats.append(pat)
996        allpatterns.append(pat)
997
998    # Validate the masks that we have assembled.
999    if fieldmask & fixedmask:
1000        error(lineno, 'fieldmask overlaps fixedmask ',
1001              f'({whex(fieldmask)} & {whex(fixedmask)})')
1002    if fieldmask & undefmask:
1003        error(lineno, 'fieldmask overlaps undefmask ',
1004              f'({whex(fieldmask)} & {whex(undefmask)})')
1005    if fixedmask & undefmask:
1006        error(lineno, 'fixedmask overlaps undefmask ',
1007              f'({whex(fixedmask)} & {whex(undefmask)})')
1008    if not is_format:
1009        allbits = fieldmask | fixedmask | undefmask
1010        if allbits != insnmask:
1011            error(lineno, 'bits left unspecified ',
1012                  f'({whex(allbits ^ insnmask)})')
# end parse_generic
1014
1015
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file.

    F is iterated line by line.  PARENT_PAT is the multi-pattern whose
    'pats' list receives the groups opened at the outermost level; it is
    also handed to parse_generic for each pattern line.

    Lines ending in a lone backslash token are joined with the following
    line, '#' starts a comment, and every '{' or '[' group indents its
    contents by two further columns.  Any problem is reported through
    error(), which does not return.
    """
    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation; the indent and starting
            # line number of the first physical line remain in force.
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack is expected to fail here; do not hide
            # any other exception behind the 'extra close brace' message.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            # The offending token is the first one of the logical line,
            # so report the line on which that logical line started.
            error(start_lineno, f'invalid token "{name}"')
        toks = []

    # A backslash on the final line leaves tokens that were never
    # dispatched; report that instead of silently dropping them.
    if toks:
        error(start_lineno, 'unterminated line continuation')
    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1129
1130
class SizeTree:
    """Interior node of the size decode tree.

    'mask' holds the instruction bits tested at this node, 'subs' the
    (bits, subtree) pairs selected by those bits, and 'width' the number
    of instruction bits that must be loaded before the test can be made.
    """

    def __init__(self, m, w):
        self.mask = m
        self.width = w
        self.subs = []
        self.base = None

    def str1(self, i):
        """Return a textual dump of this subtree, indented by I columns."""
        pad = str_indent(i)
        pieces = [pad + whex(self.mask) + ' [\n']
        for bits, sub in self.subs:
            pieces.append(pad + f'  {whex(bits)}:\n')
            pieces.append(sub.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement that selects among the subtrees.

        EXTRACTED is the number of instruction bits already loaded;
        OUTERBITS and OUTERMASK describe the bits matched so far and are
        only used for the comment emitted with each case.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        shift = is_contiguous(self.mask)
        if shift > 0:
            switch_expr = f'(insn >> {shift}) & {self.mask >> shift:#x}'

            def case_label(bits):
                return hex(bits >> shift)
        else:
            switch_expr = f'insn & {whexC(self.mask)}'
            case_label = whexC

        output(pad, 'switch (', switch_expr, ') {\n')
        # The accumulated mask is the same for every case of this switch.
        innermask = outermask | self.mask
        for bits, sub in sorted(self.subs):
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            sub.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1189
class SizeLeaf:
    """Leaf of the size decode tree.

    'mask' is the mask recorded for this leaf and 'width' the number of
    instruction bits that must be loaded once the leaf is reached.
    """

    def __init__(self, m, w):
        self.mask, self.width = m, w

    def str1(self, i):
        """Return the mask in hex, indented by I columns."""
        return ''.join((str_indent(i), whex(self.mask)))

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that loads any missing bytes and returns insn.

        OUTERBITS and OUTERMASK are accepted for interface parity with
        SizeTree.output_code and are not used here.
        """
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            have = extracted // 8
            want = self.width // 8
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {have}, {want});\n')
        output(pad, 'return insn;\n')
# end SizeLeaf
1214
1215
def build_size_tree(pats, width, outerbits, outermask):
    """Build the tree used to determine the size of an instruction.

    PATS are the candidate patterns, WIDTH is the number of leading
    instruction bits under consideration, and OUTERBITS/OUTERMASK are
    the fixed bits already tested by the enclosing nodes.  Returns a
    SizeLeaf when every pattern has the same width, otherwise a SizeTree
    (possibly built for a larger WIDTH) that distinguishes the patterns
    by their fixed bits.
    """
    # Collect the mask of bits that are fixed in this width
    innermask = (0xff << (insnwidth - width)) & ~outermask
    widest = None
    uniform = True
    for pat in pats:
        innermask &= pat.fixedmask
        if widest is None:
            widest = pat.width
        elif pat.width != widest:
            uniform = False
            if pat.width > widest:
                widest = pat.width

    if uniform:
        return SizeLeaf(innermask, widest)

    if not innermask:
        # Nothing to test in this byte; look at the next one if any
        # pattern is wide enough to have one.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    # Group the patterns by the value of the bits tested here.
    groups = {}
    for pat in pats:
        groups.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(groups) == 1:
        # Every pattern agrees on these bits, so no switch is needed;
        # move on to the next byte.
        (bits, members), = groups.items()
        return build_size_tree(members, width + 8, bits | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for bits, members in groups.items():
        sub = build_size_tree(members, width, bits | outerbits, fullmask)
        node.subs.append((bits, sub))
    return node
# end build_size_tree
1266
1267
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    Each SizeTree node has its 'width' replaced by the smallest width
    found among its subtrees; a leaf just reports its own width.
    Returns the resulting width of TREE.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _bits, sub in tree.subs:
        sub_width = prop_size(sub)
        if narrowest is None or sub_width < narrowest:
            narrowest = sub_width
    # A node never needs fewer bits than were required to reach it.
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1283
1284
def main():
    """Parse the command line, read the input files and emit the decoder.

    Options select the output file, the name and linkage of the
    generated decode function, the prefix and linkage of the translate
    functions, and the (fixed or variable) instruction width.  Every
    positional argument is an input file; all of them are parsed into
    one top-level pattern group.  The generated C code is written to the
    output file, or to stdout when none was given.

    Terminates the process via sys.exit(); with --test-for-error the
    exit status of a successful run is 1 instead of 0.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global bitop_width
    global variablewidth
    global anyextern
    global testforerror

    decode_scope = 'static '

    # insn width -> (C type, mask of all insn bits, width for bit ops).
    # Assigning all three together keeps them consistent even when a
    # width option is given more than once.
    width_info = {
        16: ('uint16_t', 0xffff, 32),
        32: ('uint32_t', 0xffffffff, 32),
        64: ('uint64_t', 0xffffffffffffffff, 64),
    }

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=', 'test-for-error']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # An explicitly named decode function gets external linkage.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            try:
                insnwidth = int(a)
            except ValueError:
                error(0, 'invalid insn width', a)
            if insnwidth not in width_info:
                error(0, 'cannot handle insns of width', insnwidth)
            insntype, insnmask, bitop_width = width_info[insnwidth]
        elif o == '--test-for-error':
            testforerror = True
        else:
            # Accepted by getopt but not handled above (e.g. '-v').
            # Report it properly; an assert would vanish under -O.
            error(0, 'unhandled option', o)

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        arguments[n].output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        formats[n].output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               '    ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
    # sys.exit() rather than the exit() helper, which is only installed
    # by the site module and is meant for interactive use.
    sys.exit(1 if testforerror else 0)
# end main
1437
1438
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()
1441