11547a2d3STaylor Simpson#!/usr/bin/env python3
21547a2d3STaylor Simpson
31547a2d3STaylor Simpson##
41547a2d3STaylor Simpson##  Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>
51547a2d3STaylor Simpson##
61547a2d3STaylor Simpson##  This program is free software; you can redistribute it and/or modify
71547a2d3STaylor Simpson##  it under the terms of the GNU General Public License as published by
81547a2d3STaylor Simpson##  the Free Software Foundation; either version 2 of the License, or
91547a2d3STaylor Simpson##  (at your option) any later version.
101547a2d3STaylor Simpson##
111547a2d3STaylor Simpson##  This program is distributed in the hope that it will be useful,
121547a2d3STaylor Simpson##  but WITHOUT ANY WARRANTY; without even the implied warranty of
131547a2d3STaylor Simpson##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
141547a2d3STaylor Simpson##  GNU General Public License for more details.
151547a2d3STaylor Simpson##
161547a2d3STaylor Simpson##  You should have received a copy of the GNU General Public License
171547a2d3STaylor Simpson##  along with this program; if not, see <http://www.gnu.org/licenses/>.
181547a2d3STaylor Simpson##
191547a2d3STaylor Simpson
201547a2d3STaylor Simpsonimport io
211547a2d3STaylor Simpsonimport re
221547a2d3STaylor Simpson
231547a2d3STaylor Simpsonimport sys
241547a2d3STaylor Simpsonimport textwrap
251547a2d3STaylor Simpsonimport iset
261547a2d3STaylor Simpsonimport hex_common
271547a2d3STaylor Simpson
# Map each tag to its encoding string with the spaces removed and the
# characters reversed.  Tags whose encoding is the "MISSING ENCODING"
# placeholder are left out.
encs = {
    tag: iset.iset[tag]["enc"].replace(" ", "")[::-1]
    for tag in iset.tags
    if iset.iset[tag]["enc"] != "MISSING ENCODING"
}
331547a2d3STaylor Simpson
341547a2d3STaylor Simpson
# Register operand in an instruction syntax string, e.g. "Rd32".
# The adjacent raw literals concatenate into a single pattern.
regre = re.compile(
    r"((?<!DUP)[MNORCPQXSGVZA])"  # 1: type letter, not directly after "DUP"
    r"([stuvwxyzdefg]+)"          # 2: one or more operand id letters
    r"([.]?[LlHh]?)"              # 3: optional "." and optional L/l/H/h
    r"(\d+S?)"                    # 4: digits, optionally followed by "S"
)
# Immediate operand in an instruction syntax string, e.g. "#s16" or "#u6:2".
immre = re.compile(
    r"[#]([rRsSuUm])"             # 1: immediate type letter after "#"
    r"(\d+)"                      # 2: digits
    r"(?:[:](\d+))?"              # 3: optional digits after ":"
)
371547a2d3STaylor Simpson
381547a2d3STaylor Simpson
def ordered_unique(l):
    """
    Return the distinct items of l as a list, in order of first appearance.

    l -- any iterable of hashable items
    """
    # dict keys are unique and keep insertion order, so this de-duplicates
    # in a single pass instead of calling l.index() once per distinct item.
    return list(dict.fromkeys(l))
411547a2d3STaylor Simpson
# Expected number of encodable choices for the listed register types.
# gen_decodetree_file attaches a "!function=decode_mapped_reg_*" annotation
# to operands of these types whose encoding offers a different count.
num_registers = dict(R=32, V=32)

# Encoding characters that gen_decodetree_file replaces with "." when it
# writes the instruction format.
operand_letters = set("PiIrstuvwxyzdefg")
621547a2d3STaylor Simpson
#
# The encodings of these instructions contain operand letters that do not
# correspond to any operand in the instruction semantics.
# gen_decodetree_file rewrites those letters to "-" so that QEMU decodetree
# treats the bits as ignored.
#
# Tags with an unused "t" letter
tags_with_unused_t_encoding = {
    "R6_release_at_vi",
    "R6_release_st_vi",
}

# Tags with an unused "d" letter: every tag above, plus the following
tags_with_unused_d_encoding = tags_with_unused_t_encoding | {
    "S4_stored_rl_at_vi",
    "S4_stored_rl_st_vi",
    "S2_storew_rl_at_vi",
    "S2_stored_rl_at_vi",
    "S2_storew_rl_st_vi",
}
821547a2d3STaylor Simpson
def skip_tag(tag, class_to_decode):
    """Return True when tag's "enc_class" differs from class_to_decode."""
    return iset.iset[tag]["enc_class"] != class_to_decode
861547a2d3STaylor Simpson
871547a2d3STaylor Simpson
def _reg_field_def(tag, enc, reg):
    """
    Build the decodetree field definition line for one register operand.

    tag -- instruction tag; prefixes the field name and the error messages
    enc -- the tag's entry in encs (the encoding string, reversed)
    reg -- one regre match: (type letter, id letters, suffix, size)

    Returns the complete line, including the trailing newline.
    Raises Exception when the operand's letter is absent from the encoding,
    occurs in more than one run, or its run length does not match the size
    given in the syntax.
    """
    reg_type, reg_id, _, reg_enc_size = reg
    num_choices = int(reg_enc_size.rstrip("S"))
    # Only the first id letter is looked up in the encoding
    runs = re.findall(reg_id[0] + "+", enc)

    # Check for some errors
    if not runs:
        raise Exception(f"{tag} missing register field!")
    if len(runs) > 1:
        raise Exception(f"{tag} has split register field!")
    run = runs[0]
    if 2 ** len(run) != num_choices:
        raise Exception(f"{tag} has incorrect register field width!")

    # enc is reversed, so the index into it is used as the field position
    line = f"%{tag}_{reg_type}{reg_id}\t{enc.index(run)}:{len(run)}"

    if reg_type in num_registers and num_choices != num_registers[reg_type]:
        # One "_" per id letter, e.g. R_16 for Rd16 and R__8 for Rdd8
        mapping = reg_type + "_" * len(reg_id) + reg_enc_size
        line += f"\t!function=decode_mapped_reg_{mapping}"
    return line + "\n"


def _imm_field_def(tag, enc, imm):
    """
    Build the decodetree field definition line for one immediate operand.

    tag -- instruction tag; prefixes the field name
    enc -- the tag's entry in encs (the encoding string, reversed)
    imm -- one immre match: (type letter, digits, optional digits)

    Returns the complete line, including the trailing newline.
    """
    imm_type = imm[0]
    # Lower case immediate types use "i" in the encoding, upper case use "I"
    imm_letter = "i" if imm_type.islower() else "I"
    # Types s/S/r/R are marked signed; only the first listed run is marked
    sign_mark = "s" if imm_type.lower() in "sr" else ""
    fields = []
    # List the runs of the immediate letter from the highest position down
    for m in reversed(list(re.finditer(imm_letter + "+", enc))):
        fields.append(f"{m.start()}:{sign_mark}{m.end() - m.start()}")
        sign_mark = ""
    field_str = " ".join(fields)
    return f"%{tag}_{imm_type}{imm_letter}\t{field_str}\n"


##
## Generate the QEMU decodetree file for each instruction in class_to_decode
##     For A2_add: Rd32=add(Rs32,Rt32)
##     We produce:
##     %A2_add_Rd   0:5
##     %A2_add_Rs   16:5
##     %A2_add_Rt   8:5
##     @A2_add  11110011000.......-.....---..... Rd=%A2_add_Rd Rs=%A2_add_Rs Rt=%A2_add_Rt %PP
##     A2_add   ..................-.....---..... @A2_add
##
##     f               -- writable text file object receiving the output
##     class_to_decode -- "enc_class" value selecting the tags to emit;
##                        a value starting with "SUBINSN_" selects the
##                        subinstruction layout (no %PP field, encoding
##                        padded with "---")
##
##     Raises Exception when a register operand can't be matched to a
##     single, correctly sized field in the encoding.
##
def gen_decodetree_file(f, class_to_decode):
    is_subinsn = class_to_decode.startswith("SUBINSN_")
    f.write(f"## DO NOT MODIFY - This file is generated by {sys.argv[0]}\n\n")
    if not is_subinsn:
        f.write("%PP\t14:2\n\n")

    for tag in sorted(encs.keys(), key=iset.tags.index):
        if skip_tag(tag, class_to_decode):
            continue

        # encs holds the reversed encoding; enc_str is the readable order
        enc = encs[tag]
        enc_str = enc[::-1]
        f.write("#" * 80 + "\n" +
                f"## {tag}:\t{enc_str}\n" +
                "##\n")

        # The subinstructions come with a 13-bit encoding, but
        # decodetree.py needs 16 bits
        if is_subinsn:
            enc_str = "---" + enc_str

        syntax = iset.iset[tag]["syntax"]
        regs = ordered_unique(regre.findall(syntax))
        imms = ordered_unique(immre.findall(syntax))

        # Write the field definitions for the registers
        for reg in regs:
            f.write(_reg_field_def(tag, enc, reg))

        # Write the field definitions for the immediates
        for imm in imms:
            f.write(_imm_field_def(tag, enc, imm))

        ## Handle instructions with unused encoding letters
        ## Change the unused letters to ignored
        if tag in tags_with_unused_d_encoding:
            enc_str = enc_str.replace("d", "-")
        if tag in tags_with_unused_t_encoding:
            enc_str = enc_str.replace("t", "-")

        # Replace the operand letters with .
        for letter in operand_letters:
            enc_str = enc_str.replace(letter, ".")

        # Write the instruction format
        fmt_parts = [f"@{tag}\t{enc_str}"]
        for reg_type, reg_id, _, _ in regs:
            fmt_parts.append(f" {reg_type}{reg_id}=%{tag}_{reg_type}{reg_id}")
        for imm in imms:
            imm_type = imm[0]
            imm_letter = "i" if imm_type.islower() else "I"
            fmt_parts.append(
                f" {imm_type}{imm_letter}=%{tag}_{imm_type}{imm_letter}")
        if not is_subinsn:
            fmt_parts.append(" %PP")
        fmt_parts.append("\n")
        f.write("".join(fmt_parts))

        # Replace the 0s and 1s with .
        enc_str = enc_str.replace("0", ".").replace("1", ".")

        # Write the instruction pattern
        f.write(f"{tag}\t{enc_str} @{tag}\n")
1931547a2d3STaylor Simpson
if __name__ == "__main__":
    # argv[1]: semantics file to read
    # argv[2]: encoding class to decode
    # argv[3]: path of the decodetree file to write
    hex_common.read_semantics_file(sys.argv[1])
    with open(sys.argv[3], "w") as out_file:
        gen_decodetree_file(out_file, sys.argv[2])
199