xref: /openbmc/qemu/target/mips/tcg/mxu_translate.c (revision 68a48804)
1 /*
2  *  Ingenic XBurst Media eXtension Unit (MXU) translation routines.
3  *
4  *  Copyright (c) 2004-2005 Jocelyn Mayer
5  *  Copyright (c) 2006 Marius Groeger (FPU operations)
6  *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7  *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8  *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
9  *
10  * SPDX-License-Identifier: LGPL-2.1-or-later
11  *
12  * Datasheet:
13  *
14  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
16  */
17 
18 #include "qemu/osdep.h"
19 #include "translate.h"
20 
21 /*
22  *
23  *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24  *       ============================================
25  *
26  *
 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of the
 * MIPS32 instruction set. It is designed to fit the needs of signal, graphical
 * and video processing applications. The MXU instruction set is used in the
 * XBurst family of microprocessors by Ingenic.
31  *
32  * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
33  * the control register.
34  *
35  *
36  *     The notation used in MXU assembler mnemonics
37  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38  *
39  *  Register operands:
40  *
41  *   XRa, XRb, XRc, XRd - MXU registers
42  *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
43  *
44  *  Non-register operands:
45  *
46  *   aptn1 - 1-bit accumulate add/subtract pattern
47  *   aptn2 - 2-bit accumulate add/subtract pattern
48  *   eptn2 - 2-bit execute add/subtract pattern
49  *   optn2 - 2-bit operand pattern
50  *   optn3 - 3-bit operand pattern
51  *   sft4  - 4-bit shift amount
52  *   strd2 - 2-bit stride amount
53  *
54  *  Prefixes:
55  *
56  *   Level of parallelism:                Operand size:
57  *    S - single operation at a time       32 - word
58  *    D - two operations in parallel       16 - half word
59  *    Q - four operations in parallel       8 - byte
60  *
61  *  Operations:
62  *
63  *   ADD   - Add or subtract
64  *   ADDC  - Add with carry-in
65  *   ACC   - Accumulate
66  *   ASUM  - Sum together then accumulate (add or subtract)
67  *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
68  *   AVG   - Average between 2 operands
69  *   ABD   - Absolute difference
70  *   ALN   - Align data
71  *   AND   - Logical bitwise 'and' operation
72  *   CPS   - Copy sign
73  *   EXTR  - Extract bits
74  *   I2M   - Move from GPR register to MXU register
75  *   LDD   - Load data from memory to XRF
76  *   LDI   - Load data from memory to XRF (and increase the address base)
77  *   LUI   - Load unsigned immediate
78  *   MUL   - Multiply
79  *   MULU  - Unsigned multiply
80  *   MADD  - 64-bit operand add 32x32 product
81  *   MSUB  - 64-bit operand subtract 32x32 product
82  *   MAC   - Multiply and accumulate (add or subtract)
83  *   MAD   - Multiply and add or subtract
84  *   MAX   - Maximum between 2 operands
85  *   MIN   - Minimum between 2 operands
86  *   M2I   - Move from MXU register to GPR register
87  *   MOVZ  - Move if zero
88  *   MOVN  - Move if non-zero
89  *   NOR   - Logical bitwise 'nor' operation
90  *   OR    - Logical bitwise 'or' operation
91  *   STD   - Store data from XRF to memory
92  *   SDI   - Store data from XRF to memory (and increase the address base)
 *   SLT   - Set on less than comparison
94  *   SAD   - Sum of absolute differences
95  *   SLL   - Logical shift left
96  *   SLR   - Logical shift right
97  *   SAR   - Arithmetic shift right
98  *   SAT   - Saturation
99  *   SFL   - Shuffle
100  *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101  *   XOR   - Logical bitwise 'exclusive or' operation
102  *
103  *  Suffixes:
104  *
105  *   E - Expand results
106  *   F - Fixed point multiplication
107  *   L - Low part result
108  *   R - Doing rounding
109  *   V - Variable instead of immediate
110  *   W - Combine above L and V
111  *
112  *
113  *     The list of MXU instructions grouped by functionality
114  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115  *
116  * Load/Store instructions           Multiplication instructions
117  * -----------------------           ---------------------------
118  *
119  *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
120  *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
121  *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
122  *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
123  *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
124  *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
125  *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
126  *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
127  *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
128  *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129  *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130  *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131  *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132  *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133  *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
134  *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
135  *  S16LDD XRa, Rb, s10, eptn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
136  *  S16STD XRa, Rb, s10, eptn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
137  *  S16LDI XRa, Rb, s10, eptn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
138  *  S16SDI XRa, Rb, s10, eptn2
139  *  S8LDD XRa, Rb, s8, eptn3
140  *  S8STD XRa, Rb, s8, eptn3         Addition and subtraction instructions
141  *  S8LDI XRa, Rb, s8, eptn3         -------------------------------------
142  *  S8SDI XRa, Rb, s8, eptn3
143  *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
144  *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
145  *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
146  *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
147  *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
148  *                                    S32CPS XRa, XRb, XRc
149  *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150  * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
151  * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
152  *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
 *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb, XRc
154  *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
155  *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
156  *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
157  *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
158  *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
159  *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
160  *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
161  *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
162  *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
163  *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
164  *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
165  *  Q8SLT XRa, XRb, XRc
166  *  Q8SLTU XRa, XRb, XRc
167  *  Q8MOVZ XRa, XRb, XRc             Shift instructions
168  *  Q8MOVN XRa, XRb, XRc             ------------------
169  *
170  *                                    D32SLL XRa, XRb, XRc, XRd, sft4
171  * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
172  * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
173  *                                    D32SARL XRa, XRb, XRc, sft4
174  *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
175  *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
176  *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
177  *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
178  *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
179  *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
180  * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
181  * -------------------------          Q16SLLV XRa, XRb, Rb
182  *                                    Q16SLRV XRa, XRb, Rb
183  *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
184  *  S32ALN XRa, XRb, XRc, Rb
185  *  S32ALNI XRa, XRb, XRc, s3
186  *  S32LUI XRa, s8, optn3            Move instructions
187  *  S32EXTR XRa, XRb, Rb, bits5      -----------------
188  *  S32EXTRV XRa, XRb, Rs, Rt
189  *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
190  *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
191  *
192  *
193  *     The opcode organization of MXU instructions
194  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195  *
196  * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
197  * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
198  * other bits up to the instruction level is as follows:
199  *
200  *              bits
201  *             05..00
202  *
203  *          ┌─ 000000 ─ OPC_MXU_S32MADD
204  *          ├─ 000001 ─ OPC_MXU_S32MADDU
205  *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
206  *          │
207  *          │                               20..18
208  *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209  *          │                            ├─ 001 ─ OPC_MXU_S32MIN
210  *          │                            ├─ 010 ─ OPC_MXU_D16MAX
211  *          │                            ├─ 011 ─ OPC_MXU_D16MIN
212  *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
213  *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
214  *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
215  *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
216  *          ├─ 000100 ─ OPC_MXU_S32MSUB
217  *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
218  *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219  *          │                            ├─ 001 ─ OPC_MXU_D16SLT
220  *          │                            ├─ 010 ─ OPC_MXU_D16AVG
221  *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
222  *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
223  *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
224  *          │                            └─ 111 ─ OPC_MXU_Q8ADD
225  *          │
226  *          │                               20..18
227  *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228  *          │                            ├─ 010 ─ OPC_MXU_D16CPS
229  *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
230  *          │                            └─ 110 ─ OPC_MXU_Q16SAT
231  *          ├─ 001000 ─ OPC_MXU_D16MUL
232  *          │                               25..24
233  *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234  *          │                            └─ 01 ─ OPC_MXU_D16MULE
235  *          ├─ 001010 ─ OPC_MXU_D16MAC
236  *          ├─ 001011 ─ OPC_MXU_D16MACF
237  *          ├─ 001100 ─ OPC_MXU_D16MADL
238  *          ├─ 001101 ─ OPC_MXU_S16MAD
239  *          ├─ 001110 ─ OPC_MXU_Q16ADD
240  *          ├─ 001111 ─ OPC_MXU_D16MACE     20 (13..10 don't care)
241  *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
242  *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
243  *          │
244  *          │                               20 (13..10 don't care)
245  *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246  *          │                            └─ 1 ─ OPC_MXU_S32STDR
247  *          │
248  *          │                               13..10
249  *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250  *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
251  *          │
252  *          │                               13..10
253  *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254  *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
255  *          │
256  *          │                               20 (13..10 don't care)
257  *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258  *          │                            └─ 1 ─ OPC_MXU_S32LDIR
259  *          │
260  *          │                               20 (13..10 don't care)
261  *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262  *          │                            └─ 1 ─ OPC_MXU_S32SDIR
263  *          │
264  *          │                               13..10
265  *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266  *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
267  *          │
268  *          │                               13..10
269  *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270  *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
271  *          ├─ 011000 ─ OPC_MXU_D32ADD  (catches D32ADDC too)
272  *          │                               23..22
273  *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274  * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
275  *          │                            └─ 10 ─ OPC_MXU_D32ASUM
276  *          ├─ 011010 ─ <not assigned>
277  *          │                               23..22
278  *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279  *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
280  *          │                            └─ 10 ─ OPC_MXU_D16ASUM
281  *          │
282  *          │                               23..22
283  *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284  *          │                            ├─ 01 ─ OPC_MXU_D8SUM
285  *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
286  *          ├─ 011110 ─ <not assigned>
287  *          ├─ 011111 ─ <not assigned>
288  *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
289  *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
290  *          ├─ 100010 ─ OPC_MXU_S8LDD
291  *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
292  *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
293  *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
294  *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
295  *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
296  *          │
297  *          │                               20..18
298  *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299  *          │                            ├─ 001 ─ OPC_MXU_S32ALN
300  *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
301  *          │                            ├─ 011 ─ OPC_MXU_S32LUI
302  *          │                            ├─ 100 ─ OPC_MXU_S32NOR
303  *          │                            ├─ 101 ─ OPC_MXU_S32AND
304  *          │                            ├─ 110 ─ OPC_MXU_S32OR
305  *          │                            └─ 111 ─ OPC_MXU_S32XOR
306  *          │
307  *          │                               8..6
308  *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309  *          │                            ├─ 001 ─ OPC_MXU_LXH
310  *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
311  *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
312  *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
313  *          ├─ 101100 ─ OPC_MXU_S16LDI
314  *          ├─ 101101 ─ OPC_MXU_S16SDI
315  *          ├─ 101110 ─ OPC_MXU_S32M2I
316  *          ├─ 101111 ─ OPC_MXU_S32I2M
317  *          ├─ 110000 ─ OPC_MXU_D32SLL
318  *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
319  *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
320  *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
321  *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
322  *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
323  *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
324  *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
325  *          │
326  *          ├─ 110111 ─ OPC_MXU_Q16SAR
327  *          │                               23..22
328  *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329  *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
330  *          │
331  *          │                               20..18
332  *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333  *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
334  *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
335  *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
336  *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
337  *          │                            └─ 101 ─ OPC_MXU_S32MOVN
338  *          │
339  *          │                               23..22
340  *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341  *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
342  *          ├─ 111011 ─ OPC_MXU_Q16SCOP
343  *          ├─ 111100 ─ OPC_MXU_Q8MADL
344  *          ├─ 111101 ─ OPC_MXU_S32SFL
345  *          ├─ 111110 ─ OPC_MXU_Q8SAD
346  *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
347  *
348  *
349  * Compiled after:
350  *
351  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
353  */
354 
/*
 * Major MXU opcodes, i.e. the value of instruction bits 05..00 as laid out
 * in the opcode map above.
 *
 * OPC_MXU__POOLnn entries are not instructions on their own: the actual
 * instruction is picked by a minor opcode field whose bit position is given
 * next to each pool.
 *
 * Values left out on purpose (see the opcode map): 0x02 (non-MXU OPC_MUL),
 * 0x1A, 0x1E, 0x1F, 0x20 (overlaps with CLZ), 0x21 (overlaps with CLO),
 * 0x29 and 0x3F (overlaps with SDBBP).
 *
 * NOTE(review): the opcode map also lists 0x3C (Q8MADL), 0x3D (S32SFL) and
 * 0x3E (Q8SAD); no enumerators exist for them here.
 */
enum {
    OPC_MXU_S32MADD  = 0x00,
    OPC_MXU_S32MADDU = 0x01,
    /* 0x02: not assigned */
    OPC_MXU__POOL00  = 0x03,    /* minor opcode: bits 20..18 */
    OPC_MXU_S32MSUB  = 0x04,
    OPC_MXU_S32MSUBU = 0x05,
    OPC_MXU__POOL01  = 0x06,    /* minor opcode: bits 20..18 */
    OPC_MXU__POOL02  = 0x07,    /* minor opcode: bits 20..18 */
    OPC_MXU_D16MUL   = 0x08,
    OPC_MXU__POOL03  = 0x09,    /* minor opcode: bits 25..24 */
    OPC_MXU_D16MAC   = 0x0A,
    OPC_MXU_D16MACF  = 0x0B,
    OPC_MXU_D16MADL  = 0x0C,
    OPC_MXU_S16MAD   = 0x0D,
    OPC_MXU_Q16ADD   = 0x0E,
    OPC_MXU_D16MACE  = 0x0F,
    OPC_MXU__POOL04  = 0x10,    /* minor opcode: bit 20 */
    OPC_MXU__POOL05  = 0x11,    /* minor opcode: bit 20 */
    OPC_MXU__POOL06  = 0x12,    /* minor opcode: bits 13..10 */
    OPC_MXU__POOL07  = 0x13,    /* minor opcode: bits 13..10 */
    OPC_MXU__POOL08  = 0x14,    /* minor opcode: bit 20 */
    OPC_MXU__POOL09  = 0x15,    /* minor opcode: bit 20 */
    OPC_MXU__POOL10  = 0x16,    /* minor opcode: bits 13..10 */
    OPC_MXU__POOL11  = 0x17,    /* minor opcode: bits 13..10 */
    OPC_MXU_D32ADD   = 0x18,    /* catches D32ADDC too */
    OPC_MXU__POOL12  = 0x19,    /* minor opcode: bits 23..22 */
    /* 0x1A: not assigned */
    OPC_MXU__POOL13  = 0x1B,    /* minor opcode: bits 23..22 */
    OPC_MXU__POOL14  = 0x1C,    /* minor opcode: bits 23..22 */
    OPC_MXU_Q8ACCE   = 0x1D,
    /* 0x1E..0x21: not assigned */
    OPC_MXU_S8LDD    = 0x22,
    OPC_MXU_S8STD    = 0x23,
    OPC_MXU_S8LDI    = 0x24,
    OPC_MXU_S8SDI    = 0x25,
    OPC_MXU__POOL15  = 0x26,    /* minor opcode: bits 15..14 */
    OPC_MXU__POOL16  = 0x27,    /* minor opcode: bits 20..18 */
    OPC_MXU__POOL17  = 0x28,    /* minor opcode: bits 8..6 */
    /* 0x29: not assigned */
    OPC_MXU_S16LDD   = 0x2A,
    OPC_MXU_S16STD   = 0x2B,
    OPC_MXU_S16LDI   = 0x2C,
    OPC_MXU_S16SDI   = 0x2D,
    OPC_MXU_S32M2I   = 0x2E,
    OPC_MXU_S32I2M   = 0x2F,
    OPC_MXU_D32SLL   = 0x30,
    OPC_MXU_D32SLR   = 0x31,
    OPC_MXU_D32SARL  = 0x32,
    OPC_MXU_D32SAR   = 0x33,
    OPC_MXU_Q16SLL   = 0x34,
    OPC_MXU_Q16SLR   = 0x35,
    OPC_MXU__POOL18  = 0x36,    /* minor opcode: bits 20..18 */
    OPC_MXU_Q16SAR   = 0x37,
    OPC_MXU__POOL19  = 0x38,    /* minor opcode: bits 23..22 */
    OPC_MXU__POOL20  = 0x39,    /* minor opcode: bits 20..18 */
    OPC_MXU__POOL21  = 0x3A,    /* minor opcode: bits 23..22 */
    OPC_MXU_Q16SCOP  = 0x3B,
};
410 
411 
/*
 * Minor opcodes of the MXU pools.
 *
 * Each enum below enumerates the values of the minor opcode field of one
 * pool (or of a group of pools sharing the same encoding).  The position of
 * the field is the one shown in the opcode map at the top of this file.
 */

/* MXU pool 00 - minor opcode in bits 20..18 */
enum {
    OPC_MXU_S32MAX   = 0x00,
    OPC_MXU_S32MIN   = 0x01,
    OPC_MXU_D16MAX   = 0x02,
    OPC_MXU_D16MIN   = 0x03,
    OPC_MXU_Q8MAX    = 0x04,
    OPC_MXU_Q8MIN    = 0x05,
    OPC_MXU_Q8SLT    = 0x06,
    OPC_MXU_Q8SLTU   = 0x07,
};

/* MXU pool 01 - minor opcode in bits 20..18 (0x06 is not assigned) */
enum {
    OPC_MXU_S32SLT   = 0x00,
    OPC_MXU_D16SLT   = 0x01,
    OPC_MXU_D16AVG   = 0x02,
    OPC_MXU_D16AVGR  = 0x03,
    OPC_MXU_Q8AVG    = 0x04,
    OPC_MXU_Q8AVGR   = 0x05,
    OPC_MXU_Q8ADD    = 0x07,
};

/* MXU pool 02 - minor opcode in bits 20..18 (only even values assigned) */
enum {
    OPC_MXU_S32CPS   = 0x00,
    OPC_MXU_D16CPS   = 0x02,
    OPC_MXU_Q8ABD    = 0x04,
    OPC_MXU_Q16SAT   = 0x06,
};

/* MXU pool 03 - minor opcode in bits 25..24 */
enum {
    OPC_MXU_D16MULF  = 0x00,
    OPC_MXU_D16MULE  = 0x01,
};

/*
 * MXU pools 04 05 06 07 08 09 10 11 (S32 loads/stores).
 *
 * All eight pools share this pair: 0 is the plain form, 1 is the form whose
 * mnemonic ends in 'R'.  Per the opcode map the selector is bit 20 for pools
 * 04, 05, 08 and 09, and bits 13..10 for pools 06, 07, 10 and 11.
 */
enum {
    OPC_MXU_S32LDST  = 0x00,
    OPC_MXU_S32LDSTR = 0x01,
};

/* MXU pool 12 - minor opcode in bits 23..22 */
enum {
    OPC_MXU_D32ACC    = 0x00,
    OPC_MXU_D32ACCM   = 0x01,
    OPC_MXU_D32ASUM   = 0x02,
};

/* MXU pool 13 - minor opcode in bits 23..22 */
enum {
    OPC_MXU_Q16ACC    = 0x00,
    OPC_MXU_Q16ACCM   = 0x01,
    OPC_MXU_D16ASUM   = 0x02,
};

/* MXU pool 14 - minor opcode in bits 23..22 */
enum {
    OPC_MXU_Q8ADDE    = 0x00,
    OPC_MXU_D8SUM     = 0x01,
    OPC_MXU_D8SUMC    = 0x02,
};

/* MXU pool 15 - minor opcode in bits 15..14 */
enum {
    OPC_MXU_S32MUL    = 0x00,
    OPC_MXU_S32MULU   = 0x01,
    OPC_MXU_S32EXTR   = 0x02,
    OPC_MXU_S32EXTRV  = 0x03,
};

/* MXU pool 16 - minor opcode in bits 20..18 */
enum {
    OPC_MXU_D32SARW  = 0x00,
    OPC_MXU_S32ALN   = 0x01,
    OPC_MXU_S32ALNI  = 0x02,
    OPC_MXU_S32LUI   = 0x03,
    OPC_MXU_S32NOR   = 0x04,
    OPC_MXU_S32AND   = 0x05,
    OPC_MXU_S32OR    = 0x06,
    OPC_MXU_S32XOR   = 0x07,
};

/* MXU pool 17 - minor opcode in bits 8..6 (0x02 is not assigned) */
enum {
    OPC_MXU_LXB      = 0x00,
    OPC_MXU_LXH      = 0x01,
    OPC_MXU_LXW      = 0x03,
    OPC_MXU_LXBU     = 0x04,
    OPC_MXU_LXHU     = 0x05,
};

/* MXU pool 18 - minor opcode in bits 20..18 (0x02, 0x06 not assigned) */
enum {
    OPC_MXU_D32SLLV  = 0x00,
    OPC_MXU_D32SLRV  = 0x01,
    OPC_MXU_D32SARV  = 0x03,
    OPC_MXU_Q16SLLV  = 0x04,
    OPC_MXU_Q16SLRV  = 0x05,
    OPC_MXU_Q16SARV  = 0x07,
};

/* MXU pool 19 - minor opcode in bits 23..22 */
enum {
    OPC_MXU_Q8MUL    = 0x00,
    OPC_MXU_Q8MULSU  = 0x02,
};

/* MXU pool 20 - minor opcode in bits 20..18 */
enum {
    OPC_MXU_Q8MOVZ   = 0x00,
    OPC_MXU_Q8MOVN   = 0x01,
    OPC_MXU_D16MOVZ  = 0x02,
    OPC_MXU_D16MOVN  = 0x03,
    OPC_MXU_S32MOVZ  = 0x04,
    OPC_MXU_S32MOVN  = 0x05,
};

/* MXU pool 21 - minor opcode in bits 23..22 */
enum {
    OPC_MXU_Q8MAC    = 0x00,
    OPC_MXU_Q8MACSU  = 0x02,
};
566 
567 
/*
 * Symbolic names for the small "pattern" fields carried by MXU instructions
 * (see "Non-register operands" in the overview above).  The values are the
 * raw field encodings.
 */

/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
/* NOTE(review): A/S presumably stand for add/subtract - confirm in manual */
#define MXU_APTN1_A    0
#define MXU_APTN1_S    1

/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
#define MXU_APTN2_AA    0
#define MXU_APTN2_AS    1
#define MXU_APTN2_SA    2
#define MXU_APTN2_SS    3

/* MXU execute add/subtract 2-bit pattern 'eptn2' */
#define MXU_EPTN2_AA    0
#define MXU_EPTN2_AS    1
#define MXU_EPTN2_SA    2
#define MXU_EPTN2_SS    3

/* MXU operand getting pattern 'optn2' */
#define MXU_OPTN2_PTN0  0
#define MXU_OPTN2_PTN1  1
#define MXU_OPTN2_PTN2  2
#define MXU_OPTN2_PTN3  3
/* alternative naming scheme for 'optn2' (same four values as PTN0..PTN3) */
#define MXU_OPTN2_WW    0
#define MXU_OPTN2_LW    1
#define MXU_OPTN2_HW    2
#define MXU_OPTN2_XW    3

/* MXU operand getting pattern 'optn3' */
#define MXU_OPTN3_PTN0  0
#define MXU_OPTN3_PTN1  1
#define MXU_OPTN3_PTN2  2
#define MXU_OPTN3_PTN3  3
#define MXU_OPTN3_PTN4  4
#define MXU_OPTN3_PTN5  5
#define MXU_OPTN3_PTN6  6
#define MXU_OPTN3_PTN7  7
604 
/*
 * MXU registers
 *
 * mxu_gpr[] holds the TCG globals for the MXU data registers; XR0 has no
 * slot because it always reads as zero, so XR<n> lives at index n - 1 (see
 * gen_load_mxu_gpr()/gen_store_mxu_gpr()).  mxu_CR is the TCG global for the
 * MXU control register.
 */
static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
static TCGv mxu_CR;
608 
/*
 * Names of the TCG globals backing the MXU registers, in mxu_gpr[] order,
 * followed by the name of the control register.
 *
 * Each element must be able to hold the longest name ("XR10".."XR15", four
 * characters) plus the terminating NUL, hence 5 bytes: with a 4-byte element
 * C silently drops the terminator and the names are no longer valid C
 * strings when handed to tcg_global_mem_new().
 */
static const char mxuregnames[][5] = {
    "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
    "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
};
613 
614 void mxu_translate_init(void)
615 {
616     for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
617         mxu_gpr[i] = tcg_global_mem_new(cpu_env,
618                                         offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
619                                         mxuregnames[i]);
620     }
621 
622     mxu_CR = tcg_global_mem_new(cpu_env,
623                                 offsetof(CPUMIPSState, active_tc.mxu_cr),
624                                 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
625 }
626 
627 /* MXU General purpose registers moves. */
628 static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
629 {
630     if (reg == 0) {
631         tcg_gen_movi_tl(t, 0);
632     } else if (reg <= 15) {
633         tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
634     }
635 }
636 
637 static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
638 {
639     if (reg > 0 && reg <= 15) {
640         tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
641     }
642 }
643 
/* MXU control register moves. */

/* Emit code that copies the MXU control register into t. */
static inline void gen_load_mxu_cr(TCGv t)
{
    tcg_gen_mov_tl(t, mxu_CR);
}
649 
/*
 * Emit code that copies t into the MXU control register.
 * The whole value is written as is; no per-bit access rules are applied yet.
 */
static inline void gen_store_mxu_cr(TCGv t)
{
    /* TODO: Add handling of RW rules for MXU_CR. */
    tcg_gen_mov_tl(mxu_CR, t);
}
655 
656 /*
657  * S32I2M XRa, rb - Register move from GRF to XRF
658  */
659 static void gen_mxu_s32i2m(DisasContext *ctx)
660 {
661     TCGv t0;
662     uint32_t XRa, Rb;
663 
664     t0 = tcg_temp_new();
665 
666     XRa = extract32(ctx->opcode, 6, 5);
667     Rb = extract32(ctx->opcode, 16, 5);
668 
669     gen_load_gpr(t0, Rb);
670     if (XRa <= 15) {
671         gen_store_mxu_gpr(t0, XRa);
672     } else if (XRa == 16) {
673         gen_store_mxu_cr(t0);
674     }
675 }
676 
677 /*
678  * S32M2I XRa, rb - Register move from XRF to GRF
679  */
680 static void gen_mxu_s32m2i(DisasContext *ctx)
681 {
682     TCGv t0;
683     uint32_t XRa, Rb;
684 
685     t0 = tcg_temp_new();
686 
687     XRa = extract32(ctx->opcode, 6, 5);
688     Rb = extract32(ctx->opcode, 16, 5);
689 
690     if (XRa <= 15) {
691         gen_load_mxu_gpr(t0, XRa);
692     } else if (XRa == 16) {
693         gen_load_mxu_cr(t0);
694     }
695 
696     gen_store_gpr(t0, Rb);
697 }
698 
699 /*
700  * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
701  *
702  * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
703  * post modify address register
704  */
705 static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
706 {
707     TCGv t0, t1;
708     uint32_t XRa, Rb, s8, optn3;
709 
710     t0 = tcg_temp_new();
711     t1 = tcg_temp_new();
712 
713     XRa = extract32(ctx->opcode, 6, 4);
714     s8 = extract32(ctx->opcode, 10, 8);
715     optn3 = extract32(ctx->opcode, 18, 3);
716     Rb = extract32(ctx->opcode, 21, 5);
717 
718     gen_load_gpr(t0, Rb);
719     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
720     if (postmodify) {
721         gen_store_gpr(t0, Rb);
722     }
723 
724     switch (optn3) {
725     /* XRa[7:0] = tmp8 */
726     case MXU_OPTN3_PTN0:
727         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
728         gen_load_mxu_gpr(t0, XRa);
729         tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
730         break;
731     /* XRa[15:8] = tmp8 */
732     case MXU_OPTN3_PTN1:
733         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
734         gen_load_mxu_gpr(t0, XRa);
735         tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
736         break;
737     /* XRa[23:16] = tmp8 */
738     case MXU_OPTN3_PTN2:
739         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
740         gen_load_mxu_gpr(t0, XRa);
741         tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
742         break;
743     /* XRa[31:24] = tmp8 */
744     case MXU_OPTN3_PTN3:
745         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
746         gen_load_mxu_gpr(t0, XRa);
747         tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
748         break;
749     /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
750     case MXU_OPTN3_PTN4:
751         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
752         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
753         break;
754     /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
755     case MXU_OPTN3_PTN5:
756         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
757         tcg_gen_shli_tl(t1, t1, 8);
758         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
759         break;
760     /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
761     case MXU_OPTN3_PTN6:
762         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
763         tcg_gen_mov_tl(t0, t1);
764         tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
765         tcg_gen_shli_tl(t1, t1, 16);
766         tcg_gen_or_tl(t0, t0, t1);
767         break;
768     /* XRa = {tmp8, tmp8, tmp8, tmp8} */
769     case MXU_OPTN3_PTN7:
770         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
771         tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
772         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
773         break;
774     }
775 
776     gen_store_mxu_gpr(t0, XRa);
777 }
778 
779 /*
780  * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
781  *
782  * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
783  * post modify address register
784  */
785 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
786 {
787     TCGv t0, t1;
788     uint32_t XRa, Rb, s8, optn3;
789 
790     t0 = tcg_temp_new();
791     t1 = tcg_temp_new();
792 
793     XRa = extract32(ctx->opcode, 6, 4);
794     s8 = extract32(ctx->opcode, 10, 8);
795     optn3 = extract32(ctx->opcode, 18, 3);
796     Rb = extract32(ctx->opcode, 21, 5);
797 
798     if (optn3 > 3) {
799         /* reserved, do nothing */
800         return;
801     }
802 
803     gen_load_gpr(t0, Rb);
804     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
805     if (postmodify) {
806         gen_store_gpr(t0, Rb);
807     }
808     gen_load_mxu_gpr(t1, XRa);
809 
810     switch (optn3) {
811     /* XRa[7:0] => tmp8 */
812     case MXU_OPTN3_PTN0:
813         tcg_gen_extract_tl(t1, t1, 0, 8);
814         break;
815     /* XRa[15:8] => tmp8 */
816     case MXU_OPTN3_PTN1:
817         tcg_gen_extract_tl(t1, t1, 8, 8);
818         break;
819     /* XRa[23:16] => tmp8 */
820     case MXU_OPTN3_PTN2:
821         tcg_gen_extract_tl(t1, t1, 16, 8);
822         break;
823     /* XRa[31:24] => tmp8 */
824     case MXU_OPTN3_PTN3:
825         tcg_gen_extract_tl(t1, t1, 24, 8);
826         break;
827     }
828 
829     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
830 }
831 
832 /*
833  * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
834  *
835  * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
836  * post modify address register
837  */
838 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
839 {
840     TCGv t0, t1;
841     uint32_t XRa, Rb, optn2;
842     int32_t s10;
843 
844     t0 = tcg_temp_new();
845     t1 = tcg_temp_new();
846 
847     XRa   = extract32(ctx->opcode,   6, 4);
848     s10   = sextract32(ctx->opcode, 10, 9) * 2;
849     optn2 = extract32(ctx->opcode,  19, 2);
850     Rb    = extract32(ctx->opcode,  21, 5);
851 
852     gen_load_gpr(t0, Rb);
853     tcg_gen_addi_tl(t0, t0, s10);
854     if (postmodify) {
855         gen_store_gpr(t0, Rb);
856     }
857 
858     switch (optn2) {
859     /* XRa[15:0] = tmp16 */
860     case MXU_OPTN2_PTN0:
861         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
862         gen_load_mxu_gpr(t0, XRa);
863         tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
864         break;
865     /* XRa[31:16] = tmp16 */
866     case MXU_OPTN2_PTN1:
867         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
868         gen_load_mxu_gpr(t0, XRa);
869         tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
870         break;
871     /* XRa = sign_extend(tmp16) */
872     case MXU_OPTN2_PTN2:
873         tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
874         break;
875     /* XRa = {tmp16, tmp16} */
876     case MXU_OPTN2_PTN3:
877         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
878         tcg_gen_deposit_tl(t0, t1, t1,  0, 16);
879         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
880         break;
881     }
882 
883     gen_store_mxu_gpr(t0, XRa);
884 }
885 
886 /*
887  * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
888  *
889  * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
890  * post modify address register
891  */
892 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
893 {
894     TCGv t0, t1;
895     uint32_t XRa, Rb, optn2;
896     int32_t s10;
897 
898     t0 = tcg_temp_new();
899     t1 = tcg_temp_new();
900 
901     XRa = extract32(ctx->opcode, 6, 4);
902     s10 = sextract32(ctx->opcode, 10, 9) * 2;
903     optn2 = extract32(ctx->opcode, 19, 2);
904     Rb = extract32(ctx->opcode, 21, 5);
905 
906     if (optn2 > 1) {
907         /* reserved, do nothing */
908         return;
909     }
910 
911     gen_load_gpr(t0, Rb);
912     tcg_gen_addi_tl(t0, t0, s10);
913     if (postmodify) {
914         gen_store_gpr(t0, Rb);
915     }
916     gen_load_mxu_gpr(t1, XRa);
917 
918     switch (optn2) {
919     /* XRa[15:0] => tmp16 */
920     case MXU_OPTN2_PTN0:
921         tcg_gen_extract_tl(t1, t1, 0, 16);
922         break;
923     /* XRa[31:16] => tmp16 */
924     case MXU_OPTN2_PTN1:
925         tcg_gen_extract_tl(t1, t1, 16, 16);
926         break;
927     }
928 
929     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
930 }
931 
932 /*
933  * S32MUL  XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
934  * of GPR's and stores result into pair of MXU registers.
935  * It strains HI and LO registers.
936  *
937  * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
938  * of GPR's and stores result into pair of MXU registers.
939  * It strains HI and LO registers.
940  */
941 static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
942 {
943     TCGv t0, t1;
944     uint32_t XRa, XRd, rs, rt;
945 
946     t0 = tcg_temp_new();
947     t1 = tcg_temp_new();
948 
949     XRa = extract32(ctx->opcode,  6, 4);
950     XRd = extract32(ctx->opcode, 10, 4);
951     rs  = extract32(ctx->opcode, 16, 5);
952     rt  = extract32(ctx->opcode, 21, 5);
953 
954     if (unlikely(rs == 0 || rt == 0)) {
955         tcg_gen_movi_tl(t0, 0);
956         tcg_gen_movi_tl(t1, 0);
957     } else {
958         gen_load_gpr(t0, rs);
959         gen_load_gpr(t1, rt);
960 
961         if (mulu) {
962             tcg_gen_mulu2_tl(t0, t1, t0, t1);
963         } else {
964             tcg_gen_muls2_tl(t0, t1, t0, t1);
965         }
966     }
967     tcg_gen_mov_tl(cpu_HI[0], t1);
968     tcg_gen_mov_tl(cpu_LO[0], t0);
969     gen_store_mxu_gpr(t1, XRa);
970     gen_store_mxu_gpr(t0, XRd);
971 }
972 
973 /*
974  * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
975  * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
976  *   with rounding and packing result
977  * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
978  *   multiplication with rounding
979  */
980 static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
981                            bool packed_result)
982 {
983     TCGv t0, t1, t2, t3;
984     uint32_t XRa, XRb, XRc, XRd, optn2;
985 
986     t0 = tcg_temp_new();
987     t1 = tcg_temp_new();
988     t2 = tcg_temp_new();
989     t3 = tcg_temp_new();
990 
991     XRa = extract32(ctx->opcode, 6, 4);
992     XRb = extract32(ctx->opcode, 10, 4);
993     XRc = extract32(ctx->opcode, 14, 4);
994     XRd = extract32(ctx->opcode, 18, 4);
995     optn2 = extract32(ctx->opcode, 22, 2);
996 
997     /*
998      * TODO: XRd field isn't used for D16MULF
999      * There's no knowledge how this field affect
1000      * instruction decoding/behavior
1001      */
1002 
1003     gen_load_mxu_gpr(t1, XRb);
1004     tcg_gen_sextract_tl(t0, t1, 0, 16);
1005     tcg_gen_sextract_tl(t1, t1, 16, 16);
1006     gen_load_mxu_gpr(t3, XRc);
1007     tcg_gen_sextract_tl(t2, t3, 0, 16);
1008     tcg_gen_sextract_tl(t3, t3, 16, 16);
1009 
1010     switch (optn2) {
1011     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1012         tcg_gen_mul_tl(t3, t1, t3);
1013         tcg_gen_mul_tl(t2, t0, t2);
1014         break;
1015     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1016         tcg_gen_mul_tl(t3, t0, t3);
1017         tcg_gen_mul_tl(t2, t0, t2);
1018         break;
1019     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1020         tcg_gen_mul_tl(t3, t1, t3);
1021         tcg_gen_mul_tl(t2, t1, t2);
1022         break;
1023     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1024         tcg_gen_mul_tl(t3, t0, t3);
1025         tcg_gen_mul_tl(t2, t1, t2);
1026         break;
1027     }
1028     if (fractional) {
1029         TCGLabel *l_done = gen_new_label();
1030         TCGv rounding = tcg_temp_new();
1031 
1032         tcg_gen_shli_tl(t3, t3, 1);
1033         tcg_gen_shli_tl(t2, t2, 1);
1034         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1035         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1036         if (packed_result) {
1037             TCGLabel *l_apply_bias_l = gen_new_label();
1038             TCGLabel *l_apply_bias_r = gen_new_label();
1039             TCGLabel *l_half_done = gen_new_label();
1040             TCGv bias = tcg_temp_new();
1041 
1042             /*
1043              * D16MULF supports unbiased rounding aka "bankers rounding",
1044              * "round to even", "convergent rounding"
1045              */
1046             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1047             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1048             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1049             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1050             gen_set_label(l_apply_bias_l);
1051             tcg_gen_addi_tl(t3, t3, 0x8000);
1052             gen_set_label(l_half_done);
1053             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1054             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1055             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1056             gen_set_label(l_apply_bias_r);
1057             tcg_gen_addi_tl(t2, t2, 0x8000);
1058         } else {
1059             /* D16MULE doesn't support unbiased rounding */
1060             tcg_gen_addi_tl(t3, t3, 0x8000);
1061             tcg_gen_addi_tl(t2, t2, 0x8000);
1062         }
1063         gen_set_label(l_done);
1064     }
1065     if (!packed_result) {
1066         gen_store_mxu_gpr(t3, XRa);
1067         gen_store_mxu_gpr(t2, XRd);
1068     } else {
1069         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1070         tcg_gen_shri_tl(t2, t2, 16);
1071         tcg_gen_or_tl(t3, t3, t2);
1072         gen_store_mxu_gpr(t3, XRa);
1073     }
1074 }
1075 
1076 /*
1077  * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1078  *   Signed 16 bit pattern multiply and accumulate
1079  * D16MACF XRa, XRb, XRc, aptn2, optn2
1080  *   Signed Q15 fraction pattern multiply accumulate and pack
1081  * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1082  *   Signed Q15 fraction pattern multiply and accumulate
1083  */
1084 static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
1085                            bool packed_result)
1086 {
1087     TCGv t0, t1, t2, t3;
1088     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1089 
1090     t0 = tcg_temp_new();
1091     t1 = tcg_temp_new();
1092     t2 = tcg_temp_new();
1093     t3 = tcg_temp_new();
1094 
1095     XRa = extract32(ctx->opcode, 6, 4);
1096     XRb = extract32(ctx->opcode, 10, 4);
1097     XRc = extract32(ctx->opcode, 14, 4);
1098     XRd = extract32(ctx->opcode, 18, 4);
1099     optn2 = extract32(ctx->opcode, 22, 2);
1100     aptn2 = extract32(ctx->opcode, 24, 2);
1101 
1102     gen_load_mxu_gpr(t1, XRb);
1103     tcg_gen_sextract_tl(t0, t1, 0, 16);
1104     tcg_gen_sextract_tl(t1, t1, 16, 16);
1105 
1106     gen_load_mxu_gpr(t3, XRc);
1107     tcg_gen_sextract_tl(t2, t3, 0, 16);
1108     tcg_gen_sextract_tl(t3, t3, 16, 16);
1109 
1110     switch (optn2) {
1111     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1112         tcg_gen_mul_tl(t3, t1, t3);
1113         tcg_gen_mul_tl(t2, t0, t2);
1114         break;
1115     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1116         tcg_gen_mul_tl(t3, t0, t3);
1117         tcg_gen_mul_tl(t2, t0, t2);
1118         break;
1119     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1120         tcg_gen_mul_tl(t3, t1, t3);
1121         tcg_gen_mul_tl(t2, t1, t2);
1122         break;
1123     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1124         tcg_gen_mul_tl(t3, t0, t3);
1125         tcg_gen_mul_tl(t2, t1, t2);
1126         break;
1127     }
1128 
1129     if (fractional) {
1130         tcg_gen_shli_tl(t3, t3, 1);
1131         tcg_gen_shli_tl(t2, t2, 1);
1132     }
1133     gen_load_mxu_gpr(t0, XRa);
1134     gen_load_mxu_gpr(t1, XRd);
1135 
1136     switch (aptn2) {
1137     case MXU_APTN2_AA:
1138         tcg_gen_add_tl(t3, t0, t3);
1139         tcg_gen_add_tl(t2, t1, t2);
1140         break;
1141     case MXU_APTN2_AS:
1142         tcg_gen_add_tl(t3, t0, t3);
1143         tcg_gen_sub_tl(t2, t1, t2);
1144         break;
1145     case MXU_APTN2_SA:
1146         tcg_gen_sub_tl(t3, t0, t3);
1147         tcg_gen_add_tl(t2, t1, t2);
1148         break;
1149     case MXU_APTN2_SS:
1150         tcg_gen_sub_tl(t3, t0, t3);
1151         tcg_gen_sub_tl(t2, t1, t2);
1152         break;
1153     }
1154 
1155     if (fractional) {
1156         TCGLabel *l_done = gen_new_label();
1157         TCGv rounding = tcg_temp_new();
1158 
1159         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1160         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1161         if (packed_result) {
1162             TCGLabel *l_apply_bias_l = gen_new_label();
1163             TCGLabel *l_apply_bias_r = gen_new_label();
1164             TCGLabel *l_half_done = gen_new_label();
1165             TCGv bias = tcg_temp_new();
1166 
1167             /*
1168              * D16MACF supports unbiased rounding aka "bankers rounding",
1169              * "round to even", "convergent rounding"
1170              */
1171             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1172             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1173             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1174             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1175             gen_set_label(l_apply_bias_l);
1176             tcg_gen_addi_tl(t3, t3, 0x8000);
1177             gen_set_label(l_half_done);
1178             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1179             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1180             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1181             gen_set_label(l_apply_bias_r);
1182             tcg_gen_addi_tl(t2, t2, 0x8000);
1183         } else {
1184             /* D16MACE doesn't support unbiased rounding */
1185             tcg_gen_addi_tl(t3, t3, 0x8000);
1186             tcg_gen_addi_tl(t2, t2, 0x8000);
1187         }
1188         gen_set_label(l_done);
1189     }
1190 
1191     if (!packed_result) {
1192         gen_store_mxu_gpr(t3, XRa);
1193         gen_store_mxu_gpr(t2, XRd);
1194     } else {
1195         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1196         tcg_gen_shri_tl(t2, t2, 16);
1197         tcg_gen_or_tl(t3, t3, t2);
1198         gen_store_mxu_gpr(t3, XRa);
1199     }
1200 }
1201 
1202 /*
1203  * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1204  * unsigned 16 bit pattern multiply and add/subtract.
1205  */
1206 static void gen_mxu_d16madl(DisasContext *ctx)
1207 {
1208     TCGv t0, t1, t2, t3;
1209     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1210 
1211     t0 = tcg_temp_new();
1212     t1 = tcg_temp_new();
1213     t2 = tcg_temp_new();
1214     t3 = tcg_temp_new();
1215 
1216     XRa = extract32(ctx->opcode, 6, 4);
1217     XRb = extract32(ctx->opcode, 10, 4);
1218     XRc = extract32(ctx->opcode, 14, 4);
1219     XRd = extract32(ctx->opcode, 18, 4);
1220     optn2 = extract32(ctx->opcode, 22, 2);
1221     aptn2 = extract32(ctx->opcode, 24, 2);
1222 
1223     gen_load_mxu_gpr(t1, XRb);
1224     tcg_gen_sextract_tl(t0, t1,  0, 16);
1225     tcg_gen_sextract_tl(t1, t1, 16, 16);
1226 
1227     gen_load_mxu_gpr(t3, XRc);
1228     tcg_gen_sextract_tl(t2, t3,  0, 16);
1229     tcg_gen_sextract_tl(t3, t3, 16, 16);
1230 
1231     switch (optn2) {
1232     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1233         tcg_gen_mul_tl(t3, t1, t3);
1234         tcg_gen_mul_tl(t2, t0, t2);
1235         break;
1236     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1237         tcg_gen_mul_tl(t3, t0, t3);
1238         tcg_gen_mul_tl(t2, t0, t2);
1239         break;
1240     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1241         tcg_gen_mul_tl(t3, t1, t3);
1242         tcg_gen_mul_tl(t2, t1, t2);
1243         break;
1244     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1245         tcg_gen_mul_tl(t3, t0, t3);
1246         tcg_gen_mul_tl(t2, t1, t2);
1247         break;
1248     }
1249     tcg_gen_extract_tl(t2, t2, 0, 16);
1250     tcg_gen_extract_tl(t3, t3, 0, 16);
1251 
1252     gen_load_mxu_gpr(t1, XRa);
1253     tcg_gen_extract_tl(t0, t1,  0, 16);
1254     tcg_gen_extract_tl(t1, t1, 16, 16);
1255 
1256     switch (aptn2) {
1257     case MXU_APTN2_AA:
1258         tcg_gen_add_tl(t3, t1, t3);
1259         tcg_gen_add_tl(t2, t0, t2);
1260         break;
1261     case MXU_APTN2_AS:
1262         tcg_gen_add_tl(t3, t1, t3);
1263         tcg_gen_sub_tl(t2, t0, t2);
1264         break;
1265     case MXU_APTN2_SA:
1266         tcg_gen_sub_tl(t3, t1, t3);
1267         tcg_gen_add_tl(t2, t0, t2);
1268         break;
1269     case MXU_APTN2_SS:
1270         tcg_gen_sub_tl(t3, t1, t3);
1271         tcg_gen_sub_tl(t2, t0, t2);
1272         break;
1273     }
1274 
1275     tcg_gen_andi_tl(t2, t2, 0xffff);
1276     tcg_gen_shli_tl(t3, t3, 16);
1277     tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
1278 }
1279 
1280 /*
1281  * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1282  * signed 16 bit pattern multiply and 32-bit add/subtract.
1283  */
1284 static void gen_mxu_s16mad(DisasContext *ctx)
1285 {
1286     TCGv t0, t1;
1287     uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
1288 
1289     t0 = tcg_temp_new();
1290     t1 = tcg_temp_new();
1291 
1292     XRa = extract32(ctx->opcode, 6, 4);
1293     XRb = extract32(ctx->opcode, 10, 4);
1294     XRc = extract32(ctx->opcode, 14, 4);
1295     XRd = extract32(ctx->opcode, 18, 4);
1296     optn2 = extract32(ctx->opcode, 22, 2);
1297     aptn1 = extract32(ctx->opcode, 24, 1);
1298     pad = extract32(ctx->opcode, 25, 1);
1299 
1300     if (pad) {
1301         /* FIXME check if it influence the result */
1302     }
1303 
1304     gen_load_mxu_gpr(t0, XRb);
1305     gen_load_mxu_gpr(t1, XRc);
1306 
1307     switch (optn2) {
1308     case MXU_OPTN2_WW: /* XRB.H*XRC.H */
1309         tcg_gen_sextract_tl(t0, t0, 16, 16);
1310         tcg_gen_sextract_tl(t1, t1, 16, 16);
1311         break;
1312     case MXU_OPTN2_LW: /* XRB.L*XRC.L */
1313         tcg_gen_sextract_tl(t0, t0,  0, 16);
1314         tcg_gen_sextract_tl(t1, t1,  0, 16);
1315         break;
1316     case MXU_OPTN2_HW: /* XRB.H*XRC.L */
1317         tcg_gen_sextract_tl(t0, t0, 16, 16);
1318         tcg_gen_sextract_tl(t1, t1,  0, 16);
1319         break;
1320     case MXU_OPTN2_XW: /* XRB.L*XRC.H */
1321         tcg_gen_sextract_tl(t0, t0,  0, 16);
1322         tcg_gen_sextract_tl(t1, t1, 16, 16);
1323         break;
1324     }
1325     tcg_gen_mul_tl(t0, t0, t1);
1326 
1327     gen_load_mxu_gpr(t1, XRa);
1328 
1329     switch (aptn1) {
1330     case MXU_APTN1_A:
1331         tcg_gen_add_tl(t1, t1, t0);
1332         break;
1333     case MXU_APTN1_S:
1334         tcg_gen_sub_tl(t1, t1, t0);
1335         break;
1336     }
1337 
1338     gen_store_mxu_gpr(t1, XRd);
1339 }
1340 
1341 /*
1342  * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1343  * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1344  * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1345  *   and accumulate
1346  * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1347  *   and accumulate
1348  */
1349 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
1350 {
1351     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1352     uint32_t XRa, XRb, XRc, XRd, aptn2;
1353 
1354     t0 = tcg_temp_new();
1355     t1 = tcg_temp_new();
1356     t2 = tcg_temp_new();
1357     t3 = tcg_temp_new();
1358     t4 = tcg_temp_new();
1359     t5 = tcg_temp_new();
1360     t6 = tcg_temp_new();
1361     t7 = tcg_temp_new();
1362 
1363     XRa = extract32(ctx->opcode, 6, 4);
1364     XRb = extract32(ctx->opcode, 10, 4);
1365     XRc = extract32(ctx->opcode, 14, 4);
1366     XRd = extract32(ctx->opcode, 18, 4);
1367     aptn2 = extract32(ctx->opcode, 24, 2);
1368 
1369     gen_load_mxu_gpr(t3, XRb);
1370     gen_load_mxu_gpr(t7, XRc);
1371 
1372     if (su) {
1373         /* Q8MULSU / Q8MACSU */
1374         tcg_gen_sextract_tl(t0, t3,  0, 8);
1375         tcg_gen_sextract_tl(t1, t3,  8, 8);
1376         tcg_gen_sextract_tl(t2, t3, 16, 8);
1377         tcg_gen_sextract_tl(t3, t3, 24, 8);
1378     } else {
1379         /* Q8MUL / Q8MAC */
1380         tcg_gen_extract_tl(t0, t3,  0, 8);
1381         tcg_gen_extract_tl(t1, t3,  8, 8);
1382         tcg_gen_extract_tl(t2, t3, 16, 8);
1383         tcg_gen_extract_tl(t3, t3, 24, 8);
1384     }
1385 
1386     tcg_gen_extract_tl(t4, t7,  0, 8);
1387     tcg_gen_extract_tl(t5, t7,  8, 8);
1388     tcg_gen_extract_tl(t6, t7, 16, 8);
1389     tcg_gen_extract_tl(t7, t7, 24, 8);
1390 
1391     tcg_gen_mul_tl(t0, t0, t4);
1392     tcg_gen_mul_tl(t1, t1, t5);
1393     tcg_gen_mul_tl(t2, t2, t6);
1394     tcg_gen_mul_tl(t3, t3, t7);
1395 
1396     if (mac) {
1397         gen_load_mxu_gpr(t4, XRd);
1398         gen_load_mxu_gpr(t5, XRa);
1399         tcg_gen_extract_tl(t6, t4,  0, 16);
1400         tcg_gen_extract_tl(t7, t4, 16, 16);
1401         if (aptn2 & 1) {
1402             tcg_gen_sub_tl(t0, t6, t0);
1403             tcg_gen_sub_tl(t1, t7, t1);
1404         } else {
1405             tcg_gen_add_tl(t0, t6, t0);
1406             tcg_gen_add_tl(t1, t7, t1);
1407         }
1408         tcg_gen_extract_tl(t6, t5,  0, 16);
1409         tcg_gen_extract_tl(t7, t5, 16, 16);
1410         if (aptn2 & 2) {
1411             tcg_gen_sub_tl(t2, t6, t2);
1412             tcg_gen_sub_tl(t3, t7, t3);
1413         } else {
1414             tcg_gen_add_tl(t2, t6, t2);
1415             tcg_gen_add_tl(t3, t7, t3);
1416         }
1417     }
1418 
1419     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
1420     tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
1421 
1422     gen_store_mxu_gpr(t0, XRd);
1423     gen_store_mxu_gpr(t1, XRa);
1424 }
1425 
1426 /*
1427  * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
1428  * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1429  *   in reversed byte seq.
1430  * S32LDI  XRa, Rb, S12 - Load a word from memory to XRF,
1431  *   post modify base address GPR.
1432  * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1433  *   post modify base address GPR and load in reversed byte seq.
1434  */
1435 static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
1436 {
1437     TCGv t0, t1;
1438     uint32_t XRa, Rb, s12;
1439 
1440     t0 = tcg_temp_new();
1441     t1 = tcg_temp_new();
1442 
1443     XRa = extract32(ctx->opcode, 6, 4);
1444     s12 = sextract32(ctx->opcode, 10, 10);
1445     Rb = extract32(ctx->opcode, 21, 5);
1446 
1447     gen_load_gpr(t0, Rb);
1448     tcg_gen_movi_tl(t1, s12 * 4);
1449     tcg_gen_add_tl(t0, t0, t1);
1450 
1451     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1452                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1453                         ctx->default_tcg_memop_mask);
1454     gen_store_mxu_gpr(t1, XRa);
1455 
1456     if (postinc) {
1457         gen_store_gpr(t0, Rb);
1458     }
1459 }
1460 
1461 /*
1462  * S32STD  XRa, Rb, S12 - Store a word from XRF to memory
1463  * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1464  *   in reversed byte seq.
1465  * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
1466  *   post modify base address GPR.
1467  * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1468  *   post modify base address GPR and store in reversed byte seq.
1469  */
1470 static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
1471 {
1472     TCGv t0, t1;
1473     uint32_t XRa, Rb, s12;
1474 
1475     t0 = tcg_temp_new();
1476     t1 = tcg_temp_new();
1477 
1478     XRa = extract32(ctx->opcode, 6, 4);
1479     s12 = sextract32(ctx->opcode, 10, 10);
1480     Rb = extract32(ctx->opcode, 21, 5);
1481 
1482     gen_load_gpr(t0, Rb);
1483     tcg_gen_movi_tl(t1, s12 * 4);
1484     tcg_gen_add_tl(t0, t0, t1);
1485 
1486     gen_load_mxu_gpr(t1, XRa);
1487     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1488                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1489                         ctx->default_tcg_memop_mask);
1490 
1491     if (postinc) {
1492         gen_store_gpr(t0, Rb);
1493     }
1494 }
1495 
1496 /*
1497  * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1498  * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1499  *   in reversed byte seq.
1500  * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1501  *   post modify base address GPR.
1502  * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1503  *   post modify base address GPR and load in reversed byte seq.
1504  */
1505 static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
1506                              bool postinc, uint32_t strd2)
1507 {
1508     TCGv t0, t1;
1509     uint32_t XRa, Rb, Rc;
1510 
1511     t0 = tcg_temp_new();
1512     t1 = tcg_temp_new();
1513 
1514     XRa = extract32(ctx->opcode, 6, 4);
1515     Rc = extract32(ctx->opcode, 16, 5);
1516     Rb = extract32(ctx->opcode, 21, 5);
1517 
1518     gen_load_gpr(t0, Rb);
1519     gen_load_gpr(t1, Rc);
1520     tcg_gen_shli_tl(t1, t1, strd2);
1521     tcg_gen_add_tl(t0, t0, t1);
1522 
1523     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1524                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1525                         ctx->default_tcg_memop_mask);
1526     gen_store_mxu_gpr(t1, XRa);
1527 
1528     if (postinc) {
1529         gen_store_gpr(t0, Rb);
1530     }
1531 }
1532 
1533 /*
1534  * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1535  * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1536  *   sign extending to GPR size.
1537  * LXH  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1538  *   sign extending to GPR size.
1539  * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1540  *   zero extending to GPR size.
1541  * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1542  *   zero extending to GPR size.
1543  */
1544 static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
1545 {
1546     TCGv t0, t1;
1547     uint32_t Ra, Rb, Rc;
1548 
1549     t0 = tcg_temp_new();
1550     t1 = tcg_temp_new();
1551 
1552     Ra = extract32(ctx->opcode, 11, 5);
1553     Rc = extract32(ctx->opcode, 16, 5);
1554     Rb = extract32(ctx->opcode, 21, 5);
1555 
1556     gen_load_gpr(t0, Rb);
1557     gen_load_gpr(t1, Rc);
1558     tcg_gen_shli_tl(t1, t1, strd2);
1559     tcg_gen_add_tl(t0, t0, t1);
1560 
1561     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
1562     gen_store_gpr(t1, Ra);
1563 }
1564 
1565 /*
1566  * S32STDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1567  * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1568  *   in reversed byte seq.
1569  * S32SDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1570  *   post modify base address GPR.
1571  * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1572  *   post modify base address GPR and store in reversed byte seq.
1573  */
1574 static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
1575                              bool postinc, uint32_t strd2)
1576 {
1577     TCGv t0, t1;
1578     uint32_t XRa, Rb, Rc;
1579 
1580     t0 = tcg_temp_new();
1581     t1 = tcg_temp_new();
1582 
1583     XRa = extract32(ctx->opcode, 6, 4);
1584     Rc = extract32(ctx->opcode, 16, 5);
1585     Rb = extract32(ctx->opcode, 21, 5);
1586 
1587     gen_load_gpr(t0, Rb);
1588     gen_load_gpr(t1, Rc);
1589     tcg_gen_shli_tl(t1, t1, strd2);
1590     tcg_gen_add_tl(t0, t0, t1);
1591 
1592     gen_load_mxu_gpr(t1, XRa);
1593     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1594                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1595                         ctx->default_tcg_memop_mask);
1596 
1597     if (postinc) {
1598         gen_store_gpr(t0, Rb);
1599     }
1600 }
1601 
1602 /*
1603  *                 MXU instruction category: logic
1604  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1605  *
1606  *               S32NOR    S32AND    S32OR    S32XOR
1607  */
1608 
1609 /*
1610  *  S32NOR XRa, XRb, XRc
1611  *    Update XRa with the result of logical bitwise 'nor' operation
1612  *    applied to the content of XRb and XRc.
1613  */
1614 static void gen_mxu_S32NOR(DisasContext *ctx)
1615 {
1616     uint32_t pad, XRc, XRb, XRa;
1617 
1618     pad = extract32(ctx->opcode, 21, 5);
1619     XRc = extract32(ctx->opcode, 14, 4);
1620     XRb = extract32(ctx->opcode, 10, 4);
1621     XRa = extract32(ctx->opcode,  6, 4);
1622 
1623     if (unlikely(pad != 0)) {
1624         /* opcode padding incorrect -> do nothing */
1625     } else if (unlikely(XRa == 0)) {
1626         /* destination is zero register -> do nothing */
1627     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1628         /* both operands zero registers -> just set destination to all 1s */
1629         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
1630     } else if (unlikely(XRb == 0)) {
1631         /* XRb zero register -> just set destination to the negation of XRc */
1632         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1633     } else if (unlikely(XRc == 0)) {
1634         /* XRa zero register -> just set destination to the negation of XRb */
1635         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1636     } else if (unlikely(XRb == XRc)) {
1637         /* both operands same -> just set destination to the negation of XRb */
1638         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1639     } else {
1640         /* the most general case */
1641         tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1642     }
1643 }
1644 
1645 /*
1646  *  S32AND XRa, XRb, XRc
1647  *    Update XRa with the result of logical bitwise 'and' operation
1648  *    applied to the content of XRb and XRc.
1649  */
1650 static void gen_mxu_S32AND(DisasContext *ctx)
1651 {
1652     uint32_t pad, XRc, XRb, XRa;
1653 
1654     pad = extract32(ctx->opcode, 21, 5);
1655     XRc = extract32(ctx->opcode, 14, 4);
1656     XRb = extract32(ctx->opcode, 10, 4);
1657     XRa = extract32(ctx->opcode,  6, 4);
1658 
1659     if (unlikely(pad != 0)) {
1660         /* opcode padding incorrect -> do nothing */
1661     } else if (unlikely(XRa == 0)) {
1662         /* destination is zero register -> do nothing */
1663     } else if (unlikely((XRb == 0) || (XRc == 0))) {
1664         /* one of operands zero register -> just set destination to all 0s */
1665         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1666     } else if (unlikely(XRb == XRc)) {
1667         /* both operands same -> just set destination to one of them */
1668         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1669     } else {
1670         /* the most general case */
1671         tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1672     }
1673 }
1674 
1675 /*
1676  *  S32OR XRa, XRb, XRc
1677  *    Update XRa with the result of logical bitwise 'or' operation
1678  *    applied to the content of XRb and XRc.
1679  */
1680 static void gen_mxu_S32OR(DisasContext *ctx)
1681 {
1682     uint32_t pad, XRc, XRb, XRa;
1683 
1684     pad = extract32(ctx->opcode, 21, 5);
1685     XRc = extract32(ctx->opcode, 14, 4);
1686     XRb = extract32(ctx->opcode, 10, 4);
1687     XRa = extract32(ctx->opcode,  6, 4);
1688 
1689     if (unlikely(pad != 0)) {
1690         /* opcode padding incorrect -> do nothing */
1691     } else if (unlikely(XRa == 0)) {
1692         /* destination is zero register -> do nothing */
1693     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1694         /* both operands zero registers -> just set destination to all 0s */
1695         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1696     } else if (unlikely(XRb == 0)) {
1697         /* XRb zero register -> just set destination to the content of XRc */
1698         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1699     } else if (unlikely(XRc == 0)) {
1700         /* XRc zero register -> just set destination to the content of XRb */
1701         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1702     } else if (unlikely(XRb == XRc)) {
1703         /* both operands same -> just set destination to one of them */
1704         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1705     } else {
1706         /* the most general case */
1707         tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1708     }
1709 }
1710 
1711 /*
1712  *  S32XOR XRa, XRb, XRc
1713  *    Update XRa with the result of logical bitwise 'xor' operation
1714  *    applied to the content of XRb and XRc.
1715  */
1716 static void gen_mxu_S32XOR(DisasContext *ctx)
1717 {
1718     uint32_t pad, XRc, XRb, XRa;
1719 
1720     pad = extract32(ctx->opcode, 21, 5);
1721     XRc = extract32(ctx->opcode, 14, 4);
1722     XRb = extract32(ctx->opcode, 10, 4);
1723     XRa = extract32(ctx->opcode,  6, 4);
1724 
1725     if (unlikely(pad != 0)) {
1726         /* opcode padding incorrect -> do nothing */
1727     } else if (unlikely(XRa == 0)) {
1728         /* destination is zero register -> do nothing */
1729     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1730         /* both operands zero registers -> just set destination to all 0s */
1731         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1732     } else if (unlikely(XRb == 0)) {
1733         /* XRb zero register -> just set destination to the content of XRc */
1734         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1735     } else if (unlikely(XRc == 0)) {
1736         /* XRc zero register -> just set destination to the content of XRb */
1737         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1738     } else if (unlikely(XRb == XRc)) {
1739         /* both operands same -> just set destination to all 0s */
1740         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1741     } else {
1742         /* the most general case */
1743         tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1744     }
1745 }
1746 
1747 /*
1748  *                 MXU instruction category: shift
1749  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1750  *
1751  *               D32SLL    D32SLR    D32SAR    D32SARL
1752  *               D32SLLV   D32SLRV   D32SARV   D32SARW
1753  *               Q16SLL    Q16SLR    Q16SAR
1754  *               Q16SLLV   Q16SLRV   Q16SARV
1755  */
1756 
1757 /*
1758  *  D32SLL XRa, XRd, XRb, XRc, SFT4
1759  *    Dual 32-bit shift left from XRb and XRc to SFT4
1760  *    bits (0..15). Store to XRa and XRd respectively.
1761  *  D32SLR XRa, XRd, XRb, XRc, SFT4
1762  *    Dual 32-bit shift logic right from XRb and XRc
1763  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1764  *  D32SAR XRa, XRd, XRb, XRc, SFT4
1765  *    Dual 32-bit shift arithmetic right from XRb and XRc
1766  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1767  */
1768 static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
1769 {
1770     uint32_t XRa, XRb, XRc, XRd, sft4;
1771 
1772     XRa  = extract32(ctx->opcode,  6, 4);
1773     XRb  = extract32(ctx->opcode, 10, 4);
1774     XRc  = extract32(ctx->opcode, 14, 4);
1775     XRd  = extract32(ctx->opcode, 18, 4);
1776     sft4 = extract32(ctx->opcode, 22, 4);
1777 
1778     TCGv t0 = tcg_temp_new();
1779     TCGv t1 = tcg_temp_new();
1780 
1781     gen_load_mxu_gpr(t0, XRb);
1782     gen_load_mxu_gpr(t1, XRc);
1783 
1784     if (right) {
1785         if (arithmetic) {
1786             tcg_gen_sari_tl(t0, t0, sft4);
1787             tcg_gen_sari_tl(t1, t1, sft4);
1788         } else {
1789             tcg_gen_shri_tl(t0, t0, sft4);
1790             tcg_gen_shri_tl(t1, t1, sft4);
1791         }
1792     } else {
1793         tcg_gen_shli_tl(t0, t0, sft4);
1794         tcg_gen_shli_tl(t1, t1, sft4);
1795     }
1796     gen_store_mxu_gpr(t0, XRa);
1797     gen_store_mxu_gpr(t1, XRd);
1798 }
1799 
1800 /*
1801  *  D32SLLV XRa, XRd, rs
1802  *    Dual 32-bit shift left from XRa and XRd to rs[3:0]
1803  *    bits. Store back to XRa and XRd respectively.
1804  *  D32SLRV XRa, XRd, rs
1805  *    Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1806  *    bits. Store back to XRa and XRd respectively.
1807  *  D32SARV XRa, XRd, rs
1808  *    Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1809  *    bits. Store back to XRa and XRd respectively.
1810  */
1811 static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
1812 {
1813     uint32_t XRa, XRd, rs;
1814 
1815     XRa = extract32(ctx->opcode, 10, 4);
1816     XRd = extract32(ctx->opcode, 14, 4);
1817     rs  = extract32(ctx->opcode, 21, 5);
1818 
1819     TCGv t0 = tcg_temp_new();
1820     TCGv t1 = tcg_temp_new();
1821     TCGv t2 = tcg_temp_new();
1822 
1823     gen_load_mxu_gpr(t0, XRa);
1824     gen_load_mxu_gpr(t1, XRd);
1825     gen_load_gpr(t2, rs);
1826     tcg_gen_andi_tl(t2, t2, 0x0f);
1827 
1828     if (right) {
1829         if (arithmetic) {
1830             tcg_gen_sar_tl(t0, t0, t2);
1831             tcg_gen_sar_tl(t1, t1, t2);
1832         } else {
1833             tcg_gen_shr_tl(t0, t0, t2);
1834             tcg_gen_shr_tl(t1, t1, t2);
1835         }
1836     } else {
1837         tcg_gen_shl_tl(t0, t0, t2);
1838         tcg_gen_shl_tl(t1, t1, t2);
1839     }
1840     gen_store_mxu_gpr(t0, XRa);
1841     gen_store_mxu_gpr(t1, XRd);
1842 }
1843 
1844 /*
1845  *  D32SARL XRa, XRb, XRc, SFT4
1846  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1847  *    to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1848  *
1849  *  D32SARW XRa, XRb, XRc, rb
1850  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1851  *    to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1852  */
1853 static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
1854 {
1855     uint32_t XRa, XRb, XRc, rb;
1856 
1857     XRa = extract32(ctx->opcode,  6, 4);
1858     XRb = extract32(ctx->opcode, 10, 4);
1859     XRc = extract32(ctx->opcode, 14, 4);
1860     rb  = extract32(ctx->opcode, 21, 5);
1861 
1862     if (unlikely(XRa == 0)) {
1863         /* destination is zero register -> do nothing */
1864     } else {
1865         TCGv t0 = tcg_temp_new();
1866         TCGv t1 = tcg_temp_new();
1867         TCGv t2 = tcg_temp_new();
1868 
1869         if (!sarw) {
1870             /* Make SFT4 from rb field */
1871             tcg_gen_movi_tl(t2, rb >> 1);
1872         } else {
1873             gen_load_gpr(t2, rb);
1874             tcg_gen_andi_tl(t2, t2, 0x0f);
1875         }
1876         gen_load_mxu_gpr(t0, XRb);
1877         gen_load_mxu_gpr(t1, XRc);
1878         tcg_gen_sar_tl(t0, t0, t2);
1879         tcg_gen_sar_tl(t1, t1, t2);
1880         tcg_gen_extract_tl(t2, t1, 0, 16);
1881         tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
1882         gen_store_mxu_gpr(t2, XRa);
1883     }
1884 }
1885 
1886 /*
1887  *  Q16SLL XRa, XRd, XRb, XRc, SFT4
1888  *    Quad 16-bit shift left from XRb and XRc to SFT4
1889  *    bits (0..15). Store to XRa and XRd respectively.
1890  *  Q16SLR XRa, XRd, XRb, XRc, SFT4
1891  *    Quad 16-bit shift logic right from XRb and XRc
1892  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1893  *  Q16SAR XRa, XRd, XRb, XRc, SFT4
1894  *    Quad 16-bit shift arithmetic right from XRb and XRc
1895  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1896  */
1897 static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
1898 {
1899     uint32_t XRa, XRb, XRc, XRd, sft4;
1900 
1901     XRa  = extract32(ctx->opcode,  6, 4);
1902     XRb  = extract32(ctx->opcode, 10, 4);
1903     XRc  = extract32(ctx->opcode, 14, 4);
1904     XRd  = extract32(ctx->opcode, 18, 4);
1905     sft4 = extract32(ctx->opcode, 22, 4);
1906 
1907     TCGv t0 = tcg_temp_new();
1908     TCGv t1 = tcg_temp_new();
1909     TCGv t2 = tcg_temp_new();
1910     TCGv t3 = tcg_temp_new();
1911 
1912     gen_load_mxu_gpr(t0, XRb);
1913     gen_load_mxu_gpr(t2, XRc);
1914 
1915     if (arithmetic) {
1916         tcg_gen_sextract_tl(t1, t0, 16, 16);
1917         tcg_gen_sextract_tl(t0, t0,  0, 16);
1918         tcg_gen_sextract_tl(t3, t2, 16, 16);
1919         tcg_gen_sextract_tl(t2, t2,  0, 16);
1920     } else {
1921         tcg_gen_extract_tl(t1, t0, 16, 16);
1922         tcg_gen_extract_tl(t0, t0,  0, 16);
1923         tcg_gen_extract_tl(t3, t2, 16, 16);
1924         tcg_gen_extract_tl(t2, t2,  0, 16);
1925     }
1926 
1927     if (right) {
1928         if (arithmetic) {
1929             tcg_gen_sari_tl(t0, t0, sft4);
1930             tcg_gen_sari_tl(t1, t1, sft4);
1931             tcg_gen_sari_tl(t2, t2, sft4);
1932             tcg_gen_sari_tl(t3, t3, sft4);
1933         } else {
1934             tcg_gen_shri_tl(t0, t0, sft4);
1935             tcg_gen_shri_tl(t1, t1, sft4);
1936             tcg_gen_shri_tl(t2, t2, sft4);
1937             tcg_gen_shri_tl(t3, t3, sft4);
1938         }
1939     } else {
1940         tcg_gen_shli_tl(t0, t0, sft4);
1941         tcg_gen_shli_tl(t1, t1, sft4);
1942         tcg_gen_shli_tl(t2, t2, sft4);
1943         tcg_gen_shli_tl(t3, t3, sft4);
1944     }
1945     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
1946     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
1947 
1948     gen_store_mxu_gpr(t0, XRa);
1949     gen_store_mxu_gpr(t2, XRd);
1950 }
1951 
1952 /*
1953  *  Q16SLLV XRa, XRd, rs
1954  *    Quad 16-bit shift left from XRa and XRd to rs[3:0]
1955  *    bits. Store to XRa and XRd respectively.
1956  *  Q16SLRV XRa, XRd, rs
1957  *    Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
1958  *    bits. Store to XRa and XRd respectively.
1959  *  Q16SARV XRa, XRd, rs
1960  *    Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
1961  *    bits. Store to XRa and XRd respectively.
1962  */
1963 static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
1964 {
1965     uint32_t XRa, XRd, rs;
1966 
1967     XRa = extract32(ctx->opcode, 10, 4);
1968     XRd = extract32(ctx->opcode, 14, 4);
1969     rs  = extract32(ctx->opcode, 21, 5);
1970 
1971     TCGv t0 = tcg_temp_new();
1972     TCGv t1 = tcg_temp_new();
1973     TCGv t2 = tcg_temp_new();
1974     TCGv t3 = tcg_temp_new();
1975     TCGv t5 = tcg_temp_new();
1976 
1977     gen_load_mxu_gpr(t0, XRa);
1978     gen_load_mxu_gpr(t2, XRd);
1979     gen_load_gpr(t5, rs);
1980     tcg_gen_andi_tl(t5, t5, 0x0f);
1981 
1982 
1983     if (arithmetic) {
1984         tcg_gen_sextract_tl(t1, t0, 16, 16);
1985         tcg_gen_sextract_tl(t0, t0,  0, 16);
1986         tcg_gen_sextract_tl(t3, t2, 16, 16);
1987         tcg_gen_sextract_tl(t2, t2,  0, 16);
1988     } else {
1989         tcg_gen_extract_tl(t1, t0, 16, 16);
1990         tcg_gen_extract_tl(t0, t0,  0, 16);
1991         tcg_gen_extract_tl(t3, t2, 16, 16);
1992         tcg_gen_extract_tl(t2, t2,  0, 16);
1993     }
1994 
1995     if (right) {
1996         if (arithmetic) {
1997             tcg_gen_sar_tl(t0, t0, t5);
1998             tcg_gen_sar_tl(t1, t1, t5);
1999             tcg_gen_sar_tl(t2, t2, t5);
2000             tcg_gen_sar_tl(t3, t3, t5);
2001         } else {
2002             tcg_gen_shr_tl(t0, t0, t5);
2003             tcg_gen_shr_tl(t1, t1, t5);
2004             tcg_gen_shr_tl(t2, t2, t5);
2005             tcg_gen_shr_tl(t3, t3, t5);
2006         }
2007     } else {
2008         tcg_gen_shl_tl(t0, t0, t5);
2009         tcg_gen_shl_tl(t1, t1, t5);
2010         tcg_gen_shl_tl(t2, t2, t5);
2011         tcg_gen_shl_tl(t3, t3, t5);
2012     }
2013     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2014     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2015 
2016     gen_store_mxu_gpr(t0, XRa);
2017     gen_store_mxu_gpr(t2, XRd);
2018 }
2019 
2020 /*
2021  *                   MXU instruction category max/min/avg
2022  *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2023  *
2024  *                     S32MAX     D16MAX     Q8MAX
2025  *                     S32MIN     D16MIN     Q8MIN
2026  *                     S32SLT     D16SLT     Q8SLT
2027  *                                           Q8SLTU
2028  *                                D16AVG     Q8AVG
2029  *                                D16AVGR    Q8AVGR
2030  *                     S32MOVZ    D16MOVZ    Q8MOVZ
2031  *                     S32MOVN    D16MOVN    Q8MOVN
2032  */
2033 
2034 /*
2035  *  S32MAX XRa, XRb, XRc
2036  *    Update XRa with the maximum of signed 32-bit integers contained
2037  *    in XRb and XRc.
2038  *
2039  *  S32MIN XRa, XRb, XRc
2040  *    Update XRa with the minimum of signed 32-bit integers contained
2041  *    in XRb and XRc.
2042  */
2043 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
2044 {
2045     uint32_t pad, opc, XRc, XRb, XRa;
2046 
2047     pad = extract32(ctx->opcode, 21, 5);
2048     opc = extract32(ctx->opcode, 18, 3);
2049     XRc = extract32(ctx->opcode, 14, 4);
2050     XRb = extract32(ctx->opcode, 10, 4);
2051     XRa = extract32(ctx->opcode,  6, 4);
2052 
2053     if (unlikely(pad != 0)) {
2054         /* opcode padding incorrect -> do nothing */
2055     } else if (unlikely(XRa == 0)) {
2056         /* destination is zero register -> do nothing */
2057     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2058         /* both operands zero registers -> just set destination to zero */
2059         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2060     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2061         /* exactly one operand is zero register - find which one is not...*/
2062         uint32_t XRx = XRb ? XRb : XRc;
2063         /* ...and do max/min operation with one operand 0 */
2064         if (opc == OPC_MXU_S32MAX) {
2065             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2066         } else {
2067             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2068         }
2069     } else if (unlikely(XRb == XRc)) {
2070         /* both operands same -> just set destination to one of them */
2071         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2072     } else {
2073         /* the most general case */
2074         if (opc == OPC_MXU_S32MAX) {
2075             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2076                                                mxu_gpr[XRc - 1]);
2077         } else {
2078             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2079                                                mxu_gpr[XRc - 1]);
2080         }
2081     }
2082 }
2083 
2084 /*
2085  *  D16MAX
2086  *    Update XRa with the 16-bit-wise maximums of signed integers
2087  *    contained in XRb and XRc.
2088  *
2089  *  D16MIN
2090  *    Update XRa with the 16-bit-wise minimums of signed integers
2091  *    contained in XRb and XRc.
2092  */
2093 static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
2094 {
2095     uint32_t pad, opc, XRc, XRb, XRa;
2096 
2097     pad = extract32(ctx->opcode, 21, 5);
2098     opc = extract32(ctx->opcode, 18, 3);
2099     XRc = extract32(ctx->opcode, 14, 4);
2100     XRb = extract32(ctx->opcode, 10, 4);
2101     XRa = extract32(ctx->opcode,  6, 4);
2102 
2103     if (unlikely(pad != 0)) {
2104         /* opcode padding incorrect -> do nothing */
2105     } else if (unlikely(XRa == 0)) {
2106         /* destination is zero register -> do nothing */
2107     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2108         /* both operands zero registers -> just set destination to zero */
2109         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2110     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2111         /* exactly one operand is zero register - find which one is not...*/
2112         uint32_t XRx = XRb ? XRb : XRc;
2113         /* ...and do half-word-wise max/min with one operand 0 */
2114         TCGv_i32 t0 = tcg_temp_new();
2115         TCGv_i32 t1 = tcg_constant_i32(0);
2116         TCGv_i32 t2 = tcg_temp_new();
2117 
2118         /* the left half-word first */
2119         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
2120         if (opc == OPC_MXU_D16MAX) {
2121             tcg_gen_smax_i32(t2, t0, t1);
2122         } else {
2123             tcg_gen_smin_i32(t2, t0, t1);
2124         }
2125 
2126         /* the right half-word */
2127         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
2128         /* move half-words to the leftmost position */
2129         tcg_gen_shli_i32(t0, t0, 16);
2130         /* t0 will be max/min of t0 and t1 */
2131         if (opc == OPC_MXU_D16MAX) {
2132             tcg_gen_smax_i32(t0, t0, t1);
2133         } else {
2134             tcg_gen_smin_i32(t0, t0, t1);
2135         }
2136         /* return resulting half-words to its original position */
2137         tcg_gen_shri_i32(t0, t0, 16);
2138         /* finally update the destination */
2139         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2140     } else if (unlikely(XRb == XRc)) {
2141         /* both operands same -> just set destination to one of them */
2142         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2143     } else {
2144         /* the most general case */
2145         TCGv_i32 t0 = tcg_temp_new();
2146         TCGv_i32 t1 = tcg_temp_new();
2147         TCGv_i32 t2 = tcg_temp_new();
2148 
2149         /* the left half-word first */
2150         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
2151         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
2152         if (opc == OPC_MXU_D16MAX) {
2153             tcg_gen_smax_i32(t2, t0, t1);
2154         } else {
2155             tcg_gen_smin_i32(t2, t0, t1);
2156         }
2157 
2158         /* the right half-word */
2159         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
2160         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
2161         /* move half-words to the leftmost position */
2162         tcg_gen_shli_i32(t0, t0, 16);
2163         tcg_gen_shli_i32(t1, t1, 16);
2164         /* t0 will be max/min of t0 and t1 */
2165         if (opc == OPC_MXU_D16MAX) {
2166             tcg_gen_smax_i32(t0, t0, t1);
2167         } else {
2168             tcg_gen_smin_i32(t0, t0, t1);
2169         }
2170         /* return resulting half-words to its original position */
2171         tcg_gen_shri_i32(t0, t0, 16);
2172         /* finally update the destination */
2173         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2174     }
2175 }
2176 
2177 /*
2178  *  Q8MAX
2179  *    Update XRa with the 8-bit-wise maximums of signed integers
2180  *    contained in XRb and XRc.
2181  *
2182  *  Q8MIN
2183  *    Update XRa with the 8-bit-wise minimums of signed integers
2184  *    contained in XRb and XRc.
2185  */
2186 static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
2187 {
2188     uint32_t pad, opc, XRc, XRb, XRa;
2189 
2190     pad = extract32(ctx->opcode, 21, 5);
2191     opc = extract32(ctx->opcode, 18, 3);
2192     XRc = extract32(ctx->opcode, 14, 4);
2193     XRb = extract32(ctx->opcode, 10, 4);
2194     XRa = extract32(ctx->opcode,  6, 4);
2195 
2196     if (unlikely(pad != 0)) {
2197         /* opcode padding incorrect -> do nothing */
2198     } else if (unlikely(XRa == 0)) {
2199         /* destination is zero register -> do nothing */
2200     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2201         /* both operands zero registers -> just set destination to zero */
2202         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2203     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2204         /* exactly one operand is zero register - make it be the first...*/
2205         uint32_t XRx = XRb ? XRb : XRc;
2206         /* ...and do byte-wise max/min with one operand 0 */
2207         TCGv_i32 t0 = tcg_temp_new();
2208         TCGv_i32 t1 = tcg_constant_i32(0);
2209         TCGv_i32 t2 = tcg_temp_new();
2210         int32_t i;
2211 
2212         /* the leftmost byte (byte 3) first */
2213         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
2214         if (opc == OPC_MXU_Q8MAX) {
2215             tcg_gen_smax_i32(t2, t0, t1);
2216         } else {
2217             tcg_gen_smin_i32(t2, t0, t1);
2218         }
2219 
2220         /* bytes 2, 1, 0 */
2221         for (i = 2; i >= 0; i--) {
2222             /* extract the byte */
2223             tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
2224             /* move the byte to the leftmost position */
2225             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2226             /* t0 will be max/min of t0 and t1 */
2227             if (opc == OPC_MXU_Q8MAX) {
2228                 tcg_gen_smax_i32(t0, t0, t1);
2229             } else {
2230                 tcg_gen_smin_i32(t0, t0, t1);
2231             }
2232             /* return resulting byte to its original position */
2233             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2234             /* finally update the destination */
2235             tcg_gen_or_i32(t2, t2, t0);
2236         }
2237         gen_store_mxu_gpr(t2, XRa);
2238     } else if (unlikely(XRb == XRc)) {
2239         /* both operands same -> just set destination to one of them */
2240         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2241     } else {
2242         /* the most general case */
2243         TCGv_i32 t0 = tcg_temp_new();
2244         TCGv_i32 t1 = tcg_temp_new();
2245         TCGv_i32 t2 = tcg_temp_new();
2246         int32_t i;
2247 
2248         /* the leftmost bytes (bytes 3) first */
2249         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
2250         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
2251         if (opc == OPC_MXU_Q8MAX) {
2252             tcg_gen_smax_i32(t2, t0, t1);
2253         } else {
2254             tcg_gen_smin_i32(t2, t0, t1);
2255         }
2256 
2257         /* bytes 2, 1, 0 */
2258         for (i = 2; i >= 0; i--) {
2259             /* extract corresponding bytes */
2260             tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
2261             tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
2262             /* move the bytes to the leftmost position */
2263             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2264             tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
2265             /* t0 will be max/min of t0 and t1 */
2266             if (opc == OPC_MXU_Q8MAX) {
2267                 tcg_gen_smax_i32(t0, t0, t1);
2268             } else {
2269                 tcg_gen_smin_i32(t0, t0, t1);
2270             }
2271             /* return resulting byte to its original position */
2272             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2273             /* finally update the destination */
2274             tcg_gen_or_i32(t2, t2, t0);
2275         }
2276         gen_store_mxu_gpr(t2, XRa);
2277     }
2278 }
2279 
2280 /*
2281  *  Q8SLT
2282  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2283  *    on per-byte basis.
2284  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2285  *
2286  *  Q8SLTU
2287  *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
2288  *    on per-byte basis.
2289  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2290  */
2291 static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
2292 {
2293     uint32_t pad, XRc, XRb, XRa;
2294 
2295     pad = extract32(ctx->opcode, 21, 5);
2296     XRc = extract32(ctx->opcode, 14, 4);
2297     XRb = extract32(ctx->opcode, 10, 4);
2298     XRa = extract32(ctx->opcode,  6, 4);
2299 
2300     if (unlikely(pad != 0)) {
2301         /* opcode padding incorrect -> do nothing */
2302     } else if (unlikely(XRa == 0)) {
2303         /* destination is zero register -> do nothing */
2304     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2305         /* both operands zero registers -> just set destination to zero */
2306         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2307     } else if (unlikely(XRb == XRc)) {
2308         /* both operands same registers -> just set destination to zero */
2309         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2310     } else {
2311         /* the most general case */
2312         TCGv t0 = tcg_temp_new();
2313         TCGv t1 = tcg_temp_new();
2314         TCGv t2 = tcg_temp_new();
2315         TCGv t3 = tcg_temp_new();
2316         TCGv t4 = tcg_temp_new();
2317 
2318         gen_load_mxu_gpr(t3, XRb);
2319         gen_load_mxu_gpr(t4, XRc);
2320         tcg_gen_movi_tl(t2, 0);
2321 
2322         for (int i = 0; i < 4; i++) {
2323             if (sltu) {
2324                 tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2325                 tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2326             } else {
2327                 tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
2328                 tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
2329             }
2330             tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2331             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2332         }
2333         gen_store_mxu_gpr(t2, XRa);
2334     }
2335 }
2336 
2337 /*
2338  *  S32SLT
2339  *    Update XRa with the signed "set less than" comparison of XRb and XRc.
2340  *    a.k.a. XRa = XRb < XRc ? 1 : 0;
2341  */
2342 static void gen_mxu_S32SLT(DisasContext *ctx)
2343 {
2344     uint32_t pad, XRc, XRb, XRa;
2345 
2346     pad = extract32(ctx->opcode, 21, 5);
2347     XRc = extract32(ctx->opcode, 14, 4);
2348     XRb = extract32(ctx->opcode, 10, 4);
2349     XRa = extract32(ctx->opcode,  6, 4);
2350 
2351     if (unlikely(pad != 0)) {
2352         /* opcode padding incorrect -> do nothing */
2353     } else if (unlikely(XRa == 0)) {
2354         /* destination is zero register -> do nothing */
2355     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2356         /* both operands zero registers -> just set destination to zero */
2357         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2358     } else if (unlikely(XRb == XRc)) {
2359         /* both operands same registers -> just set destination to zero */
2360         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2361     } else {
2362         /* the most general case */
2363         tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
2364                            mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
2365     }
2366 }
2367 
2368 /*
2369  *  D16SLT
2370  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2371  *    on per-word basis.
2372  *    a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2373  */
2374 static void gen_mxu_D16SLT(DisasContext *ctx)
2375 {
2376     uint32_t pad, XRc, XRb, XRa;
2377 
2378     pad = extract32(ctx->opcode, 21, 5);
2379     XRc = extract32(ctx->opcode, 14, 4);
2380     XRb = extract32(ctx->opcode, 10, 4);
2381     XRa = extract32(ctx->opcode,  6, 4);
2382 
2383     if (unlikely(pad != 0)) {
2384         /* opcode padding incorrect -> do nothing */
2385     } else if (unlikely(XRa == 0)) {
2386         /* destination is zero register -> do nothing */
2387     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2388         /* both operands zero registers -> just set destination to zero */
2389         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2390     } else if (unlikely(XRb == XRc)) {
2391         /* both operands same registers -> just set destination to zero */
2392         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2393     } else {
2394         /* the most general case */
2395         TCGv t0 = tcg_temp_new();
2396         TCGv t1 = tcg_temp_new();
2397         TCGv t2 = tcg_temp_new();
2398         TCGv t3 = tcg_temp_new();
2399         TCGv t4 = tcg_temp_new();
2400 
2401         gen_load_mxu_gpr(t3, XRb);
2402         gen_load_mxu_gpr(t4, XRc);
2403         tcg_gen_sextract_tl(t0, t3, 16, 16);
2404         tcg_gen_sextract_tl(t1, t4, 16, 16);
2405         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2406         tcg_gen_shli_tl(t2, t0, 16);
2407         tcg_gen_sextract_tl(t0, t3,  0, 16);
2408         tcg_gen_sextract_tl(t1, t4,  0, 16);
2409         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2410         tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
2411     }
2412 }
2413 
2414 /*
2415  *  D16AVG
2416  *    Update XRa with the signed average of XRb and XRc
2417  *    on per-word basis, rounding down.
2418  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2419  *
2420  *  D16AVGR
2421  *    Update XRa with the signed average of XRb and XRc
2422  *    on per-word basis, math rounding 4/5.
2423  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2424  */
2425 static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
2426 {
2427     uint32_t pad, XRc, XRb, XRa;
2428 
2429     pad = extract32(ctx->opcode, 21, 5);
2430     XRc = extract32(ctx->opcode, 14, 4);
2431     XRb = extract32(ctx->opcode, 10, 4);
2432     XRa = extract32(ctx->opcode,  6, 4);
2433 
2434     if (unlikely(pad != 0)) {
2435         /* opcode padding incorrect -> do nothing */
2436     } else if (unlikely(XRa == 0)) {
2437         /* destination is zero register -> do nothing */
2438     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2439         /* both operands zero registers -> just set destination to zero */
2440         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2441     } else if (unlikely(XRb == XRc)) {
2442         /* both operands same registers -> just set destination to same */
2443         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2444     } else {
2445         /* the most general case */
2446         TCGv t0 = tcg_temp_new();
2447         TCGv t1 = tcg_temp_new();
2448         TCGv t2 = tcg_temp_new();
2449         TCGv t3 = tcg_temp_new();
2450         TCGv t4 = tcg_temp_new();
2451 
2452         gen_load_mxu_gpr(t3, XRb);
2453         gen_load_mxu_gpr(t4, XRc);
2454         tcg_gen_sextract_tl(t0, t3, 16, 16);
2455         tcg_gen_sextract_tl(t1, t4, 16, 16);
2456         tcg_gen_add_tl(t0, t0, t1);
2457         if (round45) {
2458             tcg_gen_addi_tl(t0, t0, 1);
2459         }
2460         tcg_gen_shli_tl(t2, t0, 15);
2461         tcg_gen_andi_tl(t2, t2, 0xffff0000);
2462         tcg_gen_sextract_tl(t0, t3,  0, 16);
2463         tcg_gen_sextract_tl(t1, t4,  0, 16);
2464         tcg_gen_add_tl(t0, t0, t1);
2465         if (round45) {
2466             tcg_gen_addi_tl(t0, t0, 1);
2467         }
2468         tcg_gen_shri_tl(t0, t0, 1);
2469         tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
2470         gen_store_mxu_gpr(t2, XRa);
2471     }
2472 }
2473 
2474 /*
2475  *  Q8AVG
2476  *    Update XRa with the signed average of XRb and XRc
2477  *    on per-byte basis, rounding down.
2478  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2479  *
2480  *  Q8AVGR
2481  *    Update XRa with the signed average of XRb and XRc
2482  *    on per-word basis, math rounding 4/5.
2483  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2484  */
2485 static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
2486 {
2487     uint32_t pad, XRc, XRb, XRa;
2488 
2489     pad = extract32(ctx->opcode, 21, 5);
2490     XRc = extract32(ctx->opcode, 14, 4);
2491     XRb = extract32(ctx->opcode, 10, 4);
2492     XRa = extract32(ctx->opcode,  6, 4);
2493 
2494     if (unlikely(pad != 0)) {
2495         /* opcode padding incorrect -> do nothing */
2496     } else if (unlikely(XRa == 0)) {
2497         /* destination is zero register -> do nothing */
2498     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2499         /* both operands zero registers -> just set destination to zero */
2500         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2501     } else if (unlikely(XRb == XRc)) {
2502         /* both operands same registers -> just set destination to same */
2503         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2504     } else {
2505         /* the most general case */
2506         TCGv t0 = tcg_temp_new();
2507         TCGv t1 = tcg_temp_new();
2508         TCGv t2 = tcg_temp_new();
2509         TCGv t3 = tcg_temp_new();
2510         TCGv t4 = tcg_temp_new();
2511 
2512         gen_load_mxu_gpr(t3, XRb);
2513         gen_load_mxu_gpr(t4, XRc);
2514         tcg_gen_movi_tl(t2, 0);
2515 
2516         for (int i = 0; i < 4; i++) {
2517             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2518             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2519             tcg_gen_add_tl(t0, t0, t1);
2520             if (round45) {
2521                 tcg_gen_addi_tl(t0, t0, 1);
2522             }
2523             tcg_gen_shri_tl(t0, t0, 1);
2524             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2525         }
2526         gen_store_mxu_gpr(t2, XRa);
2527     }
2528 }
2529 
2530 /*
2531  *  Q8MOVZ
2532  *    Quadruple 8-bit packed conditional move where
2533  *    XRb contains conditions, XRc what to move and
2534  *    XRa is the destination.
2535  *    a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2536  *
2537  *  Q8MOVN
2538  *    Quadruple 8-bit packed conditional move where
2539  *    XRb contains conditions, XRc what to move and
2540  *    XRa is the destination.
2541  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2542  */
2543 static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
2544 {
2545     uint32_t XRc, XRb, XRa;
2546 
2547     XRa = extract32(ctx->opcode,  6, 4);
2548     XRb = extract32(ctx->opcode, 10, 4);
2549     XRc = extract32(ctx->opcode, 14, 4);
2550 
2551     TCGv t0 = tcg_temp_new();
2552     TCGv t1 = tcg_temp_new();
2553     TCGv t2 = tcg_temp_new();
2554     TCGv t3 = tcg_temp_new();
2555     TCGLabel *l_quarterdone = gen_new_label();
2556     TCGLabel *l_halfdone = gen_new_label();
2557     TCGLabel *l_quarterrest = gen_new_label();
2558     TCGLabel *l_done = gen_new_label();
2559 
2560     gen_load_mxu_gpr(t0, XRc);
2561     gen_load_mxu_gpr(t1, XRb);
2562     gen_load_mxu_gpr(t2, XRa);
2563 
2564     tcg_gen_extract_tl(t3, t1, 24, 8);
2565     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
2566     tcg_gen_extract_tl(t3, t0, 24, 8);
2567     tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
2568 
2569     gen_set_label(l_quarterdone);
2570     tcg_gen_extract_tl(t3, t1, 16, 8);
2571     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2572     tcg_gen_extract_tl(t3, t0, 16, 8);
2573     tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
2574 
2575     gen_set_label(l_halfdone);
2576     tcg_gen_extract_tl(t3, t1, 8, 8);
2577     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
2578     tcg_gen_extract_tl(t3, t0, 8, 8);
2579     tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
2580 
2581     gen_set_label(l_quarterrest);
2582     tcg_gen_extract_tl(t3, t1, 0, 8);
2583     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2584     tcg_gen_extract_tl(t3, t0, 0, 8);
2585     tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
2586 
2587     gen_set_label(l_done);
2588     gen_store_mxu_gpr(t2, XRa);
2589 }
2590 
2591 /*
2592  *  D16MOVZ
2593  *    Double 16-bit packed conditional move where
2594  *    XRb contains conditions, XRc what to move and
2595  *    XRa is the destination.
2596  *    a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2597  *
2598  *  D16MOVN
2599  *    Double 16-bit packed conditional move where
2600  *    XRb contains conditions, XRc what to move and
2601  *    XRa is the destination.
2602  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
2603  */
2604 static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
2605 {
2606     uint32_t XRc, XRb, XRa;
2607 
2608     XRa = extract32(ctx->opcode,  6, 4);
2609     XRb = extract32(ctx->opcode, 10, 4);
2610     XRc = extract32(ctx->opcode, 14, 4);
2611 
2612     TCGv t0 = tcg_temp_new();
2613     TCGv t1 = tcg_temp_new();
2614     TCGv t2 = tcg_temp_new();
2615     TCGv t3 = tcg_temp_new();
2616     TCGLabel *l_halfdone = gen_new_label();
2617     TCGLabel *l_done = gen_new_label();
2618 
2619     gen_load_mxu_gpr(t0, XRc);
2620     gen_load_mxu_gpr(t1, XRb);
2621     gen_load_mxu_gpr(t2, XRa);
2622 
2623     tcg_gen_extract_tl(t3, t1, 16, 16);
2624     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2625     tcg_gen_extract_tl(t3, t0, 16, 16);
2626     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2627 
2628     gen_set_label(l_halfdone);
2629     tcg_gen_extract_tl(t3, t1, 0, 16);
2630     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2631     tcg_gen_extract_tl(t3, t0, 0, 16);
2632     tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
2633 
2634     gen_set_label(l_done);
2635     gen_store_mxu_gpr(t2, XRa);
2636 }
2637 
2638 /*
2639  *  S32MOVZ
2640  *    Quadruple 32-bit conditional move where
2641  *    XRb contains conditions, XRc what to move and
2642  *    XRa is the destination.
2643  *    a.k.a. if (XRb == 0) { XRa = XRc }
2644  *
2645  *  S32MOVN
2646  *    Single 32-bit conditional move where
2647  *    XRb contains conditions, XRc what to move and
2648  *    XRa is the destination.
2649  *    a.k.a. if (XRb != 0) { XRa = XRc }
2650  */
2651 static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
2652 {
2653     uint32_t XRc, XRb, XRa;
2654 
2655     XRa = extract32(ctx->opcode,  6, 4);
2656     XRb = extract32(ctx->opcode, 10, 4);
2657     XRc = extract32(ctx->opcode, 14, 4);
2658 
2659     TCGv t0 = tcg_temp_new();
2660     TCGv t1 = tcg_temp_new();
2661     TCGLabel *l_done = gen_new_label();
2662 
2663     gen_load_mxu_gpr(t0, XRc);
2664     gen_load_mxu_gpr(t1, XRb);
2665 
2666     tcg_gen_brcondi_tl(cond, t1, 0, l_done);
2667     gen_store_mxu_gpr(t0, XRa);
2668     gen_set_label(l_done);
2669 }
2670 
2671 /*
2672  *      MXU instruction category: Addition and subtraction
2673  *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2674  *
2675  *              S32CPS      D16CPS
2676  *                                       Q8ADD
2677  */
2678 
2679 /*
2680  *  S32CPS
2681  *    Update XRa if XRc < 0 by value of 0 - XRb
2682  *    else XRa = XRb
2683  */
2684 static void gen_mxu_S32CPS(DisasContext *ctx)
2685 {
2686     uint32_t pad, XRc, XRb, XRa;
2687 
2688     pad = extract32(ctx->opcode, 21, 5);
2689     XRc = extract32(ctx->opcode, 14, 4);
2690     XRb = extract32(ctx->opcode, 10, 4);
2691     XRa = extract32(ctx->opcode,  6, 4);
2692 
2693     if (unlikely(pad != 0)) {
2694         /* opcode padding incorrect -> do nothing */
2695     } else if (unlikely(XRa == 0)) {
2696         /* destination is zero register -> do nothing */
2697     } else if (unlikely(XRb == 0)) {
2698         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2699         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2700     } else if (unlikely(XRc == 0)) {
2701         /* condition always false -> just move XRb to XRa */
2702         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2703     } else {
2704         /* the most general case */
2705         TCGv t0 = tcg_temp_new();
2706         TCGLabel *l_not_less = gen_new_label();
2707         TCGLabel *l_done = gen_new_label();
2708 
2709         tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
2710         tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
2711         tcg_gen_br(l_done);
2712         gen_set_label(l_not_less);
2713         gen_load_mxu_gpr(t0, XRb);
2714         gen_set_label(l_done);
2715         gen_store_mxu_gpr(t0, XRa);
2716     }
2717 }
2718 
2719 /*
2720  *  D16CPS
2721  *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2722  *    else XRa[0..1] = XRb[0..1]
2723  */
2724 static void gen_mxu_D16CPS(DisasContext *ctx)
2725 {
2726     uint32_t pad, XRc, XRb, XRa;
2727 
2728     pad = extract32(ctx->opcode, 21, 5);
2729     XRc = extract32(ctx->opcode, 14, 4);
2730     XRb = extract32(ctx->opcode, 10, 4);
2731     XRa = extract32(ctx->opcode,  6, 4);
2732 
2733     if (unlikely(pad != 0)) {
2734         /* opcode padding incorrect -> do nothing */
2735     } else if (unlikely(XRa == 0)) {
2736         /* destination is zero register -> do nothing */
2737     } else if (unlikely(XRb == 0)) {
2738         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2739         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2740     } else if (unlikely(XRc == 0)) {
2741         /* condition always false -> just move XRb to XRa */
2742         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2743     } else {
2744         /* the most general case */
2745         TCGv t0 = tcg_temp_new();
2746         TCGv t1 = tcg_temp_new();
2747         TCGLabel *l_done_hi = gen_new_label();
2748         TCGLabel *l_not_less_lo = gen_new_label();
2749         TCGLabel *l_done_lo = gen_new_label();
2750 
2751         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
2752         tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
2753         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
2754         tcg_gen_subfi_tl(t1, 0, t1);
2755 
2756         gen_set_label(l_done_hi);
2757         tcg_gen_shli_i32(t1, t1, 16);
2758 
2759         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1],  0, 16);
2760         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
2761         tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2762         tcg_gen_subfi_tl(t0, 0, t0);
2763         tcg_gen_br(l_done_lo);
2764 
2765         gen_set_label(l_not_less_lo);
2766         tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2767 
2768         gen_set_label(l_done_lo);
2769         tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
2770     }
2771 }
2772 
2773 /*
2774  *  Q8ABD XRa, XRb, XRc
2775  *  Gets absolute difference for quadruple of 8-bit
2776  *  packed in XRb to another one in XRc,
2777  *  put the result in XRa.
2778  *  a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2779  */
2780 static void gen_mxu_Q8ABD(DisasContext *ctx)
2781 {
2782     uint32_t pad, XRc, XRb, XRa;
2783 
2784     pad = extract32(ctx->opcode, 21, 3);
2785     XRc = extract32(ctx->opcode, 14, 4);
2786     XRb = extract32(ctx->opcode, 10, 4);
2787     XRa = extract32(ctx->opcode,  6, 4);
2788 
2789     if (unlikely(pad != 0)) {
2790         /* opcode padding incorrect -> do nothing */
2791     } else if (unlikely(XRa == 0)) {
2792         /* destination is zero register -> do nothing */
2793     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2794         /* both operands zero registers -> just set destination to zero */
2795         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2796     } else {
2797         /* the most general case */
2798         TCGv t0 = tcg_temp_new();
2799         TCGv t1 = tcg_temp_new();
2800         TCGv t2 = tcg_temp_new();
2801         TCGv t3 = tcg_temp_new();
2802         TCGv t4 = tcg_temp_new();
2803 
2804         gen_load_mxu_gpr(t3, XRb);
2805         gen_load_mxu_gpr(t4, XRc);
2806         tcg_gen_movi_tl(t2, 0);
2807 
2808         for (int i = 0; i < 4; i++) {
2809             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2810             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2811 
2812             tcg_gen_sub_tl(t0, t0, t1);
2813             tcg_gen_abs_tl(t0, t0);
2814 
2815             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2816         }
2817         gen_store_mxu_gpr(t2, XRa);
2818     }
2819 }
2820 
2821 /*
2822  *  Q8ADD XRa, XRb, XRc, ptn2
2823  *  Add/subtract quadruple of 8-bit packed in XRb
2824  *  to another one in XRc, put the result in XRa.
2825  */
2826 static void gen_mxu_Q8ADD(DisasContext *ctx)
2827 {
2828     uint32_t aptn2, pad, XRc, XRb, XRa;
2829 
2830     aptn2 = extract32(ctx->opcode, 24, 2);
2831     pad   = extract32(ctx->opcode, 21, 3);
2832     XRc   = extract32(ctx->opcode, 14, 4);
2833     XRb   = extract32(ctx->opcode, 10, 4);
2834     XRa   = extract32(ctx->opcode,  6, 4);
2835 
2836     if (unlikely(pad != 0)) {
2837         /* opcode padding incorrect -> do nothing */
2838     } else if (unlikely(XRa == 0)) {
2839         /* destination is zero register -> do nothing */
2840     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2841         /* both operands zero registers -> just set destination to zero */
2842         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2843     } else {
2844         /* the most general case */
2845         TCGv t0 = tcg_temp_new();
2846         TCGv t1 = tcg_temp_new();
2847         TCGv t2 = tcg_temp_new();
2848         TCGv t3 = tcg_temp_new();
2849         TCGv t4 = tcg_temp_new();
2850 
2851         gen_load_mxu_gpr(t3, XRb);
2852         gen_load_mxu_gpr(t4, XRc);
2853 
2854         for (int i = 0; i < 4; i++) {
2855             tcg_gen_andi_tl(t0, t3, 0xff);
2856             tcg_gen_andi_tl(t1, t4, 0xff);
2857 
2858             if (i < 2) {
2859                 if (aptn2 & 0x01) {
2860                     tcg_gen_sub_tl(t0, t0, t1);
2861                 } else {
2862                     tcg_gen_add_tl(t0, t0, t1);
2863                 }
2864             } else {
2865                 if (aptn2 & 0x02) {
2866                     tcg_gen_sub_tl(t0, t0, t1);
2867                 } else {
2868                     tcg_gen_add_tl(t0, t0, t1);
2869                 }
2870             }
2871             if (i < 3) {
2872                 tcg_gen_shri_tl(t3, t3, 8);
2873                 tcg_gen_shri_tl(t4, t4, 8);
2874             }
2875             if (i > 0) {
2876                 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2877             } else {
2878                 tcg_gen_andi_tl(t0, t0, 0xff);
2879                 tcg_gen_mov_tl(t2, t0);
2880             }
2881         }
2882         gen_store_mxu_gpr(t2, XRa);
2883     }
2884 }
2885 
2886 /*
2887  *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
2888  *    Add/subtract quadruple of 8-bit packed in XRb
2889  *    to another one in XRc, with zero extending
2890  *    to 16-bit and put results as packed 16-bit data
2891  *    into XRa and XRd.
2892  *    aptn2 manages action add or subract of pairs of data.
2893  *
2894  *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
2895  *    Add/subtract quadruple of 8-bit packed in XRb
2896  *    to another one in XRc, with zero extending
2897  *    to 16-bit and accumulate results as packed 16-bit data
2898  *    into XRa and XRd.
2899  *    aptn2 manages action add or subract of pairs of data.
2900  */
2901 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
2902 {
2903     uint32_t aptn2, XRd, XRc, XRb, XRa;
2904 
2905     aptn2 = extract32(ctx->opcode, 24, 2);
2906     XRd   = extract32(ctx->opcode, 18, 4);
2907     XRc   = extract32(ctx->opcode, 14, 4);
2908     XRb   = extract32(ctx->opcode, 10, 4);
2909     XRa   = extract32(ctx->opcode,  6, 4);
2910 
2911     if (unlikely((XRb == 0) && (XRc == 0))) {
2912         /* both operands zero registers -> just set destination to zero */
2913         if (XRa != 0) {
2914             tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2915         }
2916         if (XRd != 0) {
2917             tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
2918         }
2919     } else {
2920         /* the most general case */
2921         TCGv t0 = tcg_temp_new();
2922         TCGv t1 = tcg_temp_new();
2923         TCGv t2 = tcg_temp_new();
2924         TCGv t3 = tcg_temp_new();
2925         TCGv t4 = tcg_temp_new();
2926         TCGv t5 = tcg_temp_new();
2927 
2928         if (XRa != 0) {
2929             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
2930             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
2931             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
2932             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
2933             if (aptn2 & 2) {
2934                 tcg_gen_sub_tl(t0, t0, t1);
2935                 tcg_gen_sub_tl(t2, t2, t3);
2936             } else {
2937                 tcg_gen_add_tl(t0, t0, t1);
2938                 tcg_gen_add_tl(t2, t2, t3);
2939             }
2940             if (accumulate) {
2941                 gen_load_mxu_gpr(t5, XRa);
2942                 tcg_gen_extract_tl(t1, t5,  0, 16);
2943                 tcg_gen_extract_tl(t3, t5, 16, 16);
2944                 tcg_gen_add_tl(t0, t0, t1);
2945                 tcg_gen_add_tl(t2, t2, t3);
2946             }
2947             tcg_gen_shli_tl(t2, t2, 16);
2948             tcg_gen_extract_tl(t0, t0, 0, 16);
2949             tcg_gen_or_tl(t4, t2, t0);
2950         }
2951         if (XRd != 0) {
2952             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
2953             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
2954             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
2955             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
2956             if (aptn2 & 1) {
2957                 tcg_gen_sub_tl(t0, t0, t1);
2958                 tcg_gen_sub_tl(t2, t2, t3);
2959             } else {
2960                 tcg_gen_add_tl(t0, t0, t1);
2961                 tcg_gen_add_tl(t2, t2, t3);
2962             }
2963             if (accumulate) {
2964                 gen_load_mxu_gpr(t5, XRd);
2965                 tcg_gen_extract_tl(t1, t5,  0, 16);
2966                 tcg_gen_extract_tl(t3, t5, 16, 16);
2967                 tcg_gen_add_tl(t0, t0, t1);
2968                 tcg_gen_add_tl(t2, t2, t3);
2969             }
2970             tcg_gen_shli_tl(t2, t2, 16);
2971             tcg_gen_extract_tl(t0, t0, 0, 16);
2972             tcg_gen_or_tl(t5, t2, t0);
2973         }
2974 
2975         gen_store_mxu_gpr(t4, XRa);
2976         gen_store_mxu_gpr(t5, XRd);
2977     }
2978 }
2979 
2980 /*
2981  *  D8SUM XRa, XRb, XRc
2982  *    Double parallel add of quadruple unsigned 8-bit together
2983  *    with zero extending to 16-bit data.
2984  *  D8SUMC XRa, XRb, XRc
2985  *    Double parallel add of quadruple unsigned 8-bit together
2986  *    with zero extending to 16-bit data and adding 2 to each
2987  *    parallel result.
2988  */
2989 static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
2990 {
2991     uint32_t pad, pad2, XRc, XRb, XRa;
2992 
2993     pad  = extract32(ctx->opcode, 24, 2);
2994     pad2 = extract32(ctx->opcode, 18, 4);
2995     XRc  = extract32(ctx->opcode, 14, 4);
2996     XRb  = extract32(ctx->opcode, 10, 4);
2997     XRa  = extract32(ctx->opcode,  6, 4);
2998 
2999     if (unlikely(pad != 0 || pad2 != 0)) {
3000         /* opcode padding incorrect -> do nothing */
3001     } else if (unlikely(XRa == 0)) {
3002         /* destination is zero register -> do nothing */
3003     } else if (unlikely((XRb == 0) && (XRc == 0))) {
3004         /* both operands zero registers -> just set destination to zero */
3005         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3006     } else {
3007         /* the most general case */
3008         TCGv t0 = tcg_temp_new();
3009         TCGv t1 = tcg_temp_new();
3010         TCGv t2 = tcg_temp_new();
3011         TCGv t3 = tcg_temp_new();
3012         TCGv t4 = tcg_temp_new();
3013         TCGv t5 = tcg_temp_new();
3014 
3015         if (XRb != 0) {
3016             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
3017             tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
3018             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
3019             tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
3020             tcg_gen_add_tl(t4, t0, t1);
3021             tcg_gen_add_tl(t4, t4, t2);
3022             tcg_gen_add_tl(t4, t4, t3);
3023         } else {
3024             tcg_gen_mov_tl(t4, 0);
3025         }
3026         if (XRc != 0) {
3027             tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
3028             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
3029             tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
3030             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
3031             tcg_gen_add_tl(t5, t0, t1);
3032             tcg_gen_add_tl(t5, t5, t2);
3033             tcg_gen_add_tl(t5, t5, t3);
3034         } else {
3035             tcg_gen_mov_tl(t5, 0);
3036         }
3037 
3038         if (sumc) {
3039             tcg_gen_addi_tl(t4, t4, 2);
3040             tcg_gen_addi_tl(t5, t5, 2);
3041         }
3042         tcg_gen_shli_tl(t4, t4, 16);
3043 
3044         tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3045     }
3046 }
3047 
3048 /*
3049  * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3050  * 16-bit pattern addition.
3051  */
3052 static void gen_mxu_q16add(DisasContext *ctx)
3053 {
3054     uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
3055 
3056     aptn2 = extract32(ctx->opcode, 24, 2);
3057     optn2 = extract32(ctx->opcode, 22, 2);
3058     XRd   = extract32(ctx->opcode, 18, 4);
3059     XRc   = extract32(ctx->opcode, 14, 4);
3060     XRb   = extract32(ctx->opcode, 10, 4);
3061     XRa   = extract32(ctx->opcode,  6, 4);
3062 
3063     TCGv t0 = tcg_temp_new();
3064     TCGv t1 = tcg_temp_new();
3065     TCGv t2 = tcg_temp_new();
3066     TCGv t3 = tcg_temp_new();
3067     TCGv t4 = tcg_temp_new();
3068     TCGv t5 = tcg_temp_new();
3069 
3070     gen_load_mxu_gpr(t1, XRb);
3071     tcg_gen_extract_tl(t0, t1,  0, 16);
3072     tcg_gen_extract_tl(t1, t1, 16, 16);
3073 
3074     gen_load_mxu_gpr(t3, XRc);
3075     tcg_gen_extract_tl(t2, t3,  0, 16);
3076     tcg_gen_extract_tl(t3, t3, 16, 16);
3077 
3078     switch (optn2) {
3079     case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
3080         tcg_gen_mov_tl(t4, t1);
3081         tcg_gen_mov_tl(t5, t0);
3082         break;
3083     case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
3084         tcg_gen_mov_tl(t4, t0);
3085         tcg_gen_mov_tl(t5, t0);
3086         break;
3087     case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
3088         tcg_gen_mov_tl(t4, t1);
3089         tcg_gen_mov_tl(t5, t1);
3090         break;
3091     case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
3092         tcg_gen_mov_tl(t4, t0);
3093         tcg_gen_mov_tl(t5, t1);
3094         break;
3095     }
3096 
3097     switch (aptn2) {
3098     case MXU_APTN2_AA: /* lop +, rop + */
3099         tcg_gen_add_tl(t0, t4, t3);
3100         tcg_gen_add_tl(t1, t5, t2);
3101         tcg_gen_add_tl(t4, t4, t3);
3102         tcg_gen_add_tl(t5, t5, t2);
3103         break;
3104     case MXU_APTN2_AS: /* lop +, rop + */
3105         tcg_gen_sub_tl(t0, t4, t3);
3106         tcg_gen_sub_tl(t1, t5, t2);
3107         tcg_gen_add_tl(t4, t4, t3);
3108         tcg_gen_add_tl(t5, t5, t2);
3109         break;
3110     case MXU_APTN2_SA: /* lop +, rop + */
3111         tcg_gen_add_tl(t0, t4, t3);
3112         tcg_gen_add_tl(t1, t5, t2);
3113         tcg_gen_sub_tl(t4, t4, t3);
3114         tcg_gen_sub_tl(t5, t5, t2);
3115         break;
3116     case MXU_APTN2_SS: /* lop +, rop + */
3117         tcg_gen_sub_tl(t0, t4, t3);
3118         tcg_gen_sub_tl(t1, t5, t2);
3119         tcg_gen_sub_tl(t4, t4, t3);
3120         tcg_gen_sub_tl(t5, t5, t2);
3121         break;
3122     }
3123 
3124     tcg_gen_shli_tl(t0, t0, 16);
3125     tcg_gen_extract_tl(t1, t1, 0, 16);
3126     tcg_gen_shli_tl(t4, t4, 16);
3127     tcg_gen_extract_tl(t5, t5, 0, 16);
3128 
3129     tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3130     tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
3131 }
3132 
3133 /*
3134  * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3135  * 16-bit addition/subtraction with accumulate.
3136  */
3137 static void gen_mxu_q16acc(DisasContext *ctx)
3138 {
3139     uint32_t aptn2, XRc, XRb, XRa, XRd;
3140 
3141     aptn2 = extract32(ctx->opcode, 24, 2);
3142     XRd   = extract32(ctx->opcode, 18, 4);
3143     XRc   = extract32(ctx->opcode, 14, 4);
3144     XRb   = extract32(ctx->opcode, 10, 4);
3145     XRa   = extract32(ctx->opcode,  6, 4);
3146 
3147     TCGv t0 = tcg_temp_new();
3148     TCGv t1 = tcg_temp_new();
3149     TCGv t2 = tcg_temp_new();
3150     TCGv t3 = tcg_temp_new();
3151     TCGv s3 = tcg_temp_new();
3152     TCGv s2 = tcg_temp_new();
3153     TCGv s1 = tcg_temp_new();
3154     TCGv s0 = tcg_temp_new();
3155 
3156     gen_load_mxu_gpr(t1, XRb);
3157     tcg_gen_extract_tl(t0, t1,  0, 16);
3158     tcg_gen_extract_tl(t1, t1, 16, 16);
3159 
3160     gen_load_mxu_gpr(t3, XRc);
3161     tcg_gen_extract_tl(t2, t3,  0, 16);
3162     tcg_gen_extract_tl(t3, t3, 16, 16);
3163 
3164     switch (aptn2) {
3165     case MXU_APTN2_AA: /* lop +, rop + */
3166         tcg_gen_add_tl(s3, t1, t3);
3167         tcg_gen_add_tl(s2, t0, t2);
3168         tcg_gen_add_tl(s1, t1, t3);
3169         tcg_gen_add_tl(s0, t0, t2);
3170         break;
3171     case MXU_APTN2_AS: /* lop +, rop - */
3172         tcg_gen_sub_tl(s3, t1, t3);
3173         tcg_gen_sub_tl(s2, t0, t2);
3174         tcg_gen_add_tl(s1, t1, t3);
3175         tcg_gen_add_tl(s0, t0, t2);
3176         break;
3177     case MXU_APTN2_SA: /* lop -, rop + */
3178         tcg_gen_add_tl(s3, t1, t3);
3179         tcg_gen_add_tl(s2, t0, t2);
3180         tcg_gen_sub_tl(s1, t1, t3);
3181         tcg_gen_sub_tl(s0, t0, t2);
3182         break;
3183     case MXU_APTN2_SS: /* lop -, rop - */
3184         tcg_gen_sub_tl(s3, t1, t3);
3185         tcg_gen_sub_tl(s2, t0, t2);
3186         tcg_gen_sub_tl(s1, t1, t3);
3187         tcg_gen_sub_tl(s0, t0, t2);
3188         break;
3189     }
3190 
3191     if (XRa != 0) {
3192         tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
3193         tcg_gen_extract_tl(t0, t0, 0, 16);
3194         tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
3195         tcg_gen_add_tl(t1, t1, s1);
3196         tcg_gen_shli_tl(t1, t1, 16);
3197         tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
3198     }
3199 
3200     if (XRd != 0) {
3201         tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
3202         tcg_gen_extract_tl(t0, t0, 0, 16);
3203         tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
3204         tcg_gen_add_tl(t1, t1, s3);
3205         tcg_gen_shli_tl(t1, t1, 16);
3206         tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
3207     }
3208 }
3209 
3210 /*
3211  * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3212  * 16-bit accumulate.
3213  */
3214 static void gen_mxu_q16accm(DisasContext *ctx)
3215 {
3216     uint32_t aptn2, XRc, XRb, XRa, XRd;
3217 
3218     aptn2 = extract32(ctx->opcode, 24, 2);
3219     XRd   = extract32(ctx->opcode, 18, 4);
3220     XRc   = extract32(ctx->opcode, 14, 4);
3221     XRb   = extract32(ctx->opcode, 10, 4);
3222     XRa   = extract32(ctx->opcode,  6, 4);
3223 
3224     TCGv t0 = tcg_temp_new();
3225     TCGv t1 = tcg_temp_new();
3226     TCGv t2 = tcg_temp_new();
3227     TCGv t3 = tcg_temp_new();
3228 
3229     gen_load_mxu_gpr(t2, XRb);
3230     gen_load_mxu_gpr(t3, XRc);
3231 
3232     if (XRa != 0) {
3233         TCGv a0 = tcg_temp_new();
3234         TCGv a1 = tcg_temp_new();
3235 
3236         tcg_gen_extract_tl(t0, t2,  0, 16);
3237         tcg_gen_extract_tl(t1, t2, 16, 16);
3238 
3239         gen_load_mxu_gpr(a1, XRa);
3240         tcg_gen_extract_tl(a0, a1,  0, 16);
3241         tcg_gen_extract_tl(a1, a1, 16, 16);
3242 
3243         if (aptn2 & 2) {
3244             tcg_gen_sub_tl(a0, a0, t0);
3245             tcg_gen_sub_tl(a1, a1, t1);
3246         } else {
3247             tcg_gen_add_tl(a0, a0, t0);
3248             tcg_gen_add_tl(a1, a1, t1);
3249         }
3250         tcg_gen_extract_tl(a0, a0, 0, 16);
3251         tcg_gen_shli_tl(a1, a1, 16);
3252         tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
3253     }
3254 
3255     if (XRd != 0) {
3256         TCGv a0 = tcg_temp_new();
3257         TCGv a1 = tcg_temp_new();
3258 
3259         tcg_gen_extract_tl(t0, t3,  0, 16);
3260         tcg_gen_extract_tl(t1, t3, 16, 16);
3261 
3262         gen_load_mxu_gpr(a1, XRd);
3263         tcg_gen_extract_tl(a0, a1,  0, 16);
3264         tcg_gen_extract_tl(a1, a1, 16, 16);
3265 
3266         if (aptn2 & 1) {
3267             tcg_gen_sub_tl(a0, a0, t0);
3268             tcg_gen_sub_tl(a1, a1, t1);
3269         } else {
3270             tcg_gen_add_tl(a0, a0, t0);
3271             tcg_gen_add_tl(a1, a1, t1);
3272         }
3273         tcg_gen_extract_tl(a0, a0, 0, 16);
3274         tcg_gen_shli_tl(a1, a1, 16);
3275         tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
3276     }
3277 }
3278 
3279 
3280 /*
3281  * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3282  * 16-bit sign extended addition and accumulate.
3283  */
3284 static void gen_mxu_d16asum(DisasContext *ctx)
3285 {
3286     uint32_t aptn2, XRc, XRb, XRa, XRd;
3287 
3288     aptn2 = extract32(ctx->opcode, 24, 2);
3289     XRd   = extract32(ctx->opcode, 18, 4);
3290     XRc   = extract32(ctx->opcode, 14, 4);
3291     XRb   = extract32(ctx->opcode, 10, 4);
3292     XRa   = extract32(ctx->opcode,  6, 4);
3293 
3294     TCGv t0 = tcg_temp_new();
3295     TCGv t1 = tcg_temp_new();
3296     TCGv t2 = tcg_temp_new();
3297     TCGv t3 = tcg_temp_new();
3298 
3299     gen_load_mxu_gpr(t2, XRb);
3300     gen_load_mxu_gpr(t3, XRc);
3301 
3302     if (XRa != 0) {
3303         tcg_gen_sextract_tl(t0, t2,  0, 16);
3304         tcg_gen_sextract_tl(t1, t2, 16, 16);
3305         tcg_gen_add_tl(t0, t0, t1);
3306         if (aptn2 & 2) {
3307             tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3308         } else {
3309             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3310         }
3311     }
3312 
3313     if (XRd != 0) {
3314         tcg_gen_sextract_tl(t0, t3,  0, 16);
3315         tcg_gen_sextract_tl(t1, t3, 16, 16);
3316         tcg_gen_add_tl(t0, t0, t1);
3317         if (aptn2 & 1) {
3318             tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3319         } else {
3320             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3321         }
3322     }
3323 }
3324 
3325 /*
3326  * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3327  * 32 bit pattern addition/subtraction, set carry.
3328  *
3329  * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3330  * 32 bit pattern addition/subtraction with carry.
3331  */
3332 static void gen_mxu_d32add(DisasContext *ctx)
3333 {
3334     uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
3335 
3336     aptn2 = extract32(ctx->opcode, 24, 2);
3337     addc  = extract32(ctx->opcode, 22, 2);
3338     XRd   = extract32(ctx->opcode, 18, 4);
3339     XRc   = extract32(ctx->opcode, 14, 4);
3340     XRb   = extract32(ctx->opcode, 10, 4);
3341     XRa   = extract32(ctx->opcode,  6, 4);
3342 
3343     TCGv t0 = tcg_temp_new();
3344     TCGv t1 = tcg_temp_new();
3345     TCGv t2 = tcg_temp_new();
3346     TCGv cr = tcg_temp_new();
3347 
3348     if (unlikely(addc > 1)) {
3349         /* opcode incorrect -> do nothing */
3350     } else if (addc == 1) {
3351         if (unlikely(XRa == 0 && XRd == 0)) {
3352             /* destinations are zero register -> do nothing */
3353         } else {
3354             /* FIXME ??? What if XRa == XRd ??? */
3355             /* aptn2 is unused here */
3356             gen_load_mxu_gpr(t0, XRb);
3357             gen_load_mxu_gpr(t1, XRc);
3358             gen_load_mxu_cr(cr);
3359             if (XRa != 0) {
3360                 tcg_gen_extract_tl(t2, cr, 31, 1);
3361                 tcg_gen_add_tl(t0, t0, t2);
3362                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3363             }
3364             if (XRd != 0) {
3365                 tcg_gen_extract_tl(t2, cr, 30, 1);
3366                 tcg_gen_add_tl(t1, t1, t2);
3367                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3368             }
3369         }
3370     } else if (unlikely(XRa == 0 && XRd == 0)) {
3371         /* destinations are zero register -> do nothing */
3372     } else {
3373         /* common case */
3374         /* FIXME ??? What if XRa == XRd ??? */
3375         TCGv carry = tcg_temp_new();
3376 
3377         gen_load_mxu_gpr(t0, XRb);
3378         gen_load_mxu_gpr(t1, XRc);
3379         gen_load_mxu_cr(cr);
3380         if (XRa != 0) {
3381             if (aptn2 & 2) {
3382                 tcg_gen_sub_i32(t2, t0, t1);
3383                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3384             } else {
3385                 tcg_gen_add_i32(t2, t0, t1);
3386                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3387             }
3388             tcg_gen_andi_tl(cr, cr, 0x7fffffff);
3389             tcg_gen_shli_tl(carry, carry, 31);
3390             tcg_gen_or_tl(cr, cr, carry);
3391             gen_store_mxu_gpr(t2, XRa);
3392         }
3393         if (XRd != 0) {
3394             if (aptn2 & 1) {
3395                 tcg_gen_sub_i32(t2, t0, t1);
3396                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3397             } else {
3398                 tcg_gen_add_i32(t2, t0, t1);
3399                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3400             }
3401             tcg_gen_andi_tl(cr, cr, 0xbfffffff);
3402             tcg_gen_shli_tl(carry, carry, 30);
3403             tcg_gen_or_tl(cr, cr, carry);
3404             gen_store_mxu_gpr(t2, XRd);
3405         }
3406         gen_store_mxu_cr(cr);
3407     }
3408 }
3409 
3410 /*
3411  * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3412  * 32 bit pattern addition/subtraction and accumulate.
3413  */
3414 static void gen_mxu_d32acc(DisasContext *ctx)
3415 {
3416     uint32_t aptn2, XRc, XRb, XRa, XRd;
3417 
3418     aptn2 = extract32(ctx->opcode, 24, 2);
3419     XRd   = extract32(ctx->opcode, 18, 4);
3420     XRc   = extract32(ctx->opcode, 14, 4);
3421     XRb   = extract32(ctx->opcode, 10, 4);
3422     XRa   = extract32(ctx->opcode,  6, 4);
3423 
3424     TCGv t0 = tcg_temp_new();
3425     TCGv t1 = tcg_temp_new();
3426     TCGv t2 = tcg_temp_new();
3427 
3428     if (unlikely(XRa == 0 && XRd == 0)) {
3429         /* destinations are zero register -> do nothing */
3430     } else {
3431         /* common case */
3432         gen_load_mxu_gpr(t0, XRb);
3433         gen_load_mxu_gpr(t1, XRc);
3434         if (XRa != 0) {
3435             if (aptn2 & 2) {
3436                 tcg_gen_sub_tl(t2, t0, t1);
3437             } else {
3438                 tcg_gen_add_tl(t2, t0, t1);
3439             }
3440             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3441         }
3442         if (XRd != 0) {
3443             if (aptn2 & 1) {
3444                 tcg_gen_sub_tl(t2, t0, t1);
3445             } else {
3446                 tcg_gen_add_tl(t2, t0, t1);
3447             }
3448             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3449         }
3450     }
3451 }
3452 
3453 /*
3454  * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3455  * 32 bit pattern addition/subtraction and accumulate.
3456  */
3457 static void gen_mxu_d32accm(DisasContext *ctx)
3458 {
3459     uint32_t aptn2, XRc, XRb, XRa, XRd;
3460 
3461     aptn2 = extract32(ctx->opcode, 24, 2);
3462     XRd   = extract32(ctx->opcode, 18, 4);
3463     XRc   = extract32(ctx->opcode, 14, 4);
3464     XRb   = extract32(ctx->opcode, 10, 4);
3465     XRa   = extract32(ctx->opcode,  6, 4);
3466 
3467     TCGv t0 = tcg_temp_new();
3468     TCGv t1 = tcg_temp_new();
3469     TCGv t2 = tcg_temp_new();
3470 
3471     if (unlikely(XRa == 0 && XRd == 0)) {
3472         /* destinations are zero register -> do nothing */
3473     } else {
3474         /* common case */
3475         gen_load_mxu_gpr(t0, XRb);
3476         gen_load_mxu_gpr(t1, XRc);
3477         if (XRa != 0) {
3478             tcg_gen_add_tl(t2, t0, t1);
3479             if (aptn2 & 2) {
3480                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3481             } else {
3482                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3483             }
3484         }
3485         if (XRd != 0) {
3486             tcg_gen_sub_tl(t2, t0, t1);
3487             if (aptn2 & 1) {
3488                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3489             } else {
3490                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3491             }
3492         }
3493     }
3494 }
3495 
3496 /*
3497  * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3498  * 32 bit pattern addition/subtraction.
3499  */
3500 static void gen_mxu_d32asum(DisasContext *ctx)
3501 {
3502     uint32_t aptn2, XRc, XRb, XRa, XRd;
3503 
3504     aptn2 = extract32(ctx->opcode, 24, 2);
3505     XRd   = extract32(ctx->opcode, 18, 4);
3506     XRc   = extract32(ctx->opcode, 14, 4);
3507     XRb   = extract32(ctx->opcode, 10, 4);
3508     XRa   = extract32(ctx->opcode,  6, 4);
3509 
3510     TCGv t0 = tcg_temp_new();
3511     TCGv t1 = tcg_temp_new();
3512 
3513     if (unlikely(XRa == 0 && XRd == 0)) {
3514         /* destinations are zero register -> do nothing */
3515     } else {
3516         /* common case */
3517         gen_load_mxu_gpr(t0, XRb);
3518         gen_load_mxu_gpr(t1, XRc);
3519         if (XRa != 0) {
3520             if (aptn2 & 2) {
3521                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3522             } else {
3523                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3524             }
3525         }
3526         if (XRd != 0) {
3527             if (aptn2 & 1) {
3528                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3529             } else {
3530                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3531             }
3532         }
3533     }
3534 }
3535 
3536 /*
3537  *                 MXU instruction category: Miscellaneous
3538  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3539  *
3540  *               S32EXTR      S32LUI
3541  *               S32EXTRV
3542  *                            Q16SAT
3543  *                            Q16SCOP
3544  */
3545 
3546 /*
3547  *  S32EXTR XRa, XRd, rs, bits5
3548  *    Extract bits5 bits from 64-bit pair {XRa:XRd}
3549  *    starting from rs[4:0] offset and put to the XRa.
3550  */
3551 static void gen_mxu_s32extr(DisasContext *ctx)
3552 {
3553     TCGv t0, t1, t2, t3;
3554     uint32_t XRa, XRd, rs, bits5;
3555 
3556     t0 = tcg_temp_new();
3557     t1 = tcg_temp_new();
3558     t2 = tcg_temp_new();
3559     t3 = tcg_temp_new();
3560 
3561     XRa   = extract32(ctx->opcode,  6, 4);
3562     XRd   = extract32(ctx->opcode, 10, 4);
3563     bits5 = extract32(ctx->opcode, 16, 5);
3564     rs    = extract32(ctx->opcode, 21, 5);
3565 
3566     /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
3567     /* {XRa} = extract({tmp}, 0, bits5); */
3568     if (bits5 > 0) {
3569         TCGLabel *l_xra_only = gen_new_label();
3570         TCGLabel *l_done = gen_new_label();
3571 
3572         gen_load_mxu_gpr(t0, XRd);
3573         gen_load_mxu_gpr(t1, XRa);
3574         gen_load_gpr(t2, rs);
3575         tcg_gen_andi_tl(t2, t2, 0x1f);
3576         tcg_gen_subfi_tl(t2, 32, t2);
3577         tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
3578         tcg_gen_subfi_tl(t2, bits5, t2);
3579         tcg_gen_subfi_tl(t3, 32, t2);
3580         tcg_gen_shr_tl(t0, t0, t3);
3581         tcg_gen_shl_tl(t1, t1, t2);
3582         tcg_gen_or_tl(t0, t0, t1);
3583         tcg_gen_br(l_done);
3584         gen_set_label(l_xra_only);
3585         tcg_gen_subi_tl(t2, t2, bits5);
3586         tcg_gen_shr_tl(t0, t1, t2);
3587         gen_set_label(l_done);
3588         tcg_gen_extract_tl(t0, t0, 0, bits5);
3589     } else {
3590         /* unspecified behavior but matches tests on real hardware*/
3591         tcg_gen_movi_tl(t0, 0);
3592     }
3593     gen_store_mxu_gpr(t0, XRa);
3594 }
3595 
3596 /*
3597  *  S32EXTRV XRa, XRd, rs, rt
3598  *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3599  *    starting from rs[4:0] offset and put to the XRa.
3600  */
3601 static void gen_mxu_s32extrv(DisasContext *ctx)
3602 {
3603     TCGv t0, t1, t2, t3, t4;
3604     uint32_t XRa, XRd, rs, rt;
3605 
3606     t0 = tcg_temp_new();
3607     t1 = tcg_temp_new();
3608     t2 = tcg_temp_new();
3609     t3 = tcg_temp_new();
3610     t4 = tcg_temp_new();
3611     TCGLabel *l_xra_only = gen_new_label();
3612     TCGLabel *l_done = gen_new_label();
3613     TCGLabel *l_zero = gen_new_label();
3614     TCGLabel *l_extract = gen_new_label();
3615 
3616     XRa = extract32(ctx->opcode,  6, 4);
3617     XRd = extract32(ctx->opcode, 10, 4);
3618     rt  = extract32(ctx->opcode, 16, 5);
3619     rs  = extract32(ctx->opcode, 21, 5);
3620 
3621     /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
3622     gen_load_mxu_gpr(t0, XRd);
3623     gen_load_mxu_gpr(t1, XRa);
3624     gen_load_gpr(t2, rs);
3625     gen_load_gpr(t4, rt);
3626     tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
3627     tcg_gen_andi_tl(t2, t2, 0x1f);
3628     tcg_gen_subfi_tl(t2, 32, t2);
3629     tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
3630     tcg_gen_sub_tl(t2, t4, t2);
3631     tcg_gen_subfi_tl(t3, 32, t2);
3632     tcg_gen_shr_tl(t0, t0, t3);
3633     tcg_gen_shl_tl(t1, t1, t2);
3634     tcg_gen_or_tl(t0, t0, t1);
3635     tcg_gen_br(l_extract);
3636 
3637     gen_set_label(l_xra_only);
3638     tcg_gen_sub_tl(t2, t2, t4);
3639     tcg_gen_shr_tl(t0, t1, t2);
3640     tcg_gen_br(l_extract);
3641 
3642     /* unspecified behavior but matches tests on real hardware*/
3643     gen_set_label(l_zero);
3644     tcg_gen_movi_tl(t0, 0);
3645     tcg_gen_br(l_done);
3646 
3647     /* {XRa} = extract({tmp}, 0, rt) */
3648     gen_set_label(l_extract);
3649     tcg_gen_subfi_tl(t4, 32, t4);
3650     tcg_gen_shl_tl(t0, t0, t4);
3651     tcg_gen_shr_tl(t0, t0, t4);
3652 
3653     gen_set_label(l_done);
3654     gen_store_mxu_gpr(t0, XRa);
3655 }
3656 
3657 /*
3658  *  S32LUI XRa, S8, optn3
3659  *    Permutate the immediate S8 value to form a word
3660  *    to update XRa.
3661  */
3662 static void gen_mxu_s32lui(DisasContext *ctx)
3663 {
3664     uint32_t XRa, s8, optn3, pad;
3665 
3666     XRa   = extract32(ctx->opcode,  6, 4);
3667     s8    = extract32(ctx->opcode, 10, 8);
3668     pad   = extract32(ctx->opcode, 21, 2);
3669     optn3 = extract32(ctx->opcode, 23, 3);
3670 
3671     if (unlikely(pad != 0)) {
3672         /* opcode padding incorrect -> do nothing */
3673     } else if (unlikely(XRa == 0)) {
3674         /* destination is zero register -> do nothing */
3675     } else {
3676         uint32_t s16;
3677         TCGv t0 = tcg_temp_new();
3678 
3679         switch (optn3) {
3680         case 0:
3681             tcg_gen_movi_tl(t0, s8);
3682             break;
3683         case 1:
3684             tcg_gen_movi_tl(t0, s8 << 8);
3685             break;
3686         case 2:
3687             tcg_gen_movi_tl(t0, s8 << 16);
3688             break;
3689         case 3:
3690             tcg_gen_movi_tl(t0, s8 << 24);
3691             break;
3692         case 4:
3693             tcg_gen_movi_tl(t0, (s8 << 16) | s8);
3694             break;
3695         case 5:
3696             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
3697             break;
3698         case 6:
3699             s16 = (uint16_t)(int16_t)(int8_t)s8;
3700             tcg_gen_movi_tl(t0, (s16 << 16) | s16);
3701             break;
3702         case 7:
3703             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
3704             break;
3705         }
3706         gen_store_mxu_gpr(t0, XRa);
3707     }
3708 }
3709 
3710 /*
3711  *  Q16SAT XRa, XRb, XRc
3712  *  Packs four 16-bit signed integers in XRb and XRc to
3713  *  four saturated unsigned 8-bit into XRa.
3714  *
3715  */
3716 static void gen_mxu_Q16SAT(DisasContext *ctx)
3717 {
3718     uint32_t pad, XRc, XRb, XRa;
3719 
3720     pad = extract32(ctx->opcode, 21, 3);
3721     XRc = extract32(ctx->opcode, 14, 4);
3722     XRb = extract32(ctx->opcode, 10, 4);
3723     XRa = extract32(ctx->opcode,  6, 4);
3724 
3725     if (unlikely(pad != 0)) {
3726         /* opcode padding incorrect -> do nothing */
3727     } else if (unlikely(XRa == 0)) {
3728         /* destination is zero register -> do nothing */
3729     } else {
3730         /* the most general case */
3731         TCGv t0 = tcg_temp_new();
3732         TCGv t1 = tcg_temp_new();
3733         TCGv t2 = tcg_temp_new();
3734 
3735         tcg_gen_movi_tl(t2, 0);
3736         if (XRb != 0) {
3737             TCGLabel *l_less_hi = gen_new_label();
3738             TCGLabel *l_less_lo = gen_new_label();
3739             TCGLabel *l_lo = gen_new_label();
3740             TCGLabel *l_greater_hi = gen_new_label();
3741             TCGLabel *l_greater_lo = gen_new_label();
3742             TCGLabel *l_done = gen_new_label();
3743 
3744             tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
3745             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3746             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3747             tcg_gen_br(l_lo);
3748             gen_set_label(l_less_hi);
3749             tcg_gen_movi_tl(t0, 0);
3750             tcg_gen_br(l_lo);
3751             gen_set_label(l_greater_hi);
3752             tcg_gen_movi_tl(t0, 255);
3753 
3754             gen_set_label(l_lo);
3755             tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
3756             tcg_gen_sari_tl(t1, t1, 16);
3757             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3758             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3759             tcg_gen_br(l_done);
3760             gen_set_label(l_less_lo);
3761             tcg_gen_movi_tl(t1, 0);
3762             tcg_gen_br(l_done);
3763             gen_set_label(l_greater_lo);
3764             tcg_gen_movi_tl(t1, 255);
3765 
3766             gen_set_label(l_done);
3767             tcg_gen_shli_tl(t2, t0, 24);
3768             tcg_gen_shli_tl(t1, t1, 16);
3769             tcg_gen_or_tl(t2, t2, t1);
3770         }
3771 
3772         if (XRc != 0) {
3773             TCGLabel *l_less_hi = gen_new_label();
3774             TCGLabel *l_less_lo = gen_new_label();
3775             TCGLabel *l_lo = gen_new_label();
3776             TCGLabel *l_greater_hi = gen_new_label();
3777             TCGLabel *l_greater_lo = gen_new_label();
3778             TCGLabel *l_done = gen_new_label();
3779 
3780             tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
3781             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3782             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3783             tcg_gen_br(l_lo);
3784             gen_set_label(l_less_hi);
3785             tcg_gen_movi_tl(t0, 0);
3786             tcg_gen_br(l_lo);
3787             gen_set_label(l_greater_hi);
3788             tcg_gen_movi_tl(t0, 255);
3789 
3790             gen_set_label(l_lo);
3791             tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
3792             tcg_gen_sari_tl(t1, t1, 16);
3793             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3794             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3795             tcg_gen_br(l_done);
3796             gen_set_label(l_less_lo);
3797             tcg_gen_movi_tl(t1, 0);
3798             tcg_gen_br(l_done);
3799             gen_set_label(l_greater_lo);
3800             tcg_gen_movi_tl(t1, 255);
3801 
3802             gen_set_label(l_done);
3803             tcg_gen_shli_tl(t0, t0, 8);
3804             tcg_gen_or_tl(t2, t2, t0);
3805             tcg_gen_or_tl(t2, t2, t1);
3806         }
3807         gen_store_mxu_gpr(t2, XRa);
3808     }
3809 }
3810 
3811 /*
3812  *  Q16SCOP XRa, XRd, XRb, XRc
3813  *    Determine sign of quad packed 16-bit signed values
3814  *    in XRb and XRc put result in XRa and XRd respectively.
3815  */
3816 static void gen_mxu_q16scop(DisasContext *ctx)
3817 {
3818     uint32_t XRd, XRc, XRb, XRa;
3819 
3820     XRd  = extract32(ctx->opcode, 18, 4);
3821     XRc  = extract32(ctx->opcode, 14, 4);
3822     XRb  = extract32(ctx->opcode, 10, 4);
3823     XRa  = extract32(ctx->opcode,  6, 4);
3824 
3825     TCGv t0 = tcg_temp_new();
3826     TCGv t1 = tcg_temp_new();
3827     TCGv t2 = tcg_temp_new();
3828     TCGv t3 = tcg_temp_new();
3829     TCGv t4 = tcg_temp_new();
3830 
3831     TCGLabel *l_b_hi_lt = gen_new_label();
3832     TCGLabel *l_b_hi_gt = gen_new_label();
3833     TCGLabel *l_b_lo = gen_new_label();
3834     TCGLabel *l_b_lo_lt = gen_new_label();
3835     TCGLabel *l_c_hi = gen_new_label();
3836     TCGLabel *l_c_hi_lt = gen_new_label();
3837     TCGLabel *l_c_hi_gt = gen_new_label();
3838     TCGLabel *l_c_lo = gen_new_label();
3839     TCGLabel *l_c_lo_lt = gen_new_label();
3840     TCGLabel *l_done = gen_new_label();
3841 
3842     gen_load_mxu_gpr(t0, XRb);
3843     gen_load_mxu_gpr(t1, XRc);
3844 
3845     tcg_gen_sextract_tl(t2, t0, 16, 16);
3846     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
3847     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
3848     tcg_gen_movi_tl(t3, 0);
3849     tcg_gen_br(l_b_lo);
3850     gen_set_label(l_b_hi_lt);
3851     tcg_gen_movi_tl(t3, 0xffff0000);
3852     tcg_gen_br(l_b_lo);
3853     gen_set_label(l_b_hi_gt);
3854     tcg_gen_movi_tl(t3, 0x00010000);
3855 
3856     gen_set_label(l_b_lo);
3857     tcg_gen_sextract_tl(t2, t0, 0, 16);
3858     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
3859     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
3860     tcg_gen_ori_tl(t3, t3, 0x00000001);
3861     tcg_gen_br(l_c_hi);
3862     gen_set_label(l_b_lo_lt);
3863     tcg_gen_ori_tl(t3, t3, 0x0000ffff);
3864     tcg_gen_br(l_c_hi);
3865 
3866     gen_set_label(l_c_hi);
3867     tcg_gen_sextract_tl(t2, t1, 16, 16);
3868     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
3869     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
3870     tcg_gen_movi_tl(t4, 0);
3871     tcg_gen_br(l_c_lo);
3872     gen_set_label(l_c_hi_lt);
3873     tcg_gen_movi_tl(t4, 0xffff0000);
3874     tcg_gen_br(l_c_lo);
3875     gen_set_label(l_c_hi_gt);
3876     tcg_gen_movi_tl(t4, 0x00010000);
3877 
3878     gen_set_label(l_c_lo);
3879     tcg_gen_sextract_tl(t2, t1, 0, 16);
3880     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
3881     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
3882     tcg_gen_ori_tl(t4, t4, 0x00000001);
3883     tcg_gen_br(l_done);
3884     gen_set_label(l_c_lo_lt);
3885     tcg_gen_ori_tl(t4, t4, 0x0000ffff);
3886 
3887     gen_set_label(l_done);
3888     gen_store_mxu_gpr(t3, XRa);
3889     gen_store_mxu_gpr(t4, XRd);
3890 }
3891 
3892 /*
3893  *                 MXU instruction category: align
3894  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3895  *
3896  *                       S32ALN     S32ALNI
3897  */
3898 
3899 /*
3900  *  S32ALNI XRc, XRb, XRa, optn3
3901  *    Arrange bytes from XRb and XRc according to one of five sets of
3902  *    rules determined by optn3, and place the result in XRa.
3903  */
3904 static void gen_mxu_S32ALNI(DisasContext *ctx)
3905 {
3906     uint32_t optn3, pad, XRc, XRb, XRa;
3907 
3908     optn3 = extract32(ctx->opcode,  23, 3);
3909     pad   = extract32(ctx->opcode,  21, 2);
3910     XRc   = extract32(ctx->opcode, 14, 4);
3911     XRb   = extract32(ctx->opcode, 10, 4);
3912     XRa   = extract32(ctx->opcode,  6, 4);
3913 
3914     if (unlikely(pad != 0)) {
3915         /* opcode padding incorrect -> do nothing */
3916     } else if (unlikely(XRa == 0)) {
3917         /* destination is zero register -> do nothing */
3918     } else if (unlikely((XRb == 0) && (XRc == 0))) {
3919         /* both operands zero registers -> just set destination to all 0s */
3920         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
3921     } else if (unlikely(XRb == 0)) {
3922         /* XRb zero register -> just appropriatelly shift XRc into XRa */
3923         switch (optn3) {
3924         case MXU_OPTN3_PTN0:
3925             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
3926             break;
3927         case MXU_OPTN3_PTN1:
3928         case MXU_OPTN3_PTN2:
3929         case MXU_OPTN3_PTN3:
3930             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
3931                              8 * (4 - optn3));
3932             break;
3933         case MXU_OPTN3_PTN4:
3934             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
3935             break;
3936         }
3937     } else if (unlikely(XRc == 0)) {
3938         /* XRc zero register -> just appropriatelly shift XRb into XRa */
3939         switch (optn3) {
3940         case MXU_OPTN3_PTN0:
3941             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
3942             break;
3943         case MXU_OPTN3_PTN1:
3944         case MXU_OPTN3_PTN2:
3945         case MXU_OPTN3_PTN3:
3946             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
3947             break;
3948         case MXU_OPTN3_PTN4:
3949             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
3950             break;
3951         }
3952     } else if (unlikely(XRb == XRc)) {
3953         /* both operands same -> just rotation or moving from any of them */
3954         switch (optn3) {
3955         case MXU_OPTN3_PTN0:
3956         case MXU_OPTN3_PTN4:
3957             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
3958             break;
3959         case MXU_OPTN3_PTN1:
3960         case MXU_OPTN3_PTN2:
3961         case MXU_OPTN3_PTN3:
3962             tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
3963             break;
3964         }
3965     } else {
3966         /* the most general case */
3967         switch (optn3) {
3968         case MXU_OPTN3_PTN0:
3969             {
3970                 /*                                         */
3971                 /*         XRb                XRc          */
3972                 /*  +---------------+                      */
3973                 /*  | A   B   C   D |    E   F   G   H     */
3974                 /*  +-------+-------+                      */
3975                 /*          |                              */
3976                 /*         XRa                             */
3977                 /*                                         */
3978 
3979                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
3980             }
3981             break;
3982         case MXU_OPTN3_PTN1:
3983             {
3984                 /*                                         */
3985                 /*         XRb                 XRc         */
3986                 /*      +-------------------+              */
3987                 /*    A | B   C   D       E | F   G   H    */
3988                 /*      +---------+---------+              */
3989                 /*                |                        */
3990                 /*               XRa                       */
3991                 /*                                         */
3992 
3993                 TCGv_i32 t0 = tcg_temp_new();
3994                 TCGv_i32 t1 = tcg_temp_new();
3995 
3996                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
3997                 tcg_gen_shli_i32(t0, t0, 8);
3998 
3999                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
4000                 tcg_gen_shri_i32(t1, t1, 24);
4001 
4002                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4003             }
4004             break;
4005         case MXU_OPTN3_PTN2:
4006             {
4007                 /*                                         */
4008                 /*         XRb                 XRc         */
4009                 /*          +-------------------+          */
4010                 /*    A   B | C   D       E   F | G   H    */
4011                 /*          +---------+---------+          */
4012                 /*                    |                    */
4013                 /*                   XRa                   */
4014                 /*                                         */
4015 
4016                 TCGv_i32 t0 = tcg_temp_new();
4017                 TCGv_i32 t1 = tcg_temp_new();
4018 
4019                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
4020                 tcg_gen_shli_i32(t0, t0, 16);
4021 
4022                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
4023                 tcg_gen_shri_i32(t1, t1, 16);
4024 
4025                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4026             }
4027             break;
4028         case MXU_OPTN3_PTN3:
4029             {
4030                 /*                                         */
4031                 /*         XRb                 XRc         */
4032                 /*              +-------------------+      */
4033                 /*    A   B   C | D       E   F   G | H    */
4034                 /*              +---------+---------+      */
4035                 /*                        |                */
4036                 /*                       XRa               */
4037                 /*                                         */
4038 
4039                 TCGv_i32 t0 = tcg_temp_new();
4040                 TCGv_i32 t1 = tcg_temp_new();
4041 
4042                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
4043                 tcg_gen_shli_i32(t0, t0, 24);
4044 
4045                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
4046                 tcg_gen_shri_i32(t1, t1, 8);
4047 
4048                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4049             }
4050             break;
4051         case MXU_OPTN3_PTN4:
4052             {
4053                 /*                                         */
4054                 /*         XRb                 XRc         */
4055                 /*                     +---------------+   */
4056                 /*    A   B   C   D    | E   F   G   H |   */
4057                 /*                     +-------+-------+   */
4058                 /*                             |           */
4059                 /*                            XRa          */
4060                 /*                                         */
4061 
4062                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4063             }
4064             break;
4065         }
4066     }
4067 }
4068 
4069 /*
4070  *  S32ALN XRc, XRb, XRa, rs
4071  *    Arrange bytes from XRb and XRc according to one of five sets of
4072  *    rules determined by rs[2:0], and place the result in XRa.
4073  */
4074 static void gen_mxu_S32ALN(DisasContext *ctx)
4075 {
4076     uint32_t rs, XRc, XRb, XRa;
4077 
4078     rs  = extract32(ctx->opcode, 21, 5);
4079     XRc = extract32(ctx->opcode, 14, 4);
4080     XRb = extract32(ctx->opcode, 10, 4);
4081     XRa = extract32(ctx->opcode,  6, 4);
4082 
4083     if (unlikely(XRa == 0)) {
4084         /* destination is zero register -> do nothing */
4085     } else if (unlikely((XRb == 0) && (XRc == 0))) {
4086         /* both operands zero registers -> just set destination to all 0s */
4087         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
4088     } else {
4089         /* the most general case */
4090         TCGv t0 = tcg_temp_new();
4091         TCGv t1 = tcg_temp_new();
4092         TCGv t2 = tcg_temp_new();
4093         TCGv t3 = tcg_temp_new();
4094         TCGLabel *l_exit = gen_new_label();
4095         TCGLabel *l_b_only = gen_new_label();
4096         TCGLabel *l_c_only = gen_new_label();
4097 
4098         gen_load_mxu_gpr(t0, XRb);
4099         gen_load_mxu_gpr(t1, XRc);
4100         gen_load_gpr(t2, rs);
4101         tcg_gen_andi_tl(t2, t2, 0x07);
4102 
4103         /* do nothing for undefined cases */
4104         tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
4105 
4106         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
4107         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
4108 
4109         tcg_gen_shli_tl(t2, t2, 3);
4110         tcg_gen_subfi_tl(t3, 32, t2);
4111 
4112         tcg_gen_shl_tl(t0, t0, t2);
4113         tcg_gen_shr_tl(t1, t1, t3);
4114         tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
4115         tcg_gen_br(l_exit);
4116 
4117         gen_set_label(l_b_only);
4118         gen_store_mxu_gpr(t0, XRa);
4119         tcg_gen_br(l_exit);
4120 
4121         gen_set_label(l_c_only);
4122         gen_store_mxu_gpr(t1, XRa);
4123 
4124         gen_set_label(l_exit);
4125     }
4126 }
4127 
4128 /*
4129  *  S32MADD XRa, XRd, rb, rc
4130  *    32 to 64 bit signed multiply with subsequent add
4131  *    result stored in {XRa, XRd} pair, stain HI/LO.
4132  *  S32MADDU XRa, XRd, rb, rc
4133  *    32 to 64 bit unsigned multiply with subsequent add
4134  *    result stored in {XRa, XRd} pair, stain HI/LO.
4135  *  S32MSUB XRa, XRd, rb, rc
4136  *    32 to 64 bit signed multiply with subsequent subtract
4137  *    result stored in {XRa, XRd} pair, stain HI/LO.
4138  *  S32MSUBU XRa, XRd, rb, rc
4139  *    32 to 64 bit unsigned multiply with subsequent subtract
4140  *    result stored in {XRa, XRd} pair, stain HI/LO.
4141  */
4142 static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
4143 {
4144     uint32_t XRa, XRd, Rb, Rc;
4145 
4146     XRa  = extract32(ctx->opcode,  6, 4);
4147     XRd  = extract32(ctx->opcode, 10, 4);
4148     Rb   = extract32(ctx->opcode, 16, 5);
4149     Rc   = extract32(ctx->opcode, 21, 5);
4150 
4151     if (unlikely(Rb == 0 || Rc == 0)) {
4152         /* do nothing because x + 0 * y => x */
4153     } else if (unlikely(XRa == 0 && XRd == 0)) {
4154         /* do nothing because result just dropped */
4155     } else {
4156         TCGv t0 = tcg_temp_new();
4157         TCGv t1 = tcg_temp_new();
4158         TCGv_i64 t2 = tcg_temp_new_i64();
4159         TCGv_i64 t3 = tcg_temp_new_i64();
4160 
4161         gen_load_gpr(t0, Rb);
4162         gen_load_gpr(t1, Rc);
4163 
4164         if (uns) {
4165             tcg_gen_extu_tl_i64(t2, t0);
4166             tcg_gen_extu_tl_i64(t3, t1);
4167         } else {
4168             tcg_gen_ext_tl_i64(t2, t0);
4169             tcg_gen_ext_tl_i64(t3, t1);
4170         }
4171         tcg_gen_mul_i64(t2, t2, t3);
4172 
4173         gen_load_mxu_gpr(t0, XRa);
4174         gen_load_mxu_gpr(t1, XRd);
4175 
4176         tcg_gen_concat_tl_i64(t3, t1, t0);
4177         if (sub) {
4178             tcg_gen_sub_i64(t3, t3, t2);
4179         } else {
4180             tcg_gen_add_i64(t3, t3, t2);
4181         }
4182         gen_move_low32(t1, t3);
4183         gen_move_high32(t0, t3);
4184 
4185         tcg_gen_mov_tl(cpu_HI[0], t0);
4186         tcg_gen_mov_tl(cpu_LO[0], t1);
4187 
4188         gen_store_mxu_gpr(t1, XRd);
4189         gen_store_mxu_gpr(t0, XRa);
4190     }
4191 }
4192 
4193 /*
4194  * Decoding engine for MXU
4195  * =======================
4196  */
4197 
4198 static void decode_opc_mxu__pool00(DisasContext *ctx)
4199 {
4200     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4201 
4202     switch (opcode) {
4203     case OPC_MXU_S32MAX:
4204     case OPC_MXU_S32MIN:
4205         gen_mxu_S32MAX_S32MIN(ctx);
4206         break;
4207     case OPC_MXU_D16MAX:
4208     case OPC_MXU_D16MIN:
4209         gen_mxu_D16MAX_D16MIN(ctx);
4210         break;
4211     case OPC_MXU_Q8MAX:
4212     case OPC_MXU_Q8MIN:
4213         gen_mxu_Q8MAX_Q8MIN(ctx);
4214         break;
4215     case OPC_MXU_Q8SLT:
4216         gen_mxu_q8slt(ctx, false);
4217         break;
4218     case OPC_MXU_Q8SLTU:
4219         gen_mxu_q8slt(ctx, true);
4220         break;
4221     default:
4222         MIPS_INVAL("decode_opc_mxu");
4223         gen_reserved_instruction(ctx);
4224         break;
4225     }
4226 }
4227 
4228 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
4229 {
4230     uint32_t opcode = extract32(ctx->opcode, 0, 6);
4231     uint32_t pad  = extract32(ctx->opcode, 14, 2);
4232 
4233     if (pad != 2) {
4234         /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
4235         return false;
4236     }
4237 
4238     switch (opcode) {
4239     case OPC_MXU_S32MADD:
4240         gen_mxu_s32madd_sub(ctx, false, false);
4241         break;
4242     case OPC_MXU_S32MADDU:
4243         gen_mxu_s32madd_sub(ctx, false, true);
4244         break;
4245     case OPC_MXU_S32MSUB:
4246         gen_mxu_s32madd_sub(ctx, true, false);
4247         break;
4248     case OPC_MXU_S32MSUBU:
4249         gen_mxu_s32madd_sub(ctx, true, true);
4250         break;
4251     default:
4252         return false;
4253     }
4254     return true;
4255 }
4256 
4257 static void decode_opc_mxu__pool01(DisasContext *ctx)
4258 {
4259     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4260 
4261     switch (opcode) {
4262     case OPC_MXU_S32SLT:
4263         gen_mxu_S32SLT(ctx);
4264         break;
4265     case OPC_MXU_D16SLT:
4266         gen_mxu_D16SLT(ctx);
4267         break;
4268     case OPC_MXU_D16AVG:
4269         gen_mxu_d16avg(ctx, false);
4270         break;
4271     case OPC_MXU_D16AVGR:
4272         gen_mxu_d16avg(ctx, true);
4273         break;
4274     case OPC_MXU_Q8AVG:
4275         gen_mxu_q8avg(ctx, false);
4276         break;
4277     case OPC_MXU_Q8AVGR:
4278         gen_mxu_q8avg(ctx, true);
4279         break;
4280     case OPC_MXU_Q8ADD:
4281         gen_mxu_Q8ADD(ctx);
4282         break;
4283     default:
4284         MIPS_INVAL("decode_opc_mxu");
4285         gen_reserved_instruction(ctx);
4286         break;
4287     }
4288 }
4289 
4290 static void decode_opc_mxu__pool02(DisasContext *ctx)
4291 {
4292     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4293 
4294     switch (opcode) {
4295     case OPC_MXU_S32CPS:
4296         gen_mxu_S32CPS(ctx);
4297         break;
4298     case OPC_MXU_D16CPS:
4299         gen_mxu_D16CPS(ctx);
4300         break;
4301     case OPC_MXU_Q8ABD:
4302         gen_mxu_Q8ABD(ctx);
4303         break;
4304     case OPC_MXU_Q16SAT:
4305         gen_mxu_Q16SAT(ctx);
4306         break;
4307     default:
4308         MIPS_INVAL("decode_opc_mxu");
4309         gen_reserved_instruction(ctx);
4310         break;
4311     }
4312 }
4313 
4314 static void decode_opc_mxu__pool03(DisasContext *ctx)
4315 {
4316     uint32_t opcode = extract32(ctx->opcode, 24, 2);
4317 
4318     switch (opcode) {
4319     case OPC_MXU_D16MULF:
4320         gen_mxu_d16mul(ctx, true, true);
4321         break;
4322     case OPC_MXU_D16MULE:
4323         gen_mxu_d16mul(ctx, true, false);
4324         break;
4325     default:
4326         MIPS_INVAL("decode_opc_mxu");
4327         gen_reserved_instruction(ctx);
4328         break;
4329     }
4330 }
4331 
4332 static void decode_opc_mxu__pool04(DisasContext *ctx)
4333 {
4334     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4335     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4336 
4337     /* Don't care about opcode bits as their meaning is unknown yet */
4338     switch (opcode) {
4339     default:
4340         gen_mxu_s32ldxx(ctx, reversed, false);
4341         break;
4342     }
4343 }
4344 
4345 static void decode_opc_mxu__pool05(DisasContext *ctx)
4346 {
4347     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4348     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4349 
4350     /* Don't care about opcode bits as their meaning is unknown yet */
4351     switch (opcode) {
4352     default:
4353         gen_mxu_s32stxx(ctx, reversed, false);
4354         break;
4355     }
4356 }
4357 
4358 static void decode_opc_mxu__pool06(DisasContext *ctx)
4359 {
4360     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4361     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4362 
4363     switch (opcode) {
4364     case OPC_MXU_S32LDST:
4365     case OPC_MXU_S32LDSTR:
4366         if (strd2 <= 2) {
4367             gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
4368             break;
4369         }
4370         /* fallthrough */
4371     default:
4372         MIPS_INVAL("decode_opc_mxu");
4373         gen_reserved_instruction(ctx);
4374         break;
4375     }
4376 }
4377 
4378 static void decode_opc_mxu__pool07(DisasContext *ctx)
4379 {
4380     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4381     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4382 
4383     switch (opcode) {
4384     case OPC_MXU_S32LDST:
4385     case OPC_MXU_S32LDSTR:
4386         if (strd2 <= 2) {
4387             gen_mxu_s32stxvx(ctx, opcode, false, strd2);
4388             break;
4389         }
4390         /* fallthrough */
4391     default:
4392         MIPS_INVAL("decode_opc_mxu");
4393         gen_reserved_instruction(ctx);
4394         break;
4395     }
4396 }
4397 
4398 static void decode_opc_mxu__pool08(DisasContext *ctx)
4399 {
4400     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4401     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4402 
4403     /* Don't care about opcode bits as their meaning is unknown yet */
4404     switch (opcode) {
4405     default:
4406         gen_mxu_s32ldxx(ctx, reversed, true);
4407         break;
4408     }
4409 }
4410 
4411 static void decode_opc_mxu__pool09(DisasContext *ctx)
4412 {
4413     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4414     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4415 
4416     /* Don't care about opcode bits as their meaning is unknown yet */
4417     switch (opcode) {
4418     default:
4419         gen_mxu_s32stxx(ctx, reversed, true);
4420         break;
4421     }
4422 }
4423 
4424 static void decode_opc_mxu__pool10(DisasContext *ctx)
4425 {
4426     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4427     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4428 
4429     switch (opcode) {
4430     case OPC_MXU_S32LDST:
4431     case OPC_MXU_S32LDSTR:
4432         if (strd2 <= 2) {
4433             gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
4434             break;
4435         }
4436         /* fallthrough */
4437     default:
4438         MIPS_INVAL("decode_opc_mxu");
4439         gen_reserved_instruction(ctx);
4440         break;
4441     }
4442 }
4443 
4444 static void decode_opc_mxu__pool11(DisasContext *ctx)
4445 {
4446     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4447     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4448 
4449     switch (opcode) {
4450     case OPC_MXU_S32LDST:
4451     case OPC_MXU_S32LDSTR:
4452         if (strd2 <= 2) {
4453             gen_mxu_s32stxvx(ctx, opcode, true, strd2);
4454             break;
4455         }
4456         /* fallthrough */
4457     default:
4458         MIPS_INVAL("decode_opc_mxu");
4459         gen_reserved_instruction(ctx);
4460         break;
4461     }
4462 }
4463 
4464 static void decode_opc_mxu__pool12(DisasContext *ctx)
4465 {
4466     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4467 
4468     switch (opcode) {
4469     case OPC_MXU_D32ACC:
4470         gen_mxu_d32acc(ctx);
4471         break;
4472     case OPC_MXU_D32ACCM:
4473         gen_mxu_d32accm(ctx);
4474         break;
4475     case OPC_MXU_D32ASUM:
4476         gen_mxu_d32asum(ctx);
4477         break;
4478     default:
4479         MIPS_INVAL("decode_opc_mxu");
4480         gen_reserved_instruction(ctx);
4481         break;
4482     }
4483 }
4484 
4485 static void decode_opc_mxu__pool13(DisasContext *ctx)
4486 {
4487     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4488 
4489     switch (opcode) {
4490     case OPC_MXU_Q16ACC:
4491         gen_mxu_q16acc(ctx);
4492         break;
4493     case OPC_MXU_Q16ACCM:
4494         gen_mxu_q16accm(ctx);
4495         break;
4496     case OPC_MXU_D16ASUM:
4497         gen_mxu_d16asum(ctx);
4498         break;
4499     default:
4500         MIPS_INVAL("decode_opc_mxu");
4501         gen_reserved_instruction(ctx);
4502         break;
4503     }
4504 }
4505 
4506 static void decode_opc_mxu__pool14(DisasContext *ctx)
4507 {
4508     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4509 
4510     switch (opcode) {
4511     case OPC_MXU_Q8ADDE:
4512         gen_mxu_q8adde(ctx, false);
4513         break;
4514     case OPC_MXU_D8SUM:
4515         gen_mxu_d8sum(ctx, false);
4516         break;
4517     case OPC_MXU_D8SUMC:
4518         gen_mxu_d8sum(ctx, true);
4519         break;
4520     default:
4521         MIPS_INVAL("decode_opc_mxu");
4522         gen_reserved_instruction(ctx);
4523         break;
4524     }
4525 }
4526 
4527 static void decode_opc_mxu__pool15(DisasContext *ctx)
4528 {
4529     uint32_t opcode = extract32(ctx->opcode, 14, 2);
4530 
4531     switch (opcode) {
4532     case OPC_MXU_S32MUL:
4533         gen_mxu_s32mul(ctx, false);
4534         break;
4535     case OPC_MXU_S32MULU:
4536         gen_mxu_s32mul(ctx, true);
4537         break;
4538     case OPC_MXU_S32EXTR:
4539         gen_mxu_s32extr(ctx);
4540         break;
4541     case OPC_MXU_S32EXTRV:
4542         gen_mxu_s32extrv(ctx);
4543         break;
4544     default:
4545         MIPS_INVAL("decode_opc_mxu");
4546         gen_reserved_instruction(ctx);
4547         break;
4548     }
4549 }
4550 
4551 static void decode_opc_mxu__pool16(DisasContext *ctx)
4552 {
4553     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4554 
4555     switch (opcode) {
4556     case OPC_MXU_D32SARW:
4557         gen_mxu_d32sarl(ctx, true);
4558         break;
4559     case OPC_MXU_S32ALN:
4560         gen_mxu_S32ALN(ctx);
4561         break;
4562     case OPC_MXU_S32ALNI:
4563         gen_mxu_S32ALNI(ctx);
4564         break;
4565     case OPC_MXU_S32LUI:
4566         gen_mxu_s32lui(ctx);
4567         break;
4568     case OPC_MXU_S32NOR:
4569         gen_mxu_S32NOR(ctx);
4570         break;
4571     case OPC_MXU_S32AND:
4572         gen_mxu_S32AND(ctx);
4573         break;
4574     case OPC_MXU_S32OR:
4575         gen_mxu_S32OR(ctx);
4576         break;
4577     case OPC_MXU_S32XOR:
4578         gen_mxu_S32XOR(ctx);
4579         break;
4580     default:
4581         MIPS_INVAL("decode_opc_mxu");
4582         gen_reserved_instruction(ctx);
4583         break;
4584     }
4585 }
4586 
4587 static void decode_opc_mxu__pool17(DisasContext *ctx)
4588 {
4589     uint32_t opcode = extract32(ctx->opcode, 6, 3);
4590     uint32_t strd2  = extract32(ctx->opcode, 9, 2);
4591 
4592     if (strd2 > 2) {
4593         MIPS_INVAL("decode_opc_mxu");
4594         gen_reserved_instruction(ctx);
4595         return;
4596     }
4597 
4598     switch (opcode) {
4599     case OPC_MXU_LXW:
4600           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
4601           break;
4602     case OPC_MXU_LXB:
4603           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
4604           break;
4605     case OPC_MXU_LXH:
4606           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
4607           break;
4608     case OPC_MXU_LXBU:
4609           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
4610           break;
4611     case OPC_MXU_LXHU:
4612           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
4613           break;
4614     default:
4615         MIPS_INVAL("decode_opc_mxu");
4616         gen_reserved_instruction(ctx);
4617         break;
4618     }
4619 }
4620 
4621 static void decode_opc_mxu__pool18(DisasContext *ctx)
4622 {
4623     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4624 
4625     switch (opcode) {
4626     case OPC_MXU_D32SLLV:
4627         gen_mxu_d32sxxv(ctx, false, false);
4628         break;
4629     case OPC_MXU_D32SLRV:
4630         gen_mxu_d32sxxv(ctx, true, false);
4631         break;
4632     case OPC_MXU_D32SARV:
4633         gen_mxu_d32sxxv(ctx, true, true);
4634         break;
4635     case OPC_MXU_Q16SLLV:
4636         gen_mxu_q16sxxv(ctx, false, false);
4637         break;
4638     case OPC_MXU_Q16SLRV:
4639         gen_mxu_q16sxxv(ctx, true, false);
4640         break;
4641     case OPC_MXU_Q16SARV:
4642         gen_mxu_q16sxxv(ctx, true, true);
4643         break;
4644     default:
4645         MIPS_INVAL("decode_opc_mxu");
4646         gen_reserved_instruction(ctx);
4647         break;
4648     }
4649 }
4650 
4651 static void decode_opc_mxu__pool19(DisasContext *ctx)
4652 {
4653     uint32_t opcode = extract32(ctx->opcode, 22, 4);
4654 
4655     switch (opcode) {
4656     case OPC_MXU_Q8MUL:
4657         gen_mxu_q8mul_mac(ctx, false, false);
4658         break;
4659     case OPC_MXU_Q8MULSU:
4660         gen_mxu_q8mul_mac(ctx, true, false);
4661         break;
4662     default:
4663         MIPS_INVAL("decode_opc_mxu");
4664         gen_reserved_instruction(ctx);
4665         break;
4666     }
4667 }
4668 
4669 static void decode_opc_mxu__pool20(DisasContext *ctx)
4670 {
4671     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4672 
4673     switch (opcode) {
4674     case OPC_MXU_Q8MOVZ:
4675         gen_mxu_q8movzn(ctx, TCG_COND_NE);
4676         break;
4677     case OPC_MXU_Q8MOVN:
4678         gen_mxu_q8movzn(ctx, TCG_COND_EQ);
4679         break;
4680     case OPC_MXU_D16MOVZ:
4681         gen_mxu_d16movzn(ctx, TCG_COND_NE);
4682         break;
4683     case OPC_MXU_D16MOVN:
4684         gen_mxu_d16movzn(ctx, TCG_COND_EQ);
4685         break;
4686     case OPC_MXU_S32MOVZ:
4687         gen_mxu_s32movzn(ctx, TCG_COND_NE);
4688         break;
4689     case OPC_MXU_S32MOVN:
4690         gen_mxu_s32movzn(ctx, TCG_COND_EQ);
4691         break;
4692     default:
4693         MIPS_INVAL("decode_opc_mxu");
4694         gen_reserved_instruction(ctx);
4695         break;
4696     }
4697 }
4698 
4699 static void decode_opc_mxu__pool21(DisasContext *ctx)
4700 {
4701     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4702 
4703     switch (opcode) {
4704     case OPC_MXU_Q8MAC:
4705         gen_mxu_q8mul_mac(ctx, false, true);
4706         break;
4707     case OPC_MXU_Q8MACSU:
4708         gen_mxu_q8mul_mac(ctx, true, true);
4709         break;
4710     default:
4711         MIPS_INVAL("decode_opc_mxu");
4712         gen_reserved_instruction(ctx);
4713         break;
4714     }
4715 }
4716 
4717 
4718 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
4719 {
4720     uint32_t opcode = extract32(insn, 0, 6);
4721 
4722     if (opcode == OPC_MXU_S32M2I) {
4723         gen_mxu_s32m2i(ctx);
4724         return true;
4725     }
4726 
4727     if (opcode == OPC_MXU_S32I2M) {
4728         gen_mxu_s32i2m(ctx);
4729         return true;
4730     }
4731 
4732     {
4733         TCGv t_mxu_cr = tcg_temp_new();
4734         TCGLabel *l_exit = gen_new_label();
4735 
4736         gen_load_mxu_cr(t_mxu_cr);
4737         tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
4738         tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
4739 
4740         switch (opcode) {
4741         case OPC_MXU_S32MADD:
4742         case OPC_MXU_S32MADDU:
4743         case OPC_MXU_S32MSUB:
4744         case OPC_MXU_S32MSUBU:
4745             return decode_opc_mxu_s32madd_sub(ctx);
4746         case OPC_MXU__POOL00:
4747             decode_opc_mxu__pool00(ctx);
4748             break;
4749         case OPC_MXU_D16MUL:
4750             gen_mxu_d16mul(ctx, false, false);
4751             break;
4752         case OPC_MXU_D16MAC:
4753             gen_mxu_d16mac(ctx, false, false);
4754             break;
4755         case OPC_MXU_D16MACF:
4756             gen_mxu_d16mac(ctx, true, true);
4757             break;
4758         case OPC_MXU_D16MADL:
4759             gen_mxu_d16madl(ctx);
4760             break;
4761         case OPC_MXU_S16MAD:
4762             gen_mxu_s16mad(ctx);
4763             break;
4764         case OPC_MXU_Q16ADD:
4765             gen_mxu_q16add(ctx);
4766             break;
4767         case OPC_MXU_D16MACE:
4768             gen_mxu_d16mac(ctx, true, false);
4769             break;
4770         case OPC_MXU__POOL01:
4771             decode_opc_mxu__pool01(ctx);
4772             break;
4773         case OPC_MXU__POOL02:
4774             decode_opc_mxu__pool02(ctx);
4775             break;
4776         case OPC_MXU__POOL03:
4777             decode_opc_mxu__pool03(ctx);
4778             break;
4779         case OPC_MXU__POOL04:
4780             decode_opc_mxu__pool04(ctx);
4781             break;
4782         case OPC_MXU__POOL05:
4783             decode_opc_mxu__pool05(ctx);
4784             break;
4785         case OPC_MXU__POOL06:
4786             decode_opc_mxu__pool06(ctx);
4787             break;
4788         case OPC_MXU__POOL07:
4789             decode_opc_mxu__pool07(ctx);
4790             break;
4791         case OPC_MXU__POOL08:
4792             decode_opc_mxu__pool08(ctx);
4793             break;
4794         case OPC_MXU__POOL09:
4795             decode_opc_mxu__pool09(ctx);
4796             break;
4797         case OPC_MXU__POOL10:
4798             decode_opc_mxu__pool10(ctx);
4799             break;
4800         case OPC_MXU__POOL11:
4801             decode_opc_mxu__pool11(ctx);
4802             break;
4803         case OPC_MXU_D32ADD:
4804             gen_mxu_d32add(ctx);
4805             break;
4806         case OPC_MXU__POOL12:
4807             decode_opc_mxu__pool12(ctx);
4808             break;
4809         case OPC_MXU__POOL13:
4810             decode_opc_mxu__pool13(ctx);
4811             break;
4812         case OPC_MXU__POOL14:
4813             decode_opc_mxu__pool14(ctx);
4814             break;
4815         case OPC_MXU_Q8ACCE:
4816             gen_mxu_q8adde(ctx, true);
4817             break;
4818         case OPC_MXU_S8LDD:
4819             gen_mxu_s8ldd(ctx, false);
4820             break;
4821         case OPC_MXU_S8STD:
4822             gen_mxu_s8std(ctx, false);
4823             break;
4824         case OPC_MXU_S8LDI:
4825             gen_mxu_s8ldd(ctx, true);
4826             break;
4827         case OPC_MXU_S8SDI:
4828             gen_mxu_s8std(ctx, true);
4829             break;
4830         case OPC_MXU__POOL15:
4831             decode_opc_mxu__pool15(ctx);
4832             break;
4833         case OPC_MXU__POOL16:
4834             decode_opc_mxu__pool16(ctx);
4835             break;
4836         case OPC_MXU__POOL17:
4837             decode_opc_mxu__pool17(ctx);
4838             break;
4839         case OPC_MXU_S16LDD:
4840             gen_mxu_s16ldd(ctx, false);
4841             break;
4842         case OPC_MXU_S16STD:
4843             gen_mxu_s16std(ctx, false);
4844             break;
4845         case OPC_MXU_S16LDI:
4846             gen_mxu_s16ldd(ctx, true);
4847             break;
4848         case OPC_MXU_S16SDI:
4849             gen_mxu_s16std(ctx, true);
4850             break;
4851         case OPC_MXU_D32SLL:
4852             gen_mxu_d32sxx(ctx, false, false);
4853             break;
4854         case OPC_MXU_D32SLR:
4855             gen_mxu_d32sxx(ctx, true, false);
4856             break;
4857         case OPC_MXU_D32SARL:
4858             gen_mxu_d32sarl(ctx, false);
4859             break;
4860         case OPC_MXU_D32SAR:
4861             gen_mxu_d32sxx(ctx, true, true);
4862             break;
4863         case OPC_MXU_Q16SLL:
4864             gen_mxu_q16sxx(ctx, false, false);
4865             break;
4866         case OPC_MXU__POOL18:
4867             decode_opc_mxu__pool18(ctx);
4868             break;
4869         case OPC_MXU_Q16SLR:
4870             gen_mxu_q16sxx(ctx, true, false);
4871             break;
4872         case OPC_MXU_Q16SAR:
4873             gen_mxu_q16sxx(ctx, true, true);
4874             break;
4875         case OPC_MXU__POOL19:
4876             decode_opc_mxu__pool19(ctx);
4877             break;
4878         case OPC_MXU__POOL20:
4879             decode_opc_mxu__pool20(ctx);
4880             break;
4881         case OPC_MXU__POOL21:
4882             decode_opc_mxu__pool21(ctx);
4883             break;
4884         case OPC_MXU_Q16SCOP:
4885             gen_mxu_q16scop(ctx);
4886             break;
4887         default:
4888             return false;
4889         }
4890 
4891         gen_set_label(l_exit);
4892     }
4893 
4894     return true;
4895 }
4896