xref: /openbmc/qemu/target/mips/tcg/mxu_translate.c (revision a1a62ced51bd33716c79719246ac969447acadb2)
1 /*
2  *  Ingenic XBurst Media eXtension Unit (MXU) translation routines.
3  *
4  *  Copyright (c) 2004-2005 Jocelyn Mayer
5  *  Copyright (c) 2006 Marius Groeger (FPU operations)
6  *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7  *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8  *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
9  *
10  * SPDX-License-Identifier: LGPL-2.1-or-later
11  *
12  * Datasheet:
13  *
14  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
16  */
17 
18 #include "qemu/osdep.h"
19 #include "translate.h"
20 
21 /*
22  *
23  *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24  *       ============================================
25  *
26  *
 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of the
 * MIPS32 instruction set. It is designed to fit the needs of signal, graphical
 * and video processing applications. The MXU instruction set is used in the
 * XBurst family of microprocessors by Ingenic.
31  *
32  * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
33  * the control register.
34  *
35  *
36  *     The notation used in MXU assembler mnemonics
37  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38  *
39  *  Register operands:
40  *
41  *   XRa, XRb, XRc, XRd - MXU registers
42  *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
43  *
44  *  Non-register operands:
45  *
46  *   aptn1 - 1-bit accumulate add/subtract pattern
47  *   aptn2 - 2-bit accumulate add/subtract pattern
48  *   eptn2 - 2-bit execute add/subtract pattern
49  *   optn2 - 2-bit operand pattern
50  *   optn3 - 3-bit operand pattern
51  *   sft4  - 4-bit shift amount
52  *   strd2 - 2-bit stride amount
53  *
54  *  Prefixes:
55  *
56  *   Level of parallelism:                Operand size:
57  *    S - single operation at a time       32 - word
58  *    D - two operations in parallel       16 - half word
59  *    Q - four operations in parallel       8 - byte
60  *
61  *  Operations:
62  *
63  *   ADD   - Add or subtract
64  *   ADDC  - Add with carry-in
65  *   ACC   - Accumulate
66  *   ASUM  - Sum together then accumulate (add or subtract)
67  *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
68  *   AVG   - Average between 2 operands
69  *   ABD   - Absolute difference
70  *   ALN   - Align data
71  *   AND   - Logical bitwise 'and' operation
72  *   CPS   - Copy sign
73  *   EXTR  - Extract bits
74  *   I2M   - Move from GPR register to MXU register
75  *   LDD   - Load data from memory to XRF
76  *   LDI   - Load data from memory to XRF (and increase the address base)
77  *   LUI   - Load unsigned immediate
78  *   MUL   - Multiply
79  *   MULU  - Unsigned multiply
80  *   MADD  - 64-bit operand add 32x32 product
81  *   MSUB  - 64-bit operand subtract 32x32 product
82  *   MAC   - Multiply and accumulate (add or subtract)
83  *   MAD   - Multiply and add or subtract
84  *   MAX   - Maximum between 2 operands
85  *   MIN   - Minimum between 2 operands
86  *   M2I   - Move from MXU register to GPR register
87  *   MOVZ  - Move if zero
88  *   MOVN  - Move if non-zero
89  *   NOR   - Logical bitwise 'nor' operation
90  *   OR    - Logical bitwise 'or' operation
91  *   STD   - Store data from XRF to memory
92  *   SDI   - Store data from XRF to memory (and increase the address base)
 *   SLT   - Set on less than comparison
94  *   SAD   - Sum of absolute differences
95  *   SLL   - Logical shift left
96  *   SLR   - Logical shift right
97  *   SAR   - Arithmetic shift right
98  *   SAT   - Saturation
99  *   SFL   - Shuffle
100  *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101  *   XOR   - Logical bitwise 'exclusive or' operation
102  *
103  *  Suffixes:
104  *
105  *   E - Expand results
106  *   F - Fixed point multiplication
107  *   L - Low part result
108  *   R - Doing rounding
109  *   V - Variable instead of immediate
110  *   W - Combine above L and V
111  *
112  *
113  *     The list of MXU instructions grouped by functionality
114  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115  *
116  * Load/Store instructions           Multiplication instructions
117  * -----------------------           ---------------------------
118  *
119  *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
120  *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
121  *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
122  *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
123  *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
124  *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
125  *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
126  *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
127  *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
128  *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129  *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130  *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131  *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132  *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133  *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
134  *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
 *  S16LDD XRa, Rb, s10, optn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
 *  S16STD XRa, Rb, s10, optn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
 *  S16LDI XRa, Rb, s10, optn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
 *  S16SDI XRa, Rb, s10, optn2
 *  S8LDD XRa, Rb, s8, optn3
 *  S8STD XRa, Rb, s8, optn3         Addition and subtraction instructions
 *  S8LDI XRa, Rb, s8, optn3         -------------------------------------
 *  S8SDI XRa, Rb, s8, optn3
143  *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
144  *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
145  *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
146  *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
147  *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
148  *                                    S32CPS XRa, XRb, XRc
149  *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150  * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
151  * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
152  *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
 *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb, XRc
154  *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
155  *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
156  *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
157  *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
158  *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
159  *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
160  *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
161  *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
162  *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
163  *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
164  *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
165  *  Q8SLT XRa, XRb, XRc
166  *  Q8SLTU XRa, XRb, XRc
167  *  Q8MOVZ XRa, XRb, XRc             Shift instructions
168  *  Q8MOVN XRa, XRb, XRc             ------------------
169  *
170  *                                    D32SLL XRa, XRb, XRc, XRd, sft4
171  * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
172  * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
173  *                                    D32SARL XRa, XRb, XRc, sft4
174  *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
175  *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
176  *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
177  *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
178  *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
179  *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
180  * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
181  * -------------------------          Q16SLLV XRa, XRb, Rb
182  *                                    Q16SLRV XRa, XRb, Rb
183  *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
184  *  S32ALN XRa, XRb, XRc, Rb
185  *  S32ALNI XRa, XRb, XRc, s3
186  *  S32LUI XRa, s8, optn3            Move instructions
187  *  S32EXTR XRa, XRb, Rb, bits5      -----------------
188  *  S32EXTRV XRa, XRb, Rs, Rt
189  *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
190  *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
191  *
192  *
193  *     The opcode organization of MXU instructions
194  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195  *
 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
 * to as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning
 * of other bits up to the instruction level is as follows:
199  *
200  *              bits
201  *             05..00
202  *
203  *          ┌─ 000000 ─ OPC_MXU_S32MADD
204  *          ├─ 000001 ─ OPC_MXU_S32MADDU
205  *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
206  *          │
207  *          │                               20..18
208  *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209  *          │                            ├─ 001 ─ OPC_MXU_S32MIN
210  *          │                            ├─ 010 ─ OPC_MXU_D16MAX
211  *          │                            ├─ 011 ─ OPC_MXU_D16MIN
212  *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
213  *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
214  *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
215  *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
216  *          ├─ 000100 ─ OPC_MXU_S32MSUB
217  *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
218  *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219  *          │                            ├─ 001 ─ OPC_MXU_D16SLT
220  *          │                            ├─ 010 ─ OPC_MXU_D16AVG
221  *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
222  *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
223  *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
224  *          │                            └─ 111 ─ OPC_MXU_Q8ADD
225  *          │
226  *          │                               20..18
227  *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228  *          │                            ├─ 010 ─ OPC_MXU_D16CPS
229  *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
230  *          │                            └─ 110 ─ OPC_MXU_Q16SAT
231  *          ├─ 001000 ─ OPC_MXU_D16MUL
232  *          │                               25..24
233  *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234  *          │                            └─ 01 ─ OPC_MXU_D16MULE
235  *          ├─ 001010 ─ OPC_MXU_D16MAC
236  *          ├─ 001011 ─ OPC_MXU_D16MACF
237  *          ├─ 001100 ─ OPC_MXU_D16MADL
238  *          ├─ 001101 ─ OPC_MXU_S16MAD
239  *          ├─ 001110 ─ OPC_MXU_Q16ADD
240  *          ├─ 001111 ─ OPC_MXU_D16MACE     20 (13..10 don't care)
241  *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
242  *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
243  *          │
244  *          │                               20 (13..10 don't care)
245  *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246  *          │                            └─ 1 ─ OPC_MXU_S32STDR
247  *          │
248  *          │                               13..10
249  *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250  *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
251  *          │
252  *          │                               13..10
253  *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254  *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
255  *          │
256  *          │                               20 (13..10 don't care)
257  *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258  *          │                            └─ 1 ─ OPC_MXU_S32LDIR
259  *          │
260  *          │                               20 (13..10 don't care)
261  *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262  *          │                            └─ 1 ─ OPC_MXU_S32SDIR
263  *          │
264  *          │                               13..10
265  *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266  *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
267  *          │
268  *          │                               13..10
269  *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270  *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
271  *          ├─ 011000 ─ OPC_MXU_D32ADD  (catches D32ADDC too)
272  *          │                               23..22
273  *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274  * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
275  *          │                            └─ 10 ─ OPC_MXU_D32ASUM
276  *          ├─ 011010 ─ <not assigned>
277  *          │                               23..22
278  *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279  *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
280  *          │                            └─ 10 ─ OPC_MXU_D16ASUM
281  *          │
282  *          │                               23..22
283  *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284  *          │                            ├─ 01 ─ OPC_MXU_D8SUM
285  *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
286  *          ├─ 011110 ─ <not assigned>
287  *          ├─ 011111 ─ <not assigned>
288  *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
289  *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
290  *          ├─ 100010 ─ OPC_MXU_S8LDD
291  *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
292  *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
293  *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
294  *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
295  *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
296  *          │
297  *          │                               20..18
298  *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299  *          │                            ├─ 001 ─ OPC_MXU_S32ALN
300  *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
301  *          │                            ├─ 011 ─ OPC_MXU_S32LUI
302  *          │                            ├─ 100 ─ OPC_MXU_S32NOR
303  *          │                            ├─ 101 ─ OPC_MXU_S32AND
304  *          │                            ├─ 110 ─ OPC_MXU_S32OR
305  *          │                            └─ 111 ─ OPC_MXU_S32XOR
306  *          │
307  *          │                               8..6
308  *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309  *          │                            ├─ 001 ─ OPC_MXU_LXH
310  *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
311  *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
312  *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
313  *          ├─ 101100 ─ OPC_MXU_S16LDI
314  *          ├─ 101101 ─ OPC_MXU_S16SDI
315  *          ├─ 101110 ─ OPC_MXU_S32M2I
316  *          ├─ 101111 ─ OPC_MXU_S32I2M
317  *          ├─ 110000 ─ OPC_MXU_D32SLL
318  *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
319  *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
320  *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
321  *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
322  *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
323  *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
324  *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
325  *          │
326  *          ├─ 110111 ─ OPC_MXU_Q16SAR
327  *          │                               23..22
328  *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329  *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
330  *          │
331  *          │                               20..18
332  *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333  *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
334  *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
335  *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
336  *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
337  *          │                            └─ 101 ─ OPC_MXU_S32MOVN
338  *          │
339  *          │                               23..22
340  *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341  *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
342  *          ├─ 111011 ─ OPC_MXU_Q16SCOP
343  *          ├─ 111100 ─ OPC_MXU_Q8MADL
344  *          ├─ 111101 ─ OPC_MXU_S32SFL
345  *          ├─ 111110 ─ OPC_MXU_Q8SAD
346  *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
347  *
348  *
349  * Compiled after:
350  *
351  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
353  */
354 
/*
 * MXU major opcodes: the value of instruction bits 05..00 when bits 31..26
 * hold 0x1C (SPECIAL2).
 *
 * OPC_MXU__POOLnn entries are not instructions themselves; each one stands
 * for a group of instructions that is told apart by a further field of the
 * instruction word (see the per-pool enums that follow).  Values missing
 * from this list are not assigned to MXU.
 */
enum {
    OPC_MXU_S32MADD  = 0x00,
    OPC_MXU_S32MADDU = 0x01,
    OPC_MXU__POOL00  = 0x03,
    OPC_MXU_S32MSUB  = 0x04,
    OPC_MXU_S32MSUBU = 0x05,
    OPC_MXU__POOL01  = 0x06,
    OPC_MXU__POOL02  = 0x07,
    OPC_MXU_D16MUL   = 0x08,
    OPC_MXU__POOL03  = 0x09,
    OPC_MXU_D16MAC   = 0x0A,
    OPC_MXU_D16MACF  = 0x0B,
    OPC_MXU_D16MADL  = 0x0C,
    OPC_MXU_S16MAD   = 0x0D,
    OPC_MXU_Q16ADD   = 0x0E,
    OPC_MXU_D16MACE  = 0x0F,
    OPC_MXU__POOL04  = 0x10,
    OPC_MXU__POOL05  = 0x11,
    OPC_MXU__POOL06  = 0x12,
    OPC_MXU__POOL07  = 0x13,
    OPC_MXU__POOL08  = 0x14,
    OPC_MXU__POOL09  = 0x15,
    OPC_MXU__POOL10  = 0x16,
    OPC_MXU__POOL11  = 0x17,
    OPC_MXU_D32ADD   = 0x18,
    OPC_MXU__POOL12  = 0x19,
    OPC_MXU__POOL13  = 0x1B,
    OPC_MXU__POOL14  = 0x1C,
    OPC_MXU_Q8ACCE   = 0x1D,
    OPC_MXU_S8LDD    = 0x22,
    OPC_MXU_S8STD    = 0x23,
    OPC_MXU_S8LDI    = 0x24,
    OPC_MXU_S8SDI    = 0x25,
    OPC_MXU__POOL15  = 0x26,
    OPC_MXU__POOL16  = 0x27,
    OPC_MXU__POOL17  = 0x28,
    OPC_MXU_S16LDD   = 0x2A,
    OPC_MXU_S16STD   = 0x2B,
    OPC_MXU_S16LDI   = 0x2C,
    OPC_MXU_S16SDI   = 0x2D,
    OPC_MXU_S32M2I   = 0x2E,
    OPC_MXU_S32I2M   = 0x2F,
    OPC_MXU_D32SLL   = 0x30,
    OPC_MXU_D32SLR   = 0x31,
    OPC_MXU_D32SARL  = 0x32,
    OPC_MXU_D32SAR   = 0x33,
    OPC_MXU_Q16SLL   = 0x34,
    OPC_MXU_Q16SLR   = 0x35,
    OPC_MXU__POOL18  = 0x36,
    OPC_MXU_Q16SAR   = 0x37,
    OPC_MXU__POOL19  = 0x38,
    OPC_MXU__POOL20  = 0x39,
    OPC_MXU__POOL21  = 0x3A,
    OPC_MXU_Q16SCOP  = 0x3B,
    OPC_MXU_Q8MADL   = 0x3C,
    OPC_MXU_S32SFL   = 0x3D,
    OPC_MXU_Q8SAD    = 0x3E,
};
413 
414 
/*
 * MXU pool 00 (major opcode OPC_MXU__POOL00)
 *
 * Minor opcodes, taken from instruction bits 20..18: maximum/minimum and
 * byte-wise set-on-less-than operations.
 */
enum {
    OPC_MXU_S32MAX   = 0x00,
    OPC_MXU_S32MIN   = 0x01,
    OPC_MXU_D16MAX   = 0x02,
    OPC_MXU_D16MIN   = 0x03,
    OPC_MXU_Q8MAX    = 0x04,
    OPC_MXU_Q8MIN    = 0x05,
    OPC_MXU_Q8SLT    = 0x06,
    OPC_MXU_Q8SLTU   = 0x07,
};
428 
/*
 * MXU pool 01 (major opcode OPC_MXU__POOL01)
 *
 * Minor opcodes, taken from instruction bits 20..18.  Value 0x06 is not
 * assigned.
 */
enum {
    OPC_MXU_S32SLT   = 0x00,
    OPC_MXU_D16SLT   = 0x01,
    OPC_MXU_D16AVG   = 0x02,
    OPC_MXU_D16AVGR  = 0x03,
    OPC_MXU_Q8AVG    = 0x04,
    OPC_MXU_Q8AVGR   = 0x05,
    OPC_MXU_Q8ADD    = 0x07,
};
441 
/*
 * MXU pool 02 (major opcode OPC_MXU__POOL02)
 *
 * Minor opcodes, taken from instruction bits 20..18.  Only the even values
 * are assigned.
 */
enum {
    OPC_MXU_S32CPS   = 0x00,
    OPC_MXU_D16CPS   = 0x02,
    OPC_MXU_Q8ABD    = 0x04,
    OPC_MXU_Q16SAT   = 0x06,
};
451 
/*
 * MXU pool 03 (major opcode OPC_MXU__POOL03)
 *
 * Minor opcodes, taken from instruction bits 25..24.
 */
enum {
    OPC_MXU_D16MULF  = 0x00,
    OPC_MXU_D16MULE  = 0x01,
};
459 
/*
 * MXU pool 04 05 06 07 08 09 10 11 (32-bit load/store pools)
 *
 * One pair of minor opcodes shared by all eight pools: 0 selects the plain
 * instruction (S32LDD, S32STD, S32LDDV, ...), 1 selects its R-suffixed
 * counterpart (S32LDDR, S32STDR, S32LDDVR, ...).  The selector is
 * instruction bit 20 for pools 04, 05, 08 and 09, and bits 13..10 for
 * pools 06, 07, 10 and 11.
 */
enum {
    OPC_MXU_S32LDST  = 0x00,
    OPC_MXU_S32LDSTR = 0x01,
};
467 
/*
 * MXU pool 12 (major opcode OPC_MXU__POOL12)
 *
 * Minor opcodes, taken from instruction bits 23..22.
 */
enum {
    OPC_MXU_D32ACC    = 0x00,
    OPC_MXU_D32ACCM   = 0x01,
    OPC_MXU_D32ASUM   = 0x02,
};
476 
/*
 * MXU pool 13 (major opcode OPC_MXU__POOL13)
 *
 * Minor opcodes, taken from instruction bits 23..22.
 */
enum {
    OPC_MXU_Q16ACC    = 0x00,
    OPC_MXU_Q16ACCM   = 0x01,
    OPC_MXU_D16ASUM   = 0x02,
};
485 
/*
 * MXU pool 14 (major opcode OPC_MXU__POOL14)
 *
 * Minor opcodes, taken from instruction bits 23..22.
 */
enum {
    OPC_MXU_Q8ADDE    = 0x00,
    OPC_MXU_D8SUM     = 0x01,
    OPC_MXU_D8SUMC    = 0x02,
};
494 
/*
 * MXU pool 15 (major opcode OPC_MXU__POOL15)
 *
 * Minor opcodes, taken from instruction bits 15..14.
 */
enum {
    OPC_MXU_S32MUL    = 0x00,
    OPC_MXU_S32MULU   = 0x01,
    OPC_MXU_S32EXTR   = 0x02,
    OPC_MXU_S32EXTRV  = 0x03,
};
504 
/*
 * MXU pool 16 (major opcode OPC_MXU__POOL16)
 *
 * Minor opcodes, taken from instruction bits 20..18: alignment, load
 * immediate and bitwise logic operations, plus D32SARW.
 */
enum {
    OPC_MXU_D32SARW  = 0x00,
    OPC_MXU_S32ALN   = 0x01,
    OPC_MXU_S32ALNI  = 0x02,
    OPC_MXU_S32LUI   = 0x03,
    OPC_MXU_S32NOR   = 0x04,
    OPC_MXU_S32AND   = 0x05,
    OPC_MXU_S32OR    = 0x06,
    OPC_MXU_S32XOR   = 0x07,
};
518 
/*
 * MXU pool 17 (major opcode OPC_MXU__POOL17)
 *
 * Minor opcodes, taken from instruction bits 8..6: the LX* loads into
 * general purpose registers.  Value 0x02 is not assigned.
 */
enum {
    OPC_MXU_LXB      = 0x00,
    OPC_MXU_LXH      = 0x01,
    OPC_MXU_LXW      = 0x03,
    OPC_MXU_LXBU     = 0x04,
    OPC_MXU_LXHU     = 0x05,
};
529 
/*
 * MXU pool 18 (major opcode OPC_MXU__POOL18)
 *
 * Minor opcodes, taken from instruction bits 20..18: shifts by a variable
 * amount.  Values 0x02 and 0x06 are not assigned.
 */
enum {
    OPC_MXU_D32SLLV  = 0x00,
    OPC_MXU_D32SLRV  = 0x01,
    OPC_MXU_D32SARV  = 0x03,
    OPC_MXU_Q16SLLV  = 0x04,
    OPC_MXU_Q16SLRV  = 0x05,
    OPC_MXU_Q16SARV  = 0x07,
};
541 
/*
 * MXU pool 19 (major opcode OPC_MXU__POOL19)
 *
 * Minor opcodes, taken from instruction bits 23..22.
 */
enum {
    OPC_MXU_Q8MUL    = 0x00,
    OPC_MXU_Q8MULSU  = 0x02,
};
549 
/*
 * MXU pool 20 (major opcode OPC_MXU__POOL20)
 *
 * Minor opcodes, taken from instruction bits 20..18: conditional moves
 * (move if zero / move if non-zero).
 */
enum {
    OPC_MXU_Q8MOVZ   = 0x00,
    OPC_MXU_Q8MOVN   = 0x01,
    OPC_MXU_D16MOVZ  = 0x02,
    OPC_MXU_D16MOVN  = 0x03,
    OPC_MXU_S32MOVZ  = 0x04,
    OPC_MXU_S32MOVN  = 0x05,
};
561 
/*
 * MXU pool 21 (major opcode OPC_MXU__POOL21)
 *
 * Minor opcodes, taken from instruction bits 23..22.
 */
enum {
    OPC_MXU_Q8MAC    = 0x00,
    OPC_MXU_Q8MACSU  = 0x02,
};
569 
570 
/*
 * Encodings of the pattern fields carried by MXU instructions.
 * NOTE(review): in the add/subtract patterns 'A' presumably stands for add
 * and 'S' for subtract, one letter per lane - confirm against the manual.
 */

/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A    0
#define MXU_APTN1_S    1

/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
#define MXU_APTN2_AA    0
#define MXU_APTN2_AS    1
#define MXU_APTN2_SA    2
#define MXU_APTN2_SS    3

/* MXU execute add/subtract 2-bit pattern 'eptn2' */
#define MXU_EPTN2_AA    0
#define MXU_EPTN2_AS    1
#define MXU_EPTN2_SA    2
#define MXU_EPTN2_SS    3

/* MXU operand getting pattern 'optn2' (2-bit field, values 0..3) */
#define MXU_OPTN2_PTN0  0
#define MXU_OPTN2_PTN1  1
#define MXU_OPTN2_PTN2  2
#define MXU_OPTN2_PTN3  3
/* alternative naming scheme for 'optn2': same four values, other names */
#define MXU_OPTN2_WW    0
#define MXU_OPTN2_LW    1
#define MXU_OPTN2_HW    2
#define MXU_OPTN2_XW    3

/*
 * MXU operand getting pattern 'optn3' (3-bit field, values 0..7).
 * What each pattern selects is defined per instruction by the code that
 * switches on it.
 */
#define MXU_OPTN3_PTN0  0
#define MXU_OPTN3_PTN1  1
#define MXU_OPTN3_PTN2  2
#define MXU_OPTN3_PTN3  3
#define MXU_OPTN3_PTN4  4
#define MXU_OPTN3_PTN5  5
#define MXU_OPTN3_PTN6  6
#define MXU_OPTN3_PTN7  7
607 
608 /* MXU registers */
609 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
610 static TCGv mxu_CR;
611 
612 static const char mxuregnames[NUMBER_OF_MXU_REGISTERS][4] = {
613     "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
614     "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
615 };
616 
617 void mxu_translate_init(void)
618 {
619     for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
620         mxu_gpr[i] = tcg_global_mem_new(cpu_env,
621                                         offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
622                                         mxuregnames[i]);
623     }
624 
625     mxu_CR = tcg_global_mem_new(cpu_env,
626                                 offsetof(CPUMIPSState, active_tc.mxu_cr),
627                                 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
628 }
629 
630 /* MXU General purpose registers moves. */
631 static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
632 {
633     if (reg == 0) {
634         tcg_gen_movi_tl(t, 0);
635     } else if (reg <= 15) {
636         tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
637     }
638 }
639 
640 static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
641 {
642     if (reg > 0 && reg <= 15) {
643         tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
644     }
645 }
646 
647 static inline void gen_extract_mxu_gpr(TCGv t, unsigned int reg,
648                                        unsigned int ofs, unsigned int len)
649 {
650     if (reg == 0) {
651         tcg_gen_movi_tl(t, 0);
652     } else if (reg <= 15) {
653         tcg_gen_extract_tl(t, mxu_gpr[reg - 1], ofs, len);
654     }
655 }
656 
/* MXU control register moves. */

/* Emit a TCG move that copies the MXU control register global into 't'. */
static inline void gen_load_mxu_cr(TCGv t)
{
    tcg_gen_mov_tl(t, mxu_CR);
}
662 
/*
 * Emit a TCG move that overwrites the MXU control register global with 't'.
 * The whole value is copied unchanged; no per-bit access rules are applied.
 */
static inline void gen_store_mxu_cr(TCGv t)
{
    /* TODO: Add handling of RW rules for MXU_CR. */
    tcg_gen_mov_tl(mxu_CR, t);
}
668 
669 /*
670  * S32I2M XRa, rb - Register move from GRF to XRF
671  */
672 static void gen_mxu_s32i2m(DisasContext *ctx)
673 {
674     TCGv t0;
675     uint32_t XRa, Rb;
676 
677     t0 = tcg_temp_new();
678 
679     XRa = extract32(ctx->opcode, 6, 5);
680     Rb = extract32(ctx->opcode, 16, 5);
681 
682     gen_load_gpr(t0, Rb);
683     if (XRa <= 15) {
684         gen_store_mxu_gpr(t0, XRa);
685     } else if (XRa == 16) {
686         gen_store_mxu_cr(t0);
687     }
688 }
689 
690 /*
691  * S32M2I XRa, rb - Register move from XRF to GRF
692  */
693 static void gen_mxu_s32m2i(DisasContext *ctx)
694 {
695     TCGv t0;
696     uint32_t XRa, Rb;
697 
698     t0 = tcg_temp_new();
699 
700     XRa = extract32(ctx->opcode, 6, 5);
701     Rb = extract32(ctx->opcode, 16, 5);
702 
703     if (XRa <= 15) {
704         gen_load_mxu_gpr(t0, XRa);
705     } else if (XRa == 16) {
706         gen_load_mxu_cr(t0);
707     }
708 
709     gen_store_gpr(t0, Rb);
710 }
711 
712 /*
713  * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
714  *
715  * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
716  * post modify address register
717  */
718 static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
719 {
720     TCGv t0, t1;
721     uint32_t XRa, Rb, s8, optn3;
722 
723     t0 = tcg_temp_new();
724     t1 = tcg_temp_new();
725 
726     XRa = extract32(ctx->opcode, 6, 4);
727     s8 = extract32(ctx->opcode, 10, 8);
728     optn3 = extract32(ctx->opcode, 18, 3);
729     Rb = extract32(ctx->opcode, 21, 5);
730 
731     gen_load_gpr(t0, Rb);
732     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
733     if (postmodify) {
734         gen_store_gpr(t0, Rb);
735     }
736 
737     switch (optn3) {
738     /* XRa[7:0] = tmp8 */
739     case MXU_OPTN3_PTN0:
740         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
741         gen_load_mxu_gpr(t0, XRa);
742         tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
743         break;
744     /* XRa[15:8] = tmp8 */
745     case MXU_OPTN3_PTN1:
746         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
747         gen_load_mxu_gpr(t0, XRa);
748         tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
749         break;
750     /* XRa[23:16] = tmp8 */
751     case MXU_OPTN3_PTN2:
752         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
753         gen_load_mxu_gpr(t0, XRa);
754         tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
755         break;
756     /* XRa[31:24] = tmp8 */
757     case MXU_OPTN3_PTN3:
758         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
759         gen_load_mxu_gpr(t0, XRa);
760         tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
761         break;
762     /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
763     case MXU_OPTN3_PTN4:
764         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
765         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
766         break;
767     /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
768     case MXU_OPTN3_PTN5:
769         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
770         tcg_gen_shli_tl(t1, t1, 8);
771         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
772         break;
773     /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
774     case MXU_OPTN3_PTN6:
775         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
776         tcg_gen_mov_tl(t0, t1);
777         tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
778         tcg_gen_shli_tl(t1, t1, 16);
779         tcg_gen_or_tl(t0, t0, t1);
780         break;
781     /* XRa = {tmp8, tmp8, tmp8, tmp8} */
782     case MXU_OPTN3_PTN7:
783         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
784         tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
785         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
786         break;
787     }
788 
789     gen_store_mxu_gpr(t0, XRa);
790 }
791 
792 /*
793  * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
794  *
795  * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
796  * post modify address register
797  */
798 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
799 {
800     TCGv t0, t1;
801     uint32_t XRa, Rb, s8, optn3;
802 
803     t0 = tcg_temp_new();
804     t1 = tcg_temp_new();
805 
806     XRa = extract32(ctx->opcode, 6, 4);
807     s8 = extract32(ctx->opcode, 10, 8);
808     optn3 = extract32(ctx->opcode, 18, 3);
809     Rb = extract32(ctx->opcode, 21, 5);
810 
811     if (optn3 > 3) {
812         /* reserved, do nothing */
813         return;
814     }
815 
816     gen_load_gpr(t0, Rb);
817     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
818     if (postmodify) {
819         gen_store_gpr(t0, Rb);
820     }
821     gen_load_mxu_gpr(t1, XRa);
822 
823     switch (optn3) {
824     /* XRa[7:0] => tmp8 */
825     case MXU_OPTN3_PTN0:
826         tcg_gen_extract_tl(t1, t1, 0, 8);
827         break;
828     /* XRa[15:8] => tmp8 */
829     case MXU_OPTN3_PTN1:
830         tcg_gen_extract_tl(t1, t1, 8, 8);
831         break;
832     /* XRa[23:16] => tmp8 */
833     case MXU_OPTN3_PTN2:
834         tcg_gen_extract_tl(t1, t1, 16, 8);
835         break;
836     /* XRa[31:24] => tmp8 */
837     case MXU_OPTN3_PTN3:
838         tcg_gen_extract_tl(t1, t1, 24, 8);
839         break;
840     }
841 
842     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
843 }
844 
845 /*
846  * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
847  *
848  * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
849  * post modify address register
850  */
851 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
852 {
853     TCGv t0, t1;
854     uint32_t XRa, Rb, optn2;
855     int32_t s10;
856 
857     t0 = tcg_temp_new();
858     t1 = tcg_temp_new();
859 
860     XRa   = extract32(ctx->opcode,   6, 4);
861     s10   = sextract32(ctx->opcode, 10, 9) * 2;
862     optn2 = extract32(ctx->opcode,  19, 2);
863     Rb    = extract32(ctx->opcode,  21, 5);
864 
865     gen_load_gpr(t0, Rb);
866     tcg_gen_addi_tl(t0, t0, s10);
867     if (postmodify) {
868         gen_store_gpr(t0, Rb);
869     }
870 
871     switch (optn2) {
872     /* XRa[15:0] = tmp16 */
873     case MXU_OPTN2_PTN0:
874         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
875         gen_load_mxu_gpr(t0, XRa);
876         tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
877         break;
878     /* XRa[31:16] = tmp16 */
879     case MXU_OPTN2_PTN1:
880         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
881         gen_load_mxu_gpr(t0, XRa);
882         tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
883         break;
884     /* XRa = sign_extend(tmp16) */
885     case MXU_OPTN2_PTN2:
886         tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
887         break;
888     /* XRa = {tmp16, tmp16} */
889     case MXU_OPTN2_PTN3:
890         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
891         tcg_gen_deposit_tl(t0, t1, t1,  0, 16);
892         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
893         break;
894     }
895 
896     gen_store_mxu_gpr(t0, XRa);
897 }
898 
899 /*
900  * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
901  *
902  * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
903  * post modify address register
904  */
905 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
906 {
907     TCGv t0, t1;
908     uint32_t XRa, Rb, optn2;
909     int32_t s10;
910 
911     t0 = tcg_temp_new();
912     t1 = tcg_temp_new();
913 
914     XRa = extract32(ctx->opcode, 6, 4);
915     s10 = sextract32(ctx->opcode, 10, 9) * 2;
916     optn2 = extract32(ctx->opcode, 19, 2);
917     Rb = extract32(ctx->opcode, 21, 5);
918 
919     if (optn2 > 1) {
920         /* reserved, do nothing */
921         return;
922     }
923 
924     gen_load_gpr(t0, Rb);
925     tcg_gen_addi_tl(t0, t0, s10);
926     if (postmodify) {
927         gen_store_gpr(t0, Rb);
928     }
929     gen_load_mxu_gpr(t1, XRa);
930 
931     switch (optn2) {
932     /* XRa[15:0] => tmp16 */
933     case MXU_OPTN2_PTN0:
934         tcg_gen_extract_tl(t1, t1, 0, 16);
935         break;
936     /* XRa[31:16] => tmp16 */
937     case MXU_OPTN2_PTN1:
938         tcg_gen_extract_tl(t1, t1, 16, 16);
939         break;
940     }
941 
942     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
943 }
944 
945 /*
946  * S32MUL  XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
947  * of GPR's and stores result into pair of MXU registers.
948  * It strains HI and LO registers.
949  *
950  * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
951  * of GPR's and stores result into pair of MXU registers.
952  * It strains HI and LO registers.
953  */
954 static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
955 {
956     TCGv t0, t1;
957     uint32_t XRa, XRd, rs, rt;
958 
959     t0 = tcg_temp_new();
960     t1 = tcg_temp_new();
961 
962     XRa = extract32(ctx->opcode,  6, 4);
963     XRd = extract32(ctx->opcode, 10, 4);
964     rs  = extract32(ctx->opcode, 16, 5);
965     rt  = extract32(ctx->opcode, 21, 5);
966 
967     if (unlikely(rs == 0 || rt == 0)) {
968         tcg_gen_movi_tl(t0, 0);
969         tcg_gen_movi_tl(t1, 0);
970     } else {
971         gen_load_gpr(t0, rs);
972         gen_load_gpr(t1, rt);
973 
974         if (mulu) {
975             tcg_gen_mulu2_tl(t0, t1, t0, t1);
976         } else {
977             tcg_gen_muls2_tl(t0, t1, t0, t1);
978         }
979     }
980     tcg_gen_mov_tl(cpu_HI[0], t1);
981     tcg_gen_mov_tl(cpu_LO[0], t0);
982     gen_store_mxu_gpr(t1, XRa);
983     gen_store_mxu_gpr(t0, XRd);
984 }
985 
986 /*
987  * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
988  * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
989  *   with rounding and packing result
990  * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
991  *   multiplication with rounding
992  */
993 static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
994                            bool packed_result)
995 {
996     TCGv t0, t1, t2, t3;
997     uint32_t XRa, XRb, XRc, XRd, optn2;
998 
999     t0 = tcg_temp_new();
1000     t1 = tcg_temp_new();
1001     t2 = tcg_temp_new();
1002     t3 = tcg_temp_new();
1003 
1004     XRa = extract32(ctx->opcode, 6, 4);
1005     XRb = extract32(ctx->opcode, 10, 4);
1006     XRc = extract32(ctx->opcode, 14, 4);
1007     XRd = extract32(ctx->opcode, 18, 4);
1008     optn2 = extract32(ctx->opcode, 22, 2);
1009 
1010     /*
1011      * TODO: XRd field isn't used for D16MULF
1012      * There's no knowledge how this field affect
1013      * instruction decoding/behavior
1014      */
1015 
1016     gen_load_mxu_gpr(t1, XRb);
1017     tcg_gen_sextract_tl(t0, t1, 0, 16);
1018     tcg_gen_sextract_tl(t1, t1, 16, 16);
1019     gen_load_mxu_gpr(t3, XRc);
1020     tcg_gen_sextract_tl(t2, t3, 0, 16);
1021     tcg_gen_sextract_tl(t3, t3, 16, 16);
1022 
1023     switch (optn2) {
1024     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1025         tcg_gen_mul_tl(t3, t1, t3);
1026         tcg_gen_mul_tl(t2, t0, t2);
1027         break;
1028     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1029         tcg_gen_mul_tl(t3, t0, t3);
1030         tcg_gen_mul_tl(t2, t0, t2);
1031         break;
1032     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1033         tcg_gen_mul_tl(t3, t1, t3);
1034         tcg_gen_mul_tl(t2, t1, t2);
1035         break;
1036     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1037         tcg_gen_mul_tl(t3, t0, t3);
1038         tcg_gen_mul_tl(t2, t1, t2);
1039         break;
1040     }
1041     if (fractional) {
1042         TCGLabel *l_done = gen_new_label();
1043         TCGv rounding = tcg_temp_new();
1044 
1045         tcg_gen_shli_tl(t3, t3, 1);
1046         tcg_gen_shli_tl(t2, t2, 1);
1047         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1048         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1049         if (packed_result) {
1050             TCGLabel *l_apply_bias_l = gen_new_label();
1051             TCGLabel *l_apply_bias_r = gen_new_label();
1052             TCGLabel *l_half_done = gen_new_label();
1053             TCGv bias = tcg_temp_new();
1054 
1055             /*
1056              * D16MULF supports unbiased rounding aka "bankers rounding",
1057              * "round to even", "convergent rounding"
1058              */
1059             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1060             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1061             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1062             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1063             gen_set_label(l_apply_bias_l);
1064             tcg_gen_addi_tl(t3, t3, 0x8000);
1065             gen_set_label(l_half_done);
1066             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1067             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1068             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1069             gen_set_label(l_apply_bias_r);
1070             tcg_gen_addi_tl(t2, t2, 0x8000);
1071         } else {
1072             /* D16MULE doesn't support unbiased rounding */
1073             tcg_gen_addi_tl(t3, t3, 0x8000);
1074             tcg_gen_addi_tl(t2, t2, 0x8000);
1075         }
1076         gen_set_label(l_done);
1077     }
1078     if (!packed_result) {
1079         gen_store_mxu_gpr(t3, XRa);
1080         gen_store_mxu_gpr(t2, XRd);
1081     } else {
1082         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1083         tcg_gen_shri_tl(t2, t2, 16);
1084         tcg_gen_or_tl(t3, t3, t2);
1085         gen_store_mxu_gpr(t3, XRa);
1086     }
1087 }
1088 
1089 /*
1090  * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1091  *   Signed 16 bit pattern multiply and accumulate
1092  * D16MACF XRa, XRb, XRc, aptn2, optn2
1093  *   Signed Q15 fraction pattern multiply accumulate and pack
1094  * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1095  *   Signed Q15 fraction pattern multiply and accumulate
1096  */
1097 static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
1098                            bool packed_result)
1099 {
1100     TCGv t0, t1, t2, t3;
1101     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1102 
1103     t0 = tcg_temp_new();
1104     t1 = tcg_temp_new();
1105     t2 = tcg_temp_new();
1106     t3 = tcg_temp_new();
1107 
1108     XRa = extract32(ctx->opcode, 6, 4);
1109     XRb = extract32(ctx->opcode, 10, 4);
1110     XRc = extract32(ctx->opcode, 14, 4);
1111     XRd = extract32(ctx->opcode, 18, 4);
1112     optn2 = extract32(ctx->opcode, 22, 2);
1113     aptn2 = extract32(ctx->opcode, 24, 2);
1114 
1115     gen_load_mxu_gpr(t1, XRb);
1116     tcg_gen_sextract_tl(t0, t1, 0, 16);
1117     tcg_gen_sextract_tl(t1, t1, 16, 16);
1118 
1119     gen_load_mxu_gpr(t3, XRc);
1120     tcg_gen_sextract_tl(t2, t3, 0, 16);
1121     tcg_gen_sextract_tl(t3, t3, 16, 16);
1122 
1123     switch (optn2) {
1124     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1125         tcg_gen_mul_tl(t3, t1, t3);
1126         tcg_gen_mul_tl(t2, t0, t2);
1127         break;
1128     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1129         tcg_gen_mul_tl(t3, t0, t3);
1130         tcg_gen_mul_tl(t2, t0, t2);
1131         break;
1132     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1133         tcg_gen_mul_tl(t3, t1, t3);
1134         tcg_gen_mul_tl(t2, t1, t2);
1135         break;
1136     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1137         tcg_gen_mul_tl(t3, t0, t3);
1138         tcg_gen_mul_tl(t2, t1, t2);
1139         break;
1140     }
1141 
1142     if (fractional) {
1143         tcg_gen_shli_tl(t3, t3, 1);
1144         tcg_gen_shli_tl(t2, t2, 1);
1145     }
1146     gen_load_mxu_gpr(t0, XRa);
1147     gen_load_mxu_gpr(t1, XRd);
1148 
1149     switch (aptn2) {
1150     case MXU_APTN2_AA:
1151         tcg_gen_add_tl(t3, t0, t3);
1152         tcg_gen_add_tl(t2, t1, t2);
1153         break;
1154     case MXU_APTN2_AS:
1155         tcg_gen_add_tl(t3, t0, t3);
1156         tcg_gen_sub_tl(t2, t1, t2);
1157         break;
1158     case MXU_APTN2_SA:
1159         tcg_gen_sub_tl(t3, t0, t3);
1160         tcg_gen_add_tl(t2, t1, t2);
1161         break;
1162     case MXU_APTN2_SS:
1163         tcg_gen_sub_tl(t3, t0, t3);
1164         tcg_gen_sub_tl(t2, t1, t2);
1165         break;
1166     }
1167 
1168     if (fractional) {
1169         TCGLabel *l_done = gen_new_label();
1170         TCGv rounding = tcg_temp_new();
1171 
1172         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1173         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1174         if (packed_result) {
1175             TCGLabel *l_apply_bias_l = gen_new_label();
1176             TCGLabel *l_apply_bias_r = gen_new_label();
1177             TCGLabel *l_half_done = gen_new_label();
1178             TCGv bias = tcg_temp_new();
1179 
1180             /*
1181              * D16MACF supports unbiased rounding aka "bankers rounding",
1182              * "round to even", "convergent rounding"
1183              */
1184             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1185             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1186             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1187             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1188             gen_set_label(l_apply_bias_l);
1189             tcg_gen_addi_tl(t3, t3, 0x8000);
1190             gen_set_label(l_half_done);
1191             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1192             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1193             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1194             gen_set_label(l_apply_bias_r);
1195             tcg_gen_addi_tl(t2, t2, 0x8000);
1196         } else {
1197             /* D16MACE doesn't support unbiased rounding */
1198             tcg_gen_addi_tl(t3, t3, 0x8000);
1199             tcg_gen_addi_tl(t2, t2, 0x8000);
1200         }
1201         gen_set_label(l_done);
1202     }
1203 
1204     if (!packed_result) {
1205         gen_store_mxu_gpr(t3, XRa);
1206         gen_store_mxu_gpr(t2, XRd);
1207     } else {
1208         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1209         tcg_gen_shri_tl(t2, t2, 16);
1210         tcg_gen_or_tl(t3, t3, t2);
1211         gen_store_mxu_gpr(t3, XRa);
1212     }
1213 }
1214 
1215 /*
1216  * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1217  * unsigned 16 bit pattern multiply and add/subtract.
1218  */
1219 static void gen_mxu_d16madl(DisasContext *ctx)
1220 {
1221     TCGv t0, t1, t2, t3;
1222     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1223 
1224     t0 = tcg_temp_new();
1225     t1 = tcg_temp_new();
1226     t2 = tcg_temp_new();
1227     t3 = tcg_temp_new();
1228 
1229     XRa = extract32(ctx->opcode, 6, 4);
1230     XRb = extract32(ctx->opcode, 10, 4);
1231     XRc = extract32(ctx->opcode, 14, 4);
1232     XRd = extract32(ctx->opcode, 18, 4);
1233     optn2 = extract32(ctx->opcode, 22, 2);
1234     aptn2 = extract32(ctx->opcode, 24, 2);
1235 
1236     gen_load_mxu_gpr(t1, XRb);
1237     tcg_gen_sextract_tl(t0, t1,  0, 16);
1238     tcg_gen_sextract_tl(t1, t1, 16, 16);
1239 
1240     gen_load_mxu_gpr(t3, XRc);
1241     tcg_gen_sextract_tl(t2, t3,  0, 16);
1242     tcg_gen_sextract_tl(t3, t3, 16, 16);
1243 
1244     switch (optn2) {
1245     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1246         tcg_gen_mul_tl(t3, t1, t3);
1247         tcg_gen_mul_tl(t2, t0, t2);
1248         break;
1249     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1250         tcg_gen_mul_tl(t3, t0, t3);
1251         tcg_gen_mul_tl(t2, t0, t2);
1252         break;
1253     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1254         tcg_gen_mul_tl(t3, t1, t3);
1255         tcg_gen_mul_tl(t2, t1, t2);
1256         break;
1257     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1258         tcg_gen_mul_tl(t3, t0, t3);
1259         tcg_gen_mul_tl(t2, t1, t2);
1260         break;
1261     }
1262     tcg_gen_extract_tl(t2, t2, 0, 16);
1263     tcg_gen_extract_tl(t3, t3, 0, 16);
1264 
1265     gen_load_mxu_gpr(t1, XRa);
1266     tcg_gen_extract_tl(t0, t1,  0, 16);
1267     tcg_gen_extract_tl(t1, t1, 16, 16);
1268 
1269     switch (aptn2) {
1270     case MXU_APTN2_AA:
1271         tcg_gen_add_tl(t3, t1, t3);
1272         tcg_gen_add_tl(t2, t0, t2);
1273         break;
1274     case MXU_APTN2_AS:
1275         tcg_gen_add_tl(t3, t1, t3);
1276         tcg_gen_sub_tl(t2, t0, t2);
1277         break;
1278     case MXU_APTN2_SA:
1279         tcg_gen_sub_tl(t3, t1, t3);
1280         tcg_gen_add_tl(t2, t0, t2);
1281         break;
1282     case MXU_APTN2_SS:
1283         tcg_gen_sub_tl(t3, t1, t3);
1284         tcg_gen_sub_tl(t2, t0, t2);
1285         break;
1286     }
1287 
1288     tcg_gen_andi_tl(t2, t2, 0xffff);
1289     tcg_gen_shli_tl(t3, t3, 16);
1290     tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
1291 }
1292 
1293 /*
1294  * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1295  * signed 16 bit pattern multiply and 32-bit add/subtract.
1296  */
1297 static void gen_mxu_s16mad(DisasContext *ctx)
1298 {
1299     TCGv t0, t1;
1300     uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
1301 
1302     t0 = tcg_temp_new();
1303     t1 = tcg_temp_new();
1304 
1305     XRa = extract32(ctx->opcode, 6, 4);
1306     XRb = extract32(ctx->opcode, 10, 4);
1307     XRc = extract32(ctx->opcode, 14, 4);
1308     XRd = extract32(ctx->opcode, 18, 4);
1309     optn2 = extract32(ctx->opcode, 22, 2);
1310     aptn1 = extract32(ctx->opcode, 24, 1);
1311     pad = extract32(ctx->opcode, 25, 1);
1312 
1313     if (pad) {
1314         /* FIXME check if it influence the result */
1315     }
1316 
1317     gen_load_mxu_gpr(t0, XRb);
1318     gen_load_mxu_gpr(t1, XRc);
1319 
1320     switch (optn2) {
1321     case MXU_OPTN2_WW: /* XRB.H*XRC.H */
1322         tcg_gen_sextract_tl(t0, t0, 16, 16);
1323         tcg_gen_sextract_tl(t1, t1, 16, 16);
1324         break;
1325     case MXU_OPTN2_LW: /* XRB.L*XRC.L */
1326         tcg_gen_sextract_tl(t0, t0,  0, 16);
1327         tcg_gen_sextract_tl(t1, t1,  0, 16);
1328         break;
1329     case MXU_OPTN2_HW: /* XRB.H*XRC.L */
1330         tcg_gen_sextract_tl(t0, t0, 16, 16);
1331         tcg_gen_sextract_tl(t1, t1,  0, 16);
1332         break;
1333     case MXU_OPTN2_XW: /* XRB.L*XRC.H */
1334         tcg_gen_sextract_tl(t0, t0,  0, 16);
1335         tcg_gen_sextract_tl(t1, t1, 16, 16);
1336         break;
1337     }
1338     tcg_gen_mul_tl(t0, t0, t1);
1339 
1340     gen_load_mxu_gpr(t1, XRa);
1341 
1342     switch (aptn1) {
1343     case MXU_APTN1_A:
1344         tcg_gen_add_tl(t1, t1, t0);
1345         break;
1346     case MXU_APTN1_S:
1347         tcg_gen_sub_tl(t1, t1, t0);
1348         break;
1349     }
1350 
1351     gen_store_mxu_gpr(t1, XRd);
1352 }
1353 
1354 /*
1355  * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1356  * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1357  * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1358  *   and accumulate
1359  * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1360  *   and accumulate
1361  */
1362 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
1363 {
1364     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1365     uint32_t XRa, XRb, XRc, XRd, aptn2;
1366 
1367     t0 = tcg_temp_new();
1368     t1 = tcg_temp_new();
1369     t2 = tcg_temp_new();
1370     t3 = tcg_temp_new();
1371     t4 = tcg_temp_new();
1372     t5 = tcg_temp_new();
1373     t6 = tcg_temp_new();
1374     t7 = tcg_temp_new();
1375 
1376     XRa = extract32(ctx->opcode, 6, 4);
1377     XRb = extract32(ctx->opcode, 10, 4);
1378     XRc = extract32(ctx->opcode, 14, 4);
1379     XRd = extract32(ctx->opcode, 18, 4);
1380     aptn2 = extract32(ctx->opcode, 24, 2);
1381 
1382     gen_load_mxu_gpr(t3, XRb);
1383     gen_load_mxu_gpr(t7, XRc);
1384 
1385     if (su) {
1386         /* Q8MULSU / Q8MACSU */
1387         tcg_gen_sextract_tl(t0, t3,  0, 8);
1388         tcg_gen_sextract_tl(t1, t3,  8, 8);
1389         tcg_gen_sextract_tl(t2, t3, 16, 8);
1390         tcg_gen_sextract_tl(t3, t3, 24, 8);
1391     } else {
1392         /* Q8MUL / Q8MAC */
1393         tcg_gen_extract_tl(t0, t3,  0, 8);
1394         tcg_gen_extract_tl(t1, t3,  8, 8);
1395         tcg_gen_extract_tl(t2, t3, 16, 8);
1396         tcg_gen_extract_tl(t3, t3, 24, 8);
1397     }
1398 
1399     tcg_gen_extract_tl(t4, t7,  0, 8);
1400     tcg_gen_extract_tl(t5, t7,  8, 8);
1401     tcg_gen_extract_tl(t6, t7, 16, 8);
1402     tcg_gen_extract_tl(t7, t7, 24, 8);
1403 
1404     tcg_gen_mul_tl(t0, t0, t4);
1405     tcg_gen_mul_tl(t1, t1, t5);
1406     tcg_gen_mul_tl(t2, t2, t6);
1407     tcg_gen_mul_tl(t3, t3, t7);
1408 
1409     if (mac) {
1410         gen_load_mxu_gpr(t4, XRd);
1411         gen_load_mxu_gpr(t5, XRa);
1412         tcg_gen_extract_tl(t6, t4,  0, 16);
1413         tcg_gen_extract_tl(t7, t4, 16, 16);
1414         if (aptn2 & 1) {
1415             tcg_gen_sub_tl(t0, t6, t0);
1416             tcg_gen_sub_tl(t1, t7, t1);
1417         } else {
1418             tcg_gen_add_tl(t0, t6, t0);
1419             tcg_gen_add_tl(t1, t7, t1);
1420         }
1421         tcg_gen_extract_tl(t6, t5,  0, 16);
1422         tcg_gen_extract_tl(t7, t5, 16, 16);
1423         if (aptn2 & 2) {
1424             tcg_gen_sub_tl(t2, t6, t2);
1425             tcg_gen_sub_tl(t3, t7, t3);
1426         } else {
1427             tcg_gen_add_tl(t2, t6, t2);
1428             tcg_gen_add_tl(t3, t7, t3);
1429         }
1430     }
1431 
1432     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
1433     tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
1434 
1435     gen_store_mxu_gpr(t0, XRd);
1436     gen_store_mxu_gpr(t1, XRa);
1437 }
1438 
1439 /*
1440  * Q8MADL  XRd, XRa, XRb, XRc
1441  *   Parallel quad unsigned 8 bit multiply and accumulate.
1442  *   e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
1443  */
1444 static void gen_mxu_q8madl(DisasContext *ctx)
1445 {
1446     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1447     uint32_t XRa, XRb, XRc, XRd, aptn2;
1448 
1449     t0 = tcg_temp_new();
1450     t1 = tcg_temp_new();
1451     t2 = tcg_temp_new();
1452     t3 = tcg_temp_new();
1453     t4 = tcg_temp_new();
1454     t5 = tcg_temp_new();
1455     t6 = tcg_temp_new();
1456     t7 = tcg_temp_new();
1457 
1458     XRa = extract32(ctx->opcode, 6, 4);
1459     XRb = extract32(ctx->opcode, 10, 4);
1460     XRc = extract32(ctx->opcode, 14, 4);
1461     XRd = extract32(ctx->opcode, 18, 4);
1462     aptn2 = extract32(ctx->opcode, 24, 2);
1463 
1464     gen_load_mxu_gpr(t3, XRb);
1465     gen_load_mxu_gpr(t7, XRc);
1466 
1467     tcg_gen_extract_tl(t0, t3,  0, 8);
1468     tcg_gen_extract_tl(t1, t3,  8, 8);
1469     tcg_gen_extract_tl(t2, t3, 16, 8);
1470     tcg_gen_extract_tl(t3, t3, 24, 8);
1471 
1472     tcg_gen_extract_tl(t4, t7,  0, 8);
1473     tcg_gen_extract_tl(t5, t7,  8, 8);
1474     tcg_gen_extract_tl(t6, t7, 16, 8);
1475     tcg_gen_extract_tl(t7, t7, 24, 8);
1476 
1477     tcg_gen_mul_tl(t0, t0, t4);
1478     tcg_gen_mul_tl(t1, t1, t5);
1479     tcg_gen_mul_tl(t2, t2, t6);
1480     tcg_gen_mul_tl(t3, t3, t7);
1481 
1482     gen_load_mxu_gpr(t4, XRa);
1483     tcg_gen_extract_tl(t6, t4, 0, 8);
1484     tcg_gen_extract_tl(t7, t4, 8, 8);
1485     if (aptn2 & 1) {
1486         tcg_gen_sub_tl(t0, t6, t0);
1487         tcg_gen_sub_tl(t1, t7, t1);
1488     } else {
1489         tcg_gen_add_tl(t0, t6, t0);
1490         tcg_gen_add_tl(t1, t7, t1);
1491     }
1492     tcg_gen_extract_tl(t6, t4, 16, 8);
1493     tcg_gen_extract_tl(t7, t4, 24, 8);
1494     if (aptn2 & 2) {
1495         tcg_gen_sub_tl(t2, t6, t2);
1496         tcg_gen_sub_tl(t3, t7, t3);
1497     } else {
1498         tcg_gen_add_tl(t2, t6, t2);
1499         tcg_gen_add_tl(t3, t7, t3);
1500     }
1501 
1502     tcg_gen_andi_tl(t5, t0, 0xff);
1503     tcg_gen_deposit_tl(t5, t5, t1,  8, 8);
1504     tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
1505     tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
1506 
1507     gen_store_mxu_gpr(t5, XRd);
1508 }
1509 
1510 /*
1511  * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
1512  * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1513  *   in reversed byte seq.
1514  * S32LDI  XRa, Rb, S12 - Load a word from memory to XRF,
1515  *   post modify base address GPR.
1516  * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1517  *   post modify base address GPR and load in reversed byte seq.
1518  */
1519 static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
1520 {
1521     TCGv t0, t1;
1522     uint32_t XRa, Rb, s12;
1523 
1524     t0 = tcg_temp_new();
1525     t1 = tcg_temp_new();
1526 
1527     XRa = extract32(ctx->opcode, 6, 4);
1528     s12 = sextract32(ctx->opcode, 10, 10);
1529     Rb = extract32(ctx->opcode, 21, 5);
1530 
1531     gen_load_gpr(t0, Rb);
1532     tcg_gen_movi_tl(t1, s12 * 4);
1533     tcg_gen_add_tl(t0, t0, t1);
1534 
1535     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1536                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1537                         ctx->default_tcg_memop_mask);
1538     gen_store_mxu_gpr(t1, XRa);
1539 
1540     if (postinc) {
1541         gen_store_gpr(t0, Rb);
1542     }
1543 }
1544 
1545 /*
1546  * S32STD  XRa, Rb, S12 - Store a word from XRF to memory
1547  * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1548  *   in reversed byte seq.
1549  * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
1550  *   post modify base address GPR.
1551  * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1552  *   post modify base address GPR and store in reversed byte seq.
1553  */
1554 static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
1555 {
1556     TCGv t0, t1;
1557     uint32_t XRa, Rb, s12;
1558 
1559     t0 = tcg_temp_new();
1560     t1 = tcg_temp_new();
1561 
1562     XRa = extract32(ctx->opcode, 6, 4);
1563     s12 = sextract32(ctx->opcode, 10, 10);
1564     Rb = extract32(ctx->opcode, 21, 5);
1565 
1566     gen_load_gpr(t0, Rb);
1567     tcg_gen_movi_tl(t1, s12 * 4);
1568     tcg_gen_add_tl(t0, t0, t1);
1569 
1570     gen_load_mxu_gpr(t1, XRa);
1571     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1572                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1573                         ctx->default_tcg_memop_mask);
1574 
1575     if (postinc) {
1576         gen_store_gpr(t0, Rb);
1577     }
1578 }
1579 
1580 /*
1581  * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1582  * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1583  *   in reversed byte seq.
1584  * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1585  *   post modify base address GPR.
1586  * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1587  *   post modify base address GPR and load in reversed byte seq.
1588  */
1589 static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
1590                              bool postinc, uint32_t strd2)
1591 {
1592     TCGv t0, t1;
1593     uint32_t XRa, Rb, Rc;
1594 
1595     t0 = tcg_temp_new();
1596     t1 = tcg_temp_new();
1597 
1598     XRa = extract32(ctx->opcode, 6, 4);
1599     Rc = extract32(ctx->opcode, 16, 5);
1600     Rb = extract32(ctx->opcode, 21, 5);
1601 
1602     gen_load_gpr(t0, Rb);
1603     gen_load_gpr(t1, Rc);
1604     tcg_gen_shli_tl(t1, t1, strd2);
1605     tcg_gen_add_tl(t0, t0, t1);
1606 
1607     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1608                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1609                         ctx->default_tcg_memop_mask);
1610     gen_store_mxu_gpr(t1, XRa);
1611 
1612     if (postinc) {
1613         gen_store_gpr(t0, Rb);
1614     }
1615 }
1616 
1617 /*
1618  * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1619  * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1620  *   sign extending to GPR size.
1621  * LXH  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1622  *   sign extending to GPR size.
1623  * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1624  *   zero extending to GPR size.
1625  * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1626  *   zero extending to GPR size.
1627  */
1628 static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
1629 {
1630     TCGv t0, t1;
1631     uint32_t Ra, Rb, Rc;
1632 
1633     t0 = tcg_temp_new();
1634     t1 = tcg_temp_new();
1635 
1636     Ra = extract32(ctx->opcode, 11, 5);
1637     Rc = extract32(ctx->opcode, 16, 5);
1638     Rb = extract32(ctx->opcode, 21, 5);
1639 
1640     gen_load_gpr(t0, Rb);
1641     gen_load_gpr(t1, Rc);
1642     tcg_gen_shli_tl(t1, t1, strd2);
1643     tcg_gen_add_tl(t0, t0, t1);
1644 
1645     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
1646     gen_store_gpr(t1, Ra);
1647 }
1648 
1649 /*
1650  * S32STDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1651  * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1652  *   in reversed byte seq.
1653  * S32SDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1654  *   post modify base address GPR.
1655  * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1656  *   post modify base address GPR and store in reversed byte seq.
1657  */
1658 static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
1659                              bool postinc, uint32_t strd2)
1660 {
1661     TCGv t0, t1;
1662     uint32_t XRa, Rb, Rc;
1663 
1664     t0 = tcg_temp_new();
1665     t1 = tcg_temp_new();
1666 
1667     XRa = extract32(ctx->opcode, 6, 4);
1668     Rc = extract32(ctx->opcode, 16, 5);
1669     Rb = extract32(ctx->opcode, 21, 5);
1670 
1671     gen_load_gpr(t0, Rb);
1672     gen_load_gpr(t1, Rc);
1673     tcg_gen_shli_tl(t1, t1, strd2);
1674     tcg_gen_add_tl(t0, t0, t1);
1675 
1676     gen_load_mxu_gpr(t1, XRa);
1677     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1678                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1679                         ctx->default_tcg_memop_mask);
1680 
1681     if (postinc) {
1682         gen_store_gpr(t0, Rb);
1683     }
1684 }
1685 
1686 /*
1687  *                 MXU instruction category: logic
1688  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1689  *
1690  *               S32NOR    S32AND    S32OR    S32XOR
1691  */
1692 
1693 /*
1694  *  S32NOR XRa, XRb, XRc
1695  *    Update XRa with the result of logical bitwise 'nor' operation
1696  *    applied to the content of XRb and XRc.
1697  */
1698 static void gen_mxu_S32NOR(DisasContext *ctx)
1699 {
1700     uint32_t pad, XRc, XRb, XRa;
1701 
1702     pad = extract32(ctx->opcode, 21, 5);
1703     XRc = extract32(ctx->opcode, 14, 4);
1704     XRb = extract32(ctx->opcode, 10, 4);
1705     XRa = extract32(ctx->opcode,  6, 4);
1706 
1707     if (unlikely(pad != 0)) {
1708         /* opcode padding incorrect -> do nothing */
1709     } else if (unlikely(XRa == 0)) {
1710         /* destination is zero register -> do nothing */
1711     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1712         /* both operands zero registers -> just set destination to all 1s */
1713         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
1714     } else if (unlikely(XRb == 0)) {
1715         /* XRb zero register -> just set destination to the negation of XRc */
1716         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1717     } else if (unlikely(XRc == 0)) {
1718         /* XRa zero register -> just set destination to the negation of XRb */
1719         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1720     } else if (unlikely(XRb == XRc)) {
1721         /* both operands same -> just set destination to the negation of XRb */
1722         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1723     } else {
1724         /* the most general case */
1725         tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1726     }
1727 }
1728 
1729 /*
1730  *  S32AND XRa, XRb, XRc
1731  *    Update XRa with the result of logical bitwise 'and' operation
1732  *    applied to the content of XRb and XRc.
1733  */
1734 static void gen_mxu_S32AND(DisasContext *ctx)
1735 {
1736     uint32_t pad, XRc, XRb, XRa;
1737 
1738     pad = extract32(ctx->opcode, 21, 5);
1739     XRc = extract32(ctx->opcode, 14, 4);
1740     XRb = extract32(ctx->opcode, 10, 4);
1741     XRa = extract32(ctx->opcode,  6, 4);
1742 
1743     if (unlikely(pad != 0)) {
1744         /* opcode padding incorrect -> do nothing */
1745     } else if (unlikely(XRa == 0)) {
1746         /* destination is zero register -> do nothing */
1747     } else if (unlikely((XRb == 0) || (XRc == 0))) {
1748         /* one of operands zero register -> just set destination to all 0s */
1749         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1750     } else if (unlikely(XRb == XRc)) {
1751         /* both operands same -> just set destination to one of them */
1752         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1753     } else {
1754         /* the most general case */
1755         tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1756     }
1757 }
1758 
1759 /*
1760  *  S32OR XRa, XRb, XRc
1761  *    Update XRa with the result of logical bitwise 'or' operation
1762  *    applied to the content of XRb and XRc.
1763  */
1764 static void gen_mxu_S32OR(DisasContext *ctx)
1765 {
1766     uint32_t pad, XRc, XRb, XRa;
1767 
1768     pad = extract32(ctx->opcode, 21, 5);
1769     XRc = extract32(ctx->opcode, 14, 4);
1770     XRb = extract32(ctx->opcode, 10, 4);
1771     XRa = extract32(ctx->opcode,  6, 4);
1772 
1773     if (unlikely(pad != 0)) {
1774         /* opcode padding incorrect -> do nothing */
1775     } else if (unlikely(XRa == 0)) {
1776         /* destination is zero register -> do nothing */
1777     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1778         /* both operands zero registers -> just set destination to all 0s */
1779         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1780     } else if (unlikely(XRb == 0)) {
1781         /* XRb zero register -> just set destination to the content of XRc */
1782         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1783     } else if (unlikely(XRc == 0)) {
1784         /* XRc zero register -> just set destination to the content of XRb */
1785         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1786     } else if (unlikely(XRb == XRc)) {
1787         /* both operands same -> just set destination to one of them */
1788         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1789     } else {
1790         /* the most general case */
1791         tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1792     }
1793 }
1794 
1795 /*
1796  *  S32XOR XRa, XRb, XRc
1797  *    Update XRa with the result of logical bitwise 'xor' operation
1798  *    applied to the content of XRb and XRc.
1799  */
1800 static void gen_mxu_S32XOR(DisasContext *ctx)
1801 {
1802     uint32_t pad, XRc, XRb, XRa;
1803 
1804     pad = extract32(ctx->opcode, 21, 5);
1805     XRc = extract32(ctx->opcode, 14, 4);
1806     XRb = extract32(ctx->opcode, 10, 4);
1807     XRa = extract32(ctx->opcode,  6, 4);
1808 
1809     if (unlikely(pad != 0)) {
1810         /* opcode padding incorrect -> do nothing */
1811     } else if (unlikely(XRa == 0)) {
1812         /* destination is zero register -> do nothing */
1813     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1814         /* both operands zero registers -> just set destination to all 0s */
1815         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1816     } else if (unlikely(XRb == 0)) {
1817         /* XRb zero register -> just set destination to the content of XRc */
1818         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1819     } else if (unlikely(XRc == 0)) {
1820         /* XRc zero register -> just set destination to the content of XRb */
1821         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1822     } else if (unlikely(XRb == XRc)) {
1823         /* both operands same -> just set destination to all 0s */
1824         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1825     } else {
1826         /* the most general case */
1827         tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1828     }
1829 }
1830 
1831 /*
1832  *                 MXU instruction category: shift
1833  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1834  *
1835  *               D32SLL    D32SLR    D32SAR    D32SARL
1836  *               D32SLLV   D32SLRV   D32SARV   D32SARW
1837  *               Q16SLL    Q16SLR    Q16SAR
1838  *               Q16SLLV   Q16SLRV   Q16SARV
1839  */
1840 
1841 /*
1842  *  D32SLL XRa, XRd, XRb, XRc, SFT4
1843  *    Dual 32-bit shift left from XRb and XRc to SFT4
1844  *    bits (0..15). Store to XRa and XRd respectively.
1845  *  D32SLR XRa, XRd, XRb, XRc, SFT4
1846  *    Dual 32-bit shift logic right from XRb and XRc
1847  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1848  *  D32SAR XRa, XRd, XRb, XRc, SFT4
1849  *    Dual 32-bit shift arithmetic right from XRb and XRc
1850  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1851  */
1852 static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
1853 {
1854     uint32_t XRa, XRb, XRc, XRd, sft4;
1855 
1856     XRa  = extract32(ctx->opcode,  6, 4);
1857     XRb  = extract32(ctx->opcode, 10, 4);
1858     XRc  = extract32(ctx->opcode, 14, 4);
1859     XRd  = extract32(ctx->opcode, 18, 4);
1860     sft4 = extract32(ctx->opcode, 22, 4);
1861 
1862     TCGv t0 = tcg_temp_new();
1863     TCGv t1 = tcg_temp_new();
1864 
1865     gen_load_mxu_gpr(t0, XRb);
1866     gen_load_mxu_gpr(t1, XRc);
1867 
1868     if (right) {
1869         if (arithmetic) {
1870             tcg_gen_sari_tl(t0, t0, sft4);
1871             tcg_gen_sari_tl(t1, t1, sft4);
1872         } else {
1873             tcg_gen_shri_tl(t0, t0, sft4);
1874             tcg_gen_shri_tl(t1, t1, sft4);
1875         }
1876     } else {
1877         tcg_gen_shli_tl(t0, t0, sft4);
1878         tcg_gen_shli_tl(t1, t1, sft4);
1879     }
1880     gen_store_mxu_gpr(t0, XRa);
1881     gen_store_mxu_gpr(t1, XRd);
1882 }
1883 
1884 /*
1885  *  D32SLLV XRa, XRd, rs
1886  *    Dual 32-bit shift left from XRa and XRd to rs[3:0]
1887  *    bits. Store back to XRa and XRd respectively.
1888  *  D32SLRV XRa, XRd, rs
1889  *    Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1890  *    bits. Store back to XRa and XRd respectively.
1891  *  D32SARV XRa, XRd, rs
1892  *    Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1893  *    bits. Store back to XRa and XRd respectively.
1894  */
1895 static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
1896 {
1897     uint32_t XRa, XRd, rs;
1898 
1899     XRa = extract32(ctx->opcode, 10, 4);
1900     XRd = extract32(ctx->opcode, 14, 4);
1901     rs  = extract32(ctx->opcode, 21, 5);
1902 
1903     TCGv t0 = tcg_temp_new();
1904     TCGv t1 = tcg_temp_new();
1905     TCGv t2 = tcg_temp_new();
1906 
1907     gen_load_mxu_gpr(t0, XRa);
1908     gen_load_mxu_gpr(t1, XRd);
1909     gen_load_gpr(t2, rs);
1910     tcg_gen_andi_tl(t2, t2, 0x0f);
1911 
1912     if (right) {
1913         if (arithmetic) {
1914             tcg_gen_sar_tl(t0, t0, t2);
1915             tcg_gen_sar_tl(t1, t1, t2);
1916         } else {
1917             tcg_gen_shr_tl(t0, t0, t2);
1918             tcg_gen_shr_tl(t1, t1, t2);
1919         }
1920     } else {
1921         tcg_gen_shl_tl(t0, t0, t2);
1922         tcg_gen_shl_tl(t1, t1, t2);
1923     }
1924     gen_store_mxu_gpr(t0, XRa);
1925     gen_store_mxu_gpr(t1, XRd);
1926 }
1927 
1928 /*
1929  *  D32SARL XRa, XRb, XRc, SFT4
1930  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1931  *    to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1932  *
1933  *  D32SARW XRa, XRb, XRc, rb
1934  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1935  *    to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1936  */
1937 static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
1938 {
1939     uint32_t XRa, XRb, XRc, rb;
1940 
1941     XRa = extract32(ctx->opcode,  6, 4);
1942     XRb = extract32(ctx->opcode, 10, 4);
1943     XRc = extract32(ctx->opcode, 14, 4);
1944     rb  = extract32(ctx->opcode, 21, 5);
1945 
1946     if (unlikely(XRa == 0)) {
1947         /* destination is zero register -> do nothing */
1948     } else {
1949         TCGv t0 = tcg_temp_new();
1950         TCGv t1 = tcg_temp_new();
1951         TCGv t2 = tcg_temp_new();
1952 
1953         if (!sarw) {
1954             /* Make SFT4 from rb field */
1955             tcg_gen_movi_tl(t2, rb >> 1);
1956         } else {
1957             gen_load_gpr(t2, rb);
1958             tcg_gen_andi_tl(t2, t2, 0x0f);
1959         }
1960         gen_load_mxu_gpr(t0, XRb);
1961         gen_load_mxu_gpr(t1, XRc);
1962         tcg_gen_sar_tl(t0, t0, t2);
1963         tcg_gen_sar_tl(t1, t1, t2);
1964         tcg_gen_extract_tl(t2, t1, 0, 16);
1965         tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
1966         gen_store_mxu_gpr(t2, XRa);
1967     }
1968 }
1969 
1970 /*
1971  *  Q16SLL XRa, XRd, XRb, XRc, SFT4
1972  *    Quad 16-bit shift left from XRb and XRc to SFT4
1973  *    bits (0..15). Store to XRa and XRd respectively.
1974  *  Q16SLR XRa, XRd, XRb, XRc, SFT4
1975  *    Quad 16-bit shift logic right from XRb and XRc
1976  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1977  *  Q16SAR XRa, XRd, XRb, XRc, SFT4
1978  *    Quad 16-bit shift arithmetic right from XRb and XRc
1979  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1980  */
1981 static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
1982 {
1983     uint32_t XRa, XRb, XRc, XRd, sft4;
1984 
1985     XRa  = extract32(ctx->opcode,  6, 4);
1986     XRb  = extract32(ctx->opcode, 10, 4);
1987     XRc  = extract32(ctx->opcode, 14, 4);
1988     XRd  = extract32(ctx->opcode, 18, 4);
1989     sft4 = extract32(ctx->opcode, 22, 4);
1990 
1991     TCGv t0 = tcg_temp_new();
1992     TCGv t1 = tcg_temp_new();
1993     TCGv t2 = tcg_temp_new();
1994     TCGv t3 = tcg_temp_new();
1995 
1996     gen_load_mxu_gpr(t0, XRb);
1997     gen_load_mxu_gpr(t2, XRc);
1998 
1999     if (arithmetic) {
2000         tcg_gen_sextract_tl(t1, t0, 16, 16);
2001         tcg_gen_sextract_tl(t0, t0,  0, 16);
2002         tcg_gen_sextract_tl(t3, t2, 16, 16);
2003         tcg_gen_sextract_tl(t2, t2,  0, 16);
2004     } else {
2005         tcg_gen_extract_tl(t1, t0, 16, 16);
2006         tcg_gen_extract_tl(t0, t0,  0, 16);
2007         tcg_gen_extract_tl(t3, t2, 16, 16);
2008         tcg_gen_extract_tl(t2, t2,  0, 16);
2009     }
2010 
2011     if (right) {
2012         if (arithmetic) {
2013             tcg_gen_sari_tl(t0, t0, sft4);
2014             tcg_gen_sari_tl(t1, t1, sft4);
2015             tcg_gen_sari_tl(t2, t2, sft4);
2016             tcg_gen_sari_tl(t3, t3, sft4);
2017         } else {
2018             tcg_gen_shri_tl(t0, t0, sft4);
2019             tcg_gen_shri_tl(t1, t1, sft4);
2020             tcg_gen_shri_tl(t2, t2, sft4);
2021             tcg_gen_shri_tl(t3, t3, sft4);
2022         }
2023     } else {
2024         tcg_gen_shli_tl(t0, t0, sft4);
2025         tcg_gen_shli_tl(t1, t1, sft4);
2026         tcg_gen_shli_tl(t2, t2, sft4);
2027         tcg_gen_shli_tl(t3, t3, sft4);
2028     }
2029     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2030     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2031 
2032     gen_store_mxu_gpr(t0, XRa);
2033     gen_store_mxu_gpr(t2, XRd);
2034 }
2035 
2036 /*
2037  *  Q16SLLV XRa, XRd, rs
2038  *    Quad 16-bit shift left from XRa and XRd to rs[3:0]
2039  *    bits. Store to XRa and XRd respectively.
2040  *  Q16SLRV XRa, XRd, rs
2041  *    Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
2042  *    bits. Store to XRa and XRd respectively.
2043  *  Q16SARV XRa, XRd, rs
2044  *    Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
2045  *    bits. Store to XRa and XRd respectively.
2046  */
2047 static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
2048 {
2049     uint32_t XRa, XRd, rs;
2050 
2051     XRa = extract32(ctx->opcode, 10, 4);
2052     XRd = extract32(ctx->opcode, 14, 4);
2053     rs  = extract32(ctx->opcode, 21, 5);
2054 
2055     TCGv t0 = tcg_temp_new();
2056     TCGv t1 = tcg_temp_new();
2057     TCGv t2 = tcg_temp_new();
2058     TCGv t3 = tcg_temp_new();
2059     TCGv t5 = tcg_temp_new();
2060 
2061     gen_load_mxu_gpr(t0, XRa);
2062     gen_load_mxu_gpr(t2, XRd);
2063     gen_load_gpr(t5, rs);
2064     tcg_gen_andi_tl(t5, t5, 0x0f);
2065 
2066 
2067     if (arithmetic) {
2068         tcg_gen_sextract_tl(t1, t0, 16, 16);
2069         tcg_gen_sextract_tl(t0, t0,  0, 16);
2070         tcg_gen_sextract_tl(t3, t2, 16, 16);
2071         tcg_gen_sextract_tl(t2, t2,  0, 16);
2072     } else {
2073         tcg_gen_extract_tl(t1, t0, 16, 16);
2074         tcg_gen_extract_tl(t0, t0,  0, 16);
2075         tcg_gen_extract_tl(t3, t2, 16, 16);
2076         tcg_gen_extract_tl(t2, t2,  0, 16);
2077     }
2078 
2079     if (right) {
2080         if (arithmetic) {
2081             tcg_gen_sar_tl(t0, t0, t5);
2082             tcg_gen_sar_tl(t1, t1, t5);
2083             tcg_gen_sar_tl(t2, t2, t5);
2084             tcg_gen_sar_tl(t3, t3, t5);
2085         } else {
2086             tcg_gen_shr_tl(t0, t0, t5);
2087             tcg_gen_shr_tl(t1, t1, t5);
2088             tcg_gen_shr_tl(t2, t2, t5);
2089             tcg_gen_shr_tl(t3, t3, t5);
2090         }
2091     } else {
2092         tcg_gen_shl_tl(t0, t0, t5);
2093         tcg_gen_shl_tl(t1, t1, t5);
2094         tcg_gen_shl_tl(t2, t2, t5);
2095         tcg_gen_shl_tl(t3, t3, t5);
2096     }
2097     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2098     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2099 
2100     gen_store_mxu_gpr(t0, XRa);
2101     gen_store_mxu_gpr(t2, XRd);
2102 }
2103 
2104 /*
2105  *                   MXU instruction category max/min/avg
2106  *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107  *
2108  *                     S32MAX     D16MAX     Q8MAX
2109  *                     S32MIN     D16MIN     Q8MIN
2110  *                     S32SLT     D16SLT     Q8SLT
2111  *                                           Q8SLTU
2112  *                                D16AVG     Q8AVG
2113  *                                D16AVGR    Q8AVGR
2114  *                     S32MOVZ    D16MOVZ    Q8MOVZ
2115  *                     S32MOVN    D16MOVN    Q8MOVN
2116  */
2117 
2118 /*
2119  *  S32MAX XRa, XRb, XRc
2120  *    Update XRa with the maximum of signed 32-bit integers contained
2121  *    in XRb and XRc.
2122  *
2123  *  S32MIN XRa, XRb, XRc
2124  *    Update XRa with the minimum of signed 32-bit integers contained
2125  *    in XRb and XRc.
2126  */
2127 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
2128 {
2129     uint32_t pad, opc, XRc, XRb, XRa;
2130 
2131     pad = extract32(ctx->opcode, 21, 5);
2132     opc = extract32(ctx->opcode, 18, 3);
2133     XRc = extract32(ctx->opcode, 14, 4);
2134     XRb = extract32(ctx->opcode, 10, 4);
2135     XRa = extract32(ctx->opcode,  6, 4);
2136 
2137     if (unlikely(pad != 0)) {
2138         /* opcode padding incorrect -> do nothing */
2139     } else if (unlikely(XRa == 0)) {
2140         /* destination is zero register -> do nothing */
2141     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2142         /* both operands zero registers -> just set destination to zero */
2143         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2144     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2145         /* exactly one operand is zero register - find which one is not...*/
2146         uint32_t XRx = XRb ? XRb : XRc;
2147         /* ...and do max/min operation with one operand 0 */
2148         if (opc == OPC_MXU_S32MAX) {
2149             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2150         } else {
2151             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2152         }
2153     } else if (unlikely(XRb == XRc)) {
2154         /* both operands same -> just set destination to one of them */
2155         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2156     } else {
2157         /* the most general case */
2158         if (opc == OPC_MXU_S32MAX) {
2159             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2160                                                mxu_gpr[XRc - 1]);
2161         } else {
2162             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2163                                                mxu_gpr[XRc - 1]);
2164         }
2165     }
2166 }
2167 
2168 /*
2169  *  D16MAX
2170  *    Update XRa with the 16-bit-wise maximums of signed integers
2171  *    contained in XRb and XRc.
2172  *
2173  *  D16MIN
2174  *    Update XRa with the 16-bit-wise minimums of signed integers
2175  *    contained in XRb and XRc.
2176  */
2177 static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
2178 {
2179     uint32_t pad, opc, XRc, XRb, XRa;
2180 
2181     pad = extract32(ctx->opcode, 21, 5);
2182     opc = extract32(ctx->opcode, 18, 3);
2183     XRc = extract32(ctx->opcode, 14, 4);
2184     XRb = extract32(ctx->opcode, 10, 4);
2185     XRa = extract32(ctx->opcode,  6, 4);
2186 
2187     if (unlikely(pad != 0)) {
2188         /* opcode padding incorrect -> do nothing */
2189     } else if (unlikely(XRa == 0)) {
2190         /* destination is zero register -> do nothing */
2191     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2192         /* both operands zero registers -> just set destination to zero */
2193         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2194     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2195         /* exactly one operand is zero register - find which one is not...*/
2196         uint32_t XRx = XRb ? XRb : XRc;
2197         /* ...and do half-word-wise max/min with one operand 0 */
2198         TCGv_i32 t0 = tcg_temp_new();
2199         TCGv_i32 t1 = tcg_constant_i32(0);
2200         TCGv_i32 t2 = tcg_temp_new();
2201 
2202         /* the left half-word first */
2203         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
2204         if (opc == OPC_MXU_D16MAX) {
2205             tcg_gen_smax_i32(t2, t0, t1);
2206         } else {
2207             tcg_gen_smin_i32(t2, t0, t1);
2208         }
2209 
2210         /* the right half-word */
2211         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
2212         /* move half-words to the leftmost position */
2213         tcg_gen_shli_i32(t0, t0, 16);
2214         /* t0 will be max/min of t0 and t1 */
2215         if (opc == OPC_MXU_D16MAX) {
2216             tcg_gen_smax_i32(t0, t0, t1);
2217         } else {
2218             tcg_gen_smin_i32(t0, t0, t1);
2219         }
2220         /* return resulting half-words to its original position */
2221         tcg_gen_shri_i32(t0, t0, 16);
2222         /* finally update the destination */
2223         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2224     } else if (unlikely(XRb == XRc)) {
2225         /* both operands same -> just set destination to one of them */
2226         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2227     } else {
2228         /* the most general case */
2229         TCGv_i32 t0 = tcg_temp_new();
2230         TCGv_i32 t1 = tcg_temp_new();
2231         TCGv_i32 t2 = tcg_temp_new();
2232 
2233         /* the left half-word first */
2234         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
2235         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
2236         if (opc == OPC_MXU_D16MAX) {
2237             tcg_gen_smax_i32(t2, t0, t1);
2238         } else {
2239             tcg_gen_smin_i32(t2, t0, t1);
2240         }
2241 
2242         /* the right half-word */
2243         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
2244         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
2245         /* move half-words to the leftmost position */
2246         tcg_gen_shli_i32(t0, t0, 16);
2247         tcg_gen_shli_i32(t1, t1, 16);
2248         /* t0 will be max/min of t0 and t1 */
2249         if (opc == OPC_MXU_D16MAX) {
2250             tcg_gen_smax_i32(t0, t0, t1);
2251         } else {
2252             tcg_gen_smin_i32(t0, t0, t1);
2253         }
2254         /* return resulting half-words to its original position */
2255         tcg_gen_shri_i32(t0, t0, 16);
2256         /* finally update the destination */
2257         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2258     }
2259 }
2260 
2261 /*
2262  *  Q8MAX
2263  *    Update XRa with the 8-bit-wise maximums of signed integers
2264  *    contained in XRb and XRc.
2265  *
2266  *  Q8MIN
2267  *    Update XRa with the 8-bit-wise minimums of signed integers
2268  *    contained in XRb and XRc.
2269  */
2270 static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
2271 {
2272     uint32_t pad, opc, XRc, XRb, XRa;
2273 
2274     pad = extract32(ctx->opcode, 21, 5);
2275     opc = extract32(ctx->opcode, 18, 3);
2276     XRc = extract32(ctx->opcode, 14, 4);
2277     XRb = extract32(ctx->opcode, 10, 4);
2278     XRa = extract32(ctx->opcode,  6, 4);
2279 
2280     if (unlikely(pad != 0)) {
2281         /* opcode padding incorrect -> do nothing */
2282     } else if (unlikely(XRa == 0)) {
2283         /* destination is zero register -> do nothing */
2284     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2285         /* both operands zero registers -> just set destination to zero */
2286         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2287     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2288         /* exactly one operand is zero register - make it be the first...*/
2289         uint32_t XRx = XRb ? XRb : XRc;
2290         /* ...and do byte-wise max/min with one operand 0 */
2291         TCGv_i32 t0 = tcg_temp_new();
2292         TCGv_i32 t1 = tcg_constant_i32(0);
2293         TCGv_i32 t2 = tcg_temp_new();
2294         int32_t i;
2295 
2296         /* the leftmost byte (byte 3) first */
2297         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
2298         if (opc == OPC_MXU_Q8MAX) {
2299             tcg_gen_smax_i32(t2, t0, t1);
2300         } else {
2301             tcg_gen_smin_i32(t2, t0, t1);
2302         }
2303 
2304         /* bytes 2, 1, 0 */
2305         for (i = 2; i >= 0; i--) {
2306             /* extract the byte */
2307             tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
2308             /* move the byte to the leftmost position */
2309             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2310             /* t0 will be max/min of t0 and t1 */
2311             if (opc == OPC_MXU_Q8MAX) {
2312                 tcg_gen_smax_i32(t0, t0, t1);
2313             } else {
2314                 tcg_gen_smin_i32(t0, t0, t1);
2315             }
2316             /* return resulting byte to its original position */
2317             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2318             /* finally update the destination */
2319             tcg_gen_or_i32(t2, t2, t0);
2320         }
2321         gen_store_mxu_gpr(t2, XRa);
2322     } else if (unlikely(XRb == XRc)) {
2323         /* both operands same -> just set destination to one of them */
2324         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2325     } else {
2326         /* the most general case */
2327         TCGv_i32 t0 = tcg_temp_new();
2328         TCGv_i32 t1 = tcg_temp_new();
2329         TCGv_i32 t2 = tcg_temp_new();
2330         int32_t i;
2331 
2332         /* the leftmost bytes (bytes 3) first */
2333         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
2334         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
2335         if (opc == OPC_MXU_Q8MAX) {
2336             tcg_gen_smax_i32(t2, t0, t1);
2337         } else {
2338             tcg_gen_smin_i32(t2, t0, t1);
2339         }
2340 
2341         /* bytes 2, 1, 0 */
2342         for (i = 2; i >= 0; i--) {
2343             /* extract corresponding bytes */
2344             tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
2345             tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
2346             /* move the bytes to the leftmost position */
2347             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2348             tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
2349             /* t0 will be max/min of t0 and t1 */
2350             if (opc == OPC_MXU_Q8MAX) {
2351                 tcg_gen_smax_i32(t0, t0, t1);
2352             } else {
2353                 tcg_gen_smin_i32(t0, t0, t1);
2354             }
2355             /* return resulting byte to its original position */
2356             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2357             /* finally update the destination */
2358             tcg_gen_or_i32(t2, t2, t0);
2359         }
2360         gen_store_mxu_gpr(t2, XRa);
2361     }
2362 }
2363 
2364 /*
2365  *  Q8SLT
2366  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2367  *    on per-byte basis.
2368  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2369  *
2370  *  Q8SLTU
2371  *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
2372  *    on per-byte basis.
2373  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2374  */
2375 static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
2376 {
2377     uint32_t pad, XRc, XRb, XRa;
2378 
2379     pad = extract32(ctx->opcode, 21, 5);
2380     XRc = extract32(ctx->opcode, 14, 4);
2381     XRb = extract32(ctx->opcode, 10, 4);
2382     XRa = extract32(ctx->opcode,  6, 4);
2383 
2384     if (unlikely(pad != 0)) {
2385         /* opcode padding incorrect -> do nothing */
2386     } else if (unlikely(XRa == 0)) {
2387         /* destination is zero register -> do nothing */
2388     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2389         /* both operands zero registers -> just set destination to zero */
2390         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2391     } else if (unlikely(XRb == XRc)) {
2392         /* both operands same registers -> just set destination to zero */
2393         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2394     } else {
2395         /* the most general case */
2396         TCGv t0 = tcg_temp_new();
2397         TCGv t1 = tcg_temp_new();
2398         TCGv t2 = tcg_temp_new();
2399         TCGv t3 = tcg_temp_new();
2400         TCGv t4 = tcg_temp_new();
2401 
2402         gen_load_mxu_gpr(t3, XRb);
2403         gen_load_mxu_gpr(t4, XRc);
2404         tcg_gen_movi_tl(t2, 0);
2405 
2406         for (int i = 0; i < 4; i++) {
2407             if (sltu) {
2408                 tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2409                 tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2410             } else {
2411                 tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
2412                 tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
2413             }
2414             tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2415             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2416         }
2417         gen_store_mxu_gpr(t2, XRa);
2418     }
2419 }
2420 
2421 /*
2422  *  S32SLT
2423  *    Update XRa with the signed "set less than" comparison of XRb and XRc.
2424  *    a.k.a. XRa = XRb < XRc ? 1 : 0;
2425  */
2426 static void gen_mxu_S32SLT(DisasContext *ctx)
2427 {
2428     uint32_t pad, XRc, XRb, XRa;
2429 
2430     pad = extract32(ctx->opcode, 21, 5);
2431     XRc = extract32(ctx->opcode, 14, 4);
2432     XRb = extract32(ctx->opcode, 10, 4);
2433     XRa = extract32(ctx->opcode,  6, 4);
2434 
2435     if (unlikely(pad != 0)) {
2436         /* opcode padding incorrect -> do nothing */
2437     } else if (unlikely(XRa == 0)) {
2438         /* destination is zero register -> do nothing */
2439     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2440         /* both operands zero registers -> just set destination to zero */
2441         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2442     } else if (unlikely(XRb == XRc)) {
2443         /* both operands same registers -> just set destination to zero */
2444         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2445     } else {
2446         /* the most general case */
2447         TCGv t0 = tcg_temp_new();
2448         TCGv t1 = tcg_temp_new();
2449 
2450         gen_load_mxu_gpr(t0, XRb);
2451         gen_load_mxu_gpr(t1, XRc);
2452         tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], t0, t1);
2453     }
2454 }
2455 
2456 /*
2457  *  D16SLT
2458  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2459  *    on per-word basis.
2460  *    a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2461  */
2462 static void gen_mxu_D16SLT(DisasContext *ctx)
2463 {
2464     uint32_t pad, XRc, XRb, XRa;
2465 
2466     pad = extract32(ctx->opcode, 21, 5);
2467     XRc = extract32(ctx->opcode, 14, 4);
2468     XRb = extract32(ctx->opcode, 10, 4);
2469     XRa = extract32(ctx->opcode,  6, 4);
2470 
2471     if (unlikely(pad != 0)) {
2472         /* opcode padding incorrect -> do nothing */
2473     } else if (unlikely(XRa == 0)) {
2474         /* destination is zero register -> do nothing */
2475     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2476         /* both operands zero registers -> just set destination to zero */
2477         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2478     } else if (unlikely(XRb == XRc)) {
2479         /* both operands same registers -> just set destination to zero */
2480         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2481     } else {
2482         /* the most general case */
2483         TCGv t0 = tcg_temp_new();
2484         TCGv t1 = tcg_temp_new();
2485         TCGv t2 = tcg_temp_new();
2486         TCGv t3 = tcg_temp_new();
2487         TCGv t4 = tcg_temp_new();
2488 
2489         gen_load_mxu_gpr(t3, XRb);
2490         gen_load_mxu_gpr(t4, XRc);
2491         tcg_gen_sextract_tl(t0, t3, 16, 16);
2492         tcg_gen_sextract_tl(t1, t4, 16, 16);
2493         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2494         tcg_gen_shli_tl(t2, t0, 16);
2495         tcg_gen_sextract_tl(t0, t3,  0, 16);
2496         tcg_gen_sextract_tl(t1, t4,  0, 16);
2497         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2498         tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
2499     }
2500 }
2501 
2502 /*
2503  *  D16AVG
2504  *    Update XRa with the signed average of XRb and XRc
2505  *    on per-word basis, rounding down.
2506  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2507  *
2508  *  D16AVGR
2509  *    Update XRa with the signed average of XRb and XRc
2510  *    on per-word basis, math rounding 4/5.
2511  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2512  */
2513 static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
2514 {
2515     uint32_t pad, XRc, XRb, XRa;
2516 
2517     pad = extract32(ctx->opcode, 21, 5);
2518     XRc = extract32(ctx->opcode, 14, 4);
2519     XRb = extract32(ctx->opcode, 10, 4);
2520     XRa = extract32(ctx->opcode,  6, 4);
2521 
2522     if (unlikely(pad != 0)) {
2523         /* opcode padding incorrect -> do nothing */
2524     } else if (unlikely(XRa == 0)) {
2525         /* destination is zero register -> do nothing */
2526     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2527         /* both operands zero registers -> just set destination to zero */
2528         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2529     } else if (unlikely(XRb == XRc)) {
2530         /* both operands same registers -> just set destination to same */
2531         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2532     } else {
2533         /* the most general case */
2534         TCGv t0 = tcg_temp_new();
2535         TCGv t1 = tcg_temp_new();
2536         TCGv t2 = tcg_temp_new();
2537         TCGv t3 = tcg_temp_new();
2538         TCGv t4 = tcg_temp_new();
2539 
2540         gen_load_mxu_gpr(t3, XRb);
2541         gen_load_mxu_gpr(t4, XRc);
2542         tcg_gen_sextract_tl(t0, t3, 16, 16);
2543         tcg_gen_sextract_tl(t1, t4, 16, 16);
2544         tcg_gen_add_tl(t0, t0, t1);
2545         if (round45) {
2546             tcg_gen_addi_tl(t0, t0, 1);
2547         }
2548         tcg_gen_shli_tl(t2, t0, 15);
2549         tcg_gen_andi_tl(t2, t2, 0xffff0000);
2550         tcg_gen_sextract_tl(t0, t3,  0, 16);
2551         tcg_gen_sextract_tl(t1, t4,  0, 16);
2552         tcg_gen_add_tl(t0, t0, t1);
2553         if (round45) {
2554             tcg_gen_addi_tl(t0, t0, 1);
2555         }
2556         tcg_gen_shri_tl(t0, t0, 1);
2557         tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
2558         gen_store_mxu_gpr(t2, XRa);
2559     }
2560 }
2561 
2562 /*
2563  *  Q8AVG
2564  *    Update XRa with the signed average of XRb and XRc
2565  *    on per-byte basis, rounding down.
2566  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2567  *
2568  *  Q8AVGR
2569  *    Update XRa with the signed average of XRb and XRc
2570  *    on per-word basis, math rounding 4/5.
2571  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2572  */
2573 static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
2574 {
2575     uint32_t pad, XRc, XRb, XRa;
2576 
2577     pad = extract32(ctx->opcode, 21, 5);
2578     XRc = extract32(ctx->opcode, 14, 4);
2579     XRb = extract32(ctx->opcode, 10, 4);
2580     XRa = extract32(ctx->opcode,  6, 4);
2581 
2582     if (unlikely(pad != 0)) {
2583         /* opcode padding incorrect -> do nothing */
2584     } else if (unlikely(XRa == 0)) {
2585         /* destination is zero register -> do nothing */
2586     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2587         /* both operands zero registers -> just set destination to zero */
2588         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2589     } else if (unlikely(XRb == XRc)) {
2590         /* both operands same registers -> just set destination to same */
2591         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2592     } else {
2593         /* the most general case */
2594         TCGv t0 = tcg_temp_new();
2595         TCGv t1 = tcg_temp_new();
2596         TCGv t2 = tcg_temp_new();
2597         TCGv t3 = tcg_temp_new();
2598         TCGv t4 = tcg_temp_new();
2599 
2600         gen_load_mxu_gpr(t3, XRb);
2601         gen_load_mxu_gpr(t4, XRc);
2602         tcg_gen_movi_tl(t2, 0);
2603 
2604         for (int i = 0; i < 4; i++) {
2605             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2606             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2607             tcg_gen_add_tl(t0, t0, t1);
2608             if (round45) {
2609                 tcg_gen_addi_tl(t0, t0, 1);
2610             }
2611             tcg_gen_shri_tl(t0, t0, 1);
2612             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2613         }
2614         gen_store_mxu_gpr(t2, XRa);
2615     }
2616 }
2617 
2618 /*
2619  *  Q8MOVZ
2620  *    Quadruple 8-bit packed conditional move where
2621  *    XRb contains conditions, XRc what to move and
2622  *    XRa is the destination.
2623  *    a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2624  *
2625  *  Q8MOVN
2626  *    Quadruple 8-bit packed conditional move where
2627  *    XRb contains conditions, XRc what to move and
2628  *    XRa is the destination.
2629  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2630  */
2631 static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
2632 {
2633     uint32_t XRc, XRb, XRa;
2634 
2635     XRa = extract32(ctx->opcode,  6, 4);
2636     XRb = extract32(ctx->opcode, 10, 4);
2637     XRc = extract32(ctx->opcode, 14, 4);
2638 
2639     TCGv t0 = tcg_temp_new();
2640     TCGv t1 = tcg_temp_new();
2641     TCGv t2 = tcg_temp_new();
2642     TCGv t3 = tcg_temp_new();
2643     TCGLabel *l_quarterdone = gen_new_label();
2644     TCGLabel *l_halfdone = gen_new_label();
2645     TCGLabel *l_quarterrest = gen_new_label();
2646     TCGLabel *l_done = gen_new_label();
2647 
2648     gen_load_mxu_gpr(t0, XRc);
2649     gen_load_mxu_gpr(t1, XRb);
2650     gen_load_mxu_gpr(t2, XRa);
2651 
2652     tcg_gen_extract_tl(t3, t1, 24, 8);
2653     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
2654     tcg_gen_extract_tl(t3, t0, 24, 8);
2655     tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
2656 
2657     gen_set_label(l_quarterdone);
2658     tcg_gen_extract_tl(t3, t1, 16, 8);
2659     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2660     tcg_gen_extract_tl(t3, t0, 16, 8);
2661     tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
2662 
2663     gen_set_label(l_halfdone);
2664     tcg_gen_extract_tl(t3, t1, 8, 8);
2665     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
2666     tcg_gen_extract_tl(t3, t0, 8, 8);
2667     tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
2668 
2669     gen_set_label(l_quarterrest);
2670     tcg_gen_extract_tl(t3, t1, 0, 8);
2671     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2672     tcg_gen_extract_tl(t3, t0, 0, 8);
2673     tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
2674 
2675     gen_set_label(l_done);
2676     gen_store_mxu_gpr(t2, XRa);
2677 }
2678 
2679 /*
2680  *  D16MOVZ
2681  *    Double 16-bit packed conditional move where
2682  *    XRb contains conditions, XRc what to move and
2683  *    XRa is the destination.
2684  *    a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2685  *
2686  *  D16MOVN
2687  *    Double 16-bit packed conditional move where
2688  *    XRb contains conditions, XRc what to move and
2689  *    XRa is the destination.
2690  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
2691  */
2692 static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
2693 {
2694     uint32_t XRc, XRb, XRa;
2695 
2696     XRa = extract32(ctx->opcode,  6, 4);
2697     XRb = extract32(ctx->opcode, 10, 4);
2698     XRc = extract32(ctx->opcode, 14, 4);
2699 
2700     TCGv t0 = tcg_temp_new();
2701     TCGv t1 = tcg_temp_new();
2702     TCGv t2 = tcg_temp_new();
2703     TCGv t3 = tcg_temp_new();
2704     TCGLabel *l_halfdone = gen_new_label();
2705     TCGLabel *l_done = gen_new_label();
2706 
2707     gen_load_mxu_gpr(t0, XRc);
2708     gen_load_mxu_gpr(t1, XRb);
2709     gen_load_mxu_gpr(t2, XRa);
2710 
2711     tcg_gen_extract_tl(t3, t1, 16, 16);
2712     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2713     tcg_gen_extract_tl(t3, t0, 16, 16);
2714     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2715 
2716     gen_set_label(l_halfdone);
2717     tcg_gen_extract_tl(t3, t1, 0, 16);
2718     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2719     tcg_gen_extract_tl(t3, t0, 0, 16);
2720     tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
2721 
2722     gen_set_label(l_done);
2723     gen_store_mxu_gpr(t2, XRa);
2724 }
2725 
2726 /*
2727  *  S32MOVZ
2728  *    Quadruple 32-bit conditional move where
2729  *    XRb contains conditions, XRc what to move and
2730  *    XRa is the destination.
2731  *    a.k.a. if (XRb == 0) { XRa = XRc }
2732  *
2733  *  S32MOVN
2734  *    Single 32-bit conditional move where
2735  *    XRb contains conditions, XRc what to move and
2736  *    XRa is the destination.
2737  *    a.k.a. if (XRb != 0) { XRa = XRc }
2738  */
2739 static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
2740 {
2741     uint32_t XRc, XRb, XRa;
2742 
2743     XRa = extract32(ctx->opcode,  6, 4);
2744     XRb = extract32(ctx->opcode, 10, 4);
2745     XRc = extract32(ctx->opcode, 14, 4);
2746 
2747     TCGv t0 = tcg_temp_new();
2748     TCGv t1 = tcg_temp_new();
2749     TCGLabel *l_done = gen_new_label();
2750 
2751     gen_load_mxu_gpr(t0, XRc);
2752     gen_load_mxu_gpr(t1, XRb);
2753 
2754     tcg_gen_brcondi_tl(cond, t1, 0, l_done);
2755     gen_store_mxu_gpr(t0, XRa);
2756     gen_set_label(l_done);
2757 }
2758 
2759 /*
2760  *      MXU instruction category: Addition and subtraction
2761  *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2762  *
2763  *              S32CPS      D16CPS
2764  *                                       Q8ADD
2765  */
2766 
2767 /*
2768  *  S32CPS
2769  *    Update XRa if XRc < 0 by value of 0 - XRb
2770  *    else XRa = XRb
2771  */
2772 static void gen_mxu_S32CPS(DisasContext *ctx)
2773 {
2774     uint32_t pad, XRc, XRb, XRa;
2775 
2776     pad = extract32(ctx->opcode, 21, 5);
2777     XRc = extract32(ctx->opcode, 14, 4);
2778     XRb = extract32(ctx->opcode, 10, 4);
2779     XRa = extract32(ctx->opcode,  6, 4);
2780 
2781     if (unlikely(pad != 0)) {
2782         /* opcode padding incorrect -> do nothing */
2783     } else if (unlikely(XRa == 0)) {
2784         /* destination is zero register -> do nothing */
2785     } else if (unlikely(XRb == 0)) {
2786         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2787         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2788     } else if (unlikely(XRc == 0)) {
2789         /* condition always false -> just move XRb to XRa */
2790         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2791     } else {
2792         /* the most general case */
2793         TCGv t0 = tcg_temp_new();
2794         TCGLabel *l_not_less = gen_new_label();
2795         TCGLabel *l_done = gen_new_label();
2796 
2797         tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
2798         tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
2799         tcg_gen_br(l_done);
2800         gen_set_label(l_not_less);
2801         gen_load_mxu_gpr(t0, XRb);
2802         gen_set_label(l_done);
2803         gen_store_mxu_gpr(t0, XRa);
2804     }
2805 }
2806 
2807 /*
2808  *  D16CPS
2809  *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2810  *    else XRa[0..1] = XRb[0..1]
2811  */
2812 static void gen_mxu_D16CPS(DisasContext *ctx)
2813 {
2814     uint32_t pad, XRc, XRb, XRa;
2815 
2816     pad = extract32(ctx->opcode, 21, 5);
2817     XRc = extract32(ctx->opcode, 14, 4);
2818     XRb = extract32(ctx->opcode, 10, 4);
2819     XRa = extract32(ctx->opcode,  6, 4);
2820 
2821     if (unlikely(pad != 0)) {
2822         /* opcode padding incorrect -> do nothing */
2823     } else if (unlikely(XRa == 0)) {
2824         /* destination is zero register -> do nothing */
2825     } else if (unlikely(XRb == 0)) {
2826         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2827         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2828     } else if (unlikely(XRc == 0)) {
2829         /* condition always false -> just move XRb to XRa */
2830         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2831     } else {
2832         /* the most general case */
2833         TCGv t0 = tcg_temp_new();
2834         TCGv t1 = tcg_temp_new();
2835         TCGLabel *l_done_hi = gen_new_label();
2836         TCGLabel *l_not_less_lo = gen_new_label();
2837         TCGLabel *l_done_lo = gen_new_label();
2838 
2839         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
2840         tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
2841         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
2842         tcg_gen_subfi_tl(t1, 0, t1);
2843 
2844         gen_set_label(l_done_hi);
2845         tcg_gen_shli_i32(t1, t1, 16);
2846 
2847         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1],  0, 16);
2848         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
2849         tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2850         tcg_gen_subfi_tl(t0, 0, t0);
2851         tcg_gen_br(l_done_lo);
2852 
2853         gen_set_label(l_not_less_lo);
2854         tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2855 
2856         gen_set_label(l_done_lo);
2857         tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
2858     }
2859 }
2860 
2861 /*
2862  *  Q8ABD XRa, XRb, XRc
2863  *  Gets absolute difference for quadruple of 8-bit
2864  *  packed in XRb to another one in XRc,
2865  *  put the result in XRa.
2866  *  a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2867  */
2868 static void gen_mxu_Q8ABD(DisasContext *ctx)
2869 {
2870     uint32_t pad, XRc, XRb, XRa;
2871 
2872     pad = extract32(ctx->opcode, 21, 3);
2873     XRc = extract32(ctx->opcode, 14, 4);
2874     XRb = extract32(ctx->opcode, 10, 4);
2875     XRa = extract32(ctx->opcode,  6, 4);
2876 
2877     if (unlikely(pad != 0)) {
2878         /* opcode padding incorrect -> do nothing */
2879     } else if (unlikely(XRa == 0)) {
2880         /* destination is zero register -> do nothing */
2881     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2882         /* both operands zero registers -> just set destination to zero */
2883         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2884     } else {
2885         /* the most general case */
2886         TCGv t0 = tcg_temp_new();
2887         TCGv t1 = tcg_temp_new();
2888         TCGv t2 = tcg_temp_new();
2889         TCGv t3 = tcg_temp_new();
2890         TCGv t4 = tcg_temp_new();
2891 
2892         gen_load_mxu_gpr(t3, XRb);
2893         gen_load_mxu_gpr(t4, XRc);
2894         tcg_gen_movi_tl(t2, 0);
2895 
2896         for (int i = 0; i < 4; i++) {
2897             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2898             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2899 
2900             tcg_gen_sub_tl(t0, t0, t1);
2901             tcg_gen_abs_tl(t0, t0);
2902 
2903             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2904         }
2905         gen_store_mxu_gpr(t2, XRa);
2906     }
2907 }
2908 
2909 /*
2910  *  Q8ADD XRa, XRb, XRc, ptn2
2911  *  Add/subtract quadruple of 8-bit packed in XRb
2912  *  to another one in XRc, put the result in XRa.
2913  */
2914 static void gen_mxu_Q8ADD(DisasContext *ctx)
2915 {
2916     uint32_t aptn2, pad, XRc, XRb, XRa;
2917 
2918     aptn2 = extract32(ctx->opcode, 24, 2);
2919     pad   = extract32(ctx->opcode, 21, 3);
2920     XRc   = extract32(ctx->opcode, 14, 4);
2921     XRb   = extract32(ctx->opcode, 10, 4);
2922     XRa   = extract32(ctx->opcode,  6, 4);
2923 
2924     if (unlikely(pad != 0)) {
2925         /* opcode padding incorrect -> do nothing */
2926     } else if (unlikely(XRa == 0)) {
2927         /* destination is zero register -> do nothing */
2928     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2929         /* both operands zero registers -> just set destination to zero */
2930         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2931     } else {
2932         /* the most general case */
2933         TCGv t0 = tcg_temp_new();
2934         TCGv t1 = tcg_temp_new();
2935         TCGv t2 = tcg_temp_new();
2936         TCGv t3 = tcg_temp_new();
2937         TCGv t4 = tcg_temp_new();
2938 
2939         gen_load_mxu_gpr(t3, XRb);
2940         gen_load_mxu_gpr(t4, XRc);
2941 
2942         for (int i = 0; i < 4; i++) {
2943             tcg_gen_andi_tl(t0, t3, 0xff);
2944             tcg_gen_andi_tl(t1, t4, 0xff);
2945 
2946             if (i < 2) {
2947                 if (aptn2 & 0x01) {
2948                     tcg_gen_sub_tl(t0, t0, t1);
2949                 } else {
2950                     tcg_gen_add_tl(t0, t0, t1);
2951                 }
2952             } else {
2953                 if (aptn2 & 0x02) {
2954                     tcg_gen_sub_tl(t0, t0, t1);
2955                 } else {
2956                     tcg_gen_add_tl(t0, t0, t1);
2957                 }
2958             }
2959             if (i < 3) {
2960                 tcg_gen_shri_tl(t3, t3, 8);
2961                 tcg_gen_shri_tl(t4, t4, 8);
2962             }
2963             if (i > 0) {
2964                 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2965             } else {
2966                 tcg_gen_andi_tl(t0, t0, 0xff);
2967                 tcg_gen_mov_tl(t2, t0);
2968             }
2969         }
2970         gen_store_mxu_gpr(t2, XRa);
2971     }
2972 }
2973 
2974 /*
2975  *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
2976  *    Add/subtract quadruple of 8-bit packed in XRb
2977  *    to another one in XRc, with zero extending
2978  *    to 16-bit and put results as packed 16-bit data
2979  *    into XRa and XRd.
2980  *    aptn2 manages action add or subtract of pairs of data.
2981  *
2982  *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
2983  *    Add/subtract quadruple of 8-bit packed in XRb
2984  *    to another one in XRc, with zero extending
2985  *    to 16-bit and accumulate results as packed 16-bit data
2986  *    into XRa and XRd.
2987  *    aptn2 manages action add or subtract of pairs of data.
2988  */
2989 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
2990 {
2991     uint32_t aptn2, XRd, XRc, XRb, XRa;
2992 
2993     aptn2 = extract32(ctx->opcode, 24, 2);
2994     XRd   = extract32(ctx->opcode, 18, 4);
2995     XRc   = extract32(ctx->opcode, 14, 4);
2996     XRb   = extract32(ctx->opcode, 10, 4);
2997     XRa   = extract32(ctx->opcode,  6, 4);
2998 
2999     if (unlikely((XRb == 0) && (XRc == 0))) {
3000         /* both operands zero registers -> just set destination to zero */
3001         if (XRa != 0) {
3002             tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3003         }
3004         if (XRd != 0) {
3005             tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
3006         }
3007     } else {
3008         /* the most general case */
3009         TCGv t0 = tcg_temp_new();
3010         TCGv t1 = tcg_temp_new();
3011         TCGv t2 = tcg_temp_new();
3012         TCGv t3 = tcg_temp_new();
3013         TCGv t4 = tcg_temp_new();
3014         TCGv t5 = tcg_temp_new();
3015 
3016         if (XRa != 0) {
3017             gen_extract_mxu_gpr(t0, XRb, 16, 8);
3018             gen_extract_mxu_gpr(t1, XRc, 16, 8);
3019             gen_extract_mxu_gpr(t2, XRb, 24, 8);
3020             gen_extract_mxu_gpr(t3, XRc, 24, 8);
3021             if (aptn2 & 2) {
3022                 tcg_gen_sub_tl(t0, t0, t1);
3023                 tcg_gen_sub_tl(t2, t2, t3);
3024             } else {
3025                 tcg_gen_add_tl(t0, t0, t1);
3026                 tcg_gen_add_tl(t2, t2, t3);
3027             }
3028             if (accumulate) {
3029                 gen_load_mxu_gpr(t5, XRa);
3030                 tcg_gen_extract_tl(t1, t5,  0, 16);
3031                 tcg_gen_extract_tl(t3, t5, 16, 16);
3032                 tcg_gen_add_tl(t0, t0, t1);
3033                 tcg_gen_add_tl(t2, t2, t3);
3034             }
3035             tcg_gen_shli_tl(t2, t2, 16);
3036             tcg_gen_extract_tl(t0, t0, 0, 16);
3037             tcg_gen_or_tl(t4, t2, t0);
3038         }
3039         if (XRd != 0) {
3040             gen_extract_mxu_gpr(t0, XRb, 0, 8);
3041             gen_extract_mxu_gpr(t1, XRc, 0, 8);
3042             gen_extract_mxu_gpr(t2, XRb, 8, 8);
3043             gen_extract_mxu_gpr(t3, XRc, 8, 8);
3044             if (aptn2 & 1) {
3045                 tcg_gen_sub_tl(t0, t0, t1);
3046                 tcg_gen_sub_tl(t2, t2, t3);
3047             } else {
3048                 tcg_gen_add_tl(t0, t0, t1);
3049                 tcg_gen_add_tl(t2, t2, t3);
3050             }
3051             if (accumulate) {
3052                 gen_load_mxu_gpr(t5, XRd);
3053                 tcg_gen_extract_tl(t1, t5,  0, 16);
3054                 tcg_gen_extract_tl(t3, t5, 16, 16);
3055                 tcg_gen_add_tl(t0, t0, t1);
3056                 tcg_gen_add_tl(t2, t2, t3);
3057             }
3058             tcg_gen_shli_tl(t2, t2, 16);
3059             tcg_gen_extract_tl(t0, t0, 0, 16);
3060             tcg_gen_or_tl(t5, t2, t0);
3061         }
3062 
3063         gen_store_mxu_gpr(t4, XRa);
3064         gen_store_mxu_gpr(t5, XRd);
3065     }
3066 }
3067 
3068 /*
3069  *  D8SUM XRa, XRb, XRc
3070  *    Double parallel add of quadruple unsigned 8-bit together
3071  *    with zero extending to 16-bit data.
3072  *  D8SUMC XRa, XRb, XRc
3073  *    Double parallel add of quadruple unsigned 8-bit together
3074  *    with zero extending to 16-bit data and adding 2 to each
3075  *    parallel result.
3076  */
3077 static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
3078 {
3079     uint32_t pad, pad2, XRc, XRb, XRa;
3080 
3081     pad  = extract32(ctx->opcode, 24, 2);
3082     pad2 = extract32(ctx->opcode, 18, 4);
3083     XRc  = extract32(ctx->opcode, 14, 4);
3084     XRb  = extract32(ctx->opcode, 10, 4);
3085     XRa  = extract32(ctx->opcode,  6, 4);
3086 
3087     if (unlikely(pad != 0 || pad2 != 0)) {
3088         /* opcode padding incorrect -> do nothing */
3089     } else if (unlikely(XRa == 0)) {
3090         /* destination is zero register -> do nothing */
3091     } else if (unlikely((XRb == 0) && (XRc == 0))) {
3092         /* both operands zero registers -> just set destination to zero */
3093         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3094     } else {
3095         /* the most general case */
3096         TCGv t0 = tcg_temp_new();
3097         TCGv t1 = tcg_temp_new();
3098         TCGv t2 = tcg_temp_new();
3099         TCGv t3 = tcg_temp_new();
3100         TCGv t4 = tcg_temp_new();
3101         TCGv t5 = tcg_temp_new();
3102 
3103         if (XRb != 0) {
3104             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
3105             tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
3106             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
3107             tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
3108             tcg_gen_add_tl(t4, t0, t1);
3109             tcg_gen_add_tl(t4, t4, t2);
3110             tcg_gen_add_tl(t4, t4, t3);
3111         } else {
3112             tcg_gen_mov_tl(t4, 0);
3113         }
3114         if (XRc != 0) {
3115             tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
3116             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
3117             tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
3118             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
3119             tcg_gen_add_tl(t5, t0, t1);
3120             tcg_gen_add_tl(t5, t5, t2);
3121             tcg_gen_add_tl(t5, t5, t3);
3122         } else {
3123             tcg_gen_mov_tl(t5, 0);
3124         }
3125 
3126         if (sumc) {
3127             tcg_gen_addi_tl(t4, t4, 2);
3128             tcg_gen_addi_tl(t5, t5, 2);
3129         }
3130         tcg_gen_shli_tl(t4, t4, 16);
3131 
3132         tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3133     }
3134 }
3135 
3136 /*
3137  * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3138  * 16-bit pattern addition.
3139  */
3140 static void gen_mxu_q16add(DisasContext *ctx)
3141 {
3142     uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
3143 
3144     aptn2 = extract32(ctx->opcode, 24, 2);
3145     optn2 = extract32(ctx->opcode, 22, 2);
3146     XRd   = extract32(ctx->opcode, 18, 4);
3147     XRc   = extract32(ctx->opcode, 14, 4);
3148     XRb   = extract32(ctx->opcode, 10, 4);
3149     XRa   = extract32(ctx->opcode,  6, 4);
3150 
3151     TCGv t0 = tcg_temp_new();
3152     TCGv t1 = tcg_temp_new();
3153     TCGv t2 = tcg_temp_new();
3154     TCGv t3 = tcg_temp_new();
3155     TCGv t4 = tcg_temp_new();
3156     TCGv t5 = tcg_temp_new();
3157 
3158     gen_load_mxu_gpr(t1, XRb);
3159     tcg_gen_extract_tl(t0, t1,  0, 16);
3160     tcg_gen_extract_tl(t1, t1, 16, 16);
3161 
3162     gen_load_mxu_gpr(t3, XRc);
3163     tcg_gen_extract_tl(t2, t3,  0, 16);
3164     tcg_gen_extract_tl(t3, t3, 16, 16);
3165 
3166     switch (optn2) {
3167     case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
3168         tcg_gen_mov_tl(t4, t1);
3169         tcg_gen_mov_tl(t5, t0);
3170         break;
3171     case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
3172         tcg_gen_mov_tl(t4, t0);
3173         tcg_gen_mov_tl(t5, t0);
3174         break;
3175     case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
3176         tcg_gen_mov_tl(t4, t1);
3177         tcg_gen_mov_tl(t5, t1);
3178         break;
3179     case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
3180         tcg_gen_mov_tl(t4, t0);
3181         tcg_gen_mov_tl(t5, t1);
3182         break;
3183     }
3184 
3185     switch (aptn2) {
3186     case MXU_APTN2_AA: /* lop +, rop + */
3187         tcg_gen_add_tl(t0, t4, t3);
3188         tcg_gen_add_tl(t1, t5, t2);
3189         tcg_gen_add_tl(t4, t4, t3);
3190         tcg_gen_add_tl(t5, t5, t2);
3191         break;
3192     case MXU_APTN2_AS: /* lop +, rop + */
3193         tcg_gen_sub_tl(t0, t4, t3);
3194         tcg_gen_sub_tl(t1, t5, t2);
3195         tcg_gen_add_tl(t4, t4, t3);
3196         tcg_gen_add_tl(t5, t5, t2);
3197         break;
3198     case MXU_APTN2_SA: /* lop +, rop + */
3199         tcg_gen_add_tl(t0, t4, t3);
3200         tcg_gen_add_tl(t1, t5, t2);
3201         tcg_gen_sub_tl(t4, t4, t3);
3202         tcg_gen_sub_tl(t5, t5, t2);
3203         break;
3204     case MXU_APTN2_SS: /* lop +, rop + */
3205         tcg_gen_sub_tl(t0, t4, t3);
3206         tcg_gen_sub_tl(t1, t5, t2);
3207         tcg_gen_sub_tl(t4, t4, t3);
3208         tcg_gen_sub_tl(t5, t5, t2);
3209         break;
3210     }
3211 
3212     tcg_gen_shli_tl(t0, t0, 16);
3213     tcg_gen_extract_tl(t1, t1, 0, 16);
3214     tcg_gen_shli_tl(t4, t4, 16);
3215     tcg_gen_extract_tl(t5, t5, 0, 16);
3216 
3217     tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3218     tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
3219 }
3220 
3221 /*
3222  * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3223  * 16-bit addition/subtraction with accumulate.
3224  */
3225 static void gen_mxu_q16acc(DisasContext *ctx)
3226 {
3227     uint32_t aptn2, XRc, XRb, XRa, XRd;
3228 
3229     aptn2 = extract32(ctx->opcode, 24, 2);
3230     XRd   = extract32(ctx->opcode, 18, 4);
3231     XRc   = extract32(ctx->opcode, 14, 4);
3232     XRb   = extract32(ctx->opcode, 10, 4);
3233     XRa   = extract32(ctx->opcode,  6, 4);
3234 
3235     TCGv t0 = tcg_temp_new();
3236     TCGv t1 = tcg_temp_new();
3237     TCGv t2 = tcg_temp_new();
3238     TCGv t3 = tcg_temp_new();
3239     TCGv s3 = tcg_temp_new();
3240     TCGv s2 = tcg_temp_new();
3241     TCGv s1 = tcg_temp_new();
3242     TCGv s0 = tcg_temp_new();
3243 
3244     gen_load_mxu_gpr(t1, XRb);
3245     tcg_gen_extract_tl(t0, t1,  0, 16);
3246     tcg_gen_extract_tl(t1, t1, 16, 16);
3247 
3248     gen_load_mxu_gpr(t3, XRc);
3249     tcg_gen_extract_tl(t2, t3,  0, 16);
3250     tcg_gen_extract_tl(t3, t3, 16, 16);
3251 
3252     switch (aptn2) {
3253     case MXU_APTN2_AA: /* lop +, rop + */
3254         tcg_gen_add_tl(s3, t1, t3);
3255         tcg_gen_add_tl(s2, t0, t2);
3256         tcg_gen_add_tl(s1, t1, t3);
3257         tcg_gen_add_tl(s0, t0, t2);
3258         break;
3259     case MXU_APTN2_AS: /* lop +, rop - */
3260         tcg_gen_sub_tl(s3, t1, t3);
3261         tcg_gen_sub_tl(s2, t0, t2);
3262         tcg_gen_add_tl(s1, t1, t3);
3263         tcg_gen_add_tl(s0, t0, t2);
3264         break;
3265     case MXU_APTN2_SA: /* lop -, rop + */
3266         tcg_gen_add_tl(s3, t1, t3);
3267         tcg_gen_add_tl(s2, t0, t2);
3268         tcg_gen_sub_tl(s1, t1, t3);
3269         tcg_gen_sub_tl(s0, t0, t2);
3270         break;
3271     case MXU_APTN2_SS: /* lop -, rop - */
3272         tcg_gen_sub_tl(s3, t1, t3);
3273         tcg_gen_sub_tl(s2, t0, t2);
3274         tcg_gen_sub_tl(s1, t1, t3);
3275         tcg_gen_sub_tl(s0, t0, t2);
3276         break;
3277     }
3278 
3279     if (XRa != 0) {
3280         tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
3281         tcg_gen_extract_tl(t0, t0, 0, 16);
3282         tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
3283         tcg_gen_add_tl(t1, t1, s1);
3284         tcg_gen_shli_tl(t1, t1, 16);
3285         tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
3286     }
3287 
3288     if (XRd != 0) {
3289         tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
3290         tcg_gen_extract_tl(t0, t0, 0, 16);
3291         tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
3292         tcg_gen_add_tl(t1, t1, s3);
3293         tcg_gen_shli_tl(t1, t1, 16);
3294         tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
3295     }
3296 }
3297 
3298 /*
3299  * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3300  * 16-bit accumulate.
3301  */
3302 static void gen_mxu_q16accm(DisasContext *ctx)
3303 {
3304     uint32_t aptn2, XRc, XRb, XRa, XRd;
3305 
3306     aptn2 = extract32(ctx->opcode, 24, 2);
3307     XRd   = extract32(ctx->opcode, 18, 4);
3308     XRc   = extract32(ctx->opcode, 14, 4);
3309     XRb   = extract32(ctx->opcode, 10, 4);
3310     XRa   = extract32(ctx->opcode,  6, 4);
3311 
3312     TCGv t0 = tcg_temp_new();
3313     TCGv t1 = tcg_temp_new();
3314     TCGv t2 = tcg_temp_new();
3315     TCGv t3 = tcg_temp_new();
3316 
3317     gen_load_mxu_gpr(t2, XRb);
3318     gen_load_mxu_gpr(t3, XRc);
3319 
3320     if (XRa != 0) {
3321         TCGv a0 = tcg_temp_new();
3322         TCGv a1 = tcg_temp_new();
3323 
3324         tcg_gen_extract_tl(t0, t2,  0, 16);
3325         tcg_gen_extract_tl(t1, t2, 16, 16);
3326 
3327         gen_load_mxu_gpr(a1, XRa);
3328         tcg_gen_extract_tl(a0, a1,  0, 16);
3329         tcg_gen_extract_tl(a1, a1, 16, 16);
3330 
3331         if (aptn2 & 2) {
3332             tcg_gen_sub_tl(a0, a0, t0);
3333             tcg_gen_sub_tl(a1, a1, t1);
3334         } else {
3335             tcg_gen_add_tl(a0, a0, t0);
3336             tcg_gen_add_tl(a1, a1, t1);
3337         }
3338         tcg_gen_extract_tl(a0, a0, 0, 16);
3339         tcg_gen_shli_tl(a1, a1, 16);
3340         tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
3341     }
3342 
3343     if (XRd != 0) {
3344         TCGv a0 = tcg_temp_new();
3345         TCGv a1 = tcg_temp_new();
3346 
3347         tcg_gen_extract_tl(t0, t3,  0, 16);
3348         tcg_gen_extract_tl(t1, t3, 16, 16);
3349 
3350         gen_load_mxu_gpr(a1, XRd);
3351         tcg_gen_extract_tl(a0, a1,  0, 16);
3352         tcg_gen_extract_tl(a1, a1, 16, 16);
3353 
3354         if (aptn2 & 1) {
3355             tcg_gen_sub_tl(a0, a0, t0);
3356             tcg_gen_sub_tl(a1, a1, t1);
3357         } else {
3358             tcg_gen_add_tl(a0, a0, t0);
3359             tcg_gen_add_tl(a1, a1, t1);
3360         }
3361         tcg_gen_extract_tl(a0, a0, 0, 16);
3362         tcg_gen_shli_tl(a1, a1, 16);
3363         tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
3364     }
3365 }
3366 
3367 
3368 /*
3369  * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3370  * 16-bit sign extended addition and accumulate.
3371  */
3372 static void gen_mxu_d16asum(DisasContext *ctx)
3373 {
3374     uint32_t aptn2, XRc, XRb, XRa, XRd;
3375 
3376     aptn2 = extract32(ctx->opcode, 24, 2);
3377     XRd   = extract32(ctx->opcode, 18, 4);
3378     XRc   = extract32(ctx->opcode, 14, 4);
3379     XRb   = extract32(ctx->opcode, 10, 4);
3380     XRa   = extract32(ctx->opcode,  6, 4);
3381 
3382     TCGv t0 = tcg_temp_new();
3383     TCGv t1 = tcg_temp_new();
3384     TCGv t2 = tcg_temp_new();
3385     TCGv t3 = tcg_temp_new();
3386 
3387     gen_load_mxu_gpr(t2, XRb);
3388     gen_load_mxu_gpr(t3, XRc);
3389 
3390     if (XRa != 0) {
3391         tcg_gen_sextract_tl(t0, t2,  0, 16);
3392         tcg_gen_sextract_tl(t1, t2, 16, 16);
3393         tcg_gen_add_tl(t0, t0, t1);
3394         if (aptn2 & 2) {
3395             tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3396         } else {
3397             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3398         }
3399     }
3400 
3401     if (XRd != 0) {
3402         tcg_gen_sextract_tl(t0, t3,  0, 16);
3403         tcg_gen_sextract_tl(t1, t3, 16, 16);
3404         tcg_gen_add_tl(t0, t0, t1);
3405         if (aptn2 & 1) {
3406             tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3407         } else {
3408             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3409         }
3410     }
3411 }
3412 
3413 /*
3414  * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3415  * 32 bit pattern addition/subtraction, set carry.
3416  *
3417  * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3418  * 32 bit pattern addition/subtraction with carry.
3419  */
3420 static void gen_mxu_d32add(DisasContext *ctx)
3421 {
3422     uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
3423 
3424     aptn2 = extract32(ctx->opcode, 24, 2);
3425     addc  = extract32(ctx->opcode, 22, 2);
3426     XRd   = extract32(ctx->opcode, 18, 4);
3427     XRc   = extract32(ctx->opcode, 14, 4);
3428     XRb   = extract32(ctx->opcode, 10, 4);
3429     XRa   = extract32(ctx->opcode,  6, 4);
3430 
3431     TCGv t0 = tcg_temp_new();
3432     TCGv t1 = tcg_temp_new();
3433     TCGv t2 = tcg_temp_new();
3434     TCGv cr = tcg_temp_new();
3435 
3436     if (unlikely(addc > 1)) {
3437         /* opcode incorrect -> do nothing */
3438     } else if (addc == 1) {
3439         if (unlikely(XRa == 0 && XRd == 0)) {
3440             /* destinations are zero register -> do nothing */
3441         } else {
3442             /* FIXME ??? What if XRa == XRd ??? */
3443             /* aptn2 is unused here */
3444             gen_load_mxu_gpr(t0, XRb);
3445             gen_load_mxu_gpr(t1, XRc);
3446             gen_load_mxu_cr(cr);
3447             if (XRa != 0) {
3448                 tcg_gen_extract_tl(t2, cr, 31, 1);
3449                 tcg_gen_add_tl(t0, t0, t2);
3450                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3451             }
3452             if (XRd != 0) {
3453                 tcg_gen_extract_tl(t2, cr, 30, 1);
3454                 tcg_gen_add_tl(t1, t1, t2);
3455                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3456             }
3457         }
3458     } else if (unlikely(XRa == 0 && XRd == 0)) {
3459         /* destinations are zero register -> do nothing */
3460     } else {
3461         /* common case */
3462         /* FIXME ??? What if XRa == XRd ??? */
3463         TCGv carry = tcg_temp_new();
3464 
3465         gen_load_mxu_gpr(t0, XRb);
3466         gen_load_mxu_gpr(t1, XRc);
3467         gen_load_mxu_cr(cr);
3468         if (XRa != 0) {
3469             if (aptn2 & 2) {
3470                 tcg_gen_sub_i32(t2, t0, t1);
3471                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3472             } else {
3473                 tcg_gen_add_i32(t2, t0, t1);
3474                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3475             }
3476             tcg_gen_andi_tl(cr, cr, 0x7fffffff);
3477             tcg_gen_shli_tl(carry, carry, 31);
3478             tcg_gen_or_tl(cr, cr, carry);
3479             gen_store_mxu_gpr(t2, XRa);
3480         }
3481         if (XRd != 0) {
3482             if (aptn2 & 1) {
3483                 tcg_gen_sub_i32(t2, t0, t1);
3484                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3485             } else {
3486                 tcg_gen_add_i32(t2, t0, t1);
3487                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3488             }
3489             tcg_gen_andi_tl(cr, cr, 0xbfffffff);
3490             tcg_gen_shli_tl(carry, carry, 30);
3491             tcg_gen_or_tl(cr, cr, carry);
3492             gen_store_mxu_gpr(t2, XRd);
3493         }
3494         gen_store_mxu_cr(cr);
3495     }
3496 }
3497 
3498 /*
3499  * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3500  * 32 bit pattern addition/subtraction and accumulate.
3501  */
3502 static void gen_mxu_d32acc(DisasContext *ctx)
3503 {
3504     uint32_t aptn2, XRc, XRb, XRa, XRd;
3505 
3506     aptn2 = extract32(ctx->opcode, 24, 2);
3507     XRd   = extract32(ctx->opcode, 18, 4);
3508     XRc   = extract32(ctx->opcode, 14, 4);
3509     XRb   = extract32(ctx->opcode, 10, 4);
3510     XRa   = extract32(ctx->opcode,  6, 4);
3511 
3512     TCGv t0 = tcg_temp_new();
3513     TCGv t1 = tcg_temp_new();
3514     TCGv t2 = tcg_temp_new();
3515 
3516     if (unlikely(XRa == 0 && XRd == 0)) {
3517         /* destinations are zero register -> do nothing */
3518     } else {
3519         /* common case */
3520         gen_load_mxu_gpr(t0, XRb);
3521         gen_load_mxu_gpr(t1, XRc);
3522         if (XRa != 0) {
3523             if (aptn2 & 2) {
3524                 tcg_gen_sub_tl(t2, t0, t1);
3525             } else {
3526                 tcg_gen_add_tl(t2, t0, t1);
3527             }
3528             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3529         }
3530         if (XRd != 0) {
3531             if (aptn2 & 1) {
3532                 tcg_gen_sub_tl(t2, t0, t1);
3533             } else {
3534                 tcg_gen_add_tl(t2, t0, t1);
3535             }
3536             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3537         }
3538     }
3539 }
3540 
3541 /*
3542  * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3543  * 32 bit pattern addition/subtraction and accumulate.
3544  */
3545 static void gen_mxu_d32accm(DisasContext *ctx)
3546 {
3547     uint32_t aptn2, XRc, XRb, XRa, XRd;
3548 
3549     aptn2 = extract32(ctx->opcode, 24, 2);
3550     XRd   = extract32(ctx->opcode, 18, 4);
3551     XRc   = extract32(ctx->opcode, 14, 4);
3552     XRb   = extract32(ctx->opcode, 10, 4);
3553     XRa   = extract32(ctx->opcode,  6, 4);
3554 
3555     TCGv t0 = tcg_temp_new();
3556     TCGv t1 = tcg_temp_new();
3557     TCGv t2 = tcg_temp_new();
3558 
3559     if (unlikely(XRa == 0 && XRd == 0)) {
3560         /* destinations are zero register -> do nothing */
3561     } else {
3562         /* common case */
3563         gen_load_mxu_gpr(t0, XRb);
3564         gen_load_mxu_gpr(t1, XRc);
3565         if (XRa != 0) {
3566             tcg_gen_add_tl(t2, t0, t1);
3567             if (aptn2 & 2) {
3568                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3569             } else {
3570                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3571             }
3572         }
3573         if (XRd != 0) {
3574             tcg_gen_sub_tl(t2, t0, t1);
3575             if (aptn2 & 1) {
3576                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3577             } else {
3578                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3579             }
3580         }
3581     }
3582 }
3583 
3584 /*
3585  * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3586  * 32 bit pattern addition/subtraction.
3587  */
3588 static void gen_mxu_d32asum(DisasContext *ctx)
3589 {
3590     uint32_t aptn2, XRc, XRb, XRa, XRd;
3591 
3592     aptn2 = extract32(ctx->opcode, 24, 2);
3593     XRd   = extract32(ctx->opcode, 18, 4);
3594     XRc   = extract32(ctx->opcode, 14, 4);
3595     XRb   = extract32(ctx->opcode, 10, 4);
3596     XRa   = extract32(ctx->opcode,  6, 4);
3597 
3598     TCGv t0 = tcg_temp_new();
3599     TCGv t1 = tcg_temp_new();
3600 
3601     if (unlikely(XRa == 0 && XRd == 0)) {
3602         /* destinations are zero register -> do nothing */
3603     } else {
3604         /* common case */
3605         gen_load_mxu_gpr(t0, XRb);
3606         gen_load_mxu_gpr(t1, XRc);
3607         if (XRa != 0) {
3608             if (aptn2 & 2) {
3609                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3610             } else {
3611                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3612             }
3613         }
3614         if (XRd != 0) {
3615             if (aptn2 & 1) {
3616                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3617             } else {
3618                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3619             }
3620         }
3621     }
3622 }
3623 
3624 /*
3625  *                 MXU instruction category: Miscellaneous
3626  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3627  *
3628  *               S32EXTR      S32LUI
3629  *               S32EXTRV
3630  *                            Q16SAT
3631  *                            Q16SCOP
3632  */
3633 
3634 /*
3635  *  S32EXTR XRa, XRd, rs, bits5
3636  *    Extract bits5 bits from 64-bit pair {XRa:XRd}
3637  *    starting from rs[4:0] offset and put to the XRa.
3638  */
3639 static void gen_mxu_s32extr(DisasContext *ctx)
3640 {
3641     TCGv t0, t1, t2, t3;
3642     uint32_t XRa, XRd, rs, bits5;
3643 
3644     t0 = tcg_temp_new();
3645     t1 = tcg_temp_new();
3646     t2 = tcg_temp_new();
3647     t3 = tcg_temp_new();
3648 
3649     XRa   = extract32(ctx->opcode,  6, 4);
3650     XRd   = extract32(ctx->opcode, 10, 4);
3651     bits5 = extract32(ctx->opcode, 16, 5);
3652     rs    = extract32(ctx->opcode, 21, 5);
3653 
3654     /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
3655     /* {XRa} = extract({tmp}, 0, bits5); */
3656     if (bits5 > 0) {
3657         TCGLabel *l_xra_only = gen_new_label();
3658         TCGLabel *l_done = gen_new_label();
3659 
3660         gen_load_mxu_gpr(t0, XRd);
3661         gen_load_mxu_gpr(t1, XRa);
3662         gen_load_gpr(t2, rs);
3663         tcg_gen_andi_tl(t2, t2, 0x1f);
3664         tcg_gen_subfi_tl(t2, 32, t2);
3665         tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
3666         tcg_gen_subfi_tl(t2, bits5, t2);
3667         tcg_gen_subfi_tl(t3, 32, t2);
3668         tcg_gen_shr_tl(t0, t0, t3);
3669         tcg_gen_shl_tl(t1, t1, t2);
3670         tcg_gen_or_tl(t0, t0, t1);
3671         tcg_gen_br(l_done);
3672         gen_set_label(l_xra_only);
3673         tcg_gen_subi_tl(t2, t2, bits5);
3674         tcg_gen_shr_tl(t0, t1, t2);
3675         gen_set_label(l_done);
3676         tcg_gen_extract_tl(t0, t0, 0, bits5);
3677     } else {
3678         /* unspecified behavior but matches tests on real hardware*/
3679         tcg_gen_movi_tl(t0, 0);
3680     }
3681     gen_store_mxu_gpr(t0, XRa);
3682 }
3683 
3684 /*
3685  *  S32EXTRV XRa, XRd, rs, rt
3686  *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3687  *    starting from rs[4:0] offset and put to the XRa.
3688  */
3689 static void gen_mxu_s32extrv(DisasContext *ctx)
3690 {
3691     TCGv t0, t1, t2, t3, t4;
3692     uint32_t XRa, XRd, rs, rt;
3693 
3694     t0 = tcg_temp_new();
3695     t1 = tcg_temp_new();
3696     t2 = tcg_temp_new();
3697     t3 = tcg_temp_new();
3698     t4 = tcg_temp_new();
3699     TCGLabel *l_xra_only = gen_new_label();
3700     TCGLabel *l_done = gen_new_label();
3701     TCGLabel *l_zero = gen_new_label();
3702     TCGLabel *l_extract = gen_new_label();
3703 
3704     XRa = extract32(ctx->opcode,  6, 4);
3705     XRd = extract32(ctx->opcode, 10, 4);
3706     rt  = extract32(ctx->opcode, 16, 5);
3707     rs  = extract32(ctx->opcode, 21, 5);
3708 
3709     /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
3710     gen_load_mxu_gpr(t0, XRd);
3711     gen_load_mxu_gpr(t1, XRa);
3712     gen_load_gpr(t2, rs);
3713     gen_load_gpr(t4, rt);
3714     tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
3715     tcg_gen_andi_tl(t2, t2, 0x1f);
3716     tcg_gen_subfi_tl(t2, 32, t2);
3717     tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
3718     tcg_gen_sub_tl(t2, t4, t2);
3719     tcg_gen_subfi_tl(t3, 32, t2);
3720     tcg_gen_shr_tl(t0, t0, t3);
3721     tcg_gen_shl_tl(t1, t1, t2);
3722     tcg_gen_or_tl(t0, t0, t1);
3723     tcg_gen_br(l_extract);
3724 
3725     gen_set_label(l_xra_only);
3726     tcg_gen_sub_tl(t2, t2, t4);
3727     tcg_gen_shr_tl(t0, t1, t2);
3728     tcg_gen_br(l_extract);
3729 
3730     /* unspecified behavior but matches tests on real hardware*/
3731     gen_set_label(l_zero);
3732     tcg_gen_movi_tl(t0, 0);
3733     tcg_gen_br(l_done);
3734 
3735     /* {XRa} = extract({tmp}, 0, rt) */
3736     gen_set_label(l_extract);
3737     tcg_gen_subfi_tl(t4, 32, t4);
3738     tcg_gen_shl_tl(t0, t0, t4);
3739     tcg_gen_shr_tl(t0, t0, t4);
3740 
3741     gen_set_label(l_done);
3742     gen_store_mxu_gpr(t0, XRa);
3743 }
3744 
3745 /*
3746  *  S32LUI XRa, S8, optn3
3747  *    Permutate the immediate S8 value to form a word
3748  *    to update XRa.
3749  */
3750 static void gen_mxu_s32lui(DisasContext *ctx)
3751 {
3752     uint32_t XRa, s8, optn3, pad;
3753 
3754     XRa   = extract32(ctx->opcode,  6, 4);
3755     s8    = extract32(ctx->opcode, 10, 8);
3756     pad   = extract32(ctx->opcode, 21, 2);
3757     optn3 = extract32(ctx->opcode, 23, 3);
3758 
3759     if (unlikely(pad != 0)) {
3760         /* opcode padding incorrect -> do nothing */
3761     } else if (unlikely(XRa == 0)) {
3762         /* destination is zero register -> do nothing */
3763     } else {
3764         uint32_t s16;
3765         TCGv t0 = tcg_temp_new();
3766 
3767         switch (optn3) {
3768         case 0:
3769             tcg_gen_movi_tl(t0, s8);
3770             break;
3771         case 1:
3772             tcg_gen_movi_tl(t0, s8 << 8);
3773             break;
3774         case 2:
3775             tcg_gen_movi_tl(t0, s8 << 16);
3776             break;
3777         case 3:
3778             tcg_gen_movi_tl(t0, s8 << 24);
3779             break;
3780         case 4:
3781             tcg_gen_movi_tl(t0, (s8 << 16) | s8);
3782             break;
3783         case 5:
3784             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
3785             break;
3786         case 6:
3787             s16 = (uint16_t)(int16_t)(int8_t)s8;
3788             tcg_gen_movi_tl(t0, (s16 << 16) | s16);
3789             break;
3790         case 7:
3791             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
3792             break;
3793         }
3794         gen_store_mxu_gpr(t0, XRa);
3795     }
3796 }
3797 
3798 /*
3799  *  Q16SAT XRa, XRb, XRc
3800  *  Packs four 16-bit signed integers in XRb and XRc to
3801  *  four saturated unsigned 8-bit into XRa.
3802  *
3803  */
3804 static void gen_mxu_Q16SAT(DisasContext *ctx)
3805 {
3806     uint32_t pad, XRc, XRb, XRa;
3807 
3808     pad = extract32(ctx->opcode, 21, 3);
3809     XRc = extract32(ctx->opcode, 14, 4);
3810     XRb = extract32(ctx->opcode, 10, 4);
3811     XRa = extract32(ctx->opcode,  6, 4);
3812 
3813     if (unlikely(pad != 0)) {
3814         /* opcode padding incorrect -> do nothing */
3815     } else if (unlikely(XRa == 0)) {
3816         /* destination is zero register -> do nothing */
3817     } else {
3818         /* the most general case */
3819         TCGv t0 = tcg_temp_new();
3820         TCGv t1 = tcg_temp_new();
3821         TCGv t2 = tcg_temp_new();
3822 
3823         tcg_gen_movi_tl(t2, 0);
3824         if (XRb != 0) {
3825             TCGLabel *l_less_hi = gen_new_label();
3826             TCGLabel *l_less_lo = gen_new_label();
3827             TCGLabel *l_lo = gen_new_label();
3828             TCGLabel *l_greater_hi = gen_new_label();
3829             TCGLabel *l_greater_lo = gen_new_label();
3830             TCGLabel *l_done = gen_new_label();
3831 
3832             tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
3833             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3834             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3835             tcg_gen_br(l_lo);
3836             gen_set_label(l_less_hi);
3837             tcg_gen_movi_tl(t0, 0);
3838             tcg_gen_br(l_lo);
3839             gen_set_label(l_greater_hi);
3840             tcg_gen_movi_tl(t0, 255);
3841 
3842             gen_set_label(l_lo);
3843             tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
3844             tcg_gen_sari_tl(t1, t1, 16);
3845             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3846             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3847             tcg_gen_br(l_done);
3848             gen_set_label(l_less_lo);
3849             tcg_gen_movi_tl(t1, 0);
3850             tcg_gen_br(l_done);
3851             gen_set_label(l_greater_lo);
3852             tcg_gen_movi_tl(t1, 255);
3853 
3854             gen_set_label(l_done);
3855             tcg_gen_shli_tl(t2, t0, 24);
3856             tcg_gen_shli_tl(t1, t1, 16);
3857             tcg_gen_or_tl(t2, t2, t1);
3858         }
3859 
3860         if (XRc != 0) {
3861             TCGLabel *l_less_hi = gen_new_label();
3862             TCGLabel *l_less_lo = gen_new_label();
3863             TCGLabel *l_lo = gen_new_label();
3864             TCGLabel *l_greater_hi = gen_new_label();
3865             TCGLabel *l_greater_lo = gen_new_label();
3866             TCGLabel *l_done = gen_new_label();
3867 
3868             tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
3869             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3870             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3871             tcg_gen_br(l_lo);
3872             gen_set_label(l_less_hi);
3873             tcg_gen_movi_tl(t0, 0);
3874             tcg_gen_br(l_lo);
3875             gen_set_label(l_greater_hi);
3876             tcg_gen_movi_tl(t0, 255);
3877 
3878             gen_set_label(l_lo);
3879             tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
3880             tcg_gen_sari_tl(t1, t1, 16);
3881             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3882             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3883             tcg_gen_br(l_done);
3884             gen_set_label(l_less_lo);
3885             tcg_gen_movi_tl(t1, 0);
3886             tcg_gen_br(l_done);
3887             gen_set_label(l_greater_lo);
3888             tcg_gen_movi_tl(t1, 255);
3889 
3890             gen_set_label(l_done);
3891             tcg_gen_shli_tl(t0, t0, 8);
3892             tcg_gen_or_tl(t2, t2, t0);
3893             tcg_gen_or_tl(t2, t2, t1);
3894         }
3895         gen_store_mxu_gpr(t2, XRa);
3896     }
3897 }
3898 
3899 /*
3900  *  Q16SCOP XRa, XRd, XRb, XRc
3901  *    Determine sign of quad packed 16-bit signed values
3902  *    in XRb and XRc put result in XRa and XRd respectively.
3903  */
3904 static void gen_mxu_q16scop(DisasContext *ctx)
3905 {
3906     uint32_t XRd, XRc, XRb, XRa;
3907 
3908     XRd  = extract32(ctx->opcode, 18, 4);
3909     XRc  = extract32(ctx->opcode, 14, 4);
3910     XRb  = extract32(ctx->opcode, 10, 4);
3911     XRa  = extract32(ctx->opcode,  6, 4);
3912 
3913     TCGv t0 = tcg_temp_new();
3914     TCGv t1 = tcg_temp_new();
3915     TCGv t2 = tcg_temp_new();
3916     TCGv t3 = tcg_temp_new();
3917     TCGv t4 = tcg_temp_new();
3918 
3919     TCGLabel *l_b_hi_lt = gen_new_label();
3920     TCGLabel *l_b_hi_gt = gen_new_label();
3921     TCGLabel *l_b_lo = gen_new_label();
3922     TCGLabel *l_b_lo_lt = gen_new_label();
3923     TCGLabel *l_c_hi = gen_new_label();
3924     TCGLabel *l_c_hi_lt = gen_new_label();
3925     TCGLabel *l_c_hi_gt = gen_new_label();
3926     TCGLabel *l_c_lo = gen_new_label();
3927     TCGLabel *l_c_lo_lt = gen_new_label();
3928     TCGLabel *l_done = gen_new_label();
3929 
3930     gen_load_mxu_gpr(t0, XRb);
3931     gen_load_mxu_gpr(t1, XRc);
3932 
3933     tcg_gen_sextract_tl(t2, t0, 16, 16);
3934     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
3935     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
3936     tcg_gen_movi_tl(t3, 0);
3937     tcg_gen_br(l_b_lo);
3938     gen_set_label(l_b_hi_lt);
3939     tcg_gen_movi_tl(t3, 0xffff0000);
3940     tcg_gen_br(l_b_lo);
3941     gen_set_label(l_b_hi_gt);
3942     tcg_gen_movi_tl(t3, 0x00010000);
3943 
3944     gen_set_label(l_b_lo);
3945     tcg_gen_sextract_tl(t2, t0, 0, 16);
3946     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
3947     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
3948     tcg_gen_ori_tl(t3, t3, 0x00000001);
3949     tcg_gen_br(l_c_hi);
3950     gen_set_label(l_b_lo_lt);
3951     tcg_gen_ori_tl(t3, t3, 0x0000ffff);
3952     tcg_gen_br(l_c_hi);
3953 
3954     gen_set_label(l_c_hi);
3955     tcg_gen_sextract_tl(t2, t1, 16, 16);
3956     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
3957     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
3958     tcg_gen_movi_tl(t4, 0);
3959     tcg_gen_br(l_c_lo);
3960     gen_set_label(l_c_hi_lt);
3961     tcg_gen_movi_tl(t4, 0xffff0000);
3962     tcg_gen_br(l_c_lo);
3963     gen_set_label(l_c_hi_gt);
3964     tcg_gen_movi_tl(t4, 0x00010000);
3965 
3966     gen_set_label(l_c_lo);
3967     tcg_gen_sextract_tl(t2, t1, 0, 16);
3968     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
3969     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
3970     tcg_gen_ori_tl(t4, t4, 0x00000001);
3971     tcg_gen_br(l_done);
3972     gen_set_label(l_c_lo_lt);
3973     tcg_gen_ori_tl(t4, t4, 0x0000ffff);
3974 
3975     gen_set_label(l_done);
3976     gen_store_mxu_gpr(t3, XRa);
3977     gen_store_mxu_gpr(t4, XRd);
3978 }
3979 
3980 /*
3981  *  S32SFL XRa, XRd, XRb, XRc
3982  *    Shuffle bytes according to one of four patterns.
3983  */
3984 static void gen_mxu_s32sfl(DisasContext *ctx)
3985 {
3986     uint32_t XRd, XRc, XRb, XRa, ptn2;
3987 
3988     XRd  = extract32(ctx->opcode, 18, 4);
3989     XRc  = extract32(ctx->opcode, 14, 4);
3990     XRb  = extract32(ctx->opcode, 10, 4);
3991     XRa  = extract32(ctx->opcode,  6, 4);
3992     ptn2 = extract32(ctx->opcode, 24, 2);
3993 
3994     TCGv t0 = tcg_temp_new();
3995     TCGv t1 = tcg_temp_new();
3996     TCGv t2 = tcg_temp_new();
3997     TCGv t3 = tcg_temp_new();
3998 
3999     gen_load_mxu_gpr(t0, XRb);
4000     gen_load_mxu_gpr(t1, XRc);
4001 
4002     switch (ptn2) {
4003     case 0:
4004         tcg_gen_andi_tl(t2, t0, 0xff000000);
4005         tcg_gen_andi_tl(t3, t1, 0x000000ff);
4006         tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
4007         tcg_gen_shri_tl(t0, t0,  8);
4008         tcg_gen_shri_tl(t1, t1,  8);
4009         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4010         tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
4011         tcg_gen_shri_tl(t0, t0,  8);
4012         tcg_gen_shri_tl(t1, t1,  8);
4013         tcg_gen_deposit_tl(t2, t2, t0,  8, 8);
4014         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4015         tcg_gen_shri_tl(t1, t1,  8);
4016         tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4017         break;
4018     case 1:
4019         tcg_gen_andi_tl(t2, t0, 0xff000000);
4020         tcg_gen_andi_tl(t3, t1, 0x000000ff);
4021         tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
4022         tcg_gen_shri_tl(t0, t0,  8);
4023         tcg_gen_shri_tl(t1, t1,  8);
4024         tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
4025         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4026         tcg_gen_shri_tl(t0, t0,  8);
4027         tcg_gen_shri_tl(t1, t1,  8);
4028         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4029         tcg_gen_deposit_tl(t3, t3, t1,  8, 8);
4030         tcg_gen_shri_tl(t1, t1,  8);
4031         tcg_gen_deposit_tl(t2, t2, t1,  8, 8);
4032         break;
4033     case 2:
4034         tcg_gen_andi_tl(t2, t0, 0xff00ff00);
4035         tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
4036         tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
4037         tcg_gen_shri_tl(t0, t0, 16);
4038         tcg_gen_shri_tl(t1, t1,  8);
4039         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4040         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4041         tcg_gen_shri_tl(t1, t1, 16);
4042         tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4043         break;
4044     case 3:
4045         tcg_gen_andi_tl(t2, t0, 0xffff0000);
4046         tcg_gen_andi_tl(t3, t1, 0x0000ffff);
4047         tcg_gen_shri_tl(t1, t1, 16);
4048         tcg_gen_deposit_tl(t2, t2, t1,  0, 16);
4049         tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
4050         break;
4051     }
4052 
4053     gen_store_mxu_gpr(t2, XRa);
4054     gen_store_mxu_gpr(t3, XRd);
4055 }
4056 
4057 /*
4058  *  Q8SAD XRa, XRd, XRb, XRc
4059  *    Typical SAD operation for motion estimation.
4060  */
4061 static void gen_mxu_q8sad(DisasContext *ctx)
4062 {
4063     uint32_t XRd, XRc, XRb, XRa;
4064 
4065     XRd = extract32(ctx->opcode, 18, 4);
4066     XRc = extract32(ctx->opcode, 14, 4);
4067     XRb = extract32(ctx->opcode, 10, 4);
4068     XRa = extract32(ctx->opcode,  6, 4);
4069 
4070     TCGv t0 = tcg_temp_new();
4071     TCGv t1 = tcg_temp_new();
4072     TCGv t2 = tcg_temp_new();
4073     TCGv t3 = tcg_temp_new();
4074     TCGv t4 = tcg_temp_new();
4075     TCGv t5 = tcg_temp_new();
4076 
4077     gen_load_mxu_gpr(t2, XRb);
4078     gen_load_mxu_gpr(t3, XRc);
4079     gen_load_mxu_gpr(t5, XRd);
4080     tcg_gen_movi_tl(t4, 0);
4081 
4082     for (int i = 0; i < 4; i++) {
4083         tcg_gen_andi_tl(t0, t2, 0xff);
4084         tcg_gen_andi_tl(t1, t3, 0xff);
4085         tcg_gen_sub_tl(t0, t0, t1);
4086         tcg_gen_abs_tl(t0, t0);
4087         tcg_gen_add_tl(t4, t4, t0);
4088         if (i < 3) {
4089             tcg_gen_shri_tl(t2, t2, 8);
4090             tcg_gen_shri_tl(t3, t3, 8);
4091         }
4092     }
4093     tcg_gen_add_tl(t5, t5, t4);
4094     gen_store_mxu_gpr(t4, XRa);
4095     gen_store_mxu_gpr(t5, XRd);
4096 }
4097 
4098 /*
4099  *                 MXU instruction category: align
4100  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4101  *
4102  *                       S32ALN     S32ALNI
4103  */
4104 
4105 /*
4106  *  S32ALNI XRc, XRb, XRa, optn3
4107  *    Arrange bytes from XRb and XRc according to one of five sets of
4108  *    rules determined by optn3, and place the result in XRa.
4109  */
4110 static void gen_mxu_S32ALNI(DisasContext *ctx)
4111 {
4112     uint32_t optn3, pad, XRc, XRb, XRa;
4113 
4114     optn3 = extract32(ctx->opcode,  23, 3);
4115     pad   = extract32(ctx->opcode,  21, 2);
4116     XRc   = extract32(ctx->opcode, 14, 4);
4117     XRb   = extract32(ctx->opcode, 10, 4);
4118     XRa   = extract32(ctx->opcode,  6, 4);
4119 
4120     if (unlikely(pad != 0)) {
4121         /* opcode padding incorrect -> do nothing */
4122     } else if (unlikely(XRa == 0)) {
4123         /* destination is zero register -> do nothing */
4124     } else if (unlikely((XRb == 0) && (XRc == 0))) {
4125         /* both operands zero registers -> just set destination to all 0s */
4126         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4127     } else if (unlikely(XRb == 0)) {
4128         /* XRb zero register -> just appropriatelly shift XRc into XRa */
4129         switch (optn3) {
4130         case MXU_OPTN3_PTN0:
4131             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4132             break;
4133         case MXU_OPTN3_PTN1:
4134         case MXU_OPTN3_PTN2:
4135         case MXU_OPTN3_PTN3:
4136             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
4137                              8 * (4 - optn3));
4138             break;
4139         case MXU_OPTN3_PTN4:
4140             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4141             break;
4142         }
4143     } else if (unlikely(XRc == 0)) {
4144         /* XRc zero register -> just appropriatelly shift XRb into XRa */
4145         switch (optn3) {
4146         case MXU_OPTN3_PTN0:
4147             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4148             break;
4149         case MXU_OPTN3_PTN1:
4150         case MXU_OPTN3_PTN2:
4151         case MXU_OPTN3_PTN3:
4152             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4153             break;
4154         case MXU_OPTN3_PTN4:
4155             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4156             break;
4157         }
4158     } else if (unlikely(XRb == XRc)) {
4159         /* both operands same -> just rotation or moving from any of them */
4160         switch (optn3) {
4161         case MXU_OPTN3_PTN0:
4162         case MXU_OPTN3_PTN4:
4163             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4164             break;
4165         case MXU_OPTN3_PTN1:
4166         case MXU_OPTN3_PTN2:
4167         case MXU_OPTN3_PTN3:
4168             tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4169             break;
4170         }
4171     } else {
4172         /* the most general case */
4173         switch (optn3) {
4174         case MXU_OPTN3_PTN0:
4175             {
4176                 /*                                         */
4177                 /*         XRb                XRc          */
4178                 /*  +---------------+                      */
4179                 /*  | A   B   C   D |    E   F   G   H     */
4180                 /*  +-------+-------+                      */
4181                 /*          |                              */
4182                 /*         XRa                             */
4183                 /*                                         */
4184 
4185                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4186             }
4187             break;
4188         case MXU_OPTN3_PTN1:
4189             {
4190                 /*                                         */
4191                 /*         XRb                 XRc         */
4192                 /*      +-------------------+              */
4193                 /*    A | B   C   D       E | F   G   H    */
4194                 /*      +---------+---------+              */
4195                 /*                |                        */
4196                 /*               XRa                       */
4197                 /*                                         */
4198 
4199                 TCGv_i32 t0 = tcg_temp_new();
4200                 TCGv_i32 t1 = tcg_temp_new();
4201 
4202                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
4203                 tcg_gen_shli_i32(t0, t0, 8);
4204 
4205                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
4206                 tcg_gen_shri_i32(t1, t1, 24);
4207 
4208                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4209             }
4210             break;
4211         case MXU_OPTN3_PTN2:
4212             {
4213                 /*                                         */
4214                 /*         XRb                 XRc         */
4215                 /*          +-------------------+          */
4216                 /*    A   B | C   D       E   F | G   H    */
4217                 /*          +---------+---------+          */
4218                 /*                    |                    */
4219                 /*                   XRa                   */
4220                 /*                                         */
4221 
4222                 TCGv_i32 t0 = tcg_temp_new();
4223                 TCGv_i32 t1 = tcg_temp_new();
4224 
4225                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
4226                 tcg_gen_shli_i32(t0, t0, 16);
4227 
4228                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
4229                 tcg_gen_shri_i32(t1, t1, 16);
4230 
4231                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4232             }
4233             break;
4234         case MXU_OPTN3_PTN3:
4235             {
4236                 /*                                         */
4237                 /*         XRb                 XRc         */
4238                 /*              +-------------------+      */
4239                 /*    A   B   C | D       E   F   G | H    */
4240                 /*              +---------+---------+      */
4241                 /*                        |                */
4242                 /*                       XRa               */
4243                 /*                                         */
4244 
4245                 TCGv_i32 t0 = tcg_temp_new();
4246                 TCGv_i32 t1 = tcg_temp_new();
4247 
4248                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
4249                 tcg_gen_shli_i32(t0, t0, 24);
4250 
4251                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
4252                 tcg_gen_shri_i32(t1, t1, 8);
4253 
4254                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4255             }
4256             break;
4257         case MXU_OPTN3_PTN4:
4258             {
4259                 /*                                         */
4260                 /*         XRb                 XRc         */
4261                 /*                     +---------------+   */
4262                 /*    A   B   C   D    | E   F   G   H |   */
4263                 /*                     +-------+-------+   */
4264                 /*                             |           */
4265                 /*                            XRa          */
4266                 /*                                         */
4267 
4268                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4269             }
4270             break;
4271         }
4272     }
4273 }
4274 
4275 /*
4276  *  S32ALN XRc, XRb, XRa, rs
4277  *    Arrange bytes from XRb and XRc according to one of five sets of
4278  *    rules determined by rs[2:0], and place the result in XRa.
4279  */
4280 static void gen_mxu_S32ALN(DisasContext *ctx)
4281 {
4282     uint32_t rs, XRc, XRb, XRa;
4283 
4284     rs  = extract32(ctx->opcode, 21, 5);
4285     XRc = extract32(ctx->opcode, 14, 4);
4286     XRb = extract32(ctx->opcode, 10, 4);
4287     XRa = extract32(ctx->opcode,  6, 4);
4288 
4289     if (unlikely(XRa == 0)) {
4290         /* destination is zero register -> do nothing */
4291     } else if (unlikely((XRb == 0) && (XRc == 0))) {
4292         /* both operands zero registers -> just set destination to all 0s */
4293         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
4294     } else {
4295         /* the most general case */
4296         TCGv t0 = tcg_temp_new();
4297         TCGv t1 = tcg_temp_new();
4298         TCGv t2 = tcg_temp_new();
4299         TCGv t3 = tcg_temp_new();
4300         TCGLabel *l_exit = gen_new_label();
4301         TCGLabel *l_b_only = gen_new_label();
4302         TCGLabel *l_c_only = gen_new_label();
4303 
4304         gen_load_mxu_gpr(t0, XRb);
4305         gen_load_mxu_gpr(t1, XRc);
4306         gen_load_gpr(t2, rs);
4307         tcg_gen_andi_tl(t2, t2, 0x07);
4308 
4309         /* do nothing for undefined cases */
4310         tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
4311 
4312         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
4313         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
4314 
4315         tcg_gen_shli_tl(t2, t2, 3);
4316         tcg_gen_subfi_tl(t3, 32, t2);
4317 
4318         tcg_gen_shl_tl(t0, t0, t2);
4319         tcg_gen_shr_tl(t1, t1, t3);
4320         tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
4321         tcg_gen_br(l_exit);
4322 
4323         gen_set_label(l_b_only);
4324         gen_store_mxu_gpr(t0, XRa);
4325         tcg_gen_br(l_exit);
4326 
4327         gen_set_label(l_c_only);
4328         gen_store_mxu_gpr(t1, XRa);
4329 
4330         gen_set_label(l_exit);
4331     }
4332 }
4333 
4334 /*
4335  *  S32MADD XRa, XRd, rb, rc
4336  *    32 to 64 bit signed multiply with subsequent add
4337  *    result stored in {XRa, XRd} pair, stain HI/LO.
4338  *  S32MADDU XRa, XRd, rb, rc
4339  *    32 to 64 bit unsigned multiply with subsequent add
4340  *    result stored in {XRa, XRd} pair, stain HI/LO.
4341  *  S32MSUB XRa, XRd, rb, rc
4342  *    32 to 64 bit signed multiply with subsequent subtract
4343  *    result stored in {XRa, XRd} pair, stain HI/LO.
4344  *  S32MSUBU XRa, XRd, rb, rc
4345  *    32 to 64 bit unsigned multiply with subsequent subtract
4346  *    result stored in {XRa, XRd} pair, stain HI/LO.
4347  */
4348 static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
4349 {
4350     uint32_t XRa, XRd, Rb, Rc;
4351 
4352     XRa  = extract32(ctx->opcode,  6, 4);
4353     XRd  = extract32(ctx->opcode, 10, 4);
4354     Rb   = extract32(ctx->opcode, 16, 5);
4355     Rc   = extract32(ctx->opcode, 21, 5);
4356 
4357     if (unlikely(Rb == 0 || Rc == 0)) {
4358         /* do nothing because x + 0 * y => x */
4359     } else if (unlikely(XRa == 0 && XRd == 0)) {
4360         /* do nothing because result just dropped */
4361     } else {
4362         TCGv t0 = tcg_temp_new();
4363         TCGv t1 = tcg_temp_new();
4364         TCGv_i64 t2 = tcg_temp_new_i64();
4365         TCGv_i64 t3 = tcg_temp_new_i64();
4366 
4367         gen_load_gpr(t0, Rb);
4368         gen_load_gpr(t1, Rc);
4369 
4370         if (uns) {
4371             tcg_gen_extu_tl_i64(t2, t0);
4372             tcg_gen_extu_tl_i64(t3, t1);
4373         } else {
4374             tcg_gen_ext_tl_i64(t2, t0);
4375             tcg_gen_ext_tl_i64(t3, t1);
4376         }
4377         tcg_gen_mul_i64(t2, t2, t3);
4378 
4379         gen_load_mxu_gpr(t0, XRa);
4380         gen_load_mxu_gpr(t1, XRd);
4381 
4382         tcg_gen_concat_tl_i64(t3, t1, t0);
4383         if (sub) {
4384             tcg_gen_sub_i64(t3, t3, t2);
4385         } else {
4386             tcg_gen_add_i64(t3, t3, t2);
4387         }
4388         gen_move_low32(t1, t3);
4389         gen_move_high32(t0, t3);
4390 
4391         tcg_gen_mov_tl(cpu_HI[0], t0);
4392         tcg_gen_mov_tl(cpu_LO[0], t1);
4393 
4394         gen_store_mxu_gpr(t1, XRd);
4395         gen_store_mxu_gpr(t0, XRa);
4396     }
4397 }
4398 
4399 /*
4400  * Decoding engine for MXU
4401  * =======================
4402  */
4403 
4404 static void decode_opc_mxu__pool00(DisasContext *ctx)
4405 {
4406     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4407 
4408     switch (opcode) {
4409     case OPC_MXU_S32MAX:
4410     case OPC_MXU_S32MIN:
4411         gen_mxu_S32MAX_S32MIN(ctx);
4412         break;
4413     case OPC_MXU_D16MAX:
4414     case OPC_MXU_D16MIN:
4415         gen_mxu_D16MAX_D16MIN(ctx);
4416         break;
4417     case OPC_MXU_Q8MAX:
4418     case OPC_MXU_Q8MIN:
4419         gen_mxu_Q8MAX_Q8MIN(ctx);
4420         break;
4421     case OPC_MXU_Q8SLT:
4422         gen_mxu_q8slt(ctx, false);
4423         break;
4424     case OPC_MXU_Q8SLTU:
4425         gen_mxu_q8slt(ctx, true);
4426         break;
4427     default:
4428         MIPS_INVAL("decode_opc_mxu");
4429         gen_reserved_instruction(ctx);
4430         break;
4431     }
4432 }
4433 
4434 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
4435 {
4436     uint32_t opcode = extract32(ctx->opcode, 0, 6);
4437     uint32_t pad  = extract32(ctx->opcode, 14, 2);
4438 
4439     if (pad != 2) {
4440         /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
4441         return false;
4442     }
4443 
4444     switch (opcode) {
4445     case OPC_MXU_S32MADD:
4446         gen_mxu_s32madd_sub(ctx, false, false);
4447         break;
4448     case OPC_MXU_S32MADDU:
4449         gen_mxu_s32madd_sub(ctx, false, true);
4450         break;
4451     case OPC_MXU_S32MSUB:
4452         gen_mxu_s32madd_sub(ctx, true, false);
4453         break;
4454     case OPC_MXU_S32MSUBU:
4455         gen_mxu_s32madd_sub(ctx, true, true);
4456         break;
4457     default:
4458         return false;
4459     }
4460     return true;
4461 }
4462 
4463 static void decode_opc_mxu__pool01(DisasContext *ctx)
4464 {
4465     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4466 
4467     switch (opcode) {
4468     case OPC_MXU_S32SLT:
4469         gen_mxu_S32SLT(ctx);
4470         break;
4471     case OPC_MXU_D16SLT:
4472         gen_mxu_D16SLT(ctx);
4473         break;
4474     case OPC_MXU_D16AVG:
4475         gen_mxu_d16avg(ctx, false);
4476         break;
4477     case OPC_MXU_D16AVGR:
4478         gen_mxu_d16avg(ctx, true);
4479         break;
4480     case OPC_MXU_Q8AVG:
4481         gen_mxu_q8avg(ctx, false);
4482         break;
4483     case OPC_MXU_Q8AVGR:
4484         gen_mxu_q8avg(ctx, true);
4485         break;
4486     case OPC_MXU_Q8ADD:
4487         gen_mxu_Q8ADD(ctx);
4488         break;
4489     default:
4490         MIPS_INVAL("decode_opc_mxu");
4491         gen_reserved_instruction(ctx);
4492         break;
4493     }
4494 }
4495 
4496 static void decode_opc_mxu__pool02(DisasContext *ctx)
4497 {
4498     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4499 
4500     switch (opcode) {
4501     case OPC_MXU_S32CPS:
4502         gen_mxu_S32CPS(ctx);
4503         break;
4504     case OPC_MXU_D16CPS:
4505         gen_mxu_D16CPS(ctx);
4506         break;
4507     case OPC_MXU_Q8ABD:
4508         gen_mxu_Q8ABD(ctx);
4509         break;
4510     case OPC_MXU_Q16SAT:
4511         gen_mxu_Q16SAT(ctx);
4512         break;
4513     default:
4514         MIPS_INVAL("decode_opc_mxu");
4515         gen_reserved_instruction(ctx);
4516         break;
4517     }
4518 }
4519 
4520 static void decode_opc_mxu__pool03(DisasContext *ctx)
4521 {
4522     uint32_t opcode = extract32(ctx->opcode, 24, 2);
4523 
4524     switch (opcode) {
4525     case OPC_MXU_D16MULF:
4526         gen_mxu_d16mul(ctx, true, true);
4527         break;
4528     case OPC_MXU_D16MULE:
4529         gen_mxu_d16mul(ctx, true, false);
4530         break;
4531     default:
4532         MIPS_INVAL("decode_opc_mxu");
4533         gen_reserved_instruction(ctx);
4534         break;
4535     }
4536 }
4537 
4538 static void decode_opc_mxu__pool04(DisasContext *ctx)
4539 {
4540     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4541     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4542 
4543     /* Don't care about opcode bits as their meaning is unknown yet */
4544     switch (opcode) {
4545     default:
4546         gen_mxu_s32ldxx(ctx, reversed, false);
4547         break;
4548     }
4549 }
4550 
4551 static void decode_opc_mxu__pool05(DisasContext *ctx)
4552 {
4553     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4554     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4555 
4556     /* Don't care about opcode bits as their meaning is unknown yet */
4557     switch (opcode) {
4558     default:
4559         gen_mxu_s32stxx(ctx, reversed, false);
4560         break;
4561     }
4562 }
4563 
4564 static void decode_opc_mxu__pool06(DisasContext *ctx)
4565 {
4566     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4567     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4568 
4569     switch (opcode) {
4570     case OPC_MXU_S32LDST:
4571     case OPC_MXU_S32LDSTR:
4572         if (strd2 <= 2) {
4573             gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
4574             break;
4575         }
4576         /* fallthrough */
4577     default:
4578         MIPS_INVAL("decode_opc_mxu");
4579         gen_reserved_instruction(ctx);
4580         break;
4581     }
4582 }
4583 
4584 static void decode_opc_mxu__pool07(DisasContext *ctx)
4585 {
4586     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4587     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4588 
4589     switch (opcode) {
4590     case OPC_MXU_S32LDST:
4591     case OPC_MXU_S32LDSTR:
4592         if (strd2 <= 2) {
4593             gen_mxu_s32stxvx(ctx, opcode, false, strd2);
4594             break;
4595         }
4596         /* fallthrough */
4597     default:
4598         MIPS_INVAL("decode_opc_mxu");
4599         gen_reserved_instruction(ctx);
4600         break;
4601     }
4602 }
4603 
4604 static void decode_opc_mxu__pool08(DisasContext *ctx)
4605 {
4606     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4607     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4608 
4609     /* Don't care about opcode bits as their meaning is unknown yet */
4610     switch (opcode) {
4611     default:
4612         gen_mxu_s32ldxx(ctx, reversed, true);
4613         break;
4614     }
4615 }
4616 
4617 static void decode_opc_mxu__pool09(DisasContext *ctx)
4618 {
4619     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4620     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4621 
4622     /* Don't care about opcode bits as their meaning is unknown yet */
4623     switch (opcode) {
4624     default:
4625         gen_mxu_s32stxx(ctx, reversed, true);
4626         break;
4627     }
4628 }
4629 
4630 static void decode_opc_mxu__pool10(DisasContext *ctx)
4631 {
4632     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4633     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4634 
4635     switch (opcode) {
4636     case OPC_MXU_S32LDST:
4637     case OPC_MXU_S32LDSTR:
4638         if (strd2 <= 2) {
4639             gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
4640             break;
4641         }
4642         /* fallthrough */
4643     default:
4644         MIPS_INVAL("decode_opc_mxu");
4645         gen_reserved_instruction(ctx);
4646         break;
4647     }
4648 }
4649 
4650 static void decode_opc_mxu__pool11(DisasContext *ctx)
4651 {
4652     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4653     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4654 
4655     switch (opcode) {
4656     case OPC_MXU_S32LDST:
4657     case OPC_MXU_S32LDSTR:
4658         if (strd2 <= 2) {
4659             gen_mxu_s32stxvx(ctx, opcode, true, strd2);
4660             break;
4661         }
4662         /* fallthrough */
4663     default:
4664         MIPS_INVAL("decode_opc_mxu");
4665         gen_reserved_instruction(ctx);
4666         break;
4667     }
4668 }
4669 
4670 static void decode_opc_mxu__pool12(DisasContext *ctx)
4671 {
4672     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4673 
4674     switch (opcode) {
4675     case OPC_MXU_D32ACC:
4676         gen_mxu_d32acc(ctx);
4677         break;
4678     case OPC_MXU_D32ACCM:
4679         gen_mxu_d32accm(ctx);
4680         break;
4681     case OPC_MXU_D32ASUM:
4682         gen_mxu_d32asum(ctx);
4683         break;
4684     default:
4685         MIPS_INVAL("decode_opc_mxu");
4686         gen_reserved_instruction(ctx);
4687         break;
4688     }
4689 }
4690 
4691 static void decode_opc_mxu__pool13(DisasContext *ctx)
4692 {
4693     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4694 
4695     switch (opcode) {
4696     case OPC_MXU_Q16ACC:
4697         gen_mxu_q16acc(ctx);
4698         break;
4699     case OPC_MXU_Q16ACCM:
4700         gen_mxu_q16accm(ctx);
4701         break;
4702     case OPC_MXU_D16ASUM:
4703         gen_mxu_d16asum(ctx);
4704         break;
4705     default:
4706         MIPS_INVAL("decode_opc_mxu");
4707         gen_reserved_instruction(ctx);
4708         break;
4709     }
4710 }
4711 
4712 static void decode_opc_mxu__pool14(DisasContext *ctx)
4713 {
4714     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4715 
4716     switch (opcode) {
4717     case OPC_MXU_Q8ADDE:
4718         gen_mxu_q8adde(ctx, false);
4719         break;
4720     case OPC_MXU_D8SUM:
4721         gen_mxu_d8sum(ctx, false);
4722         break;
4723     case OPC_MXU_D8SUMC:
4724         gen_mxu_d8sum(ctx, true);
4725         break;
4726     default:
4727         MIPS_INVAL("decode_opc_mxu");
4728         gen_reserved_instruction(ctx);
4729         break;
4730     }
4731 }
4732 
4733 static void decode_opc_mxu__pool15(DisasContext *ctx)
4734 {
4735     uint32_t opcode = extract32(ctx->opcode, 14, 2);
4736 
4737     switch (opcode) {
4738     case OPC_MXU_S32MUL:
4739         gen_mxu_s32mul(ctx, false);
4740         break;
4741     case OPC_MXU_S32MULU:
4742         gen_mxu_s32mul(ctx, true);
4743         break;
4744     case OPC_MXU_S32EXTR:
4745         gen_mxu_s32extr(ctx);
4746         break;
4747     case OPC_MXU_S32EXTRV:
4748         gen_mxu_s32extrv(ctx);
4749         break;
4750     default:
4751         MIPS_INVAL("decode_opc_mxu");
4752         gen_reserved_instruction(ctx);
4753         break;
4754     }
4755 }
4756 
4757 static void decode_opc_mxu__pool16(DisasContext *ctx)
4758 {
4759     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4760 
4761     switch (opcode) {
4762     case OPC_MXU_D32SARW:
4763         gen_mxu_d32sarl(ctx, true);
4764         break;
4765     case OPC_MXU_S32ALN:
4766         gen_mxu_S32ALN(ctx);
4767         break;
4768     case OPC_MXU_S32ALNI:
4769         gen_mxu_S32ALNI(ctx);
4770         break;
4771     case OPC_MXU_S32LUI:
4772         gen_mxu_s32lui(ctx);
4773         break;
4774     case OPC_MXU_S32NOR:
4775         gen_mxu_S32NOR(ctx);
4776         break;
4777     case OPC_MXU_S32AND:
4778         gen_mxu_S32AND(ctx);
4779         break;
4780     case OPC_MXU_S32OR:
4781         gen_mxu_S32OR(ctx);
4782         break;
4783     case OPC_MXU_S32XOR:
4784         gen_mxu_S32XOR(ctx);
4785         break;
4786     default:
4787         MIPS_INVAL("decode_opc_mxu");
4788         gen_reserved_instruction(ctx);
4789         break;
4790     }
4791 }
4792 
4793 static void decode_opc_mxu__pool17(DisasContext *ctx)
4794 {
4795     uint32_t opcode = extract32(ctx->opcode, 6, 3);
4796     uint32_t strd2  = extract32(ctx->opcode, 9, 2);
4797 
4798     if (strd2 > 2) {
4799         MIPS_INVAL("decode_opc_mxu");
4800         gen_reserved_instruction(ctx);
4801         return;
4802     }
4803 
4804     switch (opcode) {
4805     case OPC_MXU_LXW:
4806           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
4807           break;
4808     case OPC_MXU_LXB:
4809           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
4810           break;
4811     case OPC_MXU_LXH:
4812           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
4813           break;
4814     case OPC_MXU_LXBU:
4815           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
4816           break;
4817     case OPC_MXU_LXHU:
4818           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
4819           break;
4820     default:
4821         MIPS_INVAL("decode_opc_mxu");
4822         gen_reserved_instruction(ctx);
4823         break;
4824     }
4825 }
4826 
4827 static void decode_opc_mxu__pool18(DisasContext *ctx)
4828 {
4829     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4830 
4831     switch (opcode) {
4832     case OPC_MXU_D32SLLV:
4833         gen_mxu_d32sxxv(ctx, false, false);
4834         break;
4835     case OPC_MXU_D32SLRV:
4836         gen_mxu_d32sxxv(ctx, true, false);
4837         break;
4838     case OPC_MXU_D32SARV:
4839         gen_mxu_d32sxxv(ctx, true, true);
4840         break;
4841     case OPC_MXU_Q16SLLV:
4842         gen_mxu_q16sxxv(ctx, false, false);
4843         break;
4844     case OPC_MXU_Q16SLRV:
4845         gen_mxu_q16sxxv(ctx, true, false);
4846         break;
4847     case OPC_MXU_Q16SARV:
4848         gen_mxu_q16sxxv(ctx, true, true);
4849         break;
4850     default:
4851         MIPS_INVAL("decode_opc_mxu");
4852         gen_reserved_instruction(ctx);
4853         break;
4854     }
4855 }
4856 
4857 static void decode_opc_mxu__pool19(DisasContext *ctx)
4858 {
4859     uint32_t opcode = extract32(ctx->opcode, 22, 4);
4860 
4861     switch (opcode) {
4862     case OPC_MXU_Q8MUL:
4863         gen_mxu_q8mul_mac(ctx, false, false);
4864         break;
4865     case OPC_MXU_Q8MULSU:
4866         gen_mxu_q8mul_mac(ctx, true, false);
4867         break;
4868     default:
4869         MIPS_INVAL("decode_opc_mxu");
4870         gen_reserved_instruction(ctx);
4871         break;
4872     }
4873 }
4874 
4875 static void decode_opc_mxu__pool20(DisasContext *ctx)
4876 {
4877     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4878 
4879     switch (opcode) {
4880     case OPC_MXU_Q8MOVZ:
4881         gen_mxu_q8movzn(ctx, TCG_COND_NE);
4882         break;
4883     case OPC_MXU_Q8MOVN:
4884         gen_mxu_q8movzn(ctx, TCG_COND_EQ);
4885         break;
4886     case OPC_MXU_D16MOVZ:
4887         gen_mxu_d16movzn(ctx, TCG_COND_NE);
4888         break;
4889     case OPC_MXU_D16MOVN:
4890         gen_mxu_d16movzn(ctx, TCG_COND_EQ);
4891         break;
4892     case OPC_MXU_S32MOVZ:
4893         gen_mxu_s32movzn(ctx, TCG_COND_NE);
4894         break;
4895     case OPC_MXU_S32MOVN:
4896         gen_mxu_s32movzn(ctx, TCG_COND_EQ);
4897         break;
4898     default:
4899         MIPS_INVAL("decode_opc_mxu");
4900         gen_reserved_instruction(ctx);
4901         break;
4902     }
4903 }
4904 
4905 static void decode_opc_mxu__pool21(DisasContext *ctx)
4906 {
4907     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4908 
4909     switch (opcode) {
4910     case OPC_MXU_Q8MAC:
4911         gen_mxu_q8mul_mac(ctx, false, true);
4912         break;
4913     case OPC_MXU_Q8MACSU:
4914         gen_mxu_q8mul_mac(ctx, true, true);
4915         break;
4916     default:
4917         MIPS_INVAL("decode_opc_mxu");
4918         gen_reserved_instruction(ctx);
4919         break;
4920     }
4921 }
4922 
4923 
4924 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
4925 {
4926     uint32_t opcode = extract32(insn, 0, 6);
4927 
4928     if (opcode == OPC_MXU_S32M2I) {
4929         gen_mxu_s32m2i(ctx);
4930         return true;
4931     }
4932 
4933     if (opcode == OPC_MXU_S32I2M) {
4934         gen_mxu_s32i2m(ctx);
4935         return true;
4936     }
4937 
4938     {
4939         TCGv t_mxu_cr = tcg_temp_new();
4940         TCGLabel *l_exit = gen_new_label();
4941 
4942         gen_load_mxu_cr(t_mxu_cr);
4943         tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
4944         tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
4945 
4946         switch (opcode) {
4947         case OPC_MXU_S32MADD:
4948         case OPC_MXU_S32MADDU:
4949         case OPC_MXU_S32MSUB:
4950         case OPC_MXU_S32MSUBU:
4951             return decode_opc_mxu_s32madd_sub(ctx);
4952         case OPC_MXU__POOL00:
4953             decode_opc_mxu__pool00(ctx);
4954             break;
4955         case OPC_MXU_D16MUL:
4956             gen_mxu_d16mul(ctx, false, false);
4957             break;
4958         case OPC_MXU_D16MAC:
4959             gen_mxu_d16mac(ctx, false, false);
4960             break;
4961         case OPC_MXU_D16MACF:
4962             gen_mxu_d16mac(ctx, true, true);
4963             break;
4964         case OPC_MXU_D16MADL:
4965             gen_mxu_d16madl(ctx);
4966             break;
4967         case OPC_MXU_S16MAD:
4968             gen_mxu_s16mad(ctx);
4969             break;
4970         case OPC_MXU_Q16ADD:
4971             gen_mxu_q16add(ctx);
4972             break;
4973         case OPC_MXU_D16MACE:
4974             gen_mxu_d16mac(ctx, true, false);
4975             break;
4976         case OPC_MXU__POOL01:
4977             decode_opc_mxu__pool01(ctx);
4978             break;
4979         case OPC_MXU__POOL02:
4980             decode_opc_mxu__pool02(ctx);
4981             break;
4982         case OPC_MXU__POOL03:
4983             decode_opc_mxu__pool03(ctx);
4984             break;
4985         case OPC_MXU__POOL04:
4986             decode_opc_mxu__pool04(ctx);
4987             break;
4988         case OPC_MXU__POOL05:
4989             decode_opc_mxu__pool05(ctx);
4990             break;
4991         case OPC_MXU__POOL06:
4992             decode_opc_mxu__pool06(ctx);
4993             break;
4994         case OPC_MXU__POOL07:
4995             decode_opc_mxu__pool07(ctx);
4996             break;
4997         case OPC_MXU__POOL08:
4998             decode_opc_mxu__pool08(ctx);
4999             break;
5000         case OPC_MXU__POOL09:
5001             decode_opc_mxu__pool09(ctx);
5002             break;
5003         case OPC_MXU__POOL10:
5004             decode_opc_mxu__pool10(ctx);
5005             break;
5006         case OPC_MXU__POOL11:
5007             decode_opc_mxu__pool11(ctx);
5008             break;
5009         case OPC_MXU_D32ADD:
5010             gen_mxu_d32add(ctx);
5011             break;
5012         case OPC_MXU__POOL12:
5013             decode_opc_mxu__pool12(ctx);
5014             break;
5015         case OPC_MXU__POOL13:
5016             decode_opc_mxu__pool13(ctx);
5017             break;
5018         case OPC_MXU__POOL14:
5019             decode_opc_mxu__pool14(ctx);
5020             break;
5021         case OPC_MXU_Q8ACCE:
5022             gen_mxu_q8adde(ctx, true);
5023             break;
5024         case OPC_MXU_S8LDD:
5025             gen_mxu_s8ldd(ctx, false);
5026             break;
5027         case OPC_MXU_S8STD:
5028             gen_mxu_s8std(ctx, false);
5029             break;
5030         case OPC_MXU_S8LDI:
5031             gen_mxu_s8ldd(ctx, true);
5032             break;
5033         case OPC_MXU_S8SDI:
5034             gen_mxu_s8std(ctx, true);
5035             break;
5036         case OPC_MXU__POOL15:
5037             decode_opc_mxu__pool15(ctx);
5038             break;
5039         case OPC_MXU__POOL16:
5040             decode_opc_mxu__pool16(ctx);
5041             break;
5042         case OPC_MXU__POOL17:
5043             decode_opc_mxu__pool17(ctx);
5044             break;
5045         case OPC_MXU_S16LDD:
5046             gen_mxu_s16ldd(ctx, false);
5047             break;
5048         case OPC_MXU_S16STD:
5049             gen_mxu_s16std(ctx, false);
5050             break;
5051         case OPC_MXU_S16LDI:
5052             gen_mxu_s16ldd(ctx, true);
5053             break;
5054         case OPC_MXU_S16SDI:
5055             gen_mxu_s16std(ctx, true);
5056             break;
5057         case OPC_MXU_D32SLL:
5058             gen_mxu_d32sxx(ctx, false, false);
5059             break;
5060         case OPC_MXU_D32SLR:
5061             gen_mxu_d32sxx(ctx, true, false);
5062             break;
5063         case OPC_MXU_D32SARL:
5064             gen_mxu_d32sarl(ctx, false);
5065             break;
5066         case OPC_MXU_D32SAR:
5067             gen_mxu_d32sxx(ctx, true, true);
5068             break;
5069         case OPC_MXU_Q16SLL:
5070             gen_mxu_q16sxx(ctx, false, false);
5071             break;
5072         case OPC_MXU__POOL18:
5073             decode_opc_mxu__pool18(ctx);
5074             break;
5075         case OPC_MXU_Q16SLR:
5076             gen_mxu_q16sxx(ctx, true, false);
5077             break;
5078         case OPC_MXU_Q16SAR:
5079             gen_mxu_q16sxx(ctx, true, true);
5080             break;
5081         case OPC_MXU__POOL19:
5082             decode_opc_mxu__pool19(ctx);
5083             break;
5084         case OPC_MXU__POOL20:
5085             decode_opc_mxu__pool20(ctx);
5086             break;
5087         case OPC_MXU__POOL21:
5088             decode_opc_mxu__pool21(ctx);
5089             break;
5090         case OPC_MXU_Q16SCOP:
5091             gen_mxu_q16scop(ctx);
5092             break;
5093         case OPC_MXU_Q8MADL:
5094             gen_mxu_q8madl(ctx);
5095             break;
5096         case OPC_MXU_S32SFL:
5097             gen_mxu_s32sfl(ctx);
5098             break;
5099         case OPC_MXU_Q8SAD:
5100             gen_mxu_q8sad(ctx);
5101             break;
5102         default:
5103             return false;
5104         }
5105 
5106         gen_set_label(l_exit);
5107     }
5108 
5109     return true;
5110 }
5111