xref: /openbmc/qemu/target/mips/tcg/mxu_translate.c (revision dbdf841b)
1 /*
2  *  Ingenic XBurst Media eXtension Unit (MXU) translation routines.
3  *
4  *  Copyright (c) 2004-2005 Jocelyn Mayer
5  *  Copyright (c) 2006 Marius Groeger (FPU operations)
6  *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7  *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8  *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
9  *
10  * SPDX-License-Identifier: LGPL-2.1-or-later
11  *
12  * Datasheet:
13  *
14  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
16  */
17 
18 #include "qemu/osdep.h"
19 #include "translate.h"
20 
21 /*
22  *
23  *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24  *       ============================================
25  *
26  *
27  * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
28  * instruction set. It is designed to fit the needs of signal, graphical and
29  * video processing applications. The MXU instruction set is used in the XBurst
30  * family of microprocessors by Ingenic.
31  *
32  * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
33  * the control register.
34  *
35  *
36  *     The notation used in MXU assembler mnemonics
37  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38  *
39  *  Register operands:
40  *
41  *   XRa, XRb, XRc, XRd - MXU registers
42  *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
43  *
44  *  Non-register operands:
45  *
46  *   aptn1 - 1-bit accumulate add/subtract pattern
47  *   aptn2 - 2-bit accumulate add/subtract pattern
48  *   eptn2 - 2-bit execute add/subtract pattern
49  *   optn2 - 2-bit operand pattern
50  *   optn3 - 3-bit operand pattern
51  *   sft4  - 4-bit shift amount
52  *   strd2 - 2-bit stride amount
53  *
54  *  Prefixes:
55  *
56  *   Level of parallelism:                Operand size:
57  *    S - single operation at a time       32 - word
58  *    D - two operations in parallel       16 - half word
59  *    Q - four operations in parallel       8 - byte
60  *
61  *  Operations:
62  *
63  *   ADD   - Add or subtract
64  *   ADDC  - Add with carry-in
65  *   ACC   - Accumulate
66  *   ASUM  - Sum together then accumulate (add or subtract)
67  *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
68  *   AVG   - Average between 2 operands
69  *   ABD   - Absolute difference
70  *   ALN   - Align data
71  *   AND   - Logical bitwise 'and' operation
72  *   CPS   - Copy sign
73  *   EXTR  - Extract bits
74  *   I2M   - Move from GPR register to MXU register
75  *   LDD   - Load data from memory to XRF
76  *   LDI   - Load data from memory to XRF (and increase the address base)
77  *   LUI   - Load unsigned immediate
78  *   MUL   - Multiply
79  *   MULU  - Unsigned multiply
80  *   MADD  - 64-bit operand add 32x32 product
81  *   MSUB  - 64-bit operand subtract 32x32 product
82  *   MAC   - Multiply and accumulate (add or subtract)
83  *   MAD   - Multiply and add or subtract
84  *   MAX   - Maximum between 2 operands
85  *   MIN   - Minimum between 2 operands
86  *   M2I   - Move from MXU register to GPR register
87  *   MOVZ  - Move if zero
88  *   MOVN  - Move if non-zero
89  *   NOR   - Logical bitwise 'nor' operation
90  *   OR    - Logical bitwise 'or' operation
91  *   STD   - Store data from XRF to memory
92  *   SDI   - Store data from XRF to memory (and increase the address base)
93  *   SLT   - Set of less than comparison
94  *   SAD   - Sum of absolute differences
95  *   SLL   - Logical shift left
96  *   SLR   - Logical shift right
97  *   SAR   - Arithmetic shift right
98  *   SAT   - Saturation
99  *   SFL   - Shuffle
100  *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101  *   XOR   - Logical bitwise 'exclusive or' operation
102  *
103  *  Suffixes:
104  *
105  *   E - Expand results
106  *   F - Fixed point multiplication
107  *   L - Low part result
108  *   R - Doing rounding
109  *   V - Variable instead of immediate
110  *   W - Combine above L and V
111  *
112  *
113  *     The list of MXU instructions grouped by functionality
114  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115  *
116  * Load/Store instructions           Multiplication instructions
117  * -----------------------           ---------------------------
118  *
119  *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
120  *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
121  *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
122  *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
123  *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
124  *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
125  *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
126  *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
127  *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
128  *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129  *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130  *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131  *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132  *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133  *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
134  *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
135  *  S16LDD XRa, Rb, s10, optn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
136  *  S16STD XRa, Rb, s10, optn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
137  *  S16LDI XRa, Rb, s10, optn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
138  *  S16SDI XRa, Rb, s10, optn2
139  *  S8LDD XRa, Rb, s8, optn3
140  *  S8STD XRa, Rb, s8, optn3         Addition and subtraction instructions
141  *  S8LDI XRa, Rb, s8, optn3         -------------------------------------
142  *  S8SDI XRa, Rb, s8, optn3
143  *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
144  *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
145  *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
146  *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
147  *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
148  *                                    S32CPS XRa, XRb, XRc
149  *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150  * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
151  * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
152  *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
153  *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb, XRc
154  *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
155  *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
156  *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
157  *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
158  *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
159  *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
160  *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
161  *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
162  *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
163  *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
164  *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
165  *  Q8SLT XRa, XRb, XRc
166  *  Q8SLTU XRa, XRb, XRc
167  *  Q8MOVZ XRa, XRb, XRc             Shift instructions
168  *  Q8MOVN XRa, XRb, XRc             ------------------
169  *
170  *                                    D32SLL XRa, XRb, XRc, XRd, sft4
171  * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
172  * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
173  *                                    D32SARL XRa, XRb, XRc, sft4
174  *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
175  *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
176  *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
177  *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
178  *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
179  *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
180  * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
181  * -------------------------          Q16SLLV XRa, XRb, Rb
182  *                                    Q16SLRV XRa, XRb, Rb
183  *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
184  *  S32ALN XRa, XRb, XRc, Rb
185  *  S32ALNI XRa, XRb, XRc, s3
186  *  S32LUI XRa, s8, optn3            Move instructions
187  *  S32EXTR XRa, XRb, Rb, bits5      -----------------
188  *  S32EXTRV XRa, XRb, Rs, Rt
189  *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
190  *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
191  *
192  *
193  *     The opcode organization of MXU instructions
194  *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195  *
196  * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
197  * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
198  * other bits up to the instruction level is as follows:
199  *
200  *              bits
201  *             05..00
202  *
203  *          ┌─ 000000 ─ OPC_MXU_S32MADD
204  *          ├─ 000001 ─ OPC_MXU_S32MADDU
205  *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
206  *          │
207  *          │                               20..18
208  *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209  *          │                            ├─ 001 ─ OPC_MXU_S32MIN
210  *          │                            ├─ 010 ─ OPC_MXU_D16MAX
211  *          │                            ├─ 011 ─ OPC_MXU_D16MIN
212  *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
213  *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
214  *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
215  *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
216  *          ├─ 000100 ─ OPC_MXU_S32MSUB
217  *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
218  *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219  *          │                            ├─ 001 ─ OPC_MXU_D16SLT
220  *          │                            ├─ 010 ─ OPC_MXU_D16AVG
221  *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
222  *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
223  *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
224  *          │                            └─ 111 ─ OPC_MXU_Q8ADD
225  *          │
226  *          │                               20..18
227  *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228  *          │                            ├─ 010 ─ OPC_MXU_D16CPS
229  *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
230  *          │                            └─ 110 ─ OPC_MXU_Q16SAT
231  *          ├─ 001000 ─ OPC_MXU_D16MUL
232  *          │                               25..24
233  *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234  *          │                            └─ 01 ─ OPC_MXU_D16MULE
235  *          ├─ 001010 ─ OPC_MXU_D16MAC
236  *          ├─ 001011 ─ OPC_MXU_D16MACF
237  *          ├─ 001100 ─ OPC_MXU_D16MADL
238  *          ├─ 001101 ─ OPC_MXU_S16MAD
239  *          ├─ 001110 ─ OPC_MXU_Q16ADD
240  *          ├─ 001111 ─ OPC_MXU_D16MACE     20 (13..10 don't care)
241  *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
242  *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
243  *          │
244  *          │                               20 (13..10 don't care)
245  *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246  *          │                            └─ 1 ─ OPC_MXU_S32STDR
247  *          │
248  *          │                               13..10
249  *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250  *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
251  *          │
252  *          │                               13..10
253  *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254  *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
255  *          │
256  *          │                               20 (13..10 don't care)
257  *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258  *          │                            └─ 1 ─ OPC_MXU_S32LDIR
259  *          │
260  *          │                               20 (13..10 don't care)
261  *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262  *          │                            └─ 1 ─ OPC_MXU_S32SDIR
263  *          │
264  *          │                               13..10
265  *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266  *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
267  *          │
268  *          │                               13..10
269  *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270  *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
271  *          ├─ 011000 ─ OPC_MXU_D32ADD  (catches D32ADDC too)
272  *          │                               23..22
273  *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274  * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
275  *          │                            └─ 10 ─ OPC_MXU_D32ASUM
276  *          ├─ 011010 ─ <not assigned>
277  *          │                               23..22
278  *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279  *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
280  *          │                            └─ 10 ─ OPC_MXU_D16ASUM
281  *          │
282  *          │                               23..22
283  *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284  *          │                            ├─ 01 ─ OPC_MXU_D8SUM
285  *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
286  *          ├─ 011110 ─ <not assigned>
287  *          ├─ 011111 ─ <not assigned>
288  *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
289  *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
290  *          ├─ 100010 ─ OPC_MXU_S8LDD
291  *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
292  *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
293  *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
294  *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
295  *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
296  *          │
297  *          │                               20..18
298  *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299  *          │                            ├─ 001 ─ OPC_MXU_S32ALN
300  *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
301  *          │                            ├─ 011 ─ OPC_MXU_S32LUI
302  *          │                            ├─ 100 ─ OPC_MXU_S32NOR
303  *          │                            ├─ 101 ─ OPC_MXU_S32AND
304  *          │                            ├─ 110 ─ OPC_MXU_S32OR
305  *          │                            └─ 111 ─ OPC_MXU_S32XOR
306  *          │
307  *          │                               8..6
308  *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309  *          │                            ├─ 001 ─ OPC_MXU_LXH
310  *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
311  *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
312  *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
313  *          ├─ 101100 ─ OPC_MXU_S16LDI
314  *          ├─ 101101 ─ OPC_MXU_S16SDI
315  *          ├─ 101110 ─ OPC_MXU_S32M2I
316  *          ├─ 101111 ─ OPC_MXU_S32I2M
317  *          ├─ 110000 ─ OPC_MXU_D32SLL
318  *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
319  *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
320  *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
321  *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
322  *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
323  *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
324  *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
325  *          │
326  *          ├─ 110111 ─ OPC_MXU_Q16SAR
327  *          │                               23..22
328  *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329  *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
330  *          │
331  *          │                               20..18
332  *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333  *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
334  *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
335  *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
336  *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
337  *          │                            └─ 101 ─ OPC_MXU_S32MOVN
338  *          │
339  *          │                               23..22
340  *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341  *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
342  *          ├─ 111011 ─ OPC_MXU_Q16SCOP
343  *          ├─ 111100 ─ OPC_MXU_Q8MADL
344  *          ├─ 111101 ─ OPC_MXU_S32SFL
345  *          ├─ 111110 ─ OPC_MXU_Q8SAD
346  *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
347  *
348  *
349  * Compiled after:
350  *
351  *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352  *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
353  */
354 
/*
 * MXU major opcodes, i.e. the value found in instruction bits 05..00.
 *
 * An OPC_MXU__POOLnn entry does not name an instruction by itself: a further
 * minor opcode field of the instruction selects the final operation.
 * Values absent from the sequence are not assigned to MXU.
 */
enum {
    /* 0x00 .. 0x07 (0x02 is left to the non-MXU MUL) */
    OPC_MXU_S32MADD  = 0x00,
    OPC_MXU_S32MADDU = 0x01,
    OPC_MXU__POOL00  = 0x03,
    OPC_MXU_S32MSUB  = 0x04,
    OPC_MXU_S32MSUBU = 0x05,
    OPC_MXU__POOL01  = 0x06,
    OPC_MXU__POOL02  = 0x07,

    /* 0x08 .. 0x0F */
    OPC_MXU_D16MUL   = 0x08,
    OPC_MXU__POOL03  = 0x09,
    OPC_MXU_D16MAC   = 0x0A,
    OPC_MXU_D16MACF  = 0x0B,
    OPC_MXU_D16MADL  = 0x0C,
    OPC_MXU_S16MAD   = 0x0D,
    OPC_MXU_Q16ADD   = 0x0E,
    OPC_MXU_D16MACE  = 0x0F,

    /* 0x10 .. 0x17 */
    OPC_MXU__POOL04  = 0x10,
    OPC_MXU__POOL05  = 0x11,
    OPC_MXU__POOL06  = 0x12,
    OPC_MXU__POOL07  = 0x13,
    OPC_MXU__POOL08  = 0x14,
    OPC_MXU__POOL09  = 0x15,
    OPC_MXU__POOL10  = 0x16,
    OPC_MXU__POOL11  = 0x17,

    /* 0x18 .. 0x1D (0x1A is not assigned) */
    OPC_MXU_D32ADD   = 0x18,
    OPC_MXU__POOL12  = 0x19,
    OPC_MXU__POOL13  = 0x1B,
    OPC_MXU__POOL14  = 0x1C,
    OPC_MXU_Q8ACCE   = 0x1D,

    /* 0x22 .. 0x28 (0x1E .. 0x21 are not assigned) */
    OPC_MXU_S8LDD    = 0x22,
    OPC_MXU_S8STD    = 0x23,
    OPC_MXU_S8LDI    = 0x24,
    OPC_MXU_S8SDI    = 0x25,
    OPC_MXU__POOL15  = 0x26,
    OPC_MXU__POOL16  = 0x27,
    OPC_MXU__POOL17  = 0x28,

    /* 0x2A .. 0x2F (0x29 is not assigned) */
    OPC_MXU_S16LDD   = 0x2A,
    OPC_MXU_S16STD   = 0x2B,
    OPC_MXU_S16LDI   = 0x2C,
    OPC_MXU_S16SDI   = 0x2D,
    OPC_MXU_S32M2I   = 0x2E,
    OPC_MXU_S32I2M   = 0x2F,

    /* 0x30 .. 0x37 */
    OPC_MXU_D32SLL   = 0x30,
    OPC_MXU_D32SLR   = 0x31,
    OPC_MXU_D32SARL  = 0x32,
    OPC_MXU_D32SAR   = 0x33,
    OPC_MXU_Q16SLL   = 0x34,
    OPC_MXU_Q16SLR   = 0x35,
    OPC_MXU__POOL18  = 0x36,
    OPC_MXU_Q16SAR   = 0x37,

    /* 0x38 .. 0x3E (0x3F is not assigned) */
    OPC_MXU__POOL19  = 0x38,
    OPC_MXU__POOL20  = 0x39,
    OPC_MXU__POOL21  = 0x3A,
    OPC_MXU_Q16SCOP  = 0x3B,
    OPC_MXU_Q8MADL   = 0x3C,
    OPC_MXU_S32SFL   = 0x3D,
    OPC_MXU_Q8SAD    = 0x3E,
};
413 
414 
/*
 * MXU pool 00 - minor opcode in bits 20..18
 */
enum {
    OPC_MXU_S32MAX   = 0x00,
    OPC_MXU_S32MIN   = 0x01,
    OPC_MXU_D16MAX   = 0x02,
    OPC_MXU_D16MIN   = 0x03,
    OPC_MXU_Q8MAX    = 0x04,
    OPC_MXU_Q8MIN    = 0x05,
    OPC_MXU_Q8SLT    = 0x06,
    OPC_MXU_Q8SLTU   = 0x07,
};

/*
 * MXU pool 01 - minor opcode in bits 20..18 (0x06 is not assigned)
 */
enum {
    OPC_MXU_S32SLT   = 0x00,
    OPC_MXU_D16SLT   = 0x01,
    OPC_MXU_D16AVG   = 0x02,
    OPC_MXU_D16AVGR  = 0x03,
    OPC_MXU_Q8AVG    = 0x04,
    OPC_MXU_Q8AVGR   = 0x05,
    OPC_MXU_Q8ADD    = 0x07,
};

/*
 * MXU pool 02 - minor opcode in bits 20..18 (only even values are assigned)
 */
enum {
    OPC_MXU_S32CPS   = 0x00,
    OPC_MXU_D16CPS   = 0x02,
    OPC_MXU_Q8ABD    = 0x04,
    OPC_MXU_Q16SAT   = 0x06,
};

/*
 * MXU pool 03 - minor opcode in bits 25..24
 */
enum {
    OPC_MXU_D16MULF  = 0x00,
    OPC_MXU_D16MULE  = 0x01,
};

/*
 * MXU pools 04 05 06 07 08 09 10 11 - shared by all S32 load/store pools:
 * the minor opcode is bit 20 for pools 04, 05, 08 and 09, and bits 13..10
 * for pools 06, 07, 10 and 11. Value 1 selects the 'R' form of the
 * instruction (S32LDDR, S32STDR, ...).
 */
enum {
    OPC_MXU_S32LDST  = 0x00,
    OPC_MXU_S32LDSTR = 0x01,
};

/*
 * MXU pool 12 - minor opcode in bits 23..22
 */
enum {
    OPC_MXU_D32ACC    = 0x00,
    OPC_MXU_D32ACCM   = 0x01,
    OPC_MXU_D32ASUM   = 0x02,
};

/*
 * MXU pool 13 - minor opcode in bits 23..22
 */
enum {
    OPC_MXU_Q16ACC    = 0x00,
    OPC_MXU_Q16ACCM   = 0x01,
    OPC_MXU_D16ASUM   = 0x02,
};

/*
 * MXU pool 14 - minor opcode in bits 23..22
 */
enum {
    OPC_MXU_Q8ADDE    = 0x00,
    OPC_MXU_D8SUM     = 0x01,
    OPC_MXU_D8SUMC    = 0x02,
};

/*
 * MXU pool 15 - minor opcode in bits 15..14
 */
enum {
    OPC_MXU_S32MUL    = 0x00,
    OPC_MXU_S32MULU   = 0x01,
    OPC_MXU_S32EXTR   = 0x02,
    OPC_MXU_S32EXTRV  = 0x03,
};

/*
 * MXU pool 16 - minor opcode in bits 20..18
 */
enum {
    OPC_MXU_D32SARW  = 0x00,
    OPC_MXU_S32ALN   = 0x01,
    OPC_MXU_S32ALNI  = 0x02,
    OPC_MXU_S32LUI   = 0x03,
    OPC_MXU_S32NOR   = 0x04,
    OPC_MXU_S32AND   = 0x05,
    OPC_MXU_S32OR    = 0x06,
    OPC_MXU_S32XOR   = 0x07,
};

/*
 * MXU pool 17 - minor opcode in bits 8..6 (0x02 is not assigned)
 */
enum {
    OPC_MXU_LXB      = 0x00,
    OPC_MXU_LXH      = 0x01,
    OPC_MXU_LXW      = 0x03,
    OPC_MXU_LXBU     = 0x04,
    OPC_MXU_LXHU     = 0x05,
};

/*
 * MXU pool 18 - minor opcode in bits 20..18 (0x02 and 0x06 are not assigned)
 */
enum {
    OPC_MXU_D32SLLV  = 0x00,
    OPC_MXU_D32SLRV  = 0x01,
    OPC_MXU_D32SARV  = 0x03,
    OPC_MXU_Q16SLLV  = 0x04,
    OPC_MXU_Q16SLRV  = 0x05,
    OPC_MXU_Q16SARV  = 0x07,
};

/*
 * MXU pool 19 - minor opcode in bits 23..22
 */
enum {
    OPC_MXU_Q8MUL    = 0x00,
    OPC_MXU_Q8MULSU  = 0x02,
};

/*
 * MXU pool 20 - minor opcode in bits 20..18
 */
enum {
    OPC_MXU_Q8MOVZ   = 0x00,
    OPC_MXU_Q8MOVN   = 0x01,
    OPC_MXU_D16MOVZ  = 0x02,
    OPC_MXU_D16MOVN  = 0x03,
    OPC_MXU_S32MOVZ  = 0x04,
    OPC_MXU_S32MOVN  = 0x05,
};

/*
 * MXU pool 21 - minor opcode in bits 23..22
 */
enum {
    OPC_MXU_Q8MAC    = 0x00,
    OPC_MXU_Q8MACSU  = 0x02,
};
569 
570 
/*
 * MXU accumulate add/subtract 1-bit pattern 'aptn1':
 * A selects add, S selects subtract.
 */
#define MXU_APTN1_A    0
#define MXU_APTN1_S    1

/*
 * MXU accumulate add/subtract 2-bit pattern 'aptn2':
 * one add (A) / subtract (S) choice per letter.
 */
#define MXU_APTN2_AA    0
#define MXU_APTN2_AS    1
#define MXU_APTN2_SA    2
#define MXU_APTN2_SS    3

/*
 * MXU execute add/subtract 2-bit pattern 'eptn2':
 * same encoding as 'aptn2'.
 */
#define MXU_EPTN2_AA    0
#define MXU_EPTN2_AS    1
#define MXU_EPTN2_SA    2
#define MXU_EPTN2_SS    3

/* MXU 2-bit operand getting pattern 'optn2', numbered form */
#define MXU_OPTN2_PTN0  0
#define MXU_OPTN2_PTN1  1
#define MXU_OPTN2_PTN2  2
#define MXU_OPTN2_PTN3  3

/* MXU 'optn2', alternative names for the very same four patterns */
#define MXU_OPTN2_WW    MXU_OPTN2_PTN0
#define MXU_OPTN2_LW    MXU_OPTN2_PTN1
#define MXU_OPTN2_HW    MXU_OPTN2_PTN2
#define MXU_OPTN2_XW    MXU_OPTN2_PTN3

/* MXU 3-bit operand getting pattern 'optn3' */
#define MXU_OPTN3_PTN0  0
#define MXU_OPTN3_PTN1  1
#define MXU_OPTN3_PTN2  2
#define MXU_OPTN3_PTN3  3
#define MXU_OPTN3_PTN4  4
#define MXU_OPTN3_PTN5  5
#define MXU_OPTN3_PTN6  6
#define MXU_OPTN3_PTN7  7
607 
608 /* MXU registers */
609 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
610 static TCGv mxu_CR;
611 
/*
 * Names of the MXU TCG globals, in mxu_gpr[] order, followed by the name of
 * the control register.
 *
 * Each slot is 5 bytes wide so that the four-character names "XR10".."XR15"
 * keep their terminating NUL: the entries are handed out as C strings, and
 * a 4-byte slot would make "XR10" run into the names that follow it.
 */
static const char mxuregnames[][5] = {
    "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
    "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
};
616 
617 void mxu_translate_init(void)
618 {
619     for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
620         mxu_gpr[i] = tcg_global_mem_new(cpu_env,
621                                         offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
622                                         mxuregnames[i]);
623     }
624 
625     mxu_CR = tcg_global_mem_new(cpu_env,
626                                 offsetof(CPUMIPSState, active_tc.mxu_cr),
627                                 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
628 }
629 
630 /* MXU General purpose registers moves. */
631 static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
632 {
633     if (reg == 0) {
634         tcg_gen_movi_tl(t, 0);
635     } else if (reg <= 15) {
636         tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
637     }
638 }
639 
640 static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
641 {
642     if (reg > 0 && reg <= 15) {
643         tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
644     }
645 }
646 
647 /* MXU control register moves. */
648 static inline void gen_load_mxu_cr(TCGv t)
649 {
650     tcg_gen_mov_tl(t, mxu_CR);
651 }
652 
653 static inline void gen_store_mxu_cr(TCGv t)
654 {
655     /* TODO: Add handling of RW rules for MXU_CR. */
656     tcg_gen_mov_tl(mxu_CR, t);
657 }
658 
659 /*
660  * S32I2M XRa, rb - Register move from GRF to XRF
661  */
662 static void gen_mxu_s32i2m(DisasContext *ctx)
663 {
664     TCGv t0;
665     uint32_t XRa, Rb;
666 
667     t0 = tcg_temp_new();
668 
669     XRa = extract32(ctx->opcode, 6, 5);
670     Rb = extract32(ctx->opcode, 16, 5);
671 
672     gen_load_gpr(t0, Rb);
673     if (XRa <= 15) {
674         gen_store_mxu_gpr(t0, XRa);
675     } else if (XRa == 16) {
676         gen_store_mxu_cr(t0);
677     }
678 }
679 
680 /*
681  * S32M2I XRa, rb - Register move from XRF to GRF
682  */
683 static void gen_mxu_s32m2i(DisasContext *ctx)
684 {
685     TCGv t0;
686     uint32_t XRa, Rb;
687 
688     t0 = tcg_temp_new();
689 
690     XRa = extract32(ctx->opcode, 6, 5);
691     Rb = extract32(ctx->opcode, 16, 5);
692 
693     if (XRa <= 15) {
694         gen_load_mxu_gpr(t0, XRa);
695     } else if (XRa == 16) {
696         gen_load_mxu_cr(t0);
697     }
698 
699     gen_store_gpr(t0, Rb);
700 }
701 
702 /*
703  * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
704  *
705  * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
706  * post modify address register
707  */
708 static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
709 {
710     TCGv t0, t1;
711     uint32_t XRa, Rb, s8, optn3;
712 
713     t0 = tcg_temp_new();
714     t1 = tcg_temp_new();
715 
716     XRa = extract32(ctx->opcode, 6, 4);
717     s8 = extract32(ctx->opcode, 10, 8);
718     optn3 = extract32(ctx->opcode, 18, 3);
719     Rb = extract32(ctx->opcode, 21, 5);
720 
721     gen_load_gpr(t0, Rb);
722     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
723     if (postmodify) {
724         gen_store_gpr(t0, Rb);
725     }
726 
727     switch (optn3) {
728     /* XRa[7:0] = tmp8 */
729     case MXU_OPTN3_PTN0:
730         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
731         gen_load_mxu_gpr(t0, XRa);
732         tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
733         break;
734     /* XRa[15:8] = tmp8 */
735     case MXU_OPTN3_PTN1:
736         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
737         gen_load_mxu_gpr(t0, XRa);
738         tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
739         break;
740     /* XRa[23:16] = tmp8 */
741     case MXU_OPTN3_PTN2:
742         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
743         gen_load_mxu_gpr(t0, XRa);
744         tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
745         break;
746     /* XRa[31:24] = tmp8 */
747     case MXU_OPTN3_PTN3:
748         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
749         gen_load_mxu_gpr(t0, XRa);
750         tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
751         break;
752     /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
753     case MXU_OPTN3_PTN4:
754         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
755         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
756         break;
757     /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
758     case MXU_OPTN3_PTN5:
759         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
760         tcg_gen_shli_tl(t1, t1, 8);
761         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
762         break;
763     /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
764     case MXU_OPTN3_PTN6:
765         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
766         tcg_gen_mov_tl(t0, t1);
767         tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
768         tcg_gen_shli_tl(t1, t1, 16);
769         tcg_gen_or_tl(t0, t0, t1);
770         break;
771     /* XRa = {tmp8, tmp8, tmp8, tmp8} */
772     case MXU_OPTN3_PTN7:
773         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
774         tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
775         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
776         break;
777     }
778 
779     gen_store_mxu_gpr(t0, XRa);
780 }
781 
782 /*
783  * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
784  *
785  * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
786  * post modify address register
787  */
788 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
789 {
790     TCGv t0, t1;
791     uint32_t XRa, Rb, s8, optn3;
792 
793     t0 = tcg_temp_new();
794     t1 = tcg_temp_new();
795 
796     XRa = extract32(ctx->opcode, 6, 4);
797     s8 = extract32(ctx->opcode, 10, 8);
798     optn3 = extract32(ctx->opcode, 18, 3);
799     Rb = extract32(ctx->opcode, 21, 5);
800 
801     if (optn3 > 3) {
802         /* reserved, do nothing */
803         return;
804     }
805 
806     gen_load_gpr(t0, Rb);
807     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
808     if (postmodify) {
809         gen_store_gpr(t0, Rb);
810     }
811     gen_load_mxu_gpr(t1, XRa);
812 
813     switch (optn3) {
814     /* XRa[7:0] => tmp8 */
815     case MXU_OPTN3_PTN0:
816         tcg_gen_extract_tl(t1, t1, 0, 8);
817         break;
818     /* XRa[15:8] => tmp8 */
819     case MXU_OPTN3_PTN1:
820         tcg_gen_extract_tl(t1, t1, 8, 8);
821         break;
822     /* XRa[23:16] => tmp8 */
823     case MXU_OPTN3_PTN2:
824         tcg_gen_extract_tl(t1, t1, 16, 8);
825         break;
826     /* XRa[31:24] => tmp8 */
827     case MXU_OPTN3_PTN3:
828         tcg_gen_extract_tl(t1, t1, 24, 8);
829         break;
830     }
831 
832     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
833 }
834 
835 /*
836  * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
837  *
838  * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
839  * post modify address register
840  */
841 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
842 {
843     TCGv t0, t1;
844     uint32_t XRa, Rb, optn2;
845     int32_t s10;
846 
847     t0 = tcg_temp_new();
848     t1 = tcg_temp_new();
849 
850     XRa   = extract32(ctx->opcode,   6, 4);
851     s10   = sextract32(ctx->opcode, 10, 9) * 2;
852     optn2 = extract32(ctx->opcode,  19, 2);
853     Rb    = extract32(ctx->opcode,  21, 5);
854 
855     gen_load_gpr(t0, Rb);
856     tcg_gen_addi_tl(t0, t0, s10);
857     if (postmodify) {
858         gen_store_gpr(t0, Rb);
859     }
860 
861     switch (optn2) {
862     /* XRa[15:0] = tmp16 */
863     case MXU_OPTN2_PTN0:
864         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
865         gen_load_mxu_gpr(t0, XRa);
866         tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
867         break;
868     /* XRa[31:16] = tmp16 */
869     case MXU_OPTN2_PTN1:
870         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
871         gen_load_mxu_gpr(t0, XRa);
872         tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
873         break;
874     /* XRa = sign_extend(tmp16) */
875     case MXU_OPTN2_PTN2:
876         tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
877         break;
878     /* XRa = {tmp16, tmp16} */
879     case MXU_OPTN2_PTN3:
880         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
881         tcg_gen_deposit_tl(t0, t1, t1,  0, 16);
882         tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
883         break;
884     }
885 
886     gen_store_mxu_gpr(t0, XRa);
887 }
888 
889 /*
890  * S16STD XRa, Rb, s10, optn2 - Store a halfword from XRF to memory
891  *
892  * S16SDI XRa, Rb, s10, optn2 - Store a halfword from XRF to memory,
893  * post modify address register
894  */
895 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
896 {
897     TCGv t0, t1;
898     uint32_t XRa, Rb, optn2;
899     int32_t s10;
900 
901     t0 = tcg_temp_new();
902     t1 = tcg_temp_new();
903 
904     XRa = extract32(ctx->opcode, 6, 4);
905     s10 = sextract32(ctx->opcode, 10, 9) * 2;
906     optn2 = extract32(ctx->opcode, 19, 2);
907     Rb = extract32(ctx->opcode, 21, 5);
908 
909     if (optn2 > 1) {
910         /* reserved, do nothing */
911         return;
912     }
913 
914     gen_load_gpr(t0, Rb);
915     tcg_gen_addi_tl(t0, t0, s10);
916     if (postmodify) {
917         gen_store_gpr(t0, Rb);
918     }
919     gen_load_mxu_gpr(t1, XRa);
920 
921     switch (optn2) {
922     /* XRa[15:0] => tmp16 */
923     case MXU_OPTN2_PTN0:
924         tcg_gen_extract_tl(t1, t1, 0, 16);
925         break;
926     /* XRa[31:16] => tmp16 */
927     case MXU_OPTN2_PTN1:
928         tcg_gen_extract_tl(t1, t1, 16, 16);
929         break;
930     }
931 
932     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
933 }
934 
935 /*
936  * S32MUL  XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
937  * of GPR's and stores result into pair of MXU registers.
938  * It clobbers HI and LO registers.
939  *
940  * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
941  * of GPR's and stores result into pair of MXU registers.
942  * It clobbers HI and LO registers.
943  */
944 static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
945 {
946     TCGv t0, t1;
947     uint32_t XRa, XRd, rs, rt;
948 
949     t0 = tcg_temp_new();
950     t1 = tcg_temp_new();
951 
952     XRa = extract32(ctx->opcode,  6, 4);
953     XRd = extract32(ctx->opcode, 10, 4);
954     rs  = extract32(ctx->opcode, 16, 5);
955     rt  = extract32(ctx->opcode, 21, 5);
956 
957     if (unlikely(rs == 0 || rt == 0)) {
958         tcg_gen_movi_tl(t0, 0);
959         tcg_gen_movi_tl(t1, 0);
960     } else {
961         gen_load_gpr(t0, rs);
962         gen_load_gpr(t1, rt);
963 
964         if (mulu) {
965             tcg_gen_mulu2_tl(t0, t1, t0, t1);
966         } else {
967             tcg_gen_muls2_tl(t0, t1, t0, t1);
968         }
969     }
970     tcg_gen_mov_tl(cpu_HI[0], t1);
971     tcg_gen_mov_tl(cpu_LO[0], t0);
972     gen_store_mxu_gpr(t1, XRa);
973     gen_store_mxu_gpr(t0, XRd);
974 }
975 
976 /*
977  * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
978  * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
979  *   with rounding and packing result
980  * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
981  *   multiplication with rounding
982  */
983 static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
984                            bool packed_result)
985 {
986     TCGv t0, t1, t2, t3;
987     uint32_t XRa, XRb, XRc, XRd, optn2;
988 
989     t0 = tcg_temp_new();
990     t1 = tcg_temp_new();
991     t2 = tcg_temp_new();
992     t3 = tcg_temp_new();
993 
994     XRa = extract32(ctx->opcode, 6, 4);
995     XRb = extract32(ctx->opcode, 10, 4);
996     XRc = extract32(ctx->opcode, 14, 4);
997     XRd = extract32(ctx->opcode, 18, 4);
998     optn2 = extract32(ctx->opcode, 22, 2);
999 
1000     /*
1001      * TODO: XRd field isn't used for D16MULF
1002      * There's no knowledge how this field affect
1003      * instruction decoding/behavior
1004      */
1005 
1006     gen_load_mxu_gpr(t1, XRb);
1007     tcg_gen_sextract_tl(t0, t1, 0, 16);
1008     tcg_gen_sextract_tl(t1, t1, 16, 16);
1009     gen_load_mxu_gpr(t3, XRc);
1010     tcg_gen_sextract_tl(t2, t3, 0, 16);
1011     tcg_gen_sextract_tl(t3, t3, 16, 16);
1012 
1013     switch (optn2) {
1014     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1015         tcg_gen_mul_tl(t3, t1, t3);
1016         tcg_gen_mul_tl(t2, t0, t2);
1017         break;
1018     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1019         tcg_gen_mul_tl(t3, t0, t3);
1020         tcg_gen_mul_tl(t2, t0, t2);
1021         break;
1022     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1023         tcg_gen_mul_tl(t3, t1, t3);
1024         tcg_gen_mul_tl(t2, t1, t2);
1025         break;
1026     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1027         tcg_gen_mul_tl(t3, t0, t3);
1028         tcg_gen_mul_tl(t2, t1, t2);
1029         break;
1030     }
1031     if (fractional) {
1032         TCGLabel *l_done = gen_new_label();
1033         TCGv rounding = tcg_temp_new();
1034 
1035         tcg_gen_shli_tl(t3, t3, 1);
1036         tcg_gen_shli_tl(t2, t2, 1);
1037         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1038         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1039         if (packed_result) {
1040             TCGLabel *l_apply_bias_l = gen_new_label();
1041             TCGLabel *l_apply_bias_r = gen_new_label();
1042             TCGLabel *l_half_done = gen_new_label();
1043             TCGv bias = tcg_temp_new();
1044 
1045             /*
1046              * D16MULF supports unbiased rounding aka "bankers rounding",
1047              * "round to even", "convergent rounding"
1048              */
1049             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1050             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1051             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1052             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1053             gen_set_label(l_apply_bias_l);
1054             tcg_gen_addi_tl(t3, t3, 0x8000);
1055             gen_set_label(l_half_done);
1056             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1057             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1058             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1059             gen_set_label(l_apply_bias_r);
1060             tcg_gen_addi_tl(t2, t2, 0x8000);
1061         } else {
1062             /* D16MULE doesn't support unbiased rounding */
1063             tcg_gen_addi_tl(t3, t3, 0x8000);
1064             tcg_gen_addi_tl(t2, t2, 0x8000);
1065         }
1066         gen_set_label(l_done);
1067     }
1068     if (!packed_result) {
1069         gen_store_mxu_gpr(t3, XRa);
1070         gen_store_mxu_gpr(t2, XRd);
1071     } else {
1072         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1073         tcg_gen_shri_tl(t2, t2, 16);
1074         tcg_gen_or_tl(t3, t3, t2);
1075         gen_store_mxu_gpr(t3, XRa);
1076     }
1077 }
1078 
1079 /*
1080  * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1081  *   Signed 16 bit pattern multiply and accumulate
1082  * D16MACF XRa, XRb, XRc, aptn2, optn2
1083  *   Signed Q15 fraction pattern multiply accumulate and pack
1084  * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1085  *   Signed Q15 fraction pattern multiply and accumulate
1086  */
1087 static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
1088                            bool packed_result)
1089 {
1090     TCGv t0, t1, t2, t3;
1091     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1092 
1093     t0 = tcg_temp_new();
1094     t1 = tcg_temp_new();
1095     t2 = tcg_temp_new();
1096     t3 = tcg_temp_new();
1097 
1098     XRa = extract32(ctx->opcode, 6, 4);
1099     XRb = extract32(ctx->opcode, 10, 4);
1100     XRc = extract32(ctx->opcode, 14, 4);
1101     XRd = extract32(ctx->opcode, 18, 4);
1102     optn2 = extract32(ctx->opcode, 22, 2);
1103     aptn2 = extract32(ctx->opcode, 24, 2);
1104 
1105     gen_load_mxu_gpr(t1, XRb);
1106     tcg_gen_sextract_tl(t0, t1, 0, 16);
1107     tcg_gen_sextract_tl(t1, t1, 16, 16);
1108 
1109     gen_load_mxu_gpr(t3, XRc);
1110     tcg_gen_sextract_tl(t2, t3, 0, 16);
1111     tcg_gen_sextract_tl(t3, t3, 16, 16);
1112 
1113     switch (optn2) {
1114     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1115         tcg_gen_mul_tl(t3, t1, t3);
1116         tcg_gen_mul_tl(t2, t0, t2);
1117         break;
1118     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1119         tcg_gen_mul_tl(t3, t0, t3);
1120         tcg_gen_mul_tl(t2, t0, t2);
1121         break;
1122     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1123         tcg_gen_mul_tl(t3, t1, t3);
1124         tcg_gen_mul_tl(t2, t1, t2);
1125         break;
1126     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1127         tcg_gen_mul_tl(t3, t0, t3);
1128         tcg_gen_mul_tl(t2, t1, t2);
1129         break;
1130     }
1131 
1132     if (fractional) {
1133         tcg_gen_shli_tl(t3, t3, 1);
1134         tcg_gen_shli_tl(t2, t2, 1);
1135     }
1136     gen_load_mxu_gpr(t0, XRa);
1137     gen_load_mxu_gpr(t1, XRd);
1138 
1139     switch (aptn2) {
1140     case MXU_APTN2_AA:
1141         tcg_gen_add_tl(t3, t0, t3);
1142         tcg_gen_add_tl(t2, t1, t2);
1143         break;
1144     case MXU_APTN2_AS:
1145         tcg_gen_add_tl(t3, t0, t3);
1146         tcg_gen_sub_tl(t2, t1, t2);
1147         break;
1148     case MXU_APTN2_SA:
1149         tcg_gen_sub_tl(t3, t0, t3);
1150         tcg_gen_add_tl(t2, t1, t2);
1151         break;
1152     case MXU_APTN2_SS:
1153         tcg_gen_sub_tl(t3, t0, t3);
1154         tcg_gen_sub_tl(t2, t1, t2);
1155         break;
1156     }
1157 
1158     if (fractional) {
1159         TCGLabel *l_done = gen_new_label();
1160         TCGv rounding = tcg_temp_new();
1161 
1162         tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1163         tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1164         if (packed_result) {
1165             TCGLabel *l_apply_bias_l = gen_new_label();
1166             TCGLabel *l_apply_bias_r = gen_new_label();
1167             TCGLabel *l_half_done = gen_new_label();
1168             TCGv bias = tcg_temp_new();
1169 
1170             /*
1171              * D16MACF supports unbiased rounding aka "bankers rounding",
1172              * "round to even", "convergent rounding"
1173              */
1174             tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1175             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1176             tcg_gen_andi_tl(t0, t3, 0x1ffff);
1177             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1178             gen_set_label(l_apply_bias_l);
1179             tcg_gen_addi_tl(t3, t3, 0x8000);
1180             gen_set_label(l_half_done);
1181             tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1182             tcg_gen_andi_tl(t0, t2, 0x1ffff);
1183             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1184             gen_set_label(l_apply_bias_r);
1185             tcg_gen_addi_tl(t2, t2, 0x8000);
1186         } else {
1187             /* D16MACE doesn't support unbiased rounding */
1188             tcg_gen_addi_tl(t3, t3, 0x8000);
1189             tcg_gen_addi_tl(t2, t2, 0x8000);
1190         }
1191         gen_set_label(l_done);
1192     }
1193 
1194     if (!packed_result) {
1195         gen_store_mxu_gpr(t3, XRa);
1196         gen_store_mxu_gpr(t2, XRd);
1197     } else {
1198         tcg_gen_andi_tl(t3, t3, 0xffff0000);
1199         tcg_gen_shri_tl(t2, t2, 16);
1200         tcg_gen_or_tl(t3, t3, t2);
1201         gen_store_mxu_gpr(t3, XRa);
1202     }
1203 }
1204 
1205 /*
1206  * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1207  * unsigned 16 bit pattern multiply and add/subtract.
1208  */
1209 static void gen_mxu_d16madl(DisasContext *ctx)
1210 {
1211     TCGv t0, t1, t2, t3;
1212     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1213 
1214     t0 = tcg_temp_new();
1215     t1 = tcg_temp_new();
1216     t2 = tcg_temp_new();
1217     t3 = tcg_temp_new();
1218 
1219     XRa = extract32(ctx->opcode, 6, 4);
1220     XRb = extract32(ctx->opcode, 10, 4);
1221     XRc = extract32(ctx->opcode, 14, 4);
1222     XRd = extract32(ctx->opcode, 18, 4);
1223     optn2 = extract32(ctx->opcode, 22, 2);
1224     aptn2 = extract32(ctx->opcode, 24, 2);
1225 
1226     gen_load_mxu_gpr(t1, XRb);
1227     tcg_gen_sextract_tl(t0, t1,  0, 16);
1228     tcg_gen_sextract_tl(t1, t1, 16, 16);
1229 
1230     gen_load_mxu_gpr(t3, XRc);
1231     tcg_gen_sextract_tl(t2, t3,  0, 16);
1232     tcg_gen_sextract_tl(t3, t3, 16, 16);
1233 
1234     switch (optn2) {
1235     case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1236         tcg_gen_mul_tl(t3, t1, t3);
1237         tcg_gen_mul_tl(t2, t0, t2);
1238         break;
1239     case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1240         tcg_gen_mul_tl(t3, t0, t3);
1241         tcg_gen_mul_tl(t2, t0, t2);
1242         break;
1243     case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1244         tcg_gen_mul_tl(t3, t1, t3);
1245         tcg_gen_mul_tl(t2, t1, t2);
1246         break;
1247     case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1248         tcg_gen_mul_tl(t3, t0, t3);
1249         tcg_gen_mul_tl(t2, t1, t2);
1250         break;
1251     }
1252     tcg_gen_extract_tl(t2, t2, 0, 16);
1253     tcg_gen_extract_tl(t3, t3, 0, 16);
1254 
1255     gen_load_mxu_gpr(t1, XRa);
1256     tcg_gen_extract_tl(t0, t1,  0, 16);
1257     tcg_gen_extract_tl(t1, t1, 16, 16);
1258 
1259     switch (aptn2) {
1260     case MXU_APTN2_AA:
1261         tcg_gen_add_tl(t3, t1, t3);
1262         tcg_gen_add_tl(t2, t0, t2);
1263         break;
1264     case MXU_APTN2_AS:
1265         tcg_gen_add_tl(t3, t1, t3);
1266         tcg_gen_sub_tl(t2, t0, t2);
1267         break;
1268     case MXU_APTN2_SA:
1269         tcg_gen_sub_tl(t3, t1, t3);
1270         tcg_gen_add_tl(t2, t0, t2);
1271         break;
1272     case MXU_APTN2_SS:
1273         tcg_gen_sub_tl(t3, t1, t3);
1274         tcg_gen_sub_tl(t2, t0, t2);
1275         break;
1276     }
1277 
1278     tcg_gen_andi_tl(t2, t2, 0xffff);
1279     tcg_gen_shli_tl(t3, t3, 16);
1280     tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
1281 }
1282 
1283 /*
1284  * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1285  * signed 16 bit pattern multiply and 32-bit add/subtract.
1286  */
1287 static void gen_mxu_s16mad(DisasContext *ctx)
1288 {
1289     TCGv t0, t1;
1290     uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
1291 
1292     t0 = tcg_temp_new();
1293     t1 = tcg_temp_new();
1294 
1295     XRa = extract32(ctx->opcode, 6, 4);
1296     XRb = extract32(ctx->opcode, 10, 4);
1297     XRc = extract32(ctx->opcode, 14, 4);
1298     XRd = extract32(ctx->opcode, 18, 4);
1299     optn2 = extract32(ctx->opcode, 22, 2);
1300     aptn1 = extract32(ctx->opcode, 24, 1);
1301     pad = extract32(ctx->opcode, 25, 1);
1302 
1303     if (pad) {
1304         /* FIXME check if it influence the result */
1305     }
1306 
1307     gen_load_mxu_gpr(t0, XRb);
1308     gen_load_mxu_gpr(t1, XRc);
1309 
1310     switch (optn2) {
1311     case MXU_OPTN2_WW: /* XRB.H*XRC.H */
1312         tcg_gen_sextract_tl(t0, t0, 16, 16);
1313         tcg_gen_sextract_tl(t1, t1, 16, 16);
1314         break;
1315     case MXU_OPTN2_LW: /* XRB.L*XRC.L */
1316         tcg_gen_sextract_tl(t0, t0,  0, 16);
1317         tcg_gen_sextract_tl(t1, t1,  0, 16);
1318         break;
1319     case MXU_OPTN2_HW: /* XRB.H*XRC.L */
1320         tcg_gen_sextract_tl(t0, t0, 16, 16);
1321         tcg_gen_sextract_tl(t1, t1,  0, 16);
1322         break;
1323     case MXU_OPTN2_XW: /* XRB.L*XRC.H */
1324         tcg_gen_sextract_tl(t0, t0,  0, 16);
1325         tcg_gen_sextract_tl(t1, t1, 16, 16);
1326         break;
1327     }
1328     tcg_gen_mul_tl(t0, t0, t1);
1329 
1330     gen_load_mxu_gpr(t1, XRa);
1331 
1332     switch (aptn1) {
1333     case MXU_APTN1_A:
1334         tcg_gen_add_tl(t1, t1, t0);
1335         break;
1336     case MXU_APTN1_S:
1337         tcg_gen_sub_tl(t1, t1, t0);
1338         break;
1339     }
1340 
1341     gen_store_mxu_gpr(t1, XRd);
1342 }
1343 
1344 /*
1345  * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1346  * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed (XRb) by unsigned
1347  *   (XRc) 8 bit multiply
1348  * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1349  *   and accumulate
1350  * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed (XRb) by unsigned
1351  *   (XRc) 8 bit multiply and accumulate
1351  */
1352 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
1353 {
1354     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1355     uint32_t XRa, XRb, XRc, XRd, aptn2;
1356 
1357     t0 = tcg_temp_new();
1358     t1 = tcg_temp_new();
1359     t2 = tcg_temp_new();
1360     t3 = tcg_temp_new();
1361     t4 = tcg_temp_new();
1362     t5 = tcg_temp_new();
1363     t6 = tcg_temp_new();
1364     t7 = tcg_temp_new();
1365 
1366     XRa = extract32(ctx->opcode, 6, 4);
1367     XRb = extract32(ctx->opcode, 10, 4);
1368     XRc = extract32(ctx->opcode, 14, 4);
1369     XRd = extract32(ctx->opcode, 18, 4);
1370     aptn2 = extract32(ctx->opcode, 24, 2);
1371 
1372     gen_load_mxu_gpr(t3, XRb);
1373     gen_load_mxu_gpr(t7, XRc);
1374 
1375     if (su) {
1376         /* Q8MULSU / Q8MACSU */
1377         tcg_gen_sextract_tl(t0, t3,  0, 8);
1378         tcg_gen_sextract_tl(t1, t3,  8, 8);
1379         tcg_gen_sextract_tl(t2, t3, 16, 8);
1380         tcg_gen_sextract_tl(t3, t3, 24, 8);
1381     } else {
1382         /* Q8MUL / Q8MAC */
1383         tcg_gen_extract_tl(t0, t3,  0, 8);
1384         tcg_gen_extract_tl(t1, t3,  8, 8);
1385         tcg_gen_extract_tl(t2, t3, 16, 8);
1386         tcg_gen_extract_tl(t3, t3, 24, 8);
1387     }
1388 
1389     tcg_gen_extract_tl(t4, t7,  0, 8);
1390     tcg_gen_extract_tl(t5, t7,  8, 8);
1391     tcg_gen_extract_tl(t6, t7, 16, 8);
1392     tcg_gen_extract_tl(t7, t7, 24, 8);
1393 
1394     tcg_gen_mul_tl(t0, t0, t4);
1395     tcg_gen_mul_tl(t1, t1, t5);
1396     tcg_gen_mul_tl(t2, t2, t6);
1397     tcg_gen_mul_tl(t3, t3, t7);
1398 
1399     if (mac) {
1400         gen_load_mxu_gpr(t4, XRd);
1401         gen_load_mxu_gpr(t5, XRa);
1402         tcg_gen_extract_tl(t6, t4,  0, 16);
1403         tcg_gen_extract_tl(t7, t4, 16, 16);
1404         if (aptn2 & 1) {
1405             tcg_gen_sub_tl(t0, t6, t0);
1406             tcg_gen_sub_tl(t1, t7, t1);
1407         } else {
1408             tcg_gen_add_tl(t0, t6, t0);
1409             tcg_gen_add_tl(t1, t7, t1);
1410         }
1411         tcg_gen_extract_tl(t6, t5,  0, 16);
1412         tcg_gen_extract_tl(t7, t5, 16, 16);
1413         if (aptn2 & 2) {
1414             tcg_gen_sub_tl(t2, t6, t2);
1415             tcg_gen_sub_tl(t3, t7, t3);
1416         } else {
1417             tcg_gen_add_tl(t2, t6, t2);
1418             tcg_gen_add_tl(t3, t7, t3);
1419         }
1420     }
1421 
1422     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
1423     tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
1424 
1425     gen_store_mxu_gpr(t0, XRd);
1426     gen_store_mxu_gpr(t1, XRa);
1427 }
1428 
1429 /*
1430  * Q8MADL  XRd, XRa, XRb, XRc
1431  *   Parallel quad unsigned 8 bit multiply and accumulate.
1432  *   e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
1433  */
1434 static void gen_mxu_q8madl(DisasContext *ctx)
1435 {
1436     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1437     uint32_t XRa, XRb, XRc, XRd, aptn2;
1438 
1439     t0 = tcg_temp_new();
1440     t1 = tcg_temp_new();
1441     t2 = tcg_temp_new();
1442     t3 = tcg_temp_new();
1443     t4 = tcg_temp_new();
1444     t5 = tcg_temp_new();
1445     t6 = tcg_temp_new();
1446     t7 = tcg_temp_new();
1447 
1448     XRa = extract32(ctx->opcode, 6, 4);
1449     XRb = extract32(ctx->opcode, 10, 4);
1450     XRc = extract32(ctx->opcode, 14, 4);
1451     XRd = extract32(ctx->opcode, 18, 4);
1452     aptn2 = extract32(ctx->opcode, 24, 2);
1453 
1454     gen_load_mxu_gpr(t3, XRb);
1455     gen_load_mxu_gpr(t7, XRc);
1456 
1457     tcg_gen_extract_tl(t0, t3,  0, 8);
1458     tcg_gen_extract_tl(t1, t3,  8, 8);
1459     tcg_gen_extract_tl(t2, t3, 16, 8);
1460     tcg_gen_extract_tl(t3, t3, 24, 8);
1461 
1462     tcg_gen_extract_tl(t4, t7,  0, 8);
1463     tcg_gen_extract_tl(t5, t7,  8, 8);
1464     tcg_gen_extract_tl(t6, t7, 16, 8);
1465     tcg_gen_extract_tl(t7, t7, 24, 8);
1466 
1467     tcg_gen_mul_tl(t0, t0, t4);
1468     tcg_gen_mul_tl(t1, t1, t5);
1469     tcg_gen_mul_tl(t2, t2, t6);
1470     tcg_gen_mul_tl(t3, t3, t7);
1471 
1472     gen_load_mxu_gpr(t4, XRa);
1473     tcg_gen_extract_tl(t6, t4, 0, 8);
1474     tcg_gen_extract_tl(t7, t4, 8, 8);
1475     if (aptn2 & 1) {
1476         tcg_gen_sub_tl(t0, t6, t0);
1477         tcg_gen_sub_tl(t1, t7, t1);
1478     } else {
1479         tcg_gen_add_tl(t0, t6, t0);
1480         tcg_gen_add_tl(t1, t7, t1);
1481     }
1482     tcg_gen_extract_tl(t6, t4, 16, 8);
1483     tcg_gen_extract_tl(t7, t4, 24, 8);
1484     if (aptn2 & 2) {
1485         tcg_gen_sub_tl(t2, t6, t2);
1486         tcg_gen_sub_tl(t3, t7, t3);
1487     } else {
1488         tcg_gen_add_tl(t2, t6, t2);
1489         tcg_gen_add_tl(t3, t7, t3);
1490     }
1491 
1492     tcg_gen_andi_tl(t5, t0, 0xff);
1493     tcg_gen_deposit_tl(t5, t5, t1,  8, 8);
1494     tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
1495     tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
1496 
1497     gen_store_mxu_gpr(t5, XRd);
1498 }
1499 
1500 /*
1501  * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
1502  * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1503  *   in reversed byte seq.
1504  * S32LDI  XRa, Rb, S12 - Load a word from memory to XRF,
1505  *   post modify base address GPR.
1506  * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1507  *   post modify base address GPR and load in reversed byte seq.
1508  */
1509 static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
1510 {
1511     TCGv t0, t1;
1512     uint32_t XRa, Rb, s12;
1513 
1514     t0 = tcg_temp_new();
1515     t1 = tcg_temp_new();
1516 
1517     XRa = extract32(ctx->opcode, 6, 4);
1518     s12 = sextract32(ctx->opcode, 10, 10);
1519     Rb = extract32(ctx->opcode, 21, 5);
1520 
1521     gen_load_gpr(t0, Rb);
1522     tcg_gen_movi_tl(t1, s12 * 4);
1523     tcg_gen_add_tl(t0, t0, t1);
1524 
1525     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1526                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1527                         ctx->default_tcg_memop_mask);
1528     gen_store_mxu_gpr(t1, XRa);
1529 
1530     if (postinc) {
1531         gen_store_gpr(t0, Rb);
1532     }
1533 }
1534 
1535 /*
1536  * S32STD  XRa, Rb, S12 - Store a word from XRF to memory
1537  * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1538  *   in reversed byte seq.
1539  * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
1540  *   post modify base address GPR.
1541  * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1542  *   post modify base address GPR and store in reversed byte seq.
1543  */
1544 static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
1545 {
1546     TCGv t0, t1;
1547     uint32_t XRa, Rb, s12;
1548 
1549     t0 = tcg_temp_new();
1550     t1 = tcg_temp_new();
1551 
1552     XRa = extract32(ctx->opcode, 6, 4);
1553     s12 = sextract32(ctx->opcode, 10, 10);
1554     Rb = extract32(ctx->opcode, 21, 5);
1555 
1556     gen_load_gpr(t0, Rb);
1557     tcg_gen_movi_tl(t1, s12 * 4);
1558     tcg_gen_add_tl(t0, t0, t1);
1559 
1560     gen_load_mxu_gpr(t1, XRa);
1561     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1562                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1563                         ctx->default_tcg_memop_mask);
1564 
1565     if (postinc) {
1566         gen_store_gpr(t0, Rb);
1567     }
1568 }
1569 
1570 /*
1571  * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1572  * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1573  *   in reversed byte seq.
1574  * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1575  *   post modify base address GPR.
1576  * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1577  *   post modify base address GPR and load in reversed byte seq.
1578  */
1579 static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
1580                              bool postinc, uint32_t strd2)
1581 {
1582     TCGv t0, t1;
1583     uint32_t XRa, Rb, Rc;
1584 
1585     t0 = tcg_temp_new();
1586     t1 = tcg_temp_new();
1587 
1588     XRa = extract32(ctx->opcode, 6, 4);
1589     Rc = extract32(ctx->opcode, 16, 5);
1590     Rb = extract32(ctx->opcode, 21, 5);
1591 
1592     gen_load_gpr(t0, Rb);
1593     gen_load_gpr(t1, Rc);
1594     tcg_gen_shli_tl(t1, t1, strd2);
1595     tcg_gen_add_tl(t0, t0, t1);
1596 
1597     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1598                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1599                         ctx->default_tcg_memop_mask);
1600     gen_store_mxu_gpr(t1, XRa);
1601 
1602     if (postinc) {
1603         gen_store_gpr(t0, Rb);
1604     }
1605 }
1606 
1607 /*
1608  * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1609  * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1610  *   sign extending to GPR size.
1611  * LXH  Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1612  *   sign extending to GPR size.
1613  * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1614  *   zero extending to GPR size.
1615  * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1616  *   zero extending to GPR size.
1617  */
1618 static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
1619 {
1620     TCGv t0, t1;
1621     uint32_t Ra, Rb, Rc;
1622 
1623     t0 = tcg_temp_new();
1624     t1 = tcg_temp_new();
1625 
1626     Ra = extract32(ctx->opcode, 11, 5);
1627     Rc = extract32(ctx->opcode, 16, 5);
1628     Rb = extract32(ctx->opcode, 21, 5);
1629 
1630     gen_load_gpr(t0, Rb);
1631     gen_load_gpr(t1, Rc);
1632     tcg_gen_shli_tl(t1, t1, strd2);
1633     tcg_gen_add_tl(t0, t0, t1);
1634 
1635     tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
1636     gen_store_gpr(t1, Ra);
1637 }
1638 
1639 /*
1640  * S32STDV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
1641  * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
1642  *   in reversed byte seq.
1643  * S32SDIV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
1644  *   post modify base address GPR.
1645  * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
1646  *   post modify base address GPR and store in reversed byte seq.
1647  */
1648 static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
1649                              bool postinc, uint32_t strd2)
1650 {
1651     TCGv t0, t1;
1652     uint32_t XRa, Rb, Rc;
1653 
1654     t0 = tcg_temp_new();
1655     t1 = tcg_temp_new();
1656 
1657     XRa = extract32(ctx->opcode, 6, 4);
1658     Rc = extract32(ctx->opcode, 16, 5);
1659     Rb = extract32(ctx->opcode, 21, 5);
1660 
1661     gen_load_gpr(t0, Rb);
1662     gen_load_gpr(t1, Rc);
1663     tcg_gen_shli_tl(t1, t1, strd2);
1664     tcg_gen_add_tl(t0, t0, t1);
1665 
1666     gen_load_mxu_gpr(t1, XRa);
1667     tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1668                        (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
1669                         ctx->default_tcg_memop_mask);
1670 
1671     if (postinc) {
1672         gen_store_gpr(t0, Rb);
1673     }
1674 }
1675 
1676 /*
1677  *                 MXU instruction category: logic
1678  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1679  *
1680  *               S32NOR    S32AND    S32OR    S32XOR
1681  */
1682 
1683 /*
1684  *  S32NOR XRa, XRb, XRc
1685  *    Update XRa with the result of logical bitwise 'nor' operation
1686  *    applied to the content of XRb and XRc.
1687  */
1688 static void gen_mxu_S32NOR(DisasContext *ctx)
1689 {
1690     uint32_t pad, XRc, XRb, XRa;
1691 
1692     pad = extract32(ctx->opcode, 21, 5);
1693     XRc = extract32(ctx->opcode, 14, 4);
1694     XRb = extract32(ctx->opcode, 10, 4);
1695     XRa = extract32(ctx->opcode,  6, 4);
1696 
1697     if (unlikely(pad != 0)) {
1698         /* opcode padding incorrect -> do nothing */
1699     } else if (unlikely(XRa == 0)) {
1700         /* destination is zero register -> do nothing */
1701     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1702         /* both operands zero registers -> just set destination to all 1s */
1703         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
1704     } else if (unlikely(XRb == 0)) {
1705         /* XRb zero register -> just set destination to the negation of XRc */
1706         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1707     } else if (unlikely(XRc == 0)) {
1708         /* XRa zero register -> just set destination to the negation of XRb */
1709         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1710     } else if (unlikely(XRb == XRc)) {
1711         /* both operands same -> just set destination to the negation of XRb */
1712         tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1713     } else {
1714         /* the most general case */
1715         tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1716     }
1717 }
1718 
1719 /*
1720  *  S32AND XRa, XRb, XRc
1721  *    Update XRa with the result of logical bitwise 'and' operation
1722  *    applied to the content of XRb and XRc.
1723  */
1724 static void gen_mxu_S32AND(DisasContext *ctx)
1725 {
1726     uint32_t pad, XRc, XRb, XRa;
1727 
1728     pad = extract32(ctx->opcode, 21, 5);
1729     XRc = extract32(ctx->opcode, 14, 4);
1730     XRb = extract32(ctx->opcode, 10, 4);
1731     XRa = extract32(ctx->opcode,  6, 4);
1732 
1733     if (unlikely(pad != 0)) {
1734         /* opcode padding incorrect -> do nothing */
1735     } else if (unlikely(XRa == 0)) {
1736         /* destination is zero register -> do nothing */
1737     } else if (unlikely((XRb == 0) || (XRc == 0))) {
1738         /* one of operands zero register -> just set destination to all 0s */
1739         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1740     } else if (unlikely(XRb == XRc)) {
1741         /* both operands same -> just set destination to one of them */
1742         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1743     } else {
1744         /* the most general case */
1745         tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1746     }
1747 }
1748 
1749 /*
1750  *  S32OR XRa, XRb, XRc
1751  *    Update XRa with the result of logical bitwise 'or' operation
1752  *    applied to the content of XRb and XRc.
1753  */
1754 static void gen_mxu_S32OR(DisasContext *ctx)
1755 {
1756     uint32_t pad, XRc, XRb, XRa;
1757 
1758     pad = extract32(ctx->opcode, 21, 5);
1759     XRc = extract32(ctx->opcode, 14, 4);
1760     XRb = extract32(ctx->opcode, 10, 4);
1761     XRa = extract32(ctx->opcode,  6, 4);
1762 
1763     if (unlikely(pad != 0)) {
1764         /* opcode padding incorrect -> do nothing */
1765     } else if (unlikely(XRa == 0)) {
1766         /* destination is zero register -> do nothing */
1767     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1768         /* both operands zero registers -> just set destination to all 0s */
1769         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1770     } else if (unlikely(XRb == 0)) {
1771         /* XRb zero register -> just set destination to the content of XRc */
1772         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1773     } else if (unlikely(XRc == 0)) {
1774         /* XRc zero register -> just set destination to the content of XRb */
1775         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1776     } else if (unlikely(XRb == XRc)) {
1777         /* both operands same -> just set destination to one of them */
1778         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1779     } else {
1780         /* the most general case */
1781         tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1782     }
1783 }
1784 
1785 /*
1786  *  S32XOR XRa, XRb, XRc
1787  *    Update XRa with the result of logical bitwise 'xor' operation
1788  *    applied to the content of XRb and XRc.
1789  */
1790 static void gen_mxu_S32XOR(DisasContext *ctx)
1791 {
1792     uint32_t pad, XRc, XRb, XRa;
1793 
1794     pad = extract32(ctx->opcode, 21, 5);
1795     XRc = extract32(ctx->opcode, 14, 4);
1796     XRb = extract32(ctx->opcode, 10, 4);
1797     XRa = extract32(ctx->opcode,  6, 4);
1798 
1799     if (unlikely(pad != 0)) {
1800         /* opcode padding incorrect -> do nothing */
1801     } else if (unlikely(XRa == 0)) {
1802         /* destination is zero register -> do nothing */
1803     } else if (unlikely((XRb == 0) && (XRc == 0))) {
1804         /* both operands zero registers -> just set destination to all 0s */
1805         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1806     } else if (unlikely(XRb == 0)) {
1807         /* XRb zero register -> just set destination to the content of XRc */
1808         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1809     } else if (unlikely(XRc == 0)) {
1810         /* XRc zero register -> just set destination to the content of XRb */
1811         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1812     } else if (unlikely(XRb == XRc)) {
1813         /* both operands same -> just set destination to all 0s */
1814         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1815     } else {
1816         /* the most general case */
1817         tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1818     }
1819 }
1820 
1821 /*
1822  *                 MXU instruction category: shift
1823  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1824  *
1825  *               D32SLL    D32SLR    D32SAR    D32SARL
1826  *               D32SLLV   D32SLRV   D32SARV   D32SARW
1827  *               Q16SLL    Q16SLR    Q16SAR
1828  *               Q16SLLV   Q16SLRV   Q16SARV
1829  */
1830 
1831 /*
1832  *  D32SLL XRa, XRd, XRb, XRc, SFT4
1833  *    Dual 32-bit shift left from XRb and XRc to SFT4
1834  *    bits (0..15). Store to XRa and XRd respectively.
1835  *  D32SLR XRa, XRd, XRb, XRc, SFT4
1836  *    Dual 32-bit shift logic right from XRb and XRc
1837  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1838  *  D32SAR XRa, XRd, XRb, XRc, SFT4
1839  *    Dual 32-bit shift arithmetic right from XRb and XRc
1840  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1841  */
1842 static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
1843 {
1844     uint32_t XRa, XRb, XRc, XRd, sft4;
1845 
1846     XRa  = extract32(ctx->opcode,  6, 4);
1847     XRb  = extract32(ctx->opcode, 10, 4);
1848     XRc  = extract32(ctx->opcode, 14, 4);
1849     XRd  = extract32(ctx->opcode, 18, 4);
1850     sft4 = extract32(ctx->opcode, 22, 4);
1851 
1852     TCGv t0 = tcg_temp_new();
1853     TCGv t1 = tcg_temp_new();
1854 
1855     gen_load_mxu_gpr(t0, XRb);
1856     gen_load_mxu_gpr(t1, XRc);
1857 
1858     if (right) {
1859         if (arithmetic) {
1860             tcg_gen_sari_tl(t0, t0, sft4);
1861             tcg_gen_sari_tl(t1, t1, sft4);
1862         } else {
1863             tcg_gen_shri_tl(t0, t0, sft4);
1864             tcg_gen_shri_tl(t1, t1, sft4);
1865         }
1866     } else {
1867         tcg_gen_shli_tl(t0, t0, sft4);
1868         tcg_gen_shli_tl(t1, t1, sft4);
1869     }
1870     gen_store_mxu_gpr(t0, XRa);
1871     gen_store_mxu_gpr(t1, XRd);
1872 }
1873 
1874 /*
1875  *  D32SLLV XRa, XRd, rs
1876  *    Dual 32-bit shift left from XRa and XRd to rs[3:0]
1877  *    bits. Store back to XRa and XRd respectively.
1878  *  D32SLRV XRa, XRd, rs
1879  *    Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1880  *    bits. Store back to XRa and XRd respectively.
1881  *  D32SARV XRa, XRd, rs
1882  *    Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1883  *    bits. Store back to XRa and XRd respectively.
1884  */
1885 static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
1886 {
1887     uint32_t XRa, XRd, rs;
1888 
1889     XRa = extract32(ctx->opcode, 10, 4);
1890     XRd = extract32(ctx->opcode, 14, 4);
1891     rs  = extract32(ctx->opcode, 21, 5);
1892 
1893     TCGv t0 = tcg_temp_new();
1894     TCGv t1 = tcg_temp_new();
1895     TCGv t2 = tcg_temp_new();
1896 
1897     gen_load_mxu_gpr(t0, XRa);
1898     gen_load_mxu_gpr(t1, XRd);
1899     gen_load_gpr(t2, rs);
1900     tcg_gen_andi_tl(t2, t2, 0x0f);
1901 
1902     if (right) {
1903         if (arithmetic) {
1904             tcg_gen_sar_tl(t0, t0, t2);
1905             tcg_gen_sar_tl(t1, t1, t2);
1906         } else {
1907             tcg_gen_shr_tl(t0, t0, t2);
1908             tcg_gen_shr_tl(t1, t1, t2);
1909         }
1910     } else {
1911         tcg_gen_shl_tl(t0, t0, t2);
1912         tcg_gen_shl_tl(t1, t1, t2);
1913     }
1914     gen_store_mxu_gpr(t0, XRa);
1915     gen_store_mxu_gpr(t1, XRd);
1916 }
1917 
1918 /*
1919  *  D32SARL XRa, XRb, XRc, SFT4
1920  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1921  *    to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1922  *
1923  *  D32SARW XRa, XRb, XRc, rb
1924  *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1925  *    to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1926  */
1927 static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
1928 {
1929     uint32_t XRa, XRb, XRc, rb;
1930 
1931     XRa = extract32(ctx->opcode,  6, 4);
1932     XRb = extract32(ctx->opcode, 10, 4);
1933     XRc = extract32(ctx->opcode, 14, 4);
1934     rb  = extract32(ctx->opcode, 21, 5);
1935 
1936     if (unlikely(XRa == 0)) {
1937         /* destination is zero register -> do nothing */
1938     } else {
1939         TCGv t0 = tcg_temp_new();
1940         TCGv t1 = tcg_temp_new();
1941         TCGv t2 = tcg_temp_new();
1942 
1943         if (!sarw) {
1944             /* Make SFT4 from rb field */
1945             tcg_gen_movi_tl(t2, rb >> 1);
1946         } else {
1947             gen_load_gpr(t2, rb);
1948             tcg_gen_andi_tl(t2, t2, 0x0f);
1949         }
1950         gen_load_mxu_gpr(t0, XRb);
1951         gen_load_mxu_gpr(t1, XRc);
1952         tcg_gen_sar_tl(t0, t0, t2);
1953         tcg_gen_sar_tl(t1, t1, t2);
1954         tcg_gen_extract_tl(t2, t1, 0, 16);
1955         tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
1956         gen_store_mxu_gpr(t2, XRa);
1957     }
1958 }
1959 
1960 /*
1961  *  Q16SLL XRa, XRd, XRb, XRc, SFT4
1962  *    Quad 16-bit shift left from XRb and XRc to SFT4
1963  *    bits (0..15). Store to XRa and XRd respectively.
1964  *  Q16SLR XRa, XRd, XRb, XRc, SFT4
1965  *    Quad 16-bit shift logic right from XRb and XRc
1966  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1967  *  Q16SAR XRa, XRd, XRb, XRc, SFT4
1968  *    Quad 16-bit shift arithmetic right from XRb and XRc
1969  *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1970  */
1971 static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
1972 {
1973     uint32_t XRa, XRb, XRc, XRd, sft4;
1974 
1975     XRa  = extract32(ctx->opcode,  6, 4);
1976     XRb  = extract32(ctx->opcode, 10, 4);
1977     XRc  = extract32(ctx->opcode, 14, 4);
1978     XRd  = extract32(ctx->opcode, 18, 4);
1979     sft4 = extract32(ctx->opcode, 22, 4);
1980 
1981     TCGv t0 = tcg_temp_new();
1982     TCGv t1 = tcg_temp_new();
1983     TCGv t2 = tcg_temp_new();
1984     TCGv t3 = tcg_temp_new();
1985 
1986     gen_load_mxu_gpr(t0, XRb);
1987     gen_load_mxu_gpr(t2, XRc);
1988 
1989     if (arithmetic) {
1990         tcg_gen_sextract_tl(t1, t0, 16, 16);
1991         tcg_gen_sextract_tl(t0, t0,  0, 16);
1992         tcg_gen_sextract_tl(t3, t2, 16, 16);
1993         tcg_gen_sextract_tl(t2, t2,  0, 16);
1994     } else {
1995         tcg_gen_extract_tl(t1, t0, 16, 16);
1996         tcg_gen_extract_tl(t0, t0,  0, 16);
1997         tcg_gen_extract_tl(t3, t2, 16, 16);
1998         tcg_gen_extract_tl(t2, t2,  0, 16);
1999     }
2000 
2001     if (right) {
2002         if (arithmetic) {
2003             tcg_gen_sari_tl(t0, t0, sft4);
2004             tcg_gen_sari_tl(t1, t1, sft4);
2005             tcg_gen_sari_tl(t2, t2, sft4);
2006             tcg_gen_sari_tl(t3, t3, sft4);
2007         } else {
2008             tcg_gen_shri_tl(t0, t0, sft4);
2009             tcg_gen_shri_tl(t1, t1, sft4);
2010             tcg_gen_shri_tl(t2, t2, sft4);
2011             tcg_gen_shri_tl(t3, t3, sft4);
2012         }
2013     } else {
2014         tcg_gen_shli_tl(t0, t0, sft4);
2015         tcg_gen_shli_tl(t1, t1, sft4);
2016         tcg_gen_shli_tl(t2, t2, sft4);
2017         tcg_gen_shli_tl(t3, t3, sft4);
2018     }
2019     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2020     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2021 
2022     gen_store_mxu_gpr(t0, XRa);
2023     gen_store_mxu_gpr(t2, XRd);
2024 }
2025 
2026 /*
2027  *  Q16SLLV XRa, XRd, rs
2028  *    Quad 16-bit shift left from XRa and XRd to rs[3:0]
2029  *    bits. Store to XRa and XRd respectively.
2030  *  Q16SLRV XRa, XRd, rs
2031  *    Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
2032  *    bits. Store to XRa and XRd respectively.
2033  *  Q16SARV XRa, XRd, rs
2034  *    Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
2035  *    bits. Store to XRa and XRd respectively.
2036  */
2037 static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
2038 {
2039     uint32_t XRa, XRd, rs;
2040 
2041     XRa = extract32(ctx->opcode, 10, 4);
2042     XRd = extract32(ctx->opcode, 14, 4);
2043     rs  = extract32(ctx->opcode, 21, 5);
2044 
2045     TCGv t0 = tcg_temp_new();
2046     TCGv t1 = tcg_temp_new();
2047     TCGv t2 = tcg_temp_new();
2048     TCGv t3 = tcg_temp_new();
2049     TCGv t5 = tcg_temp_new();
2050 
2051     gen_load_mxu_gpr(t0, XRa);
2052     gen_load_mxu_gpr(t2, XRd);
2053     gen_load_gpr(t5, rs);
2054     tcg_gen_andi_tl(t5, t5, 0x0f);
2055 
2056 
2057     if (arithmetic) {
2058         tcg_gen_sextract_tl(t1, t0, 16, 16);
2059         tcg_gen_sextract_tl(t0, t0,  0, 16);
2060         tcg_gen_sextract_tl(t3, t2, 16, 16);
2061         tcg_gen_sextract_tl(t2, t2,  0, 16);
2062     } else {
2063         tcg_gen_extract_tl(t1, t0, 16, 16);
2064         tcg_gen_extract_tl(t0, t0,  0, 16);
2065         tcg_gen_extract_tl(t3, t2, 16, 16);
2066         tcg_gen_extract_tl(t2, t2,  0, 16);
2067     }
2068 
2069     if (right) {
2070         if (arithmetic) {
2071             tcg_gen_sar_tl(t0, t0, t5);
2072             tcg_gen_sar_tl(t1, t1, t5);
2073             tcg_gen_sar_tl(t2, t2, t5);
2074             tcg_gen_sar_tl(t3, t3, t5);
2075         } else {
2076             tcg_gen_shr_tl(t0, t0, t5);
2077             tcg_gen_shr_tl(t1, t1, t5);
2078             tcg_gen_shr_tl(t2, t2, t5);
2079             tcg_gen_shr_tl(t3, t3, t5);
2080         }
2081     } else {
2082         tcg_gen_shl_tl(t0, t0, t5);
2083         tcg_gen_shl_tl(t1, t1, t5);
2084         tcg_gen_shl_tl(t2, t2, t5);
2085         tcg_gen_shl_tl(t3, t3, t5);
2086     }
2087     tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2088     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2089 
2090     gen_store_mxu_gpr(t0, XRa);
2091     gen_store_mxu_gpr(t2, XRd);
2092 }
2093 
2094 /*
2095  *                   MXU instruction category max/min/avg
2096  *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2097  *
2098  *                     S32MAX     D16MAX     Q8MAX
2099  *                     S32MIN     D16MIN     Q8MIN
2100  *                     S32SLT     D16SLT     Q8SLT
2101  *                                           Q8SLTU
2102  *                                D16AVG     Q8AVG
2103  *                                D16AVGR    Q8AVGR
2104  *                     S32MOVZ    D16MOVZ    Q8MOVZ
2105  *                     S32MOVN    D16MOVN    Q8MOVN
2106  */
2107 
2108 /*
2109  *  S32MAX XRa, XRb, XRc
2110  *    Update XRa with the maximum of signed 32-bit integers contained
2111  *    in XRb and XRc.
2112  *
2113  *  S32MIN XRa, XRb, XRc
2114  *    Update XRa with the minimum of signed 32-bit integers contained
2115  *    in XRb and XRc.
2116  */
2117 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
2118 {
2119     uint32_t pad, opc, XRc, XRb, XRa;
2120 
2121     pad = extract32(ctx->opcode, 21, 5);
2122     opc = extract32(ctx->opcode, 18, 3);
2123     XRc = extract32(ctx->opcode, 14, 4);
2124     XRb = extract32(ctx->opcode, 10, 4);
2125     XRa = extract32(ctx->opcode,  6, 4);
2126 
2127     if (unlikely(pad != 0)) {
2128         /* opcode padding incorrect -> do nothing */
2129     } else if (unlikely(XRa == 0)) {
2130         /* destination is zero register -> do nothing */
2131     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2132         /* both operands zero registers -> just set destination to zero */
2133         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2134     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2135         /* exactly one operand is zero register - find which one is not...*/
2136         uint32_t XRx = XRb ? XRb : XRc;
2137         /* ...and do max/min operation with one operand 0 */
2138         if (opc == OPC_MXU_S32MAX) {
2139             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2140         } else {
2141             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2142         }
2143     } else if (unlikely(XRb == XRc)) {
2144         /* both operands same -> just set destination to one of them */
2145         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2146     } else {
2147         /* the most general case */
2148         if (opc == OPC_MXU_S32MAX) {
2149             tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2150                                                mxu_gpr[XRc - 1]);
2151         } else {
2152             tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2153                                                mxu_gpr[XRc - 1]);
2154         }
2155     }
2156 }
2157 
2158 /*
2159  *  D16MAX
2160  *    Update XRa with the 16-bit-wise maximums of signed integers
2161  *    contained in XRb and XRc.
2162  *
2163  *  D16MIN
2164  *    Update XRa with the 16-bit-wise minimums of signed integers
2165  *    contained in XRb and XRc.
2166  */
2167 static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
2168 {
2169     uint32_t pad, opc, XRc, XRb, XRa;
2170 
2171     pad = extract32(ctx->opcode, 21, 5);
2172     opc = extract32(ctx->opcode, 18, 3);
2173     XRc = extract32(ctx->opcode, 14, 4);
2174     XRb = extract32(ctx->opcode, 10, 4);
2175     XRa = extract32(ctx->opcode,  6, 4);
2176 
2177     if (unlikely(pad != 0)) {
2178         /* opcode padding incorrect -> do nothing */
2179     } else if (unlikely(XRa == 0)) {
2180         /* destination is zero register -> do nothing */
2181     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2182         /* both operands zero registers -> just set destination to zero */
2183         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2184     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2185         /* exactly one operand is zero register - find which one is not...*/
2186         uint32_t XRx = XRb ? XRb : XRc;
2187         /* ...and do half-word-wise max/min with one operand 0 */
2188         TCGv_i32 t0 = tcg_temp_new();
2189         TCGv_i32 t1 = tcg_constant_i32(0);
2190         TCGv_i32 t2 = tcg_temp_new();
2191 
2192         /* the left half-word first */
2193         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
2194         if (opc == OPC_MXU_D16MAX) {
2195             tcg_gen_smax_i32(t2, t0, t1);
2196         } else {
2197             tcg_gen_smin_i32(t2, t0, t1);
2198         }
2199 
2200         /* the right half-word */
2201         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
2202         /* move half-words to the leftmost position */
2203         tcg_gen_shli_i32(t0, t0, 16);
2204         /* t0 will be max/min of t0 and t1 */
2205         if (opc == OPC_MXU_D16MAX) {
2206             tcg_gen_smax_i32(t0, t0, t1);
2207         } else {
2208             tcg_gen_smin_i32(t0, t0, t1);
2209         }
2210         /* return resulting half-words to its original position */
2211         tcg_gen_shri_i32(t0, t0, 16);
2212         /* finally update the destination */
2213         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2214     } else if (unlikely(XRb == XRc)) {
2215         /* both operands same -> just set destination to one of them */
2216         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2217     } else {
2218         /* the most general case */
2219         TCGv_i32 t0 = tcg_temp_new();
2220         TCGv_i32 t1 = tcg_temp_new();
2221         TCGv_i32 t2 = tcg_temp_new();
2222 
2223         /* the left half-word first */
2224         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
2225         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
2226         if (opc == OPC_MXU_D16MAX) {
2227             tcg_gen_smax_i32(t2, t0, t1);
2228         } else {
2229             tcg_gen_smin_i32(t2, t0, t1);
2230         }
2231 
2232         /* the right half-word */
2233         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
2234         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
2235         /* move half-words to the leftmost position */
2236         tcg_gen_shli_i32(t0, t0, 16);
2237         tcg_gen_shli_i32(t1, t1, 16);
2238         /* t0 will be max/min of t0 and t1 */
2239         if (opc == OPC_MXU_D16MAX) {
2240             tcg_gen_smax_i32(t0, t0, t1);
2241         } else {
2242             tcg_gen_smin_i32(t0, t0, t1);
2243         }
2244         /* return resulting half-words to its original position */
2245         tcg_gen_shri_i32(t0, t0, 16);
2246         /* finally update the destination */
2247         tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2248     }
2249 }
2250 
2251 /*
2252  *  Q8MAX
2253  *    Update XRa with the 8-bit-wise maximums of signed integers
2254  *    contained in XRb and XRc.
2255  *
2256  *  Q8MIN
2257  *    Update XRa with the 8-bit-wise minimums of signed integers
2258  *    contained in XRb and XRc.
2259  */
2260 static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
2261 {
2262     uint32_t pad, opc, XRc, XRb, XRa;
2263 
2264     pad = extract32(ctx->opcode, 21, 5);
2265     opc = extract32(ctx->opcode, 18, 3);
2266     XRc = extract32(ctx->opcode, 14, 4);
2267     XRb = extract32(ctx->opcode, 10, 4);
2268     XRa = extract32(ctx->opcode,  6, 4);
2269 
2270     if (unlikely(pad != 0)) {
2271         /* opcode padding incorrect -> do nothing */
2272     } else if (unlikely(XRa == 0)) {
2273         /* destination is zero register -> do nothing */
2274     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2275         /* both operands zero registers -> just set destination to zero */
2276         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2277     } else if (unlikely((XRb == 0) || (XRc == 0))) {
2278         /* exactly one operand is zero register - make it be the first...*/
2279         uint32_t XRx = XRb ? XRb : XRc;
2280         /* ...and do byte-wise max/min with one operand 0 */
2281         TCGv_i32 t0 = tcg_temp_new();
2282         TCGv_i32 t1 = tcg_constant_i32(0);
2283         TCGv_i32 t2 = tcg_temp_new();
2284         int32_t i;
2285 
2286         /* the leftmost byte (byte 3) first */
2287         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
2288         if (opc == OPC_MXU_Q8MAX) {
2289             tcg_gen_smax_i32(t2, t0, t1);
2290         } else {
2291             tcg_gen_smin_i32(t2, t0, t1);
2292         }
2293 
2294         /* bytes 2, 1, 0 */
2295         for (i = 2; i >= 0; i--) {
2296             /* extract the byte */
2297             tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
2298             /* move the byte to the leftmost position */
2299             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2300             /* t0 will be max/min of t0 and t1 */
2301             if (opc == OPC_MXU_Q8MAX) {
2302                 tcg_gen_smax_i32(t0, t0, t1);
2303             } else {
2304                 tcg_gen_smin_i32(t0, t0, t1);
2305             }
2306             /* return resulting byte to its original position */
2307             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2308             /* finally update the destination */
2309             tcg_gen_or_i32(t2, t2, t0);
2310         }
2311         gen_store_mxu_gpr(t2, XRa);
2312     } else if (unlikely(XRb == XRc)) {
2313         /* both operands same -> just set destination to one of them */
2314         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2315     } else {
2316         /* the most general case */
2317         TCGv_i32 t0 = tcg_temp_new();
2318         TCGv_i32 t1 = tcg_temp_new();
2319         TCGv_i32 t2 = tcg_temp_new();
2320         int32_t i;
2321 
2322         /* the leftmost bytes (bytes 3) first */
2323         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
2324         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
2325         if (opc == OPC_MXU_Q8MAX) {
2326             tcg_gen_smax_i32(t2, t0, t1);
2327         } else {
2328             tcg_gen_smin_i32(t2, t0, t1);
2329         }
2330 
2331         /* bytes 2, 1, 0 */
2332         for (i = 2; i >= 0; i--) {
2333             /* extract corresponding bytes */
2334             tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
2335             tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
2336             /* move the bytes to the leftmost position */
2337             tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2338             tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
2339             /* t0 will be max/min of t0 and t1 */
2340             if (opc == OPC_MXU_Q8MAX) {
2341                 tcg_gen_smax_i32(t0, t0, t1);
2342             } else {
2343                 tcg_gen_smin_i32(t0, t0, t1);
2344             }
2345             /* return resulting byte to its original position */
2346             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2347             /* finally update the destination */
2348             tcg_gen_or_i32(t2, t2, t0);
2349         }
2350         gen_store_mxu_gpr(t2, XRa);
2351     }
2352 }
2353 
2354 /*
2355  *  Q8SLT
2356  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2357  *    on per-byte basis.
2358  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2359  *
2360  *  Q8SLTU
2361  *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
2362  *    on per-byte basis.
2363  *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2364  */
2365 static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
2366 {
2367     uint32_t pad, XRc, XRb, XRa;
2368 
2369     pad = extract32(ctx->opcode, 21, 5);
2370     XRc = extract32(ctx->opcode, 14, 4);
2371     XRb = extract32(ctx->opcode, 10, 4);
2372     XRa = extract32(ctx->opcode,  6, 4);
2373 
2374     if (unlikely(pad != 0)) {
2375         /* opcode padding incorrect -> do nothing */
2376     } else if (unlikely(XRa == 0)) {
2377         /* destination is zero register -> do nothing */
2378     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2379         /* both operands zero registers -> just set destination to zero */
2380         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2381     } else if (unlikely(XRb == XRc)) {
2382         /* both operands same registers -> just set destination to zero */
2383         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2384     } else {
2385         /* the most general case */
2386         TCGv t0 = tcg_temp_new();
2387         TCGv t1 = tcg_temp_new();
2388         TCGv t2 = tcg_temp_new();
2389         TCGv t3 = tcg_temp_new();
2390         TCGv t4 = tcg_temp_new();
2391 
2392         gen_load_mxu_gpr(t3, XRb);
2393         gen_load_mxu_gpr(t4, XRc);
2394         tcg_gen_movi_tl(t2, 0);
2395 
2396         for (int i = 0; i < 4; i++) {
2397             if (sltu) {
2398                 tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2399                 tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2400             } else {
2401                 tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
2402                 tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
2403             }
2404             tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2405             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2406         }
2407         gen_store_mxu_gpr(t2, XRa);
2408     }
2409 }
2410 
2411 /*
2412  *  S32SLT
2413  *    Update XRa with the signed "set less than" comparison of XRb and XRc.
2414  *    a.k.a. XRa = XRb < XRc ? 1 : 0;
2415  */
2416 static void gen_mxu_S32SLT(DisasContext *ctx)
2417 {
2418     uint32_t pad, XRc, XRb, XRa;
2419 
2420     pad = extract32(ctx->opcode, 21, 5);
2421     XRc = extract32(ctx->opcode, 14, 4);
2422     XRb = extract32(ctx->opcode, 10, 4);
2423     XRa = extract32(ctx->opcode,  6, 4);
2424 
2425     if (unlikely(pad != 0)) {
2426         /* opcode padding incorrect -> do nothing */
2427     } else if (unlikely(XRa == 0)) {
2428         /* destination is zero register -> do nothing */
2429     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2430         /* both operands zero registers -> just set destination to zero */
2431         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2432     } else if (unlikely(XRb == XRc)) {
2433         /* both operands same registers -> just set destination to zero */
2434         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2435     } else {
2436         /* the most general case */
2437         tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
2438                            mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
2439     }
2440 }
2441 
2442 /*
2443  *  D16SLT
2444  *    Update XRa with the signed "set less than" comparison of XRb and XRc
2445  *    on per-word basis.
2446  *    a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2447  */
2448 static void gen_mxu_D16SLT(DisasContext *ctx)
2449 {
2450     uint32_t pad, XRc, XRb, XRa;
2451 
2452     pad = extract32(ctx->opcode, 21, 5);
2453     XRc = extract32(ctx->opcode, 14, 4);
2454     XRb = extract32(ctx->opcode, 10, 4);
2455     XRa = extract32(ctx->opcode,  6, 4);
2456 
2457     if (unlikely(pad != 0)) {
2458         /* opcode padding incorrect -> do nothing */
2459     } else if (unlikely(XRa == 0)) {
2460         /* destination is zero register -> do nothing */
2461     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2462         /* both operands zero registers -> just set destination to zero */
2463         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2464     } else if (unlikely(XRb == XRc)) {
2465         /* both operands same registers -> just set destination to zero */
2466         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2467     } else {
2468         /* the most general case */
2469         TCGv t0 = tcg_temp_new();
2470         TCGv t1 = tcg_temp_new();
2471         TCGv t2 = tcg_temp_new();
2472         TCGv t3 = tcg_temp_new();
2473         TCGv t4 = tcg_temp_new();
2474 
2475         gen_load_mxu_gpr(t3, XRb);
2476         gen_load_mxu_gpr(t4, XRc);
2477         tcg_gen_sextract_tl(t0, t3, 16, 16);
2478         tcg_gen_sextract_tl(t1, t4, 16, 16);
2479         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2480         tcg_gen_shli_tl(t2, t0, 16);
2481         tcg_gen_sextract_tl(t0, t3,  0, 16);
2482         tcg_gen_sextract_tl(t1, t4,  0, 16);
2483         tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2484         tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
2485     }
2486 }
2487 
2488 /*
2489  *  D16AVG
2490  *    Update XRa with the signed average of XRb and XRc
2491  *    on per-word basis, rounding down.
2492  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2493  *
2494  *  D16AVGR
2495  *    Update XRa with the signed average of XRb and XRc
2496  *    on per-word basis, math rounding 4/5.
2497  *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2498  */
2499 static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
2500 {
2501     uint32_t pad, XRc, XRb, XRa;
2502 
2503     pad = extract32(ctx->opcode, 21, 5);
2504     XRc = extract32(ctx->opcode, 14, 4);
2505     XRb = extract32(ctx->opcode, 10, 4);
2506     XRa = extract32(ctx->opcode,  6, 4);
2507 
2508     if (unlikely(pad != 0)) {
2509         /* opcode padding incorrect -> do nothing */
2510     } else if (unlikely(XRa == 0)) {
2511         /* destination is zero register -> do nothing */
2512     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2513         /* both operands zero registers -> just set destination to zero */
2514         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2515     } else if (unlikely(XRb == XRc)) {
2516         /* both operands same registers -> just set destination to same */
2517         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2518     } else {
2519         /* the most general case */
2520         TCGv t0 = tcg_temp_new();
2521         TCGv t1 = tcg_temp_new();
2522         TCGv t2 = tcg_temp_new();
2523         TCGv t3 = tcg_temp_new();
2524         TCGv t4 = tcg_temp_new();
2525 
2526         gen_load_mxu_gpr(t3, XRb);
2527         gen_load_mxu_gpr(t4, XRc);
2528         tcg_gen_sextract_tl(t0, t3, 16, 16);
2529         tcg_gen_sextract_tl(t1, t4, 16, 16);
2530         tcg_gen_add_tl(t0, t0, t1);
2531         if (round45) {
2532             tcg_gen_addi_tl(t0, t0, 1);
2533         }
2534         tcg_gen_shli_tl(t2, t0, 15);
2535         tcg_gen_andi_tl(t2, t2, 0xffff0000);
2536         tcg_gen_sextract_tl(t0, t3,  0, 16);
2537         tcg_gen_sextract_tl(t1, t4,  0, 16);
2538         tcg_gen_add_tl(t0, t0, t1);
2539         if (round45) {
2540             tcg_gen_addi_tl(t0, t0, 1);
2541         }
2542         tcg_gen_shri_tl(t0, t0, 1);
2543         tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
2544         gen_store_mxu_gpr(t2, XRa);
2545     }
2546 }
2547 
2548 /*
2549  *  Q8AVG
2550  *    Update XRa with the signed average of XRb and XRc
2551  *    on per-byte basis, rounding down.
2552  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2553  *
2554  *  Q8AVGR
2555  *    Update XRa with the signed average of XRb and XRc
2556  *    on per-word basis, math rounding 4/5.
2557  *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2558  */
2559 static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
2560 {
2561     uint32_t pad, XRc, XRb, XRa;
2562 
2563     pad = extract32(ctx->opcode, 21, 5);
2564     XRc = extract32(ctx->opcode, 14, 4);
2565     XRb = extract32(ctx->opcode, 10, 4);
2566     XRa = extract32(ctx->opcode,  6, 4);
2567 
2568     if (unlikely(pad != 0)) {
2569         /* opcode padding incorrect -> do nothing */
2570     } else if (unlikely(XRa == 0)) {
2571         /* destination is zero register -> do nothing */
2572     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2573         /* both operands zero registers -> just set destination to zero */
2574         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2575     } else if (unlikely(XRb == XRc)) {
2576         /* both operands same registers -> just set destination to same */
2577         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2578     } else {
2579         /* the most general case */
2580         TCGv t0 = tcg_temp_new();
2581         TCGv t1 = tcg_temp_new();
2582         TCGv t2 = tcg_temp_new();
2583         TCGv t3 = tcg_temp_new();
2584         TCGv t4 = tcg_temp_new();
2585 
2586         gen_load_mxu_gpr(t3, XRb);
2587         gen_load_mxu_gpr(t4, XRc);
2588         tcg_gen_movi_tl(t2, 0);
2589 
2590         for (int i = 0; i < 4; i++) {
2591             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2592             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2593             tcg_gen_add_tl(t0, t0, t1);
2594             if (round45) {
2595                 tcg_gen_addi_tl(t0, t0, 1);
2596             }
2597             tcg_gen_shri_tl(t0, t0, 1);
2598             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2599         }
2600         gen_store_mxu_gpr(t2, XRa);
2601     }
2602 }
2603 
2604 /*
2605  *  Q8MOVZ
2606  *    Quadruple 8-bit packed conditional move where
2607  *    XRb contains conditions, XRc what to move and
2608  *    XRa is the destination.
2609  *    a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2610  *
2611  *  Q8MOVN
2612  *    Quadruple 8-bit packed conditional move where
2613  *    XRb contains conditions, XRc what to move and
2614  *    XRa is the destination.
2615  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2616  */
2617 static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
2618 {
2619     uint32_t XRc, XRb, XRa;
2620 
2621     XRa = extract32(ctx->opcode,  6, 4);
2622     XRb = extract32(ctx->opcode, 10, 4);
2623     XRc = extract32(ctx->opcode, 14, 4);
2624 
2625     TCGv t0 = tcg_temp_new();
2626     TCGv t1 = tcg_temp_new();
2627     TCGv t2 = tcg_temp_new();
2628     TCGv t3 = tcg_temp_new();
2629     TCGLabel *l_quarterdone = gen_new_label();
2630     TCGLabel *l_halfdone = gen_new_label();
2631     TCGLabel *l_quarterrest = gen_new_label();
2632     TCGLabel *l_done = gen_new_label();
2633 
2634     gen_load_mxu_gpr(t0, XRc);
2635     gen_load_mxu_gpr(t1, XRb);
2636     gen_load_mxu_gpr(t2, XRa);
2637 
2638     tcg_gen_extract_tl(t3, t1, 24, 8);
2639     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
2640     tcg_gen_extract_tl(t3, t0, 24, 8);
2641     tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
2642 
2643     gen_set_label(l_quarterdone);
2644     tcg_gen_extract_tl(t3, t1, 16, 8);
2645     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2646     tcg_gen_extract_tl(t3, t0, 16, 8);
2647     tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
2648 
2649     gen_set_label(l_halfdone);
2650     tcg_gen_extract_tl(t3, t1, 8, 8);
2651     tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
2652     tcg_gen_extract_tl(t3, t0, 8, 8);
2653     tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
2654 
2655     gen_set_label(l_quarterrest);
2656     tcg_gen_extract_tl(t3, t1, 0, 8);
2657     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2658     tcg_gen_extract_tl(t3, t0, 0, 8);
2659     tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
2660 
2661     gen_set_label(l_done);
2662     gen_store_mxu_gpr(t2, XRa);
2663 }
2664 
2665 /*
2666  *  D16MOVZ
2667  *    Double 16-bit packed conditional move where
2668  *    XRb contains conditions, XRc what to move and
2669  *    XRa is the destination.
2670  *    a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2671  *
2672  *  D16MOVN
2673  *    Double 16-bit packed conditional move where
2674  *    XRb contains conditions, XRc what to move and
2675  *    XRa is the destination.
2676  *    a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
2677  */
2678 static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
2679 {
2680     uint32_t XRc, XRb, XRa;
2681 
2682     XRa = extract32(ctx->opcode,  6, 4);
2683     XRb = extract32(ctx->opcode, 10, 4);
2684     XRc = extract32(ctx->opcode, 14, 4);
2685 
2686     TCGv t0 = tcg_temp_new();
2687     TCGv t1 = tcg_temp_new();
2688     TCGv t2 = tcg_temp_new();
2689     TCGv t3 = tcg_temp_new();
2690     TCGLabel *l_halfdone = gen_new_label();
2691     TCGLabel *l_done = gen_new_label();
2692 
2693     gen_load_mxu_gpr(t0, XRc);
2694     gen_load_mxu_gpr(t1, XRb);
2695     gen_load_mxu_gpr(t2, XRa);
2696 
2697     tcg_gen_extract_tl(t3, t1, 16, 16);
2698     tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2699     tcg_gen_extract_tl(t3, t0, 16, 16);
2700     tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2701 
2702     gen_set_label(l_halfdone);
2703     tcg_gen_extract_tl(t3, t1, 0, 16);
2704     tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2705     tcg_gen_extract_tl(t3, t0, 0, 16);
2706     tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
2707 
2708     gen_set_label(l_done);
2709     gen_store_mxu_gpr(t2, XRa);
2710 }
2711 
2712 /*
2713  *  S32MOVZ
2714  *    Quadruple 32-bit conditional move where
2715  *    XRb contains conditions, XRc what to move and
2716  *    XRa is the destination.
2717  *    a.k.a. if (XRb == 0) { XRa = XRc }
2718  *
2719  *  S32MOVN
2720  *    Single 32-bit conditional move where
2721  *    XRb contains conditions, XRc what to move and
2722  *    XRa is the destination.
2723  *    a.k.a. if (XRb != 0) { XRa = XRc }
2724  */
2725 static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
2726 {
2727     uint32_t XRc, XRb, XRa;
2728 
2729     XRa = extract32(ctx->opcode,  6, 4);
2730     XRb = extract32(ctx->opcode, 10, 4);
2731     XRc = extract32(ctx->opcode, 14, 4);
2732 
2733     TCGv t0 = tcg_temp_new();
2734     TCGv t1 = tcg_temp_new();
2735     TCGLabel *l_done = gen_new_label();
2736 
2737     gen_load_mxu_gpr(t0, XRc);
2738     gen_load_mxu_gpr(t1, XRb);
2739 
2740     tcg_gen_brcondi_tl(cond, t1, 0, l_done);
2741     gen_store_mxu_gpr(t0, XRa);
2742     gen_set_label(l_done);
2743 }
2744 
2745 /*
2746  *      MXU instruction category: Addition and subtraction
2747  *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2748  *
2749  *              S32CPS      D16CPS
2750  *                                       Q8ADD
2751  */
2752 
2753 /*
2754  *  S32CPS
2755  *    Update XRa if XRc < 0 by value of 0 - XRb
2756  *    else XRa = XRb
2757  */
2758 static void gen_mxu_S32CPS(DisasContext *ctx)
2759 {
2760     uint32_t pad, XRc, XRb, XRa;
2761 
2762     pad = extract32(ctx->opcode, 21, 5);
2763     XRc = extract32(ctx->opcode, 14, 4);
2764     XRb = extract32(ctx->opcode, 10, 4);
2765     XRa = extract32(ctx->opcode,  6, 4);
2766 
2767     if (unlikely(pad != 0)) {
2768         /* opcode padding incorrect -> do nothing */
2769     } else if (unlikely(XRa == 0)) {
2770         /* destination is zero register -> do nothing */
2771     } else if (unlikely(XRb == 0)) {
2772         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2773         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2774     } else if (unlikely(XRc == 0)) {
2775         /* condition always false -> just move XRb to XRa */
2776         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2777     } else {
2778         /* the most general case */
2779         TCGv t0 = tcg_temp_new();
2780         TCGLabel *l_not_less = gen_new_label();
2781         TCGLabel *l_done = gen_new_label();
2782 
2783         tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
2784         tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
2785         tcg_gen_br(l_done);
2786         gen_set_label(l_not_less);
2787         gen_load_mxu_gpr(t0, XRb);
2788         gen_set_label(l_done);
2789         gen_store_mxu_gpr(t0, XRa);
2790     }
2791 }
2792 
2793 /*
2794  *  D16CPS
2795  *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2796  *    else XRa[0..1] = XRb[0..1]
2797  */
2798 static void gen_mxu_D16CPS(DisasContext *ctx)
2799 {
2800     uint32_t pad, XRc, XRb, XRa;
2801 
2802     pad = extract32(ctx->opcode, 21, 5);
2803     XRc = extract32(ctx->opcode, 14, 4);
2804     XRb = extract32(ctx->opcode, 10, 4);
2805     XRa = extract32(ctx->opcode,  6, 4);
2806 
2807     if (unlikely(pad != 0)) {
2808         /* opcode padding incorrect -> do nothing */
2809     } else if (unlikely(XRa == 0)) {
2810         /* destination is zero register -> do nothing */
2811     } else if (unlikely(XRb == 0)) {
2812         /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2813         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2814     } else if (unlikely(XRc == 0)) {
2815         /* condition always false -> just move XRb to XRa */
2816         tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2817     } else {
2818         /* the most general case */
2819         TCGv t0 = tcg_temp_new();
2820         TCGv t1 = tcg_temp_new();
2821         TCGLabel *l_done_hi = gen_new_label();
2822         TCGLabel *l_not_less_lo = gen_new_label();
2823         TCGLabel *l_done_lo = gen_new_label();
2824 
2825         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
2826         tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
2827         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
2828         tcg_gen_subfi_tl(t1, 0, t1);
2829 
2830         gen_set_label(l_done_hi);
2831         tcg_gen_shli_i32(t1, t1, 16);
2832 
2833         tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1],  0, 16);
2834         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
2835         tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2836         tcg_gen_subfi_tl(t0, 0, t0);
2837         tcg_gen_br(l_done_lo);
2838 
2839         gen_set_label(l_not_less_lo);
2840         tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2841 
2842         gen_set_label(l_done_lo);
2843         tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
2844     }
2845 }
2846 
2847 /*
2848  *  Q8ABD XRa, XRb, XRc
2849  *  Gets absolute difference for quadruple of 8-bit
2850  *  packed in XRb to another one in XRc,
2851  *  put the result in XRa.
2852  *  a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2853  */
2854 static void gen_mxu_Q8ABD(DisasContext *ctx)
2855 {
2856     uint32_t pad, XRc, XRb, XRa;
2857 
2858     pad = extract32(ctx->opcode, 21, 3);
2859     XRc = extract32(ctx->opcode, 14, 4);
2860     XRb = extract32(ctx->opcode, 10, 4);
2861     XRa = extract32(ctx->opcode,  6, 4);
2862 
2863     if (unlikely(pad != 0)) {
2864         /* opcode padding incorrect -> do nothing */
2865     } else if (unlikely(XRa == 0)) {
2866         /* destination is zero register -> do nothing */
2867     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2868         /* both operands zero registers -> just set destination to zero */
2869         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2870     } else {
2871         /* the most general case */
2872         TCGv t0 = tcg_temp_new();
2873         TCGv t1 = tcg_temp_new();
2874         TCGv t2 = tcg_temp_new();
2875         TCGv t3 = tcg_temp_new();
2876         TCGv t4 = tcg_temp_new();
2877 
2878         gen_load_mxu_gpr(t3, XRb);
2879         gen_load_mxu_gpr(t4, XRc);
2880         tcg_gen_movi_tl(t2, 0);
2881 
2882         for (int i = 0; i < 4; i++) {
2883             tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2884             tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2885 
2886             tcg_gen_sub_tl(t0, t0, t1);
2887             tcg_gen_abs_tl(t0, t0);
2888 
2889             tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2890         }
2891         gen_store_mxu_gpr(t2, XRa);
2892     }
2893 }
2894 
2895 /*
2896  *  Q8ADD XRa, XRb, XRc, ptn2
2897  *  Add/subtract quadruple of 8-bit packed in XRb
2898  *  to another one in XRc, put the result in XRa.
2899  */
2900 static void gen_mxu_Q8ADD(DisasContext *ctx)
2901 {
2902     uint32_t aptn2, pad, XRc, XRb, XRa;
2903 
2904     aptn2 = extract32(ctx->opcode, 24, 2);
2905     pad   = extract32(ctx->opcode, 21, 3);
2906     XRc   = extract32(ctx->opcode, 14, 4);
2907     XRb   = extract32(ctx->opcode, 10, 4);
2908     XRa   = extract32(ctx->opcode,  6, 4);
2909 
2910     if (unlikely(pad != 0)) {
2911         /* opcode padding incorrect -> do nothing */
2912     } else if (unlikely(XRa == 0)) {
2913         /* destination is zero register -> do nothing */
2914     } else if (unlikely((XRb == 0) && (XRc == 0))) {
2915         /* both operands zero registers -> just set destination to zero */
2916         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2917     } else {
2918         /* the most general case */
2919         TCGv t0 = tcg_temp_new();
2920         TCGv t1 = tcg_temp_new();
2921         TCGv t2 = tcg_temp_new();
2922         TCGv t3 = tcg_temp_new();
2923         TCGv t4 = tcg_temp_new();
2924 
2925         gen_load_mxu_gpr(t3, XRb);
2926         gen_load_mxu_gpr(t4, XRc);
2927 
2928         for (int i = 0; i < 4; i++) {
2929             tcg_gen_andi_tl(t0, t3, 0xff);
2930             tcg_gen_andi_tl(t1, t4, 0xff);
2931 
2932             if (i < 2) {
2933                 if (aptn2 & 0x01) {
2934                     tcg_gen_sub_tl(t0, t0, t1);
2935                 } else {
2936                     tcg_gen_add_tl(t0, t0, t1);
2937                 }
2938             } else {
2939                 if (aptn2 & 0x02) {
2940                     tcg_gen_sub_tl(t0, t0, t1);
2941                 } else {
2942                     tcg_gen_add_tl(t0, t0, t1);
2943                 }
2944             }
2945             if (i < 3) {
2946                 tcg_gen_shri_tl(t3, t3, 8);
2947                 tcg_gen_shri_tl(t4, t4, 8);
2948             }
2949             if (i > 0) {
2950                 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2951             } else {
2952                 tcg_gen_andi_tl(t0, t0, 0xff);
2953                 tcg_gen_mov_tl(t2, t0);
2954             }
2955         }
2956         gen_store_mxu_gpr(t2, XRa);
2957     }
2958 }
2959 
2960 /*
2961  *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
2962  *    Add/subtract quadruple of 8-bit packed in XRb
2963  *    to another one in XRc, with zero extending
2964  *    to 16-bit and put results as packed 16-bit data
2965  *    into XRa and XRd.
2966  *    aptn2 manages action add or subract of pairs of data.
2967  *
2968  *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
2969  *    Add/subtract quadruple of 8-bit packed in XRb
2970  *    to another one in XRc, with zero extending
2971  *    to 16-bit and accumulate results as packed 16-bit data
2972  *    into XRa and XRd.
2973  *    aptn2 manages action add or subract of pairs of data.
2974  */
2975 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
2976 {
2977     uint32_t aptn2, XRd, XRc, XRb, XRa;
2978 
2979     aptn2 = extract32(ctx->opcode, 24, 2);
2980     XRd   = extract32(ctx->opcode, 18, 4);
2981     XRc   = extract32(ctx->opcode, 14, 4);
2982     XRb   = extract32(ctx->opcode, 10, 4);
2983     XRa   = extract32(ctx->opcode,  6, 4);
2984 
2985     if (unlikely((XRb == 0) && (XRc == 0))) {
2986         /* both operands zero registers -> just set destination to zero */
2987         if (XRa != 0) {
2988             tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2989         }
2990         if (XRd != 0) {
2991             tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
2992         }
2993     } else {
2994         /* the most general case */
2995         TCGv t0 = tcg_temp_new();
2996         TCGv t1 = tcg_temp_new();
2997         TCGv t2 = tcg_temp_new();
2998         TCGv t3 = tcg_temp_new();
2999         TCGv t4 = tcg_temp_new();
3000         TCGv t5 = tcg_temp_new();
3001 
3002         if (XRa != 0) {
3003             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
3004             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
3005             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
3006             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
3007             if (aptn2 & 2) {
3008                 tcg_gen_sub_tl(t0, t0, t1);
3009                 tcg_gen_sub_tl(t2, t2, t3);
3010             } else {
3011                 tcg_gen_add_tl(t0, t0, t1);
3012                 tcg_gen_add_tl(t2, t2, t3);
3013             }
3014             if (accumulate) {
3015                 gen_load_mxu_gpr(t5, XRa);
3016                 tcg_gen_extract_tl(t1, t5,  0, 16);
3017                 tcg_gen_extract_tl(t3, t5, 16, 16);
3018                 tcg_gen_add_tl(t0, t0, t1);
3019                 tcg_gen_add_tl(t2, t2, t3);
3020             }
3021             tcg_gen_shli_tl(t2, t2, 16);
3022             tcg_gen_extract_tl(t0, t0, 0, 16);
3023             tcg_gen_or_tl(t4, t2, t0);
3024         }
3025         if (XRd != 0) {
3026             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
3027             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
3028             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
3029             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
3030             if (aptn2 & 1) {
3031                 tcg_gen_sub_tl(t0, t0, t1);
3032                 tcg_gen_sub_tl(t2, t2, t3);
3033             } else {
3034                 tcg_gen_add_tl(t0, t0, t1);
3035                 tcg_gen_add_tl(t2, t2, t3);
3036             }
3037             if (accumulate) {
3038                 gen_load_mxu_gpr(t5, XRd);
3039                 tcg_gen_extract_tl(t1, t5,  0, 16);
3040                 tcg_gen_extract_tl(t3, t5, 16, 16);
3041                 tcg_gen_add_tl(t0, t0, t1);
3042                 tcg_gen_add_tl(t2, t2, t3);
3043             }
3044             tcg_gen_shli_tl(t2, t2, 16);
3045             tcg_gen_extract_tl(t0, t0, 0, 16);
3046             tcg_gen_or_tl(t5, t2, t0);
3047         }
3048 
3049         gen_store_mxu_gpr(t4, XRa);
3050         gen_store_mxu_gpr(t5, XRd);
3051     }
3052 }
3053 
3054 /*
3055  *  D8SUM XRa, XRb, XRc
3056  *    Double parallel add of quadruple unsigned 8-bit together
3057  *    with zero extending to 16-bit data.
3058  *  D8SUMC XRa, XRb, XRc
3059  *    Double parallel add of quadruple unsigned 8-bit together
3060  *    with zero extending to 16-bit data and adding 2 to each
3061  *    parallel result.
3062  */
3063 static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
3064 {
3065     uint32_t pad, pad2, XRc, XRb, XRa;
3066 
3067     pad  = extract32(ctx->opcode, 24, 2);
3068     pad2 = extract32(ctx->opcode, 18, 4);
3069     XRc  = extract32(ctx->opcode, 14, 4);
3070     XRb  = extract32(ctx->opcode, 10, 4);
3071     XRa  = extract32(ctx->opcode,  6, 4);
3072 
3073     if (unlikely(pad != 0 || pad2 != 0)) {
3074         /* opcode padding incorrect -> do nothing */
3075     } else if (unlikely(XRa == 0)) {
3076         /* destination is zero register -> do nothing */
3077     } else if (unlikely((XRb == 0) && (XRc == 0))) {
3078         /* both operands zero registers -> just set destination to zero */
3079         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3080     } else {
3081         /* the most general case */
3082         TCGv t0 = tcg_temp_new();
3083         TCGv t1 = tcg_temp_new();
3084         TCGv t2 = tcg_temp_new();
3085         TCGv t3 = tcg_temp_new();
3086         TCGv t4 = tcg_temp_new();
3087         TCGv t5 = tcg_temp_new();
3088 
3089         if (XRb != 0) {
3090             tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
3091             tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
3092             tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
3093             tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
3094             tcg_gen_add_tl(t4, t0, t1);
3095             tcg_gen_add_tl(t4, t4, t2);
3096             tcg_gen_add_tl(t4, t4, t3);
3097         } else {
3098             tcg_gen_mov_tl(t4, 0);
3099         }
3100         if (XRc != 0) {
3101             tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
3102             tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
3103             tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
3104             tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
3105             tcg_gen_add_tl(t5, t0, t1);
3106             tcg_gen_add_tl(t5, t5, t2);
3107             tcg_gen_add_tl(t5, t5, t3);
3108         } else {
3109             tcg_gen_mov_tl(t5, 0);
3110         }
3111 
3112         if (sumc) {
3113             tcg_gen_addi_tl(t4, t4, 2);
3114             tcg_gen_addi_tl(t5, t5, 2);
3115         }
3116         tcg_gen_shli_tl(t4, t4, 16);
3117 
3118         tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3119     }
3120 }
3121 
3122 /*
3123  * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3124  * 16-bit pattern addition.
3125  */
3126 static void gen_mxu_q16add(DisasContext *ctx)
3127 {
3128     uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
3129 
3130     aptn2 = extract32(ctx->opcode, 24, 2);
3131     optn2 = extract32(ctx->opcode, 22, 2);
3132     XRd   = extract32(ctx->opcode, 18, 4);
3133     XRc   = extract32(ctx->opcode, 14, 4);
3134     XRb   = extract32(ctx->opcode, 10, 4);
3135     XRa   = extract32(ctx->opcode,  6, 4);
3136 
3137     TCGv t0 = tcg_temp_new();
3138     TCGv t1 = tcg_temp_new();
3139     TCGv t2 = tcg_temp_new();
3140     TCGv t3 = tcg_temp_new();
3141     TCGv t4 = tcg_temp_new();
3142     TCGv t5 = tcg_temp_new();
3143 
3144     gen_load_mxu_gpr(t1, XRb);
3145     tcg_gen_extract_tl(t0, t1,  0, 16);
3146     tcg_gen_extract_tl(t1, t1, 16, 16);
3147 
3148     gen_load_mxu_gpr(t3, XRc);
3149     tcg_gen_extract_tl(t2, t3,  0, 16);
3150     tcg_gen_extract_tl(t3, t3, 16, 16);
3151 
3152     switch (optn2) {
3153     case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
3154         tcg_gen_mov_tl(t4, t1);
3155         tcg_gen_mov_tl(t5, t0);
3156         break;
3157     case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
3158         tcg_gen_mov_tl(t4, t0);
3159         tcg_gen_mov_tl(t5, t0);
3160         break;
3161     case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
3162         tcg_gen_mov_tl(t4, t1);
3163         tcg_gen_mov_tl(t5, t1);
3164         break;
3165     case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
3166         tcg_gen_mov_tl(t4, t0);
3167         tcg_gen_mov_tl(t5, t1);
3168         break;
3169     }
3170 
3171     switch (aptn2) {
3172     case MXU_APTN2_AA: /* lop +, rop + */
3173         tcg_gen_add_tl(t0, t4, t3);
3174         tcg_gen_add_tl(t1, t5, t2);
3175         tcg_gen_add_tl(t4, t4, t3);
3176         tcg_gen_add_tl(t5, t5, t2);
3177         break;
3178     case MXU_APTN2_AS: /* lop +, rop + */
3179         tcg_gen_sub_tl(t0, t4, t3);
3180         tcg_gen_sub_tl(t1, t5, t2);
3181         tcg_gen_add_tl(t4, t4, t3);
3182         tcg_gen_add_tl(t5, t5, t2);
3183         break;
3184     case MXU_APTN2_SA: /* lop +, rop + */
3185         tcg_gen_add_tl(t0, t4, t3);
3186         tcg_gen_add_tl(t1, t5, t2);
3187         tcg_gen_sub_tl(t4, t4, t3);
3188         tcg_gen_sub_tl(t5, t5, t2);
3189         break;
3190     case MXU_APTN2_SS: /* lop +, rop + */
3191         tcg_gen_sub_tl(t0, t4, t3);
3192         tcg_gen_sub_tl(t1, t5, t2);
3193         tcg_gen_sub_tl(t4, t4, t3);
3194         tcg_gen_sub_tl(t5, t5, t2);
3195         break;
3196     }
3197 
3198     tcg_gen_shli_tl(t0, t0, 16);
3199     tcg_gen_extract_tl(t1, t1, 0, 16);
3200     tcg_gen_shli_tl(t4, t4, 16);
3201     tcg_gen_extract_tl(t5, t5, 0, 16);
3202 
3203     tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3204     tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
3205 }
3206 
3207 /*
3208  * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3209  * 16-bit addition/subtraction with accumulate.
3210  */
3211 static void gen_mxu_q16acc(DisasContext *ctx)
3212 {
3213     uint32_t aptn2, XRc, XRb, XRa, XRd;
3214 
3215     aptn2 = extract32(ctx->opcode, 24, 2);
3216     XRd   = extract32(ctx->opcode, 18, 4);
3217     XRc   = extract32(ctx->opcode, 14, 4);
3218     XRb   = extract32(ctx->opcode, 10, 4);
3219     XRa   = extract32(ctx->opcode,  6, 4);
3220 
3221     TCGv t0 = tcg_temp_new();
3222     TCGv t1 = tcg_temp_new();
3223     TCGv t2 = tcg_temp_new();
3224     TCGv t3 = tcg_temp_new();
3225     TCGv s3 = tcg_temp_new();
3226     TCGv s2 = tcg_temp_new();
3227     TCGv s1 = tcg_temp_new();
3228     TCGv s0 = tcg_temp_new();
3229 
3230     gen_load_mxu_gpr(t1, XRb);
3231     tcg_gen_extract_tl(t0, t1,  0, 16);
3232     tcg_gen_extract_tl(t1, t1, 16, 16);
3233 
3234     gen_load_mxu_gpr(t3, XRc);
3235     tcg_gen_extract_tl(t2, t3,  0, 16);
3236     tcg_gen_extract_tl(t3, t3, 16, 16);
3237 
3238     switch (aptn2) {
3239     case MXU_APTN2_AA: /* lop +, rop + */
3240         tcg_gen_add_tl(s3, t1, t3);
3241         tcg_gen_add_tl(s2, t0, t2);
3242         tcg_gen_add_tl(s1, t1, t3);
3243         tcg_gen_add_tl(s0, t0, t2);
3244         break;
3245     case MXU_APTN2_AS: /* lop +, rop - */
3246         tcg_gen_sub_tl(s3, t1, t3);
3247         tcg_gen_sub_tl(s2, t0, t2);
3248         tcg_gen_add_tl(s1, t1, t3);
3249         tcg_gen_add_tl(s0, t0, t2);
3250         break;
3251     case MXU_APTN2_SA: /* lop -, rop + */
3252         tcg_gen_add_tl(s3, t1, t3);
3253         tcg_gen_add_tl(s2, t0, t2);
3254         tcg_gen_sub_tl(s1, t1, t3);
3255         tcg_gen_sub_tl(s0, t0, t2);
3256         break;
3257     case MXU_APTN2_SS: /* lop -, rop - */
3258         tcg_gen_sub_tl(s3, t1, t3);
3259         tcg_gen_sub_tl(s2, t0, t2);
3260         tcg_gen_sub_tl(s1, t1, t3);
3261         tcg_gen_sub_tl(s0, t0, t2);
3262         break;
3263     }
3264 
3265     if (XRa != 0) {
3266         tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
3267         tcg_gen_extract_tl(t0, t0, 0, 16);
3268         tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
3269         tcg_gen_add_tl(t1, t1, s1);
3270         tcg_gen_shli_tl(t1, t1, 16);
3271         tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
3272     }
3273 
3274     if (XRd != 0) {
3275         tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
3276         tcg_gen_extract_tl(t0, t0, 0, 16);
3277         tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
3278         tcg_gen_add_tl(t1, t1, s3);
3279         tcg_gen_shli_tl(t1, t1, 16);
3280         tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
3281     }
3282 }
3283 
3284 /*
3285  * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3286  * 16-bit accumulate.
3287  */
3288 static void gen_mxu_q16accm(DisasContext *ctx)
3289 {
3290     uint32_t aptn2, XRc, XRb, XRa, XRd;
3291 
3292     aptn2 = extract32(ctx->opcode, 24, 2);
3293     XRd   = extract32(ctx->opcode, 18, 4);
3294     XRc   = extract32(ctx->opcode, 14, 4);
3295     XRb   = extract32(ctx->opcode, 10, 4);
3296     XRa   = extract32(ctx->opcode,  6, 4);
3297 
3298     TCGv t0 = tcg_temp_new();
3299     TCGv t1 = tcg_temp_new();
3300     TCGv t2 = tcg_temp_new();
3301     TCGv t3 = tcg_temp_new();
3302 
3303     gen_load_mxu_gpr(t2, XRb);
3304     gen_load_mxu_gpr(t3, XRc);
3305 
3306     if (XRa != 0) {
3307         TCGv a0 = tcg_temp_new();
3308         TCGv a1 = tcg_temp_new();
3309 
3310         tcg_gen_extract_tl(t0, t2,  0, 16);
3311         tcg_gen_extract_tl(t1, t2, 16, 16);
3312 
3313         gen_load_mxu_gpr(a1, XRa);
3314         tcg_gen_extract_tl(a0, a1,  0, 16);
3315         tcg_gen_extract_tl(a1, a1, 16, 16);
3316 
3317         if (aptn2 & 2) {
3318             tcg_gen_sub_tl(a0, a0, t0);
3319             tcg_gen_sub_tl(a1, a1, t1);
3320         } else {
3321             tcg_gen_add_tl(a0, a0, t0);
3322             tcg_gen_add_tl(a1, a1, t1);
3323         }
3324         tcg_gen_extract_tl(a0, a0, 0, 16);
3325         tcg_gen_shli_tl(a1, a1, 16);
3326         tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
3327     }
3328 
3329     if (XRd != 0) {
3330         TCGv a0 = tcg_temp_new();
3331         TCGv a1 = tcg_temp_new();
3332 
3333         tcg_gen_extract_tl(t0, t3,  0, 16);
3334         tcg_gen_extract_tl(t1, t3, 16, 16);
3335 
3336         gen_load_mxu_gpr(a1, XRd);
3337         tcg_gen_extract_tl(a0, a1,  0, 16);
3338         tcg_gen_extract_tl(a1, a1, 16, 16);
3339 
3340         if (aptn2 & 1) {
3341             tcg_gen_sub_tl(a0, a0, t0);
3342             tcg_gen_sub_tl(a1, a1, t1);
3343         } else {
3344             tcg_gen_add_tl(a0, a0, t0);
3345             tcg_gen_add_tl(a1, a1, t1);
3346         }
3347         tcg_gen_extract_tl(a0, a0, 0, 16);
3348         tcg_gen_shli_tl(a1, a1, 16);
3349         tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
3350     }
3351 }
3352 
3353 
3354 /*
3355  * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3356  * 16-bit sign extended addition and accumulate.
3357  */
3358 static void gen_mxu_d16asum(DisasContext *ctx)
3359 {
3360     uint32_t aptn2, XRc, XRb, XRa, XRd;
3361 
3362     aptn2 = extract32(ctx->opcode, 24, 2);
3363     XRd   = extract32(ctx->opcode, 18, 4);
3364     XRc   = extract32(ctx->opcode, 14, 4);
3365     XRb   = extract32(ctx->opcode, 10, 4);
3366     XRa   = extract32(ctx->opcode,  6, 4);
3367 
3368     TCGv t0 = tcg_temp_new();
3369     TCGv t1 = tcg_temp_new();
3370     TCGv t2 = tcg_temp_new();
3371     TCGv t3 = tcg_temp_new();
3372 
3373     gen_load_mxu_gpr(t2, XRb);
3374     gen_load_mxu_gpr(t3, XRc);
3375 
3376     if (XRa != 0) {
3377         tcg_gen_sextract_tl(t0, t2,  0, 16);
3378         tcg_gen_sextract_tl(t1, t2, 16, 16);
3379         tcg_gen_add_tl(t0, t0, t1);
3380         if (aptn2 & 2) {
3381             tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3382         } else {
3383             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3384         }
3385     }
3386 
3387     if (XRd != 0) {
3388         tcg_gen_sextract_tl(t0, t3,  0, 16);
3389         tcg_gen_sextract_tl(t1, t3, 16, 16);
3390         tcg_gen_add_tl(t0, t0, t1);
3391         if (aptn2 & 1) {
3392             tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3393         } else {
3394             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3395         }
3396     }
3397 }
3398 
3399 /*
3400  * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3401  * 32 bit pattern addition/subtraction, set carry.
3402  *
3403  * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3404  * 32 bit pattern addition/subtraction with carry.
3405  */
3406 static void gen_mxu_d32add(DisasContext *ctx)
3407 {
3408     uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
3409 
3410     aptn2 = extract32(ctx->opcode, 24, 2);
3411     addc  = extract32(ctx->opcode, 22, 2);
3412     XRd   = extract32(ctx->opcode, 18, 4);
3413     XRc   = extract32(ctx->opcode, 14, 4);
3414     XRb   = extract32(ctx->opcode, 10, 4);
3415     XRa   = extract32(ctx->opcode,  6, 4);
3416 
3417     TCGv t0 = tcg_temp_new();
3418     TCGv t1 = tcg_temp_new();
3419     TCGv t2 = tcg_temp_new();
3420     TCGv cr = tcg_temp_new();
3421 
3422     if (unlikely(addc > 1)) {
3423         /* opcode incorrect -> do nothing */
3424     } else if (addc == 1) {
3425         if (unlikely(XRa == 0 && XRd == 0)) {
3426             /* destinations are zero register -> do nothing */
3427         } else {
3428             /* FIXME ??? What if XRa == XRd ??? */
3429             /* aptn2 is unused here */
3430             gen_load_mxu_gpr(t0, XRb);
3431             gen_load_mxu_gpr(t1, XRc);
3432             gen_load_mxu_cr(cr);
3433             if (XRa != 0) {
3434                 tcg_gen_extract_tl(t2, cr, 31, 1);
3435                 tcg_gen_add_tl(t0, t0, t2);
3436                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3437             }
3438             if (XRd != 0) {
3439                 tcg_gen_extract_tl(t2, cr, 30, 1);
3440                 tcg_gen_add_tl(t1, t1, t2);
3441                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3442             }
3443         }
3444     } else if (unlikely(XRa == 0 && XRd == 0)) {
3445         /* destinations are zero register -> do nothing */
3446     } else {
3447         /* common case */
3448         /* FIXME ??? What if XRa == XRd ??? */
3449         TCGv carry = tcg_temp_new();
3450 
3451         gen_load_mxu_gpr(t0, XRb);
3452         gen_load_mxu_gpr(t1, XRc);
3453         gen_load_mxu_cr(cr);
3454         if (XRa != 0) {
3455             if (aptn2 & 2) {
3456                 tcg_gen_sub_i32(t2, t0, t1);
3457                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3458             } else {
3459                 tcg_gen_add_i32(t2, t0, t1);
3460                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3461             }
3462             tcg_gen_andi_tl(cr, cr, 0x7fffffff);
3463             tcg_gen_shli_tl(carry, carry, 31);
3464             tcg_gen_or_tl(cr, cr, carry);
3465             gen_store_mxu_gpr(t2, XRa);
3466         }
3467         if (XRd != 0) {
3468             if (aptn2 & 1) {
3469                 tcg_gen_sub_i32(t2, t0, t1);
3470                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3471             } else {
3472                 tcg_gen_add_i32(t2, t0, t1);
3473                 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3474             }
3475             tcg_gen_andi_tl(cr, cr, 0xbfffffff);
3476             tcg_gen_shli_tl(carry, carry, 30);
3477             tcg_gen_or_tl(cr, cr, carry);
3478             gen_store_mxu_gpr(t2, XRd);
3479         }
3480         gen_store_mxu_cr(cr);
3481     }
3482 }
3483 
3484 /*
3485  * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3486  * 32 bit pattern addition/subtraction and accumulate.
3487  */
3488 static void gen_mxu_d32acc(DisasContext *ctx)
3489 {
3490     uint32_t aptn2, XRc, XRb, XRa, XRd;
3491 
3492     aptn2 = extract32(ctx->opcode, 24, 2);
3493     XRd   = extract32(ctx->opcode, 18, 4);
3494     XRc   = extract32(ctx->opcode, 14, 4);
3495     XRb   = extract32(ctx->opcode, 10, 4);
3496     XRa   = extract32(ctx->opcode,  6, 4);
3497 
3498     TCGv t0 = tcg_temp_new();
3499     TCGv t1 = tcg_temp_new();
3500     TCGv t2 = tcg_temp_new();
3501 
3502     if (unlikely(XRa == 0 && XRd == 0)) {
3503         /* destinations are zero register -> do nothing */
3504     } else {
3505         /* common case */
3506         gen_load_mxu_gpr(t0, XRb);
3507         gen_load_mxu_gpr(t1, XRc);
3508         if (XRa != 0) {
3509             if (aptn2 & 2) {
3510                 tcg_gen_sub_tl(t2, t0, t1);
3511             } else {
3512                 tcg_gen_add_tl(t2, t0, t1);
3513             }
3514             tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3515         }
3516         if (XRd != 0) {
3517             if (aptn2 & 1) {
3518                 tcg_gen_sub_tl(t2, t0, t1);
3519             } else {
3520                 tcg_gen_add_tl(t2, t0, t1);
3521             }
3522             tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3523         }
3524     }
3525 }
3526 
3527 /*
3528  * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3529  * 32 bit pattern addition/subtraction and accumulate.
3530  */
3531 static void gen_mxu_d32accm(DisasContext *ctx)
3532 {
3533     uint32_t aptn2, XRc, XRb, XRa, XRd;
3534 
3535     aptn2 = extract32(ctx->opcode, 24, 2);
3536     XRd   = extract32(ctx->opcode, 18, 4);
3537     XRc   = extract32(ctx->opcode, 14, 4);
3538     XRb   = extract32(ctx->opcode, 10, 4);
3539     XRa   = extract32(ctx->opcode,  6, 4);
3540 
3541     TCGv t0 = tcg_temp_new();
3542     TCGv t1 = tcg_temp_new();
3543     TCGv t2 = tcg_temp_new();
3544 
3545     if (unlikely(XRa == 0 && XRd == 0)) {
3546         /* destinations are zero register -> do nothing */
3547     } else {
3548         /* common case */
3549         gen_load_mxu_gpr(t0, XRb);
3550         gen_load_mxu_gpr(t1, XRc);
3551         if (XRa != 0) {
3552             tcg_gen_add_tl(t2, t0, t1);
3553             if (aptn2 & 2) {
3554                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3555             } else {
3556                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3557             }
3558         }
3559         if (XRd != 0) {
3560             tcg_gen_sub_tl(t2, t0, t1);
3561             if (aptn2 & 1) {
3562                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3563             } else {
3564                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3565             }
3566         }
3567     }
3568 }
3569 
3570 /*
3571  * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3572  * 32 bit pattern addition/subtraction.
3573  */
3574 static void gen_mxu_d32asum(DisasContext *ctx)
3575 {
3576     uint32_t aptn2, XRc, XRb, XRa, XRd;
3577 
3578     aptn2 = extract32(ctx->opcode, 24, 2);
3579     XRd   = extract32(ctx->opcode, 18, 4);
3580     XRc   = extract32(ctx->opcode, 14, 4);
3581     XRb   = extract32(ctx->opcode, 10, 4);
3582     XRa   = extract32(ctx->opcode,  6, 4);
3583 
3584     TCGv t0 = tcg_temp_new();
3585     TCGv t1 = tcg_temp_new();
3586 
3587     if (unlikely(XRa == 0 && XRd == 0)) {
3588         /* destinations are zero register -> do nothing */
3589     } else {
3590         /* common case */
3591         gen_load_mxu_gpr(t0, XRb);
3592         gen_load_mxu_gpr(t1, XRc);
3593         if (XRa != 0) {
3594             if (aptn2 & 2) {
3595                 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3596             } else {
3597                 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3598             }
3599         }
3600         if (XRd != 0) {
3601             if (aptn2 & 1) {
3602                 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3603             } else {
3604                 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3605             }
3606         }
3607     }
3608 }
3609 
3610 /*
3611  *                 MXU instruction category: Miscellaneous
3612  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3613  *
3614  *               S32EXTR      S32LUI
3615  *               S32EXTRV
3616  *                            Q16SAT
3617  *                            Q16SCOP
3618  */
3619 
3620 /*
3621  *  S32EXTR XRa, XRd, rs, bits5
3622  *    Extract bits5 bits from 64-bit pair {XRa:XRd}
3623  *    starting from rs[4:0] offset and put to the XRa.
3624  */
3625 static void gen_mxu_s32extr(DisasContext *ctx)
3626 {
3627     TCGv t0, t1, t2, t3;
3628     uint32_t XRa, XRd, rs, bits5;
3629 
3630     t0 = tcg_temp_new();
3631     t1 = tcg_temp_new();
3632     t2 = tcg_temp_new();
3633     t3 = tcg_temp_new();
3634 
3635     XRa   = extract32(ctx->opcode,  6, 4);
3636     XRd   = extract32(ctx->opcode, 10, 4);
3637     bits5 = extract32(ctx->opcode, 16, 5);
3638     rs    = extract32(ctx->opcode, 21, 5);
3639 
3640     /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
3641     /* {XRa} = extract({tmp}, 0, bits5); */
3642     if (bits5 > 0) {
3643         TCGLabel *l_xra_only = gen_new_label();
3644         TCGLabel *l_done = gen_new_label();
3645 
3646         gen_load_mxu_gpr(t0, XRd);
3647         gen_load_mxu_gpr(t1, XRa);
3648         gen_load_gpr(t2, rs);
3649         tcg_gen_andi_tl(t2, t2, 0x1f);
3650         tcg_gen_subfi_tl(t2, 32, t2);
3651         tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
3652         tcg_gen_subfi_tl(t2, bits5, t2);
3653         tcg_gen_subfi_tl(t3, 32, t2);
3654         tcg_gen_shr_tl(t0, t0, t3);
3655         tcg_gen_shl_tl(t1, t1, t2);
3656         tcg_gen_or_tl(t0, t0, t1);
3657         tcg_gen_br(l_done);
3658         gen_set_label(l_xra_only);
3659         tcg_gen_subi_tl(t2, t2, bits5);
3660         tcg_gen_shr_tl(t0, t1, t2);
3661         gen_set_label(l_done);
3662         tcg_gen_extract_tl(t0, t0, 0, bits5);
3663     } else {
3664         /* unspecified behavior but matches tests on real hardware*/
3665         tcg_gen_movi_tl(t0, 0);
3666     }
3667     gen_store_mxu_gpr(t0, XRa);
3668 }
3669 
3670 /*
3671  *  S32EXTRV XRa, XRd, rs, rt
3672  *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3673  *    starting from rs[4:0] offset and put to the XRa.
3674  */
3675 static void gen_mxu_s32extrv(DisasContext *ctx)
3676 {
3677     TCGv t0, t1, t2, t3, t4;
3678     uint32_t XRa, XRd, rs, rt;
3679 
3680     t0 = tcg_temp_new();
3681     t1 = tcg_temp_new();
3682     t2 = tcg_temp_new();
3683     t3 = tcg_temp_new();
3684     t4 = tcg_temp_new();
3685     TCGLabel *l_xra_only = gen_new_label();
3686     TCGLabel *l_done = gen_new_label();
3687     TCGLabel *l_zero = gen_new_label();
3688     TCGLabel *l_extract = gen_new_label();
3689 
3690     XRa = extract32(ctx->opcode,  6, 4);
3691     XRd = extract32(ctx->opcode, 10, 4);
3692     rt  = extract32(ctx->opcode, 16, 5);
3693     rs  = extract32(ctx->opcode, 21, 5);
3694 
3695     /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
3696     gen_load_mxu_gpr(t0, XRd);
3697     gen_load_mxu_gpr(t1, XRa);
3698     gen_load_gpr(t2, rs);
3699     gen_load_gpr(t4, rt);
3700     tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
3701     tcg_gen_andi_tl(t2, t2, 0x1f);
3702     tcg_gen_subfi_tl(t2, 32, t2);
3703     tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
3704     tcg_gen_sub_tl(t2, t4, t2);
3705     tcg_gen_subfi_tl(t3, 32, t2);
3706     tcg_gen_shr_tl(t0, t0, t3);
3707     tcg_gen_shl_tl(t1, t1, t2);
3708     tcg_gen_or_tl(t0, t0, t1);
3709     tcg_gen_br(l_extract);
3710 
3711     gen_set_label(l_xra_only);
3712     tcg_gen_sub_tl(t2, t2, t4);
3713     tcg_gen_shr_tl(t0, t1, t2);
3714     tcg_gen_br(l_extract);
3715 
3716     /* unspecified behavior but matches tests on real hardware*/
3717     gen_set_label(l_zero);
3718     tcg_gen_movi_tl(t0, 0);
3719     tcg_gen_br(l_done);
3720 
3721     /* {XRa} = extract({tmp}, 0, rt) */
3722     gen_set_label(l_extract);
3723     tcg_gen_subfi_tl(t4, 32, t4);
3724     tcg_gen_shl_tl(t0, t0, t4);
3725     tcg_gen_shr_tl(t0, t0, t4);
3726 
3727     gen_set_label(l_done);
3728     gen_store_mxu_gpr(t0, XRa);
3729 }
3730 
3731 /*
3732  *  S32LUI XRa, S8, optn3
3733  *    Permutate the immediate S8 value to form a word
3734  *    to update XRa.
3735  */
3736 static void gen_mxu_s32lui(DisasContext *ctx)
3737 {
3738     uint32_t XRa, s8, optn3, pad;
3739 
3740     XRa   = extract32(ctx->opcode,  6, 4);
3741     s8    = extract32(ctx->opcode, 10, 8);
3742     pad   = extract32(ctx->opcode, 21, 2);
3743     optn3 = extract32(ctx->opcode, 23, 3);
3744 
3745     if (unlikely(pad != 0)) {
3746         /* opcode padding incorrect -> do nothing */
3747     } else if (unlikely(XRa == 0)) {
3748         /* destination is zero register -> do nothing */
3749     } else {
3750         uint32_t s16;
3751         TCGv t0 = tcg_temp_new();
3752 
3753         switch (optn3) {
3754         case 0:
3755             tcg_gen_movi_tl(t0, s8);
3756             break;
3757         case 1:
3758             tcg_gen_movi_tl(t0, s8 << 8);
3759             break;
3760         case 2:
3761             tcg_gen_movi_tl(t0, s8 << 16);
3762             break;
3763         case 3:
3764             tcg_gen_movi_tl(t0, s8 << 24);
3765             break;
3766         case 4:
3767             tcg_gen_movi_tl(t0, (s8 << 16) | s8);
3768             break;
3769         case 5:
3770             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
3771             break;
3772         case 6:
3773             s16 = (uint16_t)(int16_t)(int8_t)s8;
3774             tcg_gen_movi_tl(t0, (s16 << 16) | s16);
3775             break;
3776         case 7:
3777             tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
3778             break;
3779         }
3780         gen_store_mxu_gpr(t0, XRa);
3781     }
3782 }
3783 
3784 /*
3785  *  Q16SAT XRa, XRb, XRc
3786  *  Packs four 16-bit signed integers in XRb and XRc to
3787  *  four saturated unsigned 8-bit into XRa.
3788  *
3789  */
3790 static void gen_mxu_Q16SAT(DisasContext *ctx)
3791 {
3792     uint32_t pad, XRc, XRb, XRa;
3793 
3794     pad = extract32(ctx->opcode, 21, 3);
3795     XRc = extract32(ctx->opcode, 14, 4);
3796     XRb = extract32(ctx->opcode, 10, 4);
3797     XRa = extract32(ctx->opcode,  6, 4);
3798 
3799     if (unlikely(pad != 0)) {
3800         /* opcode padding incorrect -> do nothing */
3801     } else if (unlikely(XRa == 0)) {
3802         /* destination is zero register -> do nothing */
3803     } else {
3804         /* the most general case */
3805         TCGv t0 = tcg_temp_new();
3806         TCGv t1 = tcg_temp_new();
3807         TCGv t2 = tcg_temp_new();
3808 
3809         tcg_gen_movi_tl(t2, 0);
3810         if (XRb != 0) {
3811             TCGLabel *l_less_hi = gen_new_label();
3812             TCGLabel *l_less_lo = gen_new_label();
3813             TCGLabel *l_lo = gen_new_label();
3814             TCGLabel *l_greater_hi = gen_new_label();
3815             TCGLabel *l_greater_lo = gen_new_label();
3816             TCGLabel *l_done = gen_new_label();
3817 
3818             tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
3819             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3820             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3821             tcg_gen_br(l_lo);
3822             gen_set_label(l_less_hi);
3823             tcg_gen_movi_tl(t0, 0);
3824             tcg_gen_br(l_lo);
3825             gen_set_label(l_greater_hi);
3826             tcg_gen_movi_tl(t0, 255);
3827 
3828             gen_set_label(l_lo);
3829             tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
3830             tcg_gen_sari_tl(t1, t1, 16);
3831             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3832             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3833             tcg_gen_br(l_done);
3834             gen_set_label(l_less_lo);
3835             tcg_gen_movi_tl(t1, 0);
3836             tcg_gen_br(l_done);
3837             gen_set_label(l_greater_lo);
3838             tcg_gen_movi_tl(t1, 255);
3839 
3840             gen_set_label(l_done);
3841             tcg_gen_shli_tl(t2, t0, 24);
3842             tcg_gen_shli_tl(t1, t1, 16);
3843             tcg_gen_or_tl(t2, t2, t1);
3844         }
3845 
3846         if (XRc != 0) {
3847             TCGLabel *l_less_hi = gen_new_label();
3848             TCGLabel *l_less_lo = gen_new_label();
3849             TCGLabel *l_lo = gen_new_label();
3850             TCGLabel *l_greater_hi = gen_new_label();
3851             TCGLabel *l_greater_lo = gen_new_label();
3852             TCGLabel *l_done = gen_new_label();
3853 
3854             tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
3855             tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3856             tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3857             tcg_gen_br(l_lo);
3858             gen_set_label(l_less_hi);
3859             tcg_gen_movi_tl(t0, 0);
3860             tcg_gen_br(l_lo);
3861             gen_set_label(l_greater_hi);
3862             tcg_gen_movi_tl(t0, 255);
3863 
3864             gen_set_label(l_lo);
3865             tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
3866             tcg_gen_sari_tl(t1, t1, 16);
3867             tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3868             tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3869             tcg_gen_br(l_done);
3870             gen_set_label(l_less_lo);
3871             tcg_gen_movi_tl(t1, 0);
3872             tcg_gen_br(l_done);
3873             gen_set_label(l_greater_lo);
3874             tcg_gen_movi_tl(t1, 255);
3875 
3876             gen_set_label(l_done);
3877             tcg_gen_shli_tl(t0, t0, 8);
3878             tcg_gen_or_tl(t2, t2, t0);
3879             tcg_gen_or_tl(t2, t2, t1);
3880         }
3881         gen_store_mxu_gpr(t2, XRa);
3882     }
3883 }
3884 
3885 /*
3886  *  Q16SCOP XRa, XRd, XRb, XRc
3887  *    Determine sign of quad packed 16-bit signed values
3888  *    in XRb and XRc put result in XRa and XRd respectively.
3889  */
3890 static void gen_mxu_q16scop(DisasContext *ctx)
3891 {
3892     uint32_t XRd, XRc, XRb, XRa;
3893 
3894     XRd  = extract32(ctx->opcode, 18, 4);
3895     XRc  = extract32(ctx->opcode, 14, 4);
3896     XRb  = extract32(ctx->opcode, 10, 4);
3897     XRa  = extract32(ctx->opcode,  6, 4);
3898 
3899     TCGv t0 = tcg_temp_new();
3900     TCGv t1 = tcg_temp_new();
3901     TCGv t2 = tcg_temp_new();
3902     TCGv t3 = tcg_temp_new();
3903     TCGv t4 = tcg_temp_new();
3904 
3905     TCGLabel *l_b_hi_lt = gen_new_label();
3906     TCGLabel *l_b_hi_gt = gen_new_label();
3907     TCGLabel *l_b_lo = gen_new_label();
3908     TCGLabel *l_b_lo_lt = gen_new_label();
3909     TCGLabel *l_c_hi = gen_new_label();
3910     TCGLabel *l_c_hi_lt = gen_new_label();
3911     TCGLabel *l_c_hi_gt = gen_new_label();
3912     TCGLabel *l_c_lo = gen_new_label();
3913     TCGLabel *l_c_lo_lt = gen_new_label();
3914     TCGLabel *l_done = gen_new_label();
3915 
3916     gen_load_mxu_gpr(t0, XRb);
3917     gen_load_mxu_gpr(t1, XRc);
3918 
3919     tcg_gen_sextract_tl(t2, t0, 16, 16);
3920     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
3921     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
3922     tcg_gen_movi_tl(t3, 0);
3923     tcg_gen_br(l_b_lo);
3924     gen_set_label(l_b_hi_lt);
3925     tcg_gen_movi_tl(t3, 0xffff0000);
3926     tcg_gen_br(l_b_lo);
3927     gen_set_label(l_b_hi_gt);
3928     tcg_gen_movi_tl(t3, 0x00010000);
3929 
3930     gen_set_label(l_b_lo);
3931     tcg_gen_sextract_tl(t2, t0, 0, 16);
3932     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
3933     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
3934     tcg_gen_ori_tl(t3, t3, 0x00000001);
3935     tcg_gen_br(l_c_hi);
3936     gen_set_label(l_b_lo_lt);
3937     tcg_gen_ori_tl(t3, t3, 0x0000ffff);
3938     tcg_gen_br(l_c_hi);
3939 
3940     gen_set_label(l_c_hi);
3941     tcg_gen_sextract_tl(t2, t1, 16, 16);
3942     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
3943     tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
3944     tcg_gen_movi_tl(t4, 0);
3945     tcg_gen_br(l_c_lo);
3946     gen_set_label(l_c_hi_lt);
3947     tcg_gen_movi_tl(t4, 0xffff0000);
3948     tcg_gen_br(l_c_lo);
3949     gen_set_label(l_c_hi_gt);
3950     tcg_gen_movi_tl(t4, 0x00010000);
3951 
3952     gen_set_label(l_c_lo);
3953     tcg_gen_sextract_tl(t2, t1, 0, 16);
3954     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
3955     tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
3956     tcg_gen_ori_tl(t4, t4, 0x00000001);
3957     tcg_gen_br(l_done);
3958     gen_set_label(l_c_lo_lt);
3959     tcg_gen_ori_tl(t4, t4, 0x0000ffff);
3960 
3961     gen_set_label(l_done);
3962     gen_store_mxu_gpr(t3, XRa);
3963     gen_store_mxu_gpr(t4, XRd);
3964 }
3965 
3966 /*
3967  *  S32SFL XRa, XRd, XRb, XRc
3968  *    Shuffle bytes according to one of four patterns.
3969  */
3970 static void gen_mxu_s32sfl(DisasContext *ctx)
3971 {
3972     uint32_t XRd, XRc, XRb, XRa, ptn2;
3973 
3974     XRd  = extract32(ctx->opcode, 18, 4);
3975     XRc  = extract32(ctx->opcode, 14, 4);
3976     XRb  = extract32(ctx->opcode, 10, 4);
3977     XRa  = extract32(ctx->opcode,  6, 4);
3978     ptn2 = extract32(ctx->opcode, 24, 2);
3979 
3980     TCGv t0 = tcg_temp_new();
3981     TCGv t1 = tcg_temp_new();
3982     TCGv t2 = tcg_temp_new();
3983     TCGv t3 = tcg_temp_new();
3984 
3985     gen_load_mxu_gpr(t0, XRb);
3986     gen_load_mxu_gpr(t1, XRc);
3987 
3988     switch (ptn2) {
3989     case 0:
3990         tcg_gen_andi_tl(t2, t0, 0xff000000);
3991         tcg_gen_andi_tl(t3, t1, 0x000000ff);
3992         tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
3993         tcg_gen_shri_tl(t0, t0,  8);
3994         tcg_gen_shri_tl(t1, t1,  8);
3995         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
3996         tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
3997         tcg_gen_shri_tl(t0, t0,  8);
3998         tcg_gen_shri_tl(t1, t1,  8);
3999         tcg_gen_deposit_tl(t2, t2, t0,  8, 8);
4000         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4001         tcg_gen_shri_tl(t1, t1,  8);
4002         tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4003         break;
4004     case 1:
4005         tcg_gen_andi_tl(t2, t0, 0xff000000);
4006         tcg_gen_andi_tl(t3, t1, 0x000000ff);
4007         tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
4008         tcg_gen_shri_tl(t0, t0,  8);
4009         tcg_gen_shri_tl(t1, t1,  8);
4010         tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
4011         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4012         tcg_gen_shri_tl(t0, t0,  8);
4013         tcg_gen_shri_tl(t1, t1,  8);
4014         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4015         tcg_gen_deposit_tl(t3, t3, t1,  8, 8);
4016         tcg_gen_shri_tl(t1, t1,  8);
4017         tcg_gen_deposit_tl(t2, t2, t1,  8, 8);
4018         break;
4019     case 2:
4020         tcg_gen_andi_tl(t2, t0, 0xff00ff00);
4021         tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
4022         tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
4023         tcg_gen_shri_tl(t0, t0, 16);
4024         tcg_gen_shri_tl(t1, t1,  8);
4025         tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4026         tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4027         tcg_gen_shri_tl(t1, t1, 16);
4028         tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4029         break;
4030     case 3:
4031         tcg_gen_andi_tl(t2, t0, 0xffff0000);
4032         tcg_gen_andi_tl(t3, t1, 0x0000ffff);
4033         tcg_gen_shri_tl(t1, t1, 16);
4034         tcg_gen_deposit_tl(t2, t2, t1,  0, 16);
4035         tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
4036         break;
4037     }
4038 
4039     gen_store_mxu_gpr(t2, XRa);
4040     gen_store_mxu_gpr(t3, XRd);
4041 }
4042 
4043 /*
4044  *  Q8SAD XRa, XRd, XRb, XRc
4045  *    Typical SAD opration for motion estimation.
4046  */
4047 static void gen_mxu_q8sad(DisasContext *ctx)
4048 {
4049     uint32_t XRd, XRc, XRb, XRa;
4050 
4051     XRd = extract32(ctx->opcode, 18, 4);
4052     XRc = extract32(ctx->opcode, 14, 4);
4053     XRb = extract32(ctx->opcode, 10, 4);
4054     XRa = extract32(ctx->opcode,  6, 4);
4055 
4056     TCGv t0 = tcg_temp_new();
4057     TCGv t1 = tcg_temp_new();
4058     TCGv t2 = tcg_temp_new();
4059     TCGv t3 = tcg_temp_new();
4060     TCGv t4 = tcg_temp_new();
4061     TCGv t5 = tcg_temp_new();
4062 
4063     gen_load_mxu_gpr(t2, XRb);
4064     gen_load_mxu_gpr(t3, XRc);
4065     gen_load_mxu_gpr(t5, XRd);
4066     tcg_gen_movi_tl(t4, 0);
4067 
4068     for (int i = 0; i < 4; i++) {
4069         tcg_gen_andi_tl(t0, t2, 0xff);
4070         tcg_gen_andi_tl(t1, t3, 0xff);
4071         tcg_gen_sub_tl(t0, t0, t1);
4072         tcg_gen_abs_tl(t0, t0);
4073         tcg_gen_add_tl(t4, t4, t0);
4074         if (i < 3) {
4075             tcg_gen_shri_tl(t2, t2, 8);
4076             tcg_gen_shri_tl(t3, t3, 8);
4077         }
4078     }
4079     tcg_gen_add_tl(t5, t5, t4);
4080     gen_store_mxu_gpr(t4, XRa);
4081     gen_store_mxu_gpr(t5, XRd);
4082 }
4083 
4084 /*
4085  *                 MXU instruction category: align
4086  *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4087  *
4088  *                       S32ALN     S32ALNI
4089  */
4090 
4091 /*
4092  *  S32ALNI XRc, XRb, XRa, optn3
4093  *    Arrange bytes from XRb and XRc according to one of five sets of
4094  *    rules determined by optn3, and place the result in XRa.
4095  */
4096 static void gen_mxu_S32ALNI(DisasContext *ctx)
4097 {
4098     uint32_t optn3, pad, XRc, XRb, XRa;
4099 
4100     optn3 = extract32(ctx->opcode,  23, 3);
4101     pad   = extract32(ctx->opcode,  21, 2);
4102     XRc   = extract32(ctx->opcode, 14, 4);
4103     XRb   = extract32(ctx->opcode, 10, 4);
4104     XRa   = extract32(ctx->opcode,  6, 4);
4105 
4106     if (unlikely(pad != 0)) {
4107         /* opcode padding incorrect -> do nothing */
4108     } else if (unlikely(XRa == 0)) {
4109         /* destination is zero register -> do nothing */
4110     } else if (unlikely((XRb == 0) && (XRc == 0))) {
4111         /* both operands zero registers -> just set destination to all 0s */
4112         tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4113     } else if (unlikely(XRb == 0)) {
4114         /* XRb zero register -> just appropriatelly shift XRc into XRa */
4115         switch (optn3) {
4116         case MXU_OPTN3_PTN0:
4117             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4118             break;
4119         case MXU_OPTN3_PTN1:
4120         case MXU_OPTN3_PTN2:
4121         case MXU_OPTN3_PTN3:
4122             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
4123                              8 * (4 - optn3));
4124             break;
4125         case MXU_OPTN3_PTN4:
4126             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4127             break;
4128         }
4129     } else if (unlikely(XRc == 0)) {
4130         /* XRc zero register -> just appropriatelly shift XRb into XRa */
4131         switch (optn3) {
4132         case MXU_OPTN3_PTN0:
4133             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4134             break;
4135         case MXU_OPTN3_PTN1:
4136         case MXU_OPTN3_PTN2:
4137         case MXU_OPTN3_PTN3:
4138             tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4139             break;
4140         case MXU_OPTN3_PTN4:
4141             tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4142             break;
4143         }
4144     } else if (unlikely(XRb == XRc)) {
4145         /* both operands same -> just rotation or moving from any of them */
4146         switch (optn3) {
4147         case MXU_OPTN3_PTN0:
4148         case MXU_OPTN3_PTN4:
4149             tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4150             break;
4151         case MXU_OPTN3_PTN1:
4152         case MXU_OPTN3_PTN2:
4153         case MXU_OPTN3_PTN3:
4154             tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4155             break;
4156         }
4157     } else {
4158         /* the most general case */
4159         switch (optn3) {
4160         case MXU_OPTN3_PTN0:
4161             {
4162                 /*                                         */
4163                 /*         XRb                XRc          */
4164                 /*  +---------------+                      */
4165                 /*  | A   B   C   D |    E   F   G   H     */
4166                 /*  +-------+-------+                      */
4167                 /*          |                              */
4168                 /*         XRa                             */
4169                 /*                                         */
4170 
4171                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4172             }
4173             break;
4174         case MXU_OPTN3_PTN1:
4175             {
4176                 /*                                         */
4177                 /*         XRb                 XRc         */
4178                 /*      +-------------------+              */
4179                 /*    A | B   C   D       E | F   G   H    */
4180                 /*      +---------+---------+              */
4181                 /*                |                        */
4182                 /*               XRa                       */
4183                 /*                                         */
4184 
4185                 TCGv_i32 t0 = tcg_temp_new();
4186                 TCGv_i32 t1 = tcg_temp_new();
4187 
4188                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
4189                 tcg_gen_shli_i32(t0, t0, 8);
4190 
4191                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
4192                 tcg_gen_shri_i32(t1, t1, 24);
4193 
4194                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4195             }
4196             break;
4197         case MXU_OPTN3_PTN2:
4198             {
4199                 /*                                         */
4200                 /*         XRb                 XRc         */
4201                 /*          +-------------------+          */
4202                 /*    A   B | C   D       E   F | G   H    */
4203                 /*          +---------+---------+          */
4204                 /*                    |                    */
4205                 /*                   XRa                   */
4206                 /*                                         */
4207 
4208                 TCGv_i32 t0 = tcg_temp_new();
4209                 TCGv_i32 t1 = tcg_temp_new();
4210 
4211                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
4212                 tcg_gen_shli_i32(t0, t0, 16);
4213 
4214                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
4215                 tcg_gen_shri_i32(t1, t1, 16);
4216 
4217                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4218             }
4219             break;
4220         case MXU_OPTN3_PTN3:
4221             {
4222                 /*                                         */
4223                 /*         XRb                 XRc         */
4224                 /*              +-------------------+      */
4225                 /*    A   B   C | D       E   F   G | H    */
4226                 /*              +---------+---------+      */
4227                 /*                        |                */
4228                 /*                       XRa               */
4229                 /*                                         */
4230 
4231                 TCGv_i32 t0 = tcg_temp_new();
4232                 TCGv_i32 t1 = tcg_temp_new();
4233 
4234                 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
4235                 tcg_gen_shli_i32(t0, t0, 24);
4236 
4237                 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
4238                 tcg_gen_shri_i32(t1, t1, 8);
4239 
4240                 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4241             }
4242             break;
4243         case MXU_OPTN3_PTN4:
4244             {
4245                 /*                                         */
4246                 /*         XRb                 XRc         */
4247                 /*                     +---------------+   */
4248                 /*    A   B   C   D    | E   F   G   H |   */
4249                 /*                     +-------+-------+   */
4250                 /*                             |           */
4251                 /*                            XRa          */
4252                 /*                                         */
4253 
4254                 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4255             }
4256             break;
4257         }
4258     }
4259 }
4260 
4261 /*
4262  *  S32ALN XRc, XRb, XRa, rs
4263  *    Arrange bytes from XRb and XRc according to one of five sets of
4264  *    rules determined by rs[2:0], and place the result in XRa.
4265  */
4266 static void gen_mxu_S32ALN(DisasContext *ctx)
4267 {
4268     uint32_t rs, XRc, XRb, XRa;
4269 
4270     rs  = extract32(ctx->opcode, 21, 5);
4271     XRc = extract32(ctx->opcode, 14, 4);
4272     XRb = extract32(ctx->opcode, 10, 4);
4273     XRa = extract32(ctx->opcode,  6, 4);
4274 
4275     if (unlikely(XRa == 0)) {
4276         /* destination is zero register -> do nothing */
4277     } else if (unlikely((XRb == 0) && (XRc == 0))) {
4278         /* both operands zero registers -> just set destination to all 0s */
4279         tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
4280     } else {
4281         /* the most general case */
4282         TCGv t0 = tcg_temp_new();
4283         TCGv t1 = tcg_temp_new();
4284         TCGv t2 = tcg_temp_new();
4285         TCGv t3 = tcg_temp_new();
4286         TCGLabel *l_exit = gen_new_label();
4287         TCGLabel *l_b_only = gen_new_label();
4288         TCGLabel *l_c_only = gen_new_label();
4289 
4290         gen_load_mxu_gpr(t0, XRb);
4291         gen_load_mxu_gpr(t1, XRc);
4292         gen_load_gpr(t2, rs);
4293         tcg_gen_andi_tl(t2, t2, 0x07);
4294 
4295         /* do nothing for undefined cases */
4296         tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
4297 
4298         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
4299         tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
4300 
4301         tcg_gen_shli_tl(t2, t2, 3);
4302         tcg_gen_subfi_tl(t3, 32, t2);
4303 
4304         tcg_gen_shl_tl(t0, t0, t2);
4305         tcg_gen_shr_tl(t1, t1, t3);
4306         tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
4307         tcg_gen_br(l_exit);
4308 
4309         gen_set_label(l_b_only);
4310         gen_store_mxu_gpr(t0, XRa);
4311         tcg_gen_br(l_exit);
4312 
4313         gen_set_label(l_c_only);
4314         gen_store_mxu_gpr(t1, XRa);
4315 
4316         gen_set_label(l_exit);
4317     }
4318 }
4319 
4320 /*
4321  *  S32MADD XRa, XRd, rb, rc
4322  *    32 to 64 bit signed multiply with subsequent add
4323  *    result stored in {XRa, XRd} pair, stain HI/LO.
4324  *  S32MADDU XRa, XRd, rb, rc
4325  *    32 to 64 bit unsigned multiply with subsequent add
4326  *    result stored in {XRa, XRd} pair, stain HI/LO.
4327  *  S32MSUB XRa, XRd, rb, rc
4328  *    32 to 64 bit signed multiply with subsequent subtract
4329  *    result stored in {XRa, XRd} pair, stain HI/LO.
4330  *  S32MSUBU XRa, XRd, rb, rc
4331  *    32 to 64 bit unsigned multiply with subsequent subtract
4332  *    result stored in {XRa, XRd} pair, stain HI/LO.
4333  */
4334 static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
4335 {
4336     uint32_t XRa, XRd, Rb, Rc;
4337 
4338     XRa  = extract32(ctx->opcode,  6, 4);
4339     XRd  = extract32(ctx->opcode, 10, 4);
4340     Rb   = extract32(ctx->opcode, 16, 5);
4341     Rc   = extract32(ctx->opcode, 21, 5);
4342 
4343     if (unlikely(Rb == 0 || Rc == 0)) {
4344         /* do nothing because x + 0 * y => x */
4345     } else if (unlikely(XRa == 0 && XRd == 0)) {
4346         /* do nothing because result just dropped */
4347     } else {
4348         TCGv t0 = tcg_temp_new();
4349         TCGv t1 = tcg_temp_new();
4350         TCGv_i64 t2 = tcg_temp_new_i64();
4351         TCGv_i64 t3 = tcg_temp_new_i64();
4352 
4353         gen_load_gpr(t0, Rb);
4354         gen_load_gpr(t1, Rc);
4355 
4356         if (uns) {
4357             tcg_gen_extu_tl_i64(t2, t0);
4358             tcg_gen_extu_tl_i64(t3, t1);
4359         } else {
4360             tcg_gen_ext_tl_i64(t2, t0);
4361             tcg_gen_ext_tl_i64(t3, t1);
4362         }
4363         tcg_gen_mul_i64(t2, t2, t3);
4364 
4365         gen_load_mxu_gpr(t0, XRa);
4366         gen_load_mxu_gpr(t1, XRd);
4367 
4368         tcg_gen_concat_tl_i64(t3, t1, t0);
4369         if (sub) {
4370             tcg_gen_sub_i64(t3, t3, t2);
4371         } else {
4372             tcg_gen_add_i64(t3, t3, t2);
4373         }
4374         gen_move_low32(t1, t3);
4375         gen_move_high32(t0, t3);
4376 
4377         tcg_gen_mov_tl(cpu_HI[0], t0);
4378         tcg_gen_mov_tl(cpu_LO[0], t1);
4379 
4380         gen_store_mxu_gpr(t1, XRd);
4381         gen_store_mxu_gpr(t0, XRa);
4382     }
4383 }
4384 
4385 /*
4386  * Decoding engine for MXU
4387  * =======================
4388  */
4389 
4390 static void decode_opc_mxu__pool00(DisasContext *ctx)
4391 {
4392     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4393 
4394     switch (opcode) {
4395     case OPC_MXU_S32MAX:
4396     case OPC_MXU_S32MIN:
4397         gen_mxu_S32MAX_S32MIN(ctx);
4398         break;
4399     case OPC_MXU_D16MAX:
4400     case OPC_MXU_D16MIN:
4401         gen_mxu_D16MAX_D16MIN(ctx);
4402         break;
4403     case OPC_MXU_Q8MAX:
4404     case OPC_MXU_Q8MIN:
4405         gen_mxu_Q8MAX_Q8MIN(ctx);
4406         break;
4407     case OPC_MXU_Q8SLT:
4408         gen_mxu_q8slt(ctx, false);
4409         break;
4410     case OPC_MXU_Q8SLTU:
4411         gen_mxu_q8slt(ctx, true);
4412         break;
4413     default:
4414         MIPS_INVAL("decode_opc_mxu");
4415         gen_reserved_instruction(ctx);
4416         break;
4417     }
4418 }
4419 
4420 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
4421 {
4422     uint32_t opcode = extract32(ctx->opcode, 0, 6);
4423     uint32_t pad  = extract32(ctx->opcode, 14, 2);
4424 
4425     if (pad != 2) {
4426         /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
4427         return false;
4428     }
4429 
4430     switch (opcode) {
4431     case OPC_MXU_S32MADD:
4432         gen_mxu_s32madd_sub(ctx, false, false);
4433         break;
4434     case OPC_MXU_S32MADDU:
4435         gen_mxu_s32madd_sub(ctx, false, true);
4436         break;
4437     case OPC_MXU_S32MSUB:
4438         gen_mxu_s32madd_sub(ctx, true, false);
4439         break;
4440     case OPC_MXU_S32MSUBU:
4441         gen_mxu_s32madd_sub(ctx, true, true);
4442         break;
4443     default:
4444         return false;
4445     }
4446     return true;
4447 }
4448 
4449 static void decode_opc_mxu__pool01(DisasContext *ctx)
4450 {
4451     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4452 
4453     switch (opcode) {
4454     case OPC_MXU_S32SLT:
4455         gen_mxu_S32SLT(ctx);
4456         break;
4457     case OPC_MXU_D16SLT:
4458         gen_mxu_D16SLT(ctx);
4459         break;
4460     case OPC_MXU_D16AVG:
4461         gen_mxu_d16avg(ctx, false);
4462         break;
4463     case OPC_MXU_D16AVGR:
4464         gen_mxu_d16avg(ctx, true);
4465         break;
4466     case OPC_MXU_Q8AVG:
4467         gen_mxu_q8avg(ctx, false);
4468         break;
4469     case OPC_MXU_Q8AVGR:
4470         gen_mxu_q8avg(ctx, true);
4471         break;
4472     case OPC_MXU_Q8ADD:
4473         gen_mxu_Q8ADD(ctx);
4474         break;
4475     default:
4476         MIPS_INVAL("decode_opc_mxu");
4477         gen_reserved_instruction(ctx);
4478         break;
4479     }
4480 }
4481 
4482 static void decode_opc_mxu__pool02(DisasContext *ctx)
4483 {
4484     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4485 
4486     switch (opcode) {
4487     case OPC_MXU_S32CPS:
4488         gen_mxu_S32CPS(ctx);
4489         break;
4490     case OPC_MXU_D16CPS:
4491         gen_mxu_D16CPS(ctx);
4492         break;
4493     case OPC_MXU_Q8ABD:
4494         gen_mxu_Q8ABD(ctx);
4495         break;
4496     case OPC_MXU_Q16SAT:
4497         gen_mxu_Q16SAT(ctx);
4498         break;
4499     default:
4500         MIPS_INVAL("decode_opc_mxu");
4501         gen_reserved_instruction(ctx);
4502         break;
4503     }
4504 }
4505 
4506 static void decode_opc_mxu__pool03(DisasContext *ctx)
4507 {
4508     uint32_t opcode = extract32(ctx->opcode, 24, 2);
4509 
4510     switch (opcode) {
4511     case OPC_MXU_D16MULF:
4512         gen_mxu_d16mul(ctx, true, true);
4513         break;
4514     case OPC_MXU_D16MULE:
4515         gen_mxu_d16mul(ctx, true, false);
4516         break;
4517     default:
4518         MIPS_INVAL("decode_opc_mxu");
4519         gen_reserved_instruction(ctx);
4520         break;
4521     }
4522 }
4523 
4524 static void decode_opc_mxu__pool04(DisasContext *ctx)
4525 {
4526     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4527     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4528 
4529     /* Don't care about opcode bits as their meaning is unknown yet */
4530     switch (opcode) {
4531     default:
4532         gen_mxu_s32ldxx(ctx, reversed, false);
4533         break;
4534     }
4535 }
4536 
4537 static void decode_opc_mxu__pool05(DisasContext *ctx)
4538 {
4539     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4540     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4541 
4542     /* Don't care about opcode bits as their meaning is unknown yet */
4543     switch (opcode) {
4544     default:
4545         gen_mxu_s32stxx(ctx, reversed, false);
4546         break;
4547     }
4548 }
4549 
4550 static void decode_opc_mxu__pool06(DisasContext *ctx)
4551 {
4552     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4553     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4554 
4555     switch (opcode) {
4556     case OPC_MXU_S32LDST:
4557     case OPC_MXU_S32LDSTR:
4558         if (strd2 <= 2) {
4559             gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
4560             break;
4561         }
4562         /* fallthrough */
4563     default:
4564         MIPS_INVAL("decode_opc_mxu");
4565         gen_reserved_instruction(ctx);
4566         break;
4567     }
4568 }
4569 
4570 static void decode_opc_mxu__pool07(DisasContext *ctx)
4571 {
4572     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4573     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4574 
4575     switch (opcode) {
4576     case OPC_MXU_S32LDST:
4577     case OPC_MXU_S32LDSTR:
4578         if (strd2 <= 2) {
4579             gen_mxu_s32stxvx(ctx, opcode, false, strd2);
4580             break;
4581         }
4582         /* fallthrough */
4583     default:
4584         MIPS_INVAL("decode_opc_mxu");
4585         gen_reserved_instruction(ctx);
4586         break;
4587     }
4588 }
4589 
4590 static void decode_opc_mxu__pool08(DisasContext *ctx)
4591 {
4592     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4593     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4594 
4595     /* Don't care about opcode bits as their meaning is unknown yet */
4596     switch (opcode) {
4597     default:
4598         gen_mxu_s32ldxx(ctx, reversed, true);
4599         break;
4600     }
4601 }
4602 
4603 static void decode_opc_mxu__pool09(DisasContext *ctx)
4604 {
4605     uint32_t reversed = extract32(ctx->opcode, 20, 1);
4606     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4607 
4608     /* Don't care about opcode bits as their meaning is unknown yet */
4609     switch (opcode) {
4610     default:
4611         gen_mxu_s32stxx(ctx, reversed, true);
4612         break;
4613     }
4614 }
4615 
4616 static void decode_opc_mxu__pool10(DisasContext *ctx)
4617 {
4618     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4619     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4620 
4621     switch (opcode) {
4622     case OPC_MXU_S32LDST:
4623     case OPC_MXU_S32LDSTR:
4624         if (strd2 <= 2) {
4625             gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
4626             break;
4627         }
4628         /* fallthrough */
4629     default:
4630         MIPS_INVAL("decode_opc_mxu");
4631         gen_reserved_instruction(ctx);
4632         break;
4633     }
4634 }
4635 
4636 static void decode_opc_mxu__pool11(DisasContext *ctx)
4637 {
4638     uint32_t opcode = extract32(ctx->opcode, 10, 4);
4639     uint32_t strd2  = extract32(ctx->opcode, 14, 2);
4640 
4641     switch (opcode) {
4642     case OPC_MXU_S32LDST:
4643     case OPC_MXU_S32LDSTR:
4644         if (strd2 <= 2) {
4645             gen_mxu_s32stxvx(ctx, opcode, true, strd2);
4646             break;
4647         }
4648         /* fallthrough */
4649     default:
4650         MIPS_INVAL("decode_opc_mxu");
4651         gen_reserved_instruction(ctx);
4652         break;
4653     }
4654 }
4655 
4656 static void decode_opc_mxu__pool12(DisasContext *ctx)
4657 {
4658     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4659 
4660     switch (opcode) {
4661     case OPC_MXU_D32ACC:
4662         gen_mxu_d32acc(ctx);
4663         break;
4664     case OPC_MXU_D32ACCM:
4665         gen_mxu_d32accm(ctx);
4666         break;
4667     case OPC_MXU_D32ASUM:
4668         gen_mxu_d32asum(ctx);
4669         break;
4670     default:
4671         MIPS_INVAL("decode_opc_mxu");
4672         gen_reserved_instruction(ctx);
4673         break;
4674     }
4675 }
4676 
4677 static void decode_opc_mxu__pool13(DisasContext *ctx)
4678 {
4679     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4680 
4681     switch (opcode) {
4682     case OPC_MXU_Q16ACC:
4683         gen_mxu_q16acc(ctx);
4684         break;
4685     case OPC_MXU_Q16ACCM:
4686         gen_mxu_q16accm(ctx);
4687         break;
4688     case OPC_MXU_D16ASUM:
4689         gen_mxu_d16asum(ctx);
4690         break;
4691     default:
4692         MIPS_INVAL("decode_opc_mxu");
4693         gen_reserved_instruction(ctx);
4694         break;
4695     }
4696 }
4697 
4698 static void decode_opc_mxu__pool14(DisasContext *ctx)
4699 {
4700     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4701 
4702     switch (opcode) {
4703     case OPC_MXU_Q8ADDE:
4704         gen_mxu_q8adde(ctx, false);
4705         break;
4706     case OPC_MXU_D8SUM:
4707         gen_mxu_d8sum(ctx, false);
4708         break;
4709     case OPC_MXU_D8SUMC:
4710         gen_mxu_d8sum(ctx, true);
4711         break;
4712     default:
4713         MIPS_INVAL("decode_opc_mxu");
4714         gen_reserved_instruction(ctx);
4715         break;
4716     }
4717 }
4718 
4719 static void decode_opc_mxu__pool15(DisasContext *ctx)
4720 {
4721     uint32_t opcode = extract32(ctx->opcode, 14, 2);
4722 
4723     switch (opcode) {
4724     case OPC_MXU_S32MUL:
4725         gen_mxu_s32mul(ctx, false);
4726         break;
4727     case OPC_MXU_S32MULU:
4728         gen_mxu_s32mul(ctx, true);
4729         break;
4730     case OPC_MXU_S32EXTR:
4731         gen_mxu_s32extr(ctx);
4732         break;
4733     case OPC_MXU_S32EXTRV:
4734         gen_mxu_s32extrv(ctx);
4735         break;
4736     default:
4737         MIPS_INVAL("decode_opc_mxu");
4738         gen_reserved_instruction(ctx);
4739         break;
4740     }
4741 }
4742 
4743 static void decode_opc_mxu__pool16(DisasContext *ctx)
4744 {
4745     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4746 
4747     switch (opcode) {
4748     case OPC_MXU_D32SARW:
4749         gen_mxu_d32sarl(ctx, true);
4750         break;
4751     case OPC_MXU_S32ALN:
4752         gen_mxu_S32ALN(ctx);
4753         break;
4754     case OPC_MXU_S32ALNI:
4755         gen_mxu_S32ALNI(ctx);
4756         break;
4757     case OPC_MXU_S32LUI:
4758         gen_mxu_s32lui(ctx);
4759         break;
4760     case OPC_MXU_S32NOR:
4761         gen_mxu_S32NOR(ctx);
4762         break;
4763     case OPC_MXU_S32AND:
4764         gen_mxu_S32AND(ctx);
4765         break;
4766     case OPC_MXU_S32OR:
4767         gen_mxu_S32OR(ctx);
4768         break;
4769     case OPC_MXU_S32XOR:
4770         gen_mxu_S32XOR(ctx);
4771         break;
4772     default:
4773         MIPS_INVAL("decode_opc_mxu");
4774         gen_reserved_instruction(ctx);
4775         break;
4776     }
4777 }
4778 
4779 static void decode_opc_mxu__pool17(DisasContext *ctx)
4780 {
4781     uint32_t opcode = extract32(ctx->opcode, 6, 3);
4782     uint32_t strd2  = extract32(ctx->opcode, 9, 2);
4783 
4784     if (strd2 > 2) {
4785         MIPS_INVAL("decode_opc_mxu");
4786         gen_reserved_instruction(ctx);
4787         return;
4788     }
4789 
4790     switch (opcode) {
4791     case OPC_MXU_LXW:
4792           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
4793           break;
4794     case OPC_MXU_LXB:
4795           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
4796           break;
4797     case OPC_MXU_LXH:
4798           gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
4799           break;
4800     case OPC_MXU_LXBU:
4801           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
4802           break;
4803     case OPC_MXU_LXHU:
4804           gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
4805           break;
4806     default:
4807         MIPS_INVAL("decode_opc_mxu");
4808         gen_reserved_instruction(ctx);
4809         break;
4810     }
4811 }
4812 
4813 static void decode_opc_mxu__pool18(DisasContext *ctx)
4814 {
4815     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4816 
4817     switch (opcode) {
4818     case OPC_MXU_D32SLLV:
4819         gen_mxu_d32sxxv(ctx, false, false);
4820         break;
4821     case OPC_MXU_D32SLRV:
4822         gen_mxu_d32sxxv(ctx, true, false);
4823         break;
4824     case OPC_MXU_D32SARV:
4825         gen_mxu_d32sxxv(ctx, true, true);
4826         break;
4827     case OPC_MXU_Q16SLLV:
4828         gen_mxu_q16sxxv(ctx, false, false);
4829         break;
4830     case OPC_MXU_Q16SLRV:
4831         gen_mxu_q16sxxv(ctx, true, false);
4832         break;
4833     case OPC_MXU_Q16SARV:
4834         gen_mxu_q16sxxv(ctx, true, true);
4835         break;
4836     default:
4837         MIPS_INVAL("decode_opc_mxu");
4838         gen_reserved_instruction(ctx);
4839         break;
4840     }
4841 }
4842 
4843 static void decode_opc_mxu__pool19(DisasContext *ctx)
4844 {
4845     uint32_t opcode = extract32(ctx->opcode, 22, 4);
4846 
4847     switch (opcode) {
4848     case OPC_MXU_Q8MUL:
4849         gen_mxu_q8mul_mac(ctx, false, false);
4850         break;
4851     case OPC_MXU_Q8MULSU:
4852         gen_mxu_q8mul_mac(ctx, true, false);
4853         break;
4854     default:
4855         MIPS_INVAL("decode_opc_mxu");
4856         gen_reserved_instruction(ctx);
4857         break;
4858     }
4859 }
4860 
4861 static void decode_opc_mxu__pool20(DisasContext *ctx)
4862 {
4863     uint32_t opcode = extract32(ctx->opcode, 18, 3);
4864 
4865     switch (opcode) {
4866     case OPC_MXU_Q8MOVZ:
4867         gen_mxu_q8movzn(ctx, TCG_COND_NE);
4868         break;
4869     case OPC_MXU_Q8MOVN:
4870         gen_mxu_q8movzn(ctx, TCG_COND_EQ);
4871         break;
4872     case OPC_MXU_D16MOVZ:
4873         gen_mxu_d16movzn(ctx, TCG_COND_NE);
4874         break;
4875     case OPC_MXU_D16MOVN:
4876         gen_mxu_d16movzn(ctx, TCG_COND_EQ);
4877         break;
4878     case OPC_MXU_S32MOVZ:
4879         gen_mxu_s32movzn(ctx, TCG_COND_NE);
4880         break;
4881     case OPC_MXU_S32MOVN:
4882         gen_mxu_s32movzn(ctx, TCG_COND_EQ);
4883         break;
4884     default:
4885         MIPS_INVAL("decode_opc_mxu");
4886         gen_reserved_instruction(ctx);
4887         break;
4888     }
4889 }
4890 
4891 static void decode_opc_mxu__pool21(DisasContext *ctx)
4892 {
4893     uint32_t opcode = extract32(ctx->opcode, 22, 2);
4894 
4895     switch (opcode) {
4896     case OPC_MXU_Q8MAC:
4897         gen_mxu_q8mul_mac(ctx, false, true);
4898         break;
4899     case OPC_MXU_Q8MACSU:
4900         gen_mxu_q8mul_mac(ctx, true, true);
4901         break;
4902     default:
4903         MIPS_INVAL("decode_opc_mxu");
4904         gen_reserved_instruction(ctx);
4905         break;
4906     }
4907 }
4908 
4909 
4910 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
4911 {
4912     uint32_t opcode = extract32(insn, 0, 6);
4913 
4914     if (opcode == OPC_MXU_S32M2I) {
4915         gen_mxu_s32m2i(ctx);
4916         return true;
4917     }
4918 
4919     if (opcode == OPC_MXU_S32I2M) {
4920         gen_mxu_s32i2m(ctx);
4921         return true;
4922     }
4923 
4924     {
4925         TCGv t_mxu_cr = tcg_temp_new();
4926         TCGLabel *l_exit = gen_new_label();
4927 
4928         gen_load_mxu_cr(t_mxu_cr);
4929         tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
4930         tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
4931 
4932         switch (opcode) {
4933         case OPC_MXU_S32MADD:
4934         case OPC_MXU_S32MADDU:
4935         case OPC_MXU_S32MSUB:
4936         case OPC_MXU_S32MSUBU:
4937             return decode_opc_mxu_s32madd_sub(ctx);
4938         case OPC_MXU__POOL00:
4939             decode_opc_mxu__pool00(ctx);
4940             break;
4941         case OPC_MXU_D16MUL:
4942             gen_mxu_d16mul(ctx, false, false);
4943             break;
4944         case OPC_MXU_D16MAC:
4945             gen_mxu_d16mac(ctx, false, false);
4946             break;
4947         case OPC_MXU_D16MACF:
4948             gen_mxu_d16mac(ctx, true, true);
4949             break;
4950         case OPC_MXU_D16MADL:
4951             gen_mxu_d16madl(ctx);
4952             break;
4953         case OPC_MXU_S16MAD:
4954             gen_mxu_s16mad(ctx);
4955             break;
4956         case OPC_MXU_Q16ADD:
4957             gen_mxu_q16add(ctx);
4958             break;
4959         case OPC_MXU_D16MACE:
4960             gen_mxu_d16mac(ctx, true, false);
4961             break;
4962         case OPC_MXU__POOL01:
4963             decode_opc_mxu__pool01(ctx);
4964             break;
4965         case OPC_MXU__POOL02:
4966             decode_opc_mxu__pool02(ctx);
4967             break;
4968         case OPC_MXU__POOL03:
4969             decode_opc_mxu__pool03(ctx);
4970             break;
4971         case OPC_MXU__POOL04:
4972             decode_opc_mxu__pool04(ctx);
4973             break;
4974         case OPC_MXU__POOL05:
4975             decode_opc_mxu__pool05(ctx);
4976             break;
4977         case OPC_MXU__POOL06:
4978             decode_opc_mxu__pool06(ctx);
4979             break;
4980         case OPC_MXU__POOL07:
4981             decode_opc_mxu__pool07(ctx);
4982             break;
4983         case OPC_MXU__POOL08:
4984             decode_opc_mxu__pool08(ctx);
4985             break;
4986         case OPC_MXU__POOL09:
4987             decode_opc_mxu__pool09(ctx);
4988             break;
4989         case OPC_MXU__POOL10:
4990             decode_opc_mxu__pool10(ctx);
4991             break;
4992         case OPC_MXU__POOL11:
4993             decode_opc_mxu__pool11(ctx);
4994             break;
4995         case OPC_MXU_D32ADD:
4996             gen_mxu_d32add(ctx);
4997             break;
4998         case OPC_MXU__POOL12:
4999             decode_opc_mxu__pool12(ctx);
5000             break;
5001         case OPC_MXU__POOL13:
5002             decode_opc_mxu__pool13(ctx);
5003             break;
5004         case OPC_MXU__POOL14:
5005             decode_opc_mxu__pool14(ctx);
5006             break;
5007         case OPC_MXU_Q8ACCE:
5008             gen_mxu_q8adde(ctx, true);
5009             break;
5010         case OPC_MXU_S8LDD:
5011             gen_mxu_s8ldd(ctx, false);
5012             break;
5013         case OPC_MXU_S8STD:
5014             gen_mxu_s8std(ctx, false);
5015             break;
5016         case OPC_MXU_S8LDI:
5017             gen_mxu_s8ldd(ctx, true);
5018             break;
5019         case OPC_MXU_S8SDI:
5020             gen_mxu_s8std(ctx, true);
5021             break;
5022         case OPC_MXU__POOL15:
5023             decode_opc_mxu__pool15(ctx);
5024             break;
5025         case OPC_MXU__POOL16:
5026             decode_opc_mxu__pool16(ctx);
5027             break;
5028         case OPC_MXU__POOL17:
5029             decode_opc_mxu__pool17(ctx);
5030             break;
5031         case OPC_MXU_S16LDD:
5032             gen_mxu_s16ldd(ctx, false);
5033             break;
5034         case OPC_MXU_S16STD:
5035             gen_mxu_s16std(ctx, false);
5036             break;
5037         case OPC_MXU_S16LDI:
5038             gen_mxu_s16ldd(ctx, true);
5039             break;
5040         case OPC_MXU_S16SDI:
5041             gen_mxu_s16std(ctx, true);
5042             break;
5043         case OPC_MXU_D32SLL:
5044             gen_mxu_d32sxx(ctx, false, false);
5045             break;
5046         case OPC_MXU_D32SLR:
5047             gen_mxu_d32sxx(ctx, true, false);
5048             break;
5049         case OPC_MXU_D32SARL:
5050             gen_mxu_d32sarl(ctx, false);
5051             break;
5052         case OPC_MXU_D32SAR:
5053             gen_mxu_d32sxx(ctx, true, true);
5054             break;
5055         case OPC_MXU_Q16SLL:
5056             gen_mxu_q16sxx(ctx, false, false);
5057             break;
5058         case OPC_MXU__POOL18:
5059             decode_opc_mxu__pool18(ctx);
5060             break;
5061         case OPC_MXU_Q16SLR:
5062             gen_mxu_q16sxx(ctx, true, false);
5063             break;
5064         case OPC_MXU_Q16SAR:
5065             gen_mxu_q16sxx(ctx, true, true);
5066             break;
5067         case OPC_MXU__POOL19:
5068             decode_opc_mxu__pool19(ctx);
5069             break;
5070         case OPC_MXU__POOL20:
5071             decode_opc_mxu__pool20(ctx);
5072             break;
5073         case OPC_MXU__POOL21:
5074             decode_opc_mxu__pool21(ctx);
5075             break;
5076         case OPC_MXU_Q16SCOP:
5077             gen_mxu_q16scop(ctx);
5078             break;
5079         case OPC_MXU_Q8MADL:
5080             gen_mxu_q8madl(ctx);
5081             break;
5082         case OPC_MXU_S32SFL:
5083             gen_mxu_s32sfl(ctx);
5084             break;
5085         case OPC_MXU_Q8SAD:
5086             gen_mxu_q8sad(ctx);
5087             break;
5088         default:
5089             return false;
5090         }
5091 
5092         gen_set_label(l_exit);
5093     }
5094 
5095     return true;
5096 }
5097