xref: /openbmc/qemu/target/mips/tcg/mxu_translate.c (revision 08ae519ab8eb6c9abbd97156cb3678f372521501)
1  /*
2   *  Ingenic XBurst Media eXtension Unit (MXU) translation routines.
3   *
4   *  Copyright (c) 2004-2005 Jocelyn Mayer
5   *  Copyright (c) 2006 Marius Groeger (FPU operations)
6   *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7   *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8   *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
9   *
10   * SPDX-License-Identifier: LGPL-2.1-or-later
11   *
12   * Datasheet:
13   *
14   *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15   *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
16   */
17  
18  #include "qemu/osdep.h"
19  #include "translate.h"
20  
21  /*
22   *
23   *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24   *       ============================================
25   *
26   *
 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of the
 * MIPS32 instruction set. It is designed to fit the needs of signal, graphical
 * and video processing applications. The MXU instruction set is used in the
 * XBurst family of microprocessors by Ingenic.
31   *
 * The MXU unit contains 17 registers called XR0-XR16. XR0 is always zero, and
 * XR16 is the control register.
34   *
35   *
36   *     The notation used in MXU assembler mnemonics
37   *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38   *
39   *  Register operands:
40   *
41   *   XRa, XRb, XRc, XRd - MXU registers
42   *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
43   *
44   *  Non-register operands:
45   *
46   *   aptn1 - 1-bit accumulate add/subtract pattern
47   *   aptn2 - 2-bit accumulate add/subtract pattern
48   *   eptn2 - 2-bit execute add/subtract pattern
49   *   optn2 - 2-bit operand pattern
50   *   optn3 - 3-bit operand pattern
51   *   sft4  - 4-bit shift amount
52   *   strd2 - 2-bit stride amount
53   *
54   *  Prefixes:
55   *
56   *   Level of parallelism:                Operand size:
57   *    S - single operation at a time       32 - word
58   *    D - two operations in parallel       16 - half word
59   *    Q - four operations in parallel       8 - byte
60   *
61   *  Operations:
62   *
63   *   ADD   - Add or subtract
64   *   ADDC  - Add with carry-in
65   *   ACC   - Accumulate
66   *   ASUM  - Sum together then accumulate (add or subtract)
67   *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
68   *   AVG   - Average between 2 operands
69   *   ABD   - Absolute difference
70   *   ALN   - Align data
71   *   AND   - Logical bitwise 'and' operation
72   *   CPS   - Copy sign
73   *   EXTR  - Extract bits
74   *   I2M   - Move from GPR register to MXU register
75   *   LDD   - Load data from memory to XRF
76   *   LDI   - Load data from memory to XRF (and increase the address base)
77   *   LUI   - Load unsigned immediate
78   *   MUL   - Multiply
79   *   MULU  - Unsigned multiply
80   *   MADD  - 64-bit operand add 32x32 product
81   *   MSUB  - 64-bit operand subtract 32x32 product
82   *   MAC   - Multiply and accumulate (add or subtract)
83   *   MAD   - Multiply and add or subtract
84   *   MAX   - Maximum between 2 operands
85   *   MIN   - Minimum between 2 operands
86   *   M2I   - Move from MXU register to GPR register
87   *   MOVZ  - Move if zero
88   *   MOVN  - Move if non-zero
89   *   NOR   - Logical bitwise 'nor' operation
90   *   OR    - Logical bitwise 'or' operation
91   *   STD   - Store data from XRF to memory
92   *   SDI   - Store data from XRF to memory (and increase the address base)
 *   SLT   - Set on less than comparison
94   *   SAD   - Sum of absolute differences
95   *   SLL   - Logical shift left
96   *   SLR   - Logical shift right
97   *   SAR   - Arithmetic shift right
98   *   SAT   - Saturation
99   *   SFL   - Shuffle
100   *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101   *   XOR   - Logical bitwise 'exclusive or' operation
102   *
103   *  Suffixes:
104   *
105   *   E - Expand results
106   *   F - Fixed point multiplication
107   *   L - Low part result
108   *   R - Doing rounding
109   *   V - Variable instead of immediate
110   *   W - Combine above L and V
111   *
112   *
113   *     The list of MXU instructions grouped by functionality
114   *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115   *
116   * Load/Store instructions           Multiplication instructions
117   * -----------------------           ---------------------------
118   *
119   *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
120   *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
121   *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
122   *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
123   *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
124   *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
125   *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
126   *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
127   *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
128   *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129   *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130   *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131   *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132   *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133   *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
134   *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
135   *  S16LDD XRa, Rb, s10, eptn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
136   *  S16STD XRa, Rb, s10, eptn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
137   *  S16LDI XRa, Rb, s10, eptn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
138   *  S16SDI XRa, Rb, s10, eptn2
139   *  S8LDD XRa, Rb, s8, eptn3
140   *  S8STD XRa, Rb, s8, eptn3         Addition and subtraction instructions
141   *  S8LDI XRa, Rb, s8, eptn3         -------------------------------------
142   *  S8SDI XRa, Rb, s8, eptn3
143   *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
144   *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
145   *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
146   *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
147   *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
148   *                                    S32CPS XRa, XRb, XRc
149   *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150   * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
151   * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
152   *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
 *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb, XRc
154   *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
155   *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
156   *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
157   *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
158   *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
159   *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
160   *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
161   *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
162   *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
163   *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
164   *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
165   *  Q8SLT XRa, XRb, XRc
166   *  Q8SLTU XRa, XRb, XRc
167   *  Q8MOVZ XRa, XRb, XRc             Shift instructions
168   *  Q8MOVN XRa, XRb, XRc             ------------------
169   *
170   *                                    D32SLL XRa, XRb, XRc, XRd, sft4
171   * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
172   * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
173   *                                    D32SARL XRa, XRb, XRc, sft4
174   *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
175   *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
176   *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
177   *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
178   *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
179   *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
180   * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
181   * -------------------------          Q16SLLV XRa, XRb, Rb
182   *                                    Q16SLRV XRa, XRb, Rb
183   *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
184   *  S32ALN XRa, XRb, XRc, Rb
185   *  S32ALNI XRa, XRb, XRc, s3
186   *  S32LUI XRa, s8, optn3            Move instructions
187   *  S32EXTR XRa, XRb, Rb, bits5      -----------------
188   *  S32EXTRV XRa, XRb, Rs, Rt
189   *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
190   *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
191   *
192   *
193   *     The opcode organization of MXU instructions
194   *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195   *
196   * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
197   * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
198   * other bits up to the instruction level is as follows:
199   *
200   *              bits
201   *             05..00
202   *
203   *          ┌─ 000000 ─ OPC_MXU_S32MADD
204   *          ├─ 000001 ─ OPC_MXU_S32MADDU
205   *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
206   *          │
207   *          │                               20..18
208   *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209   *          │                            ├─ 001 ─ OPC_MXU_S32MIN
210   *          │                            ├─ 010 ─ OPC_MXU_D16MAX
211   *          │                            ├─ 011 ─ OPC_MXU_D16MIN
212   *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
213   *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
214   *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
215   *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
216   *          ├─ 000100 ─ OPC_MXU_S32MSUB
217   *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
218   *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219   *          │                            ├─ 001 ─ OPC_MXU_D16SLT
220   *          │                            ├─ 010 ─ OPC_MXU_D16AVG
221   *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
222   *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
223   *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
224   *          │                            └─ 111 ─ OPC_MXU_Q8ADD
225   *          │
226   *          │                               20..18
227   *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228   *          │                            ├─ 010 ─ OPC_MXU_D16CPS
229   *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
230   *          │                            └─ 110 ─ OPC_MXU_Q16SAT
231   *          ├─ 001000 ─ OPC_MXU_D16MUL
232   *          │                               25..24
233   *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234   *          │                            └─ 01 ─ OPC_MXU_D16MULE
235   *          ├─ 001010 ─ OPC_MXU_D16MAC
236   *          ├─ 001011 ─ OPC_MXU_D16MACF
237   *          ├─ 001100 ─ OPC_MXU_D16MADL
238   *          ├─ 001101 ─ OPC_MXU_S16MAD
239   *          ├─ 001110 ─ OPC_MXU_Q16ADD
240   *          ├─ 001111 ─ OPC_MXU_D16MACE     20 (13..10 don't care)
241   *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
242   *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
243   *          │
244   *          │                               20 (13..10 don't care)
245   *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246   *          │                            └─ 1 ─ OPC_MXU_S32STDR
247   *          │
248   *          │                               13..10
249   *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250   *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
251   *          │
252   *          │                               13..10
253   *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254   *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
255   *          │
256   *          │                               20 (13..10 don't care)
257   *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258   *          │                            └─ 1 ─ OPC_MXU_S32LDIR
259   *          │
260   *          │                               20 (13..10 don't care)
261   *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262   *          │                            └─ 1 ─ OPC_MXU_S32SDIR
263   *          │
264   *          │                               13..10
265   *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266   *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
267   *          │
268   *          │                               13..10
269   *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270   *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
271   *          ├─ 011000 ─ OPC_MXU_D32ADD  (catches D32ADDC too)
272   *          │                               23..22
273   *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274   * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
275   *          │                            └─ 10 ─ OPC_MXU_D32ASUM
276   *          ├─ 011010 ─ <not assigned>
277   *          │                               23..22
278   *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279   *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
280   *          │                            └─ 10 ─ OPC_MXU_D16ASUM
281   *          │
282   *          │                               23..22
283   *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284   *          │                            ├─ 01 ─ OPC_MXU_D8SUM
285   *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
286   *          ├─ 011110 ─ <not assigned>
287   *          ├─ 011111 ─ <not assigned>
288   *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
289   *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
290   *          ├─ 100010 ─ OPC_MXU_S8LDD
291   *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
292   *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
293   *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
294   *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
295   *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
296   *          │
297   *          │                               20..18
298   *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299   *          │                            ├─ 001 ─ OPC_MXU_S32ALN
300   *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
301   *          │                            ├─ 011 ─ OPC_MXU_S32LUI
302   *          │                            ├─ 100 ─ OPC_MXU_S32NOR
303   *          │                            ├─ 101 ─ OPC_MXU_S32AND
304   *          │                            ├─ 110 ─ OPC_MXU_S32OR
305   *          │                            └─ 111 ─ OPC_MXU_S32XOR
306   *          │
307   *          │                               8..6
308   *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309   *          │                            ├─ 001 ─ OPC_MXU_LXH
310   *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
311   *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
312   *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
313   *          ├─ 101100 ─ OPC_MXU_S16LDI
314   *          ├─ 101101 ─ OPC_MXU_S16SDI
315   *          ├─ 101110 ─ OPC_MXU_S32M2I
316   *          ├─ 101111 ─ OPC_MXU_S32I2M
317   *          ├─ 110000 ─ OPC_MXU_D32SLL
318   *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
319   *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
320   *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
321   *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
322   *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
323   *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
324   *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
325   *          │
326   *          ├─ 110111 ─ OPC_MXU_Q16SAR
327   *          │                               23..22
328   *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329   *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
330   *          │
331   *          │                               20..18
332   *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333   *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
334   *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
335   *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
336   *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
337   *          │                            └─ 101 ─ OPC_MXU_S32MOVN
338   *          │
339   *          │                               23..22
340   *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341   *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
342   *          ├─ 111011 ─ OPC_MXU_Q16SCOP
343   *          ├─ 111100 ─ OPC_MXU_Q8MADL
344   *          ├─ 111101 ─ OPC_MXU_S32SFL
345   *          ├─ 111110 ─ OPC_MXU_Q8SAD
346   *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
347   *
348   *
349   * Compiled after:
350   *
351   *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352   *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
353   */
354  
/*
 * Major MXU opcodes: the value of instruction bits 05..00.
 *
 * Entries named OPC_MXU__POOLnn are shared by several instructions; the
 * trailing comment names the bit-field that selects the instruction within
 * the pool.  Values 0x02, 0x1A, 0x1E-0x21, 0x29 and 0x3F are not assigned.
 */
enum {
    OPC_MXU_S32MADD  = 0x00,    /* 000000 */
    OPC_MXU_S32MADDU = 0x01,    /* 000001 */
    OPC_MXU__POOL00  = 0x03,    /* 000011, sub-opcode in bits 20..18 */
    OPC_MXU_S32MSUB  = 0x04,    /* 000100 */
    OPC_MXU_S32MSUBU = 0x05,    /* 000101 */
    OPC_MXU__POOL01  = 0x06,    /* 000110, sub-opcode in bits 20..18 */
    OPC_MXU__POOL02  = 0x07,    /* 000111, sub-opcode in bits 20..18 */
    OPC_MXU_D16MUL   = 0x08,    /* 001000 */
    OPC_MXU__POOL03  = 0x09,    /* 001001, sub-opcode in bits 25..24 */
    OPC_MXU_D16MAC   = 0x0A,    /* 001010 */
    OPC_MXU_D16MACF  = 0x0B,    /* 001011 */
    OPC_MXU_D16MADL  = 0x0C,    /* 001100 */
    OPC_MXU_S16MAD   = 0x0D,    /* 001101 */
    OPC_MXU_Q16ADD   = 0x0E,    /* 001110 */
    OPC_MXU_D16MACE  = 0x0F,    /* 001111 */
    OPC_MXU__POOL04  = 0x10,    /* 010000, sub-opcode in bit 20 */
    OPC_MXU__POOL05  = 0x11,    /* 010001, sub-opcode in bit 20 */
    OPC_MXU__POOL06  = 0x12,    /* 010010, sub-opcode in bits 13..10 */
    OPC_MXU__POOL07  = 0x13,    /* 010011, sub-opcode in bits 13..10 */
    OPC_MXU__POOL08  = 0x14,    /* 010100, sub-opcode in bit 20 */
    OPC_MXU__POOL09  = 0x15,    /* 010101, sub-opcode in bit 20 */
    OPC_MXU__POOL10  = 0x16,    /* 010110, sub-opcode in bits 13..10 */
    OPC_MXU__POOL11  = 0x17,    /* 010111, sub-opcode in bits 13..10 */
    OPC_MXU_D32ADD   = 0x18,    /* 011000, catches D32ADDC too */
    OPC_MXU__POOL12  = 0x19,    /* 011001, sub-opcode in bits 23..22 */
    OPC_MXU__POOL13  = 0x1B,    /* 011011, sub-opcode in bits 23..22 */
    OPC_MXU__POOL14  = 0x1C,    /* 011100, sub-opcode in bits 23..22 */
    OPC_MXU_Q8ACCE   = 0x1D,    /* 011101 */
    OPC_MXU_S8LDD    = 0x22,    /* 100010 */
    OPC_MXU_S8STD    = 0x23,    /* 100011 */
    OPC_MXU_S8LDI    = 0x24,    /* 100100 */
    OPC_MXU_S8SDI    = 0x25,    /* 100101 */
    OPC_MXU__POOL15  = 0x26,    /* 100110, sub-opcode in bits 15..14 */
    OPC_MXU__POOL16  = 0x27,    /* 100111, sub-opcode in bits 20..18 */
    OPC_MXU__POOL17  = 0x28,    /* 101000, sub-opcode in bits 8..6 */
    OPC_MXU_S16LDD   = 0x2A,    /* 101010 */
    OPC_MXU_S16STD   = 0x2B,    /* 101011 */
    OPC_MXU_S16LDI   = 0x2C,    /* 101100 */
    OPC_MXU_S16SDI   = 0x2D,    /* 101101 */
    OPC_MXU_S32M2I   = 0x2E,    /* 101110 */
    OPC_MXU_S32I2M   = 0x2F,    /* 101111 */
    OPC_MXU_D32SLL   = 0x30,    /* 110000 */
    OPC_MXU_D32SLR   = 0x31,    /* 110001 */
    OPC_MXU_D32SARL  = 0x32,    /* 110010 */
    OPC_MXU_D32SAR   = 0x33,    /* 110011 */
    OPC_MXU_Q16SLL   = 0x34,    /* 110100 */
    OPC_MXU_Q16SLR   = 0x35,    /* 110101 */
    OPC_MXU__POOL18  = 0x36,    /* 110110, sub-opcode in bits 20..18 */
    OPC_MXU_Q16SAR   = 0x37,    /* 110111 */
    OPC_MXU__POOL19  = 0x38,    /* 111000, sub-opcode in bits 23..22 */
    OPC_MXU__POOL20  = 0x39,    /* 111001, sub-opcode in bits 20..18 */
    OPC_MXU__POOL21  = 0x3A,    /* 111010, sub-opcode in bits 23..22 */
    OPC_MXU_Q16SCOP  = 0x3B,    /* 111011 */
    OPC_MXU_Q8MADL   = 0x3C,    /* 111100 */
    OPC_MXU_S32SFL   = 0x3D,    /* 111101 */
    OPC_MXU_Q8SAD    = 0x3E,    /* 111110 */
};
413  
414  
/*
 * MXU pool 00 (major opcode 000011): sub-opcode taken from bits 20..18.
 */
enum {
    OPC_MXU_S32MAX   = 0x00,    /* 000 */
    OPC_MXU_S32MIN   = 0x01,    /* 001 */
    OPC_MXU_D16MAX   = 0x02,    /* 010 */
    OPC_MXU_D16MIN   = 0x03,    /* 011 */
    OPC_MXU_Q8MAX    = 0x04,    /* 100 */
    OPC_MXU_Q8MIN    = 0x05,    /* 101 */
    OPC_MXU_Q8SLT    = 0x06,    /* 110 */
    OPC_MXU_Q8SLTU   = 0x07,    /* 111 */
};
428  
/*
 * MXU pool 01 (major opcode 000110): sub-opcode taken from bits 20..18.
 * Sub-opcode 110 has no entry here.
 */
enum {
    OPC_MXU_S32SLT   = 0x00,    /* 000 */
    OPC_MXU_D16SLT   = 0x01,    /* 001 */
    OPC_MXU_D16AVG   = 0x02,    /* 010 */
    OPC_MXU_D16AVGR  = 0x03,    /* 011 */
    OPC_MXU_Q8AVG    = 0x04,    /* 100 */
    OPC_MXU_Q8AVGR   = 0x05,    /* 101 */
    OPC_MXU_Q8ADD    = 0x07,    /* 111 */
};
441  
/*
 * MXU pool 02 (major opcode 000111): sub-opcode taken from bits 20..18.
 * Only the even sub-opcodes have entries.
 */
enum {
    OPC_MXU_S32CPS   = 0x00,    /* 000 */
    OPC_MXU_D16CPS   = 0x02,    /* 010 */
    OPC_MXU_Q8ABD    = 0x04,    /* 100 */
    OPC_MXU_Q16SAT   = 0x06,    /* 110 */
};
451  
/*
 * MXU pool 03 (major opcode 001001): sub-opcode taken from bits 25..24.
 */
enum {
    OPC_MXU_D16MULF  = 0x00,    /* 00 */
    OPC_MXU_D16MULE  = 0x01,    /* 01 */
};
459  
/*
 * MXU pools 04 05 06 07 08 09 10 11 (32-bit load/store families).
 *
 * Every one of these pools holds a plain variant and an 'R'-suffixed variant.
 * The selector is bit 20 for pools 04, 05, 08 and 09, and bits 13..10 for
 * pools 06, 07, 10 and 11.
 */
enum {
    OPC_MXU_S32LDST  = 0x00,    /* S32LDD/STD/LDI/SDI and their 'V' forms */
    OPC_MXU_S32LDSTR = 0x01,    /* the corresponding ...R instructions */
};
467  
/*
 * MXU pool 12 (major opcode 011001): sub-opcode taken from bits 23..22.
 */
enum {
    OPC_MXU_D32ACC    = 0x00,   /* 00 */
    OPC_MXU_D32ACCM   = 0x01,   /* 01 */
    OPC_MXU_D32ASUM   = 0x02,   /* 10 */
};
476  
/*
 * MXU pool 13 (major opcode 011011): sub-opcode taken from bits 23..22.
 */
enum {
    OPC_MXU_Q16ACC    = 0x00,   /* 00 */
    OPC_MXU_Q16ACCM   = 0x01,   /* 01 */
    OPC_MXU_D16ASUM   = 0x02,   /* 10 */
};
485  
/*
 * MXU pool 14 (major opcode 011100): sub-opcode taken from bits 23..22.
 */
enum {
    OPC_MXU_Q8ADDE    = 0x00,   /* 00 */
    OPC_MXU_D8SUM     = 0x01,   /* 01 */
    OPC_MXU_D8SUMC    = 0x02,   /* 10 */
};
494  
/*
 * MXU pool 15 (major opcode 100110): sub-opcode taken from bits 15..14.
 */
enum {
    OPC_MXU_S32MUL    = 0x00,   /* 00 */
    OPC_MXU_S32MULU   = 0x01,   /* 01 */
    OPC_MXU_S32EXTR   = 0x02,   /* 10 */
    OPC_MXU_S32EXTRV  = 0x03,   /* 11 */
};
504  
/*
 * MXU pool 16 (major opcode 100111): sub-opcode taken from bits 20..18.
 */
enum {
    OPC_MXU_D32SARW  = 0x00,    /* 000 */
    OPC_MXU_S32ALN   = 0x01,    /* 001 */
    OPC_MXU_S32ALNI  = 0x02,    /* 010 */
    OPC_MXU_S32LUI   = 0x03,    /* 011 */
    OPC_MXU_S32NOR   = 0x04,    /* 100 */
    OPC_MXU_S32AND   = 0x05,    /* 101 */
    OPC_MXU_S32OR    = 0x06,    /* 110 */
    OPC_MXU_S32XOR   = 0x07,    /* 111 */
};
518  
/*
 * MXU pool 17 (major opcode 101000): sub-opcode taken from bits 8..6.
 * Sub-opcode 010 has no entry here.
 */
enum {
    OPC_MXU_LXB      = 0x00,    /* 000 */
    OPC_MXU_LXH      = 0x01,    /* 001 */
    OPC_MXU_LXW      = 0x03,    /* 011 */
    OPC_MXU_LXBU     = 0x04,    /* 100 */
    OPC_MXU_LXHU     = 0x05,    /* 101 */
};
529  
/*
 * MXU pool 18 (major opcode 110110): sub-opcode taken from bits 20..18.
 * Sub-opcodes 010 and 110 have no entry here.
 */
enum {
    OPC_MXU_D32SLLV  = 0x00,    /* 000 */
    OPC_MXU_D32SLRV  = 0x01,    /* 001 */
    OPC_MXU_D32SARV  = 0x03,    /* 011 */
    OPC_MXU_Q16SLLV  = 0x04,    /* 100 */
    OPC_MXU_Q16SLRV  = 0x05,    /* 101 */
    OPC_MXU_Q16SARV  = 0x07,    /* 111 */
};
541  
/*
 * MXU pool 19 (major opcode 111000): sub-opcode taken from bits 23..22.
 */
enum {
    OPC_MXU_Q8MUL    = 0x00,    /* 00 */
    OPC_MXU_Q8MULSU  = 0x02,    /* 10 */
};
549  
/*
 * MXU pool 20 (major opcode 111001): sub-opcode taken from bits 20..18.
 */
enum {
    OPC_MXU_Q8MOVZ   = 0x00,    /* 000 */
    OPC_MXU_Q8MOVN   = 0x01,    /* 001 */
    OPC_MXU_D16MOVZ  = 0x02,    /* 010 */
    OPC_MXU_D16MOVN  = 0x03,    /* 011 */
    OPC_MXU_S32MOVZ  = 0x04,    /* 100 */
    OPC_MXU_S32MOVN  = 0x05,    /* 101 */
};
561  
/*
 * MXU pool 21 (major opcode 111010): sub-opcode taken from bits 23..22.
 */
enum {
    OPC_MXU_Q8MAC    = 0x00,    /* 00 */
    OPC_MXU_Q8MACSU  = 0x02,    /* 10 */
};
569  
570  
/*
 * Encodings of the MXU pattern operands.
 */

/* 'aptn1': 1-bit accumulate add/subtract pattern */
#define MXU_APTN1_A     0
#define MXU_APTN1_S     1

/* 'aptn2': 2-bit accumulate add/subtract pattern */
#define MXU_APTN2_AA    0
#define MXU_APTN2_AS    1
#define MXU_APTN2_SA    2
#define MXU_APTN2_SS    3

/* 'eptn2': 2-bit execute add/subtract pattern */
#define MXU_EPTN2_AA    0
#define MXU_EPTN2_AS    1
#define MXU_EPTN2_SA    2
#define MXU_EPTN2_SS    3

/* 'optn2': 2-bit operand getting pattern, numbered names */
#define MXU_OPTN2_PTN0  0
#define MXU_OPTN2_PTN1  1
#define MXU_OPTN2_PTN2  2
#define MXU_OPTN2_PTN3  3
/* 'optn2': the same four encodings under the alternative naming scheme */
#define MXU_OPTN2_WW    0
#define MXU_OPTN2_LW    1
#define MXU_OPTN2_HW    2
#define MXU_OPTN2_XW    3

/* 'optn3': 3-bit operand getting pattern */
#define MXU_OPTN3_PTN0  0
#define MXU_OPTN3_PTN1  1
#define MXU_OPTN3_PTN2  2
#define MXU_OPTN3_PTN3  3
#define MXU_OPTN3_PTN4  4
#define MXU_OPTN3_PTN5  5
#define MXU_OPTN3_PTN6  6
#define MXU_OPTN3_PTN7  7
607  
608  /* MXU registers */
609  static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
610  static TCGv mxu_CR;
611  
612  static const char mxuregnames[NUMBER_OF_MXU_REGISTERS][4] = {
613      "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
614      "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
615  };
616  
mxu_translate_init(void)617  void mxu_translate_init(void)
618  {
619      for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
620          mxu_gpr[i] = tcg_global_mem_new(tcg_env,
621                                          offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
622                                          mxuregnames[i]);
623      }
624  
625      mxu_CR = tcg_global_mem_new(tcg_env,
626                                  offsetof(CPUMIPSState, active_tc.mxu_cr),
627                                  mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
628  }
629  
630  /* MXU General purpose registers moves. */
/*
 * Emit code copying MXU register XR<reg> into 't'.
 *
 * XR0 reads as constant zero.  For reg > 15 nothing is emitted and 't' is
 * left untouched.
 */
static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
{
    if (reg > 15) {
        return;
    }

    if (reg != 0) {
        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
    } else {
        tcg_gen_movi_tl(t, 0);
    }
}
639  
/*
 * Emit code copying 't' into MXU register XR<reg>.
 *
 * Writes to XR0 and to reg > 15 are dropped: nothing is emitted for them.
 */
static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
{
    if (reg == 0 || reg > 15) {
        return;
    }

    tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
}
646  
/*
 * Emit code extracting the 'len'-bit field starting at bit 'ofs' of MXU
 * register XR<reg> into 't'.
 *
 * XR0 yields constant zero.  For reg > 15 nothing is emitted and 't' is
 * left untouched.
 */
static inline void gen_extract_mxu_gpr(TCGv t, unsigned int reg,
                                       unsigned int ofs, unsigned int len)
{
    if (reg > 15) {
        return;
    }

    if (reg != 0) {
        tcg_gen_extract_tl(t, mxu_gpr[reg - 1], ofs, len);
    } else {
        tcg_gen_movi_tl(t, 0);
    }
}
656  
657  /* MXU control register moves. */
/* Emit code copying the MXU control register into 't'. */
static inline void gen_load_mxu_cr(TCGv t)
{
    tcg_gen_mov_tl(t, mxu_CR);
}
662  
/*
 * Emit code copying 't' into the MXU control register.  The whole value is
 * written as-is.
 */
static inline void gen_store_mxu_cr(TCGv t)
{
    /* TODO: Add handling of RW rules for MXU_CR. */
    tcg_gen_mov_tl(mxu_CR, t);
}
668  
669  /*
670   * S32I2M XRa, rb - Register move from GRF to XRF
671   */
gen_mxu_s32i2m(DisasContext * ctx)672  static void gen_mxu_s32i2m(DisasContext *ctx)
673  {
674      TCGv t0;
675      uint32_t XRa, Rb;
676  
677      t0 = tcg_temp_new();
678  
679      XRa = extract32(ctx->opcode, 6, 5);
680      Rb = extract32(ctx->opcode, 16, 5);
681  
682      gen_load_gpr(t0, Rb);
683      if (XRa <= 15) {
684          gen_store_mxu_gpr(t0, XRa);
685      } else if (XRa == 16) {
686          gen_store_mxu_cr(t0);
687      }
688  }
689  
690  /*
691   * S32M2I XRa, rb - Register move from XRF to GRF
692   */
gen_mxu_s32m2i(DisasContext * ctx)693  static void gen_mxu_s32m2i(DisasContext *ctx)
694  {
695      TCGv t0;
696      uint32_t XRa, Rb;
697  
698      t0 = tcg_temp_new();
699  
700      XRa = extract32(ctx->opcode, 6, 5);
701      Rb = extract32(ctx->opcode, 16, 5);
702  
703      if (XRa <= 15) {
704          gen_load_mxu_gpr(t0, XRa);
705      } else if (XRa == 16) {
706          gen_load_mxu_cr(t0);
707      }
708  
709      gen_store_gpr(t0, Rb);
710  }
711  
712  /*
713   * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
714   *
715   * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
716   * post modify address register
717   */
gen_mxu_s8ldd(DisasContext * ctx,bool postmodify)718  static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
719  {
720      TCGv t0, t1;
721      uint32_t XRa, Rb, s8, optn3;
722  
723      t0 = tcg_temp_new();
724      t1 = tcg_temp_new();
725  
726      XRa = extract32(ctx->opcode, 6, 4);
727      s8 = extract32(ctx->opcode, 10, 8);
728      optn3 = extract32(ctx->opcode, 18, 3);
729      Rb = extract32(ctx->opcode, 21, 5);
730  
731      gen_load_gpr(t0, Rb);
732      tcg_gen_addi_tl(t0, t0, (int8_t)s8);
733      if (postmodify) {
734          gen_store_gpr(t0, Rb);
735      }
736  
737      switch (optn3) {
738      /* XRa[7:0] = tmp8 */
739      case MXU_OPTN3_PTN0:
740          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
741          gen_load_mxu_gpr(t0, XRa);
742          tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
743          break;
744      /* XRa[15:8] = tmp8 */
745      case MXU_OPTN3_PTN1:
746          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
747          gen_load_mxu_gpr(t0, XRa);
748          tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
749          break;
750      /* XRa[23:16] = tmp8 */
751      case MXU_OPTN3_PTN2:
752          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
753          gen_load_mxu_gpr(t0, XRa);
754          tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
755          break;
756      /* XRa[31:24] = tmp8 */
757      case MXU_OPTN3_PTN3:
758          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
759          gen_load_mxu_gpr(t0, XRa);
760          tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
761          break;
762      /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
763      case MXU_OPTN3_PTN4:
764          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
765          tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
766          break;
767      /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
768      case MXU_OPTN3_PTN5:
769          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
770          tcg_gen_shli_tl(t1, t1, 8);
771          tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
772          break;
773      /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
774      case MXU_OPTN3_PTN6:
775          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
776          tcg_gen_mov_tl(t0, t1);
777          tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
778          tcg_gen_shli_tl(t1, t1, 16);
779          tcg_gen_or_tl(t0, t0, t1);
780          break;
781      /* XRa = {tmp8, tmp8, tmp8, tmp8} */
782      case MXU_OPTN3_PTN7:
783          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
784          tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
785          tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
786          break;
787      }
788  
789      gen_store_mxu_gpr(t0, XRa);
790  }
791  
792  /*
793   * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
794   *
795   * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
796   * post modify address register
797   */
gen_mxu_s8std(DisasContext * ctx,bool postmodify)798  static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
799  {
800      TCGv t0, t1;
801      uint32_t XRa, Rb, s8, optn3;
802  
803      t0 = tcg_temp_new();
804      t1 = tcg_temp_new();
805  
806      XRa = extract32(ctx->opcode, 6, 4);
807      s8 = extract32(ctx->opcode, 10, 8);
808      optn3 = extract32(ctx->opcode, 18, 3);
809      Rb = extract32(ctx->opcode, 21, 5);
810  
811      if (optn3 > 3) {
812          /* reserved, do nothing */
813          return;
814      }
815  
816      gen_load_gpr(t0, Rb);
817      tcg_gen_addi_tl(t0, t0, (int8_t)s8);
818      if (postmodify) {
819          gen_store_gpr(t0, Rb);
820      }
821      gen_load_mxu_gpr(t1, XRa);
822  
823      switch (optn3) {
824      /* XRa[7:0] => tmp8 */
825      case MXU_OPTN3_PTN0:
826          tcg_gen_extract_tl(t1, t1, 0, 8);
827          break;
828      /* XRa[15:8] => tmp8 */
829      case MXU_OPTN3_PTN1:
830          tcg_gen_extract_tl(t1, t1, 8, 8);
831          break;
832      /* XRa[23:16] => tmp8 */
833      case MXU_OPTN3_PTN2:
834          tcg_gen_extract_tl(t1, t1, 16, 8);
835          break;
836      /* XRa[31:24] => tmp8 */
837      case MXU_OPTN3_PTN3:
838          tcg_gen_extract_tl(t1, t1, 24, 8);
839          break;
840      }
841  
842      tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
843  }
844  
845  /*
846   * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
847   *
848   * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
849   * post modify address register
850   */
gen_mxu_s16ldd(DisasContext * ctx,bool postmodify)851  static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
852  {
853      TCGv t0, t1;
854      uint32_t XRa, Rb, optn2;
855      int32_t s10;
856  
857      t0 = tcg_temp_new();
858      t1 = tcg_temp_new();
859  
860      XRa   = extract32(ctx->opcode,   6, 4);
861      s10   = sextract32(ctx->opcode, 10, 9) * 2;
862      optn2 = extract32(ctx->opcode,  19, 2);
863      Rb    = extract32(ctx->opcode,  21, 5);
864  
865      gen_load_gpr(t0, Rb);
866      tcg_gen_addi_tl(t0, t0, s10);
867      if (postmodify) {
868          gen_store_gpr(t0, Rb);
869      }
870  
871      switch (optn2) {
872      /* XRa[15:0] = tmp16 */
873      case MXU_OPTN2_PTN0:
874          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
875          gen_load_mxu_gpr(t0, XRa);
876          tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
877          break;
878      /* XRa[31:16] = tmp16 */
879      case MXU_OPTN2_PTN1:
880          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
881          gen_load_mxu_gpr(t0, XRa);
882          tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
883          break;
884      /* XRa = sign_extend(tmp16) */
885      case MXU_OPTN2_PTN2:
886          tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
887          break;
888      /* XRa = {tmp16, tmp16} */
889      case MXU_OPTN2_PTN3:
890          tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
891          tcg_gen_deposit_tl(t0, t1, t1,  0, 16);
892          tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
893          break;
894      }
895  
896      gen_store_mxu_gpr(t0, XRa);
897  }
898  
899  /*
900   * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
901   *
902   * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
903   * post modify address register
904   */
gen_mxu_s16std(DisasContext * ctx,bool postmodify)905  static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
906  {
907      TCGv t0, t1;
908      uint32_t XRa, Rb, optn2;
909      int32_t s10;
910  
911      t0 = tcg_temp_new();
912      t1 = tcg_temp_new();
913  
914      XRa = extract32(ctx->opcode, 6, 4);
915      s10 = sextract32(ctx->opcode, 10, 9) * 2;
916      optn2 = extract32(ctx->opcode, 19, 2);
917      Rb = extract32(ctx->opcode, 21, 5);
918  
919      if (optn2 > 1) {
920          /* reserved, do nothing */
921          return;
922      }
923  
924      gen_load_gpr(t0, Rb);
925      tcg_gen_addi_tl(t0, t0, s10);
926      if (postmodify) {
927          gen_store_gpr(t0, Rb);
928      }
929      gen_load_mxu_gpr(t1, XRa);
930  
931      switch (optn2) {
932      /* XRa[15:0] => tmp16 */
933      case MXU_OPTN2_PTN0:
934          tcg_gen_extract_tl(t1, t1, 0, 16);
935          break;
936      /* XRa[31:16] => tmp16 */
937      case MXU_OPTN2_PTN1:
938          tcg_gen_extract_tl(t1, t1, 16, 16);
939          break;
940      }
941  
942      tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
943  }
944  
945  /*
946   * S32MUL  XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
947   * of GPR's and stores result into pair of MXU registers.
948   * It strains HI and LO registers.
949   *
950   * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
951   * of GPR's and stores result into pair of MXU registers.
952   * It strains HI and LO registers.
953   */
gen_mxu_s32mul(DisasContext * ctx,bool mulu)954  static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
955  {
956      TCGv t0, t1;
957      uint32_t XRa, XRd, rs, rt;
958  
959      t0 = tcg_temp_new();
960      t1 = tcg_temp_new();
961  
962      XRa = extract32(ctx->opcode,  6, 4);
963      XRd = extract32(ctx->opcode, 10, 4);
964      rs  = extract32(ctx->opcode, 16, 5);
965      rt  = extract32(ctx->opcode, 21, 5);
966  
967      if (unlikely(rs == 0 || rt == 0)) {
968          tcg_gen_movi_tl(t0, 0);
969          tcg_gen_movi_tl(t1, 0);
970      } else {
971          gen_load_gpr(t0, rs);
972          gen_load_gpr(t1, rt);
973  
974          if (mulu) {
975              tcg_gen_mulu2_tl(t0, t1, t0, t1);
976          } else {
977              tcg_gen_muls2_tl(t0, t1, t0, t1);
978          }
979      }
980      tcg_gen_mov_tl(cpu_HI[0], t1);
981      tcg_gen_mov_tl(cpu_LO[0], t0);
982      gen_store_mxu_gpr(t1, XRa);
983      gen_store_mxu_gpr(t0, XRd);
984  }
985  
986  /*
987   * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
988   * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
989   *   with rounding and packing result
990   * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
991   *   multiplication with rounding
992   */
gen_mxu_d16mul(DisasContext * ctx,bool fractional,bool packed_result)993  static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
994                             bool packed_result)
995  {
996      TCGv t0, t1, t2, t3;
997      uint32_t XRa, XRb, XRc, XRd, optn2;
998  
999      t0 = tcg_temp_new();
1000      t1 = tcg_temp_new();
1001      t2 = tcg_temp_new();
1002      t3 = tcg_temp_new();
1003  
1004      XRa = extract32(ctx->opcode, 6, 4);
1005      XRb = extract32(ctx->opcode, 10, 4);
1006      XRc = extract32(ctx->opcode, 14, 4);
1007      XRd = extract32(ctx->opcode, 18, 4);
1008      optn2 = extract32(ctx->opcode, 22, 2);
1009  
1010      /*
1011       * TODO: XRd field isn't used for D16MULF
1012       * There's no knowledge how this field affect
1013       * instruction decoding/behavior
1014       */
1015  
1016      gen_load_mxu_gpr(t1, XRb);
1017      tcg_gen_sextract_tl(t0, t1, 0, 16);
1018      tcg_gen_sextract_tl(t1, t1, 16, 16);
1019      gen_load_mxu_gpr(t3, XRc);
1020      tcg_gen_sextract_tl(t2, t3, 0, 16);
1021      tcg_gen_sextract_tl(t3, t3, 16, 16);
1022  
1023      switch (optn2) {
1024      case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1025          tcg_gen_mul_tl(t3, t1, t3);
1026          tcg_gen_mul_tl(t2, t0, t2);
1027          break;
1028      case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1029          tcg_gen_mul_tl(t3, t0, t3);
1030          tcg_gen_mul_tl(t2, t0, t2);
1031          break;
1032      case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1033          tcg_gen_mul_tl(t3, t1, t3);
1034          tcg_gen_mul_tl(t2, t1, t2);
1035          break;
1036      case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1037          tcg_gen_mul_tl(t3, t0, t3);
1038          tcg_gen_mul_tl(t2, t1, t2);
1039          break;
1040      }
1041      if (fractional) {
1042          TCGLabel *l_done = gen_new_label();
1043          TCGv rounding = tcg_temp_new();
1044  
1045          tcg_gen_shli_tl(t3, t3, 1);
1046          tcg_gen_shli_tl(t2, t2, 1);
1047          tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1048          tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1049          if (packed_result) {
1050              TCGLabel *l_apply_bias_l = gen_new_label();
1051              TCGLabel *l_apply_bias_r = gen_new_label();
1052              TCGLabel *l_half_done = gen_new_label();
1053              TCGv bias = tcg_temp_new();
1054  
1055              /*
1056               * D16MULF supports unbiased rounding aka "bankers rounding",
1057               * "round to even", "convergent rounding"
1058               */
1059              tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1060              tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1061              tcg_gen_andi_tl(t0, t3, 0x1ffff);
1062              tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1063              gen_set_label(l_apply_bias_l);
1064              tcg_gen_addi_tl(t3, t3, 0x8000);
1065              gen_set_label(l_half_done);
1066              tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1067              tcg_gen_andi_tl(t0, t2, 0x1ffff);
1068              tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1069              gen_set_label(l_apply_bias_r);
1070              tcg_gen_addi_tl(t2, t2, 0x8000);
1071          } else {
1072              /* D16MULE doesn't support unbiased rounding */
1073              tcg_gen_addi_tl(t3, t3, 0x8000);
1074              tcg_gen_addi_tl(t2, t2, 0x8000);
1075          }
1076          gen_set_label(l_done);
1077      }
1078      if (!packed_result) {
1079          gen_store_mxu_gpr(t3, XRa);
1080          gen_store_mxu_gpr(t2, XRd);
1081      } else {
1082          tcg_gen_andi_tl(t3, t3, 0xffff0000);
1083          tcg_gen_shri_tl(t2, t2, 16);
1084          tcg_gen_or_tl(t3, t3, t2);
1085          gen_store_mxu_gpr(t3, XRa);
1086      }
1087  }
1088  
1089  /*
1090   * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1091   *   Signed 16 bit pattern multiply and accumulate
1092   * D16MACF XRa, XRb, XRc, aptn2, optn2
1093   *   Signed Q15 fraction pattern multiply accumulate and pack
1094   * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1095   *   Signed Q15 fraction pattern multiply and accumulate
1096   */
gen_mxu_d16mac(DisasContext * ctx,bool fractional,bool packed_result)1097  static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
1098                             bool packed_result)
1099  {
1100      TCGv t0, t1, t2, t3;
1101      uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1102  
1103      t0 = tcg_temp_new();
1104      t1 = tcg_temp_new();
1105      t2 = tcg_temp_new();
1106      t3 = tcg_temp_new();
1107  
1108      XRa = extract32(ctx->opcode, 6, 4);
1109      XRb = extract32(ctx->opcode, 10, 4);
1110      XRc = extract32(ctx->opcode, 14, 4);
1111      XRd = extract32(ctx->opcode, 18, 4);
1112      optn2 = extract32(ctx->opcode, 22, 2);
1113      aptn2 = extract32(ctx->opcode, 24, 2);
1114  
1115      gen_load_mxu_gpr(t1, XRb);
1116      tcg_gen_sextract_tl(t0, t1, 0, 16);
1117      tcg_gen_sextract_tl(t1, t1, 16, 16);
1118  
1119      gen_load_mxu_gpr(t3, XRc);
1120      tcg_gen_sextract_tl(t2, t3, 0, 16);
1121      tcg_gen_sextract_tl(t3, t3, 16, 16);
1122  
1123      switch (optn2) {
1124      case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1125          tcg_gen_mul_tl(t3, t1, t3);
1126          tcg_gen_mul_tl(t2, t0, t2);
1127          break;
1128      case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1129          tcg_gen_mul_tl(t3, t0, t3);
1130          tcg_gen_mul_tl(t2, t0, t2);
1131          break;
1132      case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1133          tcg_gen_mul_tl(t3, t1, t3);
1134          tcg_gen_mul_tl(t2, t1, t2);
1135          break;
1136      case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1137          tcg_gen_mul_tl(t3, t0, t3);
1138          tcg_gen_mul_tl(t2, t1, t2);
1139          break;
1140      }
1141  
1142      if (fractional) {
1143          tcg_gen_shli_tl(t3, t3, 1);
1144          tcg_gen_shli_tl(t2, t2, 1);
1145      }
1146      gen_load_mxu_gpr(t0, XRa);
1147      gen_load_mxu_gpr(t1, XRd);
1148  
1149      switch (aptn2) {
1150      case MXU_APTN2_AA:
1151          tcg_gen_add_tl(t3, t0, t3);
1152          tcg_gen_add_tl(t2, t1, t2);
1153          break;
1154      case MXU_APTN2_AS:
1155          tcg_gen_add_tl(t3, t0, t3);
1156          tcg_gen_sub_tl(t2, t1, t2);
1157          break;
1158      case MXU_APTN2_SA:
1159          tcg_gen_sub_tl(t3, t0, t3);
1160          tcg_gen_add_tl(t2, t1, t2);
1161          break;
1162      case MXU_APTN2_SS:
1163          tcg_gen_sub_tl(t3, t0, t3);
1164          tcg_gen_sub_tl(t2, t1, t2);
1165          break;
1166      }
1167  
1168      if (fractional) {
1169          TCGLabel *l_done = gen_new_label();
1170          TCGv rounding = tcg_temp_new();
1171  
1172          tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
1173          tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
1174          if (packed_result) {
1175              TCGLabel *l_apply_bias_l = gen_new_label();
1176              TCGLabel *l_apply_bias_r = gen_new_label();
1177              TCGLabel *l_half_done = gen_new_label();
1178              TCGv bias = tcg_temp_new();
1179  
1180              /*
1181               * D16MACF supports unbiased rounding aka "bankers rounding",
1182               * "round to even", "convergent rounding"
1183               */
1184              tcg_gen_andi_tl(bias, mxu_CR, 0x4);
1185              tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
1186              tcg_gen_andi_tl(t0, t3, 0x1ffff);
1187              tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
1188              gen_set_label(l_apply_bias_l);
1189              tcg_gen_addi_tl(t3, t3, 0x8000);
1190              gen_set_label(l_half_done);
1191              tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
1192              tcg_gen_andi_tl(t0, t2, 0x1ffff);
1193              tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
1194              gen_set_label(l_apply_bias_r);
1195              tcg_gen_addi_tl(t2, t2, 0x8000);
1196          } else {
1197              /* D16MACE doesn't support unbiased rounding */
1198              tcg_gen_addi_tl(t3, t3, 0x8000);
1199              tcg_gen_addi_tl(t2, t2, 0x8000);
1200          }
1201          gen_set_label(l_done);
1202      }
1203  
1204      if (!packed_result) {
1205          gen_store_mxu_gpr(t3, XRa);
1206          gen_store_mxu_gpr(t2, XRd);
1207      } else {
1208          tcg_gen_andi_tl(t3, t3, 0xffff0000);
1209          tcg_gen_shri_tl(t2, t2, 16);
1210          tcg_gen_or_tl(t3, t3, t2);
1211          gen_store_mxu_gpr(t3, XRa);
1212      }
1213  }
1214  
1215  /*
1216   * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1217   * unsigned 16 bit pattern multiply and add/subtract.
1218   */
gen_mxu_d16madl(DisasContext * ctx)1219  static void gen_mxu_d16madl(DisasContext *ctx)
1220  {
1221      TCGv t0, t1, t2, t3;
1222      uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
1223  
1224      t0 = tcg_temp_new();
1225      t1 = tcg_temp_new();
1226      t2 = tcg_temp_new();
1227      t3 = tcg_temp_new();
1228  
1229      XRa = extract32(ctx->opcode, 6, 4);
1230      XRb = extract32(ctx->opcode, 10, 4);
1231      XRc = extract32(ctx->opcode, 14, 4);
1232      XRd = extract32(ctx->opcode, 18, 4);
1233      optn2 = extract32(ctx->opcode, 22, 2);
1234      aptn2 = extract32(ctx->opcode, 24, 2);
1235  
1236      gen_load_mxu_gpr(t1, XRb);
1237      tcg_gen_sextract_tl(t0, t1,  0, 16);
1238      tcg_gen_sextract_tl(t1, t1, 16, 16);
1239  
1240      gen_load_mxu_gpr(t3, XRc);
1241      tcg_gen_sextract_tl(t2, t3,  0, 16);
1242      tcg_gen_sextract_tl(t3, t3, 16, 16);
1243  
1244      switch (optn2) {
1245      case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1246          tcg_gen_mul_tl(t3, t1, t3);
1247          tcg_gen_mul_tl(t2, t0, t2);
1248          break;
1249      case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1250          tcg_gen_mul_tl(t3, t0, t3);
1251          tcg_gen_mul_tl(t2, t0, t2);
1252          break;
1253      case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1254          tcg_gen_mul_tl(t3, t1, t3);
1255          tcg_gen_mul_tl(t2, t1, t2);
1256          break;
1257      case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1258          tcg_gen_mul_tl(t3, t0, t3);
1259          tcg_gen_mul_tl(t2, t1, t2);
1260          break;
1261      }
1262      tcg_gen_extract_tl(t2, t2, 0, 16);
1263      tcg_gen_extract_tl(t3, t3, 0, 16);
1264  
1265      gen_load_mxu_gpr(t1, XRa);
1266      tcg_gen_extract_tl(t0, t1,  0, 16);
1267      tcg_gen_extract_tl(t1, t1, 16, 16);
1268  
1269      switch (aptn2) {
1270      case MXU_APTN2_AA:
1271          tcg_gen_add_tl(t3, t1, t3);
1272          tcg_gen_add_tl(t2, t0, t2);
1273          break;
1274      case MXU_APTN2_AS:
1275          tcg_gen_add_tl(t3, t1, t3);
1276          tcg_gen_sub_tl(t2, t0, t2);
1277          break;
1278      case MXU_APTN2_SA:
1279          tcg_gen_sub_tl(t3, t1, t3);
1280          tcg_gen_add_tl(t2, t0, t2);
1281          break;
1282      case MXU_APTN2_SS:
1283          tcg_gen_sub_tl(t3, t1, t3);
1284          tcg_gen_sub_tl(t2, t0, t2);
1285          break;
1286      }
1287  
1288      tcg_gen_andi_tl(t2, t2, 0xffff);
1289      tcg_gen_shli_tl(t3, t3, 16);
1290      tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
1291  }
1292  
1293  /*
1294   * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1295   * signed 16 bit pattern multiply and 32-bit add/subtract.
1296   */
gen_mxu_s16mad(DisasContext * ctx)1297  static void gen_mxu_s16mad(DisasContext *ctx)
1298  {
1299      TCGv t0, t1;
1300      uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
1301  
1302      t0 = tcg_temp_new();
1303      t1 = tcg_temp_new();
1304  
1305      XRa = extract32(ctx->opcode, 6, 4);
1306      XRb = extract32(ctx->opcode, 10, 4);
1307      XRc = extract32(ctx->opcode, 14, 4);
1308      XRd = extract32(ctx->opcode, 18, 4);
1309      optn2 = extract32(ctx->opcode, 22, 2);
1310      aptn1 = extract32(ctx->opcode, 24, 1);
1311      pad = extract32(ctx->opcode, 25, 1);
1312  
1313      if (pad) {
1314          /* FIXME check if it influence the result */
1315      }
1316  
1317      gen_load_mxu_gpr(t0, XRb);
1318      gen_load_mxu_gpr(t1, XRc);
1319  
1320      switch (optn2) {
1321      case MXU_OPTN2_WW: /* XRB.H*XRC.H */
1322          tcg_gen_sextract_tl(t0, t0, 16, 16);
1323          tcg_gen_sextract_tl(t1, t1, 16, 16);
1324          break;
1325      case MXU_OPTN2_LW: /* XRB.L*XRC.L */
1326          tcg_gen_sextract_tl(t0, t0,  0, 16);
1327          tcg_gen_sextract_tl(t1, t1,  0, 16);
1328          break;
1329      case MXU_OPTN2_HW: /* XRB.H*XRC.L */
1330          tcg_gen_sextract_tl(t0, t0, 16, 16);
1331          tcg_gen_sextract_tl(t1, t1,  0, 16);
1332          break;
1333      case MXU_OPTN2_XW: /* XRB.L*XRC.H */
1334          tcg_gen_sextract_tl(t0, t0,  0, 16);
1335          tcg_gen_sextract_tl(t1, t1, 16, 16);
1336          break;
1337      }
1338      tcg_gen_mul_tl(t0, t0, t1);
1339  
1340      gen_load_mxu_gpr(t1, XRa);
1341  
1342      switch (aptn1) {
1343      case MXU_APTN1_A:
1344          tcg_gen_add_tl(t1, t1, t0);
1345          break;
1346      case MXU_APTN1_S:
1347          tcg_gen_sub_tl(t1, t1, t0);
1348          break;
1349      }
1350  
1351      gen_store_mxu_gpr(t1, XRd);
1352  }
1353  
1354  /*
1355   * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1356   * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1357   * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1358   *   and accumulate
1359   * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1360   *   and accumulate
1361   */
gen_mxu_q8mul_mac(DisasContext * ctx,bool su,bool mac)1362  static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
1363  {
1364      TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1365      uint32_t XRa, XRb, XRc, XRd, aptn2;
1366  
1367      t0 = tcg_temp_new();
1368      t1 = tcg_temp_new();
1369      t2 = tcg_temp_new();
1370      t3 = tcg_temp_new();
1371      t4 = tcg_temp_new();
1372      t5 = tcg_temp_new();
1373      t6 = tcg_temp_new();
1374      t7 = tcg_temp_new();
1375  
1376      XRa = extract32(ctx->opcode, 6, 4);
1377      XRb = extract32(ctx->opcode, 10, 4);
1378      XRc = extract32(ctx->opcode, 14, 4);
1379      XRd = extract32(ctx->opcode, 18, 4);
1380      aptn2 = extract32(ctx->opcode, 24, 2);
1381  
1382      gen_load_mxu_gpr(t3, XRb);
1383      gen_load_mxu_gpr(t7, XRc);
1384  
1385      if (su) {
1386          /* Q8MULSU / Q8MACSU */
1387          tcg_gen_sextract_tl(t0, t3,  0, 8);
1388          tcg_gen_sextract_tl(t1, t3,  8, 8);
1389          tcg_gen_sextract_tl(t2, t3, 16, 8);
1390          tcg_gen_sextract_tl(t3, t3, 24, 8);
1391      } else {
1392          /* Q8MUL / Q8MAC */
1393          tcg_gen_extract_tl(t0, t3,  0, 8);
1394          tcg_gen_extract_tl(t1, t3,  8, 8);
1395          tcg_gen_extract_tl(t2, t3, 16, 8);
1396          tcg_gen_extract_tl(t3, t3, 24, 8);
1397      }
1398  
1399      tcg_gen_extract_tl(t4, t7,  0, 8);
1400      tcg_gen_extract_tl(t5, t7,  8, 8);
1401      tcg_gen_extract_tl(t6, t7, 16, 8);
1402      tcg_gen_extract_tl(t7, t7, 24, 8);
1403  
1404      tcg_gen_mul_tl(t0, t0, t4);
1405      tcg_gen_mul_tl(t1, t1, t5);
1406      tcg_gen_mul_tl(t2, t2, t6);
1407      tcg_gen_mul_tl(t3, t3, t7);
1408  
1409      if (mac) {
1410          gen_load_mxu_gpr(t4, XRd);
1411          gen_load_mxu_gpr(t5, XRa);
1412          tcg_gen_extract_tl(t6, t4,  0, 16);
1413          tcg_gen_extract_tl(t7, t4, 16, 16);
1414          if (aptn2 & 1) {
1415              tcg_gen_sub_tl(t0, t6, t0);
1416              tcg_gen_sub_tl(t1, t7, t1);
1417          } else {
1418              tcg_gen_add_tl(t0, t6, t0);
1419              tcg_gen_add_tl(t1, t7, t1);
1420          }
1421          tcg_gen_extract_tl(t6, t5,  0, 16);
1422          tcg_gen_extract_tl(t7, t5, 16, 16);
1423          if (aptn2 & 2) {
1424              tcg_gen_sub_tl(t2, t6, t2);
1425              tcg_gen_sub_tl(t3, t7, t3);
1426          } else {
1427              tcg_gen_add_tl(t2, t6, t2);
1428              tcg_gen_add_tl(t3, t7, t3);
1429          }
1430      }
1431  
1432      tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
1433      tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
1434  
1435      gen_store_mxu_gpr(t0, XRd);
1436      gen_store_mxu_gpr(t1, XRa);
1437  }
1438  
1439  /*
1440   * Q8MADL  XRd, XRa, XRb, XRc
1441   *   Parallel quad unsigned 8 bit multiply and accumulate.
1442   *   e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
1443   */
gen_mxu_q8madl(DisasContext * ctx)1444  static void gen_mxu_q8madl(DisasContext *ctx)
1445  {
1446      TCGv t0, t1, t2, t3, t4, t5, t6, t7;
1447      uint32_t XRa, XRb, XRc, XRd, aptn2;
1448  
1449      t0 = tcg_temp_new();
1450      t1 = tcg_temp_new();
1451      t2 = tcg_temp_new();
1452      t3 = tcg_temp_new();
1453      t4 = tcg_temp_new();
1454      t5 = tcg_temp_new();
1455      t6 = tcg_temp_new();
1456      t7 = tcg_temp_new();
1457  
1458      XRa = extract32(ctx->opcode, 6, 4);
1459      XRb = extract32(ctx->opcode, 10, 4);
1460      XRc = extract32(ctx->opcode, 14, 4);
1461      XRd = extract32(ctx->opcode, 18, 4);
1462      aptn2 = extract32(ctx->opcode, 24, 2);
1463  
1464      gen_load_mxu_gpr(t3, XRb);
1465      gen_load_mxu_gpr(t7, XRc);
1466  
1467      tcg_gen_extract_tl(t0, t3,  0, 8);
1468      tcg_gen_extract_tl(t1, t3,  8, 8);
1469      tcg_gen_extract_tl(t2, t3, 16, 8);
1470      tcg_gen_extract_tl(t3, t3, 24, 8);
1471  
1472      tcg_gen_extract_tl(t4, t7,  0, 8);
1473      tcg_gen_extract_tl(t5, t7,  8, 8);
1474      tcg_gen_extract_tl(t6, t7, 16, 8);
1475      tcg_gen_extract_tl(t7, t7, 24, 8);
1476  
1477      tcg_gen_mul_tl(t0, t0, t4);
1478      tcg_gen_mul_tl(t1, t1, t5);
1479      tcg_gen_mul_tl(t2, t2, t6);
1480      tcg_gen_mul_tl(t3, t3, t7);
1481  
1482      gen_load_mxu_gpr(t4, XRa);
1483      tcg_gen_extract_tl(t6, t4, 0, 8);
1484      tcg_gen_extract_tl(t7, t4, 8, 8);
1485      if (aptn2 & 1) {
1486          tcg_gen_sub_tl(t0, t6, t0);
1487          tcg_gen_sub_tl(t1, t7, t1);
1488      } else {
1489          tcg_gen_add_tl(t0, t6, t0);
1490          tcg_gen_add_tl(t1, t7, t1);
1491      }
1492      tcg_gen_extract_tl(t6, t4, 16, 8);
1493      tcg_gen_extract_tl(t7, t4, 24, 8);
1494      if (aptn2 & 2) {
1495          tcg_gen_sub_tl(t2, t6, t2);
1496          tcg_gen_sub_tl(t3, t7, t3);
1497      } else {
1498          tcg_gen_add_tl(t2, t6, t2);
1499          tcg_gen_add_tl(t3, t7, t3);
1500      }
1501  
1502      tcg_gen_andi_tl(t5, t0, 0xff);
1503      tcg_gen_deposit_tl(t5, t5, t1,  8, 8);
1504      tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
1505      tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
1506  
1507      gen_store_mxu_gpr(t5, XRd);
1508  }
1509  
1510  /*
1511   * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
1512   * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1513   *   in reversed byte seq.
1514   * S32LDI  XRa, Rb, S12 - Load a word from memory to XRF,
1515   *   post modify base address GPR.
1516   * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1517   *   post modify base address GPR and load in reversed byte seq.
1518   */
gen_mxu_s32ldxx(DisasContext * ctx,bool reversed,bool postinc)1519  static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
1520  {
1521      TCGv t0, t1;
1522      uint32_t XRa, Rb, s12;
1523  
1524      t0 = tcg_temp_new();
1525      t1 = tcg_temp_new();
1526  
1527      XRa = extract32(ctx->opcode, 6, 4);
1528      s12 = sextract32(ctx->opcode, 10, 10);
1529      Rb = extract32(ctx->opcode, 21, 5);
1530  
1531      gen_load_gpr(t0, Rb);
1532      tcg_gen_movi_tl(t1, s12 * 4);
1533      tcg_gen_add_tl(t0, t0, t1);
1534  
1535      tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1536                         MO_SL | mo_endian_rev(ctx, reversed) |
1537                          ctx->default_tcg_memop_mask);
1538      gen_store_mxu_gpr(t1, XRa);
1539  
1540      if (postinc) {
1541          gen_store_gpr(t0, Rb);
1542      }
1543  }
1544  
1545  /*
1546   * S32STD  XRa, Rb, S12 - Store a word from XRF to memory
1547   * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1548   *   in reversed byte seq.
1549   * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
1550   *   post modify base address GPR.
1551   * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1552   *   post modify base address GPR and store in reversed byte seq.
1553   */
gen_mxu_s32stxx(DisasContext * ctx,bool reversed,bool postinc)1554  static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
1555  {
1556      TCGv t0, t1;
1557      uint32_t XRa, Rb, s12;
1558  
1559      t0 = tcg_temp_new();
1560      t1 = tcg_temp_new();
1561  
1562      XRa = extract32(ctx->opcode, 6, 4);
1563      s12 = sextract32(ctx->opcode, 10, 10);
1564      Rb = extract32(ctx->opcode, 21, 5);
1565  
1566      gen_load_gpr(t0, Rb);
1567      tcg_gen_movi_tl(t1, s12 * 4);
1568      tcg_gen_add_tl(t0, t0, t1);
1569  
1570      gen_load_mxu_gpr(t1, XRa);
1571      tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1572                         MO_SL | mo_endian_rev(ctx, reversed) |
1573                          ctx->default_tcg_memop_mask);
1574  
1575      if (postinc) {
1576          gen_store_gpr(t0, Rb);
1577      }
1578  }
1579  
1580  /*
1581   * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1582   * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1583   *   in reversed byte seq.
1584   * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1585   *   post modify base address GPR.
1586   * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1587   *   post modify base address GPR and load in reversed byte seq.
1588   */
gen_mxu_s32ldxvx(DisasContext * ctx,bool reversed,bool postinc,uint32_t strd2)1589  static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
1590                               bool postinc, uint32_t strd2)
1591  {
1592      TCGv t0, t1;
1593      uint32_t XRa, Rb, Rc;
1594  
1595      t0 = tcg_temp_new();
1596      t1 = tcg_temp_new();
1597  
1598      XRa = extract32(ctx->opcode, 6, 4);
1599      Rc = extract32(ctx->opcode, 16, 5);
1600      Rb = extract32(ctx->opcode, 21, 5);
1601  
1602      gen_load_gpr(t0, Rb);
1603      gen_load_gpr(t1, Rc);
1604      tcg_gen_shli_tl(t1, t1, strd2);
1605      tcg_gen_add_tl(t0, t0, t1);
1606  
1607      tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
1608                         MO_SL | mo_endian_rev(ctx, reversed) |
1609                          ctx->default_tcg_memop_mask);
1610      gen_store_mxu_gpr(t1, XRa);
1611  
1612      if (postinc) {
1613          gen_store_gpr(t0, Rb);
1614      }
1615  }
1616  
1617  /*
1618   * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1619   * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1620   *   sign extending to GPR size.
1621   * LXH  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1622   *   sign extending to GPR size.
1623   * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1624   *   zero extending to GPR size.
1625   * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1626   *   zero extending to GPR size.
1627   */
gen_mxu_lxx(DisasContext * ctx,uint32_t strd2,MemOp mop)1628  static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
1629  {
1630      TCGv t0, t1;
1631      uint32_t Ra, Rb, Rc;
1632  
1633      t0 = tcg_temp_new();
1634      t1 = tcg_temp_new();
1635  
1636      Ra = extract32(ctx->opcode, 11, 5);
1637      Rc = extract32(ctx->opcode, 16, 5);
1638      Rb = extract32(ctx->opcode, 21, 5);
1639  
1640      gen_load_gpr(t0, Rb);
1641      gen_load_gpr(t1, Rc);
1642      tcg_gen_shli_tl(t1, t1, strd2);
1643      tcg_gen_add_tl(t0, t0, t1);
1644  
1645      tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
1646      gen_store_gpr(t1, Ra);
1647  }
1648  
1649  /*
1650   * S32STDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1651   * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1652   *   in reversed byte seq.
1653   * S32SDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1654   *   post modify base address GPR.
1655   * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1656   *   post modify base address GPR and store in reversed byte seq.
1657   */
gen_mxu_s32stxvx(DisasContext * ctx,bool reversed,bool postinc,uint32_t strd2)1658  static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
1659                               bool postinc, uint32_t strd2)
1660  {
1661      TCGv t0, t1;
1662      uint32_t XRa, Rb, Rc;
1663  
1664      t0 = tcg_temp_new();
1665      t1 = tcg_temp_new();
1666  
1667      XRa = extract32(ctx->opcode, 6, 4);
1668      Rc = extract32(ctx->opcode, 16, 5);
1669      Rb = extract32(ctx->opcode, 21, 5);
1670  
1671      gen_load_gpr(t0, Rb);
1672      gen_load_gpr(t1, Rc);
1673      tcg_gen_shli_tl(t1, t1, strd2);
1674      tcg_gen_add_tl(t0, t0, t1);
1675  
1676      gen_load_mxu_gpr(t1, XRa);
1677      tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
1678                         MO_SL | mo_endian_rev(ctx, reversed) |
1679                          ctx->default_tcg_memop_mask);
1680  
1681      if (postinc) {
1682          gen_store_gpr(t0, Rb);
1683      }
1684  }
1685  
1686  /*
1687   *                 MXU instruction category: logic
1688   *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1689   *
1690   *               S32NOR    S32AND    S32OR    S32XOR
1691   */
1692  
1693  /*
1694   *  S32NOR XRa, XRb, XRc
1695   *    Update XRa with the result of logical bitwise 'nor' operation
1696   *    applied to the content of XRb and XRc.
1697   */
gen_mxu_S32NOR(DisasContext * ctx)1698  static void gen_mxu_S32NOR(DisasContext *ctx)
1699  {
1700      uint32_t pad, XRc, XRb, XRa;
1701  
1702      pad = extract32(ctx->opcode, 21, 5);
1703      XRc = extract32(ctx->opcode, 14, 4);
1704      XRb = extract32(ctx->opcode, 10, 4);
1705      XRa = extract32(ctx->opcode,  6, 4);
1706  
1707      if (unlikely(pad != 0)) {
1708          /* opcode padding incorrect -> do nothing */
1709      } else if (unlikely(XRa == 0)) {
1710          /* destination is zero register -> do nothing */
1711      } else if (unlikely((XRb == 0) && (XRc == 0))) {
1712          /* both operands zero registers -> just set destination to all 1s */
1713          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
1714      } else if (unlikely(XRb == 0)) {
1715          /* XRb zero register -> just set destination to the negation of XRc */
1716          tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1717      } else if (unlikely(XRc == 0)) {
1718          /* XRa zero register -> just set destination to the negation of XRb */
1719          tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1720      } else if (unlikely(XRb == XRc)) {
1721          /* both operands same -> just set destination to the negation of XRb */
1722          tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1723      } else {
1724          /* the most general case */
1725          tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1726      }
1727  }
1728  
1729  /*
1730   *  S32AND XRa, XRb, XRc
1731   *    Update XRa with the result of logical bitwise 'and' operation
1732   *    applied to the content of XRb and XRc.
1733   */
gen_mxu_S32AND(DisasContext * ctx)1734  static void gen_mxu_S32AND(DisasContext *ctx)
1735  {
1736      uint32_t pad, XRc, XRb, XRa;
1737  
1738      pad = extract32(ctx->opcode, 21, 5);
1739      XRc = extract32(ctx->opcode, 14, 4);
1740      XRb = extract32(ctx->opcode, 10, 4);
1741      XRa = extract32(ctx->opcode,  6, 4);
1742  
1743      if (unlikely(pad != 0)) {
1744          /* opcode padding incorrect -> do nothing */
1745      } else if (unlikely(XRa == 0)) {
1746          /* destination is zero register -> do nothing */
1747      } else if (unlikely((XRb == 0) || (XRc == 0))) {
1748          /* one of operands zero register -> just set destination to all 0s */
1749          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1750      } else if (unlikely(XRb == XRc)) {
1751          /* both operands same -> just set destination to one of them */
1752          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1753      } else {
1754          /* the most general case */
1755          tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1756      }
1757  }
1758  
1759  /*
1760   *  S32OR XRa, XRb, XRc
1761   *    Update XRa with the result of logical bitwise 'or' operation
1762   *    applied to the content of XRb and XRc.
1763   */
gen_mxu_S32OR(DisasContext * ctx)1764  static void gen_mxu_S32OR(DisasContext *ctx)
1765  {
1766      uint32_t pad, XRc, XRb, XRa;
1767  
1768      pad = extract32(ctx->opcode, 21, 5);
1769      XRc = extract32(ctx->opcode, 14, 4);
1770      XRb = extract32(ctx->opcode, 10, 4);
1771      XRa = extract32(ctx->opcode,  6, 4);
1772  
1773      if (unlikely(pad != 0)) {
1774          /* opcode padding incorrect -> do nothing */
1775      } else if (unlikely(XRa == 0)) {
1776          /* destination is zero register -> do nothing */
1777      } else if (unlikely((XRb == 0) && (XRc == 0))) {
1778          /* both operands zero registers -> just set destination to all 0s */
1779          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1780      } else if (unlikely(XRb == 0)) {
1781          /* XRb zero register -> just set destination to the content of XRc */
1782          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1783      } else if (unlikely(XRc == 0)) {
1784          /* XRc zero register -> just set destination to the content of XRb */
1785          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1786      } else if (unlikely(XRb == XRc)) {
1787          /* both operands same -> just set destination to one of them */
1788          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1789      } else {
1790          /* the most general case */
1791          tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1792      }
1793  }
1794  
1795  /*
1796   *  S32XOR XRa, XRb, XRc
1797   *    Update XRa with the result of logical bitwise 'xor' operation
1798   *    applied to the content of XRb and XRc.
1799   */
gen_mxu_S32XOR(DisasContext * ctx)1800  static void gen_mxu_S32XOR(DisasContext *ctx)
1801  {
1802      uint32_t pad, XRc, XRb, XRa;
1803  
1804      pad = extract32(ctx->opcode, 21, 5);
1805      XRc = extract32(ctx->opcode, 14, 4);
1806      XRb = extract32(ctx->opcode, 10, 4);
1807      XRa = extract32(ctx->opcode,  6, 4);
1808  
1809      if (unlikely(pad != 0)) {
1810          /* opcode padding incorrect -> do nothing */
1811      } else if (unlikely(XRa == 0)) {
1812          /* destination is zero register -> do nothing */
1813      } else if (unlikely((XRb == 0) && (XRc == 0))) {
1814          /* both operands zero registers -> just set destination to all 0s */
1815          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1816      } else if (unlikely(XRb == 0)) {
1817          /* XRb zero register -> just set destination to the content of XRc */
1818          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
1819      } else if (unlikely(XRc == 0)) {
1820          /* XRc zero register -> just set destination to the content of XRb */
1821          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
1822      } else if (unlikely(XRb == XRc)) {
1823          /* both operands same -> just set destination to all 0s */
1824          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
1825      } else {
1826          /* the most general case */
1827          tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
1828      }
1829  }
1830  
1831  /*
1832   *                 MXU instruction category: shift
1833   *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1834   *
1835   *               D32SLL    D32SLR    D32SAR    D32SARL
1836   *               D32SLLV   D32SLRV   D32SARV   D32SARW
1837   *               Q16SLL    Q16SLR    Q16SAR
1838   *               Q16SLLV   Q16SLRV   Q16SARV
1839   */
1840  
1841  /*
1842   *  D32SLL XRa, XRd, XRb, XRc, SFT4
1843   *    Dual 32-bit shift left from XRb and XRc to SFT4
1844   *    bits (0..15). Store to XRa and XRd respectively.
1845   *  D32SLR XRa, XRd, XRb, XRc, SFT4
1846   *    Dual 32-bit shift logic right from XRb and XRc
1847   *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1848   *  D32SAR XRa, XRd, XRb, XRc, SFT4
1849   *    Dual 32-bit shift arithmetic right from XRb and XRc
1850   *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1851   */
gen_mxu_d32sxx(DisasContext * ctx,bool right,bool arithmetic)1852  static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
1853  {
1854      uint32_t XRa, XRb, XRc, XRd, sft4;
1855  
1856      XRa  = extract32(ctx->opcode,  6, 4);
1857      XRb  = extract32(ctx->opcode, 10, 4);
1858      XRc  = extract32(ctx->opcode, 14, 4);
1859      XRd  = extract32(ctx->opcode, 18, 4);
1860      sft4 = extract32(ctx->opcode, 22, 4);
1861  
1862      TCGv t0 = tcg_temp_new();
1863      TCGv t1 = tcg_temp_new();
1864  
1865      gen_load_mxu_gpr(t0, XRb);
1866      gen_load_mxu_gpr(t1, XRc);
1867  
1868      if (right) {
1869          if (arithmetic) {
1870              tcg_gen_sari_tl(t0, t0, sft4);
1871              tcg_gen_sari_tl(t1, t1, sft4);
1872          } else {
1873              tcg_gen_shri_tl(t0, t0, sft4);
1874              tcg_gen_shri_tl(t1, t1, sft4);
1875          }
1876      } else {
1877          tcg_gen_shli_tl(t0, t0, sft4);
1878          tcg_gen_shli_tl(t1, t1, sft4);
1879      }
1880      gen_store_mxu_gpr(t0, XRa);
1881      gen_store_mxu_gpr(t1, XRd);
1882  }
1883  
1884  /*
1885   *  D32SLLV XRa, XRd, rs
1886   *    Dual 32-bit shift left from XRa and XRd to rs[3:0]
1887   *    bits. Store back to XRa and XRd respectively.
1888   *  D32SLRV XRa, XRd, rs
1889   *    Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1890   *    bits. Store back to XRa and XRd respectively.
1891   *  D32SARV XRa, XRd, rs
1892   *    Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1893   *    bits. Store back to XRa and XRd respectively.
1894   */
gen_mxu_d32sxxv(DisasContext * ctx,bool right,bool arithmetic)1895  static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
1896  {
1897      uint32_t XRa, XRd, rs;
1898  
1899      XRa = extract32(ctx->opcode, 10, 4);
1900      XRd = extract32(ctx->opcode, 14, 4);
1901      rs  = extract32(ctx->opcode, 21, 5);
1902  
1903      TCGv t0 = tcg_temp_new();
1904      TCGv t1 = tcg_temp_new();
1905      TCGv t2 = tcg_temp_new();
1906  
1907      gen_load_mxu_gpr(t0, XRa);
1908      gen_load_mxu_gpr(t1, XRd);
1909      gen_load_gpr(t2, rs);
1910      tcg_gen_andi_tl(t2, t2, 0x0f);
1911  
1912      if (right) {
1913          if (arithmetic) {
1914              tcg_gen_sar_tl(t0, t0, t2);
1915              tcg_gen_sar_tl(t1, t1, t2);
1916          } else {
1917              tcg_gen_shr_tl(t0, t0, t2);
1918              tcg_gen_shr_tl(t1, t1, t2);
1919          }
1920      } else {
1921          tcg_gen_shl_tl(t0, t0, t2);
1922          tcg_gen_shl_tl(t1, t1, t2);
1923      }
1924      gen_store_mxu_gpr(t0, XRa);
1925      gen_store_mxu_gpr(t1, XRd);
1926  }
1927  
1928  /*
1929   *  D32SARL XRa, XRb, XRc, SFT4
1930   *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1931   *    to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1932   *
1933   *  D32SARW XRa, XRb, XRc, rb
1934   *    Dual shift arithmetic right 32-bit integers in XRb and XRc
1935   *    to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1936   */
gen_mxu_d32sarl(DisasContext * ctx,bool sarw)1937  static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
1938  {
1939      uint32_t XRa, XRb, XRc, rb;
1940  
1941      XRa = extract32(ctx->opcode,  6, 4);
1942      XRb = extract32(ctx->opcode, 10, 4);
1943      XRc = extract32(ctx->opcode, 14, 4);
1944      rb  = extract32(ctx->opcode, 21, 5);
1945  
1946      if (unlikely(XRa == 0)) {
1947          /* destination is zero register -> do nothing */
1948      } else {
1949          TCGv t0 = tcg_temp_new();
1950          TCGv t1 = tcg_temp_new();
1951          TCGv t2 = tcg_temp_new();
1952  
1953          if (!sarw) {
1954              /* Make SFT4 from rb field */
1955              tcg_gen_movi_tl(t2, rb >> 1);
1956          } else {
1957              gen_load_gpr(t2, rb);
1958              tcg_gen_andi_tl(t2, t2, 0x0f);
1959          }
1960          gen_load_mxu_gpr(t0, XRb);
1961          gen_load_mxu_gpr(t1, XRc);
1962          tcg_gen_sar_tl(t0, t0, t2);
1963          tcg_gen_sar_tl(t1, t1, t2);
1964          tcg_gen_extract_tl(t2, t1, 0, 16);
1965          tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
1966          gen_store_mxu_gpr(t2, XRa);
1967      }
1968  }
1969  
1970  /*
1971   *  Q16SLL XRa, XRd, XRb, XRc, SFT4
1972   *    Quad 16-bit shift left from XRb and XRc to SFT4
1973   *    bits (0..15). Store to XRa and XRd respectively.
1974   *  Q16SLR XRa, XRd, XRb, XRc, SFT4
1975   *    Quad 16-bit shift logic right from XRb and XRc
1976   *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1977   *  Q16SAR XRa, XRd, XRb, XRc, SFT4
1978   *    Quad 16-bit shift arithmetic right from XRb and XRc
1979   *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
1980   */
gen_mxu_q16sxx(DisasContext * ctx,bool right,bool arithmetic)1981  static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
1982  {
1983      uint32_t XRa, XRb, XRc, XRd, sft4;
1984  
1985      XRa  = extract32(ctx->opcode,  6, 4);
1986      XRb  = extract32(ctx->opcode, 10, 4);
1987      XRc  = extract32(ctx->opcode, 14, 4);
1988      XRd  = extract32(ctx->opcode, 18, 4);
1989      sft4 = extract32(ctx->opcode, 22, 4);
1990  
1991      TCGv t0 = tcg_temp_new();
1992      TCGv t1 = tcg_temp_new();
1993      TCGv t2 = tcg_temp_new();
1994      TCGv t3 = tcg_temp_new();
1995  
1996      gen_load_mxu_gpr(t0, XRb);
1997      gen_load_mxu_gpr(t2, XRc);
1998  
1999      if (arithmetic) {
2000          tcg_gen_sextract_tl(t1, t0, 16, 16);
2001          tcg_gen_sextract_tl(t0, t0,  0, 16);
2002          tcg_gen_sextract_tl(t3, t2, 16, 16);
2003          tcg_gen_sextract_tl(t2, t2,  0, 16);
2004      } else {
2005          tcg_gen_extract_tl(t1, t0, 16, 16);
2006          tcg_gen_extract_tl(t0, t0,  0, 16);
2007          tcg_gen_extract_tl(t3, t2, 16, 16);
2008          tcg_gen_extract_tl(t2, t2,  0, 16);
2009      }
2010  
2011      if (right) {
2012          if (arithmetic) {
2013              tcg_gen_sari_tl(t0, t0, sft4);
2014              tcg_gen_sari_tl(t1, t1, sft4);
2015              tcg_gen_sari_tl(t2, t2, sft4);
2016              tcg_gen_sari_tl(t3, t3, sft4);
2017          } else {
2018              tcg_gen_shri_tl(t0, t0, sft4);
2019              tcg_gen_shri_tl(t1, t1, sft4);
2020              tcg_gen_shri_tl(t2, t2, sft4);
2021              tcg_gen_shri_tl(t3, t3, sft4);
2022          }
2023      } else {
2024          tcg_gen_shli_tl(t0, t0, sft4);
2025          tcg_gen_shli_tl(t1, t1, sft4);
2026          tcg_gen_shli_tl(t2, t2, sft4);
2027          tcg_gen_shli_tl(t3, t3, sft4);
2028      }
2029      tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2030      tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2031  
2032      gen_store_mxu_gpr(t0, XRa);
2033      gen_store_mxu_gpr(t2, XRd);
2034  }
2035  
2036  /*
2037   *  Q16SLLV XRa, XRd, rs
2038   *    Quad 16-bit shift left from XRa and XRd to rs[3:0]
2039   *    bits. Store to XRa and XRd respectively.
2040   *  Q16SLRV XRa, XRd, rs
2041   *    Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
2042   *    bits. Store to XRa and XRd respectively.
2043   *  Q16SARV XRa, XRd, rs
2044   *    Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
2045   *    bits. Store to XRa and XRd respectively.
2046   */
gen_mxu_q16sxxv(DisasContext * ctx,bool right,bool arithmetic)2047  static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
2048  {
2049      uint32_t XRa, XRd, rs;
2050  
2051      XRa = extract32(ctx->opcode, 10, 4);
2052      XRd = extract32(ctx->opcode, 14, 4);
2053      rs  = extract32(ctx->opcode, 21, 5);
2054  
2055      TCGv t0 = tcg_temp_new();
2056      TCGv t1 = tcg_temp_new();
2057      TCGv t2 = tcg_temp_new();
2058      TCGv t3 = tcg_temp_new();
2059      TCGv t5 = tcg_temp_new();
2060  
2061      gen_load_mxu_gpr(t0, XRa);
2062      gen_load_mxu_gpr(t2, XRd);
2063      gen_load_gpr(t5, rs);
2064      tcg_gen_andi_tl(t5, t5, 0x0f);
2065  
2066  
2067      if (arithmetic) {
2068          tcg_gen_sextract_tl(t1, t0, 16, 16);
2069          tcg_gen_sextract_tl(t0, t0,  0, 16);
2070          tcg_gen_sextract_tl(t3, t2, 16, 16);
2071          tcg_gen_sextract_tl(t2, t2,  0, 16);
2072      } else {
2073          tcg_gen_extract_tl(t1, t0, 16, 16);
2074          tcg_gen_extract_tl(t0, t0,  0, 16);
2075          tcg_gen_extract_tl(t3, t2, 16, 16);
2076          tcg_gen_extract_tl(t2, t2,  0, 16);
2077      }
2078  
2079      if (right) {
2080          if (arithmetic) {
2081              tcg_gen_sar_tl(t0, t0, t5);
2082              tcg_gen_sar_tl(t1, t1, t5);
2083              tcg_gen_sar_tl(t2, t2, t5);
2084              tcg_gen_sar_tl(t3, t3, t5);
2085          } else {
2086              tcg_gen_shr_tl(t0, t0, t5);
2087              tcg_gen_shr_tl(t1, t1, t5);
2088              tcg_gen_shr_tl(t2, t2, t5);
2089              tcg_gen_shr_tl(t3, t3, t5);
2090          }
2091      } else {
2092          tcg_gen_shl_tl(t0, t0, t5);
2093          tcg_gen_shl_tl(t1, t1, t5);
2094          tcg_gen_shl_tl(t2, t2, t5);
2095          tcg_gen_shl_tl(t3, t3, t5);
2096      }
2097      tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
2098      tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2099  
2100      gen_store_mxu_gpr(t0, XRa);
2101      gen_store_mxu_gpr(t2, XRd);
2102  }
2103  
2104  /*
 *                   MXU instruction category: max/min/avg
 *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107   *
2108   *                     S32MAX     D16MAX     Q8MAX
2109   *                     S32MIN     D16MIN     Q8MIN
2110   *                     S32SLT     D16SLT     Q8SLT
2111   *                                           Q8SLTU
2112   *                                D16AVG     Q8AVG
2113   *                                D16AVGR    Q8AVGR
2114   *                     S32MOVZ    D16MOVZ    Q8MOVZ
2115   *                     S32MOVN    D16MOVN    Q8MOVN
2116   */
2117  
2118  /*
2119   *  S32MAX XRa, XRb, XRc
2120   *    Update XRa with the maximum of signed 32-bit integers contained
2121   *    in XRb and XRc.
2122   *
2123   *  S32MIN XRa, XRb, XRc
2124   *    Update XRa with the minimum of signed 32-bit integers contained
2125   *    in XRb and XRc.
2126   */
gen_mxu_S32MAX_S32MIN(DisasContext * ctx)2127  static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
2128  {
2129      uint32_t pad, opc, XRc, XRb, XRa;
2130  
2131      pad = extract32(ctx->opcode, 21, 5);
2132      opc = extract32(ctx->opcode, 18, 3);
2133      XRc = extract32(ctx->opcode, 14, 4);
2134      XRb = extract32(ctx->opcode, 10, 4);
2135      XRa = extract32(ctx->opcode,  6, 4);
2136  
2137      if (unlikely(pad != 0)) {
2138          /* opcode padding incorrect -> do nothing */
2139      } else if (unlikely(XRa == 0)) {
2140          /* destination is zero register -> do nothing */
2141      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2142          /* both operands zero registers -> just set destination to zero */
2143          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2144      } else if (unlikely((XRb == 0) || (XRc == 0))) {
2145          /* exactly one operand is zero register - find which one is not...*/
2146          uint32_t XRx = XRb ? XRb : XRc;
2147          /* ...and do max/min operation with one operand 0 */
2148          if (opc == OPC_MXU_S32MAX) {
2149              tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2150          } else {
2151              tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
2152          }
2153      } else if (unlikely(XRb == XRc)) {
2154          /* both operands same -> just set destination to one of them */
2155          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2156      } else {
2157          /* the most general case */
2158          if (opc == OPC_MXU_S32MAX) {
2159              tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2160                                                 mxu_gpr[XRc - 1]);
2161          } else {
2162              tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
2163                                                 mxu_gpr[XRc - 1]);
2164          }
2165      }
2166  }
2167  
2168  /*
2169   *  D16MAX
2170   *    Update XRa with the 16-bit-wise maximums of signed integers
2171   *    contained in XRb and XRc.
2172   *
2173   *  D16MIN
2174   *    Update XRa with the 16-bit-wise minimums of signed integers
2175   *    contained in XRb and XRc.
2176   */
gen_mxu_D16MAX_D16MIN(DisasContext * ctx)2177  static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
2178  {
2179      uint32_t pad, opc, XRc, XRb, XRa;
2180  
2181      pad = extract32(ctx->opcode, 21, 5);
2182      opc = extract32(ctx->opcode, 18, 3);
2183      XRc = extract32(ctx->opcode, 14, 4);
2184      XRb = extract32(ctx->opcode, 10, 4);
2185      XRa = extract32(ctx->opcode,  6, 4);
2186  
2187      if (unlikely(pad != 0)) {
2188          /* opcode padding incorrect -> do nothing */
2189      } else if (unlikely(XRa == 0)) {
2190          /* destination is zero register -> do nothing */
2191      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2192          /* both operands zero registers -> just set destination to zero */
2193          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2194      } else if (unlikely((XRb == 0) || (XRc == 0))) {
2195          /* exactly one operand is zero register - find which one is not...*/
2196          uint32_t XRx = XRb ? XRb : XRc;
2197          /* ...and do half-word-wise max/min with one operand 0 */
2198          TCGv_i32 t0 = tcg_temp_new();
2199          TCGv_i32 t1 = tcg_constant_i32(0);
2200          TCGv_i32 t2 = tcg_temp_new();
2201  
2202          /* the left half-word first */
2203          tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
2204          if (opc == OPC_MXU_D16MAX) {
2205              tcg_gen_smax_i32(t2, t0, t1);
2206          } else {
2207              tcg_gen_smin_i32(t2, t0, t1);
2208          }
2209  
2210          /* the right half-word */
2211          tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
2212          /* move half-words to the leftmost position */
2213          tcg_gen_shli_i32(t0, t0, 16);
2214          /* t0 will be max/min of t0 and t1 */
2215          if (opc == OPC_MXU_D16MAX) {
2216              tcg_gen_smax_i32(t0, t0, t1);
2217          } else {
2218              tcg_gen_smin_i32(t0, t0, t1);
2219          }
2220          /* return resulting half-words to its original position */
2221          tcg_gen_shri_i32(t0, t0, 16);
2222          /* finally update the destination */
2223          tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2224      } else if (unlikely(XRb == XRc)) {
2225          /* both operands same -> just set destination to one of them */
2226          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2227      } else {
2228          /* the most general case */
2229          TCGv_i32 t0 = tcg_temp_new();
2230          TCGv_i32 t1 = tcg_temp_new();
2231          TCGv_i32 t2 = tcg_temp_new();
2232  
2233          /* the left half-word first */
2234          tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
2235          tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
2236          if (opc == OPC_MXU_D16MAX) {
2237              tcg_gen_smax_i32(t2, t0, t1);
2238          } else {
2239              tcg_gen_smin_i32(t2, t0, t1);
2240          }
2241  
2242          /* the right half-word */
2243          tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
2244          tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
2245          /* move half-words to the leftmost position */
2246          tcg_gen_shli_i32(t0, t0, 16);
2247          tcg_gen_shli_i32(t1, t1, 16);
2248          /* t0 will be max/min of t0 and t1 */
2249          if (opc == OPC_MXU_D16MAX) {
2250              tcg_gen_smax_i32(t0, t0, t1);
2251          } else {
2252              tcg_gen_smin_i32(t0, t0, t1);
2253          }
2254          /* return resulting half-words to its original position */
2255          tcg_gen_shri_i32(t0, t0, 16);
2256          /* finally update the destination */
2257          tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
2258      }
2259  }
2260  
2261  /*
2262   *  Q8MAX
2263   *    Update XRa with the 8-bit-wise maximums of signed integers
2264   *    contained in XRb and XRc.
2265   *
2266   *  Q8MIN
2267   *    Update XRa with the 8-bit-wise minimums of signed integers
2268   *    contained in XRb and XRc.
2269   */
gen_mxu_Q8MAX_Q8MIN(DisasContext * ctx)2270  static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
2271  {
2272      uint32_t pad, opc, XRc, XRb, XRa;
2273  
2274      pad = extract32(ctx->opcode, 21, 5);
2275      opc = extract32(ctx->opcode, 18, 3);
2276      XRc = extract32(ctx->opcode, 14, 4);
2277      XRb = extract32(ctx->opcode, 10, 4);
2278      XRa = extract32(ctx->opcode,  6, 4);
2279  
2280      if (unlikely(pad != 0)) {
2281          /* opcode padding incorrect -> do nothing */
2282      } else if (unlikely(XRa == 0)) {
2283          /* destination is zero register -> do nothing */
2284      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2285          /* both operands zero registers -> just set destination to zero */
2286          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2287      } else if (unlikely((XRb == 0) || (XRc == 0))) {
2288          /* exactly one operand is zero register - make it be the first...*/
2289          uint32_t XRx = XRb ? XRb : XRc;
2290          /* ...and do byte-wise max/min with one operand 0 */
2291          TCGv_i32 t0 = tcg_temp_new();
2292          TCGv_i32 t1 = tcg_constant_i32(0);
2293          TCGv_i32 t2 = tcg_temp_new();
2294          int32_t i;
2295  
2296          /* the leftmost byte (byte 3) first */
2297          tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
2298          if (opc == OPC_MXU_Q8MAX) {
2299              tcg_gen_smax_i32(t2, t0, t1);
2300          } else {
2301              tcg_gen_smin_i32(t2, t0, t1);
2302          }
2303  
2304          /* bytes 2, 1, 0 */
2305          for (i = 2; i >= 0; i--) {
2306              /* extract the byte */
2307              tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
2308              /* move the byte to the leftmost position */
2309              tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2310              /* t0 will be max/min of t0 and t1 */
2311              if (opc == OPC_MXU_Q8MAX) {
2312                  tcg_gen_smax_i32(t0, t0, t1);
2313              } else {
2314                  tcg_gen_smin_i32(t0, t0, t1);
2315              }
2316              /* return resulting byte to its original position */
2317              tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2318              /* finally update the destination */
2319              tcg_gen_or_i32(t2, t2, t0);
2320          }
2321          gen_store_mxu_gpr(t2, XRa);
2322      } else if (unlikely(XRb == XRc)) {
2323          /* both operands same -> just set destination to one of them */
2324          tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2325      } else {
2326          /* the most general case */
2327          TCGv_i32 t0 = tcg_temp_new();
2328          TCGv_i32 t1 = tcg_temp_new();
2329          TCGv_i32 t2 = tcg_temp_new();
2330          int32_t i;
2331  
2332          /* the leftmost bytes (bytes 3) first */
2333          tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
2334          tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
2335          if (opc == OPC_MXU_Q8MAX) {
2336              tcg_gen_smax_i32(t2, t0, t1);
2337          } else {
2338              tcg_gen_smin_i32(t2, t0, t1);
2339          }
2340  
2341          /* bytes 2, 1, 0 */
2342          for (i = 2; i >= 0; i--) {
2343              /* extract corresponding bytes */
2344              tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
2345              tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
2346              /* move the bytes to the leftmost position */
2347              tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
2348              tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
2349              /* t0 will be max/min of t0 and t1 */
2350              if (opc == OPC_MXU_Q8MAX) {
2351                  tcg_gen_smax_i32(t0, t0, t1);
2352              } else {
2353                  tcg_gen_smin_i32(t0, t0, t1);
2354              }
2355              /* return resulting byte to its original position */
2356              tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
2357              /* finally update the destination */
2358              tcg_gen_or_i32(t2, t2, t0);
2359          }
2360          gen_store_mxu_gpr(t2, XRa);
2361      }
2362  }
2363  
2364  /*
2365   *  Q8SLT
2366   *    Update XRa with the signed "set less than" comparison of XRb and XRc
2367   *    on per-byte basis.
2368   *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2369   *
2370   *  Q8SLTU
2371   *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
2372   *    on per-byte basis.
2373   *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2374   */
gen_mxu_q8slt(DisasContext * ctx,bool sltu)2375  static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
2376  {
2377      uint32_t pad, XRc, XRb, XRa;
2378  
2379      pad = extract32(ctx->opcode, 21, 5);
2380      XRc = extract32(ctx->opcode, 14, 4);
2381      XRb = extract32(ctx->opcode, 10, 4);
2382      XRa = extract32(ctx->opcode,  6, 4);
2383  
2384      if (unlikely(pad != 0)) {
2385          /* opcode padding incorrect -> do nothing */
2386      } else if (unlikely(XRa == 0)) {
2387          /* destination is zero register -> do nothing */
2388      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2389          /* both operands zero registers -> just set destination to zero */
2390          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2391      } else if (unlikely(XRb == XRc)) {
2392          /* both operands same registers -> just set destination to zero */
2393          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2394      } else {
2395          /* the most general case */
2396          TCGv t0 = tcg_temp_new();
2397          TCGv t1 = tcg_temp_new();
2398          TCGv t2 = tcg_temp_new();
2399          TCGv t3 = tcg_temp_new();
2400          TCGv t4 = tcg_temp_new();
2401  
2402          gen_load_mxu_gpr(t3, XRb);
2403          gen_load_mxu_gpr(t4, XRc);
2404          tcg_gen_movi_tl(t2, 0);
2405  
2406          for (int i = 0; i < 4; i++) {
2407              if (sltu) {
2408                  tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2409                  tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2410              } else {
2411                  tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
2412                  tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
2413              }
2414              tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2415              tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2416          }
2417          gen_store_mxu_gpr(t2, XRa);
2418      }
2419  }
2420  
2421  /*
2422   *  S32SLT
2423   *    Update XRa with the signed "set less than" comparison of XRb and XRc.
2424   *    a.k.a. XRa = XRb < XRc ? 1 : 0;
2425   */
gen_mxu_S32SLT(DisasContext * ctx)2426  static void gen_mxu_S32SLT(DisasContext *ctx)
2427  {
2428      uint32_t pad, XRc, XRb, XRa;
2429  
2430      pad = extract32(ctx->opcode, 21, 5);
2431      XRc = extract32(ctx->opcode, 14, 4);
2432      XRb = extract32(ctx->opcode, 10, 4);
2433      XRa = extract32(ctx->opcode,  6, 4);
2434  
2435      if (unlikely(pad != 0)) {
2436          /* opcode padding incorrect -> do nothing */
2437      } else if (unlikely(XRa == 0)) {
2438          /* destination is zero register -> do nothing */
2439      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2440          /* both operands zero registers -> just set destination to zero */
2441          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2442      } else if (unlikely(XRb == XRc)) {
2443          /* both operands same registers -> just set destination to zero */
2444          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2445      } else {
2446          /* the most general case */
2447          TCGv t0 = tcg_temp_new();
2448          TCGv t1 = tcg_temp_new();
2449  
2450          gen_load_mxu_gpr(t0, XRb);
2451          gen_load_mxu_gpr(t1, XRc);
2452          tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], t0, t1);
2453      }
2454  }
2455  
2456  /*
2457   *  D16SLT
2458   *    Update XRa with the signed "set less than" comparison of XRb and XRc
2459   *    on per-word basis.
2460   *    a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2461   */
gen_mxu_D16SLT(DisasContext * ctx)2462  static void gen_mxu_D16SLT(DisasContext *ctx)
2463  {
2464      uint32_t pad, XRc, XRb, XRa;
2465  
2466      pad = extract32(ctx->opcode, 21, 5);
2467      XRc = extract32(ctx->opcode, 14, 4);
2468      XRb = extract32(ctx->opcode, 10, 4);
2469      XRa = extract32(ctx->opcode,  6, 4);
2470  
2471      if (unlikely(pad != 0)) {
2472          /* opcode padding incorrect -> do nothing */
2473      } else if (unlikely(XRa == 0)) {
2474          /* destination is zero register -> do nothing */
2475      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2476          /* both operands zero registers -> just set destination to zero */
2477          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2478      } else if (unlikely(XRb == XRc)) {
2479          /* both operands same registers -> just set destination to zero */
2480          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2481      } else {
2482          /* the most general case */
2483          TCGv t0 = tcg_temp_new();
2484          TCGv t1 = tcg_temp_new();
2485          TCGv t2 = tcg_temp_new();
2486          TCGv t3 = tcg_temp_new();
2487          TCGv t4 = tcg_temp_new();
2488  
2489          gen_load_mxu_gpr(t3, XRb);
2490          gen_load_mxu_gpr(t4, XRc);
2491          tcg_gen_sextract_tl(t0, t3, 16, 16);
2492          tcg_gen_sextract_tl(t1, t4, 16, 16);
2493          tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2494          tcg_gen_shli_tl(t2, t0, 16);
2495          tcg_gen_sextract_tl(t0, t3,  0, 16);
2496          tcg_gen_sextract_tl(t1, t4,  0, 16);
2497          tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
2498          tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
2499      }
2500  }
2501  
2502  /*
2503   *  D16AVG
2504   *    Update XRa with the signed average of XRb and XRc
2505   *    on per-word basis, rounding down.
2506   *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2507   *
2508   *  D16AVGR
2509   *    Update XRa with the signed average of XRb and XRc
2510   *    on per-word basis, math rounding 4/5.
2511   *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2512   */
gen_mxu_d16avg(DisasContext * ctx,bool round45)2513  static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
2514  {
2515      uint32_t pad, XRc, XRb, XRa;
2516  
2517      pad = extract32(ctx->opcode, 21, 5);
2518      XRc = extract32(ctx->opcode, 14, 4);
2519      XRb = extract32(ctx->opcode, 10, 4);
2520      XRa = extract32(ctx->opcode,  6, 4);
2521  
2522      if (unlikely(pad != 0)) {
2523          /* opcode padding incorrect -> do nothing */
2524      } else if (unlikely(XRa == 0)) {
2525          /* destination is zero register -> do nothing */
2526      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2527          /* both operands zero registers -> just set destination to zero */
2528          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2529      } else if (unlikely(XRb == XRc)) {
2530          /* both operands same registers -> just set destination to same */
2531          tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2532      } else {
2533          /* the most general case */
2534          TCGv t0 = tcg_temp_new();
2535          TCGv t1 = tcg_temp_new();
2536          TCGv t2 = tcg_temp_new();
2537          TCGv t3 = tcg_temp_new();
2538          TCGv t4 = tcg_temp_new();
2539  
2540          gen_load_mxu_gpr(t3, XRb);
2541          gen_load_mxu_gpr(t4, XRc);
2542          tcg_gen_sextract_tl(t0, t3, 16, 16);
2543          tcg_gen_sextract_tl(t1, t4, 16, 16);
2544          tcg_gen_add_tl(t0, t0, t1);
2545          if (round45) {
2546              tcg_gen_addi_tl(t0, t0, 1);
2547          }
2548          tcg_gen_shli_tl(t2, t0, 15);
2549          tcg_gen_andi_tl(t2, t2, 0xffff0000);
2550          tcg_gen_sextract_tl(t0, t3,  0, 16);
2551          tcg_gen_sextract_tl(t1, t4,  0, 16);
2552          tcg_gen_add_tl(t0, t0, t1);
2553          if (round45) {
2554              tcg_gen_addi_tl(t0, t0, 1);
2555          }
2556          tcg_gen_shri_tl(t0, t0, 1);
2557          tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
2558          gen_store_mxu_gpr(t2, XRa);
2559      }
2560  }
2561  
2562  /*
2563   *  Q8AVG
2564   *    Update XRa with the signed average of XRb and XRc
2565   *    on per-byte basis, rounding down.
2566   *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2567   *
2568   *  Q8AVGR
2569   *    Update XRa with the signed average of XRb and XRc
2570   *    on per-word basis, math rounding 4/5.
2571   *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2572   */
gen_mxu_q8avg(DisasContext * ctx,bool round45)2573  static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
2574  {
2575      uint32_t pad, XRc, XRb, XRa;
2576  
2577      pad = extract32(ctx->opcode, 21, 5);
2578      XRc = extract32(ctx->opcode, 14, 4);
2579      XRb = extract32(ctx->opcode, 10, 4);
2580      XRa = extract32(ctx->opcode,  6, 4);
2581  
2582      if (unlikely(pad != 0)) {
2583          /* opcode padding incorrect -> do nothing */
2584      } else if (unlikely(XRa == 0)) {
2585          /* destination is zero register -> do nothing */
2586      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2587          /* both operands zero registers -> just set destination to zero */
2588          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2589      } else if (unlikely(XRb == XRc)) {
2590          /* both operands same registers -> just set destination to same */
2591          tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2592      } else {
2593          /* the most general case */
2594          TCGv t0 = tcg_temp_new();
2595          TCGv t1 = tcg_temp_new();
2596          TCGv t2 = tcg_temp_new();
2597          TCGv t3 = tcg_temp_new();
2598          TCGv t4 = tcg_temp_new();
2599  
2600          gen_load_mxu_gpr(t3, XRb);
2601          gen_load_mxu_gpr(t4, XRc);
2602          tcg_gen_movi_tl(t2, 0);
2603  
2604          for (int i = 0; i < 4; i++) {
2605              tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2606              tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2607              tcg_gen_add_tl(t0, t0, t1);
2608              if (round45) {
2609                  tcg_gen_addi_tl(t0, t0, 1);
2610              }
2611              tcg_gen_shri_tl(t0, t0, 1);
2612              tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2613          }
2614          gen_store_mxu_gpr(t2, XRa);
2615      }
2616  }
2617  
2618  /*
2619   *  Q8MOVZ
2620   *    Quadruple 8-bit packed conditional move where
2621   *    XRb contains conditions, XRc what to move and
2622   *    XRa is the destination.
2623   *    a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2624   *
2625   *  Q8MOVN
2626   *    Quadruple 8-bit packed conditional move where
2627   *    XRb contains conditions, XRc what to move and
2628   *    XRa is the destination.
2629   *    a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2630   */
gen_mxu_q8movzn(DisasContext * ctx,TCGCond cond)2631  static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
2632  {
2633      uint32_t XRc, XRb, XRa;
2634  
2635      XRa = extract32(ctx->opcode,  6, 4);
2636      XRb = extract32(ctx->opcode, 10, 4);
2637      XRc = extract32(ctx->opcode, 14, 4);
2638  
2639      TCGv t0 = tcg_temp_new();
2640      TCGv t1 = tcg_temp_new();
2641      TCGv t2 = tcg_temp_new();
2642      TCGv t3 = tcg_temp_new();
2643      TCGLabel *l_quarterdone = gen_new_label();
2644      TCGLabel *l_halfdone = gen_new_label();
2645      TCGLabel *l_quarterrest = gen_new_label();
2646      TCGLabel *l_done = gen_new_label();
2647  
2648      gen_load_mxu_gpr(t0, XRc);
2649      gen_load_mxu_gpr(t1, XRb);
2650      gen_load_mxu_gpr(t2, XRa);
2651  
2652      tcg_gen_extract_tl(t3, t1, 24, 8);
2653      tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
2654      tcg_gen_extract_tl(t3, t0, 24, 8);
2655      tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
2656  
2657      gen_set_label(l_quarterdone);
2658      tcg_gen_extract_tl(t3, t1, 16, 8);
2659      tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2660      tcg_gen_extract_tl(t3, t0, 16, 8);
2661      tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
2662  
2663      gen_set_label(l_halfdone);
2664      tcg_gen_extract_tl(t3, t1, 8, 8);
2665      tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
2666      tcg_gen_extract_tl(t3, t0, 8, 8);
2667      tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
2668  
2669      gen_set_label(l_quarterrest);
2670      tcg_gen_extract_tl(t3, t1, 0, 8);
2671      tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2672      tcg_gen_extract_tl(t3, t0, 0, 8);
2673      tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
2674  
2675      gen_set_label(l_done);
2676      gen_store_mxu_gpr(t2, XRa);
2677  }
2678  
2679  /*
2680   *  D16MOVZ
2681   *    Double 16-bit packed conditional move where
2682   *    XRb contains conditions, XRc what to move and
2683   *    XRa is the destination.
2684   *    a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2685   *
2686   *  D16MOVN
2687   *    Double 16-bit packed conditional move where
2688   *    XRb contains conditions, XRc what to move and
2689   *    XRa is the destination.
2690   *    a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
2691   */
gen_mxu_d16movzn(DisasContext * ctx,TCGCond cond)2692  static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
2693  {
2694      uint32_t XRc, XRb, XRa;
2695  
2696      XRa = extract32(ctx->opcode,  6, 4);
2697      XRb = extract32(ctx->opcode, 10, 4);
2698      XRc = extract32(ctx->opcode, 14, 4);
2699  
2700      TCGv t0 = tcg_temp_new();
2701      TCGv t1 = tcg_temp_new();
2702      TCGv t2 = tcg_temp_new();
2703      TCGv t3 = tcg_temp_new();
2704      TCGLabel *l_halfdone = gen_new_label();
2705      TCGLabel *l_done = gen_new_label();
2706  
2707      gen_load_mxu_gpr(t0, XRc);
2708      gen_load_mxu_gpr(t1, XRb);
2709      gen_load_mxu_gpr(t2, XRa);
2710  
2711      tcg_gen_extract_tl(t3, t1, 16, 16);
2712      tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
2713      tcg_gen_extract_tl(t3, t0, 16, 16);
2714      tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
2715  
2716      gen_set_label(l_halfdone);
2717      tcg_gen_extract_tl(t3, t1, 0, 16);
2718      tcg_gen_brcondi_tl(cond, t3, 0, l_done);
2719      tcg_gen_extract_tl(t3, t0, 0, 16);
2720      tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
2721  
2722      gen_set_label(l_done);
2723      gen_store_mxu_gpr(t2, XRa);
2724  }
2725  
2726  /*
2727   *  S32MOVZ
2728   *    Quadruple 32-bit conditional move where
2729   *    XRb contains conditions, XRc what to move and
2730   *    XRa is the destination.
2731   *    a.k.a. if (XRb == 0) { XRa = XRc }
2732   *
2733   *  S32MOVN
2734   *    Single 32-bit conditional move where
2735   *    XRb contains conditions, XRc what to move and
2736   *    XRa is the destination.
2737   *    a.k.a. if (XRb != 0) { XRa = XRc }
2738   */
gen_mxu_s32movzn(DisasContext * ctx,TCGCond cond)2739  static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
2740  {
2741      uint32_t XRc, XRb, XRa;
2742  
2743      XRa = extract32(ctx->opcode,  6, 4);
2744      XRb = extract32(ctx->opcode, 10, 4);
2745      XRc = extract32(ctx->opcode, 14, 4);
2746  
2747      TCGv t0 = tcg_temp_new();
2748      TCGv t1 = tcg_temp_new();
2749      TCGLabel *l_done = gen_new_label();
2750  
2751      gen_load_mxu_gpr(t0, XRc);
2752      gen_load_mxu_gpr(t1, XRb);
2753  
2754      tcg_gen_brcondi_tl(cond, t1, 0, l_done);
2755      gen_store_mxu_gpr(t0, XRa);
2756      gen_set_label(l_done);
2757  }
2758  
2759  /*
2760   *      MXU instruction category: Addition and subtraction
2761   *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2762   *
2763   *              S32CPS      D16CPS
2764   *                                       Q8ADD
2765   */
2766  
2767  /*
2768   *  S32CPS
2769   *    Update XRa if XRc < 0 by value of 0 - XRb
2770   *    else XRa = XRb
2771   */
gen_mxu_S32CPS(DisasContext * ctx)2772  static void gen_mxu_S32CPS(DisasContext *ctx)
2773  {
2774      uint32_t pad, XRc, XRb, XRa;
2775  
2776      pad = extract32(ctx->opcode, 21, 5);
2777      XRc = extract32(ctx->opcode, 14, 4);
2778      XRb = extract32(ctx->opcode, 10, 4);
2779      XRa = extract32(ctx->opcode,  6, 4);
2780  
2781      if (unlikely(pad != 0)) {
2782          /* opcode padding incorrect -> do nothing */
2783      } else if (unlikely(XRa == 0)) {
2784          /* destination is zero register -> do nothing */
2785      } else if (unlikely(XRb == 0)) {
2786          /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2787          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2788      } else if (unlikely(XRc == 0)) {
2789          /* condition always false -> just move XRb to XRa */
2790          tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2791      } else {
2792          /* the most general case */
2793          TCGv t0 = tcg_temp_new();
2794          TCGLabel *l_not_less = gen_new_label();
2795          TCGLabel *l_done = gen_new_label();
2796  
2797          tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
2798          tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
2799          tcg_gen_br(l_done);
2800          gen_set_label(l_not_less);
2801          gen_load_mxu_gpr(t0, XRb);
2802          gen_set_label(l_done);
2803          gen_store_mxu_gpr(t0, XRa);
2804      }
2805  }
2806  
2807  /*
2808   *  D16CPS
2809   *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2810   *    else XRa[0..1] = XRb[0..1]
2811   */
gen_mxu_D16CPS(DisasContext * ctx)2812  static void gen_mxu_D16CPS(DisasContext *ctx)
2813  {
2814      uint32_t pad, XRc, XRb, XRa;
2815  
2816      pad = extract32(ctx->opcode, 21, 5);
2817      XRc = extract32(ctx->opcode, 14, 4);
2818      XRb = extract32(ctx->opcode, 10, 4);
2819      XRa = extract32(ctx->opcode,  6, 4);
2820  
2821      if (unlikely(pad != 0)) {
2822          /* opcode padding incorrect -> do nothing */
2823      } else if (unlikely(XRa == 0)) {
2824          /* destination is zero register -> do nothing */
2825      } else if (unlikely(XRb == 0)) {
2826          /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2827          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2828      } else if (unlikely(XRc == 0)) {
2829          /* condition always false -> just move XRb to XRa */
2830          tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
2831      } else {
2832          /* the most general case */
2833          TCGv t0 = tcg_temp_new();
2834          TCGv t1 = tcg_temp_new();
2835          TCGLabel *l_done_hi = gen_new_label();
2836          TCGLabel *l_not_less_lo = gen_new_label();
2837          TCGLabel *l_done_lo = gen_new_label();
2838  
2839          tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
2840          tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
2841          tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
2842          tcg_gen_subfi_tl(t1, 0, t1);
2843  
2844          gen_set_label(l_done_hi);
2845          tcg_gen_shli_i32(t1, t1, 16);
2846  
2847          tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1],  0, 16);
2848          tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
2849          tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2850          tcg_gen_subfi_tl(t0, 0, t0);
2851          tcg_gen_br(l_done_lo);
2852  
2853          gen_set_label(l_not_less_lo);
2854          tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
2855  
2856          gen_set_label(l_done_lo);
2857          tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
2858      }
2859  }
2860  
2861  /*
2862   *  Q8ABD XRa, XRb, XRc
2863   *  Gets absolute difference for quadruple of 8-bit
2864   *  packed in XRb to another one in XRc,
2865   *  put the result in XRa.
2866   *  a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2867   */
gen_mxu_Q8ABD(DisasContext * ctx)2868  static void gen_mxu_Q8ABD(DisasContext *ctx)
2869  {
2870      uint32_t pad, XRc, XRb, XRa;
2871  
2872      pad = extract32(ctx->opcode, 21, 3);
2873      XRc = extract32(ctx->opcode, 14, 4);
2874      XRb = extract32(ctx->opcode, 10, 4);
2875      XRa = extract32(ctx->opcode,  6, 4);
2876  
2877      if (unlikely(pad != 0)) {
2878          /* opcode padding incorrect -> do nothing */
2879      } else if (unlikely(XRa == 0)) {
2880          /* destination is zero register -> do nothing */
2881      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2882          /* both operands zero registers -> just set destination to zero */
2883          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
2884      } else {
2885          /* the most general case */
2886          TCGv t0 = tcg_temp_new();
2887          TCGv t1 = tcg_temp_new();
2888          TCGv t2 = tcg_temp_new();
2889          TCGv t3 = tcg_temp_new();
2890          TCGv t4 = tcg_temp_new();
2891  
2892          gen_load_mxu_gpr(t3, XRb);
2893          gen_load_mxu_gpr(t4, XRc);
2894          tcg_gen_movi_tl(t2, 0);
2895  
2896          for (int i = 0; i < 4; i++) {
2897              tcg_gen_extract_tl(t0, t3, 8 * i, 8);
2898              tcg_gen_extract_tl(t1, t4, 8 * i, 8);
2899  
2900              tcg_gen_sub_tl(t0, t0, t1);
2901              tcg_gen_abs_tl(t0, t0);
2902  
2903              tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2904          }
2905          gen_store_mxu_gpr(t2, XRa);
2906      }
2907  }
2908  
2909  /*
2910   *  Q8ADD XRa, XRb, XRc, ptn2
2911   *  Add/subtract quadruple of 8-bit packed in XRb
2912   *  to another one in XRc, put the result in XRa.
2913   */
gen_mxu_Q8ADD(DisasContext * ctx)2914  static void gen_mxu_Q8ADD(DisasContext *ctx)
2915  {
2916      uint32_t aptn2, pad, XRc, XRb, XRa;
2917  
2918      aptn2 = extract32(ctx->opcode, 24, 2);
2919      pad   = extract32(ctx->opcode, 21, 3);
2920      XRc   = extract32(ctx->opcode, 14, 4);
2921      XRb   = extract32(ctx->opcode, 10, 4);
2922      XRa   = extract32(ctx->opcode,  6, 4);
2923  
2924      if (unlikely(pad != 0)) {
2925          /* opcode padding incorrect -> do nothing */
2926      } else if (unlikely(XRa == 0)) {
2927          /* destination is zero register -> do nothing */
2928      } else if (unlikely((XRb == 0) && (XRc == 0))) {
2929          /* both operands zero registers -> just set destination to zero */
2930          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
2931      } else {
2932          /* the most general case */
2933          TCGv t0 = tcg_temp_new();
2934          TCGv t1 = tcg_temp_new();
2935          TCGv t2 = tcg_temp_new();
2936          TCGv t3 = tcg_temp_new();
2937          TCGv t4 = tcg_temp_new();
2938  
2939          gen_load_mxu_gpr(t3, XRb);
2940          gen_load_mxu_gpr(t4, XRc);
2941  
2942          for (int i = 0; i < 4; i++) {
2943              tcg_gen_andi_tl(t0, t3, 0xff);
2944              tcg_gen_andi_tl(t1, t4, 0xff);
2945  
2946              if (i < 2) {
2947                  if (aptn2 & 0x01) {
2948                      tcg_gen_sub_tl(t0, t0, t1);
2949                  } else {
2950                      tcg_gen_add_tl(t0, t0, t1);
2951                  }
2952              } else {
2953                  if (aptn2 & 0x02) {
2954                      tcg_gen_sub_tl(t0, t0, t1);
2955                  } else {
2956                      tcg_gen_add_tl(t0, t0, t1);
2957                  }
2958              }
2959              if (i < 3) {
2960                  tcg_gen_shri_tl(t3, t3, 8);
2961                  tcg_gen_shri_tl(t4, t4, 8);
2962              }
2963              if (i > 0) {
2964                  tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
2965              } else {
2966                  tcg_gen_andi_tl(t0, t0, 0xff);
2967                  tcg_gen_mov_tl(t2, t0);
2968              }
2969          }
2970          gen_store_mxu_gpr(t2, XRa);
2971      }
2972  }
2973  
2974  /*
2975   *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
2976   *    Add/subtract quadruple of 8-bit packed in XRb
2977   *    to another one in XRc, with zero extending
2978   *    to 16-bit and put results as packed 16-bit data
2979   *    into XRa and XRd.
2980   *    aptn2 manages action add or subtract of pairs of data.
2981   *
2982   *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
2983   *    Add/subtract quadruple of 8-bit packed in XRb
2984   *    to another one in XRc, with zero extending
2985   *    to 16-bit and accumulate results as packed 16-bit data
2986   *    into XRa and XRd.
2987   *    aptn2 manages action add or subtract of pairs of data.
2988   */
gen_mxu_q8adde(DisasContext * ctx,bool accumulate)2989  static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
2990  {
2991      uint32_t aptn2, XRd, XRc, XRb, XRa;
2992  
2993      aptn2 = extract32(ctx->opcode, 24, 2);
2994      XRd   = extract32(ctx->opcode, 18, 4);
2995      XRc   = extract32(ctx->opcode, 14, 4);
2996      XRb   = extract32(ctx->opcode, 10, 4);
2997      XRa   = extract32(ctx->opcode,  6, 4);
2998  
2999      if (unlikely((XRb == 0) && (XRc == 0))) {
3000          /* both operands zero registers -> just set destination to zero */
3001          if (XRa != 0) {
3002              tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3003          }
3004          if (XRd != 0) {
3005              tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
3006          }
3007      } else {
3008          /* the most general case */
3009          TCGv t0 = tcg_temp_new();
3010          TCGv t1 = tcg_temp_new();
3011          TCGv t2 = tcg_temp_new();
3012          TCGv t3 = tcg_temp_new();
3013          TCGv t4 = tcg_temp_new();
3014          TCGv t5 = tcg_temp_new();
3015  
3016          if (XRa != 0) {
3017              gen_extract_mxu_gpr(t0, XRb, 16, 8);
3018              gen_extract_mxu_gpr(t1, XRc, 16, 8);
3019              gen_extract_mxu_gpr(t2, XRb, 24, 8);
3020              gen_extract_mxu_gpr(t3, XRc, 24, 8);
3021              if (aptn2 & 2) {
3022                  tcg_gen_sub_tl(t0, t0, t1);
3023                  tcg_gen_sub_tl(t2, t2, t3);
3024              } else {
3025                  tcg_gen_add_tl(t0, t0, t1);
3026                  tcg_gen_add_tl(t2, t2, t3);
3027              }
3028              if (accumulate) {
3029                  gen_load_mxu_gpr(t5, XRa);
3030                  tcg_gen_extract_tl(t1, t5,  0, 16);
3031                  tcg_gen_extract_tl(t3, t5, 16, 16);
3032                  tcg_gen_add_tl(t0, t0, t1);
3033                  tcg_gen_add_tl(t2, t2, t3);
3034              }
3035              tcg_gen_shli_tl(t2, t2, 16);
3036              tcg_gen_extract_tl(t0, t0, 0, 16);
3037              tcg_gen_or_tl(t4, t2, t0);
3038          }
3039          if (XRd != 0) {
3040              gen_extract_mxu_gpr(t0, XRb, 0, 8);
3041              gen_extract_mxu_gpr(t1, XRc, 0, 8);
3042              gen_extract_mxu_gpr(t2, XRb, 8, 8);
3043              gen_extract_mxu_gpr(t3, XRc, 8, 8);
3044              if (aptn2 & 1) {
3045                  tcg_gen_sub_tl(t0, t0, t1);
3046                  tcg_gen_sub_tl(t2, t2, t3);
3047              } else {
3048                  tcg_gen_add_tl(t0, t0, t1);
3049                  tcg_gen_add_tl(t2, t2, t3);
3050              }
3051              if (accumulate) {
3052                  gen_load_mxu_gpr(t5, XRd);
3053                  tcg_gen_extract_tl(t1, t5,  0, 16);
3054                  tcg_gen_extract_tl(t3, t5, 16, 16);
3055                  tcg_gen_add_tl(t0, t0, t1);
3056                  tcg_gen_add_tl(t2, t2, t3);
3057              }
3058              tcg_gen_shli_tl(t2, t2, 16);
3059              tcg_gen_extract_tl(t0, t0, 0, 16);
3060              tcg_gen_or_tl(t5, t2, t0);
3061          }
3062  
3063          gen_store_mxu_gpr(t4, XRa);
3064          gen_store_mxu_gpr(t5, XRd);
3065      }
3066  }
3067  
3068  /*
3069   *  D8SUM XRa, XRb, XRc
3070   *    Double parallel add of quadruple unsigned 8-bit together
3071   *    with zero extending to 16-bit data.
3072   *  D8SUMC XRa, XRb, XRc
3073   *    Double parallel add of quadruple unsigned 8-bit together
3074   *    with zero extending to 16-bit data and adding 2 to each
3075   *    parallel result.
3076   */
gen_mxu_d8sum(DisasContext * ctx,bool sumc)3077  static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
3078  {
3079      uint32_t pad, pad2, XRc, XRb, XRa;
3080  
3081      pad  = extract32(ctx->opcode, 24, 2);
3082      pad2 = extract32(ctx->opcode, 18, 4);
3083      XRc  = extract32(ctx->opcode, 14, 4);
3084      XRb  = extract32(ctx->opcode, 10, 4);
3085      XRa  = extract32(ctx->opcode,  6, 4);
3086  
3087      if (unlikely(pad != 0 || pad2 != 0)) {
3088          /* opcode padding incorrect -> do nothing */
3089      } else if (unlikely(XRa == 0)) {
3090          /* destination is zero register -> do nothing */
3091      } else if (unlikely((XRb == 0) && (XRc == 0))) {
3092          /* both operands zero registers -> just set destination to zero */
3093          tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
3094      } else {
3095          /* the most general case */
3096          TCGv t0 = tcg_temp_new();
3097          TCGv t1 = tcg_temp_new();
3098          TCGv t2 = tcg_temp_new();
3099          TCGv t3 = tcg_temp_new();
3100          TCGv t4 = tcg_temp_new();
3101          TCGv t5 = tcg_temp_new();
3102  
3103          if (XRb != 0) {
3104              tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
3105              tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
3106              tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
3107              tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
3108              tcg_gen_add_tl(t4, t0, t1);
3109              tcg_gen_add_tl(t4, t4, t2);
3110              tcg_gen_add_tl(t4, t4, t3);
3111          } else {
3112              tcg_gen_mov_tl(t4, 0);
3113          }
3114          if (XRc != 0) {
3115              tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
3116              tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
3117              tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
3118              tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
3119              tcg_gen_add_tl(t5, t0, t1);
3120              tcg_gen_add_tl(t5, t5, t2);
3121              tcg_gen_add_tl(t5, t5, t3);
3122          } else {
3123              tcg_gen_mov_tl(t5, 0);
3124          }
3125  
3126          if (sumc) {
3127              tcg_gen_addi_tl(t4, t4, 2);
3128              tcg_gen_addi_tl(t5, t5, 2);
3129          }
3130          tcg_gen_shli_tl(t4, t4, 16);
3131  
3132          tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3133      }
3134  }
3135  
3136  /*
3137   * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3138   * 16-bit pattern addition.
3139   */
gen_mxu_q16add(DisasContext * ctx)3140  static void gen_mxu_q16add(DisasContext *ctx)
3141  {
3142      uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
3143  
3144      aptn2 = extract32(ctx->opcode, 24, 2);
3145      optn2 = extract32(ctx->opcode, 22, 2);
3146      XRd   = extract32(ctx->opcode, 18, 4);
3147      XRc   = extract32(ctx->opcode, 14, 4);
3148      XRb   = extract32(ctx->opcode, 10, 4);
3149      XRa   = extract32(ctx->opcode,  6, 4);
3150  
3151      TCGv t0 = tcg_temp_new();
3152      TCGv t1 = tcg_temp_new();
3153      TCGv t2 = tcg_temp_new();
3154      TCGv t3 = tcg_temp_new();
3155      TCGv t4 = tcg_temp_new();
3156      TCGv t5 = tcg_temp_new();
3157  
3158      gen_load_mxu_gpr(t1, XRb);
3159      tcg_gen_extract_tl(t0, t1,  0, 16);
3160      tcg_gen_extract_tl(t1, t1, 16, 16);
3161  
3162      gen_load_mxu_gpr(t3, XRc);
3163      tcg_gen_extract_tl(t2, t3,  0, 16);
3164      tcg_gen_extract_tl(t3, t3, 16, 16);
3165  
3166      switch (optn2) {
3167      case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
3168          tcg_gen_mov_tl(t4, t1);
3169          tcg_gen_mov_tl(t5, t0);
3170          break;
3171      case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
3172          tcg_gen_mov_tl(t4, t0);
3173          tcg_gen_mov_tl(t5, t0);
3174          break;
3175      case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
3176          tcg_gen_mov_tl(t4, t1);
3177          tcg_gen_mov_tl(t5, t1);
3178          break;
3179      case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
3180          tcg_gen_mov_tl(t4, t0);
3181          tcg_gen_mov_tl(t5, t1);
3182          break;
3183      }
3184  
3185      switch (aptn2) {
3186      case MXU_APTN2_AA: /* lop +, rop + */
3187          tcg_gen_add_tl(t0, t4, t3);
3188          tcg_gen_add_tl(t1, t5, t2);
3189          tcg_gen_add_tl(t4, t4, t3);
3190          tcg_gen_add_tl(t5, t5, t2);
3191          break;
3192      case MXU_APTN2_AS: /* lop +, rop + */
3193          tcg_gen_sub_tl(t0, t4, t3);
3194          tcg_gen_sub_tl(t1, t5, t2);
3195          tcg_gen_add_tl(t4, t4, t3);
3196          tcg_gen_add_tl(t5, t5, t2);
3197          break;
3198      case MXU_APTN2_SA: /* lop +, rop + */
3199          tcg_gen_add_tl(t0, t4, t3);
3200          tcg_gen_add_tl(t1, t5, t2);
3201          tcg_gen_sub_tl(t4, t4, t3);
3202          tcg_gen_sub_tl(t5, t5, t2);
3203          break;
3204      case MXU_APTN2_SS: /* lop +, rop + */
3205          tcg_gen_sub_tl(t0, t4, t3);
3206          tcg_gen_sub_tl(t1, t5, t2);
3207          tcg_gen_sub_tl(t4, t4, t3);
3208          tcg_gen_sub_tl(t5, t5, t2);
3209          break;
3210      }
3211  
3212      tcg_gen_shli_tl(t0, t0, 16);
3213      tcg_gen_extract_tl(t1, t1, 0, 16);
3214      tcg_gen_shli_tl(t4, t4, 16);
3215      tcg_gen_extract_tl(t5, t5, 0, 16);
3216  
3217      tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
3218      tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
3219  }
3220  
3221  /*
3222   * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3223   * 16-bit addition/subtraction with accumulate.
3224   */
gen_mxu_q16acc(DisasContext * ctx)3225  static void gen_mxu_q16acc(DisasContext *ctx)
3226  {
3227      uint32_t aptn2, XRc, XRb, XRa, XRd;
3228  
3229      aptn2 = extract32(ctx->opcode, 24, 2);
3230      XRd   = extract32(ctx->opcode, 18, 4);
3231      XRc   = extract32(ctx->opcode, 14, 4);
3232      XRb   = extract32(ctx->opcode, 10, 4);
3233      XRa   = extract32(ctx->opcode,  6, 4);
3234  
3235      TCGv t0 = tcg_temp_new();
3236      TCGv t1 = tcg_temp_new();
3237      TCGv t2 = tcg_temp_new();
3238      TCGv t3 = tcg_temp_new();
3239      TCGv s3 = tcg_temp_new();
3240      TCGv s2 = tcg_temp_new();
3241      TCGv s1 = tcg_temp_new();
3242      TCGv s0 = tcg_temp_new();
3243  
3244      gen_load_mxu_gpr(t1, XRb);
3245      tcg_gen_extract_tl(t0, t1,  0, 16);
3246      tcg_gen_extract_tl(t1, t1, 16, 16);
3247  
3248      gen_load_mxu_gpr(t3, XRc);
3249      tcg_gen_extract_tl(t2, t3,  0, 16);
3250      tcg_gen_extract_tl(t3, t3, 16, 16);
3251  
3252      switch (aptn2) {
3253      case MXU_APTN2_AA: /* lop +, rop + */
3254          tcg_gen_add_tl(s3, t1, t3);
3255          tcg_gen_add_tl(s2, t0, t2);
3256          tcg_gen_add_tl(s1, t1, t3);
3257          tcg_gen_add_tl(s0, t0, t2);
3258          break;
3259      case MXU_APTN2_AS: /* lop +, rop - */
3260          tcg_gen_sub_tl(s3, t1, t3);
3261          tcg_gen_sub_tl(s2, t0, t2);
3262          tcg_gen_add_tl(s1, t1, t3);
3263          tcg_gen_add_tl(s0, t0, t2);
3264          break;
3265      case MXU_APTN2_SA: /* lop -, rop + */
3266          tcg_gen_add_tl(s3, t1, t3);
3267          tcg_gen_add_tl(s2, t0, t2);
3268          tcg_gen_sub_tl(s1, t1, t3);
3269          tcg_gen_sub_tl(s0, t0, t2);
3270          break;
3271      case MXU_APTN2_SS: /* lop -, rop - */
3272          tcg_gen_sub_tl(s3, t1, t3);
3273          tcg_gen_sub_tl(s2, t0, t2);
3274          tcg_gen_sub_tl(s1, t1, t3);
3275          tcg_gen_sub_tl(s0, t0, t2);
3276          break;
3277      }
3278  
3279      if (XRa != 0) {
3280          tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
3281          tcg_gen_extract_tl(t0, t0, 0, 16);
3282          tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
3283          tcg_gen_add_tl(t1, t1, s1);
3284          tcg_gen_shli_tl(t1, t1, 16);
3285          tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
3286      }
3287  
3288      if (XRd != 0) {
3289          tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
3290          tcg_gen_extract_tl(t0, t0, 0, 16);
3291          tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
3292          tcg_gen_add_tl(t1, t1, s3);
3293          tcg_gen_shli_tl(t1, t1, 16);
3294          tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
3295      }
3296  }
3297  
3298  /*
3299   * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3300   * 16-bit accumulate.
3301   */
gen_mxu_q16accm(DisasContext * ctx)3302  static void gen_mxu_q16accm(DisasContext *ctx)
3303  {
3304      uint32_t aptn2, XRc, XRb, XRa, XRd;
3305  
3306      aptn2 = extract32(ctx->opcode, 24, 2);
3307      XRd   = extract32(ctx->opcode, 18, 4);
3308      XRc   = extract32(ctx->opcode, 14, 4);
3309      XRb   = extract32(ctx->opcode, 10, 4);
3310      XRa   = extract32(ctx->opcode,  6, 4);
3311  
3312      TCGv t0 = tcg_temp_new();
3313      TCGv t1 = tcg_temp_new();
3314      TCGv t2 = tcg_temp_new();
3315      TCGv t3 = tcg_temp_new();
3316  
3317      gen_load_mxu_gpr(t2, XRb);
3318      gen_load_mxu_gpr(t3, XRc);
3319  
3320      if (XRa != 0) {
3321          TCGv a0 = tcg_temp_new();
3322          TCGv a1 = tcg_temp_new();
3323  
3324          tcg_gen_extract_tl(t0, t2,  0, 16);
3325          tcg_gen_extract_tl(t1, t2, 16, 16);
3326  
3327          gen_load_mxu_gpr(a1, XRa);
3328          tcg_gen_extract_tl(a0, a1,  0, 16);
3329          tcg_gen_extract_tl(a1, a1, 16, 16);
3330  
3331          if (aptn2 & 2) {
3332              tcg_gen_sub_tl(a0, a0, t0);
3333              tcg_gen_sub_tl(a1, a1, t1);
3334          } else {
3335              tcg_gen_add_tl(a0, a0, t0);
3336              tcg_gen_add_tl(a1, a1, t1);
3337          }
3338          tcg_gen_extract_tl(a0, a0, 0, 16);
3339          tcg_gen_shli_tl(a1, a1, 16);
3340          tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
3341      }
3342  
3343      if (XRd != 0) {
3344          TCGv a0 = tcg_temp_new();
3345          TCGv a1 = tcg_temp_new();
3346  
3347          tcg_gen_extract_tl(t0, t3,  0, 16);
3348          tcg_gen_extract_tl(t1, t3, 16, 16);
3349  
3350          gen_load_mxu_gpr(a1, XRd);
3351          tcg_gen_extract_tl(a0, a1,  0, 16);
3352          tcg_gen_extract_tl(a1, a1, 16, 16);
3353  
3354          if (aptn2 & 1) {
3355              tcg_gen_sub_tl(a0, a0, t0);
3356              tcg_gen_sub_tl(a1, a1, t1);
3357          } else {
3358              tcg_gen_add_tl(a0, a0, t0);
3359              tcg_gen_add_tl(a1, a1, t1);
3360          }
3361          tcg_gen_extract_tl(a0, a0, 0, 16);
3362          tcg_gen_shli_tl(a1, a1, 16);
3363          tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
3364      }
3365  }
3366  
3367  
3368  /*
3369   * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3370   * 16-bit sign extended addition and accumulate.
3371   */
gen_mxu_d16asum(DisasContext * ctx)3372  static void gen_mxu_d16asum(DisasContext *ctx)
3373  {
3374      uint32_t aptn2, XRc, XRb, XRa, XRd;
3375  
3376      aptn2 = extract32(ctx->opcode, 24, 2);
3377      XRd   = extract32(ctx->opcode, 18, 4);
3378      XRc   = extract32(ctx->opcode, 14, 4);
3379      XRb   = extract32(ctx->opcode, 10, 4);
3380      XRa   = extract32(ctx->opcode,  6, 4);
3381  
3382      TCGv t0 = tcg_temp_new();
3383      TCGv t1 = tcg_temp_new();
3384      TCGv t2 = tcg_temp_new();
3385      TCGv t3 = tcg_temp_new();
3386  
3387      gen_load_mxu_gpr(t2, XRb);
3388      gen_load_mxu_gpr(t3, XRc);
3389  
3390      if (XRa != 0) {
3391          tcg_gen_sextract_tl(t0, t2,  0, 16);
3392          tcg_gen_sextract_tl(t1, t2, 16, 16);
3393          tcg_gen_add_tl(t0, t0, t1);
3394          if (aptn2 & 2) {
3395              tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3396          } else {
3397              tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3398          }
3399      }
3400  
3401      if (XRd != 0) {
3402          tcg_gen_sextract_tl(t0, t3,  0, 16);
3403          tcg_gen_sextract_tl(t1, t3, 16, 16);
3404          tcg_gen_add_tl(t0, t0, t1);
3405          if (aptn2 & 1) {
3406              tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3407          } else {
3408              tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
3409          }
3410      }
3411  }
3412  
3413  /*
3414   * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3415   * 32 bit pattern addition/subtraction, set carry.
3416   *
3417   * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3418   * 32 bit pattern addition/subtraction with carry.
3419   */
gen_mxu_d32add(DisasContext * ctx)3420  static void gen_mxu_d32add(DisasContext *ctx)
3421  {
3422      uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
3423  
3424      aptn2 = extract32(ctx->opcode, 24, 2);
3425      addc  = extract32(ctx->opcode, 22, 2);
3426      XRd   = extract32(ctx->opcode, 18, 4);
3427      XRc   = extract32(ctx->opcode, 14, 4);
3428      XRb   = extract32(ctx->opcode, 10, 4);
3429      XRa   = extract32(ctx->opcode,  6, 4);
3430  
3431      TCGv t0 = tcg_temp_new();
3432      TCGv t1 = tcg_temp_new();
3433      TCGv t2 = tcg_temp_new();
3434      TCGv cr = tcg_temp_new();
3435  
3436      if (unlikely(addc > 1)) {
3437          /* opcode incorrect -> do nothing */
3438      } else if (addc == 1) {
3439          if (unlikely(XRa == 0 && XRd == 0)) {
3440              /* destinations are zero register -> do nothing */
3441          } else {
3442              /* FIXME ??? What if XRa == XRd ??? */
3443              /* aptn2 is unused here */
3444              gen_load_mxu_gpr(t0, XRb);
3445              gen_load_mxu_gpr(t1, XRc);
3446              gen_load_mxu_cr(cr);
3447              if (XRa != 0) {
3448                  tcg_gen_extract_tl(t2, cr, 31, 1);
3449                  tcg_gen_add_tl(t0, t0, t2);
3450                  tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3451              }
3452              if (XRd != 0) {
3453                  tcg_gen_extract_tl(t2, cr, 30, 1);
3454                  tcg_gen_add_tl(t1, t1, t2);
3455                  tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3456              }
3457          }
3458      } else if (unlikely(XRa == 0 && XRd == 0)) {
3459          /* destinations are zero register -> do nothing */
3460      } else {
3461          /* common case */
3462          /* FIXME ??? What if XRa == XRd ??? */
3463          TCGv carry = tcg_temp_new();
3464  
3465          gen_load_mxu_gpr(t0, XRb);
3466          gen_load_mxu_gpr(t1, XRc);
3467          gen_load_mxu_cr(cr);
3468          if (XRa != 0) {
3469              if (aptn2 & 2) {
3470                  tcg_gen_sub_i32(t2, t0, t1);
3471                  tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3472              } else {
3473                  tcg_gen_add_i32(t2, t0, t1);
3474                  tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3475              }
3476              tcg_gen_andi_tl(cr, cr, 0x7fffffff);
3477              tcg_gen_shli_tl(carry, carry, 31);
3478              tcg_gen_or_tl(cr, cr, carry);
3479              gen_store_mxu_gpr(t2, XRa);
3480          }
3481          if (XRd != 0) {
3482              if (aptn2 & 1) {
3483                  tcg_gen_sub_i32(t2, t0, t1);
3484                  tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
3485              } else {
3486                  tcg_gen_add_i32(t2, t0, t1);
3487                  tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
3488              }
3489              tcg_gen_andi_tl(cr, cr, 0xbfffffff);
3490              tcg_gen_shli_tl(carry, carry, 30);
3491              tcg_gen_or_tl(cr, cr, carry);
3492              gen_store_mxu_gpr(t2, XRd);
3493          }
3494          gen_store_mxu_cr(cr);
3495      }
3496  }
3497  
3498  /*
3499   * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3500   * 32 bit pattern addition/subtraction and accumulate.
3501   */
gen_mxu_d32acc(DisasContext * ctx)3502  static void gen_mxu_d32acc(DisasContext *ctx)
3503  {
3504      uint32_t aptn2, XRc, XRb, XRa, XRd;
3505  
3506      aptn2 = extract32(ctx->opcode, 24, 2);
3507      XRd   = extract32(ctx->opcode, 18, 4);
3508      XRc   = extract32(ctx->opcode, 14, 4);
3509      XRb   = extract32(ctx->opcode, 10, 4);
3510      XRa   = extract32(ctx->opcode,  6, 4);
3511  
3512      TCGv t0 = tcg_temp_new();
3513      TCGv t1 = tcg_temp_new();
3514      TCGv t2 = tcg_temp_new();
3515  
3516      if (unlikely(XRa == 0 && XRd == 0)) {
3517          /* destinations are zero register -> do nothing */
3518      } else {
3519          /* common case */
3520          gen_load_mxu_gpr(t0, XRb);
3521          gen_load_mxu_gpr(t1, XRc);
3522          if (XRa != 0) {
3523              if (aptn2 & 2) {
3524                  tcg_gen_sub_tl(t2, t0, t1);
3525              } else {
3526                  tcg_gen_add_tl(t2, t0, t1);
3527              }
3528              tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3529          }
3530          if (XRd != 0) {
3531              if (aptn2 & 1) {
3532                  tcg_gen_sub_tl(t2, t0, t1);
3533              } else {
3534                  tcg_gen_add_tl(t2, t0, t1);
3535              }
3536              tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3537          }
3538      }
3539  }
3540  
3541  /*
3542   * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3543   * 32 bit pattern addition/subtraction and accumulate.
3544   */
gen_mxu_d32accm(DisasContext * ctx)3545  static void gen_mxu_d32accm(DisasContext *ctx)
3546  {
3547      uint32_t aptn2, XRc, XRb, XRa, XRd;
3548  
3549      aptn2 = extract32(ctx->opcode, 24, 2);
3550      XRd   = extract32(ctx->opcode, 18, 4);
3551      XRc   = extract32(ctx->opcode, 14, 4);
3552      XRb   = extract32(ctx->opcode, 10, 4);
3553      XRa   = extract32(ctx->opcode,  6, 4);
3554  
3555      TCGv t0 = tcg_temp_new();
3556      TCGv t1 = tcg_temp_new();
3557      TCGv t2 = tcg_temp_new();
3558  
3559      if (unlikely(XRa == 0 && XRd == 0)) {
3560          /* destinations are zero register -> do nothing */
3561      } else {
3562          /* common case */
3563          gen_load_mxu_gpr(t0, XRb);
3564          gen_load_mxu_gpr(t1, XRc);
3565          if (XRa != 0) {
3566              tcg_gen_add_tl(t2, t0, t1);
3567              if (aptn2 & 2) {
3568                  tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3569              } else {
3570                  tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
3571              }
3572          }
3573          if (XRd != 0) {
3574              tcg_gen_sub_tl(t2, t0, t1);
3575              if (aptn2 & 1) {
3576                  tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3577              } else {
3578                  tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
3579              }
3580          }
3581      }
3582  }
3583  
3584  /*
3585   * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3586   * 32 bit pattern addition/subtraction.
3587   */
gen_mxu_d32asum(DisasContext * ctx)3588  static void gen_mxu_d32asum(DisasContext *ctx)
3589  {
3590      uint32_t aptn2, XRc, XRb, XRa, XRd;
3591  
3592      aptn2 = extract32(ctx->opcode, 24, 2);
3593      XRd   = extract32(ctx->opcode, 18, 4);
3594      XRc   = extract32(ctx->opcode, 14, 4);
3595      XRb   = extract32(ctx->opcode, 10, 4);
3596      XRa   = extract32(ctx->opcode,  6, 4);
3597  
3598      TCGv t0 = tcg_temp_new();
3599      TCGv t1 = tcg_temp_new();
3600  
3601      if (unlikely(XRa == 0 && XRd == 0)) {
3602          /* destinations are zero register -> do nothing */
3603      } else {
3604          /* common case */
3605          gen_load_mxu_gpr(t0, XRb);
3606          gen_load_mxu_gpr(t1, XRc);
3607          if (XRa != 0) {
3608              if (aptn2 & 2) {
3609                  tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3610              } else {
3611                  tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
3612              }
3613          }
3614          if (XRd != 0) {
3615              if (aptn2 & 1) {
3616                  tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3617              } else {
3618                  tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
3619              }
3620          }
3621      }
3622  }
3623  
3624  /*
3625   *                 MXU instruction category: Miscellaneous
3626   *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3627   *
3628   *               S32EXTR      S32LUI
3629   *               S32EXTRV
3630   *                            Q16SAT
3631   *                            Q16SCOP
3632   */
3633  
3634  /*
3635   *  S32EXTR XRa, XRd, rs, bits5
3636   *    Extract bits5 bits from 64-bit pair {XRa:XRd}
3637   *    starting from rs[4:0] offset and put to the XRa.
3638   */
gen_mxu_s32extr(DisasContext * ctx)3639  static void gen_mxu_s32extr(DisasContext *ctx)
3640  {
3641      TCGv t0, t1, t2, t3;
3642      uint32_t XRa, XRd, rs, bits5;
3643  
3644      t0 = tcg_temp_new();
3645      t1 = tcg_temp_new();
3646      t2 = tcg_temp_new();
3647      t3 = tcg_temp_new();
3648  
3649      XRa   = extract32(ctx->opcode,  6, 4);
3650      XRd   = extract32(ctx->opcode, 10, 4);
3651      bits5 = extract32(ctx->opcode, 16, 5);
3652      rs    = extract32(ctx->opcode, 21, 5);
3653  
3654      /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
3655      /* {XRa} = extract({tmp}, 0, bits5); */
3656      if (bits5 > 0) {
3657          TCGLabel *l_xra_only = gen_new_label();
3658          TCGLabel *l_done = gen_new_label();
3659  
3660          gen_load_mxu_gpr(t0, XRd);
3661          gen_load_mxu_gpr(t1, XRa);
3662          gen_load_gpr(t2, rs);
3663          tcg_gen_andi_tl(t2, t2, 0x1f);
3664          tcg_gen_subfi_tl(t2, 32, t2);
3665          tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
3666          tcg_gen_subfi_tl(t2, bits5, t2);
3667          tcg_gen_subfi_tl(t3, 32, t2);
3668          tcg_gen_shr_tl(t0, t0, t3);
3669          tcg_gen_shl_tl(t1, t1, t2);
3670          tcg_gen_or_tl(t0, t0, t1);
3671          tcg_gen_br(l_done);
3672          gen_set_label(l_xra_only);
3673          tcg_gen_subi_tl(t2, t2, bits5);
3674          tcg_gen_shr_tl(t0, t1, t2);
3675          gen_set_label(l_done);
3676          tcg_gen_extract_tl(t0, t0, 0, bits5);
3677      } else {
3678          /* unspecified behavior but matches tests on real hardware*/
3679          tcg_gen_movi_tl(t0, 0);
3680      }
3681      gen_store_mxu_gpr(t0, XRa);
3682  }
3683  
3684  /*
3685   *  S32EXTRV XRa, XRd, rs, rt
3686   *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3687   *    starting from rs[4:0] offset and put to the XRa.
3688   */
gen_mxu_s32extrv(DisasContext * ctx)3689  static void gen_mxu_s32extrv(DisasContext *ctx)
3690  {
3691      TCGv t0, t1, t2, t3, t4;
3692      uint32_t XRa, XRd, rs, rt;
3693  
3694      t0 = tcg_temp_new();
3695      t1 = tcg_temp_new();
3696      t2 = tcg_temp_new();
3697      t3 = tcg_temp_new();
3698      t4 = tcg_temp_new();
3699      TCGLabel *l_xra_only = gen_new_label();
3700      TCGLabel *l_done = gen_new_label();
3701      TCGLabel *l_zero = gen_new_label();
3702      TCGLabel *l_extract = gen_new_label();
3703  
3704      XRa = extract32(ctx->opcode,  6, 4);
3705      XRd = extract32(ctx->opcode, 10, 4);
3706      rt  = extract32(ctx->opcode, 16, 5);
3707      rs  = extract32(ctx->opcode, 21, 5);
3708  
3709      /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
3710      gen_load_mxu_gpr(t0, XRd);
3711      gen_load_mxu_gpr(t1, XRa);
3712      gen_load_gpr(t2, rs);
3713      gen_load_gpr(t4, rt);
3714      tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
3715      tcg_gen_andi_tl(t2, t2, 0x1f);
3716      tcg_gen_subfi_tl(t2, 32, t2);
3717      tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
3718      tcg_gen_sub_tl(t2, t4, t2);
3719      tcg_gen_subfi_tl(t3, 32, t2);
3720      tcg_gen_shr_tl(t0, t0, t3);
3721      tcg_gen_shl_tl(t1, t1, t2);
3722      tcg_gen_or_tl(t0, t0, t1);
3723      tcg_gen_br(l_extract);
3724  
3725      gen_set_label(l_xra_only);
3726      tcg_gen_sub_tl(t2, t2, t4);
3727      tcg_gen_shr_tl(t0, t1, t2);
3728      tcg_gen_br(l_extract);
3729  
3730      /* unspecified behavior but matches tests on real hardware*/
3731      gen_set_label(l_zero);
3732      tcg_gen_movi_tl(t0, 0);
3733      tcg_gen_br(l_done);
3734  
3735      /* {XRa} = extract({tmp}, 0, rt) */
3736      gen_set_label(l_extract);
3737      tcg_gen_subfi_tl(t4, 32, t4);
3738      tcg_gen_shl_tl(t0, t0, t4);
3739      tcg_gen_shr_tl(t0, t0, t4);
3740  
3741      gen_set_label(l_done);
3742      gen_store_mxu_gpr(t0, XRa);
3743  }
3744  
3745  /*
3746   *  S32LUI XRa, S8, optn3
3747   *    Permutate the immediate S8 value to form a word
3748   *    to update XRa.
3749   */
gen_mxu_s32lui(DisasContext * ctx)3750  static void gen_mxu_s32lui(DisasContext *ctx)
3751  {
3752      uint32_t XRa, s8, optn3, pad;
3753  
3754      XRa   = extract32(ctx->opcode,  6, 4);
3755      s8    = extract32(ctx->opcode, 10, 8);
3756      pad   = extract32(ctx->opcode, 21, 2);
3757      optn3 = extract32(ctx->opcode, 23, 3);
3758  
3759      if (unlikely(pad != 0)) {
3760          /* opcode padding incorrect -> do nothing */
3761      } else if (unlikely(XRa == 0)) {
3762          /* destination is zero register -> do nothing */
3763      } else {
3764          uint32_t s16;
3765          TCGv t0 = tcg_temp_new();
3766  
3767          switch (optn3) {
3768          case 0:
3769              tcg_gen_movi_tl(t0, s8);
3770              break;
3771          case 1:
3772              tcg_gen_movi_tl(t0, s8 << 8);
3773              break;
3774          case 2:
3775              tcg_gen_movi_tl(t0, s8 << 16);
3776              break;
3777          case 3:
3778              tcg_gen_movi_tl(t0, s8 << 24);
3779              break;
3780          case 4:
3781              tcg_gen_movi_tl(t0, (s8 << 16) | s8);
3782              break;
3783          case 5:
3784              tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
3785              break;
3786          case 6:
3787              s16 = (uint16_t)(int16_t)(int8_t)s8;
3788              tcg_gen_movi_tl(t0, (s16 << 16) | s16);
3789              break;
3790          case 7:
3791              tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
3792              break;
3793          }
3794          gen_store_mxu_gpr(t0, XRa);
3795      }
3796  }
3797  
3798  /*
3799   *  Q16SAT XRa, XRb, XRc
3800   *  Packs four 16-bit signed integers in XRb and XRc to
3801   *  four saturated unsigned 8-bit into XRa.
3802   *
3803   */
gen_mxu_Q16SAT(DisasContext * ctx)3804  static void gen_mxu_Q16SAT(DisasContext *ctx)
3805  {
3806      uint32_t pad, XRc, XRb, XRa;
3807  
3808      pad = extract32(ctx->opcode, 21, 3);
3809      XRc = extract32(ctx->opcode, 14, 4);
3810      XRb = extract32(ctx->opcode, 10, 4);
3811      XRa = extract32(ctx->opcode,  6, 4);
3812  
3813      if (unlikely(pad != 0)) {
3814          /* opcode padding incorrect -> do nothing */
3815      } else if (unlikely(XRa == 0)) {
3816          /* destination is zero register -> do nothing */
3817      } else {
3818          /* the most general case */
3819          TCGv t0 = tcg_temp_new();
3820          TCGv t1 = tcg_temp_new();
3821          TCGv t2 = tcg_temp_new();
3822  
3823          tcg_gen_movi_tl(t2, 0);
3824          if (XRb != 0) {
3825              TCGLabel *l_less_hi = gen_new_label();
3826              TCGLabel *l_less_lo = gen_new_label();
3827              TCGLabel *l_lo = gen_new_label();
3828              TCGLabel *l_greater_hi = gen_new_label();
3829              TCGLabel *l_greater_lo = gen_new_label();
3830              TCGLabel *l_done = gen_new_label();
3831  
3832              tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
3833              tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3834              tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3835              tcg_gen_br(l_lo);
3836              gen_set_label(l_less_hi);
3837              tcg_gen_movi_tl(t0, 0);
3838              tcg_gen_br(l_lo);
3839              gen_set_label(l_greater_hi);
3840              tcg_gen_movi_tl(t0, 255);
3841  
3842              gen_set_label(l_lo);
3843              tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
3844              tcg_gen_sari_tl(t1, t1, 16);
3845              tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3846              tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3847              tcg_gen_br(l_done);
3848              gen_set_label(l_less_lo);
3849              tcg_gen_movi_tl(t1, 0);
3850              tcg_gen_br(l_done);
3851              gen_set_label(l_greater_lo);
3852              tcg_gen_movi_tl(t1, 255);
3853  
3854              gen_set_label(l_done);
3855              tcg_gen_shli_tl(t2, t0, 24);
3856              tcg_gen_shli_tl(t1, t1, 16);
3857              tcg_gen_or_tl(t2, t2, t1);
3858          }
3859  
3860          if (XRc != 0) {
3861              TCGLabel *l_less_hi = gen_new_label();
3862              TCGLabel *l_less_lo = gen_new_label();
3863              TCGLabel *l_lo = gen_new_label();
3864              TCGLabel *l_greater_hi = gen_new_label();
3865              TCGLabel *l_greater_lo = gen_new_label();
3866              TCGLabel *l_done = gen_new_label();
3867  
3868              tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
3869              tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
3870              tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
3871              tcg_gen_br(l_lo);
3872              gen_set_label(l_less_hi);
3873              tcg_gen_movi_tl(t0, 0);
3874              tcg_gen_br(l_lo);
3875              gen_set_label(l_greater_hi);
3876              tcg_gen_movi_tl(t0, 255);
3877  
3878              gen_set_label(l_lo);
3879              tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
3880              tcg_gen_sari_tl(t1, t1, 16);
3881              tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
3882              tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
3883              tcg_gen_br(l_done);
3884              gen_set_label(l_less_lo);
3885              tcg_gen_movi_tl(t1, 0);
3886              tcg_gen_br(l_done);
3887              gen_set_label(l_greater_lo);
3888              tcg_gen_movi_tl(t1, 255);
3889  
3890              gen_set_label(l_done);
3891              tcg_gen_shli_tl(t0, t0, 8);
3892              tcg_gen_or_tl(t2, t2, t0);
3893              tcg_gen_or_tl(t2, t2, t1);
3894          }
3895          gen_store_mxu_gpr(t2, XRa);
3896      }
3897  }
3898  
3899  /*
3900   *  Q16SCOP XRa, XRd, XRb, XRc
3901   *    Determine sign of quad packed 16-bit signed values
3902   *    in XRb and XRc put result in XRa and XRd respectively.
3903   */
gen_mxu_q16scop(DisasContext * ctx)3904  static void gen_mxu_q16scop(DisasContext *ctx)
3905  {
3906      uint32_t XRd, XRc, XRb, XRa;
3907  
3908      XRd  = extract32(ctx->opcode, 18, 4);
3909      XRc  = extract32(ctx->opcode, 14, 4);
3910      XRb  = extract32(ctx->opcode, 10, 4);
3911      XRa  = extract32(ctx->opcode,  6, 4);
3912  
3913      TCGv t0 = tcg_temp_new();
3914      TCGv t1 = tcg_temp_new();
3915      TCGv t2 = tcg_temp_new();
3916      TCGv t3 = tcg_temp_new();
3917      TCGv t4 = tcg_temp_new();
3918  
3919      TCGLabel *l_b_hi_lt = gen_new_label();
3920      TCGLabel *l_b_hi_gt = gen_new_label();
3921      TCGLabel *l_b_lo = gen_new_label();
3922      TCGLabel *l_b_lo_lt = gen_new_label();
3923      TCGLabel *l_c_hi = gen_new_label();
3924      TCGLabel *l_c_hi_lt = gen_new_label();
3925      TCGLabel *l_c_hi_gt = gen_new_label();
3926      TCGLabel *l_c_lo = gen_new_label();
3927      TCGLabel *l_c_lo_lt = gen_new_label();
3928      TCGLabel *l_done = gen_new_label();
3929  
3930      gen_load_mxu_gpr(t0, XRb);
3931      gen_load_mxu_gpr(t1, XRc);
3932  
3933      tcg_gen_sextract_tl(t2, t0, 16, 16);
3934      tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
3935      tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
3936      tcg_gen_movi_tl(t3, 0);
3937      tcg_gen_br(l_b_lo);
3938      gen_set_label(l_b_hi_lt);
3939      tcg_gen_movi_tl(t3, 0xffff0000);
3940      tcg_gen_br(l_b_lo);
3941      gen_set_label(l_b_hi_gt);
3942      tcg_gen_movi_tl(t3, 0x00010000);
3943  
3944      gen_set_label(l_b_lo);
3945      tcg_gen_sextract_tl(t2, t0, 0, 16);
3946      tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
3947      tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
3948      tcg_gen_ori_tl(t3, t3, 0x00000001);
3949      tcg_gen_br(l_c_hi);
3950      gen_set_label(l_b_lo_lt);
3951      tcg_gen_ori_tl(t3, t3, 0x0000ffff);
3952      tcg_gen_br(l_c_hi);
3953  
3954      gen_set_label(l_c_hi);
3955      tcg_gen_sextract_tl(t2, t1, 16, 16);
3956      tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
3957      tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
3958      tcg_gen_movi_tl(t4, 0);
3959      tcg_gen_br(l_c_lo);
3960      gen_set_label(l_c_hi_lt);
3961      tcg_gen_movi_tl(t4, 0xffff0000);
3962      tcg_gen_br(l_c_lo);
3963      gen_set_label(l_c_hi_gt);
3964      tcg_gen_movi_tl(t4, 0x00010000);
3965  
3966      gen_set_label(l_c_lo);
3967      tcg_gen_sextract_tl(t2, t1, 0, 16);
3968      tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
3969      tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
3970      tcg_gen_ori_tl(t4, t4, 0x00000001);
3971      tcg_gen_br(l_done);
3972      gen_set_label(l_c_lo_lt);
3973      tcg_gen_ori_tl(t4, t4, 0x0000ffff);
3974  
3975      gen_set_label(l_done);
3976      gen_store_mxu_gpr(t3, XRa);
3977      gen_store_mxu_gpr(t4, XRd);
3978  }
3979  
3980  /*
3981   *  S32SFL XRa, XRd, XRb, XRc
3982   *    Shuffle bytes according to one of four patterns.
3983   */
gen_mxu_s32sfl(DisasContext * ctx)3984  static void gen_mxu_s32sfl(DisasContext *ctx)
3985  {
3986      uint32_t XRd, XRc, XRb, XRa, ptn2;
3987  
3988      XRd  = extract32(ctx->opcode, 18, 4);
3989      XRc  = extract32(ctx->opcode, 14, 4);
3990      XRb  = extract32(ctx->opcode, 10, 4);
3991      XRa  = extract32(ctx->opcode,  6, 4);
3992      ptn2 = extract32(ctx->opcode, 24, 2);
3993  
3994      TCGv t0 = tcg_temp_new();
3995      TCGv t1 = tcg_temp_new();
3996      TCGv t2 = tcg_temp_new();
3997      TCGv t3 = tcg_temp_new();
3998  
3999      gen_load_mxu_gpr(t0, XRb);
4000      gen_load_mxu_gpr(t1, XRc);
4001  
4002      switch (ptn2) {
4003      case 0:
4004          tcg_gen_andi_tl(t2, t0, 0xff000000);
4005          tcg_gen_andi_tl(t3, t1, 0x000000ff);
4006          tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
4007          tcg_gen_shri_tl(t0, t0,  8);
4008          tcg_gen_shri_tl(t1, t1,  8);
4009          tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4010          tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
4011          tcg_gen_shri_tl(t0, t0,  8);
4012          tcg_gen_shri_tl(t1, t1,  8);
4013          tcg_gen_deposit_tl(t2, t2, t0,  8, 8);
4014          tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4015          tcg_gen_shri_tl(t1, t1,  8);
4016          tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4017          break;
4018      case 1:
4019          tcg_gen_andi_tl(t2, t0, 0xff000000);
4020          tcg_gen_andi_tl(t3, t1, 0x000000ff);
4021          tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
4022          tcg_gen_shri_tl(t0, t0,  8);
4023          tcg_gen_shri_tl(t1, t1,  8);
4024          tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
4025          tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4026          tcg_gen_shri_tl(t0, t0,  8);
4027          tcg_gen_shri_tl(t1, t1,  8);
4028          tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4029          tcg_gen_deposit_tl(t3, t3, t1,  8, 8);
4030          tcg_gen_shri_tl(t1, t1,  8);
4031          tcg_gen_deposit_tl(t2, t2, t1,  8, 8);
4032          break;
4033      case 2:
4034          tcg_gen_andi_tl(t2, t0, 0xff00ff00);
4035          tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
4036          tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
4037          tcg_gen_shri_tl(t0, t0, 16);
4038          tcg_gen_shri_tl(t1, t1,  8);
4039          tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
4040          tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
4041          tcg_gen_shri_tl(t1, t1, 16);
4042          tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
4043          break;
4044      case 3:
4045          tcg_gen_andi_tl(t2, t0, 0xffff0000);
4046          tcg_gen_andi_tl(t3, t1, 0x0000ffff);
4047          tcg_gen_shri_tl(t1, t1, 16);
4048          tcg_gen_deposit_tl(t2, t2, t1,  0, 16);
4049          tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
4050          break;
4051      }
4052  
4053      gen_store_mxu_gpr(t2, XRa);
4054      gen_store_mxu_gpr(t3, XRd);
4055  }
4056  
4057  /*
4058   *  Q8SAD XRa, XRd, XRb, XRc
4059   *    Typical SAD operation for motion estimation.
4060   */
gen_mxu_q8sad(DisasContext * ctx)4061  static void gen_mxu_q8sad(DisasContext *ctx)
4062  {
4063      uint32_t XRd, XRc, XRb, XRa;
4064  
4065      XRd = extract32(ctx->opcode, 18, 4);
4066      XRc = extract32(ctx->opcode, 14, 4);
4067      XRb = extract32(ctx->opcode, 10, 4);
4068      XRa = extract32(ctx->opcode,  6, 4);
4069  
4070      TCGv t0 = tcg_temp_new();
4071      TCGv t1 = tcg_temp_new();
4072      TCGv t2 = tcg_temp_new();
4073      TCGv t3 = tcg_temp_new();
4074      TCGv t4 = tcg_temp_new();
4075      TCGv t5 = tcg_temp_new();
4076  
4077      gen_load_mxu_gpr(t2, XRb);
4078      gen_load_mxu_gpr(t3, XRc);
4079      gen_load_mxu_gpr(t5, XRd);
4080      tcg_gen_movi_tl(t4, 0);
4081  
4082      for (int i = 0; i < 4; i++) {
4083          tcg_gen_andi_tl(t0, t2, 0xff);
4084          tcg_gen_andi_tl(t1, t3, 0xff);
4085          tcg_gen_sub_tl(t0, t0, t1);
4086          tcg_gen_abs_tl(t0, t0);
4087          tcg_gen_add_tl(t4, t4, t0);
4088          if (i < 3) {
4089              tcg_gen_shri_tl(t2, t2, 8);
4090              tcg_gen_shri_tl(t3, t3, 8);
4091          }
4092      }
4093      tcg_gen_add_tl(t5, t5, t4);
4094      gen_store_mxu_gpr(t4, XRa);
4095      gen_store_mxu_gpr(t5, XRd);
4096  }
4097  
4098  /*
4099   *                 MXU instruction category: align
4100   *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4101   *
4102   *                       S32ALN     S32ALNI
4103   */
4104  
4105  /*
4106   *  S32ALNI XRc, XRb, XRa, optn3
4107   *    Arrange bytes from XRb and XRc according to one of five sets of
4108   *    rules determined by optn3, and place the result in XRa.
4109   */
gen_mxu_S32ALNI(DisasContext * ctx)4110  static void gen_mxu_S32ALNI(DisasContext *ctx)
4111  {
4112      uint32_t optn3, pad, XRc, XRb, XRa;
4113  
4114      optn3 = extract32(ctx->opcode,  23, 3);
4115      pad   = extract32(ctx->opcode,  21, 2);
4116      XRc   = extract32(ctx->opcode, 14, 4);
4117      XRb   = extract32(ctx->opcode, 10, 4);
4118      XRa   = extract32(ctx->opcode,  6, 4);
4119  
4120      if (unlikely(pad != 0)) {
4121          /* opcode padding incorrect -> do nothing */
4122      } else if (unlikely(XRa == 0)) {
4123          /* destination is zero register -> do nothing */
4124      } else if (unlikely((XRb == 0) && (XRc == 0))) {
4125          /* both operands zero registers -> just set destination to all 0s */
4126          tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4127      } else if (unlikely(XRb == 0)) {
4128          /* XRb zero register -> just appropriatelly shift XRc into XRa */
4129          switch (optn3) {
4130          case MXU_OPTN3_PTN0:
4131              tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4132              break;
4133          case MXU_OPTN3_PTN1:
4134          case MXU_OPTN3_PTN2:
4135          case MXU_OPTN3_PTN3:
4136              tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
4137                               8 * (4 - optn3));
4138              break;
4139          case MXU_OPTN3_PTN4:
4140              tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4141              break;
4142          }
4143      } else if (unlikely(XRc == 0)) {
4144          /* XRc zero register -> just appropriatelly shift XRb into XRa */
4145          switch (optn3) {
4146          case MXU_OPTN3_PTN0:
4147              tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4148              break;
4149          case MXU_OPTN3_PTN1:
4150          case MXU_OPTN3_PTN2:
4151          case MXU_OPTN3_PTN3:
4152              tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4153              break;
4154          case MXU_OPTN3_PTN4:
4155              tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4156              break;
4157          }
4158      } else if (unlikely(XRb == XRc)) {
4159          /* both operands same -> just rotation or moving from any of them */
4160          switch (optn3) {
4161          case MXU_OPTN3_PTN0:
4162          case MXU_OPTN3_PTN4:
4163              tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4164              break;
4165          case MXU_OPTN3_PTN1:
4166          case MXU_OPTN3_PTN2:
4167          case MXU_OPTN3_PTN3:
4168              tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4169              break;
4170          }
4171      } else {
4172          /* the most general case */
4173          switch (optn3) {
4174          case MXU_OPTN3_PTN0:
4175              {
4176                  /*                                         */
4177                  /*         XRb                XRc          */
4178                  /*  +---------------+                      */
4179                  /*  | A   B   C   D |    E   F   G   H     */
4180                  /*  +-------+-------+                      */
4181                  /*          |                              */
4182                  /*         XRa                             */
4183                  /*                                         */
4184  
4185                  tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4186              }
4187              break;
4188          case MXU_OPTN3_PTN1:
4189              {
4190                  /*                                         */
4191                  /*         XRb                 XRc         */
4192                  /*      +-------------------+              */
4193                  /*    A | B   C   D       E | F   G   H    */
4194                  /*      +---------+---------+              */
4195                  /*                |                        */
4196                  /*               XRa                       */
4197                  /*                                         */
4198  
4199                  TCGv_i32 t0 = tcg_temp_new();
4200                  TCGv_i32 t1 = tcg_temp_new();
4201  
4202                  tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
4203                  tcg_gen_shli_i32(t0, t0, 8);
4204  
4205                  tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
4206                  tcg_gen_shri_i32(t1, t1, 24);
4207  
4208                  tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4209              }
4210              break;
4211          case MXU_OPTN3_PTN2:
4212              {
4213                  /*                                         */
4214                  /*         XRb                 XRc         */
4215                  /*          +-------------------+          */
4216                  /*    A   B | C   D       E   F | G   H    */
4217                  /*          +---------+---------+          */
4218                  /*                    |                    */
4219                  /*                   XRa                   */
4220                  /*                                         */
4221  
4222                  TCGv_i32 t0 = tcg_temp_new();
4223                  TCGv_i32 t1 = tcg_temp_new();
4224  
4225                  tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
4226                  tcg_gen_shli_i32(t0, t0, 16);
4227  
4228                  tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
4229                  tcg_gen_shri_i32(t1, t1, 16);
4230  
4231                  tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4232              }
4233              break;
4234          case MXU_OPTN3_PTN3:
4235              {
4236                  /*                                         */
4237                  /*         XRb                 XRc         */
4238                  /*              +-------------------+      */
4239                  /*    A   B   C | D       E   F   G | H    */
4240                  /*              +---------+---------+      */
4241                  /*                        |                */
4242                  /*                       XRa               */
4243                  /*                                         */
4244  
4245                  TCGv_i32 t0 = tcg_temp_new();
4246                  TCGv_i32 t1 = tcg_temp_new();
4247  
4248                  tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
4249                  tcg_gen_shli_i32(t0, t0, 24);
4250  
4251                  tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
4252                  tcg_gen_shri_i32(t1, t1, 8);
4253  
4254                  tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
4255              }
4256              break;
4257          case MXU_OPTN3_PTN4:
4258              {
4259                  /*                                         */
4260                  /*         XRb                 XRc         */
4261                  /*                     +---------------+   */
4262                  /*    A   B   C   D    | E   F   G   H |   */
4263                  /*                     +-------+-------+   */
4264                  /*                             |           */
4265                  /*                            XRa          */
4266                  /*                                         */
4267  
4268                  tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4269              }
4270              break;
4271          }
4272      }
4273  }
4274  
4275  /*
4276   *  S32ALN XRc, XRb, XRa, rs
4277   *    Arrange bytes from XRb and XRc according to one of five sets of
4278   *    rules determined by rs[2:0], and place the result in XRa.
4279   */
static void gen_mxu_S32ALN(DisasContext *ctx)
{
    const uint32_t rs  = extract32(ctx->opcode, 21, 5);
    const uint32_t XRc = extract32(ctx->opcode, 14, 4);
    const uint32_t XRb = extract32(ctx->opcode, 10, 4);
    const uint32_t XRa = extract32(ctx->opcode,  6, 4);
    TCGv src_b, src_c, sel, rshift;
    TCGLabel *done, *pick_b, *pick_c;

    /* Destination is the zero register: nothing to generate. */
    if (unlikely(XRa == 0)) {
        return;
    }

    /* Both operands are the zero register: just clear the destination. */
    if (unlikely(XRb == 0 && XRc == 0)) {
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
        return;
    }

    /* General case: the pattern is only known at run time (rs[2:0]). */
    src_b = tcg_temp_new();
    src_c = tcg_temp_new();
    sel = tcg_temp_new();
    rshift = tcg_temp_new();
    done = gen_new_label();
    pick_b = gen_new_label();
    pick_c = gen_new_label();

    gen_load_mxu_gpr(src_b, XRb);
    gen_load_mxu_gpr(src_c, XRc);
    gen_load_gpr(sel, rs);
    tcg_gen_andi_tl(sel, sel, 0x07);

    /* Patterns 5..7 are undefined: leave XRa untouched. */
    tcg_gen_brcondi_tl(TCG_COND_GE, sel, 5, done);

    /* Patterns 0 and 4 are plain copies of XRb and XRc respectively. */
    tcg_gen_brcondi_tl(TCG_COND_EQ, sel, 0, pick_b);
    tcg_gen_brcondi_tl(TCG_COND_EQ, sel, 4, pick_c);

    /*
     * Patterns 1..3: shift XRb left by 8 * pattern bits and fill the
     * vacated low bits from the top of XRc (right shift by the
     * complementary amount, 32 - 8 * pattern).
     */
    tcg_gen_shli_tl(sel, sel, 3);
    tcg_gen_subfi_tl(rshift, 32, sel);

    tcg_gen_shl_tl(src_b, src_b, sel);
    tcg_gen_shr_tl(src_c, src_c, rshift);
    tcg_gen_or_tl(mxu_gpr[XRa - 1], src_b, src_c);
    tcg_gen_br(done);

    gen_set_label(pick_b);
    gen_store_mxu_gpr(src_b, XRa);
    tcg_gen_br(done);

    gen_set_label(pick_c);
    gen_store_mxu_gpr(src_c, XRa);

    gen_set_label(done);
}
4333  
/*
 *  S32MADD XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent add,
 *    result stored in {XRa, XRd} pair; HI/LO are overwritten as well.
 *  S32MADDU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent add,
 *    result stored in {XRa, XRd} pair; HI/LO are overwritten as well.
 *  S32MSUB XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent subtract,
 *    result stored in {XRa, XRd} pair; HI/LO are overwritten as well.
 *  S32MSUBU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent subtract,
 *    result stored in {XRa, XRd} pair; HI/LO are overwritten as well.
 */
static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
{
    const uint32_t XRa = extract32(ctx->opcode,  6, 4);
    const uint32_t XRd = extract32(ctx->opcode, 10, 4);
    const uint32_t Rb  = extract32(ctx->opcode, 16, 5);
    const uint32_t Rc  = extract32(ctx->opcode, 21, 5);
    TCGv va, vd;
    TCGv_i64 product, accum;

    /* A zero-register factor makes the product zero: x +/- 0 == x. */
    if (unlikely(Rb == 0 || Rc == 0)) {
        return;
    }

    /* Both halves of the destination pair are XR0: result is dropped. */
    if (unlikely(XRa == 0 && XRd == 0)) {
        return;
    }

    /* va/vd first carry rb/rc, then are reused for the XRa/XRd words. */
    va = tcg_temp_new();
    vd = tcg_temp_new();
    product = tcg_temp_new_i64();
    accum = tcg_temp_new_i64();

    gen_load_gpr(va, Rb);
    gen_load_gpr(vd, Rc);

    /* Widen both factors to 64 bits, zero- or sign-extending per 'uns'. */
    if (uns) {
        tcg_gen_extu_tl_i64(product, va);
        tcg_gen_extu_tl_i64(accum, vd);
    } else {
        tcg_gen_ext_tl_i64(product, va);
        tcg_gen_ext_tl_i64(accum, vd);
    }
    tcg_gen_mul_i64(product, product, accum);

    /* Build the 64-bit accumulator: XRa is the high word, XRd the low. */
    gen_load_mxu_gpr(va, XRa);
    gen_load_mxu_gpr(vd, XRd);
    tcg_gen_concat_tl_i64(accum, vd, va);

    if (sub) {
        tcg_gen_sub_i64(accum, accum, product);
    } else {
        tcg_gen_add_i64(accum, accum, product);
    }

    gen_move_low32(vd, accum);
    gen_move_high32(va, accum);

    /* The result is mirrored into HI/LO as well as the XR pair. */
    tcg_gen_mov_tl(cpu_HI[0], va);
    tcg_gen_mov_tl(cpu_LO[0], vd);

    gen_store_mxu_gpr(vd, XRd);
    gen_store_mxu_gpr(va, XRa);
}
4398  
4399  /*
4400   * Decoding engine for MXU
4401   * =======================
4402   */
4403  
/*
 * MXU pool 00: the minor opcode in bits 20..18 selects one of the
 * max/min or Q8 set-less-than generators; anything else is reserved.
 */
static void decode_opc_mxu__pool00(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    if (minor == OPC_MXU_S32MAX || minor == OPC_MXU_S32MIN) {
        gen_mxu_S32MAX_S32MIN(ctx);
    } else if (minor == OPC_MXU_D16MAX || minor == OPC_MXU_D16MIN) {
        gen_mxu_D16MAX_D16MIN(ctx);
    } else if (minor == OPC_MXU_Q8MAX || minor == OPC_MXU_Q8MIN) {
        gen_mxu_Q8MAX_Q8MIN(ctx);
    } else if (minor == OPC_MXU_Q8SLT) {
        gen_mxu_q8slt(ctx, false);
    } else if (minor == OPC_MXU_Q8SLTU) {
        gen_mxu_q8slt(ctx, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4433  
/*
 * Decode the S32MADD/S32MADDU/S32MSUB/S32MSUBU group.
 *
 * Returns true when an MXU instruction was translated, false when the
 * encoding does not belong to MXU (bits 15..14 != 2, or the low six
 * bits are not one of the four opcodes above).
 */
static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
{
    const uint32_t funct = extract32(ctx->opcode, 0, 6);
    bool sub, uns;

    /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
    if (extract32(ctx->opcode, 14, 2) != 2) {
        return false;
    }

    if (funct == OPC_MXU_S32MADD) {
        sub = false;
        uns = false;
    } else if (funct == OPC_MXU_S32MADDU) {
        sub = false;
        uns = true;
    } else if (funct == OPC_MXU_S32MSUB) {
        sub = true;
        uns = false;
    } else if (funct == OPC_MXU_S32MSUBU) {
        sub = true;
        uns = true;
    } else {
        return false;
    }

    gen_mxu_s32madd_sub(ctx, sub, uns);
    return true;
}
4462  
/*
 * MXU pool 01: dispatch on the minor opcode held in bits 20..18.
 * Unknown minors raise a reserved-instruction exception.
 */
static void decode_opc_mxu__pool01(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    if (minor == OPC_MXU_S32SLT) {
        gen_mxu_S32SLT(ctx);
    } else if (minor == OPC_MXU_D16SLT) {
        gen_mxu_D16SLT(ctx);
    } else if (minor == OPC_MXU_D16AVG) {
        gen_mxu_d16avg(ctx, false);
    } else if (minor == OPC_MXU_D16AVGR) {
        gen_mxu_d16avg(ctx, true);
    } else if (minor == OPC_MXU_Q8AVG) {
        gen_mxu_q8avg(ctx, false);
    } else if (minor == OPC_MXU_Q8AVGR) {
        gen_mxu_q8avg(ctx, true);
    } else if (minor == OPC_MXU_Q8ADD) {
        gen_mxu_Q8ADD(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4495  
/*
 * MXU pool 02: dispatch on the minor opcode held in bits 20..18.
 * Unknown minors raise a reserved-instruction exception.
 */
static void decode_opc_mxu__pool02(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    if (minor == OPC_MXU_S32CPS) {
        gen_mxu_S32CPS(ctx);
    } else if (minor == OPC_MXU_D16CPS) {
        gen_mxu_D16CPS(ctx);
    } else if (minor == OPC_MXU_Q8ABD) {
        gen_mxu_Q8ABD(ctx);
    } else if (minor == OPC_MXU_Q16SAT) {
        gen_mxu_Q16SAT(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4519  
/*
 * MXU pool 03: bits 25..24 choose between D16MULF and D16MULE;
 * other values raise a reserved-instruction exception.
 */
static void decode_opc_mxu__pool03(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 24, 2);

    if (minor == OPC_MXU_D16MULF) {
        gen_mxu_d16mul(ctx, true, true);
    } else if (minor == OPC_MXU_D16MULE) {
        gen_mxu_d16mul(ctx, true, false);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4537  
/*
 * MXU pool 04: every encoding is handed to gen_mxu_s32ldxx().
 * Bits 13..10 are deliberately not decoded since their meaning is
 * unknown yet; bit 20 is forwarded as the 'reversed' argument.
 */
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
    gen_mxu_s32ldxx(ctx, extract32(ctx->opcode, 20, 1), false);
}
4550  
/*
 * MXU pool 05: every encoding is handed to gen_mxu_s32stxx().
 * Bits 13..10 are deliberately not decoded since their meaning is
 * unknown yet; bit 20 is forwarded as the 'reversed' argument.
 */
static void decode_opc_mxu__pool05(DisasContext *ctx)
{
    gen_mxu_s32stxx(ctx, extract32(ctx->opcode, 20, 1), false);
}
4563  
/*
 * MXU pool 06: S32LDST/S32LDSTR (minor opcode in bits 13..10) with a
 * stride field in bits 15..14.  A stride value above 2 or any other
 * minor opcode raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool06(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 10, 4);
    const uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    if ((minor == OPC_MXU_S32LDST || minor == OPC_MXU_S32LDSTR)
        && strd2 <= 2) {
        gen_mxu_s32ldxvx(ctx, minor, false, strd2);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4583  
/*
 * MXU pool 07: S32LDST/S32LDSTR (minor opcode in bits 13..10) routed to
 * the store generator, with a stride field in bits 15..14.  A stride
 * value above 2 or any other minor opcode raises a reserved-instruction
 * exception.
 */
static void decode_opc_mxu__pool07(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 10, 4);
    const uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    if ((minor == OPC_MXU_S32LDST || minor == OPC_MXU_S32LDSTR)
        && strd2 <= 2) {
        gen_mxu_s32stxvx(ctx, minor, false, strd2);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4603  
/*
 * MXU pool 08: every encoding is handed to gen_mxu_s32ldxx() with its
 * last argument set.  Bits 13..10 are deliberately not decoded since
 * their meaning is unknown yet; bit 20 is forwarded as 'reversed'.
 */
static void decode_opc_mxu__pool08(DisasContext *ctx)
{
    gen_mxu_s32ldxx(ctx, extract32(ctx->opcode, 20, 1), true);
}
4616  
/*
 * MXU pool 09: every encoding is handed to gen_mxu_s32stxx() with its
 * last argument set.  Bits 13..10 are deliberately not decoded since
 * their meaning is unknown yet; bit 20 is forwarded as 'reversed'.
 */
static void decode_opc_mxu__pool09(DisasContext *ctx)
{
    gen_mxu_s32stxx(ctx, extract32(ctx->opcode, 20, 1), true);
}
4629  
/*
 * MXU pool 10: same decoding as pool 06 (minor opcode in bits 13..10,
 * stride in bits 15..14) but calls gen_mxu_s32ldxvx() with its third
 * argument set.  Invalid combinations raise a reserved-instruction
 * exception.
 */
static void decode_opc_mxu__pool10(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 10, 4);
    const uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    if ((minor == OPC_MXU_S32LDST || minor == OPC_MXU_S32LDSTR)
        && strd2 <= 2) {
        gen_mxu_s32ldxvx(ctx, minor, true, strd2);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4649  
/*
 * MXU pool 11: same decoding as pool 07 (minor opcode in bits 13..10,
 * stride in bits 15..14) but calls gen_mxu_s32stxvx() with its third
 * argument set.  Invalid combinations raise a reserved-instruction
 * exception.
 */
static void decode_opc_mxu__pool11(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 10, 4);
    const uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    if ((minor == OPC_MXU_S32LDST || minor == OPC_MXU_S32LDSTR)
        && strd2 <= 2) {
        gen_mxu_s32stxvx(ctx, minor, true, strd2);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4669  
/*
 * MXU pool 12: bits 23..22 select D32ACC, D32ACCM or D32ASUM;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool12(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 22, 2);

    if (minor == OPC_MXU_D32ACC) {
        gen_mxu_d32acc(ctx);
    } else if (minor == OPC_MXU_D32ACCM) {
        gen_mxu_d32accm(ctx);
    } else if (minor == OPC_MXU_D32ASUM) {
        gen_mxu_d32asum(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4690  
/*
 * MXU pool 13: bits 23..22 select Q16ACC, Q16ACCM or D16ASUM;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool13(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 22, 2);

    if (minor == OPC_MXU_Q16ACC) {
        gen_mxu_q16acc(ctx);
    } else if (minor == OPC_MXU_Q16ACCM) {
        gen_mxu_q16accm(ctx);
    } else if (minor == OPC_MXU_D16ASUM) {
        gen_mxu_d16asum(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4711  
/*
 * MXU pool 14: bits 23..22 select Q8ADDE, D8SUM or D8SUMC;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool14(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 22, 2);

    if (minor == OPC_MXU_Q8ADDE) {
        gen_mxu_q8adde(ctx, false);
    } else if (minor == OPC_MXU_D8SUM) {
        gen_mxu_d8sum(ctx, false);
    } else if (minor == OPC_MXU_D8SUMC) {
        gen_mxu_d8sum(ctx, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4732  
/*
 * MXU pool 15: bits 15..14 select S32MUL, S32MULU, S32EXTR or S32EXTRV;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool15(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 14, 2);

    if (minor == OPC_MXU_S32MUL) {
        gen_mxu_s32mul(ctx, false);
    } else if (minor == OPC_MXU_S32MULU) {
        gen_mxu_s32mul(ctx, true);
    } else if (minor == OPC_MXU_S32EXTR) {
        gen_mxu_s32extr(ctx);
    } else if (minor == OPC_MXU_S32EXTRV) {
        gen_mxu_s32extrv(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4756  
/*
 * MXU pool 16: dispatch on the minor opcode held in bits 20..18
 * (D32SARW, S32ALN, S32ALNI, S32LUI and the S32 bitwise operations).
 * Each recognised minor returns right after its generator; falling out
 * of the switch means the encoding is reserved.
 */
static void decode_opc_mxu__pool16(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    switch (minor) {
    case OPC_MXU_D32SARW:
        gen_mxu_d32sarl(ctx, true);
        return;
    case OPC_MXU_S32ALN:
        gen_mxu_S32ALN(ctx);
        return;
    case OPC_MXU_S32ALNI:
        gen_mxu_S32ALNI(ctx);
        return;
    case OPC_MXU_S32LUI:
        gen_mxu_s32lui(ctx);
        return;
    case OPC_MXU_S32NOR:
        gen_mxu_S32NOR(ctx);
        return;
    case OPC_MXU_S32AND:
        gen_mxu_S32AND(ctx);
        return;
    case OPC_MXU_S32OR:
        gen_mxu_S32OR(ctx);
        return;
    case OPC_MXU_S32XOR:
        gen_mxu_S32XOR(ctx);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4792  
/*
 * MXU pool 17: LXW/LXB/LXH/LXBU/LXHU loads.  The minor opcode sits in
 * bits 8..6 and the stride field in bits 10..9.  A stride value above 2
 * or an unknown minor opcode raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool17(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 6, 3);
    const uint32_t strd2 = extract32(ctx->opcode, 9, 2);

    if (strd2 <= 2) {
        switch (minor) {
        case OPC_MXU_LXW:
            gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | MO_UL);
            return;
        case OPC_MXU_LXB:
            gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | MO_SB);
            return;
        case OPC_MXU_LXH:
            gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | MO_SW);
            return;
        case OPC_MXU_LXBU:
            gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | MO_UB);
            return;
        case OPC_MXU_LXHU:
            gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | MO_UW);
            return;
        }
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4826  
/*
 * MXU pool 18: variable shifts.  The minor opcode in bits 20..18 picks
 * the D32 or Q16 generator and the two flags passed to it.  Falling out
 * of the switch means the encoding is reserved.
 */
static void decode_opc_mxu__pool18(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    switch (minor) {
    case OPC_MXU_D32SLLV:
        gen_mxu_d32sxxv(ctx, false, false);
        return;
    case OPC_MXU_D32SLRV:
        gen_mxu_d32sxxv(ctx, true, false);
        return;
    case OPC_MXU_D32SARV:
        gen_mxu_d32sxxv(ctx, true, true);
        return;
    case OPC_MXU_Q16SLLV:
        gen_mxu_q16sxxv(ctx, false, false);
        return;
    case OPC_MXU_Q16SLRV:
        gen_mxu_q16sxxv(ctx, true, false);
        return;
    case OPC_MXU_Q16SARV:
        gen_mxu_q16sxxv(ctx, true, true);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4856  
/*
 * MXU pool 19: the 4-bit field at bits 25..22 selects Q8MUL or Q8MULSU;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 22, 4);

    if (minor == OPC_MXU_Q8MUL) {
        gen_mxu_q8mul_mac(ctx, false, false);
    } else if (minor == OPC_MXU_Q8MULSU) {
        gen_mxu_q8mul_mac(ctx, true, false);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4874  
/*
 * MXU pool 20: conditional moves.  The minor opcode in bits 20..18
 * picks the Q8/D16/S32 generator; the MOVZ forms pass TCG_COND_NE and
 * the MOVN forms pass TCG_COND_EQ.  Falling out of the switch means the
 * encoding is reserved.
 */
static void decode_opc_mxu__pool20(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 18, 3);

    switch (minor) {
    case OPC_MXU_Q8MOVZ:
        gen_mxu_q8movzn(ctx, TCG_COND_NE);
        return;
    case OPC_MXU_Q8MOVN:
        gen_mxu_q8movzn(ctx, TCG_COND_EQ);
        return;
    case OPC_MXU_D16MOVZ:
        gen_mxu_d16movzn(ctx, TCG_COND_NE);
        return;
    case OPC_MXU_D16MOVN:
        gen_mxu_d16movzn(ctx, TCG_COND_EQ);
        return;
    case OPC_MXU_S32MOVZ:
        gen_mxu_s32movzn(ctx, TCG_COND_NE);
        return;
    case OPC_MXU_S32MOVN:
        gen_mxu_s32movzn(ctx, TCG_COND_EQ);
        return;
    }

    MIPS_INVAL("decode_opc_mxu");
    gen_reserved_instruction(ctx);
}
4904  
/*
 * MXU pool 21: bits 23..22 select Q8MAC or Q8MACSU;
 * any other value raises a reserved-instruction exception.
 */
static void decode_opc_mxu__pool21(DisasContext *ctx)
{
    const uint32_t minor = extract32(ctx->opcode, 22, 2);

    if (minor == OPC_MXU_Q8MAC) {
        gen_mxu_q8mul_mac(ctx, false, true);
    } else if (minor == OPC_MXU_Q8MACSU) {
        gen_mxu_q8mul_mac(ctx, true, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4922  
4923  
/*
 * Top-level MXU decoder.
 *
 * Dispatches on the low six bits of @insn.  S32M2I and S32I2M are
 * translated unconditionally.  Every other instruction is preceded by a
 * run-time test of the MXU_EN bit in the MXU control register: when the
 * bit is clear the generated code branches over the instruction body.
 *
 * Returns true if the instruction was translated as an MXU instruction,
 * false if the encoding is not claimed by MXU and the caller should
 * continue decoding.
 */
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
    uint32_t opcode = extract32(insn, 0, 6);
    bool handled = true;

    if (opcode == OPC_MXU_S32M2I) {
        gen_mxu_s32m2i(ctx);
        return true;
    }

    if (opcode == OPC_MXU_S32I2M) {
        gen_mxu_s32i2m(ctx);
        return true;
    }

    {
        TCGv t_mxu_cr = tcg_temp_new();
        TCGLabel *l_exit = gen_new_label();

        /* Skip the instruction body at run time when MXU is disabled. */
        gen_load_mxu_cr(t_mxu_cr);
        tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
        tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);

        /*
         * No case may return from inside this switch: the branch emitted
         * above references l_exit, so the label must be bound below on
         * every path, including the "not an MXU instruction" ones.
         */
        switch (opcode) {
        case OPC_MXU_S32MADD:
        case OPC_MXU_S32MADDU:
        case OPC_MXU_S32MSUB:
        case OPC_MXU_S32MSUBU:
            /* May decline: these opcodes overlap MIPS32R1 MADD/MSUB. */
            handled = decode_opc_mxu_s32madd_sub(ctx);
            break;
        case OPC_MXU__POOL00:
            decode_opc_mxu__pool00(ctx);
            break;
        case OPC_MXU_D16MUL:
            gen_mxu_d16mul(ctx, false, false);
            break;
        case OPC_MXU_D16MAC:
            gen_mxu_d16mac(ctx, false, false);
            break;
        case OPC_MXU_D16MACF:
            gen_mxu_d16mac(ctx, true, true);
            break;
        case OPC_MXU_D16MADL:
            gen_mxu_d16madl(ctx);
            break;
        case OPC_MXU_S16MAD:
            gen_mxu_s16mad(ctx);
            break;
        case OPC_MXU_Q16ADD:
            gen_mxu_q16add(ctx);
            break;
        case OPC_MXU_D16MACE:
            gen_mxu_d16mac(ctx, true, false);
            break;
        case OPC_MXU__POOL01:
            decode_opc_mxu__pool01(ctx);
            break;
        case OPC_MXU__POOL02:
            decode_opc_mxu__pool02(ctx);
            break;
        case OPC_MXU__POOL03:
            decode_opc_mxu__pool03(ctx);
            break;
        case OPC_MXU__POOL04:
            decode_opc_mxu__pool04(ctx);
            break;
        case OPC_MXU__POOL05:
            decode_opc_mxu__pool05(ctx);
            break;
        case OPC_MXU__POOL06:
            decode_opc_mxu__pool06(ctx);
            break;
        case OPC_MXU__POOL07:
            decode_opc_mxu__pool07(ctx);
            break;
        case OPC_MXU__POOL08:
            decode_opc_mxu__pool08(ctx);
            break;
        case OPC_MXU__POOL09:
            decode_opc_mxu__pool09(ctx);
            break;
        case OPC_MXU__POOL10:
            decode_opc_mxu__pool10(ctx);
            break;
        case OPC_MXU__POOL11:
            decode_opc_mxu__pool11(ctx);
            break;
        case OPC_MXU_D32ADD:
            gen_mxu_d32add(ctx);
            break;
        case OPC_MXU__POOL12:
            decode_opc_mxu__pool12(ctx);
            break;
        case OPC_MXU__POOL13:
            decode_opc_mxu__pool13(ctx);
            break;
        case OPC_MXU__POOL14:
            decode_opc_mxu__pool14(ctx);
            break;
        case OPC_MXU_Q8ACCE:
            gen_mxu_q8adde(ctx, true);
            break;
        case OPC_MXU_S8LDD:
            gen_mxu_s8ldd(ctx, false);
            break;
        case OPC_MXU_S8STD:
            gen_mxu_s8std(ctx, false);
            break;
        case OPC_MXU_S8LDI:
            gen_mxu_s8ldd(ctx, true);
            break;
        case OPC_MXU_S8SDI:
            gen_mxu_s8std(ctx, true);
            break;
        case OPC_MXU__POOL15:
            decode_opc_mxu__pool15(ctx);
            break;
        case OPC_MXU__POOL16:
            decode_opc_mxu__pool16(ctx);
            break;
        case OPC_MXU__POOL17:
            decode_opc_mxu__pool17(ctx);
            break;
        case OPC_MXU_S16LDD:
            gen_mxu_s16ldd(ctx, false);
            break;
        case OPC_MXU_S16STD:
            gen_mxu_s16std(ctx, false);
            break;
        case OPC_MXU_S16LDI:
            gen_mxu_s16ldd(ctx, true);
            break;
        case OPC_MXU_S16SDI:
            gen_mxu_s16std(ctx, true);
            break;
        case OPC_MXU_D32SLL:
            gen_mxu_d32sxx(ctx, false, false);
            break;
        case OPC_MXU_D32SLR:
            gen_mxu_d32sxx(ctx, true, false);
            break;
        case OPC_MXU_D32SARL:
            gen_mxu_d32sarl(ctx, false);
            break;
        case OPC_MXU_D32SAR:
            gen_mxu_d32sxx(ctx, true, true);
            break;
        case OPC_MXU_Q16SLL:
            gen_mxu_q16sxx(ctx, false, false);
            break;
        case OPC_MXU__POOL18:
            decode_opc_mxu__pool18(ctx);
            break;
        case OPC_MXU_Q16SLR:
            gen_mxu_q16sxx(ctx, true, false);
            break;
        case OPC_MXU_Q16SAR:
            gen_mxu_q16sxx(ctx, true, true);
            break;
        case OPC_MXU__POOL19:
            decode_opc_mxu__pool19(ctx);
            break;
        case OPC_MXU__POOL20:
            decode_opc_mxu__pool20(ctx);
            break;
        case OPC_MXU__POOL21:
            decode_opc_mxu__pool21(ctx);
            break;
        case OPC_MXU_Q16SCOP:
            gen_mxu_q16scop(ctx);
            break;
        case OPC_MXU_Q8MADL:
            gen_mxu_q8madl(ctx);
            break;
        case OPC_MXU_S32SFL:
            gen_mxu_s32sfl(ctx);
            break;
        case OPC_MXU_Q8SAD:
            gen_mxu_q8sad(ctx);
            break;
        default:
            /* Not an MXU opcode; let the caller try other decoders. */
            handled = false;
            break;
        }

        /*
         * Always bind the label.  When 'handled' is false the branch
         * above simply lands here and whatever the caller generates
         * next runs regardless of MXU_EN.
         */
        gen_set_label(l_exit);
    }

    return handled;
}
5111