xref: /openbmc/qemu/target/mips/tcg/msa_helper.c (revision 2df1eb27)
1 /*
2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3  *
4  * Copyright (c) 2014 Imagination Technologies
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "tcg/tcg.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/helper-proto.h"
27 #include "exec/memop.h"
28 #include "fpu/softfloat.h"
29 #include "fpu_helper.h"
30 
/*
 * Data format min and max values.
 *
 * DF_BITS(df) maps a data-format selector to an element width in bits:
 * df = 0, 1, 2, 3 gives 8, 16, 32, 64.  The M_* variants take the width
 * in bits (m) directly; m must be in the range 1..64.
 *
 * All shifts that can reach the sign bit are performed on unsigned
 * operands so that the 64-bit cases do not rely on signed-overflow
 * behaviour.
 */
#define DF_BITS(df) (1 << ((df) + 3))

#define DF_MAX_INT(df)  ((int64_t)((1ULL << (DF_BITS(df) - 1)) - 1))
#define M_MAX_INT(m)    ((int64_t)((1ULL << ((m)         - 1)) - 1))

/* Minimum is computed as -MAX - 1 so that no negation can overflow. */
#define DF_MIN_INT(df)  (-DF_MAX_INT(df) - 1)
#define M_MIN_INT(m)    (-M_MAX_INT(m) - 1)

#define DF_MAX_UINT(df) ((uint64_t)(-1ULL >> (64 - DF_BITS(df))))
#define M_MAX_UINT(m)   ((uint64_t)(-1ULL >> (64 - (m))))

/* Keep only the low DF_BITS(df) bits of x (zero-extended result). */
#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
/*
 * Sign-extend the low DF_BITS(df) bits of x to 64 bits.  The left shift is
 * done on uint64_t; the conversion back to int64_t and the arithmetic right
 * shift rely on two's-complement behaviour of the compiler.
 */
#define SIGNED(x, df)                                                   \
    ((int64_t)((uint64_t)(x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))

/* Element-by-element access macros */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
49 
50 
51 
52 /*
53  * Bit Count
54  * ---------
55  *
56  * +---------------+----------------------------------------------------------+
57  * | NLOC.B        | Vector Leading Ones Count (byte)                         |
58  * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
59  * | NLOC.W        | Vector Leading Ones Count (word)                         |
60  * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
61  * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
62  * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
63  * | NLZC.W        | Vector Leading Zeros Count (word)                        |
64  * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
65  * | PCNT.B        | Vector Population Count (byte)                           |
66  * | PCNT.H        | Vector Population Count (halfword)                       |
67  * | PCNT.W        | Vector Population Count (word)                           |
68  * | PCNT.D        | Vector Population Count (doubleword)                     |
69  * +---------------+----------------------------------------------------------+
70  */
71 
/*
 * Count the leading zero bits of arg within an element of DF_BITS(df) bits.
 *
 * The value is first reduced to its low DF_BITS(df) bits.  The loop then
 * probes with halving shift distances: whenever the upper part is non-zero
 * the count is reduced by the shift and the search continues in that upper
 * part.  After the loop the remaining value is 0 or 1 and is subtracted
 * from the count.
 */
static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
{
    const int width = DF_BITS(df);
    uint64_t value = UNSIGNED(arg, df);
    int zeros = width;
    int step;

    for (step = width / 2; step != 0; step >>= 1) {
        uint64_t upper = value >> step;

        if (upper != 0) {
            zeros -= step;
            value = upper;
        }
    }

    return zeros - value;
}
92 
/*
 * Count the leading one bits of arg within an element of DF_BITS(df) bits,
 * computed as the leading-zero count of the bitwise complement.
 */
static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
{
    uint64_t inverted = UNSIGNED(~arg, df);

    return msa_nlzc_df(df, inverted);
}
97 
98 void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
99 {
100     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
101     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
102 
103     pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
104     pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
105     pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
106     pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
107     pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
108     pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
109     pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
110     pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
111     pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
112     pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
113     pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
114     pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
115     pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
116     pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
117     pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
118     pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
119 }
120 
121 void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
122 {
123     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
124     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
125 
126     pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
127     pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
128     pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
129     pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
130     pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
131     pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
132     pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
133     pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
134 }
135 
136 void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
137 {
138     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
139     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
140 
141     pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
142     pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
143     pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
144     pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
145 }
146 
147 void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
148 {
149     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
150     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
151 
152     pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
153     pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
154 }
155 
156 void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
157 {
158     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
159     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
160 
161     pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
162     pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
163     pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
164     pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
165     pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
166     pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
167     pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
168     pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
169     pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
170     pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
171     pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
172     pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
173     pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
174     pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
175     pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
176     pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
177 }
178 
179 void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
180 {
181     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
182     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
183 
184     pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
185     pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
186     pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
187     pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
188     pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
189     pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
190     pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
191     pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
192 }
193 
194 void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
195 {
196     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
197     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
198 
199     pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
200     pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
201     pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
202     pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
203 }
204 
205 void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
206 {
207     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
208     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
209 
210     pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
211     pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
212 }
213 
/*
 * Count the set bits in the low DF_BITS(df) bits of arg.
 *
 * Adjacent bit fields are summed pairwise in six stages (field widths
 * 1, 2, 4, 8, 16 and 32 bits); after the last stage the whole 64-bit
 * value holds the total.
 */
static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
{
    static const uint64_t field_mask[6] = {
        0x5555555555555555ULL,
        0x3333333333333333ULL,
        0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL,
        0x0000FFFF0000FFFFULL,
        0x00000000FFFFFFFFULL,
    };
    uint64_t acc = UNSIGNED(arg, df);
    int stage;

    for (stage = 0; stage < 6; stage++) {
        const uint64_t mask = field_mask[stage];

        /* Shift distance doubles each stage: 1, 2, 4, 8, 16, 32. */
        acc = (acc & mask) + ((acc >> (1 << stage)) & mask);
    }

    return acc;
}
229 
230 void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
231 {
232     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
233     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
234 
235     pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
236     pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
237     pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
238     pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
239     pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
240     pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
241     pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
242     pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
243     pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
244     pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
245     pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
246     pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
247     pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
248     pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
249     pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
250     pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
251 }
252 
253 void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
254 {
255     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
256     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
257 
258     pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
259     pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
260     pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
261     pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
262     pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
263     pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
264     pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
265     pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
266 }
267 
268 void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
269 {
270     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
271     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
272 
273     pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
274     pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
275     pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
276     pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
277 }
278 
279 void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
280 {
281     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
282     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
283 
284     pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
285     pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
286 }
287 
288 
289 /*
290  * Bit Move
291  * --------
292  *
293  * +---------------+----------------------------------------------------------+
294  * | BINSL.B       | Vector Bit Insert Left (byte)                            |
295  * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
296  * | BINSL.W       | Vector Bit Insert Left (word)                            |
297  * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
298  * | BINSR.B       | Vector Bit Insert Right (byte)                           |
299  * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
300  * | BINSR.W       | Vector Bit Insert Right (word)                           |
301  * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
302  * | BMNZ.V        | Vector Bit Move If Not Zero                              |
303  * | BMZ.V         | Vector Bit Move If Zero                                  |
304  * | BSEL.V        | Vector Bit Select                                        |
305  * +---------------+----------------------------------------------------------+
306  */
307 
/*
 * Reduce x to a bit index inside an element of DF_BITS(df) bits.
 * DF_BITS(df) is a power of two, so masking with (DF_BITS(df) - 1) is the
 * same as taking the unsigned remainder.
 */
#define BIT_POSITION(x, df) ((uint64_t)(x) & (uint64_t)(DF_BITS(df) - 1))
310 
/*
 * Bit insert left for one element of DF_BITS(df) bits.
 *
 * n = BIT_POSITION(arg2, df) + 1 selects how many of the most significant
 * bits are taken from arg1; the remaining low bits are kept from dest.
 * When n equals the element width the result is arg1 (masked to the
 * element width) and no shifting is done.
 */
static inline int64_t msa_binsl_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t old = UNSIGNED(dest, df);
    int32_t taken = BIT_POSITION(arg2, df) + 1;
    int32_t kept = DF_BITS(df) - taken;
    uint64_t low_part, high_part;

    if (taken == DF_BITS(df)) {
        return src;
    }

    /* Clear the top 'taken' bits of the old value. */
    low_part = UNSIGNED(UNSIGNED(old << taken, df) >> taken, df);
    /* Clear the bottom 'kept' bits of the source value. */
    high_part = UNSIGNED(UNSIGNED(src >> kept, df) << kept, df);

    return low_part | high_part;
}
325 
326 void helper_msa_binsl_b(CPUMIPSState *env,
327                         uint32_t wd, uint32_t ws, uint32_t wt)
328 {
329     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
330     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
331     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
332 
333     pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
334     pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
335     pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
336     pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
337     pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
338     pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
339     pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
340     pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
341     pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
342     pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
343     pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
344     pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
345     pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
346     pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
347     pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
348     pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
349 }
350 
351 void helper_msa_binsl_h(CPUMIPSState *env,
352                         uint32_t wd, uint32_t ws, uint32_t wt)
353 {
354     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
355     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
356     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
357 
358     pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
359     pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
360     pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
361     pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
362     pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
363     pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
364     pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
365     pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
366 }
367 
368 void helper_msa_binsl_w(CPUMIPSState *env,
369                         uint32_t wd, uint32_t ws, uint32_t wt)
370 {
371     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
372     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
373     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
374 
375     pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
376     pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
377     pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
378     pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
379 }
380 
381 void helper_msa_binsl_d(CPUMIPSState *env,
382                         uint32_t wd, uint32_t ws, uint32_t wt)
383 {
384     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
385     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
386     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
387 
388     pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
389     pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
390 }
391 
/*
 * Bit insert right for one element of DF_BITS(df) bits.
 *
 * n = BIT_POSITION(arg2, df) + 1 selects how many of the least significant
 * bits are taken from arg1; the remaining high bits are kept from dest.
 * When n equals the element width the result is arg1 (masked to the
 * element width) and no shifting is done.
 */
static inline int64_t msa_binsr_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t old = UNSIGNED(dest, df);
    int32_t taken = BIT_POSITION(arg2, df) + 1;
    int32_t kept = DF_BITS(df) - taken;
    uint64_t high_part, low_part;

    if (taken == DF_BITS(df)) {
        return src;
    }

    /* Clear the bottom 'taken' bits of the old value. */
    high_part = UNSIGNED(UNSIGNED(old >> taken, df) << taken, df);
    /* Clear the top 'kept' bits of the source value. */
    low_part = UNSIGNED(UNSIGNED(src << kept, df) >> kept, df);

    return high_part | low_part;
}
406 
407 void helper_msa_binsr_b(CPUMIPSState *env,
408                         uint32_t wd, uint32_t ws, uint32_t wt)
409 {
410     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
411     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
412     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
413 
414     pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
415     pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
416     pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
417     pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
418     pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
419     pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
420     pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
421     pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
422     pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
423     pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
424     pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
425     pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
426     pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
427     pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
428     pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
429     pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
430 }
431 
432 void helper_msa_binsr_h(CPUMIPSState *env,
433                         uint32_t wd, uint32_t ws, uint32_t wt)
434 {
435     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
436     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
437     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
438 
439     pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
440     pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
441     pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
442     pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
443     pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
444     pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
445     pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
446     pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
447 }
448 
449 void helper_msa_binsr_w(CPUMIPSState *env,
450                         uint32_t wd, uint32_t ws, uint32_t wt)
451 {
452     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
453     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
454     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
455 
456     pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
457     pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
458     pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
459     pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
460 }
461 
462 void helper_msa_binsr_d(CPUMIPSState *env,
463                         uint32_t wd, uint32_t ws, uint32_t wt)
464 {
465     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
466     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
467     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
468 
469     pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
470     pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
471 }
472 
473 void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
474 {
475     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
476     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
477     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
478 
479     pwd->d[0] = UNSIGNED(                                                     \
480         ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
481     pwd->d[1] = UNSIGNED(                                                     \
482         ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
483 }
484 
485 void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
486 {
487     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
488     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
489     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
490 
491     pwd->d[0] = UNSIGNED(                                                     \
492         ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
493     pwd->d[1] = UNSIGNED(                                                     \
494         ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
495 }
496 
497 void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
498 {
499     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
500     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
501     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
502 
503     pwd->d[0] = UNSIGNED(                                                     \
504         (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
505     pwd->d[1] = UNSIGNED(                                                     \
506         (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
507 }
508 
509 
510 /*
511  * Bit Set
512  * -------
513  *
514  * +---------------+----------------------------------------------------------+
515  * | BCLR.B        | Vector Bit Clear (byte)                                  |
516  * | BCLR.H        | Vector Bit Clear (halfword)                              |
517  * | BCLR.W        | Vector Bit Clear (word)                                  |
518  * | BCLR.D        | Vector Bit Clear (doubleword)                            |
519  * | BNEG.B        | Vector Bit Negate (byte)                                 |
520  * | BNEG.H        | Vector Bit Negate (halfword)                             |
521  * | BNEG.W        | Vector Bit Negate (word)                                 |
522  * | BNEG.D        | Vector Bit Negate (doubleword)                           |
523  * | BSET.B        | Vector Bit Set (byte)                                    |
524  * | BSET.H        | Vector Bit Set (halfword)                                |
525  * | BSET.W        | Vector Bit Set (word)                                    |
526  * | BSET.D        | Vector Bit Set (doubleword)                              |
527  * +---------------+----------------------------------------------------------+
528  */
529 
/*
 * Clear one bit of arg1 within an element of DF_BITS(df) bits.
 *
 * The bit index is BIT_POSITION(arg2, df).  The single-bit mask is built
 * from an unsigned constant so that index 63 (possible for 64-bit
 * elements) does not shift into the sign bit of a signed type.
 * The result is masked to the element width.
 */
static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 & (~(1ULL << b_arg2)), df);
}
535 
536 void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
537 {
538     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
539     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
540     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
541 
542     pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
543     pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
544     pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
545     pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
546     pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
547     pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
548     pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
549     pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
550     pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
551     pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
552     pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
553     pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
554     pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
555     pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
556     pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
557     pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
558 }
559 
560 void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
561 {
562     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
563     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
564     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
565 
566     pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
567     pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
568     pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
569     pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
570     pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
571     pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
572     pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
573     pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
574 }
575 
576 void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
577 {
578     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
579     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
580     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
581 
582     pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
583     pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
584     pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
585     pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
586 }
587 
588 void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
589 {
590     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
591     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
592     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
593 
594     pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
595     pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
596 }
597 
/*
 * Invert one bit of arg1 within an element of DF_BITS(df) bits.
 *
 * The bit index is BIT_POSITION(arg2, df).  The single-bit mask is built
 * from an unsigned constant so that index 63 (possible for 64-bit
 * elements) does not shift into the sign bit of a signed type.
 * The result is masked to the element width.
 */
static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
}
603 
604 void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
605 {
606     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
607     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
608     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
609 
610     pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
611     pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
612     pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
613     pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
614     pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
615     pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
616     pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
617     pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
618     pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
619     pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
620     pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
621     pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
622     pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
623     pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
624     pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
625     pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
626 }
627 
628 void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
629 {
630     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
631     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
632     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
633 
634     pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
635     pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
636     pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
637     pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
638     pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
639     pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
640     pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
641     pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
642 }
643 
644 void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
645 {
646     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
647     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
648     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
649 
650     pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
651     pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
652     pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
653     pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
654 }
655 
656 void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
657 {
658     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
659     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
660     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
661 
662     pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
663     pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
664 }
665 
/*
 * Set one bit of arg1 within an element of DF_BITS(df) bits.
 *
 * The bit index is BIT_POSITION(arg2, df).  The single-bit mask is built
 * from an unsigned constant so that index 63 (possible for 64-bit
 * elements) does not shift into the sign bit of a signed type.
 * The result is masked to the element width.
 */
static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
        int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
}
672 
673 void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
674 {
675     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
676     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
677     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
678 
679     pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
680     pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
681     pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
682     pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
683     pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
684     pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
685     pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
686     pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
687     pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
688     pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
689     pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
690     pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
691     pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
692     pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
693     pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
694     pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
695 }
696 
697 void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
698 {
699     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
700     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
701     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
702 
703     pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
704     pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
705     pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
706     pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
707     pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
708     pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
709     pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
710     pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
711 }
712 
713 void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
714 {
715     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
716     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
717     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
718 
719     pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
720     pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
721     pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
722     pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
723 }
724 
725 void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
726 {
727     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
728     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
729     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
730 
731     pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
732     pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
733 }
734 
735 
736 /*
737  * Fixed Multiply
738  * --------------
739  *
740  * +---------------+----------------------------------------------------------+
741  * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
742  * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
743  * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
744  * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
745  * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
746  * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
747  * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
748  * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
749  * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
750  * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
751  * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
752  * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
753  * +---------------+----------------------------------------------------------+
754  */
755 
756 /* TODO: insert Fixed Multiply group helpers here */
757 
758 
759 /*
760  * Float Max Min
761  * -------------
762  *
763  * +---------------+----------------------------------------------------------+
764  * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
765  * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
766  * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
767  * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
768  * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
769  * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
770  * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
771  * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
772  * +---------------+----------------------------------------------------------+
773  */
774 
775 /* TODO: insert Float Max Min group helpers here */
776 
777 
778 /*
779  * Int Add
780  * -------
781  *
782  * +---------------+----------------------------------------------------------+
783  * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
784  * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
785  * | ADD_A.W       | Vector Add Absolute Values (word)                        |
786  * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
787  * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
788  * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
789  * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
790  * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
791  * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
792  * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
793  * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
794  * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
795  * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
796  * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
797  * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
798  * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
799  * | ADDV.B        | Vector Add (byte)                                        |
800  * | ADDV.H        | Vector Add (halfword)                                    |
801  * | ADDV.W        | Vector Add (word)                                        |
802  * | ADDV.D        | Vector Add (doubleword)                                  |
803  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
804  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
805  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
806  * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
807  * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
808  * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
809  * +---------------+----------------------------------------------------------+
810  */
811 
812 
/*
 * Add the absolute values of two elements.
 *
 * @df:   data format selector (not used by the computation)
 * @arg1: first signed operand
 * @arg2: second signed operand
 *
 * Returns |arg1| + |arg2| computed modulo 2^64; the callers store the
 * result into an element of the destination register.
 */
static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in the unsigned domain: -INT64_MIN is not representable in
     * int64_t, so the signed negation would be undefined behaviour.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return (int64_t)(abs_arg1 + abs_arg2);
}
819 
820 void helper_msa_add_a_b(CPUMIPSState *env,
821                         uint32_t wd, uint32_t ws, uint32_t wt)
822 {
823     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
824     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
825     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
826 
827     pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
828     pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
829     pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
830     pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
831     pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
832     pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
833     pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
834     pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
835     pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
836     pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
837     pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
838     pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
839     pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
840     pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
841     pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
842     pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
843 }
844 
845 void helper_msa_add_a_h(CPUMIPSState *env,
846                         uint32_t wd, uint32_t ws, uint32_t wt)
847 {
848     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
849     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
850     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
851 
852     pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
853     pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
854     pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
855     pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
856     pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
857     pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
858     pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
859     pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
860 }
861 
862 void helper_msa_add_a_w(CPUMIPSState *env,
863                         uint32_t wd, uint32_t ws, uint32_t wt)
864 {
865     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
866     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
867     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
868 
869     pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
870     pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
871     pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
872     pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
873 }
874 
875 void helper_msa_add_a_d(CPUMIPSState *env,
876                         uint32_t wd, uint32_t ws, uint32_t wt)
877 {
878     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
879     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
880     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
881 
882     pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
883     pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
884 }
885 
886 
/*
 * Saturating add of the absolute values of two elements.
 *
 * @df:   data format selector; determines the saturation limit
 *        DF_MAX_INT(df)
 * @arg1: first signed operand
 * @arg2: second signed operand
 *
 * Returns |arg1| + |arg2|, clamped to DF_MAX_INT(df).
 */
static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    /*
     * Negate in the unsigned domain: -INT64_MIN is not representable in
     * int64_t, so the signed negation would be undefined behaviour.  The
     * unsigned result (2^63) exceeds max_int and saturates below.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    if (abs_arg1 > max_int || abs_arg2 > max_int) {
        /* A single operand already exceeds the limit. */
        return (int64_t)max_int;
    } else {
        /* Compare against the remaining headroom so the sum cannot wrap. */
        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
    }
}
898 
899 void helper_msa_adds_a_b(CPUMIPSState *env,
900                          uint32_t wd, uint32_t ws, uint32_t wt)
901 {
902     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
903     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
904     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
905 
906     pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
907     pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
908     pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
909     pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
910     pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
911     pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
912     pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
913     pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
914     pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
915     pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
916     pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
917     pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
918     pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
919     pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
920     pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
921     pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
922 }
923 
924 void helper_msa_adds_a_h(CPUMIPSState *env,
925                          uint32_t wd, uint32_t ws, uint32_t wt)
926 {
927     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
928     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
929     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
930 
931     pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
932     pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
933     pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
934     pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
935     pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
936     pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
937     pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
938     pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
939 }
940 
941 void helper_msa_adds_a_w(CPUMIPSState *env,
942                          uint32_t wd, uint32_t ws, uint32_t wt)
943 {
944     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
945     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
946     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
947 
948     pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
949     pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
950     pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
951     pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
952 }
953 
954 void helper_msa_adds_a_d(CPUMIPSState *env,
955                          uint32_t wd, uint32_t ws, uint32_t wt)
956 {
957     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
958     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
959     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
960 
961     pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
962     pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
963 }
964 
965 
/*
 * Signed saturating add of two elements.
 *
 * @df:   data format selector; determines the limits DF_MIN_INT(df) and
 *        DF_MAX_INT(df)
 * @arg1: first signed operand
 * @arg2: second signed operand
 *
 * Returns arg1 + arg2 when the range checks below pass, otherwise the
 * limit on the side selected by the sign of arg1.  The checks compare
 * against the remaining headroom instead of forming the sum first.
 * NOTE(review): presumably both operands are already within the element
 * range for df -- confirm against the callers.
 */
static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg1 >= 0) {
        /* Non-negative arg1: only the upper limit can be reached. */
        if (arg2 < upper - arg1) {
            return arg1 + arg2;
        }
        return upper;
    }

    /* Negative arg1: only the lower limit can be reached. */
    if (lower - arg1 < arg2) {
        return arg1 + arg2;
    }
    return lower;
}
976 
977 void helper_msa_adds_s_b(CPUMIPSState *env,
978                          uint32_t wd, uint32_t ws, uint32_t wt)
979 {
980     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
981     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
982     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
983 
984     pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
985     pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
986     pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
987     pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
988     pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
989     pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
990     pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
991     pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
992     pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
993     pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
994     pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
995     pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
996     pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
997     pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
998     pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
999     pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1000 }
1001 
1002 void helper_msa_adds_s_h(CPUMIPSState *env,
1003                          uint32_t wd, uint32_t ws, uint32_t wt)
1004 {
1005     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1006     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1007     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1008 
1009     pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1010     pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1011     pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1012     pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1013     pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1014     pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1015     pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1016     pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1017 }
1018 
1019 void helper_msa_adds_s_w(CPUMIPSState *env,
1020                          uint32_t wd, uint32_t ws, uint32_t wt)
1021 {
1022     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1023     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1024     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1025 
1026     pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1027     pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1028     pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1029     pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1030 }
1031 
1032 void helper_msa_adds_s_d(CPUMIPSState *env,
1033                          uint32_t wd, uint32_t ws, uint32_t wt)
1034 {
1035     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1036     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1037     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1038 
1039     pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1040     pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1041 }
1042 
1043 
/*
 * Unsigned saturating add of two elements.
 *
 * @df:   data format selector; determines the mask and the saturation
 *        limit DF_MAX_UINT(df)
 * @arg1: first operand; masked to the element width before use
 * @arg2: second operand; masked to the element width before use
 *
 * Returns the sum of the masked operands, clamped to DF_MAX_UINT(df).
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t limit = DF_MAX_UINT(df);
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    /* Room left below the limit once rhs has been accounted for. */
    const uint64_t headroom = limit - rhs;

    if (lhs < headroom) {
        return lhs + rhs;
    }
    return limit;
}
1051 
1052 void helper_msa_adds_u_b(CPUMIPSState *env,
1053                          uint32_t wd, uint32_t ws, uint32_t wt)
1054 {
1055     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1056     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1057     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1058 
1059     pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1060     pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1061     pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1062     pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1063     pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1064     pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1065     pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1066     pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1067     pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1068     pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1069     pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1070     pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1071     pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1072     pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1073     pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1074     pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1075 }
1076 
1077 void helper_msa_adds_u_h(CPUMIPSState *env,
1078                          uint32_t wd, uint32_t ws, uint32_t wt)
1079 {
1080     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1081     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1082     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1083 
1084     pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1085     pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1086     pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1087     pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1088     pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1089     pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1090     pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1091     pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1092 }
1093 
1094 void helper_msa_adds_u_w(CPUMIPSState *env,
1095                          uint32_t wd, uint32_t ws, uint32_t wt)
1096 {
1097     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1098     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1099     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1100 
1101     pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1102     pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1103     pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1104     pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1105 }
1106 
1107 void helper_msa_adds_u_d(CPUMIPSState *env,
1108                          uint32_t wd, uint32_t ws, uint32_t wt)
1109 {
1110     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1111     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1112     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1113 
1114     pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1115     pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1116 }
1117 
1118 
/*
 * Plain (wrapping) add of two elements.
 *
 * @df:   data format selector (not used by the computation)
 * @arg1: first operand
 * @arg2: second operand
 *
 * Returns arg1 + arg2 modulo 2^64; the callers store the result into an
 * element of the destination register.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Add in the unsigned domain: for 64-bit operands the signed sum can
     * overflow, which is undefined behaviour for int64_t.
     */
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1123 
1124 void helper_msa_addv_b(CPUMIPSState *env,
1125                        uint32_t wd, uint32_t ws, uint32_t wt)
1126 {
1127     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1128     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1129     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1130 
1131     pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1132     pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1133     pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1134     pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1135     pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1136     pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1137     pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1138     pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1139     pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1140     pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1141     pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1142     pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1143     pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1144     pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1145     pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1146     pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1147 }
1148 
1149 void helper_msa_addv_h(CPUMIPSState *env,
1150                        uint32_t wd, uint32_t ws, uint32_t wt)
1151 {
1152     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1153     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1154     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1155 
1156     pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1157     pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1158     pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1159     pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1160     pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1161     pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1162     pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1163     pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1164 }
1165 
1166 void helper_msa_addv_w(CPUMIPSState *env,
1167                        uint32_t wd, uint32_t ws, uint32_t wt)
1168 {
1169     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1170     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1171     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1172 
1173     pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1174     pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1175     pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1176     pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1177 }
1178 
1179 void helper_msa_addv_d(CPUMIPSState *env,
1180                        uint32_t wd, uint32_t ws, uint32_t wt)
1181 {
1182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1185 
1186     pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1187     pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1188 }
1189 
1190 
/*
 * Half-width extraction for an element of DF_BITS(df) bits held in 'a'.
 *
 * The *_EVEN macros yield the low DF_BITS(df) / 2 bits of the element and
 * the *_ODD macros yield its high DF_BITS(df) / 2 bits.  SIGNED_* produce
 * an int64_t with the half sign-extended; UNSIGNED_* produce a uint64_t
 * with the half zero-extended.
 *
 * All left shifts are done on uint64_t, because left-shifting a negative
 * signed value is undefined in ISO C.  The signed variants then convert
 * back to int64_t and rely on the right shift of a negative value being
 * arithmetic (implementation-defined in ISO C).
 */
#define SIGNED_EVEN(a, df) \
        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df) / 2))) >> \
         (64 - DF_BITS(df) / 2))

#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

#define SIGNED_ODD(a, df) \
        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df)))) >> \
         (64 - DF_BITS(df) / 2))

#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1202 
1203 
/*
 * Signed horizontal add for one destination element.
 *
 * @df:   data format of the destination element
 * @arg1: element whose odd (high) half is used, via SIGNED_ODD()
 * @arg2: element whose even (low) half is used, via SIGNED_EVEN()
 *
 * Returns the sum of the two sign-extended halves.
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t odd_half = SIGNED_ODD(arg1, df);
    int64_t even_half = SIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1208 
1209 void helper_msa_hadd_s_h(CPUMIPSState *env,
1210                          uint32_t wd, uint32_t ws, uint32_t wt)
1211 {
1212     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1213     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1214     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1215 
1216     pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1217     pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1218     pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1219     pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1220     pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1221     pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1222     pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1223     pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1224 }
1225 
1226 void helper_msa_hadd_s_w(CPUMIPSState *env,
1227                          uint32_t wd, uint32_t ws, uint32_t wt)
1228 {
1229     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1230     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1231     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1232 
1233     pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1234     pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1235     pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1236     pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1237 }
1238 
1239 void helper_msa_hadd_s_d(CPUMIPSState *env,
1240                          uint32_t wd, uint32_t ws, uint32_t wt)
1241 {
1242     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1243     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1244     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1245 
1246     pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1247     pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1248 }
1249 
1250 
/*
 * Unsigned horizontal add for one destination element.
 *
 * @df:   data format of the destination element
 * @arg1: element whose odd (high) half is used, via UNSIGNED_ODD()
 * @arg2: element whose even (low) half is used, via UNSIGNED_EVEN()
 *
 * Returns the sum of the two zero-extended halves.
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t odd_half = UNSIGNED_ODD(arg1, df);
    uint64_t even_half = UNSIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1255 
1256 void helper_msa_hadd_u_h(CPUMIPSState *env,
1257                          uint32_t wd, uint32_t ws, uint32_t wt)
1258 {
1259     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1260     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1261     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1262 
1263     pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1264     pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1265     pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1266     pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1267     pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1268     pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1269     pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1270     pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1271 }
1272 
1273 void helper_msa_hadd_u_w(CPUMIPSState *env,
1274                          uint32_t wd, uint32_t ws, uint32_t wt)
1275 {
1276     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1277     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1278     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1279 
1280     pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1281     pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1282     pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1283     pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1284 }
1285 
1286 void helper_msa_hadd_u_d(CPUMIPSState *env,
1287                          uint32_t wd, uint32_t ws, uint32_t wt)
1288 {
1289     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1290     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1291     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1292 
1293     pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1294     pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1295 }
1296 
1297 
1298 /*
1299  * Int Average
1300  * -----------
1301  *
1302  * +---------------+----------------------------------------------------------+
1303  * | AVE_S.B       | Vector Signed Average (byte)                             |
1304  * | AVE_S.H       | Vector Signed Average (halfword)                         |
1305  * | AVE_S.W       | Vector Signed Average (word)                             |
1306  * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1307  * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1308  * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1309  * | AVE_U.W       | Vector Unsigned Average (word)                           |
1310  * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1311  * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1312  * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1313  * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1314  * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1315  * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1316  * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1317  * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1318  * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1319  * +---------------+----------------------------------------------------------+
1320  */
1321 
/*
 * Signed average of two elements without forming the full-width sum.
 *
 * @df:   data format selector (not used by the computation)
 * @arg1: first signed operand
 * @arg2: second signed operand
 *
 * Each operand is halved with a signed right shift, and one is added back
 * when both operands have their low bit set.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed shifts */
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    /* The two discarded low bits add up to one only if both were set. */
    int64_t carry = arg1 & arg2 & 1;

    return half1 + half2 + carry;
}
1327 
1328 void helper_msa_ave_s_b(CPUMIPSState *env,
1329                         uint32_t wd, uint32_t ws, uint32_t wt)
1330 {
1331     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1332     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1333     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1334 
1335     pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1336     pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1337     pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1338     pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1339     pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1340     pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1341     pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1342     pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1343     pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1344     pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1345     pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1346     pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1347     pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1348     pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1349     pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1350     pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1351 }
1352 
1353 void helper_msa_ave_s_h(CPUMIPSState *env,
1354                         uint32_t wd, uint32_t ws, uint32_t wt)
1355 {
1356     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1357     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1358     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1359 
1360     pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1361     pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1362     pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1363     pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1364     pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1365     pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1366     pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1367     pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1368 }
1369 
1370 void helper_msa_ave_s_w(CPUMIPSState *env,
1371                         uint32_t wd, uint32_t ws, uint32_t wt)
1372 {
1373     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1374     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1375     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1376 
1377     pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1378     pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1379     pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1380     pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1381 }
1382 
1383 void helper_msa_ave_s_d(CPUMIPSState *env,
1384                         uint32_t wd, uint32_t ws, uint32_t wt)
1385 {
1386     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1387     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1388     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1389 
1390     pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1391     pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1392 }
1393 
/*
 * Average of two unsigned elements of format df without forming the full
 * sum. Both operands are first reduced with UNSIGNED() to the element
 * width, then halved with an unsigned shift; one is added back when both
 * reduced operands are odd.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    /* The two dropped low bits add up to a whole unit only if both are set. */
    const uint64_t both_odd = lhs & rhs & 1;

    return (lhs >> 1) + (rhs >> 1) + both_odd;
}
1401 
1402 void helper_msa_ave_u_b(CPUMIPSState *env,
1403                         uint32_t wd, uint32_t ws, uint32_t wt)
1404 {
1405     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1406     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1407     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1408 
1409     pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1410     pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1411     pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1412     pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1413     pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1414     pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1415     pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1416     pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1417     pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1418     pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1419     pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1420     pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1421     pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1422     pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1423     pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1424     pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1425 }
1426 
1427 void helper_msa_ave_u_h(CPUMIPSState *env,
1428                         uint32_t wd, uint32_t ws, uint32_t wt)
1429 {
1430     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1431     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1432     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1433 
1434     pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1435     pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1436     pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1437     pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1438     pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1439     pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1440     pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1441     pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1442 }
1443 
1444 void helper_msa_ave_u_w(CPUMIPSState *env,
1445                         uint32_t wd, uint32_t ws, uint32_t wt)
1446 {
1447     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1448     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1449     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1450 
1451     pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1452     pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1453     pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1454     pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1455 }
1456 
1457 void helper_msa_ave_u_d(CPUMIPSState *env,
1458                         uint32_t wd, uint32_t ws, uint32_t wt)
1459 {
1460     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1461     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1462     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1463 
1464     pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1465     pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1466 }
1467 
/*
 * Rounded average of two signed values without forming the full sum: each
 * operand is halved with a signed shift, and one is added back when at
 * least one operand is odd. The df argument is not used.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t half1 = arg1 >> 1;
    const int64_t half2 = arg2 >> 1;
    const int64_t any_odd = (arg1 | arg2) & 1;

    return half1 + half2 + any_odd;
}
1473 
1474 void helper_msa_aver_s_b(CPUMIPSState *env,
1475                          uint32_t wd, uint32_t ws, uint32_t wt)
1476 {
1477     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1478     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1479     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1480 
1481     pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1482     pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1483     pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1484     pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1485     pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1486     pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1487     pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1488     pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1489     pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1490     pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1491     pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1492     pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1493     pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1494     pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1495     pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1496     pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1497 }
1498 
1499 void helper_msa_aver_s_h(CPUMIPSState *env,
1500                          uint32_t wd, uint32_t ws, uint32_t wt)
1501 {
1502     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1503     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1504     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1505 
1506     pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1507     pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1508     pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1509     pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1510     pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1511     pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1512     pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1513     pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1514 }
1515 
1516 void helper_msa_aver_s_w(CPUMIPSState *env,
1517                          uint32_t wd, uint32_t ws, uint32_t wt)
1518 {
1519     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1520     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1521     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1522 
1523     pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1524     pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1525     pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1526     pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1527 }
1528 
1529 void helper_msa_aver_s_d(CPUMIPSState *env,
1530                          uint32_t wd, uint32_t ws, uint32_t wt)
1531 {
1532     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1533     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1534     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1535 
1536     pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1537     pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1538 }
1539 
/*
 * Rounded average of two unsigned elements of format df without forming
 * the full sum. Both operands are first reduced with UNSIGNED() to the
 * element width, then halved with an unsigned shift; one is added back
 * when at least one reduced operand is odd.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    /* Any dropped low bit rounds the result up by one. */
    const uint64_t any_odd = (lhs | rhs) & 1;

    return (lhs >> 1) + (rhs >> 1) + any_odd;
}
1547 
1548 void helper_msa_aver_u_b(CPUMIPSState *env,
1549                          uint32_t wd, uint32_t ws, uint32_t wt)
1550 {
1551     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1552     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1553     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1554 
1555     pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1556     pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1557     pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1558     pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1559     pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1560     pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1561     pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1562     pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1563     pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1564     pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1565     pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1566     pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1567     pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1568     pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1569     pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1570     pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1571 }
1572 
1573 void helper_msa_aver_u_h(CPUMIPSState *env,
1574                          uint32_t wd, uint32_t ws, uint32_t wt)
1575 {
1576     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1577     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1578     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1579 
1580     pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1581     pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1582     pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1583     pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1584     pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1585     pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1586     pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1587     pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1588 }
1589 
1590 void helper_msa_aver_u_w(CPUMIPSState *env,
1591                          uint32_t wd, uint32_t ws, uint32_t wt)
1592 {
1593     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1594     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1595     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1596 
1597     pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1598     pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1599     pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1600     pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1601 }
1602 
1603 void helper_msa_aver_u_d(CPUMIPSState *env,
1604                          uint32_t wd, uint32_t ws, uint32_t wt)
1605 {
1606     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1607     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1608     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1609 
1610     pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1611     pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1612 }
1613 
1614 
1615 /*
1616  * Int Compare
1617  * -----------
1618  *
1619  * +---------------+----------------------------------------------------------+
1620  * | CEQ.B         | Vector Compare Equal (byte)                              |
1621  * | CEQ.H         | Vector Compare Equal (halfword)                          |
1622  * | CEQ.W         | Vector Compare Equal (word)                              |
1623  * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1624  * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1625  * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1626  * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1627  * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1628  * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1629  * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1630  * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1631  * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1632  * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1633  * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1634  * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1635  * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1636  * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1637  * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1638  * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1639  * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1640  * +---------------+----------------------------------------------------------+
1641  */
1642 
/*
 * Equality compare: returns -1 (all bits set) when arg1 equals arg2,
 * otherwise 0. The df argument is not used.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1647 
/*
 * Byte equality compare: returns -1 (all bits set) when arg1 equals arg2,
 * otherwise 0.
 */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1652 
1653 void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1654 {
1655     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1656     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1657     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1658 
1659     pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1660     pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1661     pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1662     pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1663     pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1664     pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1665     pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1666     pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1667     pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1668     pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1669     pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1670     pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1671     pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1672     pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1673     pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1674     pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1675 }
1676 
/*
 * Halfword equality compare: returns -1 (all bits set) when arg1 equals
 * arg2, otherwise 0.
 */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1681 
1682 void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1683 {
1684     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1685     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1686     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1687 
1688     pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1689     pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1690     pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1691     pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1692     pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1693     pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1694     pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1695     pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1696 }
1697 
/*
 * Word equality compare: returns -1 (all bits set) when arg1 equals arg2,
 * otherwise 0.
 */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1702 
1703 void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1704 {
1705     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1706     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1707     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1708 
1709     pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1710     pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1711     pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1712     pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1713 }
1714 
/*
 * Doubleword equality compare: returns -1 (all bits set) when arg1 equals
 * arg2, otherwise 0.
 */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1719 
1720 void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1721 {
1722     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1723     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1724     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1725 
1726     pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1727     pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1728 }
1729 
/*
 * Signed less-than-or-equal compare: returns -1 (all bits set) when
 * arg1 <= arg2, otherwise 0. The df argument is not used.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 <= arg2) {
        return -1;
    }
    return 0;
}
1734 
1735 void helper_msa_cle_s_b(CPUMIPSState *env,
1736                         uint32_t wd, uint32_t ws, uint32_t wt)
1737 {
1738     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1739     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1740     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1741 
1742     pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1743     pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1744     pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1745     pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1746     pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1747     pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1748     pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1749     pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1750     pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1751     pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1752     pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1753     pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1754     pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1755     pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1756     pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1757     pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1758 }
1759 
1760 void helper_msa_cle_s_h(CPUMIPSState *env,
1761                         uint32_t wd, uint32_t ws, uint32_t wt)
1762 {
1763     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1764     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1765     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1766 
1767     pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1768     pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1769     pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1770     pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1771     pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1772     pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1773     pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1774     pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1775 }
1776 
1777 void helper_msa_cle_s_w(CPUMIPSState *env,
1778                         uint32_t wd, uint32_t ws, uint32_t wt)
1779 {
1780     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1781     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1782     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1783 
1784     pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1785     pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1786     pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1787     pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1788 }
1789 
1790 void helper_msa_cle_s_d(CPUMIPSState *env,
1791                         uint32_t wd, uint32_t ws, uint32_t wt)
1792 {
1793     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1794     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1795     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1796 
1797     pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1798     pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1799 }
1800 
/*
 * Unsigned less-than-or-equal compare for elements of format df. Both
 * operands are reduced with UNSIGNED() to the element width and compared
 * as uint64_t; returns -1 (all bits set) when arg1 <= arg2, otherwise 0.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs <= rhs) {
        return -1;
    }
    return 0;
}
1807 
1808 void helper_msa_cle_u_b(CPUMIPSState *env,
1809                         uint32_t wd, uint32_t ws, uint32_t wt)
1810 {
1811     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1812     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1813     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1814 
1815     pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1816     pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1817     pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1818     pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1819     pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1820     pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1821     pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1822     pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1823     pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1824     pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1825     pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1826     pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1827     pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1828     pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1829     pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1830     pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1831 }
1832 
1833 void helper_msa_cle_u_h(CPUMIPSState *env,
1834                         uint32_t wd, uint32_t ws, uint32_t wt)
1835 {
1836     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1837     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1838     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1839 
1840     pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1841     pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1842     pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1843     pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1844     pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1845     pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1846     pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1847     pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1848 }
1849 
1850 void helper_msa_cle_u_w(CPUMIPSState *env,
1851                         uint32_t wd, uint32_t ws, uint32_t wt)
1852 {
1853     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1854     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1855     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1856 
1857     pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1858     pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1859     pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1860     pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1861 }
1862 
1863 void helper_msa_cle_u_d(CPUMIPSState *env,
1864                         uint32_t wd, uint32_t ws, uint32_t wt)
1865 {
1866     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1867     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1868     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1869 
1870     pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1871     pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1872 }
1873 
/*
 * Signed less-than compare: returns -1 (all bits set) when arg1 < arg2,
 * otherwise 0. The df argument is not used.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1878 
/*
 * Signed byte less-than compare: returns -1 (all bits set) when
 * arg1 < arg2, otherwise 0.
 */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1883 
1884 void helper_msa_clt_s_b(CPUMIPSState *env,
1885                         uint32_t wd, uint32_t ws, uint32_t wt)
1886 {
1887     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1888     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1889     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1890 
1891     pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1892     pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1893     pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1894     pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1895     pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1896     pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1897     pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1898     pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1899     pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1900     pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1901     pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1902     pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1903     pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1904     pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1905     pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1906     pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1907 }
1908 
/*
 * Signed halfword less-than compare: returns -1 (all bits set) when
 * arg1 < arg2, otherwise 0.
 */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1913 
1914 void helper_msa_clt_s_h(CPUMIPSState *env,
1915                         uint32_t wd, uint32_t ws, uint32_t wt)
1916 {
1917     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1918     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1919     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1920 
1921     pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1922     pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1923     pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1924     pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1925     pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1926     pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1927     pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1928     pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1929 }
1930 
/*
 * Signed word less-than compare: returns -1 (all bits set) when
 * arg1 < arg2, otherwise 0.
 */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1935 
1936 void helper_msa_clt_s_w(CPUMIPSState *env,
1937                         uint32_t wd, uint32_t ws, uint32_t wt)
1938 {
1939     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1940     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1941     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1942 
1943     pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1944     pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1945     pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1946     pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1947 }
1948 
/*
 * Signed doubleword less-than compare: returns -1 (all bits set) when
 * arg1 < arg2, otherwise 0.
 */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1953 
1954 void helper_msa_clt_s_d(CPUMIPSState *env,
1955                         uint32_t wd, uint32_t ws, uint32_t wt)
1956 {
1957     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1958     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1959     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1960 
1961     pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1962     pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1963 }
1964 
/*
 * Unsigned less-than compare for elements of format df. Both operands are
 * reduced with UNSIGNED() to the element width and compared as uint64_t;
 * returns -1 (all bits set) when arg1 < arg2, otherwise 0.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return -1;
    }
    return 0;
}
1971 
1972 void helper_msa_clt_u_b(CPUMIPSState *env,
1973                         uint32_t wd, uint32_t ws, uint32_t wt)
1974 {
1975     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1976     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1977     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1978 
1979     pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1980     pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1981     pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1982     pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1983     pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1984     pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1985     pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1986     pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1987     pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1988     pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1989     pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1990     pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1991     pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1992     pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1993     pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1994     pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1995 }
1996 
1997 void helper_msa_clt_u_h(CPUMIPSState *env,
1998                         uint32_t wd, uint32_t ws, uint32_t wt)
1999 {
2000     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2001     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2002     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2003 
2004     pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2005     pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2006     pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2007     pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2008     pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2009     pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2010     pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2011     pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2012 }
2013 
2014 void helper_msa_clt_u_w(CPUMIPSState *env,
2015                         uint32_t wd, uint32_t ws, uint32_t wt)
2016 {
2017     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2018     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2019     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2020 
2021     pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2022     pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2023     pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2024     pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2025 }
2026 
2027 void helper_msa_clt_u_d(CPUMIPSState *env,
2028                         uint32_t wd, uint32_t ws, uint32_t wt)
2029 {
2030     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2031     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2032     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2033 
2034     pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2035     pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2036 }
2037 
2038 
2039 /*
2040  * Int Divide
2041  * ----------
2042  *
2043  * +---------------+----------------------------------------------------------+
2044  * | DIV_S.B       | Vector Signed Divide (byte)                              |
2045  * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2046  * | DIV_S.W       | Vector Signed Divide (word)                              |
2047  * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2048  * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2049  * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2050  * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2051  * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2052  * +---------------+----------------------------------------------------------+
2053  */
2054 
2055 
/*
 * Signed division of one element pair for data format df.
 *
 * Two cases are answered explicitly instead of being handed to the C
 * '/' operator:
 *   - a zero divisor yields -1 when arg1 >= 0 and 1 when arg1 < 0;
 *   - DF_MIN_INT(df) divided by -1 yields DF_MIN_INT(df).
 * Every other input returns arg1 / arg2.
 */
static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == 0) {
        return arg1 >= 0 ? -1 : 1;
    }

    if (arg2 == -1 && arg1 == DF_MIN_INT(df)) {
        return DF_MIN_INT(df);
    }

    return arg1 / arg2;
}
2064 
2065 void helper_msa_div_s_b(CPUMIPSState *env,
2066                         uint32_t wd, uint32_t ws, uint32_t wt)
2067 {
2068     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2069     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2070     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2071 
2072     pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2073     pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2074     pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2075     pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2076     pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2077     pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2078     pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2079     pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2080     pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2081     pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2082     pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2083     pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2084     pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2085     pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2086     pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2087     pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2088 }
2089 
2090 void helper_msa_div_s_h(CPUMIPSState *env,
2091                         uint32_t wd, uint32_t ws, uint32_t wt)
2092 {
2093     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2094     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2095     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2096 
2097     pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2098     pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2099     pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2100     pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2101     pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2102     pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2103     pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2104     pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2105 }
2106 
2107 void helper_msa_div_s_w(CPUMIPSState *env,
2108                         uint32_t wd, uint32_t ws, uint32_t wt)
2109 {
2110     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2111     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2112     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2113 
2114     pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2115     pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2116     pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2117     pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2118 }
2119 
2120 void helper_msa_div_s_d(CPUMIPSState *env,
2121                         uint32_t wd, uint32_t ws, uint32_t wt)
2122 {
2123     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2124     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2125     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2126 
2127     pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2128     pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2129 }
2130 
/*
 * Unsigned division of one element pair for data format df.
 *
 * Both operands are first reduced to their low DF_BITS(df) bits with
 * UNSIGNED().  A zero divisor yields -1 (all bits set); otherwise the
 * unsigned quotient is returned.
 */
static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);

    /*
     * Check the masked divisor, since that is the value actually used in
     * the division: a raw arg2 that is non-zero only above the element
     * width must not reach the '/' below.
     */
    if (u_arg2 == 0) {
        return -1;
    }

    return u_arg1 / u_arg2;
}
2137 
2138 void helper_msa_div_u_b(CPUMIPSState *env,
2139                         uint32_t wd, uint32_t ws, uint32_t wt)
2140 {
2141     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2142     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2143     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2144 
2145     pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2146     pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2147     pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2148     pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2149     pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2150     pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2151     pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2152     pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2153     pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2154     pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2155     pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2156     pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2157     pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2158     pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2159     pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2160     pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2161 }
2162 
2163 void helper_msa_div_u_h(CPUMIPSState *env,
2164                         uint32_t wd, uint32_t ws, uint32_t wt)
2165 {
2166     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2167     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2168     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2169 
2170     pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2171     pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2172     pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2173     pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2174     pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2175     pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2176     pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2177     pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2178 }
2179 
2180 void helper_msa_div_u_w(CPUMIPSState *env,
2181                         uint32_t wd, uint32_t ws, uint32_t wt)
2182 {
2183     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2184     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2185     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2186 
2187     pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2188     pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2189     pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2190     pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2191 }
2192 
2193 void helper_msa_div_u_d(CPUMIPSState *env,
2194                         uint32_t wd, uint32_t ws, uint32_t wt)
2195 {
2196     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2197     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2198     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2199 
2200     pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2201     pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2202 }
2203 
2204 
/*
 * Int Dot Product
 * ---------------
 *
 * +---------------+----------------------------------------------------------+
 * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
 * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
 * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
 * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
 * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
 * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
 * +---------------+----------------------------------------------------------+
 */
2230 
/*
 * Store SIGNED_EVEN(val, df) in 'even' and SIGNED_ODD(val, df) in 'odd'.
 * 'even' and 'odd' must be assignable; 'val' and 'df' are each expanded
 * twice, so they should have no side effects.
 */
#define SIGNED_EXTRACT(even, odd, val, df)  \
    do {                                    \
        (even) = SIGNED_EVEN(val, df);      \
        (odd) = SIGNED_ODD(val, df);        \
    } while (0)
2236 
/*
 * Store UNSIGNED_EVEN(val, df) in 'even' and UNSIGNED_ODD(val, df) in
 * 'odd'.  'even' and 'odd' must be assignable; 'val' and 'df' are each
 * expanded twice, so they should have no side effects.
 */
#define UNSIGNED_EXTRACT(even, odd, val, df)    \
    do {                                        \
        (even) = UNSIGNED_EVEN(val, df);        \
        (odd) = UNSIGNED_ODD(val, df);          \
    } while (0)
2242 
2243 
/*
 * Signed dot product of one element pair for data format df.
 *
 * SIGNED_EXTRACT() splits each argument into an "even" and an "odd" value
 * via SIGNED_EVEN()/SIGNED_ODD() (defined elsewhere in this file;
 * presumably the two sign-extended half-width sub-elements - confirm
 * against those macros).  Returns even1 * even2 + odd1 * odd2.
 *
 * The products and the sum are evaluated in uint64_t so that wrap-around
 * is well defined modulo 2^64 rather than being a signed overflow; the
 * resulting bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)(even_prod + odd_prod);
}
2254 
2255 void helper_msa_dotp_s_h(CPUMIPSState *env,
2256                          uint32_t wd, uint32_t ws, uint32_t wt)
2257 {
2258     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2259     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2260     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2261 
2262     pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2263     pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2264     pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2265     pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2266     pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2267     pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2268     pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2269     pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2270 }
2271 
2272 void helper_msa_dotp_s_w(CPUMIPSState *env,
2273                          uint32_t wd, uint32_t ws, uint32_t wt)
2274 {
2275     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2276     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2277     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2278 
2279     pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2280     pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2281     pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2282     pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2283 }
2284 
2285 void helper_msa_dotp_s_d(CPUMIPSState *env,
2286                          uint32_t wd, uint32_t ws, uint32_t wt)
2287 {
2288     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2289     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2290     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2291 
2292     pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2293     pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2294 }
2295 
2296 
/*
 * Unsigned dot product of one element pair for data format df.
 *
 * UNSIGNED_EXTRACT() splits each argument into an "even" and an "odd"
 * value via UNSIGNED_EVEN()/UNSIGNED_ODD() (defined elsewhere in this
 * file; presumably the two zero-extended half-width sub-elements -
 * confirm against those macros).  Returns even1 * even2 + odd1 * odd2.
 *
 * The products and the sum are evaluated in uint64_t so that wrap-around
 * is well defined modulo 2^64 rather than being a signed overflow; the
 * resulting bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)(even_prod + odd_prod);
}
2307 
2308 void helper_msa_dotp_u_h(CPUMIPSState *env,
2309                          uint32_t wd, uint32_t ws, uint32_t wt)
2310 {
2311     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2312     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2313     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2314 
2315     pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2316     pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2317     pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2318     pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2319     pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2320     pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2321     pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2322     pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2323 }
2324 
2325 void helper_msa_dotp_u_w(CPUMIPSState *env,
2326                          uint32_t wd, uint32_t ws, uint32_t wt)
2327 {
2328     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2329     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2330     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2331 
2332     pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2333     pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2334     pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2335     pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2336 }
2337 
2338 void helper_msa_dotp_u_d(CPUMIPSState *env,
2339                          uint32_t wd, uint32_t ws, uint32_t wt)
2340 {
2341     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2342     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2343     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2344 
2345     pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2346     pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2347 }
2348 
2349 
/*
 * Signed dot product and add for one element of data format df.
 *
 * SIGNED_EXTRACT() splits arg1 and arg2 into "even" and "odd" values via
 * SIGNED_EVEN()/SIGNED_ODD() (defined elsewhere in this file; presumably
 * the two sign-extended half-width sub-elements - confirm against those
 * macros).  Returns dest + even1 * even2 + odd1 * odd2.
 *
 * The arithmetic is carried out in uint64_t so that wrap-around is well
 * defined modulo 2^64 rather than being a signed overflow; the resulting
 * bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)((uint64_t)dest + even_prod + odd_prod);
}
2361 
2362 void helper_msa_dpadd_s_h(CPUMIPSState *env,
2363                           uint32_t wd, uint32_t ws, uint32_t wt)
2364 {
2365     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2366     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2367     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2368 
2369     pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2370     pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2371     pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2372     pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2373     pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2374     pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2375     pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2376     pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2377 }
2378 
2379 void helper_msa_dpadd_s_w(CPUMIPSState *env,
2380                           uint32_t wd, uint32_t ws, uint32_t wt)
2381 {
2382     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2383     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2384     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2385 
2386     pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2387     pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2388     pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2389     pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2390 }
2391 
2392 void helper_msa_dpadd_s_d(CPUMIPSState *env,
2393                           uint32_t wd, uint32_t ws, uint32_t wt)
2394 {
2395     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2396     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2397     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2398 
2399     pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2400     pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2401 }
2402 
2403 
/*
 * Unsigned dot product and add for one element of data format df.
 *
 * UNSIGNED_EXTRACT() splits arg1 and arg2 into "even" and "odd" values via
 * UNSIGNED_EVEN()/UNSIGNED_ODD() (defined elsewhere in this file;
 * presumably the two zero-extended half-width sub-elements - confirm
 * against those macros).  Returns dest + even1 * even2 + odd1 * odd2.
 *
 * The arithmetic is carried out in uint64_t so that wrap-around is well
 * defined modulo 2^64 rather than being a signed overflow; the resulting
 * bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)((uint64_t)dest + even_prod + odd_prod);
}
2415 
2416 void helper_msa_dpadd_u_h(CPUMIPSState *env,
2417                           uint32_t wd, uint32_t ws, uint32_t wt)
2418 {
2419     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2420     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2421     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2422 
2423     pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2424     pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2425     pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2426     pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2427     pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2428     pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2429     pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2430     pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2431 }
2432 
2433 void helper_msa_dpadd_u_w(CPUMIPSState *env,
2434                           uint32_t wd, uint32_t ws, uint32_t wt)
2435 {
2436     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2437     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2438     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2439 
2440     pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2441     pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2442     pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2443     pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2444 }
2445 
2446 void helper_msa_dpadd_u_d(CPUMIPSState *env,
2447                           uint32_t wd, uint32_t ws, uint32_t wt)
2448 {
2449     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2450     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2451     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2452 
2453     pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2454     pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2455 }
2456 
2457 
/*
 * Signed dot product and subtract for one element of data format df.
 *
 * SIGNED_EXTRACT() splits arg1 and arg2 into "even" and "odd" values via
 * SIGNED_EVEN()/SIGNED_ODD() (defined elsewhere in this file; presumably
 * the two sign-extended half-width sub-elements - confirm against those
 * macros).  Returns dest - (even1 * even2 + odd1 * odd2).
 *
 * The arithmetic is carried out in uint64_t so that wrap-around is well
 * defined modulo 2^64 rather than being a signed overflow; the resulting
 * bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)((uint64_t)dest - (even_prod + odd_prod));
}
2469 
2470 void helper_msa_dpsub_s_h(CPUMIPSState *env,
2471                           uint32_t wd, uint32_t ws, uint32_t wt)
2472 {
2473     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2474     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2475     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2476 
2477     pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2478     pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2479     pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2480     pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2481     pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2482     pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2483     pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2484     pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2485 }
2486 
2487 void helper_msa_dpsub_s_w(CPUMIPSState *env,
2488                           uint32_t wd, uint32_t ws, uint32_t wt)
2489 {
2490     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2491     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2492     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2493 
2494     pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2495     pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2496     pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2497     pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2498 }
2499 
2500 void helper_msa_dpsub_s_d(CPUMIPSState *env,
2501                           uint32_t wd, uint32_t ws, uint32_t wt)
2502 {
2503     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2504     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2505     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2506 
2507     pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2508     pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2509 }
2510 
2511 
/*
 * Unsigned dot product and subtract for one element of data format df.
 *
 * UNSIGNED_EXTRACT() splits arg1 and arg2 into "even" and "odd" values via
 * UNSIGNED_EVEN()/UNSIGNED_ODD() (defined elsewhere in this file;
 * presumably the two zero-extended half-width sub-elements - confirm
 * against those macros).  Returns dest - (even1 * even2 + odd1 * odd2).
 *
 * The arithmetic is carried out in uint64_t so that wrap-around is well
 * defined modulo 2^64 rather than being a signed overflow; the resulting
 * bit pattern equals that of the two's complement computation.
 */
static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t even_prod;
    uint64_t odd_prod;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    even_prod = (uint64_t)even_arg1 * (uint64_t)even_arg2;
    odd_prod = (uint64_t)odd_arg1 * (uint64_t)odd_arg2;

    return (int64_t)((uint64_t)dest - (even_prod + odd_prod));
}
2523 
2524 void helper_msa_dpsub_u_h(CPUMIPSState *env,
2525                           uint32_t wd, uint32_t ws, uint32_t wt)
2526 {
2527     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2528     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2529     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2530 
2531     pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2532     pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2533     pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2534     pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2535     pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2536     pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2537     pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2538     pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2539 }
2540 
2541 void helper_msa_dpsub_u_w(CPUMIPSState *env,
2542                           uint32_t wd, uint32_t ws, uint32_t wt)
2543 {
2544     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2545     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2546     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2547 
2548     pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2549     pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2550     pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2551     pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2552 }
2553 
2554 void helper_msa_dpsub_u_d(CPUMIPSState *env,
2555                           uint32_t wd, uint32_t ws, uint32_t wt)
2556 {
2557     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2558     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2559     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2560 
2561     pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2562     pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2563 }
2564 
2565 
2566 /*
2567  * Int Max Min
2568  * -----------
2569  *
2570  * +---------------+----------------------------------------------------------+
2571  * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2572  * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2573  * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2574  * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2575  * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2576  * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2577  * | MAX_S.W       | Vector Signed Maximum (word)                             |
2578  * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2579  * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2580  * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2581  * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2582  * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2583  * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2584  * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2585  * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2586  * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2587  * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2588  * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2589  * | MIN_S.W       | Vector Signed Minimum (word)                             |
2590  * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2591  * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2592  * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2593  * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2594  * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2595  * +---------------+----------------------------------------------------------+
2596  */
2597 
/*
 * Return whichever of arg1/arg2 has the larger absolute value; when the
 * magnitudes are equal, arg2 is returned.  df is not used.
 *
 * The magnitude of a negative argument is formed by negating it as a
 * uint64_t, so the most negative int64_t value maps to 2^63 without a
 * signed overflow.
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2604 
2605 void helper_msa_max_a_b(CPUMIPSState *env,
2606                         uint32_t wd, uint32_t ws, uint32_t wt)
2607 {
2608     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2609     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2610     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2611 
2612     pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2613     pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2614     pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2615     pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2616     pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2617     pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2618     pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2619     pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2620     pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2621     pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2622     pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2623     pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2624     pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2625     pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2626     pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2627     pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2628 }
2629 
2630 void helper_msa_max_a_h(CPUMIPSState *env,
2631                         uint32_t wd, uint32_t ws, uint32_t wt)
2632 {
2633     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2634     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2635     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2636 
2637     pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2638     pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2639     pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2640     pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2641     pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2642     pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2643     pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2644     pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2645 }
2646 
2647 void helper_msa_max_a_w(CPUMIPSState *env,
2648                         uint32_t wd, uint32_t ws, uint32_t wt)
2649 {
2650     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2651     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2652     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2653 
2654     pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2655     pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2656     pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2657     pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2658 }
2659 
2660 void helper_msa_max_a_d(CPUMIPSState *env,
2661                         uint32_t wd, uint32_t ws, uint32_t wt)
2662 {
2663     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2664     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2665     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2666 
2667     pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2668     pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2669 }
2670 
2671 
/*
 * Return the larger of arg1 and arg2 under signed comparison (arg2 when
 * they are equal).  df is not used.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }

    return arg2;
}
2676 
2677 void helper_msa_max_s_b(CPUMIPSState *env,
2678                         uint32_t wd, uint32_t ws, uint32_t wt)
2679 {
2680     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2681     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2682     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2683 
2684     pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2685     pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2686     pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2687     pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2688     pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2689     pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2690     pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2691     pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2692     pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2693     pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2694     pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2695     pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2696     pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2697     pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2698     pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2699     pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2700 }
2701 
2702 void helper_msa_max_s_h(CPUMIPSState *env,
2703                         uint32_t wd, uint32_t ws, uint32_t wt)
2704 {
2705     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2706     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2707     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2708 
2709     pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2710     pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2711     pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2712     pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2713     pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2714     pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2715     pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2716     pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2717 }
2718 
2719 void helper_msa_max_s_w(CPUMIPSState *env,
2720                         uint32_t wd, uint32_t ws, uint32_t wt)
2721 {
2722     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2723     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2724     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2725 
2726     pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2727     pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2728     pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2729     pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2730 }
2731 
2732 void helper_msa_max_s_d(CPUMIPSState *env,
2733                         uint32_t wd, uint32_t ws, uint32_t wt)
2734 {
2735     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2736     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2737     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2738 
2739     pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2740     pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2741 }
2742 
2743 
/*
 * Unsigned maximum of two element values.
 *
 * Both operands are masked to the width selected by df (via UNSIGNED())
 * before being compared as unsigned numbers.  The value handed back is the
 * corresponding *unmasked* argument; arg2 is returned on a tie.
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs > rhs) {
        return arg1;
    }
    return arg2;
}
2750 
2751 void helper_msa_max_u_b(CPUMIPSState *env,
2752                         uint32_t wd, uint32_t ws, uint32_t wt)
2753 {
2754     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2755     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2756     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2757 
2758     pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2759     pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2760     pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2761     pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2762     pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2763     pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2764     pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2765     pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2766     pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2767     pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2768     pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2769     pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2770     pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2771     pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2772     pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2773     pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2774 }
2775 
2776 void helper_msa_max_u_h(CPUMIPSState *env,
2777                         uint32_t wd, uint32_t ws, uint32_t wt)
2778 {
2779     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2780     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2781     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2782 
2783     pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2784     pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2785     pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2786     pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2787     pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2788     pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2789     pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2790     pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2791 }
2792 
2793 void helper_msa_max_u_w(CPUMIPSState *env,
2794                         uint32_t wd, uint32_t ws, uint32_t wt)
2795 {
2796     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2797     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2798     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2799 
2800     pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2801     pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2802     pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2803     pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2804 }
2805 
2806 void helper_msa_max_u_d(CPUMIPSState *env,
2807                         uint32_t wd, uint32_t ws, uint32_t wt)
2808 {
2809     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2810     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2811     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2812 
2813     pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2814     pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2815 }
2816 
2817 
/*
 * Return whichever argument has the smaller absolute value.
 *
 * arg1 is returned only when |arg1| is strictly smaller than |arg2|;
 * on a tie arg2 is returned.  The df argument is not read.
 *
 * The magnitudes are computed in the unsigned domain: negating INT64_MIN
 * as a signed value overflows (undefined behaviour in ISO C), whereas
 * -(uint64_t)INT64_MIN is well defined and yields 2^63, the true magnitude.
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2824 
2825 void helper_msa_min_a_b(CPUMIPSState *env,
2826                         uint32_t wd, uint32_t ws, uint32_t wt)
2827 {
2828     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2829     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2830     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2831 
2832     pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2833     pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2834     pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2835     pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2836     pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2837     pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2838     pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2839     pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2840     pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2841     pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2842     pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2843     pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2844     pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2845     pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2846     pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2847     pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2848 }
2849 
2850 void helper_msa_min_a_h(CPUMIPSState *env,
2851                         uint32_t wd, uint32_t ws, uint32_t wt)
2852 {
2853     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2854     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2855     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2856 
2857     pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2858     pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2859     pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2860     pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2861     pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2862     pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2863     pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2864     pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2865 }
2866 
2867 void helper_msa_min_a_w(CPUMIPSState *env,
2868                         uint32_t wd, uint32_t ws, uint32_t wt)
2869 {
2870     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2871     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2872     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2873 
2874     pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2875     pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2876     pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2877     pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2878 }
2879 
2880 void helper_msa_min_a_d(CPUMIPSState *env,
2881                         uint32_t wd, uint32_t ws, uint32_t wt)
2882 {
2883     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2884     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2885     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2886 
2887     pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2888     pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2889 }
2890 
2891 
/*
 * Signed minimum of two element values.
 *
 * Returns arg1 only when it is strictly less than arg2; on a tie arg2
 * is returned.  The df argument is not read by this function.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2896 
2897 void helper_msa_min_s_b(CPUMIPSState *env,
2898                         uint32_t wd, uint32_t ws, uint32_t wt)
2899 {
2900     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2901     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2902     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2903 
2904     pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2905     pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2906     pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2907     pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2908     pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2909     pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2910     pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2911     pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2912     pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2913     pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2914     pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2915     pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2916     pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2917     pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2918     pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2919     pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2920 }
2921 
2922 void helper_msa_min_s_h(CPUMIPSState *env,
2923                         uint32_t wd, uint32_t ws, uint32_t wt)
2924 {
2925     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2926     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2927     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2928 
2929     pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2930     pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2931     pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2932     pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2933     pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2934     pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2935     pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2936     pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2937 }
2938 
2939 void helper_msa_min_s_w(CPUMIPSState *env,
2940                         uint32_t wd, uint32_t ws, uint32_t wt)
2941 {
2942     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2943     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2944     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2945 
2946     pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2947     pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2948     pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2949     pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2950 }
2951 
2952 void helper_msa_min_s_d(CPUMIPSState *env,
2953                         uint32_t wd, uint32_t ws, uint32_t wt)
2954 {
2955     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2956     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2957     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2958 
2959     pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2960     pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2961 }
2962 
2963 
/*
 * Unsigned minimum of two element values.
 *
 * Both operands are masked to the width selected by df (via UNSIGNED())
 * before being compared as unsigned numbers.  The value handed back is the
 * corresponding *unmasked* argument; arg2 is returned on a tie.
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return arg1;
    }
    return arg2;
}
2970 
2971 void helper_msa_min_u_b(CPUMIPSState *env,
2972                         uint32_t wd, uint32_t ws, uint32_t wt)
2973 {
2974     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2975     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2976     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2977 
2978     pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2979     pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2980     pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2981     pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2982     pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2983     pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2984     pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2985     pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2986     pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2987     pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2988     pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2989     pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2990     pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2991     pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2992     pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2993     pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2994 }
2995 
2996 void helper_msa_min_u_h(CPUMIPSState *env,
2997                         uint32_t wd, uint32_t ws, uint32_t wt)
2998 {
2999     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3000     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3001     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3002 
3003     pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3004     pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3005     pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3006     pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3007     pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3008     pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3009     pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3010     pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3011 }
3012 
3013 void helper_msa_min_u_w(CPUMIPSState *env,
3014                         uint32_t wd, uint32_t ws, uint32_t wt)
3015 {
3016     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3017     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3018     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3019 
3020     pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3021     pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3022     pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3023     pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3024 }
3025 
3026 void helper_msa_min_u_d(CPUMIPSState *env,
3027                         uint32_t wd, uint32_t ws, uint32_t wt)
3028 {
3029     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3030     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3031     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3032 
3033     pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3034     pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3035 }
3036 
3037 
3038 /*
3039  * Int Modulo
3040  * ----------
3041  *
3042  * +---------------+----------------------------------------------------------+
3043  * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3044  * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3045  * | MOD_S.W       | Vector Signed Modulo (word)                              |
3046  * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3047  * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3048  * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3049  * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3050  * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3051  * +---------------+----------------------------------------------------------+
3052  */
3053 
/*
 * Signed remainder of arg1 by arg2 for one vector element.
 *
 *  - arg2 == 0  : returns arg1 unchanged (no division is performed).
 *  - arg2 == -1 : returns 0.  Any value modulo -1 is 0, and short-circuiting
 *                 here also keeps INT64_MIN % -1, which overflows, away
 *                 from the '%' operator.
 *  - otherwise  : returns arg1 % arg2 (C semantics, sign follows arg1).
 *
 * The divisor-only guard yields the same results as comparing arg1 against
 * the minimum value of the data format, but avoids evaluating
 * DF_MIN_INT(DF_DOUBLE), whose expansion left-shifts 1LL by 63 bits.
 * The df argument is therefore not read.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == -1) {
        return 0;
    }
    return arg2 ? arg1 % arg2 : arg1;
}
3061 
3062 void helper_msa_mod_s_b(CPUMIPSState *env,
3063                         uint32_t wd, uint32_t ws, uint32_t wt)
3064 {
3065     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3066     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3067     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3068 
3069     pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3070     pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3071     pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3072     pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3073     pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3074     pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3075     pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3076     pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3077     pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3078     pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3079     pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3080     pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3081     pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3082     pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3083     pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3084     pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3085 }
3086 
3087 void helper_msa_mod_s_h(CPUMIPSState *env,
3088                         uint32_t wd, uint32_t ws, uint32_t wt)
3089 {
3090     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3091     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3092     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3093 
3094     pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3095     pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3096     pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3097     pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3098     pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3099     pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3100     pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3101     pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3102 }
3103 
3104 void helper_msa_mod_s_w(CPUMIPSState *env,
3105                         uint32_t wd, uint32_t ws, uint32_t wt)
3106 {
3107     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3108     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3109     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3110 
3111     pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3112     pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3113     pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3114     pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3115 }
3116 
3117 void helper_msa_mod_s_d(CPUMIPSState *env,
3118                         uint32_t wd, uint32_t ws, uint32_t wt)
3119 {
3120     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3121     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3122     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3123 
3124     pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3125     pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3126 }
3127 
/*
 * Unsigned remainder of arg1 by arg2 for one vector element.
 *
 * Both operands are first masked to the width selected by df (via
 * UNSIGNED()).  When the masked divisor is zero the masked dividend is
 * returned unchanged; otherwise the unsigned remainder is returned.
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t dividend = UNSIGNED(arg1, df);
    const uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3134 
3135 void helper_msa_mod_u_b(CPUMIPSState *env,
3136                         uint32_t wd, uint32_t ws, uint32_t wt)
3137 {
3138     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3139     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3140     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3141 
3142     pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3143     pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3144     pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3145     pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3146     pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3147     pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3148     pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3149     pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3150     pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3151     pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3152     pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3153     pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3154     pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3155     pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3156     pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3157     pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3158 }
3159 
3160 void helper_msa_mod_u_h(CPUMIPSState *env,
3161                         uint32_t wd, uint32_t ws, uint32_t wt)
3162 {
3163     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3164     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3165     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3166 
3167     pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3168     pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3169     pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3170     pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3171     pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3172     pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3173     pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3174     pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3175 }
3176 
3177 void helper_msa_mod_u_w(CPUMIPSState *env,
3178                         uint32_t wd, uint32_t ws, uint32_t wt)
3179 {
3180     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3181     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3182     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3183 
3184     pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3185     pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3186     pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3187     pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3188 }
3189 
3190 void helper_msa_mod_u_d(CPUMIPSState *env,
3191                         uint32_t wd, uint32_t ws, uint32_t wt)
3192 {
3193     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3194     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3195     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3196 
3197     pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3198     pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3199 }
3200 
3201 
3202 /*
3203  * Int Multiply
3204  * ------------
3205  *
3206  * +---------------+----------------------------------------------------------+
3207  * | MADDV.B       | Vector Multiply and Add (byte)                           |
3208  * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3209  * | MADDV.W       | Vector Multiply and Add (word)                           |
3210  * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3211  * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3212  * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3213  * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3214  * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3215  * | MULV.B        | Vector Multiply (byte)                                   |
3216  * | MULV.H        | Vector Multiply (halfword)                               |
3217  * | MULV.W        | Vector Multiply (word)                                   |
3218  * | MULV.D        | Vector Multiply (doubleword)                             |
3219  * +---------------+----------------------------------------------------------+
3220  */
3221 
/*
 * Multiply-and-add for one vector element: dest + arg1 * arg2.
 *
 * The arithmetic is carried out in uint64_t so that it wraps modulo 2^64
 * instead of overflowing a signed type (undefined behaviour in ISO C).
 * The low-order bits of the result are the same as those of the signed
 * computation, so callers that store the value into a narrower element
 * see no difference.  The df argument is not read.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
}
3227 
3228 void helper_msa_maddv_b(CPUMIPSState *env,
3229                         uint32_t wd, uint32_t ws, uint32_t wt)
3230 {
3231     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3232     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3233     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3234 
3235     pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3236     pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3237     pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3238     pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3239     pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3240     pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3241     pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3242     pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3243     pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3244     pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3245     pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3246     pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3247     pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3248     pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3249     pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3250     pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3251 }
3252 
3253 void helper_msa_maddv_h(CPUMIPSState *env,
3254                         uint32_t wd, uint32_t ws, uint32_t wt)
3255 {
3256     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3257     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3258     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3259 
3260     pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3261     pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3262     pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3263     pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3264     pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3265     pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3266     pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3267     pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3268 }
3269 
3270 void helper_msa_maddv_w(CPUMIPSState *env,
3271                         uint32_t wd, uint32_t ws, uint32_t wt)
3272 {
3273     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3274     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3275     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3276 
3277     pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3278     pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3279     pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3280     pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3281 }
3282 
3283 void helper_msa_maddv_d(CPUMIPSState *env,
3284                         uint32_t wd, uint32_t ws, uint32_t wt)
3285 {
3286     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3287     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3288     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3289 
3290     pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3291     pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3292 }
3293 
/*
 * Multiply-and-subtract for one vector element: dest - arg1 * arg2.
 *
 * The arithmetic is carried out in uint64_t so that it wraps modulo 2^64
 * instead of overflowing a signed type (undefined behaviour in ISO C).
 * The low-order bits of the result are the same as those of the signed
 * computation, so callers that store the value into a narrower element
 * see no difference.  The df argument is not read.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
}
3299 
3300 void helper_msa_msubv_b(CPUMIPSState *env,
3301                         uint32_t wd, uint32_t ws, uint32_t wt)
3302 {
3303     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3304     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3305     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3306 
3307     pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3308     pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3309     pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3310     pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3311     pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3312     pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3313     pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3314     pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3315     pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3316     pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3317     pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3318     pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3319     pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3320     pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3321     pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3322     pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3323 }
3324 
3325 void helper_msa_msubv_h(CPUMIPSState *env,
3326                         uint32_t wd, uint32_t ws, uint32_t wt)
3327 {
3328     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3329     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3330     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3331 
3332     pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3333     pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3334     pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3335     pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3336     pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3337     pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3338     pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3339     pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3340 }
3341 
3342 void helper_msa_msubv_w(CPUMIPSState *env,
3343                         uint32_t wd, uint32_t ws, uint32_t wt)
3344 {
3345     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3346     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3347     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3348 
3349     pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3350     pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3351     pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3352     pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3353 }
3354 
3355 void helper_msa_msubv_d(CPUMIPSState *env,
3356                         uint32_t wd, uint32_t ws, uint32_t wt)
3357 {
3358     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3359     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3360     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3361 
3362     pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3363     pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3364 }
3365 
3366 
/*
 * Per-element multiply: returns the low 64 bits of arg1 * arg2.
 *
 * df is accepted for symmetry with the other per-element helpers but is not
 * used; the caller's store into the destination lane does the narrowing.
 *
 * The product is formed in uint64_t so that wraparound is well defined even
 * when the signed product would not fit in int64_t (possible for doubleword
 * lanes). The low 64 bits are the same as those of the two's-complement
 * signed product.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    (void)df;
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3371 
3372 void helper_msa_mulv_b(CPUMIPSState *env,
3373                        uint32_t wd, uint32_t ws, uint32_t wt)
3374 {
3375     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3376     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3377     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3378 
3379     pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3380     pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3381     pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3382     pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3383     pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3384     pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3385     pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3386     pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3387     pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3388     pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3389     pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3390     pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3391     pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3392     pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3393     pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3394     pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3395 }
3396 
3397 void helper_msa_mulv_h(CPUMIPSState *env,
3398                        uint32_t wd, uint32_t ws, uint32_t wt)
3399 {
3400     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3401     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3402     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3403 
3404     pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3405     pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3406     pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3407     pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3408     pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3409     pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3410     pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3411     pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3412 }
3413 
3414 void helper_msa_mulv_w(CPUMIPSState *env,
3415                        uint32_t wd, uint32_t ws, uint32_t wt)
3416 {
3417     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3418     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3419     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3420 
3421     pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3422     pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3423     pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3424     pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3425 }
3426 
3427 void helper_msa_mulv_d(CPUMIPSState *env,
3428                        uint32_t wd, uint32_t ws, uint32_t wt)
3429 {
3430     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3431     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3432     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3433 
3434     pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3435     pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3436 }
3437 
3438 
3439 /*
3440  * Int Subtract
3441  * ------------
3442  *
3443  * +---------------+----------------------------------------------------------+
3444  * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3445  * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3446  * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3447  * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3448  * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3449  * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3450  * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3451  * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3452  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3453  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3454  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3455  * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3456  * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3457  * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3458  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3459  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3460  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3461  * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3462  * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3463  * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3464  * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3465  * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3466  * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3467  * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3468  * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3469  * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3470  * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3471  * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3472  * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3473  * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3474  * | SUBV.B        | Vector Subtract (byte)                                   |
3475  * | SUBV.H        | Vector Subtract (halfword)                               |
3476  * | SUBV.W        | Vector Subtract (word)                                   |
3477  * | SUBV.D        | Vector Subtract (doubleword)                             |
3478  * +---------------+----------------------------------------------------------+
3479  */
3480 
3481 
/*
 * Absolute value of the signed difference arg1 - arg2.
 *
 * The operands are compared as signed values, then the larger minus the
 * smaller is computed in uint64_t. Converting before subtracting matters:
 * the distance between two int64_t values can exceed INT64_MAX (e.g.
 * INT64_MAX - INT64_MIN), which would overflow a signed subtraction.
 *
 * df is not used here; the caller's store into the destination lane does
 * the narrowing. The result is returned as the int64_t bit pattern of the
 * unsigned distance.
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    (void)df;

    /* signed compare, unsigned subtract */
    return (int64_t)((arg1 < arg2) ?
        (uint64_t)arg2 - (uint64_t)arg1 : (uint64_t)arg1 - (uint64_t)arg2);
}
3488 
3489 void helper_msa_asub_s_b(CPUMIPSState *env,
3490                          uint32_t wd, uint32_t ws, uint32_t wt)
3491 {
3492     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3493     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3494     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3495 
3496     pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3497     pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3498     pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3499     pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3500     pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3501     pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3502     pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3503     pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3504     pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3505     pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3506     pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3507     pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3508     pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3509     pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3510     pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3511     pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3512 }
3513 
3514 void helper_msa_asub_s_h(CPUMIPSState *env,
3515                          uint32_t wd, uint32_t ws, uint32_t wt)
3516 {
3517     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3518     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3519     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3520 
3521     pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3522     pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3523     pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3524     pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3525     pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3526     pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3527     pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3528     pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3529 }
3530 
3531 void helper_msa_asub_s_w(CPUMIPSState *env,
3532                          uint32_t wd, uint32_t ws, uint32_t wt)
3533 {
3534     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3535     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3536     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3537 
3538     pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3539     pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3540     pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3541     pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3542 }
3543 
3544 void helper_msa_asub_s_d(CPUMIPSState *env,
3545                          uint32_t wd, uint32_t ws, uint32_t wt)
3546 {
3547     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3548     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3549     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3550 
3551     pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3552     pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3553 }
3554 
3555 
/*
 * Absolute value of the unsigned difference of two df-wide elements.
 *
 * Both operands are first masked with UNSIGNED(), i.e. reduced to their low
 * DF_BITS(df) bits, and then the smaller is subtracted from the larger.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare picks the operand order so the result never wraps */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3564 
3565 void helper_msa_asub_u_b(CPUMIPSState *env,
3566                          uint32_t wd, uint32_t ws, uint32_t wt)
3567 {
3568     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3569     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3570     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3571 
3572     pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3573     pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3574     pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3575     pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3576     pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3577     pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3578     pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3579     pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3580     pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3581     pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3582     pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3583     pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3584     pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3585     pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3586     pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3587     pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3588 }
3589 
3590 void helper_msa_asub_u_h(CPUMIPSState *env,
3591                          uint32_t wd, uint32_t ws, uint32_t wt)
3592 {
3593     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3594     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3595     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3596 
3597     pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3598     pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3599     pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3600     pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3601     pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3602     pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3603     pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3604     pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3605 }
3606 
3607 void helper_msa_asub_u_w(CPUMIPSState *env,
3608                          uint32_t wd, uint32_t ws, uint32_t wt)
3609 {
3610     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3611     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3612     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3613 
3614     pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3615     pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3616     pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3617     pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3618 }
3619 
3620 void helper_msa_asub_u_d(CPUMIPSState *env,
3621                          uint32_t wd, uint32_t ws, uint32_t wt)
3622 {
3623     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3624     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3625     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3626 
3627     pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3628     pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3629 }
3630 
3631 
/*
 * Signed horizontal subtract for one df-wide element: returns
 * SIGNED_ODD(arg1, df) minus SIGNED_EVEN(arg2, df).
 *
 * NOTE(review): SIGNED_ODD/SIGNED_EVEN are defined elsewhere in this file;
 * presumably they yield the sign-extended odd and even half-width parts of
 * the element - confirm against their definitions.
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);

    return diff;
}
3636 
3637 void helper_msa_hsub_s_h(CPUMIPSState *env,
3638                          uint32_t wd, uint32_t ws, uint32_t wt)
3639 {
3640     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3641     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3642     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3643 
3644     pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3645     pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3646     pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3647     pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3648     pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3649     pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3650     pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3651     pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3652 }
3653 
3654 void helper_msa_hsub_s_w(CPUMIPSState *env,
3655                          uint32_t wd, uint32_t ws, uint32_t wt)
3656 {
3657     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3658     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3659     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3660 
3661     pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3662     pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3663     pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3664     pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3665 }
3666 
3667 void helper_msa_hsub_s_d(CPUMIPSState *env,
3668                          uint32_t wd, uint32_t ws, uint32_t wt)
3669 {
3670     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3671     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3672     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3673 
3674     pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3675     pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3676 }
3677 
3678 
/*
 * Unsigned horizontal subtract for one df-wide element: returns
 * UNSIGNED_ODD(arg1, df) minus UNSIGNED_EVEN(arg2, df), converted to int64_t.
 *
 * NOTE(review): UNSIGNED_ODD/UNSIGNED_EVEN are defined elsewhere in this
 * file; presumably they yield the zero-extended odd and even half-width
 * parts of the element - confirm against their definitions.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);

    return diff;
}
3683 
3684 void helper_msa_hsub_u_h(CPUMIPSState *env,
3685                          uint32_t wd, uint32_t ws, uint32_t wt)
3686 {
3687     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3688     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3689     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3690 
3691     pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3692     pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3693     pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3694     pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3695     pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3696     pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3697     pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3698     pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3699 }
3700 
3701 void helper_msa_hsub_u_w(CPUMIPSState *env,
3702                          uint32_t wd, uint32_t ws, uint32_t wt)
3703 {
3704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3707 
3708     pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3709     pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3710     pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3711     pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3712 }
3713 
3714 void helper_msa_hsub_u_d(CPUMIPSState *env,
3715                          uint32_t wd, uint32_t ws, uint32_t wt)
3716 {
3717     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3718     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3719     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3720 
3721     pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3722     pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3723 }
3724 
3725 
/*
 * Signed saturating subtract: arg1 - arg2 clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The range checks are rearranged so that they are evaluated before the
 * subtraction itself: the difference is only formed once it is known to lie
 * strictly inside the bound, otherwise the bound is returned directly.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg2 > 0) {
        /* Subtracting a positive value can only fall below the lower bound. */
        if (lower + arg2 < arg1) {
            return arg1 - arg2;
        }
        return lower;
    }

    /* Subtracting zero or a negative value can only exceed the upper bound. */
    if (arg1 < upper + arg2) {
        return arg1 - arg2;
    }
    return upper;
}
3736 
3737 void helper_msa_subs_s_b(CPUMIPSState *env,
3738                          uint32_t wd, uint32_t ws, uint32_t wt)
3739 {
3740     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3741     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3742     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3743 
3744     pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3745     pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3746     pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3747     pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3748     pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3749     pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3750     pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3751     pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3752     pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3753     pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3754     pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3755     pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3756     pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3757     pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3758     pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3759     pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3760 }
3761 
3762 void helper_msa_subs_s_h(CPUMIPSState *env,
3763                          uint32_t wd, uint32_t ws, uint32_t wt)
3764 {
3765     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3766     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3767     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3768 
3769     pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3770     pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3771     pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3772     pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3773     pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3774     pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3775     pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3776     pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3777 }
3778 
3779 void helper_msa_subs_s_w(CPUMIPSState *env,
3780                          uint32_t wd, uint32_t ws, uint32_t wt)
3781 {
3782     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3783     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3784     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3785 
3786     pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3787     pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3788     pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3789     pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3790 }
3791 
3792 void helper_msa_subs_s_d(CPUMIPSState *env,
3793                          uint32_t wd, uint32_t ws, uint32_t wt)
3794 {
3795     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3796     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3797     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3798 
3799     pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3800     pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3801 }
3802 
3803 
/*
 * Unsigned saturating subtract of two df-wide elements.
 *
 * Both operands are masked with UNSIGNED() to their low DF_BITS(df) bits.
 * The result is the difference when the first operand is strictly larger,
 * and 0 otherwise.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t minuend = UNSIGNED(arg1, df);
    const uint64_t subtrahend = UNSIGNED(arg2, df);

    /* Would be zero or wrap below zero: saturate at 0. */
    if (minuend <= subtrahend) {
        return 0;
    }
    return minuend - subtrahend;
}
3810 
3811 void helper_msa_subs_u_b(CPUMIPSState *env,
3812                          uint32_t wd, uint32_t ws, uint32_t wt)
3813 {
3814     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3815     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3816     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3817 
3818     pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3819     pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3820     pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3821     pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3822     pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3823     pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3824     pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3825     pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3826     pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3827     pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3828     pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3829     pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3830     pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3831     pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3832     pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3833     pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3834 }
3835 
3836 void helper_msa_subs_u_h(CPUMIPSState *env,
3837                          uint32_t wd, uint32_t ws, uint32_t wt)
3838 {
3839     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3840     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3841     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3842 
3843     pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3844     pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3845     pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3846     pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3847     pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3848     pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3849     pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3850     pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3851 }
3852 
3853 void helper_msa_subs_u_w(CPUMIPSState *env,
3854                          uint32_t wd, uint32_t ws, uint32_t wt)
3855 {
3856     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3857     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3858     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3859 
3860     pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3861     pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3862     pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3863     pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3864 }
3865 
3866 void helper_msa_subs_u_d(CPUMIPSState *env,
3867                          uint32_t wd, uint32_t ws, uint32_t wt)
3868 {
3869     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3870     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3871     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3872 
3873     pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3874     pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3875 }
3876 
3877 
/*
 * Unsigned saturating subtract of a signed value from an unsigned value.
 *
 * arg1 is masked with UNSIGNED() to its low DF_BITS(df) bits; arg2 is used
 * as the signed 64-bit value passed in. The result is clamped to the range
 * [0, DF_MAX_UINT(df)].
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);

    if (arg2 >= 0) {
        /* Subtracting a non-negative value: saturate at zero. */
        uint64_t u_arg2 = (uint64_t)arg2;
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    } else {
        /*
         * Subtracting a negative value adds its magnitude. The magnitude is
         * computed in uint64_t because negating arg2 as a signed value
         * overflows when arg2 is INT64_MIN (reachable with doubleword lanes).
         */
        uint64_t u_arg2 = 0 - (uint64_t)arg2;
        return (u_arg1 < max_uint - u_arg2) ?
            (int64_t)(u_arg1 + u_arg2) :
            (int64_t)max_uint;
    }
}
3894 
3895 void helper_msa_subsus_u_b(CPUMIPSState *env,
3896                            uint32_t wd, uint32_t ws, uint32_t wt)
3897 {
3898     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3899     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3900     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3901 
3902     pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3903     pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3904     pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3905     pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3906     pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3907     pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3908     pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3909     pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3910     pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3911     pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3912     pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3913     pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3914     pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3915     pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3916     pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3917     pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3918 }
3919 
3920 void helper_msa_subsus_u_h(CPUMIPSState *env,
3921                            uint32_t wd, uint32_t ws, uint32_t wt)
3922 {
3923     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3924     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3925     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3926 
3927     pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3928     pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3929     pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3930     pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3931     pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3932     pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3933     pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3934     pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3935 }
3936 
3937 void helper_msa_subsus_u_w(CPUMIPSState *env,
3938                            uint32_t wd, uint32_t ws, uint32_t wt)
3939 {
3940     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3941     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3942     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3943 
3944     pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3945     pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3946     pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3947     pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3948 }
3949 
3950 void helper_msa_subsus_u_d(CPUMIPSState *env,
3951                            uint32_t wd, uint32_t ws, uint32_t wt)
3952 {
3953     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3954     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3955     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3956 
3957     pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3958     pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3959 }
3960 
3961 
/*
 * Signed saturating subtract of two unsigned values.
 *
 * Both operands are masked with UNSIGNED() to their low DF_BITS(df) bits and
 * subtracted as unsigned quantities; the difference is then clamped to the
 * signed range [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);
    int64_t max_int = DF_MAX_INT(df);
    int64_t min_int = DF_MIN_INT(df);

    if (u_arg1 > u_arg2) {
        /* Positive difference: saturate at max_int. */
        return u_arg1 - u_arg2 < (uint64_t)max_int ?
            (int64_t)(u_arg1 - u_arg2) :
            max_int;
    } else {
        /*
         * Zero or negative difference: saturate at min_int. The magnitude
         * of min_int is taken in uint64_t because negating it as a signed
         * value overflows when min_int is INT64_MIN (df == DF_DOUBLE).
         */
        uint64_t min_magnitude = 0 - (uint64_t)min_int;
        return u_arg2 - u_arg1 < min_magnitude ?
            (int64_t)(u_arg1 - u_arg2) :
            min_int;
    }
}
3978 
3979 void helper_msa_subsuu_s_b(CPUMIPSState *env,
3980                            uint32_t wd, uint32_t ws, uint32_t wt)
3981 {
3982     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3983     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3984     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3985 
3986     pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3987     pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3988     pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3989     pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3990     pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3991     pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3992     pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3993     pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3994     pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3995     pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3996     pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3997     pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3998     pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3999     pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
4000     pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4001     pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4002 }
4003 
4004 void helper_msa_subsuu_s_h(CPUMIPSState *env,
4005                            uint32_t wd, uint32_t ws, uint32_t wt)
4006 {
4007     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4008     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4009     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4010 
4011     pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4012     pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4013     pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4014     pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4015     pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4016     pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4017     pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4018     pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4019 }
4020 
4021 void helper_msa_subsuu_s_w(CPUMIPSState *env,
4022                            uint32_t wd, uint32_t ws, uint32_t wt)
4023 {
4024     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4025     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4026     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4027 
4028     pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4029     pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4030     pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4031     pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4032 }
4033 
4034 void helper_msa_subsuu_s_d(CPUMIPSState *env,
4035                            uint32_t wd, uint32_t ws, uint32_t wt)
4036 {
4037     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4038     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4039     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4040 
4041     pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4042     pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4043 }
4044 
4045 
/*
 * Per-element subtraction used by the subv helpers.
 *
 * @df:   data format selector; unused here, kept so the signature matches
 *        the other per-element helpers in this file (e.g. msa_sll_df()).
 * @arg1: minuend
 * @arg2: subtrahend
 *
 * Returns arg1 - arg2 with two's-complement wrap-around.  The difference
 * is formed in uint64_t because a signed int64_t subtraction that
 * overflows (e.g. INT64_MIN - 1) is undefined behaviour in ISO C; the
 * unsigned form yields the same bit pattern without depending on
 * compiler flags.  Callers store the result into a narrower lane, which
 * keeps only the low bits.
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4050 
4051 void helper_msa_subv_b(CPUMIPSState *env,
4052                        uint32_t wd, uint32_t ws, uint32_t wt)
4053 {
4054     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4055     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4056     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4057 
4058     pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4059     pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4060     pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4061     pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4062     pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4063     pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4064     pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4065     pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4066     pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4067     pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4068     pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4069     pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4070     pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4071     pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4072     pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4073     pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4074 }
4075 
4076 void helper_msa_subv_h(CPUMIPSState *env,
4077                        uint32_t wd, uint32_t ws, uint32_t wt)
4078 {
4079     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4080     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4081     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4082 
4083     pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4084     pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4085     pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4086     pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4087     pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4088     pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4089     pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4090     pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4091 }
4092 
4093 void helper_msa_subv_w(CPUMIPSState *env,
4094                        uint32_t wd, uint32_t ws, uint32_t wt)
4095 {
4096     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4097     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4098     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4099 
4100     pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4101     pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4102     pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4103     pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4104 }
4105 
4106 void helper_msa_subv_d(CPUMIPSState *env,
4107                        uint32_t wd, uint32_t ws, uint32_t wt)
4108 {
4109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4112 
4113     pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4114     pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4115 }
4116 
4117 
4118 /*
4119  * Interleave
4120  * ----------
4121  *
4122  * +---------------+----------------------------------------------------------+
4123  * | ILVEV.B       | Vector Interleave Even (byte)                            |
4124  * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4125  * | ILVEV.W       | Vector Interleave Even (word)                            |
4126  * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4127  * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4128  * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4129  * | ILVOD.W       | Vector Interleave Odd (word)                             |
4130  * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4131  * | ILVL.B        | Vector Interleave Left (byte)                            |
4132  * | ILVL.H        | Vector Interleave Left (halfword)                        |
4133  * | ILVL.W        | Vector Interleave Left (word)                            |
4134  * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4135  * | ILVR.B        | Vector Interleave Right (byte)                           |
4136  * | ILVR.H        | Vector Interleave Right (halfword)                       |
4137  * | ILVR.W        | Vector Interleave Right (word)                           |
4138  * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4139  * +---------------+----------------------------------------------------------+
4140  */
4141 
4142 
4143 void helper_msa_ilvev_b(CPUMIPSState *env,
4144                         uint32_t wd, uint32_t ws, uint32_t wt)
4145 {
4146     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4147     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4148     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4149 
4150 #if HOST_BIG_ENDIAN
4151     pwd->b[8]  = pws->b[9];
4152     pwd->b[9]  = pwt->b[9];
4153     pwd->b[10] = pws->b[11];
4154     pwd->b[11] = pwt->b[11];
4155     pwd->b[12] = pws->b[13];
4156     pwd->b[13] = pwt->b[13];
4157     pwd->b[14] = pws->b[15];
4158     pwd->b[15] = pwt->b[15];
4159     pwd->b[0]  = pws->b[1];
4160     pwd->b[1]  = pwt->b[1];
4161     pwd->b[2]  = pws->b[3];
4162     pwd->b[3]  = pwt->b[3];
4163     pwd->b[4]  = pws->b[5];
4164     pwd->b[5]  = pwt->b[5];
4165     pwd->b[6]  = pws->b[7];
4166     pwd->b[7]  = pwt->b[7];
4167 #else
4168     pwd->b[15] = pws->b[14];
4169     pwd->b[14] = pwt->b[14];
4170     pwd->b[13] = pws->b[12];
4171     pwd->b[12] = pwt->b[12];
4172     pwd->b[11] = pws->b[10];
4173     pwd->b[10] = pwt->b[10];
4174     pwd->b[9]  = pws->b[8];
4175     pwd->b[8]  = pwt->b[8];
4176     pwd->b[7]  = pws->b[6];
4177     pwd->b[6]  = pwt->b[6];
4178     pwd->b[5]  = pws->b[4];
4179     pwd->b[4]  = pwt->b[4];
4180     pwd->b[3]  = pws->b[2];
4181     pwd->b[2]  = pwt->b[2];
4182     pwd->b[1]  = pws->b[0];
4183     pwd->b[0]  = pwt->b[0];
4184 #endif
4185 }
4186 
4187 void helper_msa_ilvev_h(CPUMIPSState *env,
4188                         uint32_t wd, uint32_t ws, uint32_t wt)
4189 {
4190     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4191     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4192     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4193 
4194 #if HOST_BIG_ENDIAN
4195     pwd->h[4] = pws->h[5];
4196     pwd->h[5] = pwt->h[5];
4197     pwd->h[6] = pws->h[7];
4198     pwd->h[7] = pwt->h[7];
4199     pwd->h[0] = pws->h[1];
4200     pwd->h[1] = pwt->h[1];
4201     pwd->h[2] = pws->h[3];
4202     pwd->h[3] = pwt->h[3];
4203 #else
4204     pwd->h[7] = pws->h[6];
4205     pwd->h[6] = pwt->h[6];
4206     pwd->h[5] = pws->h[4];
4207     pwd->h[4] = pwt->h[4];
4208     pwd->h[3] = pws->h[2];
4209     pwd->h[2] = pwt->h[2];
4210     pwd->h[1] = pws->h[0];
4211     pwd->h[0] = pwt->h[0];
4212 #endif
4213 }
4214 
4215 void helper_msa_ilvev_w(CPUMIPSState *env,
4216                         uint32_t wd, uint32_t ws, uint32_t wt)
4217 {
4218     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4219     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4220     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4221 
4222 #if HOST_BIG_ENDIAN
4223     pwd->w[2] = pws->w[3];
4224     pwd->w[3] = pwt->w[3];
4225     pwd->w[0] = pws->w[1];
4226     pwd->w[1] = pwt->w[1];
4227 #else
4228     pwd->w[3] = pws->w[2];
4229     pwd->w[2] = pwt->w[2];
4230     pwd->w[1] = pws->w[0];
4231     pwd->w[0] = pwt->w[0];
4232 #endif
4233 }
4234 
4235 void helper_msa_ilvev_d(CPUMIPSState *env,
4236                         uint32_t wd, uint32_t ws, uint32_t wt)
4237 {
4238     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4239     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4240     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4241 
4242     pwd->d[1] = pws->d[0];
4243     pwd->d[0] = pwt->d[0];
4244 }
4245 
4246 
4247 void helper_msa_ilvod_b(CPUMIPSState *env,
4248                         uint32_t wd, uint32_t ws, uint32_t wt)
4249 {
4250     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4251     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4252     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4253 
4254 #if HOST_BIG_ENDIAN
4255     pwd->b[7]  = pwt->b[6];
4256     pwd->b[6]  = pws->b[6];
4257     pwd->b[5]  = pwt->b[4];
4258     pwd->b[4]  = pws->b[4];
4259     pwd->b[3]  = pwt->b[2];
4260     pwd->b[2]  = pws->b[2];
4261     pwd->b[1]  = pwt->b[0];
4262     pwd->b[0]  = pws->b[0];
4263     pwd->b[15] = pwt->b[14];
4264     pwd->b[14] = pws->b[14];
4265     pwd->b[13] = pwt->b[12];
4266     pwd->b[12] = pws->b[12];
4267     pwd->b[11] = pwt->b[10];
4268     pwd->b[10] = pws->b[10];
4269     pwd->b[9]  = pwt->b[8];
4270     pwd->b[8]  = pws->b[8];
4271 #else
4272     pwd->b[0]  = pwt->b[1];
4273     pwd->b[1]  = pws->b[1];
4274     pwd->b[2]  = pwt->b[3];
4275     pwd->b[3]  = pws->b[3];
4276     pwd->b[4]  = pwt->b[5];
4277     pwd->b[5]  = pws->b[5];
4278     pwd->b[6]  = pwt->b[7];
4279     pwd->b[7]  = pws->b[7];
4280     pwd->b[8]  = pwt->b[9];
4281     pwd->b[9]  = pws->b[9];
4282     pwd->b[10] = pwt->b[11];
4283     pwd->b[11] = pws->b[11];
4284     pwd->b[12] = pwt->b[13];
4285     pwd->b[13] = pws->b[13];
4286     pwd->b[14] = pwt->b[15];
4287     pwd->b[15] = pws->b[15];
4288 #endif
4289 }
4290 
4291 void helper_msa_ilvod_h(CPUMIPSState *env,
4292                         uint32_t wd, uint32_t ws, uint32_t wt)
4293 {
4294     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4295     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4296     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4297 
4298 #if HOST_BIG_ENDIAN
4299     pwd->h[3] = pwt->h[2];
4300     pwd->h[2] = pws->h[2];
4301     pwd->h[1] = pwt->h[0];
4302     pwd->h[0] = pws->h[0];
4303     pwd->h[7] = pwt->h[6];
4304     pwd->h[6] = pws->h[6];
4305     pwd->h[5] = pwt->h[4];
4306     pwd->h[4] = pws->h[4];
4307 #else
4308     pwd->h[0] = pwt->h[1];
4309     pwd->h[1] = pws->h[1];
4310     pwd->h[2] = pwt->h[3];
4311     pwd->h[3] = pws->h[3];
4312     pwd->h[4] = pwt->h[5];
4313     pwd->h[5] = pws->h[5];
4314     pwd->h[6] = pwt->h[7];
4315     pwd->h[7] = pws->h[7];
4316 #endif
4317 }
4318 
4319 void helper_msa_ilvod_w(CPUMIPSState *env,
4320                         uint32_t wd, uint32_t ws, uint32_t wt)
4321 {
4322     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4323     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4324     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4325 
4326 #if HOST_BIG_ENDIAN
4327     pwd->w[1] = pwt->w[0];
4328     pwd->w[0] = pws->w[0];
4329     pwd->w[3] = pwt->w[2];
4330     pwd->w[2] = pws->w[2];
4331 #else
4332     pwd->w[0] = pwt->w[1];
4333     pwd->w[1] = pws->w[1];
4334     pwd->w[2] = pwt->w[3];
4335     pwd->w[3] = pws->w[3];
4336 #endif
4337 }
4338 
4339 void helper_msa_ilvod_d(CPUMIPSState *env,
4340                         uint32_t wd, uint32_t ws, uint32_t wt)
4341 {
4342     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4343     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4344     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4345 
4346     pwd->d[0] = pwt->d[1];
4347     pwd->d[1] = pws->d[1];
4348 }
4349 
4350 
4351 void helper_msa_ilvl_b(CPUMIPSState *env,
4352                        uint32_t wd, uint32_t ws, uint32_t wt)
4353 {
4354     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4355     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4356     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4357 
4358 #if HOST_BIG_ENDIAN
4359     pwd->b[7]  = pwt->b[15];
4360     pwd->b[6]  = pws->b[15];
4361     pwd->b[5]  = pwt->b[14];
4362     pwd->b[4]  = pws->b[14];
4363     pwd->b[3]  = pwt->b[13];
4364     pwd->b[2]  = pws->b[13];
4365     pwd->b[1]  = pwt->b[12];
4366     pwd->b[0]  = pws->b[12];
4367     pwd->b[15] = pwt->b[11];
4368     pwd->b[14] = pws->b[11];
4369     pwd->b[13] = pwt->b[10];
4370     pwd->b[12] = pws->b[10];
4371     pwd->b[11] = pwt->b[9];
4372     pwd->b[10] = pws->b[9];
4373     pwd->b[9]  = pwt->b[8];
4374     pwd->b[8]  = pws->b[8];
4375 #else
4376     pwd->b[0]  = pwt->b[8];
4377     pwd->b[1]  = pws->b[8];
4378     pwd->b[2]  = pwt->b[9];
4379     pwd->b[3]  = pws->b[9];
4380     pwd->b[4]  = pwt->b[10];
4381     pwd->b[5]  = pws->b[10];
4382     pwd->b[6]  = pwt->b[11];
4383     pwd->b[7]  = pws->b[11];
4384     pwd->b[8]  = pwt->b[12];
4385     pwd->b[9]  = pws->b[12];
4386     pwd->b[10] = pwt->b[13];
4387     pwd->b[11] = pws->b[13];
4388     pwd->b[12] = pwt->b[14];
4389     pwd->b[13] = pws->b[14];
4390     pwd->b[14] = pwt->b[15];
4391     pwd->b[15] = pws->b[15];
4392 #endif
4393 }
4394 
4395 void helper_msa_ilvl_h(CPUMIPSState *env,
4396                        uint32_t wd, uint32_t ws, uint32_t wt)
4397 {
4398     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4399     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4400     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4401 
4402 #if HOST_BIG_ENDIAN
4403     pwd->h[3] = pwt->h[7];
4404     pwd->h[2] = pws->h[7];
4405     pwd->h[1] = pwt->h[6];
4406     pwd->h[0] = pws->h[6];
4407     pwd->h[7] = pwt->h[5];
4408     pwd->h[6] = pws->h[5];
4409     pwd->h[5] = pwt->h[4];
4410     pwd->h[4] = pws->h[4];
4411 #else
4412     pwd->h[0] = pwt->h[4];
4413     pwd->h[1] = pws->h[4];
4414     pwd->h[2] = pwt->h[5];
4415     pwd->h[3] = pws->h[5];
4416     pwd->h[4] = pwt->h[6];
4417     pwd->h[5] = pws->h[6];
4418     pwd->h[6] = pwt->h[7];
4419     pwd->h[7] = pws->h[7];
4420 #endif
4421 }
4422 
4423 void helper_msa_ilvl_w(CPUMIPSState *env,
4424                        uint32_t wd, uint32_t ws, uint32_t wt)
4425 {
4426     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4427     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4428     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4429 
4430 #if HOST_BIG_ENDIAN
4431     pwd->w[1] = pwt->w[3];
4432     pwd->w[0] = pws->w[3];
4433     pwd->w[3] = pwt->w[2];
4434     pwd->w[2] = pws->w[2];
4435 #else
4436     pwd->w[0] = pwt->w[2];
4437     pwd->w[1] = pws->w[2];
4438     pwd->w[2] = pwt->w[3];
4439     pwd->w[3] = pws->w[3];
4440 #endif
4441 }
4442 
4443 void helper_msa_ilvl_d(CPUMIPSState *env,
4444                        uint32_t wd, uint32_t ws, uint32_t wt)
4445 {
4446     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4447     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4448     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4449 
4450     pwd->d[0] = pwt->d[1];
4451     pwd->d[1] = pws->d[1];
4452 }
4453 
4454 
4455 void helper_msa_ilvr_b(CPUMIPSState *env,
4456                        uint32_t wd, uint32_t ws, uint32_t wt)
4457 {
4458     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4459     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4460     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4461 
4462 #if HOST_BIG_ENDIAN
4463     pwd->b[8]  = pws->b[0];
4464     pwd->b[9]  = pwt->b[0];
4465     pwd->b[10] = pws->b[1];
4466     pwd->b[11] = pwt->b[1];
4467     pwd->b[12] = pws->b[2];
4468     pwd->b[13] = pwt->b[2];
4469     pwd->b[14] = pws->b[3];
4470     pwd->b[15] = pwt->b[3];
4471     pwd->b[0]  = pws->b[4];
4472     pwd->b[1]  = pwt->b[4];
4473     pwd->b[2]  = pws->b[5];
4474     pwd->b[3]  = pwt->b[5];
4475     pwd->b[4]  = pws->b[6];
4476     pwd->b[5]  = pwt->b[6];
4477     pwd->b[6]  = pws->b[7];
4478     pwd->b[7]  = pwt->b[7];
4479 #else
4480     pwd->b[15] = pws->b[7];
4481     pwd->b[14] = pwt->b[7];
4482     pwd->b[13] = pws->b[6];
4483     pwd->b[12] = pwt->b[6];
4484     pwd->b[11] = pws->b[5];
4485     pwd->b[10] = pwt->b[5];
4486     pwd->b[9]  = pws->b[4];
4487     pwd->b[8]  = pwt->b[4];
4488     pwd->b[7]  = pws->b[3];
4489     pwd->b[6]  = pwt->b[3];
4490     pwd->b[5]  = pws->b[2];
4491     pwd->b[4]  = pwt->b[2];
4492     pwd->b[3]  = pws->b[1];
4493     pwd->b[2]  = pwt->b[1];
4494     pwd->b[1]  = pws->b[0];
4495     pwd->b[0]  = pwt->b[0];
4496 #endif
4497 }
4498 
4499 void helper_msa_ilvr_h(CPUMIPSState *env,
4500                        uint32_t wd, uint32_t ws, uint32_t wt)
4501 {
4502     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4503     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4504     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4505 
4506 #if HOST_BIG_ENDIAN
4507     pwd->h[4] = pws->h[0];
4508     pwd->h[5] = pwt->h[0];
4509     pwd->h[6] = pws->h[1];
4510     pwd->h[7] = pwt->h[1];
4511     pwd->h[0] = pws->h[2];
4512     pwd->h[1] = pwt->h[2];
4513     pwd->h[2] = pws->h[3];
4514     pwd->h[3] = pwt->h[3];
4515 #else
4516     pwd->h[7] = pws->h[3];
4517     pwd->h[6] = pwt->h[3];
4518     pwd->h[5] = pws->h[2];
4519     pwd->h[4] = pwt->h[2];
4520     pwd->h[3] = pws->h[1];
4521     pwd->h[2] = pwt->h[1];
4522     pwd->h[1] = pws->h[0];
4523     pwd->h[0] = pwt->h[0];
4524 #endif
4525 }
4526 
4527 void helper_msa_ilvr_w(CPUMIPSState *env,
4528                        uint32_t wd, uint32_t ws, uint32_t wt)
4529 {
4530     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4531     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4532     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4533 
4534 #if HOST_BIG_ENDIAN
4535     pwd->w[2] = pws->w[0];
4536     pwd->w[3] = pwt->w[0];
4537     pwd->w[0] = pws->w[1];
4538     pwd->w[1] = pwt->w[1];
4539 #else
4540     pwd->w[3] = pws->w[1];
4541     pwd->w[2] = pwt->w[1];
4542     pwd->w[1] = pws->w[0];
4543     pwd->w[0] = pwt->w[0];
4544 #endif
4545 }
4546 
4547 void helper_msa_ilvr_d(CPUMIPSState *env,
4548                        uint32_t wd, uint32_t ws, uint32_t wt)
4549 {
4550     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4551     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4552     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4553 
4554     pwd->d[1] = pws->d[0];
4555     pwd->d[0] = pwt->d[0];
4556 }
4557 
4558 
4559 /*
4560  * Logic
4561  * -----
4562  *
4563  * +---------------+----------------------------------------------------------+
4564  * | AND.V         | Vector Logical And                                       |
4565  * | NOR.V         | Vector Logical Negated Or                                |
4566  * | OR.V          | Vector Logical Or                                        |
4567  * | XOR.V         | Vector Logical Exclusive Or                              |
4568  * +---------------+----------------------------------------------------------+
4569  */
4570 
4571 
4572 void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4573 {
4574     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4575     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4576     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4577 
4578     pwd->d[0] = pws->d[0] & pwt->d[0];
4579     pwd->d[1] = pws->d[1] & pwt->d[1];
4580 }
4581 
4582 void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4583 {
4584     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4585     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4586     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4587 
4588     pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4589     pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4590 }
4591 
4592 void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4593 {
4594     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4595     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4596     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4597 
4598     pwd->d[0] = pws->d[0] | pwt->d[0];
4599     pwd->d[1] = pws->d[1] | pwt->d[1];
4600 }
4601 
4602 void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4603 {
4604     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4605     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4606     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4607 
4608     pwd->d[0] = pws->d[0] ^ pwt->d[0];
4609     pwd->d[1] = pws->d[1] ^ pwt->d[1];
4610 }
4611 
4612 
4613 /*
4614  * Move
4615  * ----
4616  *
4617  * +---------------+----------------------------------------------------------+
4618  * | MOVE.V        | Vector Move                                              |
4619  * +---------------+----------------------------------------------------------+
4620  */
4621 
4622 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4623 {
4624     pwd->d[0] = pws->d[0];
4625     pwd->d[1] = pws->d[1];
4626 }
4627 
4628 void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4629 {
4630     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4631     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4632 
4633     msa_move_v(pwd, pws);
4634 }
4635 
4636 
4637 /*
4638  * Pack
4639  * ----
4640  *
4641  * +---------------+----------------------------------------------------------+
4642  * | PCKEV.B       | Vector Pack Even (byte)                                  |
4643  * | PCKEV.H       | Vector Pack Even (halfword)                              |
4644  * | PCKEV.W       | Vector Pack Even (word)                                  |
4645  * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4646  * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4647  * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4648  * | PCKOD.W       | Vector Pack Odd (word)                                   |
4649  * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4650  * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4651  * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4652  * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4653  * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4654  * +---------------+----------------------------------------------------------+
4655  */
4656 
4657 
4658 void helper_msa_pckev_b(CPUMIPSState *env,
4659                         uint32_t wd, uint32_t ws, uint32_t wt)
4660 {
4661     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4662     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4663     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4664 
4665 #if HOST_BIG_ENDIAN
4666     pwd->b[8]  = pws->b[9];
4667     pwd->b[10] = pws->b[13];
4668     pwd->b[12] = pws->b[1];
4669     pwd->b[14] = pws->b[5];
4670     pwd->b[0]  = pwt->b[9];
4671     pwd->b[2]  = pwt->b[13];
4672     pwd->b[4]  = pwt->b[1];
4673     pwd->b[6]  = pwt->b[5];
4674     pwd->b[9]  = pws->b[11];
4675     pwd->b[13] = pws->b[3];
4676     pwd->b[1]  = pwt->b[11];
4677     pwd->b[5]  = pwt->b[3];
4678     pwd->b[11] = pws->b[15];
4679     pwd->b[3]  = pwt->b[15];
4680     pwd->b[15] = pws->b[7];
4681     pwd->b[7]  = pwt->b[7];
4682 #else
4683     pwd->b[15] = pws->b[14];
4684     pwd->b[13] = pws->b[10];
4685     pwd->b[11] = pws->b[6];
4686     pwd->b[9]  = pws->b[2];
4687     pwd->b[7]  = pwt->b[14];
4688     pwd->b[5]  = pwt->b[10];
4689     pwd->b[3]  = pwt->b[6];
4690     pwd->b[1]  = pwt->b[2];
4691     pwd->b[14] = pws->b[12];
4692     pwd->b[10] = pws->b[4];
4693     pwd->b[6]  = pwt->b[12];
4694     pwd->b[2]  = pwt->b[4];
4695     pwd->b[12] = pws->b[8];
4696     pwd->b[4]  = pwt->b[8];
4697     pwd->b[8]  = pws->b[0];
4698     pwd->b[0]  = pwt->b[0];
4699 #endif
4700 }
4701 
4702 void helper_msa_pckev_h(CPUMIPSState *env,
4703                         uint32_t wd, uint32_t ws, uint32_t wt)
4704 {
4705     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4706     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4707     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4708 
4709 #if HOST_BIG_ENDIAN
4710     pwd->h[4] = pws->h[5];
4711     pwd->h[6] = pws->h[1];
4712     pwd->h[0] = pwt->h[5];
4713     pwd->h[2] = pwt->h[1];
4714     pwd->h[5] = pws->h[7];
4715     pwd->h[1] = pwt->h[7];
4716     pwd->h[7] = pws->h[3];
4717     pwd->h[3] = pwt->h[3];
4718 #else
4719     pwd->h[7] = pws->h[6];
4720     pwd->h[5] = pws->h[2];
4721     pwd->h[3] = pwt->h[6];
4722     pwd->h[1] = pwt->h[2];
4723     pwd->h[6] = pws->h[4];
4724     pwd->h[2] = pwt->h[4];
4725     pwd->h[4] = pws->h[0];
4726     pwd->h[0] = pwt->h[0];
4727 #endif
4728 }
4729 
4730 void helper_msa_pckev_w(CPUMIPSState *env,
4731                         uint32_t wd, uint32_t ws, uint32_t wt)
4732 {
4733     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4734     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4735     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4736 
4737 #if HOST_BIG_ENDIAN
4738     pwd->w[2] = pws->w[3];
4739     pwd->w[0] = pwt->w[3];
4740     pwd->w[3] = pws->w[1];
4741     pwd->w[1] = pwt->w[1];
4742 #else
4743     pwd->w[3] = pws->w[2];
4744     pwd->w[1] = pwt->w[2];
4745     pwd->w[2] = pws->w[0];
4746     pwd->w[0] = pwt->w[0];
4747 #endif
4748 }
4749 
4750 void helper_msa_pckev_d(CPUMIPSState *env,
4751                         uint32_t wd, uint32_t ws, uint32_t wt)
4752 {
4753     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4754     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4755     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4756 
4757     pwd->d[1] = pws->d[0];
4758     pwd->d[0] = pwt->d[0];
4759 }
4760 
4761 
4762 void helper_msa_pckod_b(CPUMIPSState *env,
4763                         uint32_t wd, uint32_t ws, uint32_t wt)
4764 {
4765     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4766     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4767     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4768 
4769 #if HOST_BIG_ENDIAN
4770     pwd->b[7]  = pwt->b[6];
4771     pwd->b[5]  = pwt->b[2];
4772     pwd->b[3]  = pwt->b[14];
4773     pwd->b[1]  = pwt->b[10];
4774     pwd->b[15] = pws->b[6];
4775     pwd->b[13] = pws->b[2];
4776     pwd->b[11] = pws->b[14];
4777     pwd->b[9]  = pws->b[10];
4778     pwd->b[6]  = pwt->b[4];
4779     pwd->b[2]  = pwt->b[12];
4780     pwd->b[14] = pws->b[4];
4781     pwd->b[10] = pws->b[12];
4782     pwd->b[4]  = pwt->b[0];
4783     pwd->b[12] = pws->b[0];
4784     pwd->b[0]  = pwt->b[8];
4785     pwd->b[8]  = pws->b[8];
4786 #else
4787     pwd->b[0]  = pwt->b[1];
4788     pwd->b[2]  = pwt->b[5];
4789     pwd->b[4]  = pwt->b[9];
4790     pwd->b[6]  = pwt->b[13];
4791     pwd->b[8]  = pws->b[1];
4792     pwd->b[10] = pws->b[5];
4793     pwd->b[12] = pws->b[9];
4794     pwd->b[14] = pws->b[13];
4795     pwd->b[1]  = pwt->b[3];
4796     pwd->b[5]  = pwt->b[11];
4797     pwd->b[9]  = pws->b[3];
4798     pwd->b[13] = pws->b[11];
4799     pwd->b[3]  = pwt->b[7];
4800     pwd->b[11] = pws->b[7];
4801     pwd->b[7]  = pwt->b[15];
4802     pwd->b[15] = pws->b[15];
4803 #endif
4804 
4805 }
4806 
4807 void helper_msa_pckod_h(CPUMIPSState *env,
4808                         uint32_t wd, uint32_t ws, uint32_t wt)
4809 {
4810     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4811     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4812     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4813 
4814 #if HOST_BIG_ENDIAN
4815     pwd->h[3] = pwt->h[2];
4816     pwd->h[1] = pwt->h[6];
4817     pwd->h[7] = pws->h[2];
4818     pwd->h[5] = pws->h[6];
4819     pwd->h[2] = pwt->h[0];
4820     pwd->h[6] = pws->h[0];
4821     pwd->h[0] = pwt->h[4];
4822     pwd->h[4] = pws->h[4];
4823 #else
4824     pwd->h[0] = pwt->h[1];
4825     pwd->h[2] = pwt->h[5];
4826     pwd->h[4] = pws->h[1];
4827     pwd->h[6] = pws->h[5];
4828     pwd->h[1] = pwt->h[3];
4829     pwd->h[5] = pws->h[3];
4830     pwd->h[3] = pwt->h[7];
4831     pwd->h[7] = pws->h[7];
4832 #endif
4833 }
4834 
4835 void helper_msa_pckod_w(CPUMIPSState *env,
4836                         uint32_t wd, uint32_t ws, uint32_t wt)
4837 {
4838     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4839     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4840     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4841 
4842 #if HOST_BIG_ENDIAN
4843     pwd->w[1] = pwt->w[0];
4844     pwd->w[3] = pws->w[0];
4845     pwd->w[0] = pwt->w[2];
4846     pwd->w[2] = pws->w[2];
4847 #else
4848     pwd->w[0] = pwt->w[1];
4849     pwd->w[2] = pws->w[1];
4850     pwd->w[1] = pwt->w[3];
4851     pwd->w[3] = pws->w[3];
4852 #endif
4853 }
4854 
4855 void helper_msa_pckod_d(CPUMIPSState *env,
4856                         uint32_t wd, uint32_t ws, uint32_t wt)
4857 {
4858     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4859     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4860     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4861 
4862     pwd->d[0] = pwt->d[1];
4863     pwd->d[1] = pws->d[1];
4864 }
4865 
4866 
4867 /*
4868  * Shift
4869  * -----
4870  *
4871  * +---------------+----------------------------------------------------------+
4872  * | SLL.B         | Vector Shift Left (byte)                                 |
4873  * | SLL.H         | Vector Shift Left (halfword)                             |
4874  * | SLL.W         | Vector Shift Left (word)                                 |
4875  * | SLL.D         | Vector Shift Left (doubleword)                           |
4876  * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4877  * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4878  * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4879  * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4880  * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4881  * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4882  * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4883  * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4884  * | SRL.B         | Vector Shift Right Logical (byte)                        |
4885  * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4886  * | SRL.W         | Vector Shift Right Logical (word)                        |
4887  * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4888  * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4889  * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4890  * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4891  * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4892  * +---------------+----------------------------------------------------------+
4893  */
4894 
4895 
/*
 * Per-element logical left shift used by the sll helpers.
 *
 * @df:   data format selector, forwarded to BIT_POSITION()
 * @arg1: value to shift
 * @arg2: raw shift operand; the effective shift count is
 *        BIT_POSITION(arg2, df)
 *
 * NOTE(review): BIT_POSITION() is defined elsewhere in this file; it
 * presumably reduces arg2 modulo the element width for df - confirm.
 *
 * The shift is carried out on the uint64_t representation of arg1:
 * left-shifting a negative signed value is undefined behaviour in ISO C,
 * while the unsigned shift produces the same bit pattern without
 * depending on compiler flags.  Callers store the result into a lane of
 * the matching width, which keeps only the low bits.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    return (int64_t)((uint64_t)arg1 << b_arg2);
}
4901 
4902 void helper_msa_sll_b(CPUMIPSState *env,
4903                       uint32_t wd, uint32_t ws, uint32_t wt)
4904 {
4905     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4906     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4907     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4908 
4909     pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4910     pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4911     pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4912     pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4913     pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4914     pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4915     pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4916     pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4917     pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4918     pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4919     pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4920     pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4921     pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4922     pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4923     pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4924     pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4925 }
4926 
4927 void helper_msa_sll_h(CPUMIPSState *env,
4928                       uint32_t wd, uint32_t ws, uint32_t wt)
4929 {
4930     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4931     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4932     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4933 
4934     pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4935     pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4936     pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4937     pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4938     pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4939     pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4940     pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4941     pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4942 }
4943 
4944 void helper_msa_sll_w(CPUMIPSState *env,
4945                       uint32_t wd, uint32_t ws, uint32_t wt)
4946 {
4947     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4948     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4949     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4950 
4951     pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4952     pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4953     pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4954     pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4955 }
4956 
4957 void helper_msa_sll_d(CPUMIPSState *env,
4958                       uint32_t wd, uint32_t ws, uint32_t wt)
4959 {
4960     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4961     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4962     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4963 
4964     pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4965     pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4966 }
4967 
4968 
/*
 * Arithmetic (signed) right shift of a single element.
 *
 * arg1 is the element value widened to 64 bits; the shift count is
 * whatever BIT_POSITION() derives from arg2 for data format df.
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 >> shift;
}
4974 
4975 void helper_msa_sra_b(CPUMIPSState *env,
4976                       uint32_t wd, uint32_t ws, uint32_t wt)
4977 {
4978     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4979     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4980     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4981 
4982     pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4983     pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4984     pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4985     pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4986     pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4987     pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4988     pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4989     pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4990     pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4991     pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4992     pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4993     pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4994     pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4995     pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4996     pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4997     pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
4998 }
4999 
5000 void helper_msa_sra_h(CPUMIPSState *env,
5001                       uint32_t wd, uint32_t ws, uint32_t wt)
5002 {
5003     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5004     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5005     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5006 
5007     pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5008     pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5009     pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5010     pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5011     pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5012     pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5013     pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5014     pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5015 }
5016 
5017 void helper_msa_sra_w(CPUMIPSState *env,
5018                       uint32_t wd, uint32_t ws, uint32_t wt)
5019 {
5020     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5021     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5022     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5023 
5024     pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5025     pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5026     pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5027     pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5028 }
5029 
5030 void helper_msa_sra_d(CPUMIPSState *env,
5031                       uint32_t wd, uint32_t ws, uint32_t wt)
5032 {
5033     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5034     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5035     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5036 
5037     pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5038     pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5039 }
5040 
5041 
/*
 * Arithmetic right shift with rounding, single element.
 *
 * The shift count comes from BIT_POSITION(arg2, df).  A count of zero
 * returns arg1 untouched; otherwise the most significant bit that gets
 * shifted out is added back onto the shifted value.
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    int64_t round;

    if (shift == 0) {
        return arg1;
    }

    /* Bit (shift - 1) is the last one to fall off the low end */
    round = (arg1 >> (shift - 1)) & 1;
    return (arg1 >> shift) + round;
}
5052 
5053 void helper_msa_srar_b(CPUMIPSState *env,
5054                        uint32_t wd, uint32_t ws, uint32_t wt)
5055 {
5056     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5057     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5058     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5059 
5060     pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5061     pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5062     pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5063     pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5064     pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5065     pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5066     pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5067     pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5068     pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5069     pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5070     pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5071     pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5072     pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5073     pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5074     pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5075     pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5076 }
5077 
5078 void helper_msa_srar_h(CPUMIPSState *env,
5079                        uint32_t wd, uint32_t ws, uint32_t wt)
5080 {
5081     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5082     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5083     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5084 
5085     pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5086     pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5087     pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5088     pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5089     pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5090     pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5091     pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5092     pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5093 }
5094 
5095 void helper_msa_srar_w(CPUMIPSState *env,
5096                        uint32_t wd, uint32_t ws, uint32_t wt)
5097 {
5098     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5099     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5100     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5101 
5102     pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5103     pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5104     pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5105     pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5106 }
5107 
5108 void helper_msa_srar_d(CPUMIPSState *env,
5109                        uint32_t wd, uint32_t ws, uint32_t wt)
5110 {
5111     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5112     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5113     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5114 
5115     pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5116     pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5117 }
5118 
5119 
/*
 * Logical (zero-filling) right shift of a single element.
 *
 * arg1 is first masked down to the width of data format df via UNSIGNED(),
 * so any sign-extension bits above the element do not leak into the
 * result; the shift count comes from BIT_POSITION(arg2, df).
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);

    return value >> shift;
}
5126 
5127 void helper_msa_srl_b(CPUMIPSState *env,
5128                       uint32_t wd, uint32_t ws, uint32_t wt)
5129 {
5130     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5131     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5132     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5133 
5134     pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5135     pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5136     pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5137     pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5138     pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5139     pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5140     pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5141     pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5142     pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5143     pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5144     pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5145     pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5146     pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5147     pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5148     pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5149     pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5150 }
5151 
5152 void helper_msa_srl_h(CPUMIPSState *env,
5153                       uint32_t wd, uint32_t ws, uint32_t wt)
5154 {
5155     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5156     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5157     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5158 
5159     pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5160     pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5161     pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5162     pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5163     pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5164     pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5165     pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5166     pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5167 }
5168 
5169 void helper_msa_srl_w(CPUMIPSState *env,
5170                       uint32_t wd, uint32_t ws, uint32_t wt)
5171 {
5172     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5173     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5174     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5175 
5176     pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5177     pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5178     pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5179     pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5180 }
5181 
5182 void helper_msa_srl_d(CPUMIPSState *env,
5183                       uint32_t wd, uint32_t ws, uint32_t wt)
5184 {
5185     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5186     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5187     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5188 
5189     pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5190     pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5191 }
5192 
5193 
/*
 * Logical right shift with rounding, single element.
 *
 * arg1 is masked to the element width with UNSIGNED() before shifting.
 * A shift count of zero (as derived by BIT_POSITION(arg2, df)) returns
 * the masked value as is; otherwise the most significant bit that gets
 * shifted out is added back onto the shifted value.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);
    uint64_t round;

    if (shift == 0) {
        return value;
    }

    /* Bit (shift - 1) is the last one to fall off the low end */
    round = (value >> (shift - 1)) & 1;
    return (value >> shift) + round;
}
5205 
5206 void helper_msa_srlr_b(CPUMIPSState *env,
5207                        uint32_t wd, uint32_t ws, uint32_t wt)
5208 {
5209     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5210     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5211     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5212 
5213     pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5214     pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5215     pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5216     pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5217     pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5218     pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5219     pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5220     pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5221     pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5222     pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5223     pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5224     pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5225     pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5226     pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5227     pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5228     pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5229 }
5230 
5231 void helper_msa_srlr_h(CPUMIPSState *env,
5232                        uint32_t wd, uint32_t ws, uint32_t wt)
5233 {
5234     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5235     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5236     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5237 
5238     pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5239     pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5240     pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5241     pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5242     pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5243     pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5244     pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5245     pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5246 }
5247 
5248 void helper_msa_srlr_w(CPUMIPSState *env,
5249                        uint32_t wd, uint32_t ws, uint32_t wt)
5250 {
5251     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5252     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5253     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5254 
5255     pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5256     pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5257     pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5258     pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5259 }
5260 
5261 void helper_msa_srlr_d(CPUMIPSState *env,
5262                        uint32_t wd, uint32_t ws, uint32_t wt)
5263 {
5264     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5265     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5266     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5267 
5268     pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5269     pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5270 }
5271 
5272 
5273 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5274 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5275         uint32_t i8)                                                    \
5276 {                                                                       \
5277     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5278     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5279     uint32_t i;                                                         \
5280     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5281         DEST = OPERATION;                                               \
5282     }                                                                   \
5283 }
5284 
5285 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5286 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5287 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5288 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5289 
5290 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5291             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5292 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5293         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5294 
5295 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5296             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5297 MSA_FN_IMM8(bmzi_b, pwd->b[i],
5298         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5299 
5300 #define BIT_SELECT(dest, arg1, arg2, df) \
5301             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5302 MSA_FN_IMM8(bseli_b, pwd->b[i],
5303         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5304 
5305 #undef BIT_SELECT
5306 #undef BIT_MOVE_IF_ZERO
5307 #undef BIT_MOVE_IF_NOT_ZERO
5308 #undef MSA_FN_IMM8
5309 
5310 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5311 
5312 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5313                        uint32_t ws, uint32_t imm)
5314 {
5315     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5316     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5317     wr_t wx, *pwx = &wx;
5318     uint32_t i;
5319 
5320     switch (df) {
5321     case DF_BYTE:
5322         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5323             pwx->b[i] = pws->b[SHF_POS(i, imm)];
5324         }
5325         break;
5326     case DF_HALF:
5327         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5328             pwx->h[i] = pws->h[SHF_POS(i, imm)];
5329         }
5330         break;
5331     case DF_WORD:
5332         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5333             pwx->w[i] = pws->w[SHF_POS(i, imm)];
5334         }
5335         break;
5336     default:
5337         g_assert_not_reached();
5338     }
5339     msa_move_v(pwd, pwx);
5340 }
5341 
5342 #define MSA_BINOP_IMM_DF(helper, func)                                  \
5343 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5344                         uint32_t wd, uint32_t ws, int32_t u5)           \
5345 {                                                                       \
5346     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5347     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5348     uint32_t i;                                                         \
5349                                                                         \
5350     switch (df) {                                                       \
5351     case DF_BYTE:                                                       \
5352         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5353             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5354         }                                                               \
5355         break;                                                          \
5356     case DF_HALF:                                                       \
5357         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5358             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5359         }                                                               \
5360         break;                                                          \
5361     case DF_WORD:                                                       \
5362         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5363             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5364         }                                                               \
5365         break;                                                          \
5366     case DF_DOUBLE:                                                     \
5367         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5368             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5369         }                                                               \
5370         break;                                                          \
5371     default:                                                            \
5372         g_assert_not_reached();                                         \
5373     }                                                                   \
5374 }
5375 
5376 MSA_BINOP_IMM_DF(addvi, addv)
5377 MSA_BINOP_IMM_DF(subvi, subv)
5378 MSA_BINOP_IMM_DF(ceqi, ceq)
5379 MSA_BINOP_IMM_DF(clei_s, cle_s)
5380 MSA_BINOP_IMM_DF(clei_u, cle_u)
5381 MSA_BINOP_IMM_DF(clti_s, clt_s)
5382 MSA_BINOP_IMM_DF(clti_u, clt_u)
5383 MSA_BINOP_IMM_DF(maxi_s, max_s)
5384 MSA_BINOP_IMM_DF(maxi_u, max_u)
5385 MSA_BINOP_IMM_DF(mini_s, min_s)
5386 MSA_BINOP_IMM_DF(mini_u, min_u)
5387 #undef MSA_BINOP_IMM_DF
5388 
5389 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5390                        int32_t s10)
5391 {
5392     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5393     uint32_t i;
5394 
5395     switch (df) {
5396     case DF_BYTE:
5397         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5398             pwd->b[i] = (int8_t)s10;
5399         }
5400         break;
5401     case DF_HALF:
5402         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5403             pwd->h[i] = (int16_t)s10;
5404         }
5405         break;
5406     case DF_WORD:
5407         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5408             pwd->w[i] = (int32_t)s10;
5409         }
5410         break;
5411     case DF_DOUBLE:
5412         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5413             pwd->d[i] = (int64_t)s10;
5414         }
5415        break;
5416     default:
5417         g_assert_not_reached();
5418     }
5419 }
5420 
/*
 * Signed saturation of a single element: clamp arg to the range of an
 * (m + 1)-bit signed integer, i.e. [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].
 * df is not used by the computation.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    const int64_t lowest = M_MIN_INT(m + 1);
    const int64_t highest = M_MAX_INT(m + 1);

    if (arg < lowest) {
        return lowest;
    }
    if (arg > highest) {
        return highest;
    }
    return arg;
}
5427 
/*
 * Unsigned saturation of a single element: arg is masked to the width of
 * data format df and then capped at M_MAX_UINT(m + 1), the largest
 * (m + 1)-bit unsigned value.
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t value = UNSIGNED(arg, df);
    const uint64_t ceiling = M_MAX_UINT(m + 1);

    if (value < ceiling) {
        return value;
    }
    return ceiling;
}
5434 
5435 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5436 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5437                        uint32_t ws, uint32_t u5)                        \
5438 {                                                                       \
5439     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5440     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5441     uint32_t i;                                                         \
5442                                                                         \
5443     switch (df) {                                                       \
5444     case DF_BYTE:                                                       \
5445         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5446             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5447         }                                                               \
5448         break;                                                          \
5449     case DF_HALF:                                                       \
5450         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5451             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5452         }                                                               \
5453         break;                                                          \
5454     case DF_WORD:                                                       \
5455         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5456             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5457         }                                                               \
5458         break;                                                          \
5459     case DF_DOUBLE:                                                     \
5460         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5461             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5462         }                                                               \
5463         break;                                                          \
5464     default:                                                            \
5465         g_assert_not_reached();                                         \
5466     }                                                                   \
5467 }
5468 
5469 MSA_BINOP_IMMU_DF(slli, sll)
5470 MSA_BINOP_IMMU_DF(srai, sra)
5471 MSA_BINOP_IMMU_DF(srli, srl)
5472 MSA_BINOP_IMMU_DF(bclri, bclr)
5473 MSA_BINOP_IMMU_DF(bseti, bset)
5474 MSA_BINOP_IMMU_DF(bnegi, bneg)
5475 MSA_BINOP_IMMU_DF(sat_s, sat_s)
5476 MSA_BINOP_IMMU_DF(sat_u, sat_u)
5477 MSA_BINOP_IMMU_DF(srari, srar)
5478 MSA_BINOP_IMMU_DF(srlri, srlr)
5479 #undef MSA_BINOP_IMMU_DF
5480 
5481 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5482 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5483                                   uint32_t wd, uint32_t ws, uint32_t u5) \
5484 {                                                                       \
5485     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5486     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5487     uint32_t i;                                                         \
5488                                                                         \
5489     switch (df) {                                                       \
5490     case DF_BYTE:                                                       \
5491         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5492             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5493                                             u5);                        \
5494         }                                                               \
5495         break;                                                          \
5496     case DF_HALF:                                                       \
5497         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5498             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5499                                             u5);                        \
5500         }                                                               \
5501         break;                                                          \
5502     case DF_WORD:                                                       \
5503         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5504             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5505                                             u5);                        \
5506         }                                                               \
5507         break;                                                          \
5508     case DF_DOUBLE:                                                     \
5509         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5510             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5511                                             u5);                        \
5512         }                                                               \
5513         break;                                                          \
5514     default:                                                            \
5515         g_assert_not_reached();                                         \
5516     }                                                                   \
5517 }
5518 
5519 MSA_TEROP_IMMU_DF(binsli, binsl)
5520 MSA_TEROP_IMMU_DF(binsri, binsr)
5521 #undef MSA_TEROP_IMMU_DF
5522 
/*
 * Slide one group of s bytes (group number k) of the destination register.
 *
 * Relies on these names from the expanding scope: pws and pwd (source and
 * destination registers), v (byte scratch buffer of at least 2 * s
 * entries), n (slide amount) and i (loop counter).
 *
 * The group's source bytes are copied to v[0..s-1] and its destination
 * bytes to v[s..2s-1]; the destination group is then rewritten with the
 * s bytes starting at v[n].  The scratch buffer must be completely filled
 * before any destination byte is overwritten, hence the two loops.
 */
#define CONCATENATE_AND_SLIDE(s, k)             \
    do {                                        \
        for (i = 0; i < (s); i++) {             \
            v[i]       = pws->b[(s) * (k) + i]; \
            v[i + (s)] = pwd->b[(s) * (k) + i]; \
        }                                       \
        for (i = 0; i < (s); i++) {             \
            pwd->b[(s) * (k) + i] = v[i + n];   \
        }                                       \
    } while (0)
5533 
5534 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5535                               wr_t *pws, target_ulong rt)
5536 {
5537     uint32_t n = rt % DF_ELEMENTS(df);
5538     uint8_t v[64];
5539     uint32_t i, k;
5540 
5541     switch (df) {
5542     case DF_BYTE:
5543         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5544         break;
5545     case DF_HALF:
5546         for (k = 0; k < 2; k++) {
5547             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5548         }
5549         break;
5550     case DF_WORD:
5551         for (k = 0; k < 4; k++) {
5552             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5553         }
5554         break;
5555     case DF_DOUBLE:
5556         for (k = 0; k < 8; k++) {
5557             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5558         }
5559         break;
5560     default:
5561         g_assert_not_reached();
5562     }
5563 }
5564 
/*
 * Fixed-point multiply of a single element.
 *
 * The product of arg1 and arg2 is shifted right by DF_BITS(df) - 1.  The
 * one special case is both operands being DF_MIN_INT(df): the result is
 * then pinned to DF_MAX_INT(df) instead.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);

    if (arg1 != lowest || arg2 != lowest) {
        return (arg1 * arg2) >> (DF_BITS(df) - 1);
    }

    /* min * min cannot be represented, saturate to the largest value */
    return DF_MAX_INT(df);
}
5575 
/*
 * Fixed-point multiply with rounding, single element.
 *
 * The product of arg1 and arg2 gets a rounding constant of
 * 2^(DF_BITS(df) - 2) added before it is shifted right by
 * DF_BITS(df) - 1.  When both operands equal DF_MIN_INT(df) the result is
 * pinned to DF_MAX_INT(df) instead.
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t q_min = DF_MIN_INT(df);
    int64_t q_max = DF_MAX_INT(df);
    /*
     * Build the rounding bit from a 64-bit one: a plain "1 << n" is
     * evaluated in int, which is too narrow once the shift count reaches
     * the width of int (DF_BITS(df) - 2 is 62 for DF_DOUBLE).
     */
    int64_t r_bit = (int64_t)1 << (DF_BITS(df) - 2);

    if (arg1 == q_min && arg2 == q_min) {
        return q_max;
    }
    return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
}
5587 
5588 #define MSA_BINOP_DF(func) \
5589 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5590                                 uint32_t wd, uint32_t ws, uint32_t wt)  \
5591 {                                                                       \
5592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5595                                                                         \
5596     switch (df) {                                                       \
5597     case DF_BYTE:                                                       \
5598         pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5599         pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5600         pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5601         pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5602         pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5603         pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5604         pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5605         pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5606         pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5607         pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5608         pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5609         pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5610         pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5611         pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5612         pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5613         pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5614         break;                                                          \
5615     case DF_HALF:                                                       \
5616         pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5617         pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5618         pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5619         pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5620         pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5621         pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5622         pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5623         pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5624         break;                                                          \
5625     case DF_WORD:                                                       \
5626         pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5627         pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5628         pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5629         pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5630         break;                                                          \
5631     case DF_DOUBLE:                                                     \
5632         pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5633         pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5634         break;                                                          \
5635     default:                                                            \
5636         g_assert_not_reached();                                         \
5637     }                                                                   \
5638 }
5639 
5640 MSA_BINOP_DF(mul_q)
5641 MSA_BINOP_DF(mulr_q)
5642 #undef MSA_BINOP_DF
5643 
5644 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5645                        uint32_t ws, uint32_t rt)
5646 {
5647     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5648     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5649 
5650     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5651 }
5652 
/*
 * Fixed-point multiply-add: dest is scaled up by DF_BITS(df) - 1 bits,
 * the product arg1 * arg2 is added, and the sum is scaled back down.
 * The result is clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t hi = DF_MAX_INT(df);
    const int64_t lo = DF_MIN_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t sum =
        ((dest << (DF_BITS(df) - 1)) + product) >> (DF_BITS(df) - 1);

    if (sum < lo) {
        return lo;
    }
    if (hi < sum) {
        return hi;
    }
    return sum;
}
5666 
/*
 * Fixed-point multiply-subtract: dest is scaled up by DF_BITS(df) - 1
 * bits, the product arg1 * arg2 is subtracted, and the difference is
 * scaled back down.  The result is clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t hi = DF_MAX_INT(df);
    const int64_t lo = DF_MIN_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t diff =
        ((dest << (DF_BITS(df) - 1)) - product) >> (DF_BITS(df) - 1);

    if (diff < lo) {
        return lo;
    }
    if (hi < diff) {
        return hi;
    }
    return diff;
}
5680 
/*
 * Rounding fixed-point multiply-add: like msa_madd_q_df(), but a rounding
 * bit of 2^(DF_BITS(df) - 2) is added before the sum is scaled back down
 * by DF_BITS(df) - 1 bits.  The result is clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * The constant must be 64 bits wide: for DF_DOUBLE the shift count is
     * 62, which is undefined for a plain int "1".
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5695 
/*
 * Rounding fixed-point multiply-subtract: like msa_msub_q_df(), but a
 * rounding bit of 2^(DF_BITS(df) - 2) is added before the difference is
 * scaled back down by DF_BITS(df) - 1 bits.  The result is clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * The constant must be 64 bits wide: for DF_DOUBLE the shift count is
     * 62, which is undefined for a plain int "1".
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5710 
5711 #define MSA_TEROP_DF(func) \
5712 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5713                                 uint32_t ws, uint32_t wt)                     \
5714 {                                                                             \
5715     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5716     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5717     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5718                                                                               \
5719     switch (df) {                                                             \
5720     case DF_BYTE:                                                             \
5721         pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5722                                              pwt->b[0]);                      \
5723         pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5724                                              pwt->b[1]);                      \
5725         pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5726                                              pwt->b[2]);                      \
5727         pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5728                                              pwt->b[3]);                      \
5729         pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5730                                              pwt->b[4]);                      \
5731         pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5732                                              pwt->b[5]);                      \
5733         pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5734                                              pwt->b[6]);                      \
5735         pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5736                                              pwt->b[7]);                      \
5737         pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5738                                              pwt->b[8]);                      \
5739         pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5740                                              pwt->b[9]);                      \
5741         pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5742                                              pwt->b[10]);                     \
5743         pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5744                                              pwt->b[11]);                     \
5745         pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5746                                              pwt->b[12]);                     \
5747         pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5748                                              pwt->b[13]);                     \
5749         pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5750                                              pwt->b[14]);                     \
5751         pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5752                                              pwt->b[15]);                     \
5753         break;                                                                \
5754     case DF_HALF:                                                             \
5755         pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5756         pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5757         pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5758         pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5759         pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5760         pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5761         pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5762         pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5763         break;                                                                \
5764     case DF_WORD:                                                             \
5765         pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5766         pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5767         pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5768         pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5769         break;                                                                \
5770     case DF_DOUBLE:                                                           \
5771         pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5772         pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5773         break;                                                                \
5774     default:                                                                  \
5775         g_assert_not_reached();                                               \
5776     }                                                                         \
5777 }
5778 
5779 MSA_TEROP_DF(binsl)
5780 MSA_TEROP_DF(binsr)
5781 MSA_TEROP_DF(madd_q)
5782 MSA_TEROP_DF(msub_q)
5783 MSA_TEROP_DF(maddr_q)
5784 MSA_TEROP_DF(msubr_q)
5785 #undef MSA_TEROP_DF
5786 
5787 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5788                                 wr_t *pws, target_ulong rt)
5789 {
5790     uint32_t n = rt % DF_ELEMENTS(df);
5791     uint32_t i;
5792 
5793     switch (df) {
5794     case DF_BYTE:
5795         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5796             pwd->b[i] = pws->b[n];
5797         }
5798         break;
5799     case DF_HALF:
5800         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5801             pwd->h[i] = pws->h[n];
5802         }
5803         break;
5804     case DF_WORD:
5805         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5806             pwd->w[i] = pws->w[n];
5807         }
5808         break;
5809     case DF_DOUBLE:
5810         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5811             pwd->d[i] = pws->d[n];
5812         }
5813        break;
5814     default:
5815         g_assert_not_reached();
5816     }
5817 }
5818 
5819 void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5820                          uint32_t ws, uint32_t rt)
5821 {
5822     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5823     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5824 
5825     msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5826 }
5827 
/*
 * Per-format dispatch glue for helpers generated with MSA_FN_DF.
 *
 * The _B/_H/_W/_D names forward to MSA_DO(), MSA_LOOP() and MSA_LOOP_COND(),
 * so a user only has to provide:
 *   MSA_DO(f)         - the statement executed for element i, where f is
 *                       the wr_t member name (b, h, w or d);
 *   MSA_LOOP_COND(DF) - the iteration count for format DF.
 * Both must be defined by the time a helper is instantiated, because the
 * names below are only expanded at that point.
 */
5828 #define MSA_DO_B MSA_DO(b)
5829 #define MSA_DO_H MSA_DO(h)
5830 #define MSA_DO_W MSA_DO(w)
5831 #define MSA_DO_D MSA_DO(d)
5832 
5833 #define MSA_LOOP_B MSA_LOOP(B)
5834 #define MSA_LOOP_H MSA_LOOP(H)
5835 #define MSA_LOOP_W MSA_LOOP(W)
5836 #define MSA_LOOP_D MSA_LOOP(D)
5837 
5838 #define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
5839 #define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
5840 #define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
5841 #define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)
5842 
/*
 * Run MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1.  The loop variable
 * "i" is not declared here; it must exist in the expanding scope.
 */
5843 #define MSA_LOOP(DF) \
5844     do { \
5845         for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
5846             MSA_DO_ ## DF; \
5847         } \
5848     } while (0)
5849 
/*
 * MSA_FN_DF(FUNC) defines helper_msa_<FUNC>(env, df, wd, ws, wt).
 *
 * The generated helper runs MSA_LOOP_B/H/W/D for the format selected by df
 * (any other df reaches g_assert_not_reached()) with pwd, pws, pwt, pwx and
 * i in scope, then copies the local temporary wx into the destination
 * register with msa_move_v().  The per-element statement is expected to
 * write its result through pwx, so the destination register itself is only
 * updated by that final copy.
 */
5850 #define MSA_FN_DF(FUNC)                                             \
5851 void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5852         uint32_t ws, uint32_t wt)                                   \
5853 {                                                                   \
5854     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
5855     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
5856     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
5857     wr_t wx, *pwx = &wx;                                            \
5858     uint32_t i;                                                     \
5859     switch (df) {                                                   \
5860     case DF_BYTE:                                                   \
5861         MSA_LOOP_B;                                                 \
5862         break;                                                      \
5863     case DF_HALF:                                                   \
5864         MSA_LOOP_H;                                                 \
5865         break;                                                      \
5866     case DF_WORD:                                                   \
5867         MSA_LOOP_W;                                                 \
5868         break;                                                      \
5869     case DF_DOUBLE:                                                 \
5870         MSA_LOOP_D;                                                 \
5871         break;                                                      \
5872     default:                                                        \
5873         g_assert_not_reached();                                     \
5874     }                                                               \
5875     msa_move_v(pwd, pwx);                                           \
5876 }
5877 
/*
 * Iteration count covering half of the elements of format DF.
 * This definition is removed again by the #undef a few lines below,
 * before any helper is instantiated in between.
 */
5878 #define MSA_LOOP_COND(DF) \
5879             (DF_ELEMENTS(DF) / 2)
5880 
/*
 * Element accessors for a vector register pointer pwr:
 * R<f>(pwr, i) is element i, L<f>(pwr, i) is element i plus half the
 * element count of that format (f = b, h, w or d).
 */
5881 #define Rb(pwr, i) (pwr->b[i])
5882 #define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
5883 #define Rh(pwr, i) (pwr->h[i])
5884 #define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
5885 #define Rw(pwr, i) (pwr->w[i])
5886 #define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
5887 #define Rd(pwr, i) (pwr->d[i])
5888 #define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
5889 
5890 #undef MSA_LOOP_COND
5891 
/* Iteration count covering every element of format DF. */
5892 #define MSA_LOOP_COND(DF) \
5893             (DF_ELEMENTS(DF))
5894 
/*
 * Per-element statement for helper_msa_vshf_df(), generated by MSA_FN_DF.
 *
 * For element i the current destination element pwd[i] acts as a selector:
 *   - if either of its bits 0xc0 is set, the result element is 0;
 *   - otherwise k = (pwd[i] & 0x3f) % (2 * n), with n = DF_ELEMENTS(df),
 *     picks pwt[k] when k < n and pws[k - n] when k >= n.
 * Results are stored through pwx, so every selector is read from the
 * unmodified destination register.
 *
 * MSA_DO, MSA_LOOP_COND and MSA_FN_DF are undefined again afterwards.
 */
5895 #define MSA_DO(DF)                                                          \
5896     do {                                                                    \
5897         uint32_t n = DF_ELEMENTS(df);                                       \
5898         uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
5899         pwx->DF[i] =                                                        \
5900             (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
5901     } while (0)
5902 MSA_FN_DF(vshf_df)
5903 #undef MSA_DO
5904 #undef MSA_LOOP_COND
5905 #undef MSA_FN_DF
5906 
5907 
5908 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5909                         uint32_t ws, uint32_t n)
5910 {
5911     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5912     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5913 
5914     msa_sld_df(df, pwd, pws, n);
5915 }
5916 
5917 void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5918                           uint32_t ws, uint32_t n)
5919 {
5920     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5921     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5922 
5923     msa_splat_df(df, pwd, pws, n);
5924 }
5925 
5926 void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5927                          uint32_t ws, uint32_t n)
5928 {
5929     n %= 16;
5930 #if HOST_BIG_ENDIAN
5931     if (n < 8) {
5932         n = 8 - n - 1;
5933     } else {
5934         n = 24 - n - 1;
5935     }
5936 #endif
5937     env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5938 }
5939 
5940 void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5941                          uint32_t ws, uint32_t n)
5942 {
5943     n %= 8;
5944 #if HOST_BIG_ENDIAN
5945     if (n < 4) {
5946         n = 4 - n - 1;
5947     } else {
5948         n = 12 - n - 1;
5949     }
5950 #endif
5951     env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5952 }
5953 
5954 void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5955                          uint32_t ws, uint32_t n)
5956 {
5957     n %= 4;
5958 #if HOST_BIG_ENDIAN
5959     if (n < 2) {
5960         n = 2 - n - 1;
5961     } else {
5962         n = 6 - n - 1;
5963     }
5964 #endif
5965     env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5966 }
5967 
5968 void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5969                          uint32_t ws, uint32_t n)
5970 {
5971     n %= 2;
5972     env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5973 }
5974 
5975 void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5976                          uint32_t ws, uint32_t n)
5977 {
5978     n %= 16;
5979 #if HOST_BIG_ENDIAN
5980     if (n < 8) {
5981         n = 8 - n - 1;
5982     } else {
5983         n = 24 - n - 1;
5984     }
5985 #endif
5986     env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5987 }
5988 
5989 void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5990                          uint32_t ws, uint32_t n)
5991 {
5992     n %= 8;
5993 #if HOST_BIG_ENDIAN
5994     if (n < 4) {
5995         n = 4 - n - 1;
5996     } else {
5997         n = 12 - n - 1;
5998     }
5999 #endif
6000     env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6001 }
6002 
6003 void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6004                          uint32_t ws, uint32_t n)
6005 {
6006     n %= 4;
6007 #if HOST_BIG_ENDIAN
6008     if (n < 2) {
6009         n = 2 - n - 1;
6010     } else {
6011         n = 6 - n - 1;
6012     }
6013 #endif
6014     env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6015 }
6016 
6017 void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6018                           uint32_t rs_num, uint32_t n)
6019 {
6020     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6021     target_ulong rs = env->active_tc.gpr[rs_num];
6022     n %= 16;
6023 #if HOST_BIG_ENDIAN
6024     if (n < 8) {
6025         n = 8 - n - 1;
6026     } else {
6027         n = 24 - n - 1;
6028     }
6029 #endif
6030     pwd->b[n] = (int8_t)rs;
6031 }
6032 
6033 void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6034                           uint32_t rs_num, uint32_t n)
6035 {
6036     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6037     target_ulong rs = env->active_tc.gpr[rs_num];
6038     n %= 8;
6039 #if HOST_BIG_ENDIAN
6040     if (n < 4) {
6041         n = 4 - n - 1;
6042     } else {
6043         n = 12 - n - 1;
6044     }
6045 #endif
6046     pwd->h[n] = (int16_t)rs;
6047 }
6048 
6049 void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6050                           uint32_t rs_num, uint32_t n)
6051 {
6052     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6053     target_ulong rs = env->active_tc.gpr[rs_num];
6054     n %= 4;
6055 #if HOST_BIG_ENDIAN
6056     if (n < 2) {
6057         n = 2 - n - 1;
6058     } else {
6059         n = 6 - n - 1;
6060     }
6061 #endif
6062     pwd->w[n] = (int32_t)rs;
6063 }
6064 
6065 void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6066                           uint32_t rs_num, uint32_t n)
6067 {
6068     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6069     target_ulong rs = env->active_tc.gpr[rs_num];
6070     n %= 2;
6071     pwd->d[n] = (int64_t)rs;
6072 }
6073 
6074 void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6075                          uint32_t ws, uint32_t n)
6076 {
6077     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6078     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6079 
6080     switch (df) {
6081     case DF_BYTE:
6082         pwd->b[n] = (int8_t)pws->b[0];
6083         break;
6084     case DF_HALF:
6085         pwd->h[n] = (int16_t)pws->h[0];
6086         break;
6087     case DF_WORD:
6088         pwd->w[n] = (int32_t)pws->w[0];
6089         break;
6090     case DF_DOUBLE:
6091         pwd->d[n] = (int64_t)pws->d[0];
6092         break;
6093     default:
6094         g_assert_not_reached();
6095     }
6096 }
6097 
6098 void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6099 {
6100     switch (cd) {
6101     case 0:
6102         break;
6103     case 1:
6104         env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6105         restore_msa_fp_status(env);
6106         /* check exception */
6107         if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6108             & GET_FP_CAUSE(env->active_tc.msacsr)) {
6109             do_raise_exception(env, EXCP_MSAFPE, GETPC());
6110         }
6111         break;
6112     }
6113 }
6114 
6115 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6116 {
6117     switch (cs) {
6118     case 0:
6119         return env->msair;
6120     case 1:
6121         return env->active_tc.msacsr & MSACSR_MASK;
6122     }
6123     return 0;
6124 }
6125 
6126 void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6127                         uint32_t rs)
6128 {
6129     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6130     uint32_t i;
6131 
6132     switch (df) {
6133     case DF_BYTE:
6134         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6135             pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6136         }
6137         break;
6138     case DF_HALF:
6139         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6140             pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6141         }
6142         break;
6143     case DF_WORD:
6144         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6145             pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6146         }
6147         break;
6148     case DF_DOUBLE:
6149         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6150             pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6151         }
6152        break;
6153     default:
6154         g_assert_not_reached();
6155     }
6156 }
6157 
6158 
/*
 * Bit patterns of the value 1.0: 0x3f800000 as float32 and
 * 0x3ff0000000000000 as float64.
 */
6159 #define FLOAT_ONE32 make_float32(0x3f8 << 20)
6160 #define FLOAT_ONE64 make_float64(0x3ffULL << 52)
6161 
/*
 * NaN patterns derived from the default NaN of float_status s by XOR-ing
 * it with a fixed mask.  The value noted under each macro is the result
 * when the default NaN is 0x7e00, 0x7fc00000 and 0x7ff8000000000000
 * respectively.
 */
6162 #define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
6163         /* 0x7c20 */
6164 #define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
6165         /* 0x7f800020 */
6166 #define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
6167         /* 0x7ff0000000000020 */
6168 
6169 static inline void clear_msacsr_cause(CPUMIPSState *env)
6170 {
6171     SET_FP_CAUSE(env->active_tc.msacsr, 0);
6172 }
6173 
6174 static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6175 {
6176     if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6177             (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6178         UPDATE_FP_FLAGS(env->active_tc.msacsr,
6179                 GET_FP_CAUSE(env->active_tc.msacsr));
6180     } else {
6181         do_raise_exception(env, EXCP_MSAFPE, retaddr);
6182     }
6183 }
6184 
6185 /*
 * Bit flags for the "action" argument of update_msacsr(); they may be
 * OR-ed together.
 *
 * CLEAR_FS_UNDERFLOW - when an output was flushed to zero, clear the
 *                      Underflow flag instead of setting it.
 * CLEAR_IS_INEXACT   - when an input was flushed to zero, clear the
 *                      Inexact flag instead of setting it.
 * RECIPROCAL_INEXACT - unless Invalid or Divide-by-zero is flagged,
 *                      report Inexact and nothing else.
 */
6186 #define CLEAR_FS_UNDERFLOW 1
6187 #define CLEAR_IS_INEXACT   2
6188 #define RECIPROCAL_INEXACT 4
6189 
6190 
6191 static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6192 {
6193     int mips_xcpt = 0;
6194 
6195     if (ieee_xcpt & float_flag_invalid) {
6196         mips_xcpt |= FP_INVALID;
6197     }
6198     if (ieee_xcpt & float_flag_overflow) {
6199         mips_xcpt |= FP_OVERFLOW;
6200     }
6201     if (ieee_xcpt & float_flag_underflow) {
6202         mips_xcpt |= FP_UNDERFLOW;
6203     }
6204     if (ieee_xcpt & float_flag_divbyzero) {
6205         mips_xcpt |= FP_DIV0;
6206     }
6207     if (ieee_xcpt & float_flag_inexact) {
6208         mips_xcpt |= FP_INEXACT;
6209     }
6210 
6211     return mips_xcpt;
6212 }
6213 
6214 static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
6215 {
6216     int ieee_exception_flags;
6217     int mips_exception_flags = 0;
6218     int cause;
6219     int enable;
6220 
6221     ieee_exception_flags = get_float_exception_flags(
6222                                &env->active_tc.msa_fp_status);
6223 
6224     /* QEMU softfloat does not signal all underflow cases */
6225     if (denormal) {
6226         ieee_exception_flags |= float_flag_underflow;
6227     }
6228     if (ieee_exception_flags) {
6229         mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
6230     }
6231     enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6232 
6233     /* Set Inexact (I) when flushing inputs to zero */
6234     if ((ieee_exception_flags & float_flag_input_denormal) &&
6235             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6236         if (action & CLEAR_IS_INEXACT) {
6237             mips_exception_flags &= ~FP_INEXACT;
6238         } else {
6239             mips_exception_flags |= FP_INEXACT;
6240         }
6241     }
6242 
6243     /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
6244     if ((ieee_exception_flags & float_flag_output_denormal) &&
6245             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6246         mips_exception_flags |= FP_INEXACT;
6247         if (action & CLEAR_FS_UNDERFLOW) {
6248             mips_exception_flags &= ~FP_UNDERFLOW;
6249         } else {
6250             mips_exception_flags |= FP_UNDERFLOW;
6251         }
6252     }
6253 
6254     /* Set Inexact (I) when Overflow (O) is not enabled */
6255     if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
6256            (enable & FP_OVERFLOW) == 0) {
6257         mips_exception_flags |= FP_INEXACT;
6258     }
6259 
6260     /* Clear Exact Underflow when Underflow (U) is not enabled */
6261     if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
6262            (enable & FP_UNDERFLOW) == 0 &&
6263            (mips_exception_flags & FP_INEXACT) == 0) {
6264         mips_exception_flags &= ~FP_UNDERFLOW;
6265     }
6266 
6267     /*
6268      * Reciprocal operations set only Inexact when valid and not
6269      * divide by zero
6270      */
6271     if ((action & RECIPROCAL_INEXACT) &&
6272             (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
6273         mips_exception_flags = FP_INEXACT;
6274     }
6275 
6276     cause = mips_exception_flags & enable; /* all current enabled exceptions */
6277 
6278     if (cause == 0) {
6279         /*
6280          * No enabled exception, update the MSACSR Cause
6281          * with all current exceptions
6282          */
6283         SET_FP_CAUSE(env->active_tc.msacsr,
6284             (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6285     } else {
6286         /* Current exceptions are enabled */
6287         if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
6288             /*
6289              * Exception(s) will trap, update MSACSR Cause
6290              * with all enabled exceptions
6291              */
6292             SET_FP_CAUSE(env->active_tc.msacsr,
6293                 (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6294         }
6295     }
6296 
6297     return mips_exception_flags;
6298 }
6299 
6300 static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6301 {
6302     int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6303     return c & enable;
6304 }
6305 
6306 static inline float16 float16_from_float32(int32_t a, bool ieee,
6307                                            float_status *status)
6308 {
6309       float16 f_val;
6310 
6311       f_val = float32_to_float16((float32)a, ieee, status);
6312 
6313       return a < 0 ? (f_val | (1 << 15)) : f_val;
6314 }
6315 
6316 static inline float32 float32_from_float64(int64_t a, float_status *status)
6317 {
6318       float32 f_val;
6319 
6320       f_val = float64_to_float32((float64)a, status);
6321 
6322       return a < 0 ? (f_val | (1 << 31)) : f_val;
6323 }
6324 
6325 static inline float32 float32_from_float16(int16_t a, bool ieee,
6326                                            float_status *status)
6327 {
6328       float32 f_val;
6329 
6330       f_val = float16_to_float32((float16)a, ieee, status);
6331 
6332       return a < 0 ? (f_val | (1 << 31)) : f_val;
6333 }
6334 
6335 static inline float64 float64_from_float32(int32_t a, float_status *status)
6336 {
6337       float64 f_val;
6338 
6339       f_val = float32_to_float64((float64)a, status);
6340 
6341       return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6342 }
6343 
6344 static inline float32 float32_from_q16(int16_t a, float_status *status)
6345 {
6346     float32 f_val;
6347 
6348     /* conversion as integer and scaling */
6349     f_val = int32_to_float32(a, status);
6350     f_val = float32_scalbn(f_val, -15, status);
6351 
6352     return f_val;
6353 }
6354 
6355 static inline float64 float64_from_q32(int32_t a, float_status *status)
6356 {
6357     float64 f_val;
6358 
6359     /* conversion as integer and scaling */
6360     f_val = int32_to_float64(a, status);
6361     f_val = float64_scalbn(f_val, -31, status);
6362 
6363     return f_val;
6364 }
6365 
6366 static inline int16_t float32_to_q16(float32 a, float_status *status)
6367 {
6368     int32_t q_val;
6369     int32_t q_min = 0xffff8000;
6370     int32_t q_max = 0x00007fff;
6371 
6372     int ieee_ex;
6373 
6374     if (float32_is_any_nan(a)) {
6375         float_raise(float_flag_invalid, status);
6376         return 0;
6377     }
6378 
6379     /* scaling */
6380     a = float32_scalbn(a, 15, status);
6381 
6382     ieee_ex = get_float_exception_flags(status);
6383     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6384                              , status);
6385 
6386     if (ieee_ex & float_flag_overflow) {
6387         float_raise(float_flag_inexact, status);
6388         return (int32_t)a < 0 ? q_min : q_max;
6389     }
6390 
6391     /* conversion to int */
6392     q_val = float32_to_int32(a, status);
6393 
6394     ieee_ex = get_float_exception_flags(status);
6395     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6396                              , status);
6397 
6398     if (ieee_ex & float_flag_invalid) {
6399         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6400                                , status);
6401         float_raise(float_flag_overflow | float_flag_inexact, status);
6402         return (int32_t)a < 0 ? q_min : q_max;
6403     }
6404 
6405     if (q_val < q_min) {
6406         float_raise(float_flag_overflow | float_flag_inexact, status);
6407         return (int16_t)q_min;
6408     }
6409 
6410     if (q_max < q_val) {
6411         float_raise(float_flag_overflow | float_flag_inexact, status);
6412         return (int16_t)q_max;
6413     }
6414 
6415     return (int16_t)q_val;
6416 }
6417 
6418 static inline int32_t float64_to_q32(float64 a, float_status *status)
6419 {
6420     int64_t q_val;
6421     int64_t q_min = 0xffffffff80000000LL;
6422     int64_t q_max = 0x000000007fffffffLL;
6423 
6424     int ieee_ex;
6425 
6426     if (float64_is_any_nan(a)) {
6427         float_raise(float_flag_invalid, status);
6428         return 0;
6429     }
6430 
6431     /* scaling */
6432     a = float64_scalbn(a, 31, status);
6433 
6434     ieee_ex = get_float_exception_flags(status);
6435     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6436            , status);
6437 
6438     if (ieee_ex & float_flag_overflow) {
6439         float_raise(float_flag_inexact, status);
6440         return (int64_t)a < 0 ? q_min : q_max;
6441     }
6442 
6443     /* conversion to integer */
6444     q_val = float64_to_int64(a, status);
6445 
6446     ieee_ex = get_float_exception_flags(status);
6447     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6448            , status);
6449 
6450     if (ieee_ex & float_flag_invalid) {
6451         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6452                , status);
6453         float_raise(float_flag_overflow | float_flag_inexact, status);
6454         return (int64_t)a < 0 ? q_min : q_max;
6455     }
6456 
6457     if (q_val < q_min) {
6458         float_raise(float_flag_overflow | float_flag_inexact, status);
6459         return (int32_t)q_min;
6460     }
6461 
6462     if (q_max < q_val) {
6463         float_raise(float_flag_overflow | float_flag_inexact, status);
6464         return (int32_t)q_max;
6465     }
6466 
6467     return (int32_t)q_val;
6468 }
6469 
/*
 * Evaluate one float<BITS> comparison and store the mask result in DEST.
 *
 * The MSA float status has its exception flags cleared, then either
 * float<BITS>_<OP>_quiet() (QUIET non-zero) or float<BITS>_<OP>() is run
 * on ARG1/ARG2.  DEST becomes all ones over BITS bits when the comparison
 * holds and 0 otherwise.  If the cause returned by update_msacsr() has an
 * enabled exception, DEST is overwritten with FLOAT_SNAN<BITS> whose low
 * six bits are replaced by that cause value.
 *
 * Expects `env` to be visible where the macro is expanded.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int64_t cond;                                                       \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        if (QUIET) {                                                        \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
6488 
/*
 * Run the eq comparison through MSA_FLOAT_COND, then turn an all-ones
 * (true) result back into 0.  Any other value MSA_FLOAT_COND left in DEST
 * is kept as is.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                         \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);                  \
        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {                \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
6496 
/*
 * Unordered-or-equal: try the unordered comparison first and fall back to
 * eq only while DEST is still 0.
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                        \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);           \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);              \
        }                                                                   \
    } while (0)
6504 
/*
 * Not-equal, built from two lt comparisons: ARG1 < ARG2 first, and only
 * while DEST is still 0, ARG2 < ARG1.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                         \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);                  \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);              \
        }                                                                   \
    } while (0)
6512 
/*
 * Unordered-or-not-equal: unordered first, then ARG1 < ARG2, then
 * ARG2 < ARG1.  Each later comparison runs only while DEST is still 0.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                        \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);           \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);              \
            if ((DEST) == 0) {                                              \
                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);          \
            }                                                               \
        }                                                                   \
    } while (0)
6523 
/*
 * Unordered-or-less-or-equal: try the unordered comparison first and fall
 * back to le only while DEST is still 0.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                        \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);           \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);              \
        }                                                                   \
    } while (0)
6531 
/*
 * Unordered-or-less-than: try the unordered comparison first and fall back
 * to lt only while DEST is still 0.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                        \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);           \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);              \
        }                                                                   \
    } while (0)
6539 
/*
 * Ordered test, built from two le comparisons: ARG1 <= ARG2 first, and
 * only while DEST is still 0, ARG2 <= ARG1.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                         \
    do {                                                                    \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);                  \
        if ((DEST) == 0) {                                                  \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);              \
        }                                                                   \
    } while (0)
6547 
6548 static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6549                               wr_t *pwt, uint32_t df, int quiet,
6550                               uintptr_t retaddr)
6551 {
6552     wr_t wx, *pwx = &wx;
6553     uint32_t i;
6554 
6555     clear_msacsr_cause(env);
6556 
6557     switch (df) {
6558     case DF_WORD:
6559         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6560             MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6561         }
6562         break;
6563     case DF_DOUBLE:
6564         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6565             MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6566         }
6567         break;
6568     default:
6569         g_assert_not_reached();
6570     }
6571 
6572     check_msacsr_cause(env, retaddr);
6573 
6574     msa_move_v(pwd, pwx);
6575 }
6576 
6577 static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6578                               wr_t *pwt, uint32_t df, int quiet,
6579                               uintptr_t retaddr)
6580 {
6581     wr_t wx, *pwx = &wx;
6582     uint32_t i;
6583 
6584     clear_msacsr_cause(env);
6585 
6586     switch (df) {
6587     case DF_WORD:
6588         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6589             MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6590                     quiet);
6591         }
6592         break;
6593     case DF_DOUBLE:
6594         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6595             MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6596                     quiet);
6597         }
6598         break;
6599     default:
6600         g_assert_not_reached();
6601     }
6602 
6603     check_msacsr_cause(env, retaddr);
6604 
6605     msa_move_v(pwd, pwx);
6606 }
6607 
6608 static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6609                               wr_t *pwt, uint32_t df, int quiet,
6610                               uintptr_t retaddr)
6611 {
6612     wr_t wx, *pwx = &wx;
6613     uint32_t i;
6614 
6615     clear_msacsr_cause(env);
6616 
6617     switch (df) {
6618     case DF_WORD:
6619         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6620             MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6621         }
6622         break;
6623     case DF_DOUBLE:
6624         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6625             MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6626         }
6627         break;
6628     default:
6629         g_assert_not_reached();
6630     }
6631 
6632     check_msacsr_cause(env, retaddr);
6633 
6634     msa_move_v(pwd, pwx);
6635 }
6636 
6637 static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6638                                wr_t *pwt, uint32_t df, int quiet,
6639                                uintptr_t retaddr)
6640 {
6641     wr_t wx, *pwx = &wx;
6642     uint32_t i;
6643 
6644     clear_msacsr_cause(env);
6645 
6646     switch (df) {
6647     case DF_WORD:
6648         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6649             MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6650         }
6651         break;
6652     case DF_DOUBLE:
6653         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6654             MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6655         }
6656         break;
6657     default:
6658         g_assert_not_reached();
6659     }
6660 
6661     check_msacsr_cause(env, retaddr);
6662 
6663     msa_move_v(pwd, pwx);
6664 }
6665 
6666 static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6667                               wr_t *pwt, uint32_t df, int quiet,
6668                               uintptr_t retaddr)
6669 {
6670     wr_t wx, *pwx = &wx;
6671     uint32_t i;
6672 
6673     clear_msacsr_cause(env);
6674 
6675     switch (df) {
6676     case DF_WORD:
6677         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6678             MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6679         }
6680         break;
6681     case DF_DOUBLE:
6682         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6683             MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6684         }
6685         break;
6686     default:
6687         g_assert_not_reached();
6688     }
6689 
6690     check_msacsr_cause(env, retaddr);
6691 
6692     msa_move_v(pwd, pwx);
6693 }
6694 
6695 static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6696                                wr_t *pwt, uint32_t df, int quiet,
6697                                uintptr_t retaddr)
6698 {
6699     wr_t wx, *pwx = &wx;
6700     uint32_t i;
6701 
6702     clear_msacsr_cause(env);
6703 
6704     switch (df) {
6705     case DF_WORD:
6706         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6707             MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6708         }
6709         break;
6710     case DF_DOUBLE:
6711         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6712             MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6713         }
6714         break;
6715     default:
6716         g_assert_not_reached();
6717     }
6718 
6719     check_msacsr_cause(env, retaddr);
6720 
6721     msa_move_v(pwd, pwx);
6722 }
6723 
6724 static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6725                               wr_t *pwt, uint32_t df, int quiet,
6726                               uintptr_t retaddr)
6727 {
6728     wr_t wx, *pwx = &wx;
6729     uint32_t i;
6730 
6731     clear_msacsr_cause(env);
6732 
6733     switch (df) {
6734     case DF_WORD:
6735         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6736             MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6737         }
6738         break;
6739     case DF_DOUBLE:
6740         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6741             MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6742         }
6743         break;
6744     default:
6745         g_assert_not_reached();
6746     }
6747 
6748     check_msacsr_cause(env, retaddr);
6749 
6750     msa_move_v(pwd, pwx);
6751 }
6752 
6753 static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6754                                wr_t *pwt, uint32_t df, int quiet,
6755                                uintptr_t retaddr)
6756 {
6757     wr_t wx, *pwx = &wx;
6758     uint32_t i;
6759 
6760     clear_msacsr_cause(env);
6761 
6762     switch (df) {
6763     case DF_WORD:
6764         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6765             MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6766         }
6767         break;
6768     case DF_DOUBLE:
6769         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6770             MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6771         }
6772         break;
6773     default:
6774         g_assert_not_reached();
6775     }
6776 
6777     check_msacsr_cause(env, retaddr);
6778 
6779     msa_move_v(pwd, pwx);
6780 }
6781 
6782 static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6783                               wr_t *pwt, uint32_t df, int quiet,
6784                               uintptr_t retaddr)
6785 {
6786     wr_t wx, *pwx = &wx;
6787     uint32_t i;
6788 
6789     clear_msacsr_cause(env);
6790 
6791     switch (df) {
6792     case DF_WORD:
6793         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6794             MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6795         }
6796         break;
6797     case DF_DOUBLE:
6798         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6799             MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6800         }
6801         break;
6802     default:
6803         g_assert_not_reached();
6804     }
6805 
6806     check_msacsr_cause(env, retaddr);
6807 
6808     msa_move_v(pwd, pwx);
6809 }
6810 
6811 static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6812                                wr_t *pwt, uint32_t df, int quiet,
6813                                uintptr_t retaddr)
6814 {
6815     wr_t wx, *pwx = &wx;
6816     uint32_t i;
6817 
6818     clear_msacsr_cause(env);
6819 
6820     switch (df) {
6821     case DF_WORD:
6822         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6823             MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6824         }
6825         break;
6826     case DF_DOUBLE:
6827         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6828             MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6829         }
6830         break;
6831     default:
6832         g_assert_not_reached();
6833     }
6834 
6835     check_msacsr_cause(env, retaddr);
6836 
6837     msa_move_v(pwd, pwx);
6838 }
6839 
6840 static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6841                               wr_t *pwt, uint32_t df, int quiet,
6842                               uintptr_t retaddr)
6843 {
6844     wr_t wx, *pwx = &wx;
6845     uint32_t i;
6846 
6847     clear_msacsr_cause(env);
6848 
6849     switch (df) {
6850     case DF_WORD:
6851         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6852             MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6853         }
6854         break;
6855     case DF_DOUBLE:
6856         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6857             MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6858         }
6859         break;
6860     default:
6861         g_assert_not_reached();
6862     }
6863 
6864     check_msacsr_cause(env, retaddr);
6865 
6866     msa_move_v(pwd, pwx);
6867 }
6868 
6869 void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6870                         uint32_t ws, uint32_t wt)
6871 {
6872     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6873     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6874     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6875     compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6876 }
6877 
6878 void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6879                         uint32_t ws, uint32_t wt)
6880 {
6881     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6882     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6883     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6884     compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6885 }
6886 
6887 void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6888                         uint32_t ws, uint32_t wt)
6889 {
6890     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6891     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6892     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6893     compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6894 }
6895 
6896 void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6897                          uint32_t ws, uint32_t wt)
6898 {
6899     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6900     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6901     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6902     compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6903 }
6904 
6905 void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6906                         uint32_t ws, uint32_t wt)
6907 {
6908     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6909     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6910     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6911     compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6912 }
6913 
6914 void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6915                          uint32_t ws, uint32_t wt)
6916 {
6917     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6918     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6919     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6920     compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6921 }
6922 
6923 void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6924                         uint32_t ws, uint32_t wt)
6925 {
6926     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6927     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6928     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6929     compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6930 }
6931 
6932 void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6933                          uint32_t ws, uint32_t wt)
6934 {
6935     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6936     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6937     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6938     compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6939 }
6940 
6941 void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6942                         uint32_t ws, uint32_t wt)
6943 {
6944     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6945     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6946     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6947     compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6948 }
6949 
6950 void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6951                         uint32_t ws, uint32_t wt)
6952 {
6953     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6954     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6955     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6956     compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6957 }
6958 
6959 void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6960                         uint32_t ws, uint32_t wt)
6961 {
6962     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6963     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6964     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6965     compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6966 }
6967 
6968 void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6969                          uint32_t ws, uint32_t wt)
6970 {
6971     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6972     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6973     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6974     compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6975 }
6976 
6977 void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6978                         uint32_t ws, uint32_t wt)
6979 {
6980     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6981     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6982     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6983     compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6984 }
6985 
6986 void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6987                          uint32_t ws, uint32_t wt)
6988 {
6989     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6990     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6991     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6992     compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6993 }
6994 
6995 void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6996                         uint32_t ws, uint32_t wt)
6997 {
6998     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6999     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7000     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7001     compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7002 }
7003 
7004 void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7005                          uint32_t ws, uint32_t wt)
7006 {
7007     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7008     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7009     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7010     compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7011 }
7012 
7013 void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7014                         uint32_t ws, uint32_t wt)
7015 {
7016     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7017     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7018     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7019     compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7020 }
7021 
7022 void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7023                          uint32_t ws, uint32_t wt)
7024 {
7025     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7026     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7027     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7028     compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7029 }
7030 
7031 void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7032                         uint32_t ws, uint32_t wt)
7033 {
7034     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7035     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7036     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7037     compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7038 }
7039 
7040 void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7041                         uint32_t ws, uint32_t wt)
7042 {
7043     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7044     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7045     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7046     compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7047 }
7048 
7049 void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7050                          uint32_t ws, uint32_t wt)
7051 {
7052     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7053     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7054     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7055     compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7056 }
7057 
7058 void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7059                         uint32_t ws, uint32_t wt)
7060 {
7061     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7062     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7063     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7064     compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7065 }
7066 
/*
 * Stub float16 predicates so IS_DENORMAL() can be expanded with BITS = 16;
 * both always evaluate to 0.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/* Non-zero when ARG is zero-or-denormal but not zero for float<BITS>. */
#define IS_DENORMAL(ARG, BITS)                          \
    (float ## BITS ## _is_zero_or_denormal(ARG)         \
    && !float ## BITS ## _is_zero(ARG))
7073 
/*
 * DEST = float<BITS>_<OP>(ARG1, ARG2), computed with the MSA float status
 * after its exception flags have been cleared.  update_msacsr() is told
 * whether the result is a denormal; if the cause it returns has an enabled
 * exception, DEST is replaced by FLOAT_SNAN<BITS> whose low six bits are
 * replaced by that cause value.
 *
 * Expects `env` to be visible where the macro is expanded.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        int c;                                                              \
        float_status *status = &env->active_tc.msa_fp_status;               \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7087 
7088 void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7089         uint32_t ws, uint32_t wt)
7090 {
7091     wr_t wx, *pwx = &wx;
7092     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7093     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7094     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7095     uint32_t i;
7096 
7097     clear_msacsr_cause(env);
7098 
7099     switch (df) {
7100     case DF_WORD:
7101         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7102             MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7103         }
7104         break;
7105     case DF_DOUBLE:
7106         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7107             MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7108         }
7109         break;
7110     default:
7111         g_assert_not_reached();
7112     }
7113 
7114     check_msacsr_cause(env, GETPC());
7115     msa_move_v(pwd, pwx);
7116 }
7117 
7118 void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7119         uint32_t ws, uint32_t wt)
7120 {
7121     wr_t wx, *pwx = &wx;
7122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7124     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7125     uint32_t i;
7126 
7127     clear_msacsr_cause(env);
7128 
7129     switch (df) {
7130     case DF_WORD:
7131         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7132             MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7133         }
7134         break;
7135     case DF_DOUBLE:
7136         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7137             MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7138         }
7139         break;
7140     default:
7141         g_assert_not_reached();
7142     }
7143 
7144     check_msacsr_cause(env, GETPC());
7145     msa_move_v(pwd, pwx);
7146 }
7147 
7148 void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7149         uint32_t ws, uint32_t wt)
7150 {
7151     wr_t wx, *pwx = &wx;
7152     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7153     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7154     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7155     uint32_t i;
7156 
7157     clear_msacsr_cause(env);
7158 
7159     switch (df) {
7160     case DF_WORD:
7161         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7162             MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7163         }
7164         break;
7165     case DF_DOUBLE:
7166         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7167             MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7168         }
7169         break;
7170     default:
7171         g_assert_not_reached();
7172     }
7173 
7174     check_msacsr_cause(env, GETPC());
7175 
7176     msa_move_v(pwd, pwx);
7177 }
7178 
7179 void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7180         uint32_t ws, uint32_t wt)
7181 {
7182     wr_t wx, *pwx = &wx;
7183     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7184     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7185     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7186     uint32_t i;
7187 
7188     clear_msacsr_cause(env);
7189 
7190     switch (df) {
7191     case DF_WORD:
7192         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7193             MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7194         }
7195         break;
7196     case DF_DOUBLE:
7197         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7198             MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7199         }
7200         break;
7201     default:
7202         g_assert_not_reached();
7203     }
7204 
7205     check_msacsr_cause(env, GETPC());
7206 
7207     msa_move_v(pwd, pwx);
7208 }
7209 
/*
 * DEST = float<BITS>_muladd(ARG2, ARG3, ARG1, NEGATE), i.e. ARG2 and ARG3
 * are the multiplicands and ARG1 the addend, computed with the MSA float
 * status after its exception flags have been cleared.  update_msacsr() is
 * told whether the result is a denormal; if the cause it returns has an
 * enabled exception, DEST is replaced by FLOAT_SNAN<BITS> whose low six
 * bits are replaced by that cause value.
 *
 * Expects `env` to be visible where the macro is expanded.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        int c;                                                              \
        float_status *status = &env->active_tc.msa_fp_status;               \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7223 
7224 void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7225         uint32_t ws, uint32_t wt)
7226 {
7227     wr_t wx, *pwx = &wx;
7228     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7229     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7230     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7231     uint32_t i;
7232 
7233     clear_msacsr_cause(env);
7234 
7235     switch (df) {
7236     case DF_WORD:
7237         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7238             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7239                            pws->w[i], pwt->w[i], 0, 32);
7240         }
7241         break;
7242     case DF_DOUBLE:
7243         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7244             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7245                            pws->d[i], pwt->d[i], 0, 64);
7246         }
7247         break;
7248     default:
7249         g_assert_not_reached();
7250     }
7251 
7252     check_msacsr_cause(env, GETPC());
7253 
7254     msa_move_v(pwd, pwx);
7255 }
7256 
7257 void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7258         uint32_t ws, uint32_t wt)
7259 {
7260     wr_t wx, *pwx = &wx;
7261     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7262     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7263     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7264     uint32_t i;
7265 
7266     clear_msacsr_cause(env);
7267 
7268     switch (df) {
7269     case DF_WORD:
7270         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7271             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7272                            pws->w[i], pwt->w[i],
7273                            float_muladd_negate_product, 32);
7274       }
7275       break;
7276     case DF_DOUBLE:
7277         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7278             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7279                            pws->d[i], pwt->d[i],
7280                            float_muladd_negate_product, 64);
7281         }
7282         break;
7283     default:
7284         g_assert_not_reached();
7285     }
7286 
7287     check_msacsr_cause(env, GETPC());
7288 
7289     msa_move_v(pwd, pwx);
7290 }
7291 
7292 void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7293         uint32_t ws, uint32_t wt)
7294 {
7295     wr_t wx, *pwx = &wx;
7296     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7297     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7298     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7299     uint32_t i;
7300 
7301     clear_msacsr_cause(env);
7302 
7303     switch (df) {
7304     case DF_WORD:
7305         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7306             MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7307                             pwt->w[i] >  0x200 ?  0x200 :
7308                             pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7309                             32);
7310         }
7311         break;
7312     case DF_DOUBLE:
7313         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7314             MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7315                             pwt->d[i] >  0x1000 ?  0x1000 :
7316                             pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7317                             64);
7318         }
7319         break;
7320     default:
7321         g_assert_not_reached();
7322     }
7323 
7324     check_msacsr_cause(env, GETPC());
7325 
7326     msa_move_v(pwd, pwx);
7327 }
7328 
/*
 * Per-element unary operation.  DEST receives float<BITS>_<OP>(ARG, ...),
 * evaluated with the softfloat exception flags cleared first.  When
 * get_enabled_exceptions() accepts the value returned by update_msacsr(),
 * DEST is overwritten with the FLOAT_SNAN<BITS>() pattern whose low six
 * bits are replaced by that value.
 *
 * Relies on a variable named 'env' being in scope at the expansion site.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *fst_ = &env->active_tc.msa_fp_status;                 \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fst_);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fst_);                         \
        cause_ = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));            \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fst_) >> 6) << 6) | cause_;         \
        }                                                                   \
    } while (0)
7342 
7343 void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7344                          uint32_t ws, uint32_t wt)
7345 {
7346     wr_t wx, *pwx = &wx;
7347     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7348     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7349     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7350     uint32_t i;
7351 
7352     clear_msacsr_cause(env);
7353 
7354     switch (df) {
7355     case DF_WORD:
7356         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7357             /*
7358              * Half precision floats come in two formats: standard
7359              * IEEE and "ARM" format.  The latter gains extra exponent
7360              * range by omitting the NaN/Inf encodings.
7361              */
7362             bool ieee = true;
7363 
7364             MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7365             MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7366         }
7367         break;
7368     case DF_DOUBLE:
7369         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7370             MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7371             MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7372         }
7373         break;
7374     default:
7375         g_assert_not_reached();
7376     }
7377 
7378     check_msacsr_cause(env, GETPC());
7379     msa_move_v(pwd, pwx);
7380 }
7381 
/*
 * Per-element unary operation whose result is narrower than its input.
 * DEST receives float<BITS>_<OP>(ARG, ...); update_msacsr() is called with
 * CLEAR_FS_UNDERFLOW and no denormal indication.  On an enabled exception
 * DEST is overwritten with the FLOAT_SNAN<XBITS>() pattern (the result
 * width, not the input width) whose low six bits are replaced by the value
 * returned by update_msacsr().
 *
 * Relies on a variable named 'env' being in scope at the expansion site.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *fst_ = &env->active_tc.msa_fp_status;                 \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fst_);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fst_);                         \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## XBITS(fst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7395 
7396 void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7397                        uint32_t ws, uint32_t wt)
7398 {
7399     wr_t wx, *pwx = &wx;
7400     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7401     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7402     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7403     uint32_t i;
7404 
7405     clear_msacsr_cause(env);
7406 
7407     switch (df) {
7408     case DF_WORD:
7409         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7410             MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7411             MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7412         }
7413         break;
7414     case DF_DOUBLE:
7415         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7416             MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7417             MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7418         }
7419         break;
7420     default:
7421         g_assert_not_reached();
7422     }
7423 
7424     check_msacsr_cause(env, GETPC());
7425 
7426     msa_move_v(pwd, pwx);
7427 }
7428 
/*
 * True when ARG1 is not a NaN of any kind and ARG2 is a quiet NaN.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used (e.g. under '!' or next to other operators).
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7432 
/*
 * Per-element binary min/max style operation.  DEST receives
 * float<BITS>_<OP>(ARG1, ARG2, ...); update_msacsr() is called with no
 * action and no denormal indication.  On an enabled exception DEST is
 * overwritten with the FLOAT_SNAN<BITS>() pattern whose low six bits are
 * replaced by the value returned by update_msacsr().
 *
 * The local status pointer carries a trailing underscore so it does not
 * shadow a 'status' variable at the expansion site.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *fst_ = &env->active_tc.msa_fp_status;                 \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fst_);                                 \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, fst_);                  \
        cause_ = update_msacsr(env, 0, 0);                                  \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fst_) >> 6) << 6) | cause_;         \
        }                                                                   \
    } while (0)
7446 
/*
 * Min/max by absolute value for one element.
 *
 * F is the primary operation and G its counterpart (min/max or max/min).
 * When exactly one operand is a quiet NaN and the other is not a NaN, the
 * NaN operand is first replaced by the other one.  F and G are then applied
 * to the operands, and F to their absolute values; X receives the F result
 * when the absolute values are equal or the F result's absolute value
 * matches F of the absolute values, and the G result otherwise.
 */
#define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
    do {                                                            \
        uint## BITS ##_t lhs_ = _S, rhs_ = _T;                      \
        uint## BITS ##_t abs_l_, abs_r_, by_f_, by_g_, abs_f_;      \
                                                                    \
        if (NUMBER_QNAN_PAIR(lhs_, rhs_, BITS, STATUS)) {           \
            rhs_ = lhs_;                                            \
        } else if (NUMBER_QNAN_PAIR(rhs_, lhs_, BITS, STATUS)) {    \
            lhs_ = rhs_;                                            \
        }                                                           \
        abs_l_ = float## BITS ##_abs(lhs_);                         \
        abs_r_ = float## BITS ##_abs(rhs_);                         \
        MSA_FLOAT_MAXOP(by_f_, F, lhs_, rhs_, BITS);                \
        MSA_FLOAT_MAXOP(by_g_, G, lhs_, rhs_, BITS);                \
        MSA_FLOAT_MAXOP(abs_f_, F, abs_l_, abs_r_, BITS);           \
        if (abs_l_ == abs_r_ ||                                     \
            abs_f_ == float## BITS ##_abs(by_f_)) {                 \
            X = by_f_;                                              \
        } else {                                                    \
            X = by_g_;                                              \
        }                                                           \
    } while (0)
7464 
7465 void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7466         uint32_t ws, uint32_t wt)
7467 {
7468     float_status *status = &env->active_tc.msa_fp_status;
7469     wr_t wx, *pwx = &wx;
7470     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7471     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7472     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7473 
7474     clear_msacsr_cause(env);
7475 
7476     if (df == DF_WORD) {
7477 
7478         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7479             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7480         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7481             MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7482         } else {
7483             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7484         }
7485 
7486         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7487             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7488         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7489             MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7490         } else {
7491             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7492         }
7493 
7494         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7495             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7496         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7497             MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7498         } else {
7499             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7500         }
7501 
7502         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7503             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7504         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7505             MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7506         } else {
7507             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7508         }
7509 
7510     } else if (df == DF_DOUBLE) {
7511 
7512         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7513             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7514         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7515             MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7516         } else {
7517             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7518         }
7519 
7520         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7521             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7522         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7523             MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7524         } else {
7525             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7526         }
7527 
7528     } else {
7529 
7530         g_assert_not_reached();
7531 
7532     }
7533 
7534     check_msacsr_cause(env, GETPC());
7535 
7536     msa_move_v(pwd, pwx);
7537 }
7538 
7539 void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7540         uint32_t ws, uint32_t wt)
7541 {
7542     float_status *status = &env->active_tc.msa_fp_status;
7543     wr_t wx, *pwx = &wx;
7544     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7545     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7546     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7547 
7548     clear_msacsr_cause(env);
7549 
7550     if (df == DF_WORD) {
7551         FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7552         FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7553         FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7554         FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7555     } else if (df == DF_DOUBLE) {
7556         FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7557         FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7558     } else {
7559         g_assert_not_reached();
7560     }
7561 
7562     check_msacsr_cause(env, GETPC());
7563 
7564     msa_move_v(pwd, pwx);
7565 }
7566 
7567 void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7568         uint32_t ws, uint32_t wt)
7569 {
7570      float_status *status = &env->active_tc.msa_fp_status;
7571     wr_t wx, *pwx = &wx;
7572     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7573     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7574     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7575 
7576     clear_msacsr_cause(env);
7577 
7578     if (df == DF_WORD) {
7579 
7580         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7581             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7582         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7583             MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7584         } else {
7585             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7586         }
7587 
7588         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7589             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7590         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7591             MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7592         } else {
7593             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7594         }
7595 
7596         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7597             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7598         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7599             MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7600         } else {
7601             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7602         }
7603 
7604         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7605             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7606         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7607             MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7608         } else {
7609             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7610         }
7611 
7612     } else if (df == DF_DOUBLE) {
7613 
7614         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7615             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7616         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7617             MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7618         } else {
7619             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7620         }
7621 
7622         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7623             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7624         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7625             MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7626         } else {
7627             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7628         }
7629 
7630     } else {
7631 
7632         g_assert_not_reached();
7633 
7634     }
7635 
7636     check_msacsr_cause(env, GETPC());
7637 
7638     msa_move_v(pwd, pwx);
7639 }
7640 
7641 void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7642         uint32_t ws, uint32_t wt)
7643 {
7644     float_status *status = &env->active_tc.msa_fp_status;
7645     wr_t wx, *pwx = &wx;
7646     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7647     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7648     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7649 
7650     clear_msacsr_cause(env);
7651 
7652     if (df == DF_WORD) {
7653         FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7654         FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7655         FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7656         FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7657     } else if (df == DF_DOUBLE) {
7658         FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7659         FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7660     } else {
7661         g_assert_not_reached();
7662     }
7663 
7664     check_msacsr_cause(env, GETPC());
7665 
7666     msa_move_v(pwd, pwx);
7667 }
7668 
7669 void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7670         uint32_t wd, uint32_t ws)
7671 {
7672     float_status *status = &env->active_tc.msa_fp_status;
7673 
7674     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7675     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7676     if (df == DF_WORD) {
7677         pwd->w[0] = float_class_s(pws->w[0], status);
7678         pwd->w[1] = float_class_s(pws->w[1], status);
7679         pwd->w[2] = float_class_s(pws->w[2], status);
7680         pwd->w[3] = float_class_s(pws->w[3], status);
7681     } else if (df == DF_DOUBLE) {
7682         pwd->d[0] = float_class_d(pws->d[0], status);
7683         pwd->d[1] = float_class_d(pws->d[1], status);
7684     } else {
7685         g_assert_not_reached();
7686     }
7687 }
7688 
/*
 * Per-element unary operation that yields zero for NaN inputs.
 * DEST receives float<BITS>_<OP>(ARG, ...); update_msacsr() is called with
 * CLEAR_FS_UNDERFLOW and no denormal indication.  On an enabled exception
 * DEST is overwritten with the FLOAT_SNAN<BITS>() pattern whose low six
 * bits are replaced by the value returned by update_msacsr(); otherwise,
 * if ARG is any kind of NaN, DEST is forced to 0.
 *
 * ARG is expanded twice.  Relies on 'env' being in scope.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *fst_ = &env->active_tc.msa_fp_status;                 \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fst_);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fst_);                         \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fst_) >> 6) << 6) | cause_;         \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7704 
7705 void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7706                             uint32_t ws)
7707 {
7708     wr_t wx, *pwx = &wx;
7709     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7710     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7711     uint32_t i;
7712 
7713     clear_msacsr_cause(env);
7714 
7715     switch (df) {
7716     case DF_WORD:
7717         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7718             MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7719         }
7720         break;
7721     case DF_DOUBLE:
7722         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7723             MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7724         }
7725         break;
7726     default:
7727         g_assert_not_reached();
7728     }
7729 
7730     check_msacsr_cause(env, GETPC());
7731 
7732     msa_move_v(pwd, pwx);
7733 }
7734 
7735 void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7736                             uint32_t ws)
7737 {
7738     wr_t wx, *pwx = &wx;
7739     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7740     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7741     uint32_t i;
7742 
7743     clear_msacsr_cause(env);
7744 
7745     switch (df) {
7746     case DF_WORD:
7747         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7748             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7749         }
7750         break;
7751     case DF_DOUBLE:
7752         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7753             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7754         }
7755         break;
7756     default:
7757         g_assert_not_reached();
7758     }
7759 
7760     check_msacsr_cause(env, GETPC());
7761 
7762     msa_move_v(pwd, pwx);
7763 }
7764 
7765 void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7766                          uint32_t ws)
7767 {
7768     wr_t wx, *pwx = &wx;
7769     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7770     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7771     uint32_t i;
7772 
7773     clear_msacsr_cause(env);
7774 
7775     switch (df) {
7776     case DF_WORD:
7777         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7778             MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7779         }
7780         break;
7781     case DF_DOUBLE:
7782         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7783             MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7784         }
7785         break;
7786     default:
7787         g_assert_not_reached();
7788     }
7789 
7790     check_msacsr_cause(env, GETPC());
7791 
7792     msa_move_v(pwd, pwx);
7793 }
7794 
/*
 * Per-element reciprocal: DEST = FLOAT_ONE<BITS> / ARG via float<BITS>_div().
 *
 * ARG is evaluated exactly once, after the exception flags have been
 * cleared, and the captured value is reused for the infinity test.
 * helper_msa_frsqrt_df() passes a float<BITS>_sqrt() call as ARG, so
 * expanding ARG twice would compute the square root twice per element.
 *
 * update_msacsr() is called with action 0 when ARG is infinite or the
 * quotient is a quiet NaN, and with RECIPROCAL_INEXACT otherwise.  On an
 * enabled exception DEST is overwritten with the FLOAT_SNAN<BITS>() pattern
 * whose low six bits are replaced by the value returned by update_msacsr().
 *
 * Relies on a variable named 'env' being in scope at the expansion site.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS arg_;                                                 \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        arg_ = (ARG);                                                       \
        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, arg_, status);  \
        c = update_msacsr(env,                                              \
                          (float ## BITS ## _is_infinity(arg_) ||           \
                           float ## BITS ## _is_quiet_nan(DEST, status)) ?  \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7811 
7812 void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7813                           uint32_t ws)
7814 {
7815     wr_t wx, *pwx = &wx;
7816     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7817     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7818     uint32_t i;
7819 
7820     clear_msacsr_cause(env);
7821 
7822     switch (df) {
7823     case DF_WORD:
7824         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7825             MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7826                     &env->active_tc.msa_fp_status), 32);
7827         }
7828         break;
7829     case DF_DOUBLE:
7830         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7831             MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7832                     &env->active_tc.msa_fp_status), 64);
7833         }
7834         break;
7835     default:
7836         g_assert_not_reached();
7837     }
7838 
7839     check_msacsr_cause(env, GETPC());
7840 
7841     msa_move_v(pwd, pwx);
7842 }
7843 
7844 void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7845                         uint32_t ws)
7846 {
7847     wr_t wx, *pwx = &wx;
7848     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7849     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7850     uint32_t i;
7851 
7852     clear_msacsr_cause(env);
7853 
7854     switch (df) {
7855     case DF_WORD:
7856         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7857             MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7858         }
7859         break;
7860     case DF_DOUBLE:
7861         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7862             MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7863         }
7864         break;
7865     default:
7866         g_assert_not_reached();
7867     }
7868 
7869     check_msacsr_cause(env, GETPC());
7870 
7871     msa_move_v(pwd, pwx);
7872 }
7873 
7874 void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7875                          uint32_t ws)
7876 {
7877     wr_t wx, *pwx = &wx;
7878     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7879     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7880     uint32_t i;
7881 
7882     clear_msacsr_cause(env);
7883 
7884     switch (df) {
7885     case DF_WORD:
7886         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7887             MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7888         }
7889         break;
7890     case DF_DOUBLE:
7891         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7892             MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7893         }
7894         break;
7895     default:
7896         g_assert_not_reached();
7897     }
7898 
7899     check_msacsr_cause(env, GETPC());
7900 
7901     msa_move_v(pwd, pwx);
7902 }
7903 
/*
 * Per-element base-2 logarithm rounded down to an integral value.
 *
 * The rounding mode is switched to float_round_down while
 * float<BITS>_log2() and float<BITS>_round_to_int() run, then restored
 * from the RM field of env->active_tc.msacsr through the ieee_rm[] table.
 * The inexact flag is removed from the accumulated exception flags before
 * update_msacsr() is consulted.  On an enabled exception DEST is
 * overwritten with the FLOAT_SNAN<BITS>() pattern whose low six bits are
 * replaced by the value returned by update_msacsr().
 *
 * Relies on a variable named 'env' being in scope at the expansion site.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *fst_ = &env->active_tc.msa_fp_status;                 \
        int flags_;                                                         \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fst_);                                 \
        set_float_rounding_mode(float_round_down, fst_);                    \
        DEST = float ## BITS ## _ ## log2(ARG, fst_);                       \
        DEST = float ## BITS ## _ ## round_to_int(DEST, fst_);              \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                fst_);                                      \
                                                                            \
        flags_ = get_float_exception_flags(fst_) & (~float_flag_inexact);   \
        set_float_exception_flags(flags_, fst_);                            \
                                                                            \
        cause_ = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));            \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fst_) >> 6) << 6) | cause_;         \
        }                                                                   \
    } while (0)
7927 
7928 void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7929                          uint32_t ws)
7930 {
7931     wr_t wx, *pwx = &wx;
7932     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7933     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7934     uint32_t i;
7935 
7936     clear_msacsr_cause(env);
7937 
7938     switch (df) {
7939     case DF_WORD:
7940         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7941             MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7942         }
7943         break;
7944     case DF_DOUBLE:
7945         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7946             MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7947         }
7948         break;
7949     default:
7950         g_assert_not_reached();
7951     }
7952 
7953     check_msacsr_cause(env, GETPC());
7954 
7955     msa_move_v(pwd, pwx);
7956 }
7957 
7958 void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7959                           uint32_t ws)
7960 {
7961     wr_t wx, *pwx = &wx;
7962     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7963     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7964     uint32_t i;
7965 
7966     clear_msacsr_cause(env);
7967 
7968     switch (df) {
7969     case DF_WORD:
7970         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7971             /*
7972              * Half precision floats come in two formats: standard
7973              * IEEE and "ARM" format.  The latter gains extra exponent
7974              * range by omitting the NaN/Inf encodings.
7975              */
7976             bool ieee = true;
7977 
7978             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7979         }
7980         break;
7981     case DF_DOUBLE:
7982         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7983             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7984         }
7985         break;
7986     default:
7987         g_assert_not_reached();
7988     }
7989 
7990     check_msacsr_cause(env, GETPC());
7991     msa_move_v(pwd, pwx);
7992 }
7993 
7994 void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7995                           uint32_t ws)
7996 {
7997     wr_t wx, *pwx = &wx;
7998     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7999     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8000     uint32_t i;
8001 
8002     clear_msacsr_cause(env);
8003 
8004     switch (df) {
8005     case DF_WORD:
8006         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8007             /*
8008              * Half precision floats come in two formats: standard
8009              * IEEE and "ARM" format.  The latter gains extra exponent
8010              * range by omitting the NaN/Inf encodings.
8011              */
8012             bool ieee = true;
8013 
8014             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8015         }
8016         break;
8017     case DF_DOUBLE:
8018         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8019             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8020         }
8021         break;
8022     default:
8023         g_assert_not_reached();
8024     }
8025 
8026     check_msacsr_cause(env, GETPC());
8027     msa_move_v(pwd, pwx);
8028 }
8029 
8030 void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8031                         uint32_t ws)
8032 {
8033     wr_t wx, *pwx = &wx;
8034     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8035     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8036     uint32_t i;
8037 
8038     switch (df) {
8039     case DF_WORD:
8040         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8041             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8042         }
8043         break;
8044     case DF_DOUBLE:
8045         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8046             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8047         }
8048         break;
8049     default:
8050         g_assert_not_reached();
8051     }
8052 
8053     msa_move_v(pwd, pwx);
8054 }
8055 
8056 void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8057                         uint32_t ws)
8058 {
8059     wr_t wx, *pwx = &wx;
8060     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8061     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8062     uint32_t i;
8063 
8064     switch (df) {
8065     case DF_WORD:
8066         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8067             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8068         }
8069         break;
8070     case DF_DOUBLE:
8071         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8072             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8073         }
8074         break;
8075     default:
8076         g_assert_not_reached();
8077     }
8078 
8079     msa_move_v(pwd, pwx);
8080 }
8081 
8082 void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8083                            uint32_t ws)
8084 {
8085     wr_t wx, *pwx = &wx;
8086     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8087     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8088     uint32_t i;
8089 
8090     clear_msacsr_cause(env);
8091 
8092     switch (df) {
8093     case DF_WORD:
8094         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8095             MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8096         }
8097         break;
8098     case DF_DOUBLE:
8099         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8100             MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8101         }
8102         break;
8103     default:
8104         g_assert_not_reached();
8105     }
8106 
8107     check_msacsr_cause(env, GETPC());
8108 
8109     msa_move_v(pwd, pwx);
8110 }
8111 
8112 void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8113                            uint32_t ws)
8114 {
8115     wr_t wx, *pwx = &wx;
8116     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8117     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8118     uint32_t i;
8119 
8120     clear_msacsr_cause(env);
8121 
8122     switch (df) {
8123     case DF_WORD:
8124         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8125             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8126         }
8127         break;
8128     case DF_DOUBLE:
8129         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8130             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8131         }
8132         break;
8133     default:
8134         g_assert_not_reached();
8135     }
8136 
8137     check_msacsr_cause(env, GETPC());
8138 
8139     msa_move_v(pwd, pwx);
8140 }
8141 
/*
 * Alias the integer-to-float converters under float<BITS>_from_<type>
 * names.  These match the (from_<type>, BITS) argument pairs that the
 * FFINT helpers pass to MSA_FLOAT_UNOP; presumably that macro pastes the
 * tokens together to form the callee name -- confirm against its
 * definition.
 */
#define float32_from_int32 int32_to_float32
#define float32_from_uint32 uint32_to_float32

#define float64_from_int64 int64_to_float64
#define float64_from_uint64 uint64_to_float64
8147 
8148 void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8149                            uint32_t ws)
8150 {
8151     wr_t wx, *pwx = &wx;
8152     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8153     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8154     uint32_t i;
8155 
8156     clear_msacsr_cause(env);
8157 
8158     switch (df) {
8159     case DF_WORD:
8160         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8161             MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8162         }
8163         break;
8164     case DF_DOUBLE:
8165         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8166             MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8167         }
8168         break;
8169     default:
8170         g_assert_not_reached();
8171     }
8172 
8173     check_msacsr_cause(env, GETPC());
8174 
8175     msa_move_v(pwd, pwx);
8176 }
8177 
8178 void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8179                            uint32_t ws)
8180 {
8181     wr_t wx, *pwx = &wx;
8182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8184     uint32_t i;
8185 
8186     clear_msacsr_cause(env);
8187 
8188     switch (df) {
8189     case DF_WORD:
8190         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8191             MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8192         }
8193         break;
8194     case DF_DOUBLE:
8195         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8196             MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8197         }
8198         break;
8199     default:
8200         g_assert_not_reached();
8201     }
8202 
8203     check_msacsr_cause(env, GETPC());
8204 
8205     msa_move_v(pwd, pwx);
8206 }
8207 
/*
 * Width in bits of one element of data format df: 1 << (df + 3),
 * i.e. 8 bits for df == 0 and doubling with each step of df.
 * This repeats the definition near the top of the file; the two must
 * stay token-identical for the redefinition to remain legal C.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Element-by-element access macros.
 * DF_ELEMENTS(df) is the number of df-sized elements in one MSA_WRLEN-bit
 * vector register.  Also a repeat of the definition near the top of the
 * file.
 */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8213 
/*
 * MEMOP_IDX(DF): when CONFIG_USER_ONLY is not defined, declares a local
 * variable `oi` built by make_memop_idx() from the flags
 * MO_TE | DF | MO_UNALN and the MMU index returned by
 * mips_env_mmu_index(env); it therefore needs `env` in the enclosing
 * scope.  When CONFIG_USER_ONLY is defined it expands to nothing.
 *
 * NOTE(review): none of the load/store helpers that follow use this macro
 * or `oi` -- confirm whether it is still needed.
 */
#if !defined(CONFIG_USER_ONLY)
#define MEMOP_IDX(DF)                                                   \
    MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
                                 mips_env_mmu_index(env));
#else
#define MEMOP_IDX(DF)
#endif
8221 
8222 #if TARGET_BIG_ENDIAN
/*
 * Swap the two bytes inside each of the four 16-bit lanes of x,
 * leaving the lanes themselves in place.
 */
static inline uint64_t bswap16x4(uint64_t x)
{
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;
    /* Low byte of every lane moved up into the high byte position. */
    uint64_t moved_up = (x & low_bytes) << 8;
    /* High byte of every lane moved down into the low byte position. */
    uint64_t moved_down = (x >> 8) & low_bytes;

    return moved_up | moved_down;
}
8228 
/*
 * Byte-swap each of the two 32-bit lanes of x, leaving the lanes in
 * place: a full 64-bit byte reversal also exchanges the two lanes, so
 * rotating by 32 bits puts them back.
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    uint64_t reversed = bswap64(x);

    return ror64(reversed, 32);
}
8233 #endif
8234 
8235 void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8236                      target_ulong addr)
8237 {
8238     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8239     uintptr_t ra = GETPC();
8240     uint64_t d0, d1;
8241 
8242     /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8243     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8244     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8245     pwd->d[0] = d0;
8246     pwd->d[1] = d1;
8247 }
8248 
8249 void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8250                      target_ulong addr)
8251 {
8252     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8253     uintptr_t ra = GETPC();
8254     uint64_t d0, d1;
8255 
8256     /*
8257      * Load 8 bytes at a time.  Use little-endian load, then for
8258      * big-endian target, we must then swap the four halfwords.
8259      */
8260     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8261     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8262 #if TARGET_BIG_ENDIAN
8263     d0 = bswap16x4(d0);
8264     d1 = bswap16x4(d1);
8265 #endif
8266     pwd->d[0] = d0;
8267     pwd->d[1] = d1;
8268 }
8269 
8270 void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8271                      target_ulong addr)
8272 {
8273     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8274     uintptr_t ra = GETPC();
8275     uint64_t d0, d1;
8276 
8277     /*
8278      * Load 8 bytes at a time.  Use little-endian load, then for
8279      * big-endian target, we must then bswap the two words.
8280      */
8281     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8282     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8283 #if TARGET_BIG_ENDIAN
8284     d0 = bswap32x2(d0);
8285     d1 = bswap32x2(d1);
8286 #endif
8287     pwd->d[0] = d0;
8288     pwd->d[1] = d1;
8289 }
8290 
8291 void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8292                      target_ulong addr)
8293 {
8294     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8295     uintptr_t ra = GETPC();
8296     uint64_t d0, d1;
8297 
8298     d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8299     d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8300     pwd->d[0] = d0;
8301     pwd->d[1] = d1;
8302 }
8303 
/*
 * Non-zero when a vector-sized access (MSA_WRLEN / 8 bytes) starting at
 * address x does not fit in x's page: the in-page offset of x plus the
 * offset of the access's last byte reaches TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8306 
8307 static inline void ensure_writable_pages(CPUMIPSState *env,
8308                                          target_ulong addr,
8309                                          int mmu_idx,
8310                                          uintptr_t retaddr)
8311 {
8312     /* FIXME: Probe the actual accesses (pass and use a size) */
8313     if (unlikely(MSA_PAGESPAN(addr))) {
8314         /* first page */
8315         probe_write(env, addr, 0, mmu_idx, retaddr);
8316         /* second page */
8317         addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8318         probe_write(env, addr, 0, mmu_idx, retaddr);
8319     }
8320 }
8321 
8322 void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8323                      target_ulong addr)
8324 {
8325     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8326     int mmu_idx = mips_env_mmu_index(env);
8327     uintptr_t ra = GETPC();
8328 
8329     ensure_writable_pages(env, addr, mmu_idx, ra);
8330 
8331     /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8332     cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8333     cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
8334 }
8335 
8336 void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8337                      target_ulong addr)
8338 {
8339     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8340     int mmu_idx = mips_env_mmu_index(env);
8341     uintptr_t ra = GETPC();
8342     uint64_t d0, d1;
8343 
8344     ensure_writable_pages(env, addr, mmu_idx, ra);
8345 
8346     /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8347     d0 = pwd->d[0];
8348     d1 = pwd->d[1];
8349 #if TARGET_BIG_ENDIAN
8350     d0 = bswap16x4(d0);
8351     d1 = bswap16x4(d1);
8352 #endif
8353     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8354     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8355 }
8356 
8357 void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8358                      target_ulong addr)
8359 {
8360     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8361     int mmu_idx = mips_env_mmu_index(env);
8362     uintptr_t ra = GETPC();
8363     uint64_t d0, d1;
8364 
8365     ensure_writable_pages(env, addr, mmu_idx, ra);
8366 
8367     /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8368     d0 = pwd->d[0];
8369     d1 = pwd->d[1];
8370 #if TARGET_BIG_ENDIAN
8371     d0 = bswap32x2(d0);
8372     d1 = bswap32x2(d1);
8373 #endif
8374     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8375     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8376 }
8377 
8378 void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8379                      target_ulong addr)
8380 {
8381     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8382     int mmu_idx = mips_env_mmu_index(env);
8383     uintptr_t ra = GETPC();
8384 
8385     ensure_writable_pages(env, addr, mmu_idx, GETPC());
8386 
8387     cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8388     cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8389 }
8390