xref: /openbmc/qemu/target/mips/tcg/msa_helper.c (revision ed3a06b1)
1 /*
2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3  *
4  * Copyright (c) 2014 Imagination Technologies
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "tcg/tcg.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "exec/memop.h"
27 #include "fpu/softfloat.h"
28 #include "fpu_helper.h"
29 
/*
 * Data format helpers.
 *
 * DF_BITS(df) turns a data format index into an element width in bits:
 * indices 0, 1, 2, 3 give 8, 16, 32 and 64.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Largest and smallest signed values of a DF_BITS(df)-bit (or m-bit) element.
 * The power of two is built on an unsigned type so that the 64-bit case does
 * not shift a 1 into the sign bit of a signed long long.
 */
#define DF_MAX_INT(df)  ((int64_t)((1ULL << (DF_BITS(df) - 1)) - 1))
#define M_MAX_INT(m)    ((int64_t)((1ULL << ((m)         - 1)) - 1))

#define DF_MIN_INT(df)  ((int64_t)(-(1ULL << (DF_BITS(df) - 1))))
#define M_MIN_INT(m)    ((int64_t)(-(1ULL << ((m)         - 1))))

/* Largest unsigned value; the shift count is 64 - width, so width is 1..64. */
#define DF_MAX_UINT(df) ((uint64_t)(-1ULL >> (64 - DF_BITS(df))))
#define M_MAX_UINT(m)   ((uint64_t)(-1ULL >> (64 - (m))))

/* Keep only the low DF_BITS(df) bits of x (zero extension). */
#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
/*
 * Sign-extend the low DF_BITS(df) bits of x to 64 bits.  The argument is
 * parenthesised so that a compound expression is converted as a whole, and
 * the left shift is done on uint64_t so no signed value is shifted left.
 */
#define SIGNED(x, df)                                                   \
    ((int64_t)((uint64_t)(x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))

/* Element-by-element access macros */
/* Number of DF_BITS(df)-wide elements in an MSA_WRLEN-bit vector. */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
48 
49 
50 
51 /*
52  * Bit Count
53  * ---------
54  *
55  * +---------------+----------------------------------------------------------+
56  * | NLOC.B        | Vector Leading Ones Count (byte)                         |
57  * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
58  * | NLOC.W        | Vector Leading Ones Count (word)                         |
59  * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
60  * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
61  * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
62  * | NLZC.W        | Vector Leading Zeros Count (word)                        |
63  * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
64  * | PCNT.B        | Vector Population Count (byte)                           |
65  * | PCNT.H        | Vector Population Count (halfword)                       |
66  * | PCNT.W        | Vector Population Count (word)                           |
67  * | PCNT.D        | Vector Population Count (doubleword)                     |
68  * +---------------+----------------------------------------------------------+
69  */
70 
/*
 * Count the leading zero bits of the low DF_BITS(df) bits of arg.
 *
 * The value is narrowed by binary search: whenever the upper part (after
 * shifting right by the current step) is non-zero, the search continues in
 * that part and the step is deducted from the running count.
 */
static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
{
    uint64_t rest = UNSIGNED(arg, df);
    int zeros = DF_BITS(df);
    int step;

    for (step = DF_BITS(df) / 2; step != 0; step >>= 1) {
        uint64_t upper = rest >> step;

        if (upper != 0) {
            zeros -= step;
            rest = upper;
        }
    }

    /* What is left accounts for the last bit examined. */
    return zeros - rest;
}
91 
/*
 * Count the leading one bits of the low DF_BITS(df) bits of arg, computed as
 * the leading-zero count of the bitwise complement.
 */
static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
{
    int64_t inverted = UNSIGNED((~arg), df);

    return msa_nlzc_df(df, inverted);
}
96 
97 void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
98 {
99     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
100     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
101 
102     pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
103     pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
104     pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
105     pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
106     pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
107     pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
108     pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
109     pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
110     pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
111     pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
112     pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
113     pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
114     pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
115     pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
116     pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
117     pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
118 }
119 
120 void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
121 {
122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
124 
125     pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
126     pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
127     pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
128     pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
129     pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
130     pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
131     pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
132     pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
133 }
134 
135 void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
136 {
137     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
138     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
139 
140     pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
141     pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
142     pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
143     pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
144 }
145 
146 void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
147 {
148     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
149     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
150 
151     pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
152     pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
153 }
154 
155 void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
156 {
157     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
158     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
159 
160     pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
161     pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
162     pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
163     pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
164     pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
165     pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
166     pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
167     pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
168     pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
169     pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
170     pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
171     pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
172     pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
173     pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
174     pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
175     pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
176 }
177 
178 void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
179 {
180     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
181     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
182 
183     pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
184     pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
185     pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
186     pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
187     pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
188     pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
189     pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
190     pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
191 }
192 
193 void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
194 {
195     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
196     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
197 
198     pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
199     pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
200     pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
201     pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
202 }
203 
204 void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
205 {
206     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
207     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
208 
209     pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
210     pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
211 }
212 
/*
 * Count the set bits in the low DF_BITS(df) bits of arg.
 *
 * Adjacent bit groups are summed in parallel, doubling the group width at
 * each step (1, 2, 4, 8, 16 and finally 32 bits).
 */
static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
{
    const uint64_t m1  = 0x5555555555555555ULL;
    const uint64_t m2  = 0x3333333333333333ULL;
    const uint64_t m4  = 0x0F0F0F0F0F0F0F0FULL;
    const uint64_t m8  = 0x00FF00FF00FF00FFULL;
    const uint64_t m16 = 0x0000FFFF0000FFFFULL;
    const uint64_t m32 = 0x00000000FFFFFFFFULL;
    uint64_t sum = UNSIGNED(arg, df);

    sum = (sum & m1)  + ((sum >> 1)  & m1);
    sum = (sum & m2)  + ((sum >> 2)  & m2);
    sum = (sum & m4)  + ((sum >> 4)  & m4);
    sum = (sum & m8)  + ((sum >> 8)  & m8);
    sum = (sum & m16) + ((sum >> 16) & m16);
    sum = (sum & m32) + (sum >> 32);

    return sum;
}
228 
229 void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
230 {
231     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
232     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
233 
234     pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
235     pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
236     pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
237     pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
238     pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
239     pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
240     pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
241     pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
242     pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
243     pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
244     pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
245     pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
246     pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
247     pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
248     pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
249     pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
250 }
251 
252 void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
253 {
254     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
255     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
256 
257     pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
258     pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
259     pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
260     pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
261     pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
262     pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
263     pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
264     pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
265 }
266 
267 void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
268 {
269     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
270     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
271 
272     pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
273     pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
274     pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
275     pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
276 }
277 
278 void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
279 {
280     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
281     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
282 
283     pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
284     pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
285 }
286 
287 
288 /*
289  * Bit Move
290  * --------
291  *
292  * +---------------+----------------------------------------------------------+
293  * | BINSL.B       | Vector Bit Insert Left (byte)                            |
294  * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
295  * | BINSL.W       | Vector Bit Insert Left (word)                            |
296  * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
297  * | BINSR.B       | Vector Bit Insert Right (byte)                           |
298  * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
299  * | BINSR.W       | Vector Bit Insert Right (word)                           |
300  * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
301  * | BMNZ.V        | Vector Bit Move If Not Zero                              |
302  * | BMZ.V         | Vector Bit Move If Zero                                  |
303  * | BSEL.V        | Vector Bit Select                                        |
304  * +---------------+----------------------------------------------------------+
305  */
306 
/* Reduce x modulo the element width, giving a bit index inside the element. */
#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))

/*
 * Bit insert left for one element.
 *
 * The count is (arg2 mod element width) + 1.  That many most-significant
 * bits of the result come from arg1; the remaining low bits come from dest.
 * When the count equals the element width the result is arg1 truncated to
 * the element width.
 */
static inline int64_t msa_binsl_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t old = UNSIGNED(dest, df);
    int32_t n_ins = BIT_POSITION(arg2, df) + 1;
    int32_t n_keep = DF_BITS(df) - n_ins;
    uint64_t kept, inserted;

    if (n_ins == DF_BITS(df)) {
        return src;
    }

    /* Shift left then right to drop the top n_ins bits of the old value. */
    kept = UNSIGNED(UNSIGNED(old << n_ins, df) >> n_ins, df);
    /* Shift right then left to drop the low n_keep bits of the source. */
    inserted = UNSIGNED(UNSIGNED(src >> n_keep, df) << n_keep, df);

    return kept | inserted;
}
324 
325 void helper_msa_binsl_b(CPUMIPSState *env,
326                         uint32_t wd, uint32_t ws, uint32_t wt)
327 {
328     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
329     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
330     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
331 
332     pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
333     pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
334     pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
335     pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
336     pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
337     pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
338     pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
339     pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
340     pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
341     pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
342     pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
343     pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
344     pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
345     pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
346     pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
347     pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
348 }
349 
350 void helper_msa_binsl_h(CPUMIPSState *env,
351                         uint32_t wd, uint32_t ws, uint32_t wt)
352 {
353     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
354     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
355     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
356 
357     pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
358     pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
359     pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
360     pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
361     pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
362     pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
363     pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
364     pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
365 }
366 
367 void helper_msa_binsl_w(CPUMIPSState *env,
368                         uint32_t wd, uint32_t ws, uint32_t wt)
369 {
370     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
371     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
372     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
373 
374     pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
375     pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
376     pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
377     pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
378 }
379 
380 void helper_msa_binsl_d(CPUMIPSState *env,
381                         uint32_t wd, uint32_t ws, uint32_t wt)
382 {
383     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
384     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
385     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
386 
387     pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
388     pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
389 }
390 
/*
 * Bit insert right for one element.
 *
 * The count is (arg2 mod element width) + 1.  That many least-significant
 * bits of the result come from arg1; the remaining high bits come from dest.
 * When the count equals the element width the result is arg1 truncated to
 * the element width.
 */
static inline int64_t msa_binsr_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t old = UNSIGNED(dest, df);
    int32_t n_ins = BIT_POSITION(arg2, df) + 1;
    int32_t n_keep = DF_BITS(df) - n_ins;
    uint64_t kept, inserted;

    if (n_ins == DF_BITS(df)) {
        return src;
    }

    /* Shift right then left to drop the low n_ins bits of the old value. */
    kept = UNSIGNED(UNSIGNED(old >> n_ins, df) << n_ins, df);
    /* Shift left then right to drop the top n_keep bits of the source. */
    inserted = UNSIGNED(UNSIGNED(src << n_keep, df) >> n_keep, df);

    return kept | inserted;
}
405 
406 void helper_msa_binsr_b(CPUMIPSState *env,
407                         uint32_t wd, uint32_t ws, uint32_t wt)
408 {
409     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
410     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
411     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
412 
413     pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
414     pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
415     pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
416     pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
417     pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
418     pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
419     pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
420     pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
421     pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
422     pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
423     pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
424     pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
425     pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
426     pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
427     pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
428     pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
429 }
430 
431 void helper_msa_binsr_h(CPUMIPSState *env,
432                         uint32_t wd, uint32_t ws, uint32_t wt)
433 {
434     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
435     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
436     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
437 
438     pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
439     pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
440     pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
441     pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
442     pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
443     pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
444     pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
445     pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
446 }
447 
448 void helper_msa_binsr_w(CPUMIPSState *env,
449                         uint32_t wd, uint32_t ws, uint32_t wt)
450 {
451     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
452     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
453     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
454 
455     pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
456     pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
457     pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
458     pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
459 }
460 
461 void helper_msa_binsr_d(CPUMIPSState *env,
462                         uint32_t wd, uint32_t ws, uint32_t wt)
463 {
464     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
465     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
466     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
467 
468     pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
469     pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
470 }
471 
472 void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
473 {
474     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
475     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
476     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
477 
478     pwd->d[0] = UNSIGNED(                                                     \
479         ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
480     pwd->d[1] = UNSIGNED(                                                     \
481         ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
482 }
483 
484 void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
485 {
486     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
487     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
488     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
489 
490     pwd->d[0] = UNSIGNED(                                                     \
491         ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
492     pwd->d[1] = UNSIGNED(                                                     \
493         ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
494 }
495 
496 void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
497 {
498     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
499     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
500     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
501 
502     pwd->d[0] = UNSIGNED(                                                     \
503         (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
504     pwd->d[1] = UNSIGNED(                                                     \
505         (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
506 }
507 
508 
509 /*
510  * Bit Set
511  * -------
512  *
513  * +---------------+----------------------------------------------------------+
514  * | BCLR.B        | Vector Bit Clear (byte)                                  |
515  * | BCLR.H        | Vector Bit Clear (halfword)                              |
516  * | BCLR.W        | Vector Bit Clear (word)                                  |
517  * | BCLR.D        | Vector Bit Clear (doubleword)                            |
518  * | BNEG.B        | Vector Bit Negate (byte)                                 |
519  * | BNEG.H        | Vector Bit Negate (halfword)                             |
520  * | BNEG.W        | Vector Bit Negate (word)                                 |
521  * | BNEG.D        | Vector Bit Negate (doubleword)                           |
522  * | BSET.B        | Vector Bit Set (byte)                                    |
523  * | BSET.H        | Vector Bit Set (halfword)                                |
524  * | BSET.W        | Vector Bit Set (word)                                    |
525  * | BSET.D        | Vector Bit Set (doubleword)                              |
526  * +---------------+----------------------------------------------------------+
527  */
528 
/*
 * Clear one bit of an element.
 *
 * df:   data format selecting the element width
 * arg1: element value
 * arg2: bit selector; only (arg2 mod element width) is used
 *
 * Returns arg1 with the selected bit cleared, truncated to the element width.
 */
static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * The single-bit mask is built as unsigned: for doubleword elements the
     * bit index can be 63, and 1LL << 63 overflows a signed long long.
     */
    return UNSIGNED(arg1 & ~(1ULL << b_arg2), df);
}
534 
535 void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
536 {
537     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
538     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
539     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
540 
541     pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
542     pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
543     pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
544     pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
545     pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
546     pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
547     pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
548     pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
549     pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
550     pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
551     pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
552     pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
553     pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
554     pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
555     pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
556     pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
557 }
558 
559 void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
560 {
561     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
562     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
563     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
564 
565     pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
566     pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
567     pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
568     pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
569     pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
570     pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
571     pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
572     pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
573 }
574 
575 void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
576 {
577     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
578     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
579     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
580 
581     pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
582     pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
583     pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
584     pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
585 }
586 
587 void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
588 {
589     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
590     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
591     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
592 
593     pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
594     pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
595 }
596 
/*
 * Toggle one bit of an element.
 *
 * df:   data format selecting the element width
 * arg1: element value
 * arg2: bit selector; only (arg2 mod element width) is used
 *
 * Returns arg1 with the selected bit inverted, truncated to the element
 * width.
 */
static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * The single-bit mask is built as unsigned: for doubleword elements the
     * bit index can be 63, and 1LL << 63 overflows a signed long long.
     */
    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
}
602 
603 void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
604 {
605     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
606     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
607     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
608 
609     pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
610     pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
611     pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
612     pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
613     pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
614     pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
615     pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
616     pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
617     pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
618     pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
619     pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
620     pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
621     pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
622     pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
623     pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
624     pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
625 }
626 
627 void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
628 {
629     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
630     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
631     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
632 
633     pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
634     pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
635     pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
636     pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
637     pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
638     pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
639     pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
640     pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
641 }
642 
643 void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
644 {
645     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
646     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
647     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
648 
649     pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
650     pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
651     pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
652     pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
653 }
654 
655 void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
656 {
657     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
658     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
659     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
660 
661     pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
662     pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
663 }
664 
/*
 * Set one bit of an element.
 *
 * df:   data format selecting the element width
 * arg1: element value
 * arg2: bit selector; only (arg2 mod element width) is used
 *
 * Returns arg1 with the selected bit set, truncated to the element width.
 */
static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
        int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * The single-bit mask is built as unsigned: for doubleword elements the
     * bit index can be 63, and 1LL << 63 overflows a signed long long.
     */
    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
}
671 
672 void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
673 {
674     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
675     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
676     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
677 
678     pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
679     pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
680     pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
681     pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
682     pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
683     pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
684     pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
685     pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
686     pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
687     pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
688     pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
689     pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
690     pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
691     pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
692     pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
693     pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
694 }
695 
696 void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
697 {
698     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
699     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
700     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
701 
702     pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
703     pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
704     pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
705     pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
706     pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
707     pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
708     pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
709     pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
710 }
711 
712 void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
713 {
714     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
715     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
716     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
717 
718     pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
719     pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
720     pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
721     pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
722 }
723 
724 void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
725 {
726     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
727     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
728     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
729 
730     pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
731     pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
732 }
733 
734 
735 /*
736  * Fixed Multiply
737  * --------------
738  *
739  * +---------------+----------------------------------------------------------+
740  * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
741  * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
742  * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
743  * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
744  * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
745  * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
746  * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
747  * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
748  * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
749  * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
750  * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
751  * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
752  * +---------------+----------------------------------------------------------+
753  */
754 
755 /* TODO: insert Fixed Multiply group helpers here */
756 
757 
758 /*
759  * Float Max Min
760  * -------------
761  *
762  * +---------------+----------------------------------------------------------+
763  * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
764  * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
765  * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
766  * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
767  * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
768  * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
769  * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
770  * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
771  * +---------------+----------------------------------------------------------+
772  */
773 
774 /* TODO: insert Float Max Min group helpers here */
775 
776 
777 /*
778  * Int Add
779  * -------
780  *
781  * +---------------+----------------------------------------------------------+
782  * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
783  * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
784  * | ADD_A.W       | Vector Add Absolute Values (word)                        |
785  * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
786  * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
787  * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
788  * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
789  * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
790  * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
791  * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
792  * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
793  * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
794  * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
795  * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
796  * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
797  * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
798  * | ADDV.B        | Vector Add (byte)                                        |
799  * | ADDV.H        | Vector Add (halfword)                                    |
800  * | ADDV.W        | Vector Add (word)                                        |
801  * | ADDV.D        | Vector Add (doubleword)                                  |
802  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
803  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
804  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
805  * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
806  * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
807  * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
808  * +---------------+----------------------------------------------------------+
809  */
810 
811 
/*
 * Return |arg1| + |arg2|.  The df argument is accepted for symmetry with
 * the other per-element helpers but is not used; callers truncate the
 * result by storing it into an element of the proper width.
 */
static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in unsigned arithmetic: -arg overflows a signed 64-bit value
     * when arg is INT64_MIN, whereas the unsigned negation is well defined
     * and yields the expected magnitude 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return abs_arg1 + abs_arg2;
}
818 
819 void helper_msa_add_a_b(CPUMIPSState *env,
820                         uint32_t wd, uint32_t ws, uint32_t wt)
821 {
822     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
823     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
824     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
825 
826     pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
827     pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
828     pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
829     pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
830     pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
831     pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
832     pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
833     pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
834     pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
835     pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
836     pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
837     pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
838     pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
839     pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
840     pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
841     pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
842 }
843 
844 void helper_msa_add_a_h(CPUMIPSState *env,
845                         uint32_t wd, uint32_t ws, uint32_t wt)
846 {
847     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
848     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
849     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
850 
851     pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
852     pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
853     pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
854     pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
855     pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
856     pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
857     pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
858     pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
859 }
860 
861 void helper_msa_add_a_w(CPUMIPSState *env,
862                         uint32_t wd, uint32_t ws, uint32_t wt)
863 {
864     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
865     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
866     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
867 
868     pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
869     pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
870     pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
871     pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
872 }
873 
874 void helper_msa_add_a_d(CPUMIPSState *env,
875                         uint32_t wd, uint32_t ws, uint32_t wt)
876 {
877     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
878     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
879     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
880 
881     pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
882     pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
883 }
884 
885 
/*
 * Saturating add of absolute values: return |arg1| + |arg2| clamped to
 * DF_MAX_INT(df).  If either magnitude on its own already exceeds
 * DF_MAX_INT(df) the result is DF_MAX_INT(df).
 */
static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    /*
     * Negate in unsigned arithmetic: -arg overflows a signed 64-bit value
     * when arg is INT64_MIN; the unsigned form gives the magnitude 2^63,
     * which the range check below then saturates.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    if (abs_arg1 > max_int || abs_arg2 > max_int) {
        return (int64_t)max_int;
    } else {
        /* Compare against the remaining headroom so the sum cannot wrap. */
        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
    }
}
897 
898 void helper_msa_adds_a_b(CPUMIPSState *env,
899                          uint32_t wd, uint32_t ws, uint32_t wt)
900 {
901     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
902     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
903     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
904 
905     pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
906     pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
907     pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
908     pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
909     pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
910     pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
911     pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
912     pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
913     pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
914     pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
915     pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
916     pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
917     pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
918     pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
919     pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
920     pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
921 }
922 
923 void helper_msa_adds_a_h(CPUMIPSState *env,
924                          uint32_t wd, uint32_t ws, uint32_t wt)
925 {
926     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
927     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
928     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
929 
930     pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
931     pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
932     pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
933     pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
934     pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
935     pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
936     pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
937     pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
938 }
939 
940 void helper_msa_adds_a_w(CPUMIPSState *env,
941                          uint32_t wd, uint32_t ws, uint32_t wt)
942 {
943     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
944     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
945     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
946 
947     pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
948     pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
949     pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
950     pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
951 }
952 
953 void helper_msa_adds_a_d(CPUMIPSState *env,
954                          uint32_t wd, uint32_t ws, uint32_t wt)
955 {
956     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
957     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
958     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
959 
960     pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
961     pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
962 }
963 
964 
/*
 * Signed saturating add: return arg1 + arg2 clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The sign of arg1 selects which bound can be crossed; the test compares
 * arg2 with the distance from arg1 to that bound, so the range check
 * itself never overflows.
 */
static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t hi_bound = DF_MAX_INT(df);
    int64_t lo_bound = DF_MIN_INT(df);

    if (arg1 < 0) {
        /* Only the lower bound is reachable from a negative arg1. */
        if (lo_bound - arg1 < arg2) {
            return arg1 + arg2;
        }
        return lo_bound;
    }

    /* Only the upper bound is reachable from a non-negative arg1. */
    if (arg2 < hi_bound - arg1) {
        return arg1 + arg2;
    }
    return hi_bound;
}
975 
976 void helper_msa_adds_s_b(CPUMIPSState *env,
977                          uint32_t wd, uint32_t ws, uint32_t wt)
978 {
979     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
980     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
981     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
982 
983     pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
984     pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
985     pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
986     pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
987     pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
988     pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
989     pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
990     pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
991     pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
992     pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
993     pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
994     pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
995     pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
996     pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
997     pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
998     pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
999 }
1000 
1001 void helper_msa_adds_s_h(CPUMIPSState *env,
1002                          uint32_t wd, uint32_t ws, uint32_t wt)
1003 {
1004     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1005     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1006     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1007 
1008     pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1009     pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1010     pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1011     pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1012     pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1013     pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1014     pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1015     pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1016 }
1017 
1018 void helper_msa_adds_s_w(CPUMIPSState *env,
1019                          uint32_t wd, uint32_t ws, uint32_t wt)
1020 {
1021     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1022     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1023     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1024 
1025     pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1026     pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1027     pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1028     pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1029 }
1030 
1031 void helper_msa_adds_s_d(CPUMIPSState *env,
1032                          uint32_t wd, uint32_t ws, uint32_t wt)
1033 {
1034     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1035     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1036     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1037 
1038     pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1039     pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1040 }
1041 
1042 
/*
 * Unsigned saturating add: both operands are first masked to the width of
 * data format df, then their sum is returned, clamped to DF_MAX_UINT(df).
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t ceiling = DF_MAX_UINT(df);
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    /* Compare against the remaining headroom so the sum cannot wrap. */
    if (lhs < ceiling - rhs) {
        return lhs + rhs;
    }
    return ceiling;
}
1050 
1051 void helper_msa_adds_u_b(CPUMIPSState *env,
1052                          uint32_t wd, uint32_t ws, uint32_t wt)
1053 {
1054     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1055     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1056     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1057 
1058     pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1059     pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1060     pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1061     pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1062     pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1063     pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1064     pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1065     pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1066     pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1067     pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1068     pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1069     pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1070     pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1071     pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1072     pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1073     pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1074 }
1075 
1076 void helper_msa_adds_u_h(CPUMIPSState *env,
1077                          uint32_t wd, uint32_t ws, uint32_t wt)
1078 {
1079     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1080     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1081     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1082 
1083     pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1084     pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1085     pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1086     pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1087     pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1088     pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1089     pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1090     pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1091 }
1092 
1093 void helper_msa_adds_u_w(CPUMIPSState *env,
1094                          uint32_t wd, uint32_t ws, uint32_t wt)
1095 {
1096     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1097     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1098     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1099 
1100     pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1101     pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1102     pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1103     pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1104 }
1105 
1106 void helper_msa_adds_u_d(CPUMIPSState *env,
1107                          uint32_t wd, uint32_t ws, uint32_t wt)
1108 {
1109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1112 
1113     pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1114     pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1115 }
1116 
1117 
/*
 * Wrapping (modular) add: return arg1 + arg2 modulo 2^64.  The df argument
 * is not used; callers truncate the result by storing it into an element
 * of the proper width.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Add in unsigned arithmetic: a signed sum overflows for doubleword
     * elements (e.g. INT64_MAX + 1), whereas the unsigned sum wraps.
     */
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1122 
1123 void helper_msa_addv_b(CPUMIPSState *env,
1124                        uint32_t wd, uint32_t ws, uint32_t wt)
1125 {
1126     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1127     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1128     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1129 
1130     pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1131     pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1132     pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1133     pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1134     pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1135     pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1136     pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1137     pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1138     pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1139     pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1140     pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1141     pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1142     pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1143     pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1144     pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1145     pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1146 }
1147 
1148 void helper_msa_addv_h(CPUMIPSState *env,
1149                        uint32_t wd, uint32_t ws, uint32_t wt)
1150 {
1151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1153     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1154 
1155     pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1156     pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1157     pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1158     pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1159     pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1160     pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1161     pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1162     pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1163 }
1164 
1165 void helper_msa_addv_w(CPUMIPSState *env,
1166                        uint32_t wd, uint32_t ws, uint32_t wt)
1167 {
1168     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1169     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1170     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1171 
1172     pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1173     pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1174     pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1175     pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1176 }
1177 
1178 void helper_msa_addv_d(CPUMIPSState *env,
1179                        uint32_t wd, uint32_t ws, uint32_t wt)
1180 {
1181     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1182     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1183     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1184 
1185     pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1186     pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1187 }
1188 
1189 
/*
 * Half-width sub-element extraction for an element "a" of data format df.
 *
 * The *_EVEN macros keep the low DF_BITS(df) / 2 bits of "a"; the *_ODD
 * macros keep the high DF_BITS(df) / 2 bits of the element.  The SIGNED
 * variants sign-extend the extracted half, the UNSIGNED variants
 * zero-extend it.
 *
 * The left shifts are always performed on uint64_t: left-shifting a
 * negative signed value is undefined in C.  For the SIGNED variants the
 * shifted value is converted back to int64_t before the arithmetic right
 * shift that performs the sign extension.
 */
#define SIGNED_EVEN(a, df) \
        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df) / 2))) >> \
         (64 - DF_BITS(df) / 2))

#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

#define SIGNED_ODD(a, df) \
        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df)))) >> \
         (64 - DF_BITS(df) / 2))

#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1201 
1202 
/*
 * Signed horizontal add: return the sum of SIGNED_ODD(arg1, df) and
 * SIGNED_EVEN(arg2, df), i.e. the sign-extended upper half of arg1 plus
 * the sign-extended lower half of arg2.
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t odd_half = SIGNED_ODD(arg1, df);
    int64_t even_half = SIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1207 
1208 void helper_msa_hadd_s_h(CPUMIPSState *env,
1209                          uint32_t wd, uint32_t ws, uint32_t wt)
1210 {
1211     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1212     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1213     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1214 
1215     pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1216     pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1217     pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1218     pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1219     pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1220     pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1221     pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1222     pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1223 }
1224 
1225 void helper_msa_hadd_s_w(CPUMIPSState *env,
1226                          uint32_t wd, uint32_t ws, uint32_t wt)
1227 {
1228     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1229     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1230     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1231 
1232     pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1233     pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1234     pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1235     pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1236 }
1237 
1238 void helper_msa_hadd_s_d(CPUMIPSState *env,
1239                          uint32_t wd, uint32_t ws, uint32_t wt)
1240 {
1241     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1242     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1243     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1244 
1245     pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1246     pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1247 }
1248 
1249 
/*
 * Unsigned horizontal add: return the sum of UNSIGNED_ODD(arg1, df) and
 * UNSIGNED_EVEN(arg2, df), i.e. the zero-extended upper half of arg1 plus
 * the zero-extended lower half of arg2.
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t odd_half = UNSIGNED_ODD(arg1, df);
    uint64_t even_half = UNSIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1254 
1255 void helper_msa_hadd_u_h(CPUMIPSState *env,
1256                          uint32_t wd, uint32_t ws, uint32_t wt)
1257 {
1258     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1259     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1260     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1261 
1262     pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1263     pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1264     pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1265     pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1266     pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1267     pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1268     pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1269     pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1270 }
1271 
1272 void helper_msa_hadd_u_w(CPUMIPSState *env,
1273                          uint32_t wd, uint32_t ws, uint32_t wt)
1274 {
1275     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1276     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1277     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1278 
1279     pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1280     pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1281     pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1282     pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1283 }
1284 
1285 void helper_msa_hadd_u_d(CPUMIPSState *env,
1286                          uint32_t wd, uint32_t ws, uint32_t wt)
1287 {
1288     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1289     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1290     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1291 
1292     pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1293     pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1294 }
1295 
1296 
1297 /*
1298  * Int Average
1299  * -----------
1300  *
1301  * +---------------+----------------------------------------------------------+
1302  * | AVE_S.B       | Vector Signed Average (byte)                             |
1303  * | AVE_S.H       | Vector Signed Average (halfword)                         |
1304  * | AVE_S.W       | Vector Signed Average (word)                             |
1305  * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1306  * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1307  * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1308  * | AVE_U.W       | Vector Unsigned Average (word)                           |
1309  * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1310  * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1311  * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1312  * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1313  * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1314  * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1315  * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1316  * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1317  * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1318  * +---------------+----------------------------------------------------------+
1319  */
1320 
/*
 * Signed average without forming the full sum: each operand is halved
 * first, and the unit that is lost when both operands are odd is added
 * back.  The df argument is not used.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* Right shifts of the signed operands (arithmetic shift expected). */
    int64_t halves = (arg1 >> 1) + (arg2 >> 1);
    /* 1 only when the low bit is set in both operands. */
    int64_t carry = arg1 & arg2 & 1;

    return halves + carry;
}
1326 
1327 void helper_msa_ave_s_b(CPUMIPSState *env,
1328                         uint32_t wd, uint32_t ws, uint32_t wt)
1329 {
1330     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1331     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1332     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1333 
1334     pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1335     pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1336     pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1337     pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1338     pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1339     pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1340     pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1341     pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1342     pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1343     pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1344     pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1345     pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1346     pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1347     pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1348     pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1349     pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1350 }
1351 
1352 void helper_msa_ave_s_h(CPUMIPSState *env,
1353                         uint32_t wd, uint32_t ws, uint32_t wt)
1354 {
1355     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1356     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1357     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1358 
1359     pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1360     pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1361     pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1362     pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1363     pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1364     pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1365     pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1366     pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1367 }
1368 
1369 void helper_msa_ave_s_w(CPUMIPSState *env,
1370                         uint32_t wd, uint32_t ws, uint32_t wt)
1371 {
1372     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1373     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1374     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1375 
1376     pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1377     pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1378     pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1379     pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1380 }
1381 
1382 void helper_msa_ave_s_d(CPUMIPSState *env,
1383                         uint32_t wd, uint32_t ws, uint32_t wt)
1384 {
1385     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1386     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1387     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1388 
1389     pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1390     pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1391 }
1392 
/*
 * Unsigned average of arg1 and arg2 for data format df.
 *
 * Both operands are first reduced with UNSIGNED() to the low DF_BITS(df)
 * bits.  Each is then halved with an unsigned shift, and the unit dropped by
 * the two shifts is added back only when both masked operands are odd.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t both_odd = lhs & rhs & 1;

    return (lhs >> 1) + (rhs >> 1) + both_odd;
}
1400 
1401 void helper_msa_ave_u_b(CPUMIPSState *env,
1402                         uint32_t wd, uint32_t ws, uint32_t wt)
1403 {
1404     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1405     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1406     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1407 
1408     pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1409     pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1410     pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1411     pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1412     pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1413     pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1414     pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1415     pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1416     pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1417     pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1418     pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1419     pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1420     pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1421     pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1422     pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1423     pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1424 }
1425 
1426 void helper_msa_ave_u_h(CPUMIPSState *env,
1427                         uint32_t wd, uint32_t ws, uint32_t wt)
1428 {
1429     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1430     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1431     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1432 
1433     pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1434     pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1435     pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1436     pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1437     pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1438     pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1439     pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1440     pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1441 }
1442 
1443 void helper_msa_ave_u_w(CPUMIPSState *env,
1444                         uint32_t wd, uint32_t ws, uint32_t wt)
1445 {
1446     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1447     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1448     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1449 
1450     pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1451     pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1452     pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1453     pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1454 }
1455 
1456 void helper_msa_ave_u_d(CPUMIPSState *env,
1457                         uint32_t wd, uint32_t ws, uint32_t wt)
1458 {
1459     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1460     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1461     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1462 
1463     pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1464     pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1465 }
1466 
/*
 * Signed rounded average of arg1 and arg2, computed without forming
 * arg1 + arg2.
 *
 * Each operand is halved with a signed right shift; one is added whenever
 * at least one operand has bit 0 set.  The df argument is not used by the
 * computation.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    int64_t any_odd = (arg1 | arg2) & 1;

    return half1 + half2 + any_odd;
}
1472 
1473 void helper_msa_aver_s_b(CPUMIPSState *env,
1474                          uint32_t wd, uint32_t ws, uint32_t wt)
1475 {
1476     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1477     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1478     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1479 
1480     pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1481     pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1482     pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1483     pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1484     pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1485     pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1486     pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1487     pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1488     pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1489     pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1490     pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1491     pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1492     pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1493     pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1494     pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1495     pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1496 }
1497 
1498 void helper_msa_aver_s_h(CPUMIPSState *env,
1499                          uint32_t wd, uint32_t ws, uint32_t wt)
1500 {
1501     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1502     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1503     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1504 
1505     pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1506     pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1507     pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1508     pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1509     pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1510     pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1511     pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1512     pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1513 }
1514 
1515 void helper_msa_aver_s_w(CPUMIPSState *env,
1516                          uint32_t wd, uint32_t ws, uint32_t wt)
1517 {
1518     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1519     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1520     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1521 
1522     pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1523     pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1524     pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1525     pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1526 }
1527 
1528 void helper_msa_aver_s_d(CPUMIPSState *env,
1529                          uint32_t wd, uint32_t ws, uint32_t wt)
1530 {
1531     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1532     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1533     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1534 
1535     pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1536     pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1537 }
1538 
/*
 * Unsigned rounded average of arg1 and arg2 for data format df.
 *
 * Both operands are first reduced with UNSIGNED() to the low DF_BITS(df)
 * bits.  Each is then halved with an unsigned shift, and one is added
 * whenever at least one masked operand is odd.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t any_odd = (lhs | rhs) & 1;

    return (lhs >> 1) + (rhs >> 1) + any_odd;
}
1546 
1547 void helper_msa_aver_u_b(CPUMIPSState *env,
1548                          uint32_t wd, uint32_t ws, uint32_t wt)
1549 {
1550     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1551     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1552     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1553 
1554     pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1555     pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1556     pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1557     pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1558     pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1559     pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1560     pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1561     pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1562     pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1563     pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1564     pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1565     pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1566     pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1567     pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1568     pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1569     pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1570 }
1571 
1572 void helper_msa_aver_u_h(CPUMIPSState *env,
1573                          uint32_t wd, uint32_t ws, uint32_t wt)
1574 {
1575     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1576     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1577     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1578 
1579     pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1580     pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1581     pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1582     pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1583     pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1584     pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1585     pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1586     pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1587 }
1588 
1589 void helper_msa_aver_u_w(CPUMIPSState *env,
1590                          uint32_t wd, uint32_t ws, uint32_t wt)
1591 {
1592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1595 
1596     pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1597     pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1598     pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1599     pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1600 }
1601 
1602 void helper_msa_aver_u_d(CPUMIPSState *env,
1603                          uint32_t wd, uint32_t ws, uint32_t wt)
1604 {
1605     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1606     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1607     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1608 
1609     pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1610     pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1611 }
1612 
1613 
1614 /*
1615  * Int Compare
1616  * -----------
1617  *
1618  * +---------------+----------------------------------------------------------+
1619  * | CEQ.B         | Vector Compare Equal (byte)                              |
1620  * | CEQ.H         | Vector Compare Equal (halfword)                          |
1621  * | CEQ.W         | Vector Compare Equal (word)                              |
1622  * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1623  * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1624  * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1625  * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1626  * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1627  * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1628  * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1629  * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1630  * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1631  * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1632  * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1633  * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1634  * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1635  * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1636  * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1637  * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1638  * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1639  * +---------------+----------------------------------------------------------+
1640  */
1641 
/*
 * Equality compare: returns -1 (all bits set) when arg1 equals arg2,
 * otherwise 0.  The df argument is not used by the comparison.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1646 
/* Byte equality compare: -1 (all bits set) when arg1 == arg2, else 0. */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1651 
1652 void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1653 {
1654     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1655     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1656     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1657 
1658     pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1659     pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1660     pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1661     pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1662     pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1663     pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1664     pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1665     pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1666     pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1667     pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1668     pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1669     pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1670     pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1671     pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1672     pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1673     pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1674 }
1675 
/* Halfword equality compare: -1 (all bits set) when arg1 == arg2, else 0. */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1680 
1681 void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1682 {
1683     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1684     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1685     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1686 
1687     pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1688     pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1689     pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1690     pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1691     pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1692     pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1693     pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1694     pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1695 }
1696 
/* Word equality compare: -1 (all bits set) when arg1 == arg2, else 0. */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1701 
1702 void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1703 {
1704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1707 
1708     pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1709     pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1710     pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1711     pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1712 }
1713 
/* Doubleword equality compare: -1 (all bits set) when arg1 == arg2, else 0. */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1718 
1719 void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1720 {
1721     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1722     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1723     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1724 
1725     pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1726     pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1727 }
1728 
/*
 * Signed less-than-or-equal compare: returns -1 (all bits set) when
 * arg1 <= arg2, otherwise 0.  The df argument is not used by the comparison.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 <= arg2) {
        return -1;
    }
    return 0;
}
1733 
1734 void helper_msa_cle_s_b(CPUMIPSState *env,
1735                         uint32_t wd, uint32_t ws, uint32_t wt)
1736 {
1737     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1738     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1739     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1740 
1741     pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1742     pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1743     pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1744     pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1745     pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1746     pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1747     pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1748     pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1749     pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1750     pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1751     pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1752     pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1753     pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1754     pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1755     pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1756     pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1757 }
1758 
1759 void helper_msa_cle_s_h(CPUMIPSState *env,
1760                         uint32_t wd, uint32_t ws, uint32_t wt)
1761 {
1762     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1763     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1764     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1765 
1766     pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1767     pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1768     pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1769     pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1770     pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1771     pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1772     pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1773     pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1774 }
1775 
1776 void helper_msa_cle_s_w(CPUMIPSState *env,
1777                         uint32_t wd, uint32_t ws, uint32_t wt)
1778 {
1779     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1780     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1781     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1782 
1783     pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1784     pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1785     pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1786     pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1787 }
1788 
1789 void helper_msa_cle_s_d(CPUMIPSState *env,
1790                         uint32_t wd, uint32_t ws, uint32_t wt)
1791 {
1792     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1793     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1794     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1795 
1796     pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1797     pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1798 }
1799 
/*
 * Unsigned less-than-or-equal compare for data format df.
 *
 * Both operands are reduced with UNSIGNED() to the low DF_BITS(df) bits and
 * compared as uint64_t.  Returns -1 (all bits set) when the masked arg1 is
 * <= the masked arg2, otherwise 0.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs <= rhs) {
        return -1;
    }
    return 0;
}
1806 
1807 void helper_msa_cle_u_b(CPUMIPSState *env,
1808                         uint32_t wd, uint32_t ws, uint32_t wt)
1809 {
1810     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1811     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1812     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1813 
1814     pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1815     pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1816     pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1817     pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1818     pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1819     pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1820     pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1821     pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1822     pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1823     pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1824     pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1825     pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1826     pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1827     pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1828     pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1829     pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1830 }
1831 
1832 void helper_msa_cle_u_h(CPUMIPSState *env,
1833                         uint32_t wd, uint32_t ws, uint32_t wt)
1834 {
1835     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1836     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1837     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1838 
1839     pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1840     pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1841     pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1842     pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1843     pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1844     pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1845     pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1846     pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1847 }
1848 
1849 void helper_msa_cle_u_w(CPUMIPSState *env,
1850                         uint32_t wd, uint32_t ws, uint32_t wt)
1851 {
1852     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1853     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1854     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1855 
1856     pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1857     pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1858     pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1859     pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1860 }
1861 
1862 void helper_msa_cle_u_d(CPUMIPSState *env,
1863                         uint32_t wd, uint32_t ws, uint32_t wt)
1864 {
1865     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1866     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1867     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1868 
1869     pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1870     pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1871 }
1872 
/*
 * Signed less-than compare: returns -1 (all bits set) when arg1 < arg2,
 * otherwise 0.  The df argument is not used by the comparison.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1877 
/* Signed byte less-than compare: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1882 
1883 void helper_msa_clt_s_b(CPUMIPSState *env,
1884                         uint32_t wd, uint32_t ws, uint32_t wt)
1885 {
1886     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1887     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1888     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1889 
1890     pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1891     pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1892     pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1893     pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1894     pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1895     pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1896     pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1897     pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1898     pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1899     pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1900     pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1901     pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1902     pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1903     pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1904     pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1905     pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1906 }
1907 
/*
 * Signed halfword less-than compare: -1 (all bits set) when arg1 < arg2,
 * else 0.
 */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1912 
1913 void helper_msa_clt_s_h(CPUMIPSState *env,
1914                         uint32_t wd, uint32_t ws, uint32_t wt)
1915 {
1916     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1917     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1918     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1919 
1920     pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1921     pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1922     pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1923     pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1924     pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1925     pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1926     pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1927     pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1928 }
1929 
/* Signed word less-than compare: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1934 
1935 void helper_msa_clt_s_w(CPUMIPSState *env,
1936                         uint32_t wd, uint32_t ws, uint32_t wt)
1937 {
1938     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1939     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1940     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1941 
1942     pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1943     pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1944     pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1945     pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1946 }
1947 
/*
 * Signed doubleword less-than compare: -1 (all bits set) when arg1 < arg2,
 * else 0.
 */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1952 
1953 void helper_msa_clt_s_d(CPUMIPSState *env,
1954                         uint32_t wd, uint32_t ws, uint32_t wt)
1955 {
1956     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1957     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1958     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1959 
1960     pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1961     pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1962 }
1963 
/*
 * Unsigned less-than compare for data format df.
 *
 * Both operands are reduced with UNSIGNED() to the low DF_BITS(df) bits and
 * compared as uint64_t.  Returns -1 (all bits set) when the masked arg1 is
 * < the masked arg2, otherwise 0.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return -1;
    }
    return 0;
}
1970 
1971 void helper_msa_clt_u_b(CPUMIPSState *env,
1972                         uint32_t wd, uint32_t ws, uint32_t wt)
1973 {
1974     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1975     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1976     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1977 
1978     pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1979     pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1980     pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1981     pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1982     pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1983     pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1984     pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1985     pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1986     pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1987     pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1988     pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1989     pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1990     pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1991     pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1992     pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1993     pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1994 }
1995 
1996 void helper_msa_clt_u_h(CPUMIPSState *env,
1997                         uint32_t wd, uint32_t ws, uint32_t wt)
1998 {
1999     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2000     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2001     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2002 
2003     pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2004     pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2005     pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2006     pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2007     pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2008     pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2009     pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2010     pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2011 }
2012 
2013 void helper_msa_clt_u_w(CPUMIPSState *env,
2014                         uint32_t wd, uint32_t ws, uint32_t wt)
2015 {
2016     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2017     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2018     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2019 
2020     pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2021     pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2022     pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2023     pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2024 }
2025 
2026 void helper_msa_clt_u_d(CPUMIPSState *env,
2027                         uint32_t wd, uint32_t ws, uint32_t wt)
2028 {
2029     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2030     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2031     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2032 
2033     pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2034     pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2035 }
2036 
2037 
2038 /*
2039  * Int Divide
2040  * ----------
2041  *
2042  * +---------------+----------------------------------------------------------+
2043  * | DIV_S.B       | Vector Signed Divide (byte)                              |
2044  * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2045  * | DIV_S.W       | Vector Signed Divide (word)                              |
2046  * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2047  * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2048  * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2049  * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2050  * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2051  * +---------------+----------------------------------------------------------+
2052  */
2053 
2054 
2055 static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2056 {
2057     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
2058         return DF_MIN_INT(df);
2059     }
2060     return arg2 ? arg1 / arg2
2061                 : arg1 >= 0 ? -1 : 1;
2062 }
2063 
2064 void helper_msa_div_s_b(CPUMIPSState *env,
2065                         uint32_t wd, uint32_t ws, uint32_t wt)
2066 {
2067     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2068     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2069     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2070 
2071     pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2072     pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2073     pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2074     pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2075     pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2076     pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2077     pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2078     pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2079     pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2080     pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2081     pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2082     pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2083     pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2084     pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2085     pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2086     pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2087 }
2088 
2089 void helper_msa_div_s_h(CPUMIPSState *env,
2090                         uint32_t wd, uint32_t ws, uint32_t wt)
2091 {
2092     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2093     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2094     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2095 
2096     pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2097     pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2098     pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2099     pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2100     pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2101     pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2102     pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2103     pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2104 }
2105 
2106 void helper_msa_div_s_w(CPUMIPSState *env,
2107                         uint32_t wd, uint32_t ws, uint32_t wt)
2108 {
2109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2112 
2113     pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2114     pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2115     pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2116     pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2117 }
2118 
2119 void helper_msa_div_s_d(CPUMIPSState *env,
2120                         uint32_t wd, uint32_t ws, uint32_t wt)
2121 {
2122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2124     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2125 
2126     pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2127     pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2128 }
2129 
2130 static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2131 {
2132     uint64_t u_arg1 = UNSIGNED(arg1, df);
2133     uint64_t u_arg2 = UNSIGNED(arg2, df);
2134     return arg2 ? u_arg1 / u_arg2 : -1;
2135 }
2136 
2137 void helper_msa_div_u_b(CPUMIPSState *env,
2138                         uint32_t wd, uint32_t ws, uint32_t wt)
2139 {
2140     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2141     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2142     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2143 
2144     pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2145     pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2146     pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2147     pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2148     pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2149     pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2150     pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2151     pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2152     pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2153     pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2154     pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2155     pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2156     pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2157     pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2158     pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2159     pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2160 }
2161 
2162 void helper_msa_div_u_h(CPUMIPSState *env,
2163                         uint32_t wd, uint32_t ws, uint32_t wt)
2164 {
2165     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2166     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2167     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2168 
2169     pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2170     pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2171     pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2172     pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2173     pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2174     pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2175     pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2176     pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2177 }
2178 
2179 void helper_msa_div_u_w(CPUMIPSState *env,
2180                         uint32_t wd, uint32_t ws, uint32_t wt)
2181 {
2182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2185 
2186     pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2187     pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2188     pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2189     pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2190 }
2191 
2192 void helper_msa_div_u_d(CPUMIPSState *env,
2193                         uint32_t wd, uint32_t ws, uint32_t wt)
2194 {
2195     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2196     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2197     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2198 
2199     pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2200     pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2201 }
2202 
2203 
2204 /*
2205  * Int Dot Product
2206  * ---------------
2207  *
2208  * +---------------+----------------------------------------------------------+
2209  * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2210  * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2211  * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2212  * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2213  * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2214  * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
2227  * +---------------+----------------------------------------------------------+
2228  */
2229 
/*
 * Store SIGNED_EVEN(a, df) in e and SIGNED_ODD(a, df) in o.
 *
 * e and o are assignment targets and are parenthesized so that any
 * lvalue expression can be passed safely.  a and df are forwarded
 * unchanged as whole arguments to the inner macros.
 */
#define SIGNED_EXTRACT(e, o, a, df)     \
    do {                                \
        (e) = SIGNED_EVEN(a, df);       \
        (o) = SIGNED_ODD(a, df);        \
    } while (0)
2235 
/*
 * Store UNSIGNED_EVEN(a, df) in e and UNSIGNED_ODD(a, df) in o.
 *
 * e and o are assignment targets and are parenthesized so that any
 * lvalue expression can be passed safely.  a and df are forwarded
 * unchanged as whole arguments to the inner macros.
 */
#define UNSIGNED_EXTRACT(e, o, a, df)   \
    do {                                \
        (e) = UNSIGNED_EVEN(a, df);     \
        (o) = UNSIGNED_ODD(a, df);      \
    } while (0)
2241 
2242 
2243 static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2244 {
2245     int64_t even_arg1;
2246     int64_t even_arg2;
2247     int64_t odd_arg1;
2248     int64_t odd_arg2;
2249     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2250     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2251     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2252 }
2253 
2254 void helper_msa_dotp_s_h(CPUMIPSState *env,
2255                          uint32_t wd, uint32_t ws, uint32_t wt)
2256 {
2257     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2258     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2259     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2260 
2261     pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2262     pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2263     pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2264     pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2265     pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2266     pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2267     pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2268     pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2269 }
2270 
2271 void helper_msa_dotp_s_w(CPUMIPSState *env,
2272                          uint32_t wd, uint32_t ws, uint32_t wt)
2273 {
2274     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2275     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2276     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2277 
2278     pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2279     pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2280     pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2281     pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2282 }
2283 
2284 void helper_msa_dotp_s_d(CPUMIPSState *env,
2285                          uint32_t wd, uint32_t ws, uint32_t wt)
2286 {
2287     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2288     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2289     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2290 
2291     pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2292     pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2293 }
2294 
2295 
2296 static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2297 {
2298     int64_t even_arg1;
2299     int64_t even_arg2;
2300     int64_t odd_arg1;
2301     int64_t odd_arg2;
2302     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2303     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2304     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2305 }
2306 
2307 void helper_msa_dotp_u_h(CPUMIPSState *env,
2308                          uint32_t wd, uint32_t ws, uint32_t wt)
2309 {
2310     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2311     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2312     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2313 
2314     pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2315     pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2316     pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2317     pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2318     pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2319     pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2320     pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2321     pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2322 }
2323 
2324 void helper_msa_dotp_u_w(CPUMIPSState *env,
2325                          uint32_t wd, uint32_t ws, uint32_t wt)
2326 {
2327     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2328     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2329     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2330 
2331     pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2332     pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2333     pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2334     pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2335 }
2336 
2337 void helper_msa_dotp_u_d(CPUMIPSState *env,
2338                          uint32_t wd, uint32_t ws, uint32_t wt)
2339 {
2340     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2341     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2342     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2343 
2344     pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2345     pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2346 }
2347 
2348 
2349 static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
2350                                      int64_t arg2)
2351 {
2352     int64_t even_arg1;
2353     int64_t even_arg2;
2354     int64_t odd_arg1;
2355     int64_t odd_arg2;
2356     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2357     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2358     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2359 }
2360 
2361 void helper_msa_dpadd_s_h(CPUMIPSState *env,
2362                           uint32_t wd, uint32_t ws, uint32_t wt)
2363 {
2364     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2365     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2366     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2367 
2368     pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2369     pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2370     pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2371     pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2372     pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2373     pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2374     pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2375     pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2376 }
2377 
2378 void helper_msa_dpadd_s_w(CPUMIPSState *env,
2379                           uint32_t wd, uint32_t ws, uint32_t wt)
2380 {
2381     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2382     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2383     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2384 
2385     pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2386     pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2387     pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2388     pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2389 }
2390 
2391 void helper_msa_dpadd_s_d(CPUMIPSState *env,
2392                           uint32_t wd, uint32_t ws, uint32_t wt)
2393 {
2394     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2395     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2396     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2397 
2398     pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2399     pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2400 }
2401 
2402 
2403 static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
2404                                      int64_t arg2)
2405 {
2406     int64_t even_arg1;
2407     int64_t even_arg2;
2408     int64_t odd_arg1;
2409     int64_t odd_arg2;
2410     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2411     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2412     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2413 }
2414 
2415 void helper_msa_dpadd_u_h(CPUMIPSState *env,
2416                           uint32_t wd, uint32_t ws, uint32_t wt)
2417 {
2418     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2419     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2420     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2421 
2422     pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2423     pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2424     pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2425     pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2426     pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2427     pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2428     pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2429     pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2430 }
2431 
2432 void helper_msa_dpadd_u_w(CPUMIPSState *env,
2433                           uint32_t wd, uint32_t ws, uint32_t wt)
2434 {
2435     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2436     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2437     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2438 
2439     pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2440     pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2441     pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2442     pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2443 }
2444 
2445 void helper_msa_dpadd_u_d(CPUMIPSState *env,
2446                           uint32_t wd, uint32_t ws, uint32_t wt)
2447 {
2448     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2449     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2450     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2451 
2452     pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2453     pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2454 }
2455 
2456 
2457 static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
2458                                      int64_t arg2)
2459 {
2460     int64_t even_arg1;
2461     int64_t even_arg2;
2462     int64_t odd_arg1;
2463     int64_t odd_arg2;
2464     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2465     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2466     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
2467 }
2468 
2469 void helper_msa_dpsub_s_h(CPUMIPSState *env,
2470                           uint32_t wd, uint32_t ws, uint32_t wt)
2471 {
2472     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2473     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2474     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2475 
2476     pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2477     pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2478     pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2479     pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2480     pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2481     pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2482     pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2483     pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2484 }
2485 
2486 void helper_msa_dpsub_s_w(CPUMIPSState *env,
2487                           uint32_t wd, uint32_t ws, uint32_t wt)
2488 {
2489     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2490     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2491     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2492 
2493     pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2494     pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2495     pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2496     pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2497 }
2498 
2499 void helper_msa_dpsub_s_d(CPUMIPSState *env,
2500                           uint32_t wd, uint32_t ws, uint32_t wt)
2501 {
2502     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2503     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2504     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2505 
2506     pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2507     pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2508 }
2509 
2510 
2511 static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
2512                                      int64_t arg2)
2513 {
2514     int64_t even_arg1;
2515     int64_t even_arg2;
2516     int64_t odd_arg1;
2517     int64_t odd_arg2;
2518     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2519     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2520     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
2521 }
2522 
2523 void helper_msa_dpsub_u_h(CPUMIPSState *env,
2524                           uint32_t wd, uint32_t ws, uint32_t wt)
2525 {
2526     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2527     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2528     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2529 
2530     pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2531     pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2532     pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2533     pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2534     pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2535     pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2536     pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2537     pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2538 }
2539 
2540 void helper_msa_dpsub_u_w(CPUMIPSState *env,
2541                           uint32_t wd, uint32_t ws, uint32_t wt)
2542 {
2543     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2544     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2545     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2546 
2547     pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2548     pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2549     pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2550     pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2551 }
2552 
2553 void helper_msa_dpsub_u_d(CPUMIPSState *env,
2554                           uint32_t wd, uint32_t ws, uint32_t wt)
2555 {
2556     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2557     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2558     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2559 
2560     pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2561     pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2562 }
2563 
2564 
2565 /*
2566  * Int Max Min
2567  * -----------
2568  *
2569  * +---------------+----------------------------------------------------------+
2570  * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2571  * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2572  * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2573  * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2574  * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2575  * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2576  * | MAX_S.W       | Vector Signed Maximum (word)                             |
2577  * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2578  * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2579  * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2580  * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2581  * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2582  * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2583  * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2584  * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2585  * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2586  * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2587  * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2588  * | MIN_S.W       | Vector Signed Minimum (word)                             |
2589  * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2590  * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2591  * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2592  * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2593  * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2594  * +---------------+----------------------------------------------------------+
2595  */
2596 
/*
 * Maximum based on absolute value for one element pair.
 *
 * Returns arg1 when |arg1| > |arg2|, otherwise arg2 (so ties go to
 * arg2).  df is not used.
 *
 * The magnitudes are formed in unsigned arithmetic: negating INT64_MIN
 * as an int64_t would overflow, whereas -(uint64_t)INT64_MIN is the
 * well-defined value 2^63.
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2603 
2604 void helper_msa_max_a_b(CPUMIPSState *env,
2605                         uint32_t wd, uint32_t ws, uint32_t wt)
2606 {
2607     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2608     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2609     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2610 
2611     pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2612     pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2613     pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2614     pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2615     pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2616     pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2617     pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2618     pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2619     pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2620     pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2621     pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2622     pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2623     pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2624     pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2625     pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2626     pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2627 }
2628 
2629 void helper_msa_max_a_h(CPUMIPSState *env,
2630                         uint32_t wd, uint32_t ws, uint32_t wt)
2631 {
2632     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2633     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2634     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2635 
2636     pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2637     pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2638     pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2639     pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2640     pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2641     pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2642     pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2643     pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2644 }
2645 
2646 void helper_msa_max_a_w(CPUMIPSState *env,
2647                         uint32_t wd, uint32_t ws, uint32_t wt)
2648 {
2649     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2650     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2651     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2652 
2653     pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2654     pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2655     pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2656     pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2657 }
2658 
2659 void helper_msa_max_a_d(CPUMIPSState *env,
2660                         uint32_t wd, uint32_t ws, uint32_t wt)
2661 {
2662     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2663     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2664     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2665 
2666     pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2667     pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2668 }
2669 
2670 
/*
 * Signed maximum of one element pair: returns the larger of arg1 and
 * arg2 compared as int64_t.  df is not used.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
2675 
2676 void helper_msa_max_s_b(CPUMIPSState *env,
2677                         uint32_t wd, uint32_t ws, uint32_t wt)
2678 {
2679     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2680     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2681     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2682 
2683     pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2684     pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2685     pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2686     pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2687     pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2688     pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2689     pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2690     pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2691     pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2692     pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2693     pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2694     pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2695     pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2696     pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2697     pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2698     pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2699 }
2700 
2701 void helper_msa_max_s_h(CPUMIPSState *env,
2702                         uint32_t wd, uint32_t ws, uint32_t wt)
2703 {
2704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2707 
2708     pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2709     pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2710     pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2711     pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2712     pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2713     pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2714     pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2715     pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2716 }
2717 
2718 void helper_msa_max_s_w(CPUMIPSState *env,
2719                         uint32_t wd, uint32_t ws, uint32_t wt)
2720 {
2721     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2722     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2723     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2724 
2725     pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2726     pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2727     pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2728     pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2729 }
2730 
2731 void helper_msa_max_s_d(CPUMIPSState *env,
2732                         uint32_t wd, uint32_t ws, uint32_t wt)
2733 {
2734     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2735     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2736     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2737 
2738     pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2739     pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2740 }
2741 
2742 
/*
 * Unsigned maximum of two element values.
 *
 * Both operands are masked to the df element width with UNSIGNED() and
 * compared as unsigned quantities; the original (unmasked) operand that wins
 * is returned. When the masked values are equal, arg2 is returned.
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs > rhs) {
        return arg1;
    }
    return arg2;
}
2749 
2750 void helper_msa_max_u_b(CPUMIPSState *env,
2751                         uint32_t wd, uint32_t ws, uint32_t wt)
2752 {
2753     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2754     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2755     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2756 
2757     pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2758     pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2759     pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2760     pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2761     pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2762     pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2763     pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2764     pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2765     pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2766     pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2767     pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2768     pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2769     pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2770     pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2771     pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2772     pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2773 }
2774 
2775 void helper_msa_max_u_h(CPUMIPSState *env,
2776                         uint32_t wd, uint32_t ws, uint32_t wt)
2777 {
2778     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2779     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2780     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2781 
2782     pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2783     pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2784     pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2785     pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2786     pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2787     pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2788     pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2789     pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2790 }
2791 
2792 void helper_msa_max_u_w(CPUMIPSState *env,
2793                         uint32_t wd, uint32_t ws, uint32_t wt)
2794 {
2795     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2796     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2797     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2798 
2799     pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2800     pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2801     pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2802     pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2803 }
2804 
2805 void helper_msa_max_u_d(CPUMIPSState *env,
2806                         uint32_t wd, uint32_t ws, uint32_t wt)
2807 {
2808     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2809     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2810     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2811 
2812     pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2813     pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2814 }
2815 
2816 
/*
 * Return whichever operand has the smaller absolute value.
 *
 * df is accepted for signature uniformity with the other per-element
 * helpers; it is not consulted here. When both magnitudes are equal, arg2 is
 * returned.
 *
 * The magnitudes are computed in uint64_t: negating INT64_MIN as a signed
 * value overflows (undefined behaviour in ISO C), whereas unsigned negation
 * is well defined and yields the correct magnitude 2^63.
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2823 
2824 void helper_msa_min_a_b(CPUMIPSState *env,
2825                         uint32_t wd, uint32_t ws, uint32_t wt)
2826 {
2827     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2828     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2829     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2830 
2831     pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2832     pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2833     pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2834     pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2835     pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2836     pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2837     pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2838     pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2839     pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2840     pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2841     pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2842     pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2843     pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2844     pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2845     pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2846     pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2847 }
2848 
2849 void helper_msa_min_a_h(CPUMIPSState *env,
2850                         uint32_t wd, uint32_t ws, uint32_t wt)
2851 {
2852     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2853     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2854     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2855 
2856     pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2857     pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2858     pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2859     pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2860     pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2861     pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2862     pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2863     pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2864 }
2865 
2866 void helper_msa_min_a_w(CPUMIPSState *env,
2867                         uint32_t wd, uint32_t ws, uint32_t wt)
2868 {
2869     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2870     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2871     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2872 
2873     pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2874     pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2875     pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2876     pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2877 }
2878 
2879 void helper_msa_min_a_d(CPUMIPSState *env,
2880                         uint32_t wd, uint32_t ws, uint32_t wt)
2881 {
2882     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2883     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2884     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2885 
2886     pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2887     pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2888 }
2889 
2890 
/*
 * Signed minimum of two element values.
 *
 * df is accepted for signature uniformity with the other per-element
 * helpers; it is not consulted here. When the operands compare equal,
 * arg2 is the one returned.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2895 
2896 void helper_msa_min_s_b(CPUMIPSState *env,
2897                         uint32_t wd, uint32_t ws, uint32_t wt)
2898 {
2899     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2900     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2901     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2902 
2903     pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2904     pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2905     pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2906     pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2907     pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2908     pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2909     pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2910     pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2911     pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2912     pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2913     pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2914     pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2915     pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2916     pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2917     pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2918     pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2919 }
2920 
2921 void helper_msa_min_s_h(CPUMIPSState *env,
2922                         uint32_t wd, uint32_t ws, uint32_t wt)
2923 {
2924     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2925     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2926     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2927 
2928     pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2929     pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2930     pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2931     pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2932     pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2933     pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2934     pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2935     pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2936 }
2937 
2938 void helper_msa_min_s_w(CPUMIPSState *env,
2939                         uint32_t wd, uint32_t ws, uint32_t wt)
2940 {
2941     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2942     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2943     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2944 
2945     pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2946     pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2947     pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2948     pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2949 }
2950 
2951 void helper_msa_min_s_d(CPUMIPSState *env,
2952                         uint32_t wd, uint32_t ws, uint32_t wt)
2953 {
2954     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2955     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2956     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2957 
2958     pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2959     pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2960 }
2961 
2962 
/*
 * Unsigned minimum of two element values.
 *
 * Both operands are masked to the df element width with UNSIGNED() and
 * compared as unsigned quantities; the original (unmasked) operand that wins
 * is returned. When the masked values are equal, arg2 is returned.
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return arg1;
    }
    return arg2;
}
2969 
2970 void helper_msa_min_u_b(CPUMIPSState *env,
2971                         uint32_t wd, uint32_t ws, uint32_t wt)
2972 {
2973     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2974     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2975     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2976 
2977     pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2978     pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2979     pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2980     pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2981     pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2982     pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2983     pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2984     pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2985     pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2986     pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2987     pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2988     pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2989     pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2990     pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2991     pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2992     pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2993 }
2994 
2995 void helper_msa_min_u_h(CPUMIPSState *env,
2996                         uint32_t wd, uint32_t ws, uint32_t wt)
2997 {
2998     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2999     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3000     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3001 
3002     pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3003     pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3004     pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3005     pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3006     pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3007     pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3008     pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3009     pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3010 }
3011 
3012 void helper_msa_min_u_w(CPUMIPSState *env,
3013                         uint32_t wd, uint32_t ws, uint32_t wt)
3014 {
3015     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3016     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3017     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3018 
3019     pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3020     pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3021     pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3022     pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3023 }
3024 
3025 void helper_msa_min_u_d(CPUMIPSState *env,
3026                         uint32_t wd, uint32_t ws, uint32_t wt)
3027 {
3028     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3029     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3030     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3031 
3032     pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3033     pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3034 }
3035 
3036 
3037 /*
3038  * Int Modulo
3039  * ----------
3040  *
3041  * +---------------+----------------------------------------------------------+
3042  * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3043  * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3044  * | MOD_S.W       | Vector Signed Modulo (word)                              |
3045  * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3046  * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3047  * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3048  * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3049  * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3050  * +---------------+----------------------------------------------------------+
3051  */
3052 
/*
 * Signed remainder of arg1 divided by arg2.
 *
 * Special cases:
 *   - arg2 == 0:  arg1 is returned unchanged (no division is attempted).
 *   - arg2 == -1: the result is 0. x % -1 is mathematically 0 for every x,
 *     and answering directly avoids evaluating INT64_MIN % -1, which
 *     overflows. Testing the divisor alone also avoids building the
 *     element-width minimum via a signed shift into the sign bit, which is
 *     undefined for 64-bit elements.
 *
 * df is kept for signature uniformity with the other per-element helpers;
 * it is not consulted here.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == 0) {
        return arg1;
    }
    if (arg2 == -1) {
        return 0;
    }
    return arg1 % arg2;
}
3060 
3061 void helper_msa_mod_s_b(CPUMIPSState *env,
3062                         uint32_t wd, uint32_t ws, uint32_t wt)
3063 {
3064     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3065     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3066     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3067 
3068     pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3069     pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3070     pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3071     pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3072     pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3073     pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3074     pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3075     pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3076     pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3077     pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3078     pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3079     pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3080     pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3081     pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3082     pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3083     pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3084 }
3085 
3086 void helper_msa_mod_s_h(CPUMIPSState *env,
3087                         uint32_t wd, uint32_t ws, uint32_t wt)
3088 {
3089     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3090     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3091     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3092 
3093     pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3094     pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3095     pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3096     pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3097     pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3098     pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3099     pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3100     pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3101 }
3102 
3103 void helper_msa_mod_s_w(CPUMIPSState *env,
3104                         uint32_t wd, uint32_t ws, uint32_t wt)
3105 {
3106     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3107     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3108     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3109 
3110     pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3111     pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3112     pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3113     pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3114 }
3115 
3116 void helper_msa_mod_s_d(CPUMIPSState *env,
3117                         uint32_t wd, uint32_t ws, uint32_t wt)
3118 {
3119     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3120     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3121     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3122 
3123     pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3124     pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3125 }
3126 
/*
 * Unsigned remainder of arg1 divided by arg2.
 *
 * Both operands are first masked to the df element width with UNSIGNED().
 * A zero divisor yields the (masked) dividend instead of dividing.
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t dividend = UNSIGNED(arg1, df);
    uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3133 
3134 void helper_msa_mod_u_b(CPUMIPSState *env,
3135                         uint32_t wd, uint32_t ws, uint32_t wt)
3136 {
3137     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3138     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3139     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3140 
3141     pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3142     pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3143     pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3144     pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3145     pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3146     pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3147     pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3148     pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3149     pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3150     pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3151     pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3152     pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3153     pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3154     pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3155     pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3156     pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3157 }
3158 
3159 void helper_msa_mod_u_h(CPUMIPSState *env,
3160                         uint32_t wd, uint32_t ws, uint32_t wt)
3161 {
3162     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3163     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3164     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3165 
3166     pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3167     pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3168     pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3169     pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3170     pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3171     pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3172     pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3173     pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3174 }
3175 
3176 void helper_msa_mod_u_w(CPUMIPSState *env,
3177                         uint32_t wd, uint32_t ws, uint32_t wt)
3178 {
3179     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3180     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3181     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3182 
3183     pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3184     pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3185     pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3186     pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3187 }
3188 
3189 void helper_msa_mod_u_d(CPUMIPSState *env,
3190                         uint32_t wd, uint32_t ws, uint32_t wt)
3191 {
3192     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3193     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3194     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3195 
3196     pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3197     pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3198 }
3199 
3200 
3201 /*
3202  * Int Multiply
3203  * ------------
3204  *
3205  * +---------------+----------------------------------------------------------+
3206  * | MADDV.B       | Vector Multiply and Add (byte)                           |
3207  * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3208  * | MADDV.W       | Vector Multiply and Add (word)                           |
3209  * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3210  * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3211  * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3212  * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3213  * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3214  * | MULV.B        | Vector Multiply (byte)                                   |
3215  * | MULV.H        | Vector Multiply (halfword)                               |
3216  * | MULV.W        | Vector Multiply (word)                                   |
3217  * | MULV.D        | Vector Multiply (doubleword)                             |
3218  * +---------------+----------------------------------------------------------+
3219  */
3220 
/*
 * Multiply-and-add on one element: returns dest + arg1 * arg2, wrapped
 * modulo 2^64.
 *
 * The arithmetic is carried out in uint64_t because the product or sum can
 * exceed the int64_t range for 64-bit elements, and signed overflow is
 * undefined behaviour in ISO C. The low 64 bits of a product and of a sum
 * are the same for signed and unsigned operands, so callers that truncate
 * the result to a narrower element see identical values.
 *
 * df is kept for signature uniformity with the other per-element helpers;
 * it is not consulted here.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
}
3226 
3227 void helper_msa_maddv_b(CPUMIPSState *env,
3228                         uint32_t wd, uint32_t ws, uint32_t wt)
3229 {
3230     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3231     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3232     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3233 
3234     pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3235     pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3236     pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3237     pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3238     pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3239     pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3240     pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3241     pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3242     pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3243     pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3244     pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3245     pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3246     pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3247     pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3248     pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3249     pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3250 }
3251 
3252 void helper_msa_maddv_h(CPUMIPSState *env,
3253                         uint32_t wd, uint32_t ws, uint32_t wt)
3254 {
3255     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3256     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3257     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3258 
3259     pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3260     pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3261     pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3262     pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3263     pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3264     pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3265     pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3266     pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3267 }
3268 
3269 void helper_msa_maddv_w(CPUMIPSState *env,
3270                         uint32_t wd, uint32_t ws, uint32_t wt)
3271 {
3272     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3273     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3274     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3275 
3276     pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3277     pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3278     pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3279     pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3280 }
3281 
3282 void helper_msa_maddv_d(CPUMIPSState *env,
3283                         uint32_t wd, uint32_t ws, uint32_t wt)
3284 {
3285     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3286     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3287     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3288 
3289     pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3290     pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3291 }
3292 
/*
 * Multiply-and-subtract on one element: returns dest - arg1 * arg2, wrapped
 * modulo 2^64.
 *
 * The arithmetic is carried out in uint64_t because the product or
 * difference can exceed the int64_t range for 64-bit elements, and signed
 * overflow is undefined behaviour in ISO C. The low 64 bits are the same for
 * signed and unsigned operands, so callers that truncate the result to a
 * narrower element see identical values.
 *
 * df is kept for signature uniformity with the other per-element helpers;
 * it is not consulted here.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
}
3298 
3299 void helper_msa_msubv_b(CPUMIPSState *env,
3300                         uint32_t wd, uint32_t ws, uint32_t wt)
3301 {
3302     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3303     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3304     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3305 
3306     pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3307     pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3308     pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3309     pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3310     pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3311     pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3312     pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3313     pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3314     pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3315     pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3316     pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3317     pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3318     pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3319     pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3320     pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3321     pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3322 }
3323 
3324 void helper_msa_msubv_h(CPUMIPSState *env,
3325                         uint32_t wd, uint32_t ws, uint32_t wt)
3326 {
3327     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3328     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3329     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3330 
3331     pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3332     pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3333     pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3334     pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3335     pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3336     pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3337     pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3338     pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3339 }
3340 
3341 void helper_msa_msubv_w(CPUMIPSState *env,
3342                         uint32_t wd, uint32_t ws, uint32_t wt)
3343 {
3344     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3345     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3346     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3347 
3348     pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3349     pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3350     pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3351     pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3352 }
3353 
3354 void helper_msa_msubv_d(CPUMIPSState *env,
3355                         uint32_t wd, uint32_t ws, uint32_t wt)
3356 {
3357     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3358     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3359     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3360 
3361     pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3362     pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3363 }
3364 
3365 
/*
 * Multiply two vector elements and return the low 64 bits of the product.
 *
 * @df:   data format; unused here, kept so the signature matches the other
 *        per-element helpers.
 * @arg1: first factor.
 * @arg2: second factor.
 *
 * The multiplication is carried out in uint64_t so that a product which
 * does not fit in int64_t wraps modulo 2^64 rather than being a signed
 * overflow, which is undefined behaviour in ISO C. The low bits of the
 * product are the same for signed and unsigned multiplication.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3370 
3371 void helper_msa_mulv_b(CPUMIPSState *env,
3372                        uint32_t wd, uint32_t ws, uint32_t wt)
3373 {
3374     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3375     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3376     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3377 
3378     pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3379     pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3380     pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3381     pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3382     pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3383     pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3384     pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3385     pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3386     pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3387     pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3388     pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3389     pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3390     pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3391     pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3392     pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3393     pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3394 }
3395 
3396 void helper_msa_mulv_h(CPUMIPSState *env,
3397                        uint32_t wd, uint32_t ws, uint32_t wt)
3398 {
3399     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3400     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3401     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3402 
3403     pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3404     pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3405     pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3406     pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3407     pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3408     pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3409     pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3410     pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3411 }
3412 
3413 void helper_msa_mulv_w(CPUMIPSState *env,
3414                        uint32_t wd, uint32_t ws, uint32_t wt)
3415 {
3416     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3417     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3418     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3419 
3420     pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3421     pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3422     pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3423     pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3424 }
3425 
3426 void helper_msa_mulv_d(CPUMIPSState *env,
3427                        uint32_t wd, uint32_t ws, uint32_t wt)
3428 {
3429     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3430     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3431     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3432 
3433     pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3434     pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3435 }
3436 
3437 
3438 /*
3439  * Int Subtract
3440  * ------------
3441  *
3442  * +---------------+----------------------------------------------------------+
3443  * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3444  * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3445  * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3446  * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3447  * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3448  * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3449  * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3450  * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3451  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3452  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3453  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3454  * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3455  * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3456  * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3457  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3458  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3459  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3460  * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3461  * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3462  * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3463  * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3464  * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3465  * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3466  * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3467  * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3468  * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3469  * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3470  * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3471  * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3472  * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3473  * | SUBV.B        | Vector Subtract (byte)                                   |
3474  * | SUBV.H        | Vector Subtract (halfword)                               |
3475  * | SUBV.W        | Vector Subtract (word)                                   |
3476  * | SUBV.D        | Vector Subtract (doubleword)                             |
3477  * +---------------+----------------------------------------------------------+
3478  */
3479 
3480 
/*
 * Absolute value of the signed difference of two vector elements.
 *
 * @df:   data format; unused here, kept so the signature matches the other
 *        per-element helpers.
 * @arg1: first operand, compared as a signed value.
 * @arg2: second operand, compared as a signed value.
 *
 * Returns the magnitude |arg1 - arg2| as a 64-bit pattern. The operands are
 * ordered with a signed compare, but the subtraction itself is performed in
 * uint64_t: for operands at opposite ends of the int64_t range the
 * magnitude does not fit in int64_t, and a signed subtraction would
 * overflow (undefined behaviour in ISO C).
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed compare selects which operand is the larger one */
    uint64_t larger  = (arg1 < arg2) ? (uint64_t)arg2 : (uint64_t)arg1;
    uint64_t smaller = (arg1 < arg2) ? (uint64_t)arg1 : (uint64_t)arg2;

    return (int64_t)(larger - smaller);
}
3487 
3488 void helper_msa_asub_s_b(CPUMIPSState *env,
3489                          uint32_t wd, uint32_t ws, uint32_t wt)
3490 {
3491     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3492     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3493     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3494 
3495     pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3496     pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3497     pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3498     pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3499     pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3500     pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3501     pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3502     pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3503     pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3504     pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3505     pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3506     pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3507     pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3508     pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3509     pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3510     pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3511 }
3512 
3513 void helper_msa_asub_s_h(CPUMIPSState *env,
3514                          uint32_t wd, uint32_t ws, uint32_t wt)
3515 {
3516     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3517     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3518     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3519 
3520     pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3521     pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3522     pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3523     pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3524     pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3525     pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3526     pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3527     pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3528 }
3529 
3530 void helper_msa_asub_s_w(CPUMIPSState *env,
3531                          uint32_t wd, uint32_t ws, uint32_t wt)
3532 {
3533     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3534     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3535     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3536 
3537     pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3538     pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3539     pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3540     pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3541 }
3542 
3543 void helper_msa_asub_s_d(CPUMIPSState *env,
3544                          uint32_t wd, uint32_t ws, uint32_t wt)
3545 {
3546     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3547     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3548     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3549 
3550     pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3551     pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3552 }
3553 
3554 
/*
 * Absolute value of the unsigned difference of two vector elements.
 *
 * Both operands are first reduced to the width selected by @df with
 * UNSIGNED(); the smaller one is then subtracted from the larger one.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare decides the subtraction order */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3563 
3564 void helper_msa_asub_u_b(CPUMIPSState *env,
3565                          uint32_t wd, uint32_t ws, uint32_t wt)
3566 {
3567     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3568     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3569     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3570 
3571     pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3572     pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3573     pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3574     pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3575     pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3576     pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3577     pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3578     pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3579     pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3580     pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3581     pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3582     pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3583     pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3584     pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3585     pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3586     pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3587 }
3588 
3589 void helper_msa_asub_u_h(CPUMIPSState *env,
3590                          uint32_t wd, uint32_t ws, uint32_t wt)
3591 {
3592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3595 
3596     pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3597     pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3598     pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3599     pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3600     pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3601     pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3602     pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3603     pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3604 }
3605 
3606 void helper_msa_asub_u_w(CPUMIPSState *env,
3607                          uint32_t wd, uint32_t ws, uint32_t wt)
3608 {
3609     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3610     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3611     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3612 
3613     pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3614     pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3615     pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3616     pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3617 }
3618 
3619 void helper_msa_asub_u_d(CPUMIPSState *env,
3620                          uint32_t wd, uint32_t ws, uint32_t wt)
3621 {
3622     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3623     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3624     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3625 
3626     pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3627     pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3628 }
3629 
3630 
/*
 * Signed horizontal subtract for one destination element:
 * SIGNED_ODD(arg1, df) minus SIGNED_EVEN(arg2, df).
 *
 * NOTE(review): SIGNED_ODD/SIGNED_EVEN are defined elsewhere in this file;
 * presumably they extract the sign-extended upper and lower half of the
 * element - confirm against their definitions.
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);

    return diff;
}
3635 
3636 void helper_msa_hsub_s_h(CPUMIPSState *env,
3637                          uint32_t wd, uint32_t ws, uint32_t wt)
3638 {
3639     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3640     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3641     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3642 
3643     pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3644     pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3645     pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3646     pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3647     pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3648     pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3649     pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3650     pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3651 }
3652 
3653 void helper_msa_hsub_s_w(CPUMIPSState *env,
3654                          uint32_t wd, uint32_t ws, uint32_t wt)
3655 {
3656     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3657     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3658     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3659 
3660     pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3661     pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3662     pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3663     pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3664 }
3665 
3666 void helper_msa_hsub_s_d(CPUMIPSState *env,
3667                          uint32_t wd, uint32_t ws, uint32_t wt)
3668 {
3669     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3670     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3671     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3672 
3673     pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3674     pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3675 }
3676 
3677 
/*
 * Unsigned horizontal subtract for one destination element:
 * UNSIGNED_ODD(arg1, df) minus UNSIGNED_EVEN(arg2, df).
 *
 * NOTE(review): UNSIGNED_ODD/UNSIGNED_EVEN are defined elsewhere in this
 * file; presumably they extract the zero-extended upper and lower half of
 * the element - confirm against their definitions.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);

    return diff;
}
3682 
3683 void helper_msa_hsub_u_h(CPUMIPSState *env,
3684                          uint32_t wd, uint32_t ws, uint32_t wt)
3685 {
3686     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3687     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3688     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3689 
3690     pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3691     pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3692     pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3693     pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3694     pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3695     pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3696     pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3697     pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3698 }
3699 
3700 void helper_msa_hsub_u_w(CPUMIPSState *env,
3701                          uint32_t wd, uint32_t ws, uint32_t wt)
3702 {
3703     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3704     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3705     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3706 
3707     pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3708     pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3709     pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3710     pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3711 }
3712 
3713 void helper_msa_hsub_u_d(CPUMIPSState *env,
3714                          uint32_t wd, uint32_t ws, uint32_t wt)
3715 {
3716     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3717     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3718     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3719 
3720     pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3721     pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3722 }
3723 
3724 
/*
 * Signed saturating subtract of two signed vector elements.
 *
 * Returns arg1 - arg2, clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 * The range check is done before subtracting, by moving arg2 to the
 * other side of the comparison, so the difference is only computed when
 * it is known to be in range.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg2 > 0) {
        /* subtracting a positive value can only fall below the minimum */
        if (lower + arg2 < arg1) {
            return arg1 - arg2;
        }
        return lower;
    }

    /* subtracting zero or a negative value can only exceed the maximum */
    if (arg1 < upper + arg2) {
        return arg1 - arg2;
    }
    return upper;
}
3735 
3736 void helper_msa_subs_s_b(CPUMIPSState *env,
3737                          uint32_t wd, uint32_t ws, uint32_t wt)
3738 {
3739     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3740     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3741     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3742 
3743     pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3744     pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3745     pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3746     pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3747     pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3748     pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3749     pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3750     pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3751     pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3752     pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3753     pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3754     pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3755     pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3756     pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3757     pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3758     pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3759 }
3760 
3761 void helper_msa_subs_s_h(CPUMIPSState *env,
3762                          uint32_t wd, uint32_t ws, uint32_t wt)
3763 {
3764     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3765     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3766     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3767 
3768     pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3769     pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3770     pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3771     pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3772     pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3773     pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3774     pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3775     pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3776 }
3777 
3778 void helper_msa_subs_s_w(CPUMIPSState *env,
3779                          uint32_t wd, uint32_t ws, uint32_t wt)
3780 {
3781     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3782     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3783     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3784 
3785     pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3786     pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3787     pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3788     pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3789 }
3790 
3791 void helper_msa_subs_s_d(CPUMIPSState *env,
3792                          uint32_t wd, uint32_t ws, uint32_t wt)
3793 {
3794     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3795     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3796     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3797 
3798     pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3799     pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3800 }
3801 
3802 
/*
 * Unsigned saturating subtract of two unsigned vector elements.
 *
 * Both operands are reduced to the width selected by @df with UNSIGNED().
 * Returns their difference, or 0 when the first operand is not greater
 * than the second.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t minuend = UNSIGNED(arg1, df);
    uint64_t subtrahend = UNSIGNED(arg2, df);

    if (minuend > subtrahend) {
        return minuend - subtrahend;
    }
    return 0;
}
3809 
3810 void helper_msa_subs_u_b(CPUMIPSState *env,
3811                          uint32_t wd, uint32_t ws, uint32_t wt)
3812 {
3813     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3814     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3815     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3816 
3817     pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3818     pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3819     pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3820     pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3821     pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3822     pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3823     pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3824     pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3825     pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3826     pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3827     pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3828     pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3829     pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3830     pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3831     pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3832     pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3833 }
3834 
3835 void helper_msa_subs_u_h(CPUMIPSState *env,
3836                          uint32_t wd, uint32_t ws, uint32_t wt)
3837 {
3838     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3839     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3840     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3841 
3842     pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3843     pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3844     pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3845     pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3846     pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3847     pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3848     pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3849     pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3850 }
3851 
3852 void helper_msa_subs_u_w(CPUMIPSState *env,
3853                          uint32_t wd, uint32_t ws, uint32_t wt)
3854 {
3855     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3856     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3857     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3858 
3859     pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3860     pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3861     pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3862     pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3863 }
3864 
3865 void helper_msa_subs_u_d(CPUMIPSState *env,
3866                          uint32_t wd, uint32_t ws, uint32_t wt)
3867 {
3868     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3869     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3870     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3871 
3872     pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3873     pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3874 }
3875 
3876 
/*
 * Unsigned saturating subtract of a signed value from an unsigned value.
 *
 * @df:   data format, selects the element width used for masking and for
 *        the saturation limit.
 * @arg1: minuend; reduced to an unsigned element with UNSIGNED().
 * @arg2: subtrahend; interpreted as a signed value.
 *
 * Returns arg1 - arg2 clamped to [0, DF_MAX_UINT(df)].
 *
 * A negative arg2 turns the operation into an addition of its magnitude.
 * That magnitude is computed in uint64_t: negating arg2 as an int64_t
 * would be a signed overflow (undefined behaviour in ISO C) when arg2 is
 * INT64_MIN.
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);
    if (arg2 >= 0) {
        uint64_t u_arg2 = (uint64_t)arg2;
        /* plain subtraction, clamped at zero */
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    } else {
        /* |arg2|, negated in unsigned arithmetic so INT64_MIN is safe */
        uint64_t u_arg2 = 0 - (uint64_t)arg2;
        /* addition of the magnitude, clamped at the unsigned maximum */
        return (u_arg1 < max_uint - u_arg2) ?
            (int64_t)(u_arg1 + u_arg2) :
            (int64_t)max_uint;
    }
}
3893 
3894 void helper_msa_subsus_u_b(CPUMIPSState *env,
3895                            uint32_t wd, uint32_t ws, uint32_t wt)
3896 {
3897     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3898     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3899     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3900 
3901     pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3902     pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3903     pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3904     pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3905     pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3906     pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3907     pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3908     pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3909     pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3910     pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3911     pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3912     pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3913     pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3914     pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3915     pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3916     pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3917 }
3918 
3919 void helper_msa_subsus_u_h(CPUMIPSState *env,
3920                            uint32_t wd, uint32_t ws, uint32_t wt)
3921 {
3922     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3923     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3924     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3925 
3926     pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3927     pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3928     pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3929     pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3930     pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3931     pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3932     pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3933     pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3934 }
3935 
3936 void helper_msa_subsus_u_w(CPUMIPSState *env,
3937                            uint32_t wd, uint32_t ws, uint32_t wt)
3938 {
3939     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3940     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3941     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3942 
3943     pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3944     pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3945     pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3946     pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3947 }
3948 
3949 void helper_msa_subsus_u_d(CPUMIPSState *env,
3950                            uint32_t wd, uint32_t ws, uint32_t wt)
3951 {
3952     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3953     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3954     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3955 
3956     pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3957     pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3958 }
3959 
3960 
/*
 * Signed saturating subtract of two unsigned vector elements.
 *
 * @df:   data format, selects the element width used for masking and for
 *        the saturation limits.
 * @arg1: minuend; reduced to an unsigned element with UNSIGNED().
 * @arg2: subtrahend; reduced to an unsigned element with UNSIGNED().
 *
 * Returns arg1 - arg2 clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The magnitude of the negative limit is computed in uint64_t: negating
 * min_int as an int64_t would be a signed overflow (undefined behaviour in
 * ISO C) when min_int is INT64_MIN, which is the case the doubleword
 * format is expected to produce.
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);
    int64_t max_int = DF_MAX_INT(df);
    int64_t min_int = DF_MIN_INT(df);
    if (u_arg1 > u_arg2) {
        /* positive difference: clamp at the signed maximum */
        return u_arg1 - u_arg2 < (uint64_t)max_int ?
            (int64_t)(u_arg1 - u_arg2) :
            max_int;
    } else {
        /* zero or negative difference: clamp at the signed minimum */
        return u_arg2 - u_arg1 < (0 - (uint64_t)min_int) ?
            (int64_t)(u_arg1 - u_arg2) :
            min_int;
    }
}
3977 
3978 void helper_msa_subsuu_s_b(CPUMIPSState *env,
3979                            uint32_t wd, uint32_t ws, uint32_t wt)
3980 {
3981     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3982     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3983     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3984 
3985     pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3986     pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3987     pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3988     pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3989     pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3990     pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3991     pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3992     pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3993     pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3994     pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3995     pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3996     pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3997     pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3998     pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3999     pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4000     pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4001 }
4002 
4003 void helper_msa_subsuu_s_h(CPUMIPSState *env,
4004                            uint32_t wd, uint32_t ws, uint32_t wt)
4005 {
4006     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4007     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4008     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4009 
4010     pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4011     pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4012     pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4013     pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4014     pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4015     pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4016     pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4017     pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4018 }
4019 
4020 void helper_msa_subsuu_s_w(CPUMIPSState *env,
4021                            uint32_t wd, uint32_t ws, uint32_t wt)
4022 {
4023     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4024     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4025     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4026 
4027     pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4028     pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4029     pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4030     pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4031 }
4032 
4033 void helper_msa_subsuu_s_d(CPUMIPSState *env,
4034                            uint32_t wd, uint32_t ws, uint32_t wt)
4035 {
4036     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4037     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4038     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4039 
4040     pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4041     pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4042 }
4043 
4044 
/*
 * Element subtraction used by the subv helpers: returns arg1 - arg2 with
 * two's-complement wraparound.
 *
 * @df:   data format selector; not used by the computation.
 * @arg1: minuend.
 * @arg2: subtrahend.
 *
 * The difference is formed in unsigned 64-bit arithmetic and converted
 * back, so that a result outside the int64_t range (for example
 * INT64_MIN - 1) wraps instead of being a signed overflow, which ISO C
 * leaves undefined.
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4049 
4050 void helper_msa_subv_b(CPUMIPSState *env,
4051                        uint32_t wd, uint32_t ws, uint32_t wt)
4052 {
4053     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4054     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4055     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4056 
4057     pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4058     pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4059     pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4060     pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4061     pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4062     pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4063     pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4064     pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4065     pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4066     pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4067     pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4068     pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4069     pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4070     pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4071     pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4072     pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4073 }
4074 
4075 void helper_msa_subv_h(CPUMIPSState *env,
4076                        uint32_t wd, uint32_t ws, uint32_t wt)
4077 {
4078     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4079     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4080     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4081 
4082     pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4083     pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4084     pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4085     pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4086     pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4087     pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4088     pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4089     pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4090 }
4091 
4092 void helper_msa_subv_w(CPUMIPSState *env,
4093                        uint32_t wd, uint32_t ws, uint32_t wt)
4094 {
4095     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4096     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4097     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4098 
4099     pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4100     pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4101     pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4102     pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4103 }
4104 
4105 void helper_msa_subv_d(CPUMIPSState *env,
4106                        uint32_t wd, uint32_t ws, uint32_t wt)
4107 {
4108     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4109     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4110     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4111 
4112     pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4113     pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4114 }
4115 
4116 
4117 /*
4118  * Interleave
4119  * ----------
4120  *
4121  * +---------------+----------------------------------------------------------+
4122  * | ILVEV.B       | Vector Interleave Even (byte)                            |
4123  * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4124  * | ILVEV.W       | Vector Interleave Even (word)                            |
4125  * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4126  * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4127  * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4128  * | ILVOD.W       | Vector Interleave Odd (word)                             |
4129  * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4130  * | ILVL.B        | Vector Interleave Left (byte)                            |
4131  * | ILVL.H        | Vector Interleave Left (halfword)                        |
4132  * | ILVL.W        | Vector Interleave Left (word)                            |
4133  * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4134  * | ILVR.B        | Vector Interleave Right (byte)                           |
4135  * | ILVR.H        | Vector Interleave Right (halfword)                       |
4136  * | ILVR.W        | Vector Interleave Right (word)                           |
4137  * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4138  * +---------------+----------------------------------------------------------+
4139  */
4140 
4141 
4142 void helper_msa_ilvev_b(CPUMIPSState *env,
4143                         uint32_t wd, uint32_t ws, uint32_t wt)
4144 {
4145     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4146     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4147     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4148 
4149 #if HOST_BIG_ENDIAN
4150     pwd->b[8]  = pws->b[9];
4151     pwd->b[9]  = pwt->b[9];
4152     pwd->b[10] = pws->b[11];
4153     pwd->b[11] = pwt->b[11];
4154     pwd->b[12] = pws->b[13];
4155     pwd->b[13] = pwt->b[13];
4156     pwd->b[14] = pws->b[15];
4157     pwd->b[15] = pwt->b[15];
4158     pwd->b[0]  = pws->b[1];
4159     pwd->b[1]  = pwt->b[1];
4160     pwd->b[2]  = pws->b[3];
4161     pwd->b[3]  = pwt->b[3];
4162     pwd->b[4]  = pws->b[5];
4163     pwd->b[5]  = pwt->b[5];
4164     pwd->b[6]  = pws->b[7];
4165     pwd->b[7]  = pwt->b[7];
4166 #else
4167     pwd->b[15] = pws->b[14];
4168     pwd->b[14] = pwt->b[14];
4169     pwd->b[13] = pws->b[12];
4170     pwd->b[12] = pwt->b[12];
4171     pwd->b[11] = pws->b[10];
4172     pwd->b[10] = pwt->b[10];
4173     pwd->b[9]  = pws->b[8];
4174     pwd->b[8]  = pwt->b[8];
4175     pwd->b[7]  = pws->b[6];
4176     pwd->b[6]  = pwt->b[6];
4177     pwd->b[5]  = pws->b[4];
4178     pwd->b[4]  = pwt->b[4];
4179     pwd->b[3]  = pws->b[2];
4180     pwd->b[2]  = pwt->b[2];
4181     pwd->b[1]  = pws->b[0];
4182     pwd->b[0]  = pwt->b[0];
4183 #endif
4184 }
4185 
4186 void helper_msa_ilvev_h(CPUMIPSState *env,
4187                         uint32_t wd, uint32_t ws, uint32_t wt)
4188 {
4189     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4190     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4191     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4192 
4193 #if HOST_BIG_ENDIAN
4194     pwd->h[4] = pws->h[5];
4195     pwd->h[5] = pwt->h[5];
4196     pwd->h[6] = pws->h[7];
4197     pwd->h[7] = pwt->h[7];
4198     pwd->h[0] = pws->h[1];
4199     pwd->h[1] = pwt->h[1];
4200     pwd->h[2] = pws->h[3];
4201     pwd->h[3] = pwt->h[3];
4202 #else
4203     pwd->h[7] = pws->h[6];
4204     pwd->h[6] = pwt->h[6];
4205     pwd->h[5] = pws->h[4];
4206     pwd->h[4] = pwt->h[4];
4207     pwd->h[3] = pws->h[2];
4208     pwd->h[2] = pwt->h[2];
4209     pwd->h[1] = pws->h[0];
4210     pwd->h[0] = pwt->h[0];
4211 #endif
4212 }
4213 
4214 void helper_msa_ilvev_w(CPUMIPSState *env,
4215                         uint32_t wd, uint32_t ws, uint32_t wt)
4216 {
4217     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4218     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4219     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4220 
4221 #if HOST_BIG_ENDIAN
4222     pwd->w[2] = pws->w[3];
4223     pwd->w[3] = pwt->w[3];
4224     pwd->w[0] = pws->w[1];
4225     pwd->w[1] = pwt->w[1];
4226 #else
4227     pwd->w[3] = pws->w[2];
4228     pwd->w[2] = pwt->w[2];
4229     pwd->w[1] = pws->w[0];
4230     pwd->w[0] = pwt->w[0];
4231 #endif
4232 }
4233 
/*
 * Doubleword form of interleave-even: wd doubleword 1 receives ws
 * doubleword 0 and wd doubleword 0 receives wt doubleword 0.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_ilvev_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * ws->d[0] is read before wd->d[0] is written; this matters when wd
     * and ws are the same register, so keep the two statements in order.
     */
    pwd->d[1] = pws->d[0];
    pwd->d[0] = pwt->d[0];
}
4244 
4245 
4246 void helper_msa_ilvod_b(CPUMIPSState *env,
4247                         uint32_t wd, uint32_t ws, uint32_t wt)
4248 {
4249     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4250     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4251     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4252 
4253 #if HOST_BIG_ENDIAN
4254     pwd->b[7]  = pwt->b[6];
4255     pwd->b[6]  = pws->b[6];
4256     pwd->b[5]  = pwt->b[4];
4257     pwd->b[4]  = pws->b[4];
4258     pwd->b[3]  = pwt->b[2];
4259     pwd->b[2]  = pws->b[2];
4260     pwd->b[1]  = pwt->b[0];
4261     pwd->b[0]  = pws->b[0];
4262     pwd->b[15] = pwt->b[14];
4263     pwd->b[14] = pws->b[14];
4264     pwd->b[13] = pwt->b[12];
4265     pwd->b[12] = pws->b[12];
4266     pwd->b[11] = pwt->b[10];
4267     pwd->b[10] = pws->b[10];
4268     pwd->b[9]  = pwt->b[8];
4269     pwd->b[8]  = pws->b[8];
4270 #else
4271     pwd->b[0]  = pwt->b[1];
4272     pwd->b[1]  = pws->b[1];
4273     pwd->b[2]  = pwt->b[3];
4274     pwd->b[3]  = pws->b[3];
4275     pwd->b[4]  = pwt->b[5];
4276     pwd->b[5]  = pws->b[5];
4277     pwd->b[6]  = pwt->b[7];
4278     pwd->b[7]  = pws->b[7];
4279     pwd->b[8]  = pwt->b[9];
4280     pwd->b[9]  = pws->b[9];
4281     pwd->b[10] = pwt->b[11];
4282     pwd->b[11] = pws->b[11];
4283     pwd->b[12] = pwt->b[13];
4284     pwd->b[13] = pws->b[13];
4285     pwd->b[14] = pwt->b[15];
4286     pwd->b[15] = pws->b[15];
4287 #endif
4288 }
4289 
4290 void helper_msa_ilvod_h(CPUMIPSState *env,
4291                         uint32_t wd, uint32_t ws, uint32_t wt)
4292 {
4293     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4294     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4295     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4296 
4297 #if HOST_BIG_ENDIAN
4298     pwd->h[3] = pwt->h[2];
4299     pwd->h[2] = pws->h[2];
4300     pwd->h[1] = pwt->h[0];
4301     pwd->h[0] = pws->h[0];
4302     pwd->h[7] = pwt->h[6];
4303     pwd->h[6] = pws->h[6];
4304     pwd->h[5] = pwt->h[4];
4305     pwd->h[4] = pws->h[4];
4306 #else
4307     pwd->h[0] = pwt->h[1];
4308     pwd->h[1] = pws->h[1];
4309     pwd->h[2] = pwt->h[3];
4310     pwd->h[3] = pws->h[3];
4311     pwd->h[4] = pwt->h[5];
4312     pwd->h[5] = pws->h[5];
4313     pwd->h[6] = pwt->h[7];
4314     pwd->h[7] = pws->h[7];
4315 #endif
4316 }
4317 
4318 void helper_msa_ilvod_w(CPUMIPSState *env,
4319                         uint32_t wd, uint32_t ws, uint32_t wt)
4320 {
4321     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4322     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4323     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4324 
4325 #if HOST_BIG_ENDIAN
4326     pwd->w[1] = pwt->w[0];
4327     pwd->w[0] = pws->w[0];
4328     pwd->w[3] = pwt->w[2];
4329     pwd->w[2] = pws->w[2];
4330 #else
4331     pwd->w[0] = pwt->w[1];
4332     pwd->w[1] = pws->w[1];
4333     pwd->w[2] = pwt->w[3];
4334     pwd->w[3] = pws->w[3];
4335 #endif
4336 }
4337 
/*
 * Doubleword form of interleave-odd: wd doubleword 0 receives wt
 * doubleword 1 and wd doubleword 1 receives ws doubleword 1.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_ilvod_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * wt->d[1] is read before wd->d[1] is written; this matters when wd
     * and wt are the same register, so keep the two statements in order.
     */
    pwd->d[0] = pwt->d[1];
    pwd->d[1] = pws->d[1];
}
4348 
4349 
4350 void helper_msa_ilvl_b(CPUMIPSState *env,
4351                        uint32_t wd, uint32_t ws, uint32_t wt)
4352 {
4353     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4354     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4355     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4356 
4357 #if HOST_BIG_ENDIAN
4358     pwd->b[7]  = pwt->b[15];
4359     pwd->b[6]  = pws->b[15];
4360     pwd->b[5]  = pwt->b[14];
4361     pwd->b[4]  = pws->b[14];
4362     pwd->b[3]  = pwt->b[13];
4363     pwd->b[2]  = pws->b[13];
4364     pwd->b[1]  = pwt->b[12];
4365     pwd->b[0]  = pws->b[12];
4366     pwd->b[15] = pwt->b[11];
4367     pwd->b[14] = pws->b[11];
4368     pwd->b[13] = pwt->b[10];
4369     pwd->b[12] = pws->b[10];
4370     pwd->b[11] = pwt->b[9];
4371     pwd->b[10] = pws->b[9];
4372     pwd->b[9]  = pwt->b[8];
4373     pwd->b[8]  = pws->b[8];
4374 #else
4375     pwd->b[0]  = pwt->b[8];
4376     pwd->b[1]  = pws->b[8];
4377     pwd->b[2]  = pwt->b[9];
4378     pwd->b[3]  = pws->b[9];
4379     pwd->b[4]  = pwt->b[10];
4380     pwd->b[5]  = pws->b[10];
4381     pwd->b[6]  = pwt->b[11];
4382     pwd->b[7]  = pws->b[11];
4383     pwd->b[8]  = pwt->b[12];
4384     pwd->b[9]  = pws->b[12];
4385     pwd->b[10] = pwt->b[13];
4386     pwd->b[11] = pws->b[13];
4387     pwd->b[12] = pwt->b[14];
4388     pwd->b[13] = pws->b[14];
4389     pwd->b[14] = pwt->b[15];
4390     pwd->b[15] = pws->b[15];
4391 #endif
4392 }
4393 
4394 void helper_msa_ilvl_h(CPUMIPSState *env,
4395                        uint32_t wd, uint32_t ws, uint32_t wt)
4396 {
4397     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4398     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4399     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4400 
4401 #if HOST_BIG_ENDIAN
4402     pwd->h[3] = pwt->h[7];
4403     pwd->h[2] = pws->h[7];
4404     pwd->h[1] = pwt->h[6];
4405     pwd->h[0] = pws->h[6];
4406     pwd->h[7] = pwt->h[5];
4407     pwd->h[6] = pws->h[5];
4408     pwd->h[5] = pwt->h[4];
4409     pwd->h[4] = pws->h[4];
4410 #else
4411     pwd->h[0] = pwt->h[4];
4412     pwd->h[1] = pws->h[4];
4413     pwd->h[2] = pwt->h[5];
4414     pwd->h[3] = pws->h[5];
4415     pwd->h[4] = pwt->h[6];
4416     pwd->h[5] = pws->h[6];
4417     pwd->h[6] = pwt->h[7];
4418     pwd->h[7] = pws->h[7];
4419 #endif
4420 }
4421 
4422 void helper_msa_ilvl_w(CPUMIPSState *env,
4423                        uint32_t wd, uint32_t ws, uint32_t wt)
4424 {
4425     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4426     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4427     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4428 
4429 #if HOST_BIG_ENDIAN
4430     pwd->w[1] = pwt->w[3];
4431     pwd->w[0] = pws->w[3];
4432     pwd->w[3] = pwt->w[2];
4433     pwd->w[2] = pws->w[2];
4434 #else
4435     pwd->w[0] = pwt->w[2];
4436     pwd->w[1] = pws->w[2];
4437     pwd->w[2] = pwt->w[3];
4438     pwd->w[3] = pws->w[3];
4439 #endif
4440 }
4441 
/*
 * Doubleword form of interleave-left: wd doubleword 0 receives wt
 * doubleword 1 and wd doubleword 1 receives ws doubleword 1.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_ilvl_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * wt->d[1] is read before wd->d[1] is written; this matters when wd
     * and wt are the same register, so keep the two statements in order.
     */
    pwd->d[0] = pwt->d[1];
    pwd->d[1] = pws->d[1];
}
4452 
4453 
4454 void helper_msa_ilvr_b(CPUMIPSState *env,
4455                        uint32_t wd, uint32_t ws, uint32_t wt)
4456 {
4457     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4458     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4459     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4460 
4461 #if HOST_BIG_ENDIAN
4462     pwd->b[8]  = pws->b[0];
4463     pwd->b[9]  = pwt->b[0];
4464     pwd->b[10] = pws->b[1];
4465     pwd->b[11] = pwt->b[1];
4466     pwd->b[12] = pws->b[2];
4467     pwd->b[13] = pwt->b[2];
4468     pwd->b[14] = pws->b[3];
4469     pwd->b[15] = pwt->b[3];
4470     pwd->b[0]  = pws->b[4];
4471     pwd->b[1]  = pwt->b[4];
4472     pwd->b[2]  = pws->b[5];
4473     pwd->b[3]  = pwt->b[5];
4474     pwd->b[4]  = pws->b[6];
4475     pwd->b[5]  = pwt->b[6];
4476     pwd->b[6]  = pws->b[7];
4477     pwd->b[7]  = pwt->b[7];
4478 #else
4479     pwd->b[15] = pws->b[7];
4480     pwd->b[14] = pwt->b[7];
4481     pwd->b[13] = pws->b[6];
4482     pwd->b[12] = pwt->b[6];
4483     pwd->b[11] = pws->b[5];
4484     pwd->b[10] = pwt->b[5];
4485     pwd->b[9]  = pws->b[4];
4486     pwd->b[8]  = pwt->b[4];
4487     pwd->b[7]  = pws->b[3];
4488     pwd->b[6]  = pwt->b[3];
4489     pwd->b[5]  = pws->b[2];
4490     pwd->b[4]  = pwt->b[2];
4491     pwd->b[3]  = pws->b[1];
4492     pwd->b[2]  = pwt->b[1];
4493     pwd->b[1]  = pws->b[0];
4494     pwd->b[0]  = pwt->b[0];
4495 #endif
4496 }
4497 
4498 void helper_msa_ilvr_h(CPUMIPSState *env,
4499                        uint32_t wd, uint32_t ws, uint32_t wt)
4500 {
4501     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4502     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4503     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4504 
4505 #if HOST_BIG_ENDIAN
4506     pwd->h[4] = pws->h[0];
4507     pwd->h[5] = pwt->h[0];
4508     pwd->h[6] = pws->h[1];
4509     pwd->h[7] = pwt->h[1];
4510     pwd->h[0] = pws->h[2];
4511     pwd->h[1] = pwt->h[2];
4512     pwd->h[2] = pws->h[3];
4513     pwd->h[3] = pwt->h[3];
4514 #else
4515     pwd->h[7] = pws->h[3];
4516     pwd->h[6] = pwt->h[3];
4517     pwd->h[5] = pws->h[2];
4518     pwd->h[4] = pwt->h[2];
4519     pwd->h[3] = pws->h[1];
4520     pwd->h[2] = pwt->h[1];
4521     pwd->h[1] = pws->h[0];
4522     pwd->h[0] = pwt->h[0];
4523 #endif
4524 }
4525 
4526 void helper_msa_ilvr_w(CPUMIPSState *env,
4527                        uint32_t wd, uint32_t ws, uint32_t wt)
4528 {
4529     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4530     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4531     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4532 
4533 #if HOST_BIG_ENDIAN
4534     pwd->w[2] = pws->w[0];
4535     pwd->w[3] = pwt->w[0];
4536     pwd->w[0] = pws->w[1];
4537     pwd->w[1] = pwt->w[1];
4538 #else
4539     pwd->w[3] = pws->w[1];
4540     pwd->w[2] = pwt->w[1];
4541     pwd->w[1] = pws->w[0];
4542     pwd->w[0] = pwt->w[0];
4543 #endif
4544 }
4545 
/*
 * Doubleword form of interleave-right: wd doubleword 1 receives ws
 * doubleword 0 and wd doubleword 0 receives wt doubleword 0.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_ilvr_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * ws->d[0] is read before wd->d[0] is written; this matters when wd
     * and ws are the same register, so keep the two statements in order.
     */
    pwd->d[1] = pws->d[0];
    pwd->d[0] = pwt->d[0];
}
4556 
4557 
4558 /*
4559  * Logic
4560  * -----
4561  *
4562  * +---------------+----------------------------------------------------------+
4563  * | AND.V         | Vector Logical And                                       |
4564  * | NOR.V         | Vector Logical Negated Or                                |
4565  * | OR.V          | Vector Logical Or                                        |
4566  * | XOR.V         | Vector Logical Exclusive Or                              |
4567  * +---------------+----------------------------------------------------------+
4568  */
4569 
4570 
4571 void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4572 {
4573     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4574     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4575     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4576 
4577     pwd->d[0] = pws->d[0] & pwt->d[0];
4578     pwd->d[1] = pws->d[1] & pwt->d[1];
4579 }
4580 
4581 void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4582 {
4583     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4584     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4585     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4586 
4587     pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4588     pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4589 }
4590 
4591 void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4592 {
4593     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4594     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4595     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4596 
4597     pwd->d[0] = pws->d[0] | pwt->d[0];
4598     pwd->d[1] = pws->d[1] | pwt->d[1];
4599 }
4600 
4601 void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4602 {
4603     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4604     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4605     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4606 
4607     pwd->d[0] = pws->d[0] ^ pwt->d[0];
4608     pwd->d[1] = pws->d[1] ^ pwt->d[1];
4609 }
4610 
4611 
4612 /*
4613  * Move
4614  * ----
4615  *
4616  * +---------------+----------------------------------------------------------+
4617  * | MOVE.V        | Vector Move                                              |
4618  * +---------------+----------------------------------------------------------+
4619  */
4620 
4621 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4622 {
4623     pwd->d[0] = pws->d[0];
4624     pwd->d[1] = pws->d[1];
4625 }
4626 
4627 void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4628 {
4629     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4630     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4631 
4632     msa_move_v(pwd, pws);
4633 }
4634 
4635 
4636 /*
4637  * Pack
4638  * ----
4639  *
4640  * +---------------+----------------------------------------------------------+
4641  * | PCKEV.B       | Vector Pack Even (byte)                                  |
4642  * | PCKEV.H       | Vector Pack Even (halfword)                              |
4643  * | PCKEV.W       | Vector Pack Even (word)                                  |
4644  * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4645  * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4646  * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4647  * | PCKOD.W       | Vector Pack Odd (word)                                   |
4648  * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4649  * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4650  * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4651  * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4652  * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4653  * +---------------+----------------------------------------------------------+
4654  */
4655 
4656 
/*
 * Pack the even-indexed byte lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..7 receive wt lanes
 * 0, 2, ..., 14 and wd lanes 8..15 receive ws lanes 0, 2, ..., 14.
 * The HOST_BIG_ENDIAN branch is the same sequence of moves with every
 * byte index XORed with 7.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each even source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckev_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->b[8]  = pws->b[9];
    pwd->b[10] = pws->b[13];
    pwd->b[12] = pws->b[1];
    pwd->b[14] = pws->b[5];
    pwd->b[0]  = pwt->b[9];
    pwd->b[2]  = pwt->b[13];
    pwd->b[4]  = pwt->b[1];
    pwd->b[6]  = pwt->b[5];
    pwd->b[9]  = pws->b[11];
    pwd->b[13] = pws->b[3];
    pwd->b[1]  = pwt->b[11];
    pwd->b[5]  = pwt->b[3];
    pwd->b[11] = pws->b[15];
    pwd->b[3]  = pwt->b[15];
    pwd->b[15] = pws->b[7];
    pwd->b[7]  = pwt->b[7];
#else
    pwd->b[15] = pws->b[14];
    pwd->b[13] = pws->b[10];
    pwd->b[11] = pws->b[6];
    pwd->b[9]  = pws->b[2];
    pwd->b[7]  = pwt->b[14];
    pwd->b[5]  = pwt->b[10];
    pwd->b[3]  = pwt->b[6];
    pwd->b[1]  = pwt->b[2];
    pwd->b[14] = pws->b[12];
    pwd->b[10] = pws->b[4];
    pwd->b[6]  = pwt->b[12];
    pwd->b[2]  = pwt->b[4];
    pwd->b[12] = pws->b[8];
    pwd->b[4]  = pwt->b[8];
    pwd->b[8]  = pws->b[0];
    pwd->b[0]  = pwt->b[0];
#endif
}
4700 
/*
 * Pack the even-indexed halfword lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..3 receive wt lanes
 * 0, 2, 4, 6 and wd lanes 4..7 receive ws lanes 0, 2, 4, 6. The
 * HOST_BIG_ENDIAN branch is the same sequence of moves with every
 * halfword index XORed with 3.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each even source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckev_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->h[4] = pws->h[5];
    pwd->h[6] = pws->h[1];
    pwd->h[0] = pwt->h[5];
    pwd->h[2] = pwt->h[1];
    pwd->h[5] = pws->h[7];
    pwd->h[1] = pwt->h[7];
    pwd->h[7] = pws->h[3];
    pwd->h[3] = pwt->h[3];
#else
    pwd->h[7] = pws->h[6];
    pwd->h[5] = pws->h[2];
    pwd->h[3] = pwt->h[6];
    pwd->h[1] = pwt->h[2];
    pwd->h[6] = pws->h[4];
    pwd->h[2] = pwt->h[4];
    pwd->h[4] = pws->h[0];
    pwd->h[0] = pwt->h[0];
#endif
}
4728 
/*
 * Pack the even-indexed word lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..1 receive wt lanes 0, 2
 * and wd lanes 2..3 receive ws lanes 0, 2. The HOST_BIG_ENDIAN branch is
 * the same sequence of moves with every word index XORed with 1.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each even source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckev_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->w[2] = pws->w[3];
    pwd->w[0] = pwt->w[3];
    pwd->w[3] = pws->w[1];
    pwd->w[1] = pwt->w[1];
#else
    pwd->w[3] = pws->w[2];
    pwd->w[1] = pwt->w[2];
    pwd->w[2] = pws->w[0];
    pwd->w[0] = pwt->w[0];
#endif
}
4748 
/*
 * Doubleword form of pack-even: wd doubleword 1 receives ws doubleword 0
 * and wd doubleword 0 receives wt doubleword 0.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_pckev_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * ws->d[0] is read before wd->d[0] is written; this matters when wd
     * and ws are the same register, so keep the two statements in order.
     */
    pwd->d[1] = pws->d[0];
    pwd->d[0] = pwt->d[0];
}
4759 
4760 
/*
 * Pack the odd-indexed byte lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..7 receive wt lanes
 * 1, 3, ..., 15 and wd lanes 8..15 receive ws lanes 1, 3, ..., 15.
 * The HOST_BIG_ENDIAN branch is the same sequence of moves with every
 * byte index XORed with 7.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each odd source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckod_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->b[7]  = pwt->b[6];
    pwd->b[5]  = pwt->b[2];
    pwd->b[3]  = pwt->b[14];
    pwd->b[1]  = pwt->b[10];
    pwd->b[15] = pws->b[6];
    pwd->b[13] = pws->b[2];
    pwd->b[11] = pws->b[14];
    pwd->b[9]  = pws->b[10];
    pwd->b[6]  = pwt->b[4];
    pwd->b[2]  = pwt->b[12];
    pwd->b[14] = pws->b[4];
    pwd->b[10] = pws->b[12];
    pwd->b[4]  = pwt->b[0];
    pwd->b[12] = pws->b[0];
    pwd->b[0]  = pwt->b[8];
    pwd->b[8]  = pws->b[8];
#else
    pwd->b[0]  = pwt->b[1];
    pwd->b[2]  = pwt->b[5];
    pwd->b[4]  = pwt->b[9];
    pwd->b[6]  = pwt->b[13];
    pwd->b[8]  = pws->b[1];
    pwd->b[10] = pws->b[5];
    pwd->b[12] = pws->b[9];
    pwd->b[14] = pws->b[13];
    pwd->b[1]  = pwt->b[3];
    pwd->b[5]  = pwt->b[11];
    pwd->b[9]  = pws->b[3];
    pwd->b[13] = pws->b[11];
    pwd->b[3]  = pwt->b[7];
    pwd->b[11] = pws->b[7];
    pwd->b[7]  = pwt->b[15];
    pwd->b[15] = pws->b[15];
#endif

}
4805 
/*
 * Pack the odd-indexed halfword lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..3 receive wt lanes
 * 1, 3, 5, 7 and wd lanes 4..7 receive ws lanes 1, 3, 5, 7. The
 * HOST_BIG_ENDIAN branch is the same sequence of moves with every
 * halfword index XORed with 3.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each odd source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckod_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->h[3] = pwt->h[2];
    pwd->h[1] = pwt->h[6];
    pwd->h[7] = pws->h[2];
    pwd->h[5] = pws->h[6];
    pwd->h[2] = pwt->h[0];
    pwd->h[6] = pws->h[0];
    pwd->h[0] = pwt->h[4];
    pwd->h[4] = pws->h[4];
#else
    pwd->h[0] = pwt->h[1];
    pwd->h[2] = pwt->h[5];
    pwd->h[4] = pws->h[1];
    pwd->h[6] = pws->h[5];
    pwd->h[1] = pwt->h[3];
    pwd->h[5] = pws->h[3];
    pwd->h[3] = pwt->h[7];
    pwd->h[7] = pws->h[7];
#endif
}
4833 
/*
 * Pack the odd-indexed word lanes of ws and wt into wd.
 *
 * Reading the little-endian branch: wd lanes 0..1 receive wt lanes 1, 3
 * and wd lanes 2..3 receive ws lanes 1, 3. The HOST_BIG_ENDIAN branch is
 * the same sequence of moves with every word index XORed with 1.
 *
 * The assignment order is significant when wd names the same register as
 * ws or wt: each odd source lane has to be read before it is
 * overwritten. Do not reorder these statements without re-checking the
 * aliasing cases.
 */
void helper_msa_pckod_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if HOST_BIG_ENDIAN
    pwd->w[1] = pwt->w[0];
    pwd->w[3] = pws->w[0];
    pwd->w[0] = pwt->w[2];
    pwd->w[2] = pws->w[2];
#else
    pwd->w[0] = pwt->w[1];
    pwd->w[2] = pws->w[1];
    pwd->w[1] = pwt->w[3];
    pwd->w[3] = pws->w[3];
#endif
}
4853 
/*
 * Doubleword form of pack-odd: wd doubleword 0 receives wt doubleword 1
 * and wd doubleword 1 receives ws doubleword 1.
 *
 * There is no host-endianness variant for this element size.
 */
void helper_msa_pckod_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

    /*
     * wt->d[1] is read before wd->d[1] is written; this matters when wd
     * and wt are the same register, so keep the two statements in order.
     */
    pwd->d[0] = pwt->d[1];
    pwd->d[1] = pws->d[1];
}
4864 
4865 
4866 /*
4867  * Shift
4868  * -----
4869  *
4870  * +---------------+----------------------------------------------------------+
4871  * | SLL.B         | Vector Shift Left (byte)                                 |
4872  * | SLL.H         | Vector Shift Left (halfword)                             |
4873  * | SLL.W         | Vector Shift Left (word)                                 |
4874  * | SLL.D         | Vector Shift Left (doubleword)                           |
4875  * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4876  * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4877  * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4878  * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4879  * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4880  * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4881  * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4882  * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4883  * | SRL.B         | Vector Shift Right Logical (byte)                        |
4884  * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4885  * | SRL.W         | Vector Shift Right Logical (word)                        |
4886  * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4887  * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4888  * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4889  * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4890  * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4891  * +---------------+----------------------------------------------------------+
4892  */
4893 
4894 
/*
 * Element shift-left used by the sll helpers.
 *
 * @df:   data format selector, passed to BIT_POSITION().
 * @arg1: value to shift.
 * @arg2: value from which the shift count is derived via
 *        BIT_POSITION(arg2, df).
 *
 * Returns arg1 shifted left by that count. The shift is carried out on
 * the unsigned representation of arg1: left-shifting a negative signed
 * value, or shifting a set bit into or past the sign bit, is undefined
 * in ISO C, whereas an unsigned shift simply discards the bits shifted
 * out.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return (int64_t)((uint64_t)arg1 << b_arg2);
}
4900 
4901 void helper_msa_sll_b(CPUMIPSState *env,
4902                       uint32_t wd, uint32_t ws, uint32_t wt)
4903 {
4904     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4905     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4906     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4907 
4908     pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4909     pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4910     pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4911     pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4912     pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4913     pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4914     pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4915     pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4916     pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4917     pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4918     pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4919     pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4920     pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4921     pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4922     pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4923     pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4924 }
4925 
4926 void helper_msa_sll_h(CPUMIPSState *env,
4927                       uint32_t wd, uint32_t ws, uint32_t wt)
4928 {
4929     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4930     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4931     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4932 
4933     pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4934     pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4935     pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4936     pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4937     pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4938     pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4939     pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4940     pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4941 }
4942 
4943 void helper_msa_sll_w(CPUMIPSState *env,
4944                       uint32_t wd, uint32_t ws, uint32_t wt)
4945 {
4946     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4947     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4948     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4949 
4950     pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4951     pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4952     pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4953     pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4954 }
4955 
4956 void helper_msa_sll_d(CPUMIPSState *env,
4957                       uint32_t wd, uint32_t ws, uint32_t wt)
4958 {
4959     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4960     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4961     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4962 
4963     pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4964     pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4965 }
4966 
4967 
/*
 * Shift one lane right, keeping the operand signed.
 *
 * The effective shift count is BIT_POSITION(arg2, df); arg1 is shifted as
 * a signed 64-bit value, so the result depends on the compiler's signed
 * right-shift behaviour.
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 >> shift;
}
4973 
4974 void helper_msa_sra_b(CPUMIPSState *env,
4975                       uint32_t wd, uint32_t ws, uint32_t wt)
4976 {
4977     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4978     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4979     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4980 
4981     pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4982     pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4983     pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4984     pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4985     pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4986     pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4987     pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4988     pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4989     pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4990     pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4991     pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4992     pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4993     pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4994     pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4995     pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4996     pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
4997 }
4998 
4999 void helper_msa_sra_h(CPUMIPSState *env,
5000                       uint32_t wd, uint32_t ws, uint32_t wt)
5001 {
5002     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5003     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5004     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5005 
5006     pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5007     pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5008     pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5009     pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5010     pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5011     pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5012     pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5013     pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5014 }
5015 
5016 void helper_msa_sra_w(CPUMIPSState *env,
5017                       uint32_t wd, uint32_t ws, uint32_t wt)
5018 {
5019     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5020     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5021     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5022 
5023     pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5024     pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5025     pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5026     pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5027 }
5028 
5029 void helper_msa_sra_d(CPUMIPSState *env,
5030                       uint32_t wd, uint32_t ws, uint32_t wt)
5031 {
5032     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5033     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5034     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5035 
5036     pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5037     pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5038 }
5039 
5040 
/*
 * Shift one lane right (signed operand) with rounding.
 *
 * The effective shift count is BIT_POSITION(arg2, df).  A count of zero
 * returns arg1 unchanged.  Otherwise the last bit shifted out (bit
 * count - 1 of arg1) is added to the shifted value, which rounds the
 * result.
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    int64_t round_up;

    if (shift == 0) {
        return arg1;
    }

    round_up = (arg1 >> (shift - 1)) & 1;
    return (arg1 >> shift) + round_up;
}
5051 
5052 void helper_msa_srar_b(CPUMIPSState *env,
5053                        uint32_t wd, uint32_t ws, uint32_t wt)
5054 {
5055     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5056     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5057     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5058 
5059     pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5060     pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5061     pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5062     pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5063     pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5064     pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5065     pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5066     pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5067     pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5068     pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5069     pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5070     pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5071     pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5072     pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5073     pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5074     pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5075 }
5076 
5077 void helper_msa_srar_h(CPUMIPSState *env,
5078                        uint32_t wd, uint32_t ws, uint32_t wt)
5079 {
5080     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5081     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5082     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5083 
5084     pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5085     pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5086     pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5087     pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5088     pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5089     pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5090     pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5091     pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5092 }
5093 
5094 void helper_msa_srar_w(CPUMIPSState *env,
5095                        uint32_t wd, uint32_t ws, uint32_t wt)
5096 {
5097     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5098     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5099     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5100 
5101     pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5102     pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5103     pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5104     pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5105 }
5106 
5107 void helper_msa_srar_d(CPUMIPSState *env,
5108                        uint32_t wd, uint32_t ws, uint32_t wt)
5109 {
5110     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5111     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5112     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5113 
5114     pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5115     pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5116 }
5117 
5118 
/*
 * Shift one lane right as an unsigned value.
 *
 * arg1 is first masked to the lane width with UNSIGNED(arg1, df), so any
 * sign-extension bits above the lane are dropped and zeros are shifted
 * in.  The effective shift count is BIT_POSITION(arg2, df).
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    const uint64_t lane_bits = UNSIGNED(arg1, df);

    return lane_bits >> shift;
}
5125 
5126 void helper_msa_srl_b(CPUMIPSState *env,
5127                       uint32_t wd, uint32_t ws, uint32_t wt)
5128 {
5129     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5130     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5131     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5132 
5133     pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5134     pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5135     pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5136     pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5137     pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5138     pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5139     pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5140     pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5141     pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5142     pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5143     pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5144     pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5145     pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5146     pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5147     pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5148     pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5149 }
5150 
5151 void helper_msa_srl_h(CPUMIPSState *env,
5152                       uint32_t wd, uint32_t ws, uint32_t wt)
5153 {
5154     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5155     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5156     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5157 
5158     pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5159     pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5160     pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5161     pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5162     pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5163     pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5164     pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5165     pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5166 }
5167 
5168 void helper_msa_srl_w(CPUMIPSState *env,
5169                       uint32_t wd, uint32_t ws, uint32_t wt)
5170 {
5171     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5172     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5173     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5174 
5175     pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5176     pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5177     pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5178     pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5179 }
5180 
5181 void helper_msa_srl_d(CPUMIPSState *env,
5182                       uint32_t wd, uint32_t ws, uint32_t wt)
5183 {
5184     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5185     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5186     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5187 
5188     pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5189     pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5190 }
5191 
5192 
/*
 * Shift one lane right as an unsigned value, with rounding.
 *
 * arg1 is masked to the lane width with UNSIGNED(arg1, df); the effective
 * shift count is BIT_POSITION(arg2, df).  A count of zero returns the
 * masked value unchanged.  Otherwise the last bit shifted out is added to
 * the shifted value.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    const uint64_t lane_bits = UNSIGNED(arg1, df);
    uint64_t round_up;

    if (shift == 0) {
        return lane_bits;
    }

    round_up = (lane_bits >> (shift - 1)) & 1;
    return (lane_bits >> shift) + round_up;
}
5204 
5205 void helper_msa_srlr_b(CPUMIPSState *env,
5206                        uint32_t wd, uint32_t ws, uint32_t wt)
5207 {
5208     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5209     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5210     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5211 
5212     pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5213     pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5214     pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5215     pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5216     pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5217     pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5218     pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5219     pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5220     pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5221     pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5222     pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5223     pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5224     pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5225     pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5226     pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5227     pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5228 }
5229 
5230 void helper_msa_srlr_h(CPUMIPSState *env,
5231                        uint32_t wd, uint32_t ws, uint32_t wt)
5232 {
5233     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5234     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5235     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5236 
5237     pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5238     pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5239     pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5240     pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5241     pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5242     pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5243     pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5244     pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5245 }
5246 
5247 void helper_msa_srlr_w(CPUMIPSState *env,
5248                        uint32_t wd, uint32_t ws, uint32_t wt)
5249 {
5250     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5251     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5252     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5253 
5254     pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5255     pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5256     pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5257     pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5258 }
5259 
5260 void helper_msa_srlr_d(CPUMIPSState *env,
5261                        uint32_t wd, uint32_t ws, uint32_t wt)
5262 {
5263     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5264     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5265     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5266 
5267     pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5268     pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5269 }
5270 
5271 
5272 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5273 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5274         uint32_t i8)                                                    \
5275 {                                                                       \
5276     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5277     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5278     uint32_t i;                                                         \
5279     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5280         DEST = OPERATION;                                               \
5281     }                                                                   \
5282 }
5283 
5284 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5285 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5286 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5287 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5288 
5289 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5290             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5291 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5292         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5293 
5294 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5295             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5296 MSA_FN_IMM8(bmzi_b, pwd->b[i],
5297         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5298 
5299 #define BIT_SELECT(dest, arg1, arg2, df) \
5300             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5301 MSA_FN_IMM8(bseli_b, pwd->b[i],
5302         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5303 
5304 #undef BIT_SELECT
5305 #undef BIT_MOVE_IF_ZERO
5306 #undef BIT_MOVE_IF_NOT_ZERO
5307 #undef MSA_FN_IMM8
5308 
5309 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5310 
5311 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5312                        uint32_t ws, uint32_t imm)
5313 {
5314     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5315     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5316     wr_t wx, *pwx = &wx;
5317     uint32_t i;
5318 
5319     switch (df) {
5320     case DF_BYTE:
5321         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5322             pwx->b[i] = pws->b[SHF_POS(i, imm)];
5323         }
5324         break;
5325     case DF_HALF:
5326         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5327             pwx->h[i] = pws->h[SHF_POS(i, imm)];
5328         }
5329         break;
5330     case DF_WORD:
5331         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5332             pwx->w[i] = pws->w[SHF_POS(i, imm)];
5333         }
5334         break;
5335     default:
5336         assert(0);
5337     }
5338     msa_move_v(pwd, pwx);
5339 }
5340 
5341 #define MSA_BINOP_IMM_DF(helper, func)                                  \
5342 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5343                         uint32_t wd, uint32_t ws, int32_t u5)           \
5344 {                                                                       \
5345     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5346     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5347     uint32_t i;                                                         \
5348                                                                         \
5349     switch (df) {                                                       \
5350     case DF_BYTE:                                                       \
5351         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5352             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5353         }                                                               \
5354         break;                                                          \
5355     case DF_HALF:                                                       \
5356         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5357             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5358         }                                                               \
5359         break;                                                          \
5360     case DF_WORD:                                                       \
5361         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5362             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5363         }                                                               \
5364         break;                                                          \
5365     case DF_DOUBLE:                                                     \
5366         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5367             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5368         }                                                               \
5369         break;                                                          \
5370     default:                                                            \
5371         assert(0);                                                      \
5372     }                                                                   \
5373 }
5374 
5375 MSA_BINOP_IMM_DF(addvi, addv)
5376 MSA_BINOP_IMM_DF(subvi, subv)
5377 MSA_BINOP_IMM_DF(ceqi, ceq)
5378 MSA_BINOP_IMM_DF(clei_s, cle_s)
5379 MSA_BINOP_IMM_DF(clei_u, cle_u)
5380 MSA_BINOP_IMM_DF(clti_s, clt_s)
5381 MSA_BINOP_IMM_DF(clti_u, clt_u)
5382 MSA_BINOP_IMM_DF(maxi_s, max_s)
5383 MSA_BINOP_IMM_DF(maxi_u, max_u)
5384 MSA_BINOP_IMM_DF(mini_s, min_s)
5385 MSA_BINOP_IMM_DF(mini_u, min_u)
5386 #undef MSA_BINOP_IMM_DF
5387 
5388 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5389                        int32_t s10)
5390 {
5391     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5392     uint32_t i;
5393 
5394     switch (df) {
5395     case DF_BYTE:
5396         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5397             pwd->b[i] = (int8_t)s10;
5398         }
5399         break;
5400     case DF_HALF:
5401         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5402             pwd->h[i] = (int16_t)s10;
5403         }
5404         break;
5405     case DF_WORD:
5406         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5407             pwd->w[i] = (int32_t)s10;
5408         }
5409         break;
5410     case DF_DOUBLE:
5411         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5412             pwd->d[i] = (int64_t)s10;
5413         }
5414        break;
5415     default:
5416         assert(0);
5417     }
5418 }
5419 
/*
 * Signed saturation: clamp arg to the range of an (m + 1)-bit signed
 * integer, i.e. [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].  Values already in
 * range are returned unchanged.  df is not used.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    if (arg < M_MIN_INT(m + 1)) {
        return M_MIN_INT(m + 1);
    }
    if (arg > M_MAX_INT(m + 1)) {
        return M_MAX_INT(m + 1);
    }
    return arg;
}
5426 
/*
 * Unsigned saturation: mask arg to the lane width with UNSIGNED(arg, df),
 * then cap it at M_MAX_UINT(m + 1), the largest (m + 1)-bit unsigned
 * value.
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t ceiling = M_MAX_UINT(m + 1);
    const uint64_t value = UNSIGNED(arg, df);

    if (value < ceiling) {
        return value;
    }
    return ceiling;
}
5433 
5434 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5435 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5436                        uint32_t ws, uint32_t u5)                        \
5437 {                                                                       \
5438     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5439     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5440     uint32_t i;                                                         \
5441                                                                         \
5442     switch (df) {                                                       \
5443     case DF_BYTE:                                                       \
5444         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5445             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5446         }                                                               \
5447         break;                                                          \
5448     case DF_HALF:                                                       \
5449         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5450             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5451         }                                                               \
5452         break;                                                          \
5453     case DF_WORD:                                                       \
5454         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5455             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5456         }                                                               \
5457         break;                                                          \
5458     case DF_DOUBLE:                                                     \
5459         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5460             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5461         }                                                               \
5462         break;                                                          \
5463     default:                                                            \
5464         assert(0);                                                      \
5465     }                                                                   \
5466 }
5467 
5468 MSA_BINOP_IMMU_DF(slli, sll)
5469 MSA_BINOP_IMMU_DF(srai, sra)
5470 MSA_BINOP_IMMU_DF(srli, srl)
5471 MSA_BINOP_IMMU_DF(bclri, bclr)
5472 MSA_BINOP_IMMU_DF(bseti, bset)
5473 MSA_BINOP_IMMU_DF(bnegi, bneg)
5474 MSA_BINOP_IMMU_DF(sat_s, sat_s)
5475 MSA_BINOP_IMMU_DF(sat_u, sat_u)
5476 MSA_BINOP_IMMU_DF(srari, srar)
5477 MSA_BINOP_IMMU_DF(srlri, srlr)
5478 #undef MSA_BINOP_IMMU_DF
5479 
5480 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5481 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5482                                   uint32_t wd, uint32_t ws, uint32_t u5) \
5483 {                                                                       \
5484     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5485     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5486     uint32_t i;                                                         \
5487                                                                         \
5488     switch (df) {                                                       \
5489     case DF_BYTE:                                                       \
5490         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5491             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5492                                             u5);                        \
5493         }                                                               \
5494         break;                                                          \
5495     case DF_HALF:                                                       \
5496         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5497             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5498                                             u5);                        \
5499         }                                                               \
5500         break;                                                          \
5501     case DF_WORD:                                                       \
5502         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5503             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5504                                             u5);                        \
5505         }                                                               \
5506         break;                                                          \
5507     case DF_DOUBLE:                                                     \
5508         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5509             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5510                                             u5);                        \
5511         }                                                               \
5512         break;                                                          \
5513     default:                                                            \
5514         assert(0);                                                      \
5515     }                                                                   \
5516 }
5517 
5518 MSA_TEROP_IMMU_DF(binsli, binsl)
5519 MSA_TEROP_IMMU_DF(binsri, binsr)
5520 #undef MSA_TEROP_IMMU_DF
5521 
/*
 * Slide one row of s bytes (row index k) of the destination register.
 *
 * The row from pws (low half) and the row from pwd (high half) are
 * concatenated into the scratch buffer v, then the s bytes starting at
 * offset n of that buffer are written back to the pwd row.
 *
 * The macro is not self-contained: it uses i, n, v, pws and pwd from the
 * scope it is expanded in.  Parameters are parenthesized so that
 * expression arguments cannot regroup the index arithmetic.
 */
#define CONCATENATE_AND_SLIDE(s, k)                 \
    do {                                            \
        for (i = 0; i < (s); i++) {                 \
            v[i]       = pws->b[(s) * (k) + i];     \
            v[i + (s)] = pwd->b[(s) * (k) + i];     \
        }                                           \
        for (i = 0; i < (s); i++) {                 \
            pwd->b[(s) * (k) + i] = v[i + n];       \
        }                                           \
    } while (0)
5532 
5533 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5534                               wr_t *pws, target_ulong rt)
5535 {
5536     uint32_t n = rt % DF_ELEMENTS(df);
5537     uint8_t v[64];
5538     uint32_t i, k;
5539 
5540     switch (df) {
5541     case DF_BYTE:
5542         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5543         break;
5544     case DF_HALF:
5545         for (k = 0; k < 2; k++) {
5546             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5547         }
5548         break;
5549     case DF_WORD:
5550         for (k = 0; k < 4; k++) {
5551             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5552         }
5553         break;
5554     case DF_DOUBLE:
5555         for (k = 0; k < 8; k++) {
5556             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5557         }
5558         break;
5559     default:
5560         assert(0);
5561     }
5562 }
5563 
/*
 * Multiply two lanes and shift the product right by DF_BITS(df) - 1.
 *
 * Special case: when both operands equal DF_MIN_INT(df), DF_MAX_INT(df)
 * is returned instead of the shifted product.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t most_negative = DF_MIN_INT(df);

    if (arg1 != most_negative || arg2 != most_negative) {
        return (arg1 * arg2) >> (DF_BITS(df) - 1);
    }
    return DF_MAX_INT(df);
}
5574 
/*
 * Multiply two lanes, add a rounding term, and shift the sum right by
 * DF_BITS(df) - 1.
 *
 * The rounding term is 1 << (DF_BITS(df) - 2), i.e. half of the weight
 * of the lowest bit kept after the shift.  When both operands equal
 * DF_MIN_INT(df), DF_MAX_INT(df) is returned instead.
 *
 * The rounding term is built from a 64-bit one: a plain int constant
 * would be shifted by 62 for a 64-bit lane, which exceeds the width of
 * int.  For the narrower formats the value is unchanged.
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t q_min = DF_MIN_INT(df);
    int64_t q_max = DF_MAX_INT(df);
    int64_t r_bit = (int64_t)1 << (DF_BITS(df) - 2);

    if (arg1 == q_min && arg2 == q_min) {
        return q_max;
    }
    return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
}
5586 
5587 #define MSA_BINOP_DF(func) \
5588 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5589                                 uint32_t wd, uint32_t ws, uint32_t wt)  \
5590 {                                                                       \
5591     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5592     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5593     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5594                                                                         \
5595     switch (df) {                                                       \
5596     case DF_BYTE:                                                       \
5597         pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5598         pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5599         pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5600         pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5601         pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5602         pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5603         pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5604         pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5605         pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5606         pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5607         pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5608         pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5609         pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5610         pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5611         pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5612         pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5613         break;                                                          \
5614     case DF_HALF:                                                       \
5615         pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5616         pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5617         pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5618         pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5619         pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5620         pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5621         pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5622         pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5623         break;                                                          \
5624     case DF_WORD:                                                       \
5625         pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5626         pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5627         pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5628         pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5629         break;                                                          \
5630     case DF_DOUBLE:                                                     \
5631         pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5632         pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5633         break;                                                          \
5634     default:                                                            \
5635         assert(0);                                                      \
5636     }                                                                   \
5637 }
5638 
5639 MSA_BINOP_DF(mul_q)
5640 MSA_BINOP_DF(mulr_q)
5641 #undef MSA_BINOP_DF
5642 
5643 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5644                        uint32_t ws, uint32_t rt)
5645 {
5646     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5647     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5648 
5649     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5650 }
5651 
/*
 * Fixed-point (Q format) multiply-add: dest + arg1 * arg2.
 *
 * dest is first scaled up by DF_BITS(df) - 1 bits so that it lines up
 * with the raw product, the sum is scaled back down, and the result is
 * clamped to the signed df-wide range.
 */
static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lo = DF_MIN_INT(df);
    const int64_t hi = DF_MAX_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t sum =
        ((dest << (DF_BITS(df) - 1)) + product) >> (DF_BITS(df) - 1);

    if (sum < lo) {
        return lo;
    }
    if (hi < sum) {
        return hi;
    }
    return sum;
}
5665 
/*
 * Fixed-point (Q format) multiply-subtract: dest - arg1 * arg2.
 *
 * dest is first scaled up by DF_BITS(df) - 1 bits so that it lines up
 * with the raw product, the difference is scaled back down, and the
 * result is clamped to the signed df-wide range.
 */
static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lo = DF_MIN_INT(df);
    const int64_t hi = DF_MAX_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t diff =
        ((dest << (DF_BITS(df) - 1)) - product) >> (DF_BITS(df) - 1);

    if (diff < lo) {
        return lo;
    }
    if (hi < diff) {
        return hi;
    }
    return diff;
}
5679 
/*
 * Fixed-point (Q format) multiply-add with rounding:
 * dest + arg1 * arg2, with a rounding bit added before scaling down.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits to line up with the raw
 * product; the final value is clamped to the signed df-wide range.
 */
static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * Build the rounding bit from a 64-bit one: for DF_DOUBLE the shift
     * count is 62, which is not a valid shift of a plain int.
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5694 
/*
 * Fixed-point (Q format) multiply-subtract with rounding:
 * dest - arg1 * arg2, with a rounding bit added before scaling down.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits to line up with the raw
 * product; the final value is clamped to the signed df-wide range.
 */
static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * Build the rounding bit from a 64-bit one: for DF_DOUBLE the shift
     * count is 62, which is not a valid shift of a plain int.
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5709 
5710 #define MSA_TEROP_DF(func) \
5711 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5712                                 uint32_t ws, uint32_t wt)                     \
5713 {                                                                             \
5714     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5715     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5716     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5717                                                                               \
5718     switch (df) {                                                             \
5719     case DF_BYTE:                                                             \
5720         pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5721                                              pwt->b[0]);                      \
5722         pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5723                                              pwt->b[1]);                      \
5724         pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5725                                              pwt->b[2]);                      \
5726         pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5727                                              pwt->b[3]);                      \
5728         pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5729                                              pwt->b[4]);                      \
5730         pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5731                                              pwt->b[5]);                      \
5732         pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5733                                              pwt->b[6]);                      \
5734         pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5735                                              pwt->b[7]);                      \
5736         pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5737                                              pwt->b[8]);                      \
5738         pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5739                                              pwt->b[9]);                      \
5740         pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5741                                              pwt->b[10]);                     \
5742         pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5743                                              pwt->b[11]);                     \
5744         pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5745                                              pwt->b[12]);                     \
5746         pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5747                                              pwt->b[13]);                     \
5748         pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5749                                              pwt->b[14]);                     \
5750         pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5751                                              pwt->b[15]);                     \
5752         break;                                                                \
5753     case DF_HALF:                                                             \
5754         pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5755         pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5756         pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5757         pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5758         pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5759         pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5760         pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5761         pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5762         break;                                                                \
5763     case DF_WORD:                                                             \
5764         pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5765         pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5766         pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5767         pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5768         break;                                                                \
5769     case DF_DOUBLE:                                                           \
5770         pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5771         pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5772         break;                                                                \
5773     default:                                                                  \
5774         assert(0);                                                            \
5775     }                                                                         \
5776 }
5777 
5778 MSA_TEROP_DF(binsl)
5779 MSA_TEROP_DF(binsr)
5780 MSA_TEROP_DF(madd_q)
5781 MSA_TEROP_DF(msub_q)
5782 MSA_TEROP_DF(maddr_q)
5783 MSA_TEROP_DF(msubr_q)
5784 #undef MSA_TEROP_DF
5785 
5786 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5787                                 wr_t *pws, target_ulong rt)
5788 {
5789     uint32_t n = rt % DF_ELEMENTS(df);
5790     uint32_t i;
5791 
5792     switch (df) {
5793     case DF_BYTE:
5794         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5795             pwd->b[i] = pws->b[n];
5796         }
5797         break;
5798     case DF_HALF:
5799         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5800             pwd->h[i] = pws->h[n];
5801         }
5802         break;
5803     case DF_WORD:
5804         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5805             pwd->w[i] = pws->w[n];
5806         }
5807         break;
5808     case DF_DOUBLE:
5809         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5810             pwd->d[i] = pws->d[n];
5811         }
5812        break;
5813     default:
5814         assert(0);
5815     }
5816 }
5817 
5818 void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5819                          uint32_t ws, uint32_t rt)
5820 {
5821     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5822     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5823 
5824     msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5825 }
5826 
/*
 * Scaffolding for helpers generated with MSA_FN_DF().
 *
 * Before instantiating MSA_FN_DF() the user must define:
 *   MSA_DO(DF)        - the per-element statement, DF being the wr_t
 *                       member name (b, h, w or d);
 *   MSA_LOOP_COND(DF) - the iteration count for data format DF.
 * The per-format wrappers below only forward to those two macros.
 */
#define MSA_DO_B MSA_DO(b)
#define MSA_DO_H MSA_DO(h)
#define MSA_DO_W MSA_DO(w)
#define MSA_DO_D MSA_DO(d)

#define MSA_LOOP_B MSA_LOOP(B)
#define MSA_LOOP_H MSA_LOOP(H)
#define MSA_LOOP_W MSA_LOOP(W)
#define MSA_LOOP_D MSA_LOOP(D)

#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)

/* Run MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1 (uses the caller's i) */
#define MSA_LOOP(DF) \
    do { \
        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
            MSA_DO_ ## DF; \
        } \
    } while (0)

/*
 * Define helper_msa_<FUNC>().  The element loop has pwd, pws, pwt, pwx
 * and i in scope; pwx points at a scratch vector which is copied to
 * the destination register with msa_move_v() once the loop is done.
 */
#define MSA_FN_DF(FUNC)                                             \
void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
        uint32_t ws, uint32_t wt)                                   \
{                                                                   \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
    wr_t wx;                                                        \
    wr_t *pwx = &wx;                                                \
    uint32_t i;                                                     \
                                                                    \
    if (df == DF_BYTE) {                                            \
        MSA_LOOP_B;                                                 \
    } else if (df == DF_HALF) {                                     \
        MSA_LOOP_H;                                                 \
    } else if (df == DF_WORD) {                                     \
        MSA_LOOP_W;                                                 \
    } else if (df == DF_DOUBLE) {                                   \
        MSA_LOOP_D;                                                 \
    } else {                                                        \
        assert(0);                                                  \
    }                                                               \
    msa_move_v(pwd, pwx);                                           \
}
5876 
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF) / 2)

/*
 * Half-vector element accessors.  R<x>(pwr, i) names element i of the
 * b/h/w/d view of *pwr; L<x>(pwr, i) names the element half a vector
 * further on (i plus half the element count of that format).
 *
 * Both macro arguments are parenthesized so that pointer expressions
 * such as &reg and compound index expressions expand as intended.
 */
#define Rb(pwr, i) ((pwr)->b[(i)])
#define Lb(pwr, i) ((pwr)->b[(i) + DF_ELEMENTS(DF_BYTE) / 2])
#define Rh(pwr, i) ((pwr)->h[(i)])
#define Lh(pwr, i) ((pwr)->h[(i) + DF_ELEMENTS(DF_HALF) / 2])
#define Rw(pwr, i) ((pwr)->w[(i)])
#define Lw(pwr, i) ((pwr)->w[(i) + DF_ELEMENTS(DF_WORD) / 2])
#define Rd(pwr, i) ((pwr)->d[(i)])
#define Ld(pwr, i) ((pwr)->d[(i) + DF_ELEMENTS(DF_DOUBLE) / 2])

#undef MSA_LOOP_COND
5890 
5891 #define MSA_LOOP_COND(DF) \
5892             (DF_ELEMENTS(DF))
5893 
5894 #define MSA_DO(DF)                                                          \
5895     do {                                                                    \
5896         uint32_t n = DF_ELEMENTS(df);                                       \
5897         uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
5898         pwx->DF[i] =                                                        \
5899             (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
5900     } while (0)
5901 MSA_FN_DF(vshf_df)
5902 #undef MSA_DO
5903 #undef MSA_LOOP_COND
5904 #undef MSA_FN_DF
5905 
5906 
5907 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5908                         uint32_t ws, uint32_t n)
5909 {
5910     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5911     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5912 
5913     msa_sld_df(df, pwd, pws, n);
5914 }
5915 
5916 void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5917                           uint32_t ws, uint32_t n)
5918 {
5919     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5920     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5921 
5922     msa_splat_df(df, pwd, pws, n);
5923 }
5924 
5925 void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5926                          uint32_t ws, uint32_t n)
5927 {
5928     n %= 16;
5929 #if HOST_BIG_ENDIAN
5930     if (n < 8) {
5931         n = 8 - n - 1;
5932     } else {
5933         n = 24 - n - 1;
5934     }
5935 #endif
5936     env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5937 }
5938 
5939 void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5940                          uint32_t ws, uint32_t n)
5941 {
5942     n %= 8;
5943 #if HOST_BIG_ENDIAN
5944     if (n < 4) {
5945         n = 4 - n - 1;
5946     } else {
5947         n = 12 - n - 1;
5948     }
5949 #endif
5950     env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5951 }
5952 
5953 void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5954                          uint32_t ws, uint32_t n)
5955 {
5956     n %= 4;
5957 #if HOST_BIG_ENDIAN
5958     if (n < 2) {
5959         n = 2 - n - 1;
5960     } else {
5961         n = 6 - n - 1;
5962     }
5963 #endif
5964     env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5965 }
5966 
5967 void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5968                          uint32_t ws, uint32_t n)
5969 {
5970     n %= 2;
5971     env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5972 }
5973 
5974 void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5975                          uint32_t ws, uint32_t n)
5976 {
5977     n %= 16;
5978 #if HOST_BIG_ENDIAN
5979     if (n < 8) {
5980         n = 8 - n - 1;
5981     } else {
5982         n = 24 - n - 1;
5983     }
5984 #endif
5985     env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5986 }
5987 
5988 void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5989                          uint32_t ws, uint32_t n)
5990 {
5991     n %= 8;
5992 #if HOST_BIG_ENDIAN
5993     if (n < 4) {
5994         n = 4 - n - 1;
5995     } else {
5996         n = 12 - n - 1;
5997     }
5998 #endif
5999     env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6000 }
6001 
6002 void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6003                          uint32_t ws, uint32_t n)
6004 {
6005     n %= 4;
6006 #if HOST_BIG_ENDIAN
6007     if (n < 2) {
6008         n = 2 - n - 1;
6009     } else {
6010         n = 6 - n - 1;
6011     }
6012 #endif
6013     env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6014 }
6015 
6016 void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6017                           uint32_t rs_num, uint32_t n)
6018 {
6019     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6020     target_ulong rs = env->active_tc.gpr[rs_num];
6021     n %= 16;
6022 #if HOST_BIG_ENDIAN
6023     if (n < 8) {
6024         n = 8 - n - 1;
6025     } else {
6026         n = 24 - n - 1;
6027     }
6028 #endif
6029     pwd->b[n] = (int8_t)rs;
6030 }
6031 
6032 void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6033                           uint32_t rs_num, uint32_t n)
6034 {
6035     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6036     target_ulong rs = env->active_tc.gpr[rs_num];
6037     n %= 8;
6038 #if HOST_BIG_ENDIAN
6039     if (n < 4) {
6040         n = 4 - n - 1;
6041     } else {
6042         n = 12 - n - 1;
6043     }
6044 #endif
6045     pwd->h[n] = (int16_t)rs;
6046 }
6047 
6048 void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6049                           uint32_t rs_num, uint32_t n)
6050 {
6051     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6052     target_ulong rs = env->active_tc.gpr[rs_num];
6053     n %= 4;
6054 #if HOST_BIG_ENDIAN
6055     if (n < 2) {
6056         n = 2 - n - 1;
6057     } else {
6058         n = 6 - n - 1;
6059     }
6060 #endif
6061     pwd->w[n] = (int32_t)rs;
6062 }
6063 
6064 void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6065                           uint32_t rs_num, uint32_t n)
6066 {
6067     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6068     target_ulong rs = env->active_tc.gpr[rs_num];
6069     n %= 2;
6070     pwd->d[n] = (int64_t)rs;
6071 }
6072 
6073 void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6074                          uint32_t ws, uint32_t n)
6075 {
6076     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6077     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6078 
6079     switch (df) {
6080     case DF_BYTE:
6081         pwd->b[n] = (int8_t)pws->b[0];
6082         break;
6083     case DF_HALF:
6084         pwd->h[n] = (int16_t)pws->h[0];
6085         break;
6086     case DF_WORD:
6087         pwd->w[n] = (int32_t)pws->w[0];
6088         break;
6089     case DF_DOUBLE:
6090         pwd->d[n] = (int64_t)pws->d[0];
6091         break;
6092     default:
6093         assert(0);
6094     }
6095 }
6096 
6097 void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6098 {
6099     switch (cd) {
6100     case 0:
6101         break;
6102     case 1:
6103         env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6104         restore_msa_fp_status(env);
6105         /* check exception */
6106         if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6107             & GET_FP_CAUSE(env->active_tc.msacsr)) {
6108             do_raise_exception(env, EXCP_MSAFPE, GETPC());
6109         }
6110         break;
6111     }
6112 }
6113 
6114 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6115 {
6116     switch (cs) {
6117     case 0:
6118         return env->msair;
6119     case 1:
6120         return env->active_tc.msacsr & MSACSR_MASK;
6121     }
6122     return 0;
6123 }
6124 
6125 void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6126                         uint32_t rs)
6127 {
6128     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6129     uint32_t i;
6130 
6131     switch (df) {
6132     case DF_BYTE:
6133         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6134             pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6135         }
6136         break;
6137     case DF_HALF:
6138         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6139             pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6140         }
6141         break;
6142     case DF_WORD:
6143         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6144             pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6145         }
6146         break;
6147     case DF_DOUBLE:
6148         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6149             pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6150         }
6151        break;
6152     default:
6153         assert(0);
6154     }
6155 }
6156 
6157 
/* Bit patterns with exponent field 0x7f (float32) and 0x3ff (float64) */
#define FLOAT_ONE32 make_float32(0x3f800000)
#define FLOAT_ONE64 make_float64(0x3ff0000000000000ULL)

/*
 * Values derived from the default NaN of status s by flipping fixed
 * bits.  The results noted below hold when the default NaN is 0x7e00,
 * 0x7fc00000 and 0x7ff8000000000000 respectively.
 */
/* 0x7c20 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
/* 0x7f800020 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
/* 0x7ff0000000000020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
6167 
6168 static inline void clear_msacsr_cause(CPUMIPSState *env)
6169 {
6170     SET_FP_CAUSE(env->active_tc.msacsr, 0);
6171 }
6172 
6173 static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6174 {
6175     if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6176             (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6177         UPDATE_FP_FLAGS(env->active_tc.msacsr,
6178                 GET_FP_CAUSE(env->active_tc.msacsr));
6179     } else {
6180         do_raise_exception(env, EXCP_MSAFPE, retaddr);
6181     }
6182 }
6183 
/*
 * Bit flags for the "action" argument of update_msacsr():
 *   CLEAR_FS_UNDERFLOW - on a flushed output denormal, clear Underflow
 *                        instead of setting it
 *   CLEAR_IS_INEXACT   - on a flushed input denormal, clear Inexact
 *                        instead of setting it
 *   RECIPROCAL_INEXACT - report only Inexact unless Invalid or
 *                        Divide-by-zero was raised
 */
#define CLEAR_FS_UNDERFLOW 0x1
#define CLEAR_IS_INEXACT   0x2
#define RECIPROCAL_INEXACT 0x4
6188 
6189 
6190 static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6191 {
6192     int mips_xcpt = 0;
6193 
6194     if (ieee_xcpt & float_flag_invalid) {
6195         mips_xcpt |= FP_INVALID;
6196     }
6197     if (ieee_xcpt & float_flag_overflow) {
6198         mips_xcpt |= FP_OVERFLOW;
6199     }
6200     if (ieee_xcpt & float_flag_underflow) {
6201         mips_xcpt |= FP_UNDERFLOW;
6202     }
6203     if (ieee_xcpt & float_flag_divbyzero) {
6204         mips_xcpt |= FP_DIV0;
6205     }
6206     if (ieee_xcpt & float_flag_inexact) {
6207         mips_xcpt |= FP_INEXACT;
6208     }
6209 
6210     return mips_xcpt;
6211 }
6212 
6213 static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
6214 {
6215     int ieee_exception_flags;
6216     int mips_exception_flags = 0;
6217     int cause;
6218     int enable;
6219 
6220     ieee_exception_flags = get_float_exception_flags(
6221                                &env->active_tc.msa_fp_status);
6222 
6223     /* QEMU softfloat does not signal all underflow cases */
6224     if (denormal) {
6225         ieee_exception_flags |= float_flag_underflow;
6226     }
6227     if (ieee_exception_flags) {
6228         mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
6229     }
6230     enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6231 
6232     /* Set Inexact (I) when flushing inputs to zero */
6233     if ((ieee_exception_flags & float_flag_input_denormal) &&
6234             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6235         if (action & CLEAR_IS_INEXACT) {
6236             mips_exception_flags &= ~FP_INEXACT;
6237         } else {
6238             mips_exception_flags |= FP_INEXACT;
6239         }
6240     }
6241 
6242     /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
6243     if ((ieee_exception_flags & float_flag_output_denormal) &&
6244             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6245         mips_exception_flags |= FP_INEXACT;
6246         if (action & CLEAR_FS_UNDERFLOW) {
6247             mips_exception_flags &= ~FP_UNDERFLOW;
6248         } else {
6249             mips_exception_flags |= FP_UNDERFLOW;
6250         }
6251     }
6252 
6253     /* Set Inexact (I) when Overflow (O) is not enabled */
6254     if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
6255            (enable & FP_OVERFLOW) == 0) {
6256         mips_exception_flags |= FP_INEXACT;
6257     }
6258 
6259     /* Clear Exact Underflow when Underflow (U) is not enabled */
6260     if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
6261            (enable & FP_UNDERFLOW) == 0 &&
6262            (mips_exception_flags & FP_INEXACT) == 0) {
6263         mips_exception_flags &= ~FP_UNDERFLOW;
6264     }
6265 
6266     /*
6267      * Reciprocal operations set only Inexact when valid and not
6268      * divide by zero
6269      */
6270     if ((action & RECIPROCAL_INEXACT) &&
6271             (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
6272         mips_exception_flags = FP_INEXACT;
6273     }
6274 
6275     cause = mips_exception_flags & enable; /* all current enabled exceptions */
6276 
6277     if (cause == 0) {
6278         /*
6279          * No enabled exception, update the MSACSR Cause
6280          * with all current exceptions
6281          */
6282         SET_FP_CAUSE(env->active_tc.msacsr,
6283             (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6284     } else {
6285         /* Current exceptions are enabled */
6286         if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
6287             /*
6288              * Exception(s) will trap, update MSACSR Cause
6289              * with all enabled exceptions
6290              */
6291             SET_FP_CAUSE(env->active_tc.msacsr,
6292                 (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6293         }
6294     }
6295 
6296     return mips_exception_flags;
6297 }
6298 
6299 static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6300 {
6301     int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6302     return c & enable;
6303 }
6304 
6305 static inline float16 float16_from_float32(int32_t a, bool ieee,
6306                                            float_status *status)
6307 {
6308       float16 f_val;
6309 
6310       f_val = float32_to_float16((float32)a, ieee, status);
6311 
6312       return a < 0 ? (f_val | (1 << 15)) : f_val;
6313 }
6314 
6315 static inline float32 float32_from_float64(int64_t a, float_status *status)
6316 {
6317       float32 f_val;
6318 
6319       f_val = float64_to_float32((float64)a, status);
6320 
6321       return a < 0 ? (f_val | (1 << 31)) : f_val;
6322 }
6323 
6324 static inline float32 float32_from_float16(int16_t a, bool ieee,
6325                                            float_status *status)
6326 {
6327       float32 f_val;
6328 
6329       f_val = float16_to_float32((float16)a, ieee, status);
6330 
6331       return a < 0 ? (f_val | (1 << 31)) : f_val;
6332 }
6333 
6334 static inline float64 float64_from_float32(int32_t a, float_status *status)
6335 {
6336       float64 f_val;
6337 
6338       f_val = float32_to_float64((float64)a, status);
6339 
6340       return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6341 }
6342 
6343 static inline float32 float32_from_q16(int16_t a, float_status *status)
6344 {
6345     float32 f_val;
6346 
6347     /* conversion as integer and scaling */
6348     f_val = int32_to_float32(a, status);
6349     f_val = float32_scalbn(f_val, -15, status);
6350 
6351     return f_val;
6352 }
6353 
6354 static inline float64 float64_from_q32(int32_t a, float_status *status)
6355 {
6356     float64 f_val;
6357 
6358     /* conversion as integer and scaling */
6359     f_val = int32_to_float64(a, status);
6360     f_val = float64_scalbn(f_val, -31, status);
6361 
6362     return f_val;
6363 }
6364 
6365 static inline int16_t float32_to_q16(float32 a, float_status *status)
6366 {
6367     int32_t q_val;
6368     int32_t q_min = 0xffff8000;
6369     int32_t q_max = 0x00007fff;
6370 
6371     int ieee_ex;
6372 
6373     if (float32_is_any_nan(a)) {
6374         float_raise(float_flag_invalid, status);
6375         return 0;
6376     }
6377 
6378     /* scaling */
6379     a = float32_scalbn(a, 15, status);
6380 
6381     ieee_ex = get_float_exception_flags(status);
6382     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6383                              , status);
6384 
6385     if (ieee_ex & float_flag_overflow) {
6386         float_raise(float_flag_inexact, status);
6387         return (int32_t)a < 0 ? q_min : q_max;
6388     }
6389 
6390     /* conversion to int */
6391     q_val = float32_to_int32(a, status);
6392 
6393     ieee_ex = get_float_exception_flags(status);
6394     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6395                              , status);
6396 
6397     if (ieee_ex & float_flag_invalid) {
6398         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6399                                , status);
6400         float_raise(float_flag_overflow | float_flag_inexact, status);
6401         return (int32_t)a < 0 ? q_min : q_max;
6402     }
6403 
6404     if (q_val < q_min) {
6405         float_raise(float_flag_overflow | float_flag_inexact, status);
6406         return (int16_t)q_min;
6407     }
6408 
6409     if (q_max < q_val) {
6410         float_raise(float_flag_overflow | float_flag_inexact, status);
6411         return (int16_t)q_max;
6412     }
6413 
6414     return (int16_t)q_val;
6415 }
6416 
6417 static inline int32_t float64_to_q32(float64 a, float_status *status)
6418 {
6419     int64_t q_val;
6420     int64_t q_min = 0xffffffff80000000LL;
6421     int64_t q_max = 0x000000007fffffffLL;
6422 
6423     int ieee_ex;
6424 
6425     if (float64_is_any_nan(a)) {
6426         float_raise(float_flag_invalid, status);
6427         return 0;
6428     }
6429 
6430     /* scaling */
6431     a = float64_scalbn(a, 31, status);
6432 
6433     ieee_ex = get_float_exception_flags(status);
6434     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6435            , status);
6436 
6437     if (ieee_ex & float_flag_overflow) {
6438         float_raise(float_flag_inexact, status);
6439         return (int64_t)a < 0 ? q_min : q_max;
6440     }
6441 
6442     /* conversion to integer */
6443     q_val = float64_to_int64(a, status);
6444 
6445     ieee_ex = get_float_exception_flags(status);
6446     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6447            , status);
6448 
6449     if (ieee_ex & float_flag_invalid) {
6450         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6451                , status);
6452         float_raise(float_flag_overflow | float_flag_inexact, status);
6453         return (int64_t)a < 0 ? q_min : q_max;
6454     }
6455 
6456     if (q_val < q_min) {
6457         float_raise(float_flag_overflow | float_flag_inexact, status);
6458         return (int32_t)q_min;
6459     }
6460 
6461     if (q_max < q_val) {
6462         float_raise(float_flag_overflow | float_flag_inexact, status);
6463         return (int32_t)q_max;
6464     }
6465 
6466     return (int32_t)q_val;
6467 }
6468 
/*
 * Evaluate one float<BITS>_<OP> comparison and store a mask in DEST.
 *
 * The exception flags in env->active_tc.msa_fp_status are reset to 0 first.
 * A non-zero QUIET selects the float<BITS>_<OP>_quiet variant.  DEST becomes
 * M_MAX_UINT(BITS) when the comparison is true and 0 otherwise.  If
 * get_enabled_exceptions() reports something for the value returned by
 * update_msacsr(), DEST is overwritten with FLOAT_SNAN<BITS>(status) whose
 * low 6 bits are replaced by that value.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int64_t cond;                                                       \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        if (QUIET) {                                                        \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
                                                                            \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
6487 
/*
 * Run the 'eq' comparison through MSA_FLOAT_COND, then turn an all-ones
 * result back into 0.  Any other value left in DEST by MSA_FLOAT_COND is
 * kept unchanged.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if (M_MAX_UINT(BITS) == ((DEST) & M_MAX_UINT(BITS))) {      \
            DEST = 0;                                               \
        }                                                           \
    } while (0)
6495 
/*
 * "Unordered or equal": try 'unordered' first and, only while DEST is still
 * 0, fall back to 'eq'.
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6503 
/*
 * "Not equal" built from two 'lt' tests: ARG1 < ARG2 first and, only while
 * DEST is still 0, ARG2 < ARG1.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6511 
/*
 * "Unordered or not equal": 'unordered', then ARG1 < ARG2, then ARG2 < ARG1.
 * Each later test runs only while DEST is still 0, so the first test that
 * leaves a non-zero DEST ends the chain.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6522 
/*
 * "Unordered or less-or-equal": try 'unordered' first and, only while DEST
 * is still 0, fall back to 'le'.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6530 
/*
 * "Unordered or less-than": try 'unordered' first and, only while DEST is
 * still 0, fall back to 'lt'.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6538 
/*
 * "Ordered" built from two 'le' tests: ARG1 <= ARG2 first and, only while
 * DEST is still 0, ARG2 <= ARG1.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6546 
6547 static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6548                               wr_t *pwt, uint32_t df, int quiet,
6549                               uintptr_t retaddr)
6550 {
6551     wr_t wx, *pwx = &wx;
6552     uint32_t i;
6553 
6554     clear_msacsr_cause(env);
6555 
6556     switch (df) {
6557     case DF_WORD:
6558         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6559             MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6560         }
6561         break;
6562     case DF_DOUBLE:
6563         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6564             MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6565         }
6566         break;
6567     default:
6568         assert(0);
6569     }
6570 
6571     check_msacsr_cause(env, retaddr);
6572 
6573     msa_move_v(pwd, pwx);
6574 }
6575 
6576 static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6577                               wr_t *pwt, uint32_t df, int quiet,
6578                               uintptr_t retaddr)
6579 {
6580     wr_t wx, *pwx = &wx;
6581     uint32_t i;
6582 
6583     clear_msacsr_cause(env);
6584 
6585     switch (df) {
6586     case DF_WORD:
6587         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6588             MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6589                     quiet);
6590         }
6591         break;
6592     case DF_DOUBLE:
6593         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6594             MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6595                     quiet);
6596         }
6597         break;
6598     default:
6599         assert(0);
6600     }
6601 
6602     check_msacsr_cause(env, retaddr);
6603 
6604     msa_move_v(pwd, pwx);
6605 }
6606 
6607 static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6608                               wr_t *pwt, uint32_t df, int quiet,
6609                               uintptr_t retaddr)
6610 {
6611     wr_t wx, *pwx = &wx;
6612     uint32_t i;
6613 
6614     clear_msacsr_cause(env);
6615 
6616     switch (df) {
6617     case DF_WORD:
6618         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6619             MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6620         }
6621         break;
6622     case DF_DOUBLE:
6623         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6624             MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6625         }
6626         break;
6627     default:
6628         assert(0);
6629     }
6630 
6631     check_msacsr_cause(env, retaddr);
6632 
6633     msa_move_v(pwd, pwx);
6634 }
6635 
6636 static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6637                                wr_t *pwt, uint32_t df, int quiet,
6638                                uintptr_t retaddr)
6639 {
6640     wr_t wx, *pwx = &wx;
6641     uint32_t i;
6642 
6643     clear_msacsr_cause(env);
6644 
6645     switch (df) {
6646     case DF_WORD:
6647         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6648             MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6649         }
6650         break;
6651     case DF_DOUBLE:
6652         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6653             MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6654         }
6655         break;
6656     default:
6657         assert(0);
6658     }
6659 
6660     check_msacsr_cause(env, retaddr);
6661 
6662     msa_move_v(pwd, pwx);
6663 }
6664 
6665 static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6666                               wr_t *pwt, uint32_t df, int quiet,
6667                               uintptr_t retaddr)
6668 {
6669     wr_t wx, *pwx = &wx;
6670     uint32_t i;
6671 
6672     clear_msacsr_cause(env);
6673 
6674     switch (df) {
6675     case DF_WORD:
6676         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6677             MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6678         }
6679         break;
6680     case DF_DOUBLE:
6681         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6682             MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6683         }
6684         break;
6685     default:
6686         assert(0);
6687     }
6688 
6689     check_msacsr_cause(env, retaddr);
6690 
6691     msa_move_v(pwd, pwx);
6692 }
6693 
6694 static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6695                                wr_t *pwt, uint32_t df, int quiet,
6696                                uintptr_t retaddr)
6697 {
6698     wr_t wx, *pwx = &wx;
6699     uint32_t i;
6700 
6701     clear_msacsr_cause(env);
6702 
6703     switch (df) {
6704     case DF_WORD:
6705         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6706             MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6707         }
6708         break;
6709     case DF_DOUBLE:
6710         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6711             MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6712         }
6713         break;
6714     default:
6715         assert(0);
6716     }
6717 
6718     check_msacsr_cause(env, retaddr);
6719 
6720     msa_move_v(pwd, pwx);
6721 }
6722 
6723 static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6724                               wr_t *pwt, uint32_t df, int quiet,
6725                               uintptr_t retaddr)
6726 {
6727     wr_t wx, *pwx = &wx;
6728     uint32_t i;
6729 
6730     clear_msacsr_cause(env);
6731 
6732     switch (df) {
6733     case DF_WORD:
6734         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6735             MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6736         }
6737         break;
6738     case DF_DOUBLE:
6739         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6740             MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6741         }
6742         break;
6743     default:
6744         assert(0);
6745     }
6746 
6747     check_msacsr_cause(env, retaddr);
6748 
6749     msa_move_v(pwd, pwx);
6750 }
6751 
6752 static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6753                                wr_t *pwt, uint32_t df, int quiet,
6754                                uintptr_t retaddr)
6755 {
6756     wr_t wx, *pwx = &wx;
6757     uint32_t i;
6758 
6759     clear_msacsr_cause(env);
6760 
6761     switch (df) {
6762     case DF_WORD:
6763         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6764             MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6765         }
6766         break;
6767     case DF_DOUBLE:
6768         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6769             MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6770         }
6771         break;
6772     default:
6773         assert(0);
6774     }
6775 
6776     check_msacsr_cause(env, retaddr);
6777 
6778     msa_move_v(pwd, pwx);
6779 }
6780 
6781 static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6782                               wr_t *pwt, uint32_t df, int quiet,
6783                               uintptr_t retaddr)
6784 {
6785     wr_t wx, *pwx = &wx;
6786     uint32_t i;
6787 
6788     clear_msacsr_cause(env);
6789 
6790     switch (df) {
6791     case DF_WORD:
6792         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6793             MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6794         }
6795         break;
6796     case DF_DOUBLE:
6797         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6798             MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6799         }
6800         break;
6801     default:
6802         assert(0);
6803     }
6804 
6805     check_msacsr_cause(env, retaddr);
6806 
6807     msa_move_v(pwd, pwx);
6808 }
6809 
6810 static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6811                                wr_t *pwt, uint32_t df, int quiet,
6812                                uintptr_t retaddr)
6813 {
6814     wr_t wx, *pwx = &wx;
6815     uint32_t i;
6816 
6817     clear_msacsr_cause(env);
6818 
6819     switch (df) {
6820     case DF_WORD:
6821         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6822             MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6823         }
6824         break;
6825     case DF_DOUBLE:
6826         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6827             MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6828         }
6829         break;
6830     default:
6831         assert(0);
6832     }
6833 
6834     check_msacsr_cause(env, retaddr);
6835 
6836     msa_move_v(pwd, pwx);
6837 }
6838 
6839 static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6840                               wr_t *pwt, uint32_t df, int quiet,
6841                               uintptr_t retaddr)
6842 {
6843     wr_t wx, *pwx = &wx;
6844     uint32_t i;
6845 
6846     clear_msacsr_cause(env);
6847 
6848     switch (df) {
6849     case DF_WORD:
6850         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6851             MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6852         }
6853         break;
6854     case DF_DOUBLE:
6855         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6856             MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6857         }
6858         break;
6859     default:
6860         assert(0);
6861     }
6862 
6863     check_msacsr_cause(env, retaddr);
6864 
6865     msa_move_v(pwd, pwx);
6866 }
6867 
6868 void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6869                         uint32_t ws, uint32_t wt)
6870 {
6871     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6872     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6873     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6874     compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6875 }
6876 
6877 void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6878                         uint32_t ws, uint32_t wt)
6879 {
6880     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6881     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6882     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6883     compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6884 }
6885 
6886 void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6887                         uint32_t ws, uint32_t wt)
6888 {
6889     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6890     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6891     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6892     compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6893 }
6894 
6895 void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6896                          uint32_t ws, uint32_t wt)
6897 {
6898     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6899     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6900     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6901     compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6902 }
6903 
6904 void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6905                         uint32_t ws, uint32_t wt)
6906 {
6907     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6908     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6909     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6910     compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6911 }
6912 
6913 void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6914                          uint32_t ws, uint32_t wt)
6915 {
6916     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6917     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6918     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6919     compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6920 }
6921 
6922 void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6923                         uint32_t ws, uint32_t wt)
6924 {
6925     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6926     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6927     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6928     compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6929 }
6930 
6931 void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6932                          uint32_t ws, uint32_t wt)
6933 {
6934     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6935     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6936     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6937     compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6938 }
6939 
6940 void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6941                         uint32_t ws, uint32_t wt)
6942 {
6943     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6944     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6945     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6946     compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6947 }
6948 
6949 void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6950                         uint32_t ws, uint32_t wt)
6951 {
6952     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6953     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6954     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6955     compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6956 }
6957 
6958 void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6959                         uint32_t ws, uint32_t wt)
6960 {
6961     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6962     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6963     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6964     compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6965 }
6966 
6967 void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6968                          uint32_t ws, uint32_t wt)
6969 {
6970     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6971     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6972     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6973     compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6974 }
6975 
6976 void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6977                         uint32_t ws, uint32_t wt)
6978 {
6979     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6980     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6981     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6982     compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6983 }
6984 
6985 void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6986                          uint32_t ws, uint32_t wt)
6987 {
6988     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6989     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6990     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6991     compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6992 }
6993 
6994 void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6995                         uint32_t ws, uint32_t wt)
6996 {
6997     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6998     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6999     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7000     compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7001 }
7002 
7003 void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7004                          uint32_t ws, uint32_t wt)
7005 {
7006     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7007     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7008     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7009     compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7010 }
7011 
7012 void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7013                         uint32_t ws, uint32_t wt)
7014 {
7015     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7016     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7017     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7018     compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7019 }
7020 
7021 void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7022                          uint32_t ws, uint32_t wt)
7023 {
7024     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7025     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7026     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7027     compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7028 }
7029 
7030 void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7031                         uint32_t ws, uint32_t wt)
7032 {
7033     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7034     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7035     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7036     compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7037 }
7038 
7039 void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7040                         uint32_t ws, uint32_t wt)
7041 {
7042     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7043     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7044     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7045     compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7046 }
7047 
7048 void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7049                          uint32_t ws, uint32_t wt)
7050 {
7051     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7052     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7053     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7054     compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7055 }
7056 
7057 void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7058                         uint32_t ws, uint32_t wt)
7059 {
7060     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7061     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7062     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7063     compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7064 }
7065 
/*
 * Both float16 predicates are stubbed to the constant 0, so
 * IS_DENORMAL(x, 16) always evaluates to 0.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/*
 * Non-zero when ARG is reported as "zero or denormal" by the BITS-wide
 * predicate but is not reported as zero, i.e. ARG is a denormal.
 */
#define IS_DENORMAL(ARG, BITS)                          \
    (!float ## BITS ## _is_zero(ARG) &&                 \
     float ## BITS ## _is_zero_or_denormal(ARG))
7072 
/*
 * Evaluate DEST = float<BITS>_<OP>(ARG1, ARG2) with the exception flags in
 * env->active_tc.msa_fp_status reset to 0 beforehand.
 *
 * update_msacsr() is then called with IS_DENORMAL(DEST, BITS) as its last
 * argument.  If get_enabled_exceptions() reports something for the value it
 * returns, DEST is overwritten with FLOAT_SNAN<BITS>(status) whose low 6
 * bits are replaced by that value.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        /* start from a clean set of exception flags */                     \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
                                                                            \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7086 
7087 void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7088         uint32_t ws, uint32_t wt)
7089 {
7090     wr_t wx, *pwx = &wx;
7091     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7092     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7093     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7094     uint32_t i;
7095 
7096     clear_msacsr_cause(env);
7097 
7098     switch (df) {
7099     case DF_WORD:
7100         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7101             MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7102         }
7103         break;
7104     case DF_DOUBLE:
7105         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7106             MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7107         }
7108         break;
7109     default:
7110         assert(0);
7111     }
7112 
7113     check_msacsr_cause(env, GETPC());
7114     msa_move_v(pwd, pwx);
7115 }
7116 
7117 void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7118         uint32_t ws, uint32_t wt)
7119 {
7120     wr_t wx, *pwx = &wx;
7121     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7122     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7123     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7124     uint32_t i;
7125 
7126     clear_msacsr_cause(env);
7127 
7128     switch (df) {
7129     case DF_WORD:
7130         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7131             MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7132         }
7133         break;
7134     case DF_DOUBLE:
7135         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7136             MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7137         }
7138         break;
7139     default:
7140         assert(0);
7141     }
7142 
7143     check_msacsr_cause(env, GETPC());
7144     msa_move_v(pwd, pwx);
7145 }
7146 
7147 void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7148         uint32_t ws, uint32_t wt)
7149 {
7150     wr_t wx, *pwx = &wx;
7151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7153     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7154     uint32_t i;
7155 
7156     clear_msacsr_cause(env);
7157 
7158     switch (df) {
7159     case DF_WORD:
7160         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7161             MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7162         }
7163         break;
7164     case DF_DOUBLE:
7165         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7166             MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7167         }
7168         break;
7169     default:
7170         assert(0);
7171     }
7172 
7173     check_msacsr_cause(env, GETPC());
7174 
7175     msa_move_v(pwd, pwx);
7176 }
7177 
7178 void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7179         uint32_t ws, uint32_t wt)
7180 {
7181     wr_t wx, *pwx = &wx;
7182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7185     uint32_t i;
7186 
7187     clear_msacsr_cause(env);
7188 
7189     switch (df) {
7190     case DF_WORD:
7191         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7192             MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7193         }
7194         break;
7195     case DF_DOUBLE:
7196         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7197             MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7198         }
7199         break;
7200     default:
7201         assert(0);
7202     }
7203 
7204     check_msacsr_cause(env, GETPC());
7205 
7206     msa_move_v(pwd, pwx);
7207 }
7208 
/*
 * Evaluate DEST = float<BITS>_muladd(ARG2, ARG3, ARG1, NEGATE) with the
 * exception flags in env->active_tc.msa_fp_status reset to 0 beforehand.
 * Note the argument order: ARG1 is passed as the third muladd operand.
 *
 * update_msacsr() is then called with IS_DENORMAL(DEST, BITS) as its last
 * argument.  If get_enabled_exceptions() reports something for the value it
 * returns, DEST is overwritten with FLOAT_SNAN<BITS>(status) whose low 6
 * bits are replaced by that value.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        /* start from a clean set of exception flags */                     \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
                                                                            \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7222 
7223 void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7224         uint32_t ws, uint32_t wt)
7225 {
7226     wr_t wx, *pwx = &wx;
7227     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7228     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7229     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7230     uint32_t i;
7231 
7232     clear_msacsr_cause(env);
7233 
7234     switch (df) {
7235     case DF_WORD:
7236         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7237             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7238                            pws->w[i], pwt->w[i], 0, 32);
7239         }
7240         break;
7241     case DF_DOUBLE:
7242         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7243             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7244                            pws->d[i], pwt->d[i], 0, 64);
7245         }
7246         break;
7247     default:
7248         assert(0);
7249     }
7250 
7251     check_msacsr_cause(env, GETPC());
7252 
7253     msa_move_v(pwd, pwx);
7254 }
7255 
7256 void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7257         uint32_t ws, uint32_t wt)
7258 {
7259     wr_t wx, *pwx = &wx;
7260     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7261     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7262     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7263     uint32_t i;
7264 
7265     clear_msacsr_cause(env);
7266 
7267     switch (df) {
7268     case DF_WORD:
7269         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7270             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7271                            pws->w[i], pwt->w[i],
7272                            float_muladd_negate_product, 32);
7273       }
7274       break;
7275     case DF_DOUBLE:
7276         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7277             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7278                            pws->d[i], pwt->d[i],
7279                            float_muladd_negate_product, 64);
7280         }
7281         break;
7282     default:
7283         assert(0);
7284     }
7285 
7286     check_msacsr_cause(env, GETPC());
7287 
7288     msa_move_v(pwd, pwx);
7289 }
7290 
7291 void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7292         uint32_t ws, uint32_t wt)
7293 {
7294     wr_t wx, *pwx = &wx;
7295     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7296     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7297     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7298     uint32_t i;
7299 
7300     clear_msacsr_cause(env);
7301 
7302     switch (df) {
7303     case DF_WORD:
7304         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7305             MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7306                             pwt->w[i] >  0x200 ?  0x200 :
7307                             pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7308                             32);
7309         }
7310         break;
7311     case DF_DOUBLE:
7312         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7313             MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7314                             pwt->d[i] >  0x1000 ?  0x1000 :
7315                             pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7316                             64);
7317         }
7318         break;
7319     default:
7320         assert(0);
7321     }
7322 
7323     check_msacsr_cause(env, GETPC());
7324 
7325     msa_move_v(pwd, pwx);
7326 }
7327 
/*
 * One-element unary operation: DEST = float<BITS>_<OP>(ARG, ...).
 *
 * The softfloat exception flags are zeroed before the call.  The value
 * returned by update_msacsr() is tested with get_enabled_exceptions(); when
 * that test is non-zero DEST is overwritten with FLOAT_SNAN<BITS> whose low
 * six bits are replaced by the returned value.
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fpst);                         \
        cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));             \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        }                                                                   \
    } while (0)
7341 
7342 void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7343                          uint32_t ws, uint32_t wt)
7344 {
7345     wr_t wx, *pwx = &wx;
7346     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7347     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7348     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7349     uint32_t i;
7350 
7351     clear_msacsr_cause(env);
7352 
7353     switch (df) {
7354     case DF_WORD:
7355         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7356             /*
7357              * Half precision floats come in two formats: standard
7358              * IEEE and "ARM" format.  The latter gains extra exponent
7359              * range by omitting the NaN/Inf encodings.
7360              */
7361             bool ieee = true;
7362 
7363             MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7364             MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7365         }
7366         break;
7367     case DF_DOUBLE:
7368         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7369             MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7370             MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7371         }
7372         break;
7373     default:
7374         assert(0);
7375     }
7376 
7377     check_msacsr_cause(env, GETPC());
7378     msa_move_v(pwd, pwx);
7379 }
7380 
/*
 * One-element unary operation whose destination width differs from the
 * source width: DEST = float<BITS>_<OP>(ARG, ...).
 *
 * BITS selects the softfloat function family, XBITS selects which
 * FLOAT_SNAN<XBITS> pattern is written to DEST when
 * get_enabled_exceptions() is non-zero for the update_msacsr() result.
 * update_msacsr() is called with CLEAR_FS_UNDERFLOW and a zero third
 * argument.
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fpst);                         \
        cause = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                  \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## XBITS(fpst) >> 6) << 6) | cause;         \
        }                                                                   \
    } while (0)
7394 
7395 void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7396                        uint32_t ws, uint32_t wt)
7397 {
7398     wr_t wx, *pwx = &wx;
7399     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7400     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7401     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7402     uint32_t i;
7403 
7404     clear_msacsr_cause(env);
7405 
7406     switch (df) {
7407     case DF_WORD:
7408         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7409             MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7410             MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7411         }
7412         break;
7413     case DF_DOUBLE:
7414         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7415             MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7416             MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7417         }
7418         break;
7419     default:
7420         assert(0);
7421     }
7422 
7423     check_msacsr_cause(env, GETPC());
7424 
7425     msa_move_v(pwd, pwx);
7426 }
7427 
/*
 * True when ARG1 is not a NaN of any kind and ARG2 is a quiet NaN.
 *
 * The expansion is fully parenthesised so that the macro behaves as a
 * single operand: without the outer parentheses, "!NUMBER_QNAN_PAIR(...)"
 * or a use next to another operator would bind to the first term only.
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7431 
/*
 * One-element two-operand operation used by the min/max helpers:
 * DEST = float<BITS>_<OP>(ARG1, ARG2, ...).
 *
 * The softfloat exception flags are zeroed before the call and
 * update_msacsr() is invoked with both extra arguments set to zero.  When
 * get_enabled_exceptions() is non-zero for its result, DEST is overwritten
 * with FLOAT_SNAN<BITS> whose low six bits are replaced by that result.
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, fpst);                  \
        cause = update_msacsr(env, 0, 0);                                   \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        }                                                                   \
    } while (0)
7445 
/*
 * Min/max by absolute value for one element.
 *
 * F and G are the two complementary operations (min/max or max/min), X is
 * the destination, _S and _T the operands.
 *
 *  1. If exactly one operand is a quiet NaN and the other is a number
 *     (NUMBER_QNAN_PAIR), the number replaces the quiet NaN.
 *  2. F and G are applied to the operands, and F to their absolute values.
 *  3. X takes the F result when the absolute values are equal or when the
 *     F-of-absolutes result equals the absolute value of the F result;
 *     otherwise X takes the G result.
 */
#define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
    do {                                                            \
        uint## BITS ##_t op_s = _S, op_t = _T;                      \
        uint## BITS ##_t abs_s, abs_t;                              \
        uint## BITS ##_t res_f, res_g, res_abs;                     \
                                                                    \
        if (NUMBER_QNAN_PAIR(op_s, op_t, BITS, STATUS)) {           \
            op_t = op_s;                                            \
        } else if (NUMBER_QNAN_PAIR(op_t, op_s, BITS, STATUS)) {    \
            op_s = op_t;                                            \
        }                                                           \
                                                                    \
        abs_s = float## BITS ##_abs(op_s);                          \
        abs_t = float## BITS ##_abs(op_t);                          \
                                                                    \
        MSA_FLOAT_MAXOP(res_f,   F, op_s,  op_t,  BITS);            \
        MSA_FLOAT_MAXOP(res_g,   G, op_s,  op_t,  BITS);            \
        MSA_FLOAT_MAXOP(res_abs, F, abs_s, abs_t, BITS);            \
                                                                    \
        if (abs_s == abs_t ||                                       \
            res_abs == float## BITS ##_abs(res_f)) {                \
            X = res_f;                                              \
        } else {                                                    \
            X = res_g;                                              \
        }                                                           \
    } while (0)
7463 
7464 void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7465         uint32_t ws, uint32_t wt)
7466 {
7467     float_status *status = &env->active_tc.msa_fp_status;
7468     wr_t wx, *pwx = &wx;
7469     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7470     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7471     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7472 
7473     clear_msacsr_cause(env);
7474 
7475     if (df == DF_WORD) {
7476 
7477         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7478             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7479         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7480             MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7481         } else {
7482             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7483         }
7484 
7485         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7486             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7487         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7488             MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7489         } else {
7490             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7491         }
7492 
7493         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7494             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7495         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7496             MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7497         } else {
7498             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7499         }
7500 
7501         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7502             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7503         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7504             MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7505         } else {
7506             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7507         }
7508 
7509     } else if (df == DF_DOUBLE) {
7510 
7511         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7512             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7513         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7514             MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7515         } else {
7516             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7517         }
7518 
7519         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7520             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7521         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7522             MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7523         } else {
7524             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7525         }
7526 
7527     } else {
7528 
7529         assert(0);
7530 
7531     }
7532 
7533     check_msacsr_cause(env, GETPC());
7534 
7535     msa_move_v(pwd, pwx);
7536 }
7537 
7538 void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7539         uint32_t ws, uint32_t wt)
7540 {
7541     float_status *status = &env->active_tc.msa_fp_status;
7542     wr_t wx, *pwx = &wx;
7543     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7544     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7545     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7546 
7547     clear_msacsr_cause(env);
7548 
7549     if (df == DF_WORD) {
7550         FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7551         FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7552         FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7553         FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7554     } else if (df == DF_DOUBLE) {
7555         FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7556         FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7557     } else {
7558         assert(0);
7559     }
7560 
7561     check_msacsr_cause(env, GETPC());
7562 
7563     msa_move_v(pwd, pwx);
7564 }
7565 
7566 void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7567         uint32_t ws, uint32_t wt)
7568 {
7569      float_status *status = &env->active_tc.msa_fp_status;
7570     wr_t wx, *pwx = &wx;
7571     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7572     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7573     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7574 
7575     clear_msacsr_cause(env);
7576 
7577     if (df == DF_WORD) {
7578 
7579         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7580             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7581         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7582             MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7583         } else {
7584             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7585         }
7586 
7587         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7588             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7589         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7590             MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7591         } else {
7592             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7593         }
7594 
7595         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7596             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7597         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7598             MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7599         } else {
7600             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7601         }
7602 
7603         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7604             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7605         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7606             MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7607         } else {
7608             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7609         }
7610 
7611     } else if (df == DF_DOUBLE) {
7612 
7613         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7614             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7615         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7616             MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7617         } else {
7618             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7619         }
7620 
7621         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7622             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7623         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7624             MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7625         } else {
7626             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7627         }
7628 
7629     } else {
7630 
7631         assert(0);
7632 
7633     }
7634 
7635     check_msacsr_cause(env, GETPC());
7636 
7637     msa_move_v(pwd, pwx);
7638 }
7639 
7640 void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7641         uint32_t ws, uint32_t wt)
7642 {
7643     float_status *status = &env->active_tc.msa_fp_status;
7644     wr_t wx, *pwx = &wx;
7645     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7646     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7647     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7648 
7649     clear_msacsr_cause(env);
7650 
7651     if (df == DF_WORD) {
7652         FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7653         FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7654         FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7655         FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7656     } else if (df == DF_DOUBLE) {
7657         FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7658         FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7659     } else {
7660         assert(0);
7661     }
7662 
7663     check_msacsr_cause(env, GETPC());
7664 
7665     msa_move_v(pwd, pwx);
7666 }
7667 
7668 void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7669         uint32_t wd, uint32_t ws)
7670 {
7671     float_status *status = &env->active_tc.msa_fp_status;
7672 
7673     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7674     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7675     if (df == DF_WORD) {
7676         pwd->w[0] = float_class_s(pws->w[0], status);
7677         pwd->w[1] = float_class_s(pws->w[1], status);
7678         pwd->w[2] = float_class_s(pws->w[2], status);
7679         pwd->w[3] = float_class_s(pws->w[3], status);
7680     } else if (df == DF_DOUBLE) {
7681         pwd->d[0] = float_class_d(pws->d[0], status);
7682         pwd->d[1] = float_class_d(pws->d[1], status);
7683     } else {
7684         assert(0);
7685     }
7686 }
7687 
/*
 * One-element unary operation that yields zero for NaN inputs:
 * DEST = float<BITS>_<OP>(ARG, ...).
 *
 * update_msacsr() is called with CLEAR_FS_UNDERFLOW and a zero third
 * argument.  When get_enabled_exceptions() is non-zero for its result,
 * DEST is overwritten with FLOAT_SNAN<BITS> whose low six bits are replaced
 * by that result; otherwise, if ARG is any kind of NaN, DEST is set to 0.
 *
 * ARG is expanded twice, so it should be free of side effects.
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fpst);                         \
        cause = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                  \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7703 
7704 void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7705                             uint32_t ws)
7706 {
7707     wr_t wx, *pwx = &wx;
7708     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7709     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7710     uint32_t i;
7711 
7712     clear_msacsr_cause(env);
7713 
7714     switch (df) {
7715     case DF_WORD:
7716         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7717             MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7718         }
7719         break;
7720     case DF_DOUBLE:
7721         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7722             MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7723         }
7724         break;
7725     default:
7726         assert(0);
7727     }
7728 
7729     check_msacsr_cause(env, GETPC());
7730 
7731     msa_move_v(pwd, pwx);
7732 }
7733 
7734 void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7735                             uint32_t ws)
7736 {
7737     wr_t wx, *pwx = &wx;
7738     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7739     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7740     uint32_t i;
7741 
7742     clear_msacsr_cause(env);
7743 
7744     switch (df) {
7745     case DF_WORD:
7746         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7747             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7748         }
7749         break;
7750     case DF_DOUBLE:
7751         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7752             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7753         }
7754         break;
7755     default:
7756         assert(0);
7757     }
7758 
7759     check_msacsr_cause(env, GETPC());
7760 
7761     msa_move_v(pwd, pwx);
7762 }
7763 
7764 void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7765                          uint32_t ws)
7766 {
7767     wr_t wx, *pwx = &wx;
7768     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7769     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7770     uint32_t i;
7771 
7772     clear_msacsr_cause(env);
7773 
7774     switch (df) {
7775     case DF_WORD:
7776         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7777             MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7778         }
7779         break;
7780     case DF_DOUBLE:
7781         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7782             MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7783         }
7784         break;
7785     default:
7786         assert(0);
7787     }
7788 
7789     check_msacsr_cause(env, GETPC());
7790 
7791     msa_move_v(pwd, pwx);
7792 }
7793 
/*
 * One-element reciprocal: DEST = float<BITS>_div(FLOAT_ONE<BITS>, ARG, ...).
 *
 * ARG is evaluated exactly once, after the softfloat exception flags have
 * been zeroed, and its value is cached in a local.  Callers may pass an
 * expression that itself performs a softfloat operation (for instance a
 * square root); any flags it raises are therefore still seen by
 * update_msacsr(), but the operation is no longer repeated for the
 * infinity test below.
 *
 * update_msacsr() receives 0 as its second argument when the operand is an
 * infinity or the quotient is a quiet NaN, RECIPROCAL_INEXACT otherwise.
 * When get_enabled_exceptions() is non-zero for its result, DEST is
 * overwritten with FLOAT_SNAN<BITS> whose low six bits are replaced by that
 * result.
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS recip_arg_;                                           \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        recip_arg_ = (ARG);                                                 \
        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, recip_arg_,     \
                                         status);                           \
        c = update_msacsr(env,                                              \
                          (float ## BITS ## _is_infinity(recip_arg_) ||     \
                           float ## BITS ## _is_quiet_nan(DEST, status)) ?  \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7810 
7811 void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7812                           uint32_t ws)
7813 {
7814     wr_t wx, *pwx = &wx;
7815     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7816     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7817     uint32_t i;
7818 
7819     clear_msacsr_cause(env);
7820 
7821     switch (df) {
7822     case DF_WORD:
7823         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7824             MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7825                     &env->active_tc.msa_fp_status), 32);
7826         }
7827         break;
7828     case DF_DOUBLE:
7829         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7830             MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7831                     &env->active_tc.msa_fp_status), 64);
7832         }
7833         break;
7834     default:
7835         assert(0);
7836     }
7837 
7838     check_msacsr_cause(env, GETPC());
7839 
7840     msa_move_v(pwd, pwx);
7841 }
7842 
7843 void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7844                         uint32_t ws)
7845 {
7846     wr_t wx, *pwx = &wx;
7847     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7848     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7849     uint32_t i;
7850 
7851     clear_msacsr_cause(env);
7852 
7853     switch (df) {
7854     case DF_WORD:
7855         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7856             MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7857         }
7858         break;
7859     case DF_DOUBLE:
7860         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7861             MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7862         }
7863         break;
7864     default:
7865         assert(0);
7866     }
7867 
7868     check_msacsr_cause(env, GETPC());
7869 
7870     msa_move_v(pwd, pwx);
7871 }
7872 
7873 void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7874                          uint32_t ws)
7875 {
7876     wr_t wx, *pwx = &wx;
7877     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7878     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7879     uint32_t i;
7880 
7881     clear_msacsr_cause(env);
7882 
7883     switch (df) {
7884     case DF_WORD:
7885         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7886             MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7887         }
7888         break;
7889     case DF_DOUBLE:
7890         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7891             MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7892         }
7893         break;
7894     default:
7895         assert(0);
7896     }
7897 
7898     check_msacsr_cause(env, GETPC());
7899 
7900     msa_move_v(pwd, pwx);
7901 }
7902 
/*
 * One-element base-2 logarithm rounded to an integral value.
 *
 *  1. Zero the exception flags and force the float_round_down rounding
 *     mode.
 *  2. DEST = round_to_int(log2(ARG)).
 *  3. Restore the rounding mode selected by the RM field of MSACSR (looked
 *     up through ieee_rm[]).
 *  4. Remove float_flag_inexact from the accumulated flags before they are
 *     handed to update_msacsr().
 *
 * When get_enabled_exceptions() is non-zero for the update_msacsr() result,
 * DEST is overwritten with FLOAT_SNAN<BITS> whose low six bits are replaced
 * by that result.
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        set_float_rounding_mode(float_round_down, fpst);                    \
        DEST = float ## BITS ## _ ## log2(ARG, fpst);                       \
        DEST = float ## BITS ## _ ## round_to_int(DEST, fpst);              \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                fpst);                                      \
                                                                            \
        set_float_exception_flags(get_float_exception_flags(fpst) &         \
                                  (~float_flag_inexact),                    \
                                  fpst);                                    \
                                                                            \
        cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));             \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        }                                                                   \
    } while (0)
7926 
7927 void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7928                          uint32_t ws)
7929 {
7930     wr_t wx, *pwx = &wx;
7931     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7932     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7933     uint32_t i;
7934 
7935     clear_msacsr_cause(env);
7936 
7937     switch (df) {
7938     case DF_WORD:
7939         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7940             MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7941         }
7942         break;
7943     case DF_DOUBLE:
7944         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7945             MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7946         }
7947         break;
7948     default:
7949         assert(0);
7950     }
7951 
7952     check_msacsr_cause(env, GETPC());
7953 
7954     msa_move_v(pwd, pwx);
7955 }
7956 
7957 void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7958                           uint32_t ws)
7959 {
7960     wr_t wx, *pwx = &wx;
7961     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7962     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7963     uint32_t i;
7964 
7965     clear_msacsr_cause(env);
7966 
7967     switch (df) {
7968     case DF_WORD:
7969         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7970             /*
7971              * Half precision floats come in two formats: standard
7972              * IEEE and "ARM" format.  The latter gains extra exponent
7973              * range by omitting the NaN/Inf encodings.
7974              */
7975             bool ieee = true;
7976 
7977             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7978         }
7979         break;
7980     case DF_DOUBLE:
7981         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7982             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7983         }
7984         break;
7985     default:
7986         assert(0);
7987     }
7988 
7989     check_msacsr_cause(env, GETPC());
7990     msa_move_v(pwd, pwx);
7991 }
7992 
7993 void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7994                           uint32_t ws)
7995 {
7996     wr_t wx, *pwx = &wx;
7997     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7998     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7999     uint32_t i;
8000 
8001     clear_msacsr_cause(env);
8002 
8003     switch (df) {
8004     case DF_WORD:
8005         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8006             /*
8007              * Half precision floats come in two formats: standard
8008              * IEEE and "ARM" format.  The latter gains extra exponent
8009              * range by omitting the NaN/Inf encodings.
8010              */
8011             bool ieee = true;
8012 
8013             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8014         }
8015         break;
8016     case DF_DOUBLE:
8017         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8018             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8019         }
8020         break;
8021     default:
8022         assert(0);
8023     }
8024 
8025     check_msacsr_cause(env, GETPC());
8026     msa_move_v(pwd, pwx);
8027 }
8028 
8029 void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8030                         uint32_t ws)
8031 {
8032     wr_t wx, *pwx = &wx;
8033     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8034     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8035     uint32_t i;
8036 
8037     switch (df) {
8038     case DF_WORD:
8039         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8040             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8041         }
8042         break;
8043     case DF_DOUBLE:
8044         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8045             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8046         }
8047         break;
8048     default:
8049         assert(0);
8050     }
8051 
8052     msa_move_v(pwd, pwx);
8053 }
8054 
8055 void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8056                         uint32_t ws)
8057 {
8058     wr_t wx, *pwx = &wx;
8059     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8060     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8061     uint32_t i;
8062 
8063     switch (df) {
8064     case DF_WORD:
8065         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8066             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8067         }
8068         break;
8069     case DF_DOUBLE:
8070         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8071             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8072         }
8073         break;
8074     default:
8075         assert(0);
8076     }
8077 
8078     msa_move_v(pwd, pwx);
8079 }
8080 
8081 void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8082                            uint32_t ws)
8083 {
8084     wr_t wx, *pwx = &wx;
8085     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8086     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8087     uint32_t i;
8088 
8089     clear_msacsr_cause(env);
8090 
8091     switch (df) {
8092     case DF_WORD:
8093         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8094             MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8095         }
8096         break;
8097     case DF_DOUBLE:
8098         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8099             MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8100         }
8101         break;
8102     default:
8103         assert(0);
8104     }
8105 
8106     check_msacsr_cause(env, GETPC());
8107 
8108     msa_move_v(pwd, pwx);
8109 }
8110 
8111 void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8112                            uint32_t ws)
8113 {
8114     wr_t wx, *pwx = &wx;
8115     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8116     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8117     uint32_t i;
8118 
8119     clear_msacsr_cause(env);
8120 
8121     switch (df) {
8122     case DF_WORD:
8123         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8124             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8125         }
8126         break;
8127     case DF_DOUBLE:
8128         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8129             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8130         }
8131         break;
8132     default:
8133         assert(0);
8134     }
8135 
8136     check_msacsr_cause(env, GETPC());
8137 
8138     msa_move_v(pwd, pwx);
8139 }
8140 
/*
 * Aliases giving the integer-to-float conversion routines a
 * "float<bits>_from_<inttype>" spelling.  The ffint helpers below pass
 * from_int32 / from_uint32 / from_int64 / from_uint64 as the operation
 * name to MSA_FLOAT_UNOP(); presumably that macro pastes the name onto
 * a float<bits>_ prefix (its definition is outside this section --
 * TODO confirm).
 */
#define float32_from_int32 int32_to_float32
#define float32_from_uint32 uint32_to_float32

#define float64_from_int64 int64_to_float64
#define float64_from_uint64 uint64_to_float64
8146 
8147 void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8148                            uint32_t ws)
8149 {
8150     wr_t wx, *pwx = &wx;
8151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8153     uint32_t i;
8154 
8155     clear_msacsr_cause(env);
8156 
8157     switch (df) {
8158     case DF_WORD:
8159         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8160             MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8161         }
8162         break;
8163     case DF_DOUBLE:
8164         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8165             MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8166         }
8167         break;
8168     default:
8169         assert(0);
8170     }
8171 
8172     check_msacsr_cause(env, GETPC());
8173 
8174     msa_move_v(pwd, pwx);
8175 }
8176 
8177 void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8178                            uint32_t ws)
8179 {
8180     wr_t wx, *pwx = &wx;
8181     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8182     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8183     uint32_t i;
8184 
8185     clear_msacsr_cause(env);
8186 
8187     switch (df) {
8188     case DF_WORD:
8189         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8190             MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8191         }
8192         break;
8193     case DF_DOUBLE:
8194         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8195             MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8196         }
8197         break;
8198     default:
8199         assert(0);
8200     }
8201 
8202     check_msacsr_cause(env, GETPC());
8203 
8204     msa_move_v(pwd, pwx);
8205 }
8206 
/*
 * Width in bits of one element of data format df: 1 << (df + 3).
 * This repeats, token for token, the definition near the top of this
 * file, so the redefinition is benign.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Element-by-element access macros.
 * DF_ELEMENTS(df) is the number of df-sized elements in MSA_WRLEN bits;
 * it likewise repeats the definition near the top of this file.
 */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8212 
/*
 * MEMOP_IDX(DF): in builds without CONFIG_USER_ONLY, declares a local
 * `oi` of type MemOpIdx built from MO_TE | DF | MO_UNALN and
 * cpu_mmu_index(env, false); it therefore needs `env` in scope and
 * already ends in a semicolon.  With CONFIG_USER_ONLY it expands to
 * nothing.
 * NOTE(review): no use of this macro is visible in this section of the
 * file -- confirm whether it is still needed.
 */
#if !defined(CONFIG_USER_ONLY)
#define MEMOP_IDX(DF)                                                   \
    MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
                                 cpu_mmu_index(env, false));
#else
#define MEMOP_IDX(DF)
#endif
8220 
#if TARGET_BIG_ENDIAN
/* Swap the two bytes inside each of the four 16-bit lanes of x. */
static inline uint64_t bswap16x4(uint64_t x)
{
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;
    const uint64_t moved_up = (x & low_bytes) << 8;
    const uint64_t moved_down = (x >> 8) & low_bytes;

    return moved_up | moved_down;
}

/*
 * Byte-swap each 32-bit half of x in place.
 * Assuming bswap64() reverses all eight bytes and ror64() rotates
 * right, the reversal also exchanges the two halves and the rotate by
 * 32 puts them back.
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    const uint64_t reversed = bswap64(x);

    return ror64(reversed, 32);
}
#endif
8233 
8234 void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8235                      target_ulong addr)
8236 {
8237     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8238     uintptr_t ra = GETPC();
8239     uint64_t d0, d1;
8240 
8241     /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8242     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8243     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8244     pwd->d[0] = d0;
8245     pwd->d[1] = d1;
8246 }
8247 
8248 void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8249                      target_ulong addr)
8250 {
8251     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8252     uintptr_t ra = GETPC();
8253     uint64_t d0, d1;
8254 
8255     /*
8256      * Load 8 bytes at a time.  Use little-endian load, then for
8257      * big-endian target, we must then swap the four halfwords.
8258      */
8259     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8260     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8261 #if TARGET_BIG_ENDIAN
8262     d0 = bswap16x4(d0);
8263     d1 = bswap16x4(d1);
8264 #endif
8265     pwd->d[0] = d0;
8266     pwd->d[1] = d1;
8267 }
8268 
8269 void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8270                      target_ulong addr)
8271 {
8272     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8273     uintptr_t ra = GETPC();
8274     uint64_t d0, d1;
8275 
8276     /*
8277      * Load 8 bytes at a time.  Use little-endian load, then for
8278      * big-endian target, we must then bswap the two words.
8279      */
8280     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8281     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8282 #if TARGET_BIG_ENDIAN
8283     d0 = bswap32x2(d0);
8284     d1 = bswap32x2(d1);
8285 #endif
8286     pwd->d[0] = d0;
8287     pwd->d[1] = d1;
8288 }
8289 
8290 void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8291                      target_ulong addr)
8292 {
8293     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8294     uintptr_t ra = GETPC();
8295     uint64_t d0, d1;
8296 
8297     d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8298     d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8299     pwd->d[0] = d0;
8300     pwd->d[1] = d1;
8301 }
8302 
/*
 * True when an access of MSA_WRLEN / 8 bytes starting at x runs past
 * the end of the page containing x: the in-page offset of the last
 * byte accessed is compared against TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8305 
8306 static inline void ensure_writable_pages(CPUMIPSState *env,
8307                                          target_ulong addr,
8308                                          int mmu_idx,
8309                                          uintptr_t retaddr)
8310 {
8311     /* FIXME: Probe the actual accesses (pass and use a size) */
8312     if (unlikely(MSA_PAGESPAN(addr))) {
8313         /* first page */
8314         probe_write(env, addr, 0, mmu_idx, retaddr);
8315         /* second page */
8316         addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8317         probe_write(env, addr, 0, mmu_idx, retaddr);
8318     }
8319 }
8320 
8321 void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8322                      target_ulong addr)
8323 {
8324     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8325     int mmu_idx = cpu_mmu_index(env, false);
8326     uintptr_t ra = GETPC();
8327 
8328     ensure_writable_pages(env, addr, mmu_idx, ra);
8329 
8330     /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8331     cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8332     cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
8333 }
8334 
8335 void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8336                      target_ulong addr)
8337 {
8338     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8339     int mmu_idx = cpu_mmu_index(env, false);
8340     uintptr_t ra = GETPC();
8341     uint64_t d0, d1;
8342 
8343     ensure_writable_pages(env, addr, mmu_idx, ra);
8344 
8345     /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8346     d0 = pwd->d[0];
8347     d1 = pwd->d[1];
8348 #if TARGET_BIG_ENDIAN
8349     d0 = bswap16x4(d0);
8350     d1 = bswap16x4(d1);
8351 #endif
8352     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8353     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8354 }
8355 
8356 void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8357                      target_ulong addr)
8358 {
8359     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8360     int mmu_idx = cpu_mmu_index(env, false);
8361     uintptr_t ra = GETPC();
8362     uint64_t d0, d1;
8363 
8364     ensure_writable_pages(env, addr, mmu_idx, ra);
8365 
8366     /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8367     d0 = pwd->d[0];
8368     d1 = pwd->d[1];
8369 #if TARGET_BIG_ENDIAN
8370     d0 = bswap32x2(d0);
8371     d1 = bswap32x2(d1);
8372 #endif
8373     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8374     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8375 }
8376 
8377 void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8378                      target_ulong addr)
8379 {
8380     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8381     int mmu_idx = cpu_mmu_index(env, false);
8382     uintptr_t ra = GETPC();
8383 
8384     ensure_writable_pages(env, addr, mmu_idx, GETPC());
8385 
8386     cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8387     cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8388 }
8389