xref: /openbmc/qemu/target/mips/tcg/msa_helper.c (revision fe1a3ace13a8b53fc20c74fb7e3337f754396e6b)
1 /*
2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3  *
4  * Copyright (c) 2014 Imagination Technologies
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "tcg/tcg.h"
24 #include "exec/exec-all.h"
25 #include "accel/tcg/cpu-ldst.h"
26 #include "accel/tcg/probe.h"
27 #include "exec/helper-proto.h"
28 #include "exec/memop.h"
29 #include "exec/target_page.h"
30 #include "fpu/softfloat.h"
31 #include "fpu_helper.h"
32 
/*
 * Data format min and max values
 *
 * "df" is a data-format code; DF_BITS(df) is the element width in bits
 * (8 << df).  The M_* variants take a width in bits ("m") directly.
 *
 * All shifts are carried out on unsigned 64-bit operands so that the
 * 64-bit wide case never shifts a one into (or past) the sign bit of a
 * signed type; the result is converted to the signed type afterwards.
 * Every expansion is fully parenthesised so it can be used inside any
 * larger expression.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/* Largest signed value representable in an element. */
#define DF_MAX_INT(df)  ((int64_t)((1ULL << (DF_BITS(df) - 1)) - 1))
#define M_MAX_INT(m)    ((int64_t)((1ULL << ((m)         - 1)) - 1))

/* Smallest (most negative) signed value representable in an element. */
#define DF_MIN_INT(df)  ((int64_t)(-(1ULL << (DF_BITS(df) - 1))))
#define M_MIN_INT(m)    ((int64_t)(-(1ULL << ((m)         - 1))))

/* Largest unsigned value representable in an element (all bits set). */
#define DF_MAX_UINT(df) ((uint64_t)(-1ULL >> (64 - DF_BITS(df))))
#define M_MAX_UINT(m)   ((uint64_t)(-1ULL >> (64 - (m))))

/* Keep only the low DF_BITS(df) bits of x (zero extension). */
#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
/*
 * Sign-extend the low DF_BITS(df) bits of x to 64 bits.  The whole of x is
 * converted before shifting, and the left shift is done unsigned; the right
 * shift is an arithmetic shift of the signed value.
 */
#define SIGNED(x, df)                                                   \
    ((int64_t)((uint64_t)(x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))

/* Element-by-element access macros */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
51 
52 
53 
54 /*
55  * Bit Count
56  * ---------
57  *
58  * +---------------+----------------------------------------------------------+
59  * | NLOC.B        | Vector Leading Ones Count (byte)                         |
60  * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
61  * | NLOC.W        | Vector Leading Ones Count (word)                         |
62  * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
63  * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
64  * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
65  * | NLZC.W        | Vector Leading Zeros Count (word)                        |
66  * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
67  * | PCNT.B        | Vector Population Count (byte)                           |
68  * | PCNT.H        | Vector Population Count (halfword)                       |
69  * | PCNT.W        | Vector Population Count (word)                           |
70  * | PCNT.D        | Vector Population Count (doubleword)                     |
71  * +---------------+----------------------------------------------------------+
72  */
73 
/*
 * Count the leading zero bits of a single element.
 *
 * df selects the element width (DF_BITS(df) bits); only the low
 * DF_BITS(df) bits of arg are examined.  The result lies in the range
 * 0 .. DF_BITS(df), the upper bound being returned for a zero element.
 */
static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
{
    const int width = DF_BITS(df);
    uint64_t rest = UNSIGNED(arg, df);
    int zeros = width;
    int step;

    /*
     * Halving search for the highest set bit: whenever anything is set at
     * or above bit "step", discard the lower bits and take "step" off the
     * running count.
     */
    for (step = width / 2; step != 0; step >>= 1) {
        uint64_t upper = rest >> step;

        if (upper != 0) {
            zeros -= step;
            rest = upper;
        }
    }

    /* rest has been reduced to 0 or 1; a leftover 1 is one more used bit. */
    return zeros - rest;
}
94 
/*
 * Count the leading one bits of a single element of width DF_BITS(df):
 * the element is inverted, truncated to its width, and the leading zeros
 * of the result are counted.
 */
static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
{
    int64_t inverted = UNSIGNED(~arg, df);

    return msa_nlzc_df(df, inverted);
}
99 
100 void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
101 {
102     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
103     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
104 
105     pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
106     pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
107     pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
108     pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
109     pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
110     pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
111     pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
112     pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
113     pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
114     pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
115     pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
116     pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
117     pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
118     pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
119     pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
120     pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
121 }
122 
123 void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
124 {
125     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
126     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
127 
128     pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
129     pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
130     pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
131     pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
132     pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
133     pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
134     pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
135     pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
136 }
137 
138 void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
139 {
140     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
141     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
142 
143     pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
144     pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
145     pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
146     pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
147 }
148 
149 void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
150 {
151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
153 
154     pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
155     pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
156 }
157 
158 void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
159 {
160     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
161     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
162 
163     pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
164     pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
165     pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
166     pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
167     pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
168     pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
169     pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
170     pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
171     pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
172     pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
173     pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
174     pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
175     pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
176     pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
177     pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
178     pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
179 }
180 
181 void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
182 {
183     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
184     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
185 
186     pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
187     pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
188     pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
189     pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
190     pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
191     pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
192     pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
193     pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
194 }
195 
196 void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
197 {
198     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
199     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
200 
201     pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
202     pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
203     pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
204     pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
205 }
206 
207 void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
208 {
209     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
210     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
211 
212     pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
213     pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
214 }
215 
/*
 * Population count of a single element: the number of set bits among the
 * low DF_BITS(df) bits of arg.
 */
static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
{
    /*
     * Masks for the pairwise-sum reduction; entry k selects the low half
     * of every field that is 2^(k+1) bits wide.
     */
    static const uint64_t low_half[] = {
        0x5555555555555555ULL,
        0x3333333333333333ULL,
        0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL,
        0x0000FFFF0000FFFFULL,
        0x00000000FFFFFFFFULL,
    };
    uint64_t sums = UNSIGNED(arg, df);
    unsigned int k;

    /* Add adjacent 1-, 2-, 4-, 8-, 16- and 32-bit fields in turn. */
    for (k = 0; k < sizeof(low_half) / sizeof(low_half[0]); k++) {
        sums = (sums & low_half[k]) + ((sums >> (1u << k)) & low_half[k]);
    }

    return sums;
}
231 
232 void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
233 {
234     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
235     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
236 
237     pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
238     pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
239     pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
240     pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
241     pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
242     pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
243     pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
244     pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
245     pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
246     pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
247     pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
248     pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
249     pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
250     pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
251     pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
252     pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
253 }
254 
255 void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
256 {
257     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
258     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
259 
260     pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
261     pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
262     pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
263     pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
264     pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
265     pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
266     pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
267     pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
268 }
269 
270 void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
271 {
272     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
273     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
274 
275     pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
276     pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
277     pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
278     pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
279 }
280 
281 void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
282 {
283     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
284     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
285 
286     pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
287     pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
288 }
289 
290 
291 /*
292  * Bit Move
293  * --------
294  *
295  * +---------------+----------------------------------------------------------+
296  * | BINSL.B       | Vector Bit Insert Left (byte)                            |
297  * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
298  * | BINSL.W       | Vector Bit Insert Left (word)                            |
299  * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
300  * | BINSR.B       | Vector Bit Insert Right (byte)                           |
301  * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
302  * | BINSR.W       | Vector Bit Insert Right (word)                           |
303  * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
304  * | BMNZ.V        | Vector Bit Move If Not Zero                              |
305  * | BMZ.V         | Vector Bit Move If Zero                                  |
306  * | BSEL.V        | Vector Bit Select                                        |
307  * +---------------+----------------------------------------------------------+
308  */
309 
/*
 * Bit index within an element: val is reinterpreted as an unsigned 64-bit
 * value and reduced modulo the element width DF_BITS(fmt).
 */
#define BIT_POSITION(val, fmt) ((uint64_t)(val) % DF_BITS(fmt))
312 
/*
 * Bit insert left on one element of width DF_BITS(df).
 *
 * The n most significant bits of arg1 replace the n most significant bits
 * of dest, where n = BIT_POSITION(arg2, df) + 1; the remaining low bits of
 * dest are kept.  The result is returned zero-extended.
 */
static inline int64_t msa_binsl_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const int width = DF_BITS(df);
    const uint64_t src = UNSIGNED(arg1, df);
    const uint64_t old = UNSIGNED(dest, df);
    const int32_t n_insert = BIT_POSITION(arg2, df) + 1;
    const int32_t n_keep = width - n_insert;
    uint64_t kept, inserted;

    /* Whole element replaced; also avoids shifting by the full width. */
    if (n_insert == width) {
        return src;
    }

    /* Low n_keep bits of the old destination. */
    kept = UNSIGNED(UNSIGNED(old << n_insert, df) >> n_insert, df);
    /* High n_insert bits of the source. */
    inserted = UNSIGNED(UNSIGNED(src >> n_keep, df) << n_keep, df);

    return kept | inserted;
}
327 
328 void helper_msa_binsl_b(CPUMIPSState *env,
329                         uint32_t wd, uint32_t ws, uint32_t wt)
330 {
331     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
332     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
333     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
334 
335     pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
336     pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
337     pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
338     pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
339     pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
340     pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
341     pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
342     pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
343     pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
344     pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
345     pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
346     pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
347     pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
348     pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
349     pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
350     pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
351 }
352 
353 void helper_msa_binsl_h(CPUMIPSState *env,
354                         uint32_t wd, uint32_t ws, uint32_t wt)
355 {
356     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
357     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
358     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
359 
360     pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
361     pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
362     pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
363     pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
364     pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
365     pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
366     pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
367     pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
368 }
369 
370 void helper_msa_binsl_w(CPUMIPSState *env,
371                         uint32_t wd, uint32_t ws, uint32_t wt)
372 {
373     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
374     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
375     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
376 
377     pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
378     pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
379     pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
380     pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
381 }
382 
383 void helper_msa_binsl_d(CPUMIPSState *env,
384                         uint32_t wd, uint32_t ws, uint32_t wt)
385 {
386     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
387     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
388     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
389 
390     pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
391     pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
392 }
393 
/*
 * Bit insert right on one element of width DF_BITS(df).
 *
 * The n least significant bits of arg1 replace the n least significant
 * bits of dest, where n = BIT_POSITION(arg2, df) + 1; the remaining high
 * bits of dest are kept.  The result is returned zero-extended.
 */
static inline int64_t msa_binsr_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const int width = DF_BITS(df);
    const uint64_t src = UNSIGNED(arg1, df);
    const uint64_t old = UNSIGNED(dest, df);
    const int32_t n_insert = BIT_POSITION(arg2, df) + 1;
    const int32_t n_keep = width - n_insert;
    uint64_t kept, inserted;

    /* Whole element replaced; also avoids shifting by the full width. */
    if (n_insert == width) {
        return src;
    }

    /* High n_keep bits of the old destination. */
    kept = UNSIGNED(UNSIGNED(old >> n_insert, df) << n_insert, df);
    /* Low n_insert bits of the source. */
    inserted = UNSIGNED(UNSIGNED(src << n_keep, df) >> n_keep, df);

    return kept | inserted;
}
408 
409 void helper_msa_binsr_b(CPUMIPSState *env,
410                         uint32_t wd, uint32_t ws, uint32_t wt)
411 {
412     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
413     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
414     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
415 
416     pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
417     pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
418     pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
419     pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
420     pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
421     pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
422     pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
423     pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
424     pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
425     pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
426     pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
427     pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
428     pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
429     pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
430     pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
431     pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
432 }
433 
434 void helper_msa_binsr_h(CPUMIPSState *env,
435                         uint32_t wd, uint32_t ws, uint32_t wt)
436 {
437     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
438     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
439     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
440 
441     pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
442     pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
443     pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
444     pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
445     pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
446     pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
447     pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
448     pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
449 }
450 
451 void helper_msa_binsr_w(CPUMIPSState *env,
452                         uint32_t wd, uint32_t ws, uint32_t wt)
453 {
454     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
455     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
456     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
457 
458     pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
459     pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
460     pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
461     pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
462 }
463 
464 void helper_msa_binsr_d(CPUMIPSState *env,
465                         uint32_t wd, uint32_t ws, uint32_t wt)
466 {
467     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
468     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
469     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
470 
471     pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
472     pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
473 }
474 
475 void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
476 {
477     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
478     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
479     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
480 
481     pwd->d[0] = UNSIGNED(                                                     \
482         ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
483     pwd->d[1] = UNSIGNED(                                                     \
484         ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
485 }
486 
487 void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
488 {
489     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
490     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
491     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
492 
493     pwd->d[0] = UNSIGNED(                                                     \
494         ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
495     pwd->d[1] = UNSIGNED(                                                     \
496         ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
497 }
498 
499 void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
500 {
501     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
502     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
503     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
504 
505     pwd->d[0] = UNSIGNED(                                                     \
506         (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
507     pwd->d[1] = UNSIGNED(                                                     \
508         (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
509 }
510 
511 
512 /*
513  * Bit Set
514  * -------
515  *
516  * +---------------+----------------------------------------------------------+
517  * | BCLR.B        | Vector Bit Clear (byte)                                  |
518  * | BCLR.H        | Vector Bit Clear (halfword)                              |
519  * | BCLR.W        | Vector Bit Clear (word)                                  |
520  * | BCLR.D        | Vector Bit Clear (doubleword)                            |
521  * | BNEG.B        | Vector Bit Negate (byte)                                 |
522  * | BNEG.H        | Vector Bit Negate (halfword)                             |
523  * | BNEG.W        | Vector Bit Negate (word)                                 |
524  * | BNEG.D        | Vector Bit Negate (doubleword)                           |
525  * | BSET.B        | Vector Bit Set (byte)                                    |
526  * | BSET.H        | Vector Bit Set (halfword)                                |
527  * | BSET.W        | Vector Bit Set (word)                                    |
528  * | BSET.D        | Vector Bit Set (doubleword)                              |
529  * +---------------+----------------------------------------------------------+
530  */
531 
/*
 * Clear one bit of an element of width DF_BITS(df).
 *
 * The bit index is BIT_POSITION(arg2, df); the result is arg1 with that
 * bit cleared, truncated to the element width (zero-extended).
 */
static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * Build the single-bit mask as an unsigned value: for doubleword
     * elements the index can be 63, and shifting a signed 1 that far
     * would overflow the signed type.
     */
    return UNSIGNED(arg1 & ~(1ULL << b_arg2), df);
}
537 
538 void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
539 {
540     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
541     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
542     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
543 
544     pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
545     pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
546     pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
547     pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
548     pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
549     pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
550     pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
551     pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
552     pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
553     pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
554     pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
555     pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
556     pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
557     pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
558     pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
559     pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
560 }
561 
562 void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
563 {
564     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
565     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
566     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
567 
568     pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
569     pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
570     pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
571     pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
572     pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
573     pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
574     pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
575     pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
576 }
577 
578 void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
579 {
580     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
581     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
582     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
583 
584     pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
585     pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
586     pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
587     pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
588 }
589 
590 void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
591 {
592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
595 
596     pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
597     pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
598 }
599 
/*
 * Invert one bit of an element of width DF_BITS(df).
 *
 * The bit index is BIT_POSITION(arg2, df); the result is arg1 with that
 * bit flipped, truncated to the element width (zero-extended).
 */
static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * Build the single-bit mask as an unsigned value: for doubleword
     * elements the index can be 63, and shifting a signed 1 that far
     * would overflow the signed type.
     */
    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
}
605 
606 void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
607 {
608     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
609     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
610     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
611 
612     pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
613     pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
614     pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
615     pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
616     pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
617     pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
618     pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
619     pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
620     pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
621     pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
622     pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
623     pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
624     pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
625     pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
626     pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
627     pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
628 }
629 
630 void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
631 {
632     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
633     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
634     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
635 
636     pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
637     pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
638     pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
639     pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
640     pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
641     pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
642     pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
643     pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
644 }
645 
646 void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
647 {
648     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
649     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
650     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
651 
652     pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
653     pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
654     pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
655     pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
656 }
657 
658 void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
659 {
660     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
661     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
662     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
663 
664     pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
665     pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
666 }
667 
/*
 * Set one bit of an element of width DF_BITS(df).
 *
 * The bit index is BIT_POSITION(arg2, df); the result is arg1 with that
 * bit set, truncated to the element width (zero-extended).
 */
static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
        int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * Build the single-bit mask as an unsigned value: for doubleword
     * elements the index can be 63, and shifting a signed 1 that far
     * would overflow the signed type.
     */
    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
}
674 
675 void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
676 {
677     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
678     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
679     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
680 
681     pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
682     pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
683     pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
684     pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
685     pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
686     pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
687     pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
688     pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
689     pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
690     pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
691     pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
692     pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
693     pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
694     pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
695     pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
696     pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
697 }
698 
699 void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
700 {
701     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
702     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
703     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
704 
705     pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
706     pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
707     pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
708     pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
709     pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
710     pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
711     pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
712     pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
713 }
714 
715 void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
716 {
717     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
718     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
719     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
720 
721     pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
722     pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
723     pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
724     pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
725 }
726 
727 void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
728 {
729     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
730     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
731     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
732 
733     pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
734     pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
735 }
736 
737 
738 /*
739  * Fixed Multiply
740  * --------------
741  *
742  * +---------------+----------------------------------------------------------+
743  * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
744  * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
745  * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
746  * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
747  * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
748  * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
749  * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
750  * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
751  * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
752  * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
753  * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
754  * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
755  * +---------------+----------------------------------------------------------+
756  */
757 
758 /* TODO: insert Fixed Multiply group helpers here */
759 
760 
761 /*
762  * Float Max Min
763  * -------------
764  *
765  * +---------------+----------------------------------------------------------+
766  * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
767  * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
768  * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
769  * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
770  * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
771  * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
772  * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
773  * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
774  * +---------------+----------------------------------------------------------+
775  */
776 
777 /* TODO: insert Float Max Min group helpers here */
778 
779 
780 /*
781  * Int Add
782  * -------
783  *
784  * +---------------+----------------------------------------------------------+
785  * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
786  * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
787  * | ADD_A.W       | Vector Add Absolute Values (word)                        |
788  * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
789  * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
790  * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
791  * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
792  * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
793  * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
794  * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
795  * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
796  * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
797  * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
798  * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
799  * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
800  * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
801  * | ADDV.B        | Vector Add (byte)                                        |
802  * | ADDV.H        | Vector Add (halfword)                                    |
803  * | ADDV.W        | Vector Add (word)                                        |
804  * | ADDV.D        | Vector Add (doubleword)                                  |
805  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
806  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
807  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
808  * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
809  * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
810  * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
811  * +---------------+----------------------------------------------------------+
812  */
813 
814 
/*
 * Add the absolute values of two signed elements.
 *
 * df   - data format; unused here, kept so all *_df helpers share a shape
 * arg1 - first signed operand
 * arg2 - second signed operand
 *
 * Returns |arg1| + |arg2| computed modulo 2^64; callers store the result
 * into a lane of the destination register, which keeps the low bits.
 */
static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in unsigned arithmetic: "-arg" on a signed INT64_MIN overflows,
     * which ISO C leaves undefined, while 0 - (uint64_t)arg is well defined
     * and yields the same bit pattern.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : 0 - (uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : 0 - (uint64_t)arg2;

    return (int64_t)(abs_arg1 + abs_arg2);
}
821 
822 void helper_msa_add_a_b(CPUMIPSState *env,
823                         uint32_t wd, uint32_t ws, uint32_t wt)
824 {
825     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
826     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
827     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
828 
829     pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
830     pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
831     pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
832     pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
833     pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
834     pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
835     pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
836     pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
837     pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
838     pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
839     pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
840     pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
841     pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
842     pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
843     pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
844     pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
845 }
846 
847 void helper_msa_add_a_h(CPUMIPSState *env,
848                         uint32_t wd, uint32_t ws, uint32_t wt)
849 {
850     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
851     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
852     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
853 
854     pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
855     pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
856     pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
857     pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
858     pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
859     pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
860     pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
861     pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
862 }
863 
864 void helper_msa_add_a_w(CPUMIPSState *env,
865                         uint32_t wd, uint32_t ws, uint32_t wt)
866 {
867     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
868     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
869     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
870 
871     pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
872     pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
873     pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
874     pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
875 }
876 
877 void helper_msa_add_a_d(CPUMIPSState *env,
878                         uint32_t wd, uint32_t ws, uint32_t wt)
879 {
880     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
881     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
882     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
883 
884     pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
885     pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
886 }
887 
888 
/*
 * Saturating add of the absolute values of two signed elements.
 *
 * df   - data format; selects the saturation limit DF_MAX_INT(df)
 * arg1 - first signed operand
 * arg2 - second signed operand
 *
 * Returns |arg1| + |arg2|, clamped to DF_MAX_INT(df). If either absolute
 * value alone already exceeds that limit, the limit is returned directly.
 */
static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    /*
     * Negate in unsigned arithmetic: "-arg" on a signed INT64_MIN overflows,
     * which ISO C leaves undefined, while 0 - (uint64_t)arg is well defined
     * and yields the same bit pattern.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : 0 - (uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : 0 - (uint64_t)arg2;

    if (abs_arg1 > max_int || abs_arg2 > max_int) {
        return (int64_t)max_int;
    }

    /*
     * Both magnitudes are <= max_int here, so the subtraction cannot wrap.
     * When the sum equals max_int exactly, either branch yields max_int.
     */
    return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
}
900 
901 void helper_msa_adds_a_b(CPUMIPSState *env,
902                          uint32_t wd, uint32_t ws, uint32_t wt)
903 {
904     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
905     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
906     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
907 
908     pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
909     pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
910     pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
911     pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
912     pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
913     pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
914     pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
915     pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
916     pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
917     pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
918     pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
919     pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
920     pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
921     pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
922     pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
923     pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
924 }
925 
926 void helper_msa_adds_a_h(CPUMIPSState *env,
927                          uint32_t wd, uint32_t ws, uint32_t wt)
928 {
929     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
930     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
931     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
932 
933     pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
934     pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
935     pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
936     pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
937     pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
938     pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
939     pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
940     pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
941 }
942 
943 void helper_msa_adds_a_w(CPUMIPSState *env,
944                          uint32_t wd, uint32_t ws, uint32_t wt)
945 {
946     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
947     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
948     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
949 
950     pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
951     pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
952     pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
953     pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
954 }
955 
956 void helper_msa_adds_a_d(CPUMIPSState *env,
957                          uint32_t wd, uint32_t ws, uint32_t wt)
958 {
959     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
960     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
961     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
962 
963     pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
964     pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
965 }
966 
967 
/*
 * Signed saturating add.
 *
 * df   - data format; selects the bounds DF_MIN_INT(df) and DF_MAX_INT(df)
 * arg1 - first signed operand
 * arg2 - second signed operand
 *
 * Returns arg1 + arg2, clamped to the lower bound when arg1 is negative
 * and to the upper bound otherwise. The range test is done by subtraction
 * so that the sum is only formed once it is known to fit.
 */
static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg1 >= 0) {
        /* Only the upper bound can be crossed. */
        if (arg2 < upper - arg1) {
            return arg1 + arg2;
        }
        return upper;
    }

    /* arg1 is negative: only the lower bound can be crossed. */
    if (lower - arg1 < arg2) {
        return arg1 + arg2;
    }
    return lower;
}
978 
979 void helper_msa_adds_s_b(CPUMIPSState *env,
980                          uint32_t wd, uint32_t ws, uint32_t wt)
981 {
982     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
983     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
984     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
985 
986     pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
987     pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
988     pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
989     pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
990     pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
991     pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
992     pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
993     pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
994     pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
995     pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
996     pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
997     pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
998     pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
999     pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1000     pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1001     pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1002 }
1003 
1004 void helper_msa_adds_s_h(CPUMIPSState *env,
1005                          uint32_t wd, uint32_t ws, uint32_t wt)
1006 {
1007     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1008     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1009     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1010 
1011     pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1012     pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1013     pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1014     pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1015     pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1016     pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1017     pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1018     pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1019 }
1020 
1021 void helper_msa_adds_s_w(CPUMIPSState *env,
1022                          uint32_t wd, uint32_t ws, uint32_t wt)
1023 {
1024     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1025     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1026     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1027 
1028     pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1029     pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1030     pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1031     pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1032 }
1033 
1034 void helper_msa_adds_s_d(CPUMIPSState *env,
1035                          uint32_t wd, uint32_t ws, uint32_t wt)
1036 {
1037     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1038     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1039     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1040 
1041     pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1042     pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1043 }
1044 
1045 
/*
 * Unsigned saturating add.
 *
 * df   - data format; selects the limit DF_MAX_UINT(df)
 * arg1 - first operand, masked to the element width before use
 * arg2 - second operand, masked to the element width before use
 *
 * Returns the sum of the masked operands, clamped to DF_MAX_UINT(df).
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t ceiling = DF_MAX_UINT(df);
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* rhs <= ceiling after masking, so the headroom cannot wrap. */
    if (lhs < ceiling - rhs) {
        return lhs + rhs;
    }
    return ceiling;
}
1053 
1054 void helper_msa_adds_u_b(CPUMIPSState *env,
1055                          uint32_t wd, uint32_t ws, uint32_t wt)
1056 {
1057     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1058     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1059     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1060 
1061     pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1062     pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1063     pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1064     pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1065     pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1066     pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1067     pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1068     pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1069     pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1070     pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1071     pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1072     pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1073     pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1074     pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1075     pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1076     pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1077 }
1078 
1079 void helper_msa_adds_u_h(CPUMIPSState *env,
1080                          uint32_t wd, uint32_t ws, uint32_t wt)
1081 {
1082     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1083     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1084     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1085 
1086     pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1087     pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1088     pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1089     pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1090     pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1091     pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1092     pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1093     pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1094 }
1095 
1096 void helper_msa_adds_u_w(CPUMIPSState *env,
1097                          uint32_t wd, uint32_t ws, uint32_t wt)
1098 {
1099     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1100     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1101     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1102 
1103     pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1104     pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1105     pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1106     pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1107 }
1108 
1109 void helper_msa_adds_u_d(CPUMIPSState *env,
1110                          uint32_t wd, uint32_t ws, uint32_t wt)
1111 {
1112     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1113     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1114     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1115 
1116     pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1117     pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1118 }
1119 
1120 
/*
 * Plain (wrapping) element add.
 *
 * df   - data format; unused here, kept so all *_df helpers share a shape
 * arg1 - first operand
 * arg2 - second operand
 *
 * Returns arg1 + arg2 modulo 2^64; callers store the result into a lane of
 * the destination register, which keeps the low bits.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Add in unsigned arithmetic: with full-width operands a signed
     * "arg1 + arg2" can overflow, which ISO C leaves undefined, whereas
     * unsigned addition wraps and produces the same bit pattern.
     */
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1125 
1126 void helper_msa_addv_b(CPUMIPSState *env,
1127                        uint32_t wd, uint32_t ws, uint32_t wt)
1128 {
1129     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1130     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1131     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1132 
1133     pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1134     pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1135     pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1136     pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1137     pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1138     pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1139     pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1140     pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1141     pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1142     pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1143     pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1144     pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1145     pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1146     pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1147     pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1148     pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1149 }
1150 
1151 void helper_msa_addv_h(CPUMIPSState *env,
1152                        uint32_t wd, uint32_t ws, uint32_t wt)
1153 {
1154     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1155     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1156     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1157 
1158     pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1159     pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1160     pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1161     pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1162     pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1163     pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1164     pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1165     pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1166 }
1167 
1168 void helper_msa_addv_w(CPUMIPSState *env,
1169                        uint32_t wd, uint32_t ws, uint32_t wt)
1170 {
1171     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1172     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1173     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1174 
1175     pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1176     pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1177     pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1178     pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1179 }
1180 
1181 void helper_msa_addv_d(CPUMIPSState *env,
1182                        uint32_t wd, uint32_t ws, uint32_t wt)
1183 {
1184     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1185     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1186     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1187 
1188     pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1189     pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1190 }
1191 
1192 
/*
 * Half-element extraction helpers. Each takes an element value `a` of data
 * format `df` (DF_BITS(df) bits wide) and yields one half of it, widened
 * to 64 bits, by shifting the wanted bits to the top of a 64-bit value and
 * back down again.
 *
 * NOTE(review): the signed variants left-shift an int64_t and rely on an
 * arithmetic right shift; confirm the build flags make this well defined.
 */

/* Lower half of the element, sign-extended. */
#define SIGNED_EVEN(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* Lower half of the element, zero-extended. */
#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* Upper half of the element, sign-extended. */
#define SIGNED_ODD(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))

/* Upper half of the element, zero-extended. */
#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1204 
1205 
/*
 * Signed horizontal add.
 *
 * df   - data format of the (full-width) result element
 * arg1 - element whose upper half is used, via SIGNED_ODD()
 * arg2 - element whose lower half is used, via SIGNED_EVEN()
 *
 * Returns the sum of the two sign-extended halves.
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t upper_of_arg1 = SIGNED_ODD(arg1, df);
    int64_t lower_of_arg2 = SIGNED_EVEN(arg2, df);

    return upper_of_arg1 + lower_of_arg2;
}
1210 
1211 void helper_msa_hadd_s_h(CPUMIPSState *env,
1212                          uint32_t wd, uint32_t ws, uint32_t wt)
1213 {
1214     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1215     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1216     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1217 
1218     pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1219     pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1220     pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1221     pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1222     pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1223     pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1224     pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1225     pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1226 }
1227 
1228 void helper_msa_hadd_s_w(CPUMIPSState *env,
1229                          uint32_t wd, uint32_t ws, uint32_t wt)
1230 {
1231     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1232     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1233     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1234 
1235     pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1236     pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1237     pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1238     pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1239 }
1240 
1241 void helper_msa_hadd_s_d(CPUMIPSState *env,
1242                          uint32_t wd, uint32_t ws, uint32_t wt)
1243 {
1244     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1245     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1246     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1247 
1248     pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1249     pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1250 }
1251 
1252 
/*
 * Unsigned horizontal add.
 *
 * df   - data format of the (full-width) result element
 * arg1 - element whose upper half is used, via UNSIGNED_ODD()
 * arg2 - element whose lower half is used, via UNSIGNED_EVEN()
 *
 * Returns the sum of the two zero-extended halves.
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t upper_of_arg1 = UNSIGNED_ODD(arg1, df);
    uint64_t lower_of_arg2 = UNSIGNED_EVEN(arg2, df);

    return upper_of_arg1 + lower_of_arg2;
}
1257 
1258 void helper_msa_hadd_u_h(CPUMIPSState *env,
1259                          uint32_t wd, uint32_t ws, uint32_t wt)
1260 {
1261     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1262     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1263     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1264 
1265     pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1266     pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1267     pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1268     pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1269     pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1270     pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1271     pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1272     pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1273 }
1274 
1275 void helper_msa_hadd_u_w(CPUMIPSState *env,
1276                          uint32_t wd, uint32_t ws, uint32_t wt)
1277 {
1278     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1279     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1280     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1281 
1282     pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1283     pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1284     pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1285     pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1286 }
1287 
1288 void helper_msa_hadd_u_d(CPUMIPSState *env,
1289                          uint32_t wd, uint32_t ws, uint32_t wt)
1290 {
1291     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1292     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1293     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1294 
1295     pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1296     pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1297 }
1298 
1299 
1300 /*
1301  * Int Average
1302  * -----------
1303  *
1304  * +---------------+----------------------------------------------------------+
1305  * | AVE_S.B       | Vector Signed Average (byte)                             |
1306  * | AVE_S.H       | Vector Signed Average (halfword)                         |
1307  * | AVE_S.W       | Vector Signed Average (word)                             |
1308  * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1309  * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1310  * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1311  * | AVE_U.W       | Vector Unsigned Average (word)                           |
1312  * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1313  * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1314  * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1315  * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1316  * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1317  * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1318  * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1319  * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1320  * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1321  * +---------------+----------------------------------------------------------+
1322  */
1323 
/*
 * Signed average without forming the full sum.
 *
 * df   - data format; unused here, kept so all *_df helpers share a shape
 * arg1 - first signed operand
 * arg2 - second signed operand
 *
 * Halves each operand first (signed right shift), then adds back the one
 * that is lost when both operands have their lowest bit set.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* Signed shifts: each operand is halved on its own. */
    int64_t sum_of_halves = (arg1 >> 1) + (arg2 >> 1);
    /* Both low bits set means the two dropped halves add up to one. */
    int64_t both_low_bits = arg1 & arg2 & 1;

    return sum_of_halves + both_low_bits;
}
1329 
1330 void helper_msa_ave_s_b(CPUMIPSState *env,
1331                         uint32_t wd, uint32_t ws, uint32_t wt)
1332 {
1333     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1334     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1335     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1336 
1337     pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1338     pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1339     pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1340     pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1341     pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1342     pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1343     pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1344     pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1345     pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1346     pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1347     pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1348     pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1349     pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1350     pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1351     pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1352     pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1353 }
1354 
1355 void helper_msa_ave_s_h(CPUMIPSState *env,
1356                         uint32_t wd, uint32_t ws, uint32_t wt)
1357 {
1358     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1359     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1360     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1361 
1362     pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1363     pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1364     pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1365     pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1366     pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1367     pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1368     pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1369     pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1370 }
1371 
1372 void helper_msa_ave_s_w(CPUMIPSState *env,
1373                         uint32_t wd, uint32_t ws, uint32_t wt)
1374 {
1375     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1376     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1377     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1378 
1379     pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1380     pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1381     pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1382     pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1383 }
1384 
1385 void helper_msa_ave_s_d(CPUMIPSState *env,
1386                         uint32_t wd, uint32_t ws, uint32_t wt)
1387 {
1388     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1389     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1390     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1391 
1392     pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1393     pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1394 }
1395 
/*
 * Unsigned average of two elements, rounded down. Both operands are first
 * masked to the low DF_BITS(df) bits, then halved before the addition;
 * one is added back only when both masked operands have their low bit set.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t halves = (lhs >> 1) + (rhs >> 1);  /* unsigned shift */

    return halves + (lhs & rhs & 1);
}
1403 
1404 void helper_msa_ave_u_b(CPUMIPSState *env,
1405                         uint32_t wd, uint32_t ws, uint32_t wt)
1406 {
1407     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1408     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1409     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1410 
1411     pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1412     pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1413     pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1414     pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1415     pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1416     pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1417     pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1418     pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1419     pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1420     pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1421     pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1422     pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1423     pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1424     pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1425     pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1426     pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1427 }
1428 
1429 void helper_msa_ave_u_h(CPUMIPSState *env,
1430                         uint32_t wd, uint32_t ws, uint32_t wt)
1431 {
1432     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1433     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1434     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1435 
1436     pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1437     pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1438     pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1439     pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1440     pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1441     pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1442     pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1443     pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1444 }
1445 
1446 void helper_msa_ave_u_w(CPUMIPSState *env,
1447                         uint32_t wd, uint32_t ws, uint32_t wt)
1448 {
1449     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1450     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1451     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1452 
1453     pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1454     pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1455     pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1456     pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1457 }
1458 
1459 void helper_msa_ave_u_d(CPUMIPSState *env,
1460                         uint32_t wd, uint32_t ws, uint32_t wt)
1461 {
1462     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1463     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1464     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1465 
1466     pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1467     pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1468 }
1469 
/*
 * Signed average of two elements, rounded up: each operand is halved with
 * a signed shift before the addition, and one is added back when either
 * operand has its low bit set. 'df' is not used here.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t half1 = arg1 >> 1;              /* signed shift */
    int64_t half2 = arg2 >> 1;              /* signed shift */
    int64_t round = (arg1 | arg2) & 1;      /* either low bit set */

    return half1 + half2 + round;
}
1475 
1476 void helper_msa_aver_s_b(CPUMIPSState *env,
1477                          uint32_t wd, uint32_t ws, uint32_t wt)
1478 {
1479     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1480     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1481     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1482 
1483     pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1484     pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1485     pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1486     pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1487     pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1488     pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1489     pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1490     pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1491     pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1492     pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1493     pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1494     pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1495     pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1496     pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1497     pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1498     pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1499 }
1500 
1501 void helper_msa_aver_s_h(CPUMIPSState *env,
1502                          uint32_t wd, uint32_t ws, uint32_t wt)
1503 {
1504     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1505     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1506     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1507 
1508     pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1509     pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1510     pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1511     pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1512     pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1513     pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1514     pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1515     pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1516 }
1517 
1518 void helper_msa_aver_s_w(CPUMIPSState *env,
1519                          uint32_t wd, uint32_t ws, uint32_t wt)
1520 {
1521     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1522     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1523     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1524 
1525     pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1526     pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1527     pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1528     pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1529 }
1530 
1531 void helper_msa_aver_s_d(CPUMIPSState *env,
1532                          uint32_t wd, uint32_t ws, uint32_t wt)
1533 {
1534     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1535     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1536     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1537 
1538     pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1539     pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1540 }
1541 
/*
 * Unsigned average of two elements, rounded up. Both operands are first
 * masked to the low DF_BITS(df) bits, then halved before the addition;
 * one is added back when either masked operand has its low bit set.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t halves = (lhs >> 1) + (rhs >> 1);  /* unsigned shift */

    return halves + ((lhs | rhs) & 1);
}
1549 
1550 void helper_msa_aver_u_b(CPUMIPSState *env,
1551                          uint32_t wd, uint32_t ws, uint32_t wt)
1552 {
1553     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1554     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1555     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1556 
1557     pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1558     pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1559     pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1560     pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1561     pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1562     pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1563     pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1564     pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1565     pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1566     pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1567     pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1568     pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1569     pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1570     pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1571     pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1572     pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1573 }
1574 
1575 void helper_msa_aver_u_h(CPUMIPSState *env,
1576                          uint32_t wd, uint32_t ws, uint32_t wt)
1577 {
1578     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1579     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1580     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1581 
1582     pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1583     pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1584     pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1585     pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1586     pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1587     pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1588     pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1589     pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1590 }
1591 
1592 void helper_msa_aver_u_w(CPUMIPSState *env,
1593                          uint32_t wd, uint32_t ws, uint32_t wt)
1594 {
1595     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1596     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1597     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1598 
1599     pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1600     pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1601     pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1602     pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1603 }
1604 
1605 void helper_msa_aver_u_d(CPUMIPSState *env,
1606                          uint32_t wd, uint32_t ws, uint32_t wt)
1607 {
1608     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1609     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1610     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1611 
1612     pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1613     pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1614 }
1615 
1616 
1617 /*
1618  * Int Compare
1619  * -----------
1620  *
1621  * +---------------+----------------------------------------------------------+
1622  * | CEQ.B         | Vector Compare Equal (byte)                              |
1623  * | CEQ.H         | Vector Compare Equal (halfword)                          |
1624  * | CEQ.W         | Vector Compare Equal (word)                              |
1625  * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1626  * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1627  * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1628  * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1629  * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1630  * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1631  * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1632  * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1633  * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1634  * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1635  * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1636  * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1637  * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1638  * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1639  * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1640  * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1641  * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1642  * +---------------+----------------------------------------------------------+
1643  */
1644 
/*
 * Equality compare: returns -1 (all bits set) when the two arguments are
 * equal and 0 otherwise. 'df' is not used here.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 == arg2) {
        return -1;
    }
    return 0;
}
1649 
/* 8-bit equality compare: -1 (all bits set) when equal, otherwise 0. */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1654 
1655 void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1656 {
1657     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1658     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1659     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1660 
1661     pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1662     pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1663     pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1664     pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1665     pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1666     pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1667     pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1668     pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1669     pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1670     pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1671     pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1672     pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1673     pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1674     pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1675     pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1676     pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1677 }
1678 
/* 16-bit equality compare: -1 (all bits set) when equal, otherwise 0. */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1683 
1684 void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1685 {
1686     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1687     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1688     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1689 
1690     pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1691     pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1692     pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1693     pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1694     pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1695     pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1696     pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1697     pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1698 }
1699 
/* 32-bit equality compare: -1 (all bits set) when equal, otherwise 0. */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1704 
1705 void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1706 {
1707     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1708     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1709     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1710 
1711     pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1712     pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1713     pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1714     pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1715 }
1716 
/* 64-bit equality compare: -1 (all bits set) when equal, otherwise 0. */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1721 
1722 void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1723 {
1724     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1725     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1726     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1727 
1728     pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1729     pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1730 }
1731 
/*
 * Signed less-than-or-equal compare: returns -1 (all bits set) when
 * arg1 <= arg2 and 0 otherwise. 'df' is not used here.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 <= arg2) {
        return -1;
    }
    return 0;
}
1736 
1737 void helper_msa_cle_s_b(CPUMIPSState *env,
1738                         uint32_t wd, uint32_t ws, uint32_t wt)
1739 {
1740     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1741     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1742     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1743 
1744     pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1745     pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1746     pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1747     pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1748     pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1749     pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1750     pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1751     pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1752     pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1753     pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1754     pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1755     pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1756     pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1757     pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1758     pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1759     pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1760 }
1761 
1762 void helper_msa_cle_s_h(CPUMIPSState *env,
1763                         uint32_t wd, uint32_t ws, uint32_t wt)
1764 {
1765     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1766     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1767     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1768 
1769     pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1770     pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1771     pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1772     pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1773     pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1774     pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1775     pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1776     pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1777 }
1778 
1779 void helper_msa_cle_s_w(CPUMIPSState *env,
1780                         uint32_t wd, uint32_t ws, uint32_t wt)
1781 {
1782     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1783     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1784     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1785 
1786     pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1787     pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1788     pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1789     pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1790 }
1791 
1792 void helper_msa_cle_s_d(CPUMIPSState *env,
1793                         uint32_t wd, uint32_t ws, uint32_t wt)
1794 {
1795     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1796     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1797     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1798 
1799     pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1800     pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1801 }
1802 
/*
 * Unsigned less-than-or-equal compare. Both arguments are masked to their
 * low DF_BITS(df) bits and compared as unsigned values; the result is -1
 * (all bits set) when arg1 <= arg2 and 0 otherwise.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs <= rhs) {
        return -1;
    }
    return 0;
}
1809 
1810 void helper_msa_cle_u_b(CPUMIPSState *env,
1811                         uint32_t wd, uint32_t ws, uint32_t wt)
1812 {
1813     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1814     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1815     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1816 
1817     pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1818     pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1819     pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1820     pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1821     pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1822     pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1823     pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1824     pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1825     pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1826     pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1827     pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1828     pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1829     pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1830     pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1831     pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1832     pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1833 }
1834 
1835 void helper_msa_cle_u_h(CPUMIPSState *env,
1836                         uint32_t wd, uint32_t ws, uint32_t wt)
1837 {
1838     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1839     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1840     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1841 
1842     pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1843     pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1844     pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1845     pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1846     pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1847     pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1848     pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1849     pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1850 }
1851 
1852 void helper_msa_cle_u_w(CPUMIPSState *env,
1853                         uint32_t wd, uint32_t ws, uint32_t wt)
1854 {
1855     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1856     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1857     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1858 
1859     pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1860     pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1861     pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1862     pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1863 }
1864 
1865 void helper_msa_cle_u_d(CPUMIPSState *env,
1866                         uint32_t wd, uint32_t ws, uint32_t wt)
1867 {
1868     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1869     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1870     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1871 
1872     pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1873     pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1874 }
1875 
/*
 * Signed less-than compare: returns -1 (all bits set) when arg1 < arg2
 * and 0 otherwise. 'df' is not used here.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1880 
/* 8-bit signed less-than: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1885 
1886 void helper_msa_clt_s_b(CPUMIPSState *env,
1887                         uint32_t wd, uint32_t ws, uint32_t wt)
1888 {
1889     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1890     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1891     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1892 
1893     pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1894     pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1895     pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1896     pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1897     pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1898     pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1899     pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1900     pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1901     pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1902     pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1903     pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1904     pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1905     pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1906     pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1907     pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1908     pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1909 }
1910 
/* 16-bit signed less-than: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1915 
1916 void helper_msa_clt_s_h(CPUMIPSState *env,
1917                         uint32_t wd, uint32_t ws, uint32_t wt)
1918 {
1919     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1920     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1921     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1922 
1923     pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1924     pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1925     pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1926     pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1927     pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1928     pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1929     pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1930     pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1931 }
1932 
/* 32-bit signed less-than: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1937 
1938 void helper_msa_clt_s_w(CPUMIPSState *env,
1939                         uint32_t wd, uint32_t ws, uint32_t wt)
1940 {
1941     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1942     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1943     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1944 
1945     pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1946     pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1947     pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1948     pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1949 }
1950 
/* 64-bit signed less-than: -1 (all bits set) when arg1 < arg2, else 0. */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1955 
1956 void helper_msa_clt_s_d(CPUMIPSState *env,
1957                         uint32_t wd, uint32_t ws, uint32_t wt)
1958 {
1959     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1960     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1961     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1962 
1963     pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1964     pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1965 }
1966 
/*
 * Unsigned less-than compare. Both arguments are masked to their low
 * DF_BITS(df) bits and compared as unsigned values; the result is -1
 * (all bits set) when arg1 < arg2 and 0 otherwise.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return -1;
    }
    return 0;
}
1973 
1974 void helper_msa_clt_u_b(CPUMIPSState *env,
1975                         uint32_t wd, uint32_t ws, uint32_t wt)
1976 {
1977     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1978     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1979     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1980 
1981     pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1982     pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1983     pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1984     pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1985     pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1986     pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1987     pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1988     pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1989     pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1990     pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1991     pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1992     pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1993     pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1994     pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1995     pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1996     pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1997 }
1998 
1999 void helper_msa_clt_u_h(CPUMIPSState *env,
2000                         uint32_t wd, uint32_t ws, uint32_t wt)
2001 {
2002     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2003     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2004     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2005 
2006     pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2007     pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2008     pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2009     pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2010     pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2011     pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2012     pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2013     pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2014 }
2015 
2016 void helper_msa_clt_u_w(CPUMIPSState *env,
2017                         uint32_t wd, uint32_t ws, uint32_t wt)
2018 {
2019     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2020     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2021     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2022 
2023     pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2024     pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2025     pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2026     pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2027 }
2028 
2029 void helper_msa_clt_u_d(CPUMIPSState *env,
2030                         uint32_t wd, uint32_t ws, uint32_t wt)
2031 {
2032     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2033     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2034     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2035 
2036     pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2037     pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2038 }
2039 
2040 
2041 /*
2042  * Int Divide
2043  * ----------
2044  *
2045  * +---------------+----------------------------------------------------------+
2046  * | DIV_S.B       | Vector Signed Divide (byte)                              |
2047  * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2048  * | DIV_S.W       | Vector Signed Divide (word)                              |
2049  * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2050  * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2051  * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2052  * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2053  * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2054  * +---------------+----------------------------------------------------------+
2055  */
2056 
2057 
/*
 * Signed division of one element.
 *
 * df selects the element width (used only through DF_MIN_INT()).
 * Two special cases are handled without dividing:
 *  - divisor == 0: the result is -1 for a non-negative dividend, 1 otherwise;
 *  - DF_MIN_INT(df) / -1: the dividend is returned unchanged.
 */
static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t min_int = DF_MIN_INT(df);

    if (arg2 == 0) {
        return arg1 < 0 ? 1 : -1;
    }
    if (arg2 == -1 && arg1 == min_int) {
        return min_int;
    }
    return arg1 / arg2;
}
2066 
2067 void helper_msa_div_s_b(CPUMIPSState *env,
2068                         uint32_t wd, uint32_t ws, uint32_t wt)
2069 {
2070     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2071     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2072     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2073 
2074     pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2075     pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2076     pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2077     pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2078     pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2079     pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2080     pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2081     pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2082     pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2083     pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2084     pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2085     pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2086     pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2087     pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2088     pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2089     pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2090 }
2091 
2092 void helper_msa_div_s_h(CPUMIPSState *env,
2093                         uint32_t wd, uint32_t ws, uint32_t wt)
2094 {
2095     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2096     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2097     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2098 
2099     pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2100     pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2101     pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2102     pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2103     pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2104     pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2105     pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2106     pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2107 }
2108 
2109 void helper_msa_div_s_w(CPUMIPSState *env,
2110                         uint32_t wd, uint32_t ws, uint32_t wt)
2111 {
2112     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2113     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2114     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2115 
2116     pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2117     pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2118     pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2119     pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2120 }
2121 
2122 void helper_msa_div_s_d(CPUMIPSState *env,
2123                         uint32_t wd, uint32_t ws, uint32_t wt)
2124 {
2125     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2126     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2127     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2128 
2129     pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2130     pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2131 }
2132 
/*
 * Unsigned division of one element.
 *
 * Both operands are first truncated to the element width selected by df
 * (UNSIGNED() masks them with DF_MAX_UINT(df)).  A zero divisor yields -1,
 * i.e. all bits set.
 */
static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);

    /*
     * Test the truncated divisor, since that is the value actually used in
     * the division: an arg2 with only bits above the element width set
     * would otherwise slip past the check and divide by zero on the host.
     */
    return u_arg2 ? u_arg1 / u_arg2 : -1;
}
2139 
2140 void helper_msa_div_u_b(CPUMIPSState *env,
2141                         uint32_t wd, uint32_t ws, uint32_t wt)
2142 {
2143     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2144     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2145     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2146 
2147     pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2148     pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2149     pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2150     pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2151     pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2152     pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2153     pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2154     pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2155     pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2156     pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2157     pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2158     pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2159     pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2160     pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2161     pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2162     pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2163 }
2164 
2165 void helper_msa_div_u_h(CPUMIPSState *env,
2166                         uint32_t wd, uint32_t ws, uint32_t wt)
2167 {
2168     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2169     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2170     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2171 
2172     pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2173     pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2174     pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2175     pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2176     pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2177     pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2178     pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2179     pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2180 }
2181 
2182 void helper_msa_div_u_w(CPUMIPSState *env,
2183                         uint32_t wd, uint32_t ws, uint32_t wt)
2184 {
2185     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2186     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2187     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2188 
2189     pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2190     pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2191     pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2192     pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2193 }
2194 
2195 void helper_msa_div_u_d(CPUMIPSState *env,
2196                         uint32_t wd, uint32_t ws, uint32_t wt)
2197 {
2198     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2199     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2200     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2201 
2202     pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2203     pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2204 }
2205 
2206 
2207 /*
2208  * Int Dot Product
2209  * ---------------
2210  *
2211  * +---------------+----------------------------------------------------------+
2212  * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2213  * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2214  * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2215  * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2216  * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2217  * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
2230  * +---------------+----------------------------------------------------------+
2231  */
2232 
/*
 * Split operand a into its even and odd parts, as computed by
 * SIGNED_EVEN() and SIGNED_ODD() for data format df, and assign them to
 * the lvalues e and o respectively.
 */
#define SIGNED_EXTRACT(e, o, a, df)     \
    do {                                \
        (e) = SIGNED_EVEN((a), (df));   \
        (o) = SIGNED_ODD((a), (df));    \
    } while (0)
2238 
/*
 * Split operand a into its even and odd parts, as computed by
 * UNSIGNED_EVEN() and UNSIGNED_ODD() for data format df, and assign them
 * to the lvalues e and o respectively.
 */
#define UNSIGNED_EXTRACT(e, o, a, df)   \
    do {                                \
        (e) = UNSIGNED_EVEN((a), (df)); \
        (o) = UNSIGNED_ODD((a), (df));  \
    } while (0)
2244 
2245 
/*
 * Signed dot product of one element pair.
 *
 * Each operand is split with SIGNED_EXTRACT() into an even and an odd
 * part; the result is even1 * even2 + odd1 * odd2.
 */
static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_prod, odd_prod;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_prod = a_even * b_even;
    odd_prod = a_odd * b_odd;
    return even_prod + odd_prod;
}
2256 
2257 void helper_msa_dotp_s_h(CPUMIPSState *env,
2258                          uint32_t wd, uint32_t ws, uint32_t wt)
2259 {
2260     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2261     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2262     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2263 
2264     pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2265     pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2266     pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2267     pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2268     pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2269     pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2270     pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2271     pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2272 }
2273 
2274 void helper_msa_dotp_s_w(CPUMIPSState *env,
2275                          uint32_t wd, uint32_t ws, uint32_t wt)
2276 {
2277     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2278     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2279     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2280 
2281     pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2282     pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2283     pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2284     pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2285 }
2286 
2287 void helper_msa_dotp_s_d(CPUMIPSState *env,
2288                          uint32_t wd, uint32_t ws, uint32_t wt)
2289 {
2290     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2291     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2292     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2293 
2294     pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2295     pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2296 }
2297 
2298 
/*
 * Unsigned dot product of one element pair.
 *
 * Each operand is split with UNSIGNED_EXTRACT() into an even and an odd
 * part; the result is even1 * even2 + odd1 * odd2.
 */
static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_prod, odd_prod;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_prod = a_even * b_even;
    odd_prod = a_odd * b_odd;
    return even_prod + odd_prod;
}
2309 
2310 void helper_msa_dotp_u_h(CPUMIPSState *env,
2311                          uint32_t wd, uint32_t ws, uint32_t wt)
2312 {
2313     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2314     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2315     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2316 
2317     pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2318     pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2319     pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2320     pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2321     pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2322     pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2323     pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2324     pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2325 }
2326 
2327 void helper_msa_dotp_u_w(CPUMIPSState *env,
2328                          uint32_t wd, uint32_t ws, uint32_t wt)
2329 {
2330     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2331     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2332     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2333 
2334     pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2335     pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2336     pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2337     pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2338 }
2339 
2340 void helper_msa_dotp_u_d(CPUMIPSState *env,
2341                          uint32_t wd, uint32_t ws, uint32_t wt)
2342 {
2343     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2344     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2345     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2346 
2347     pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2348     pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2349 }
2350 
2351 
/*
 * Signed dot product and add for one element.
 *
 * Each operand is split with SIGNED_EXTRACT() into an even and an odd
 * part; the two products are added, in turn, to dest.
 */
static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t acc;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    acc = dest + a_even * b_even;
    acc = acc + a_odd * b_odd;
    return acc;
}
2363 
2364 void helper_msa_dpadd_s_h(CPUMIPSState *env,
2365                           uint32_t wd, uint32_t ws, uint32_t wt)
2366 {
2367     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2368     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2369     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2370 
2371     pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2372     pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2373     pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2374     pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2375     pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2376     pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2377     pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2378     pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2379 }
2380 
2381 void helper_msa_dpadd_s_w(CPUMIPSState *env,
2382                           uint32_t wd, uint32_t ws, uint32_t wt)
2383 {
2384     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2385     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2386     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2387 
2388     pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2389     pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2390     pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2391     pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2392 }
2393 
2394 void helper_msa_dpadd_s_d(CPUMIPSState *env,
2395                           uint32_t wd, uint32_t ws, uint32_t wt)
2396 {
2397     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2398     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2399     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2400 
2401     pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2402     pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2403 }
2404 
2405 
/*
 * Unsigned dot product and add for one element.
 *
 * Each operand is split with UNSIGNED_EXTRACT() into an even and an odd
 * part; the two products are added, in turn, to dest.
 */
static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t acc;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    acc = dest + a_even * b_even;
    acc = acc + a_odd * b_odd;
    return acc;
}
2417 
2418 void helper_msa_dpadd_u_h(CPUMIPSState *env,
2419                           uint32_t wd, uint32_t ws, uint32_t wt)
2420 {
2421     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2422     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2423     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2424 
2425     pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2426     pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2427     pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2428     pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2429     pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2430     pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2431     pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2432     pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2433 }
2434 
2435 void helper_msa_dpadd_u_w(CPUMIPSState *env,
2436                           uint32_t wd, uint32_t ws, uint32_t wt)
2437 {
2438     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2439     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2440     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2441 
2442     pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2443     pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2444     pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2445     pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2446 }
2447 
2448 void helper_msa_dpadd_u_d(CPUMIPSState *env,
2449                           uint32_t wd, uint32_t ws, uint32_t wt)
2450 {
2451     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2452     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2453     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2454 
2455     pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2456     pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2457 }
2458 
2459 
/*
 * Signed dot product and subtract for one element.
 *
 * Each operand is split with SIGNED_EXTRACT() into an even and an odd
 * part; the sum of the two products is subtracted from dest.
 */
static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t dot;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    dot = (a_even * b_even) + (a_odd * b_odd);
    return dest - dot;
}
2471 
2472 void helper_msa_dpsub_s_h(CPUMIPSState *env,
2473                           uint32_t wd, uint32_t ws, uint32_t wt)
2474 {
2475     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2476     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2477     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2478 
2479     pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2480     pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2481     pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2482     pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2483     pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2484     pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2485     pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2486     pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2487 }
2488 
2489 void helper_msa_dpsub_s_w(CPUMIPSState *env,
2490                           uint32_t wd, uint32_t ws, uint32_t wt)
2491 {
2492     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2493     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2494     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2495 
2496     pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2497     pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2498     pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2499     pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2500 }
2501 
2502 void helper_msa_dpsub_s_d(CPUMIPSState *env,
2503                           uint32_t wd, uint32_t ws, uint32_t wt)
2504 {
2505     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2506     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2507     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2508 
2509     pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2510     pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2511 }
2512 
2513 
/*
 * Unsigned dot product and subtract for one element.
 *
 * Each operand is split with UNSIGNED_EXTRACT() into an even and an odd
 * part; the sum of the two products is subtracted from dest.
 */
static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t dot;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    dot = (a_even * b_even) + (a_odd * b_odd);
    return dest - dot;
}
2525 
2526 void helper_msa_dpsub_u_h(CPUMIPSState *env,
2527                           uint32_t wd, uint32_t ws, uint32_t wt)
2528 {
2529     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2530     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2531     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2532 
2533     pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2534     pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2535     pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2536     pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2537     pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2538     pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2539     pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2540     pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2541 }
2542 
2543 void helper_msa_dpsub_u_w(CPUMIPSState *env,
2544                           uint32_t wd, uint32_t ws, uint32_t wt)
2545 {
2546     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2547     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2548     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2549 
2550     pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2551     pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2552     pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2553     pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2554 }
2555 
2556 void helper_msa_dpsub_u_d(CPUMIPSState *env,
2557                           uint32_t wd, uint32_t ws, uint32_t wt)
2558 {
2559     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2560     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2561     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2562 
2563     pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2564     pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2565 }
2566 
2567 
2568 /*
2569  * Int Max Min
2570  * -----------
2571  *
2572  * +---------------+----------------------------------------------------------+
2573  * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2574  * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2575  * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2576  * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2577  * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2578  * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2579  * | MAX_S.W       | Vector Signed Maximum (word)                             |
2580  * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2581  * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2582  * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2583  * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2584  * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2585  * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2586  * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2587  * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2588  * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2589  * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2590  * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2591  * | MIN_S.W       | Vector Signed Minimum (word)                             |
2592  * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2593  * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2594  * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2595  * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2596  * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2597  * +---------------+----------------------------------------------------------+
2598  */
2599 
/*
 * Maximum based on absolute value for one element.
 *
 * Returns whichever of arg1 and arg2 has the larger magnitude; when the
 * magnitudes are equal, arg2 is returned.  df is not used.
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in unsigned arithmetic: -INT64_MIN does not fit in int64_t,
     * whereas 0 - (uint64_t)INT64_MIN is well defined and yields 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : 0 - (uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : 0 - (uint64_t)arg2;

    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2606 
2607 void helper_msa_max_a_b(CPUMIPSState *env,
2608                         uint32_t wd, uint32_t ws, uint32_t wt)
2609 {
2610     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2611     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2612     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2613 
2614     pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2615     pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2616     pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2617     pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2618     pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2619     pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2620     pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2621     pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2622     pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2623     pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2624     pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2625     pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2626     pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2627     pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2628     pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2629     pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2630 }
2631 
2632 void helper_msa_max_a_h(CPUMIPSState *env,
2633                         uint32_t wd, uint32_t ws, uint32_t wt)
2634 {
2635     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2636     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2637     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2638 
2639     pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2640     pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2641     pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2642     pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2643     pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2644     pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2645     pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2646     pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2647 }
2648 
2649 void helper_msa_max_a_w(CPUMIPSState *env,
2650                         uint32_t wd, uint32_t ws, uint32_t wt)
2651 {
2652     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2653     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2654     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2655 
2656     pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2657     pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2658     pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2659     pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2660 }
2661 
2662 void helper_msa_max_a_d(CPUMIPSState *env,
2663                         uint32_t wd, uint32_t ws, uint32_t wt)
2664 {
2665     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2666     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2667     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2668 
2669     pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2670     pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2671 }
2672 
2673 
/*
 * Signed maximum for one element: returns the larger of arg1 and arg2
 * compared as int64_t.  df is not used.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
2678 
2679 void helper_msa_max_s_b(CPUMIPSState *env,
2680                         uint32_t wd, uint32_t ws, uint32_t wt)
2681 {
2682     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2683     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2684     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2685 
2686     pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2687     pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2688     pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2689     pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2690     pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2691     pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2692     pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2693     pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2694     pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2695     pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2696     pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2697     pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2698     pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2699     pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2700     pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2701     pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2702 }
2703 
2704 void helper_msa_max_s_h(CPUMIPSState *env,
2705                         uint32_t wd, uint32_t ws, uint32_t wt)
2706 {
2707     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2708     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2709     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2710 
2711     pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2712     pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2713     pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2714     pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2715     pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2716     pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2717     pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2718     pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2719 }
2720 
2721 void helper_msa_max_s_w(CPUMIPSState *env,
2722                         uint32_t wd, uint32_t ws, uint32_t wt)
2723 {
2724     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2725     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2726     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2727 
2728     pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2729     pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2730     pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2731     pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2732 }
2733 
2734 void helper_msa_max_s_d(CPUMIPSState *env,
2735                         uint32_t wd, uint32_t ws, uint32_t wt)
2736 {
2737     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2738     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2739     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2740 
2741     pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2742     pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2743 }
2744 
2745 
/*
 * Unsigned maximum of two elements.
 *
 * Both arguments are reduced with UNSIGNED(x, df) and compared as unsigned
 * values.  The original (unmasked) arg1 is returned when its masked value
 * is strictly greater; otherwise the original arg2 is returned.
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (UNSIGNED(arg1, df) > UNSIGNED(arg2, df)) {
        return arg1;
    }
    return arg2;
}
2752 
2753 void helper_msa_max_u_b(CPUMIPSState *env,
2754                         uint32_t wd, uint32_t ws, uint32_t wt)
2755 {
2756     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2757     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2758     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2759 
2760     pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2761     pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2762     pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2763     pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2764     pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2765     pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2766     pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2767     pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2768     pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2769     pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2770     pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2771     pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2772     pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2773     pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2774     pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2775     pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2776 }
2777 
2778 void helper_msa_max_u_h(CPUMIPSState *env,
2779                         uint32_t wd, uint32_t ws, uint32_t wt)
2780 {
2781     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2782     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2783     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2784 
2785     pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2786     pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2787     pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2788     pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2789     pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2790     pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2791     pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2792     pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2793 }
2794 
2795 void helper_msa_max_u_w(CPUMIPSState *env,
2796                         uint32_t wd, uint32_t ws, uint32_t wt)
2797 {
2798     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2799     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2800     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2801 
2802     pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2803     pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2804     pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2805     pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2806 }
2807 
2808 void helper_msa_max_u_d(CPUMIPSState *env,
2809                         uint32_t wd, uint32_t ws, uint32_t wt)
2810 {
2811     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2812     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2813     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2814 
2815     pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2816     pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2817 }
2818 
2819 
/*
 * Minimum by absolute value.
 *
 * Returns arg1 when |arg1| < |arg2|, otherwise arg2 (so ties return arg2).
 * df is not used.
 *
 * The magnitudes are formed in uint64_t: negating a negative value after
 * converting it to unsigned is well defined for every input, including
 * INT64_MIN (whose magnitude 2^63 is not representable as int64_t, so a
 * signed negation would overflow).
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2826 
2827 void helper_msa_min_a_b(CPUMIPSState *env,
2828                         uint32_t wd, uint32_t ws, uint32_t wt)
2829 {
2830     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2831     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2832     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2833 
2834     pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2835     pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2836     pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2837     pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2838     pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2839     pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2840     pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2841     pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2842     pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2843     pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2844     pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2845     pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2846     pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2847     pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2848     pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2849     pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2850 }
2851 
2852 void helper_msa_min_a_h(CPUMIPSState *env,
2853                         uint32_t wd, uint32_t ws, uint32_t wt)
2854 {
2855     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2856     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2857     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2858 
2859     pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2860     pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2861     pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2862     pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2863     pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2864     pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2865     pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2866     pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2867 }
2868 
2869 void helper_msa_min_a_w(CPUMIPSState *env,
2870                         uint32_t wd, uint32_t ws, uint32_t wt)
2871 {
2872     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2873     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2874     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2875 
2876     pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2877     pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2878     pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2879     pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2880 }
2881 
2882 void helper_msa_min_a_d(CPUMIPSState *env,
2883                         uint32_t wd, uint32_t ws, uint32_t wt)
2884 {
2885     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2886     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2887     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2888 
2889     pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2890     pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2891 }
2892 
2893 
/*
 * Signed minimum of two elements.
 *
 * Compares arg1 and arg2 as int64_t and returns the smaller one; when they
 * are equal arg2 is returned.  df is accepted for signature uniformity but
 * is not used.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2898 
2899 void helper_msa_min_s_b(CPUMIPSState *env,
2900                         uint32_t wd, uint32_t ws, uint32_t wt)
2901 {
2902     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2903     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2904     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2905 
2906     pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2907     pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2908     pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2909     pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2910     pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2911     pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2912     pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2913     pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2914     pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2915     pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2916     pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2917     pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2918     pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2919     pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2920     pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2921     pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2922 }
2923 
2924 void helper_msa_min_s_h(CPUMIPSState *env,
2925                         uint32_t wd, uint32_t ws, uint32_t wt)
2926 {
2927     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2928     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2929     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2930 
2931     pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2932     pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2933     pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2934     pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2935     pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2936     pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2937     pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2938     pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2939 }
2940 
2941 void helper_msa_min_s_w(CPUMIPSState *env,
2942                         uint32_t wd, uint32_t ws, uint32_t wt)
2943 {
2944     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2945     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2946     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2947 
2948     pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2949     pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2950     pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2951     pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2952 }
2953 
2954 void helper_msa_min_s_d(CPUMIPSState *env,
2955                         uint32_t wd, uint32_t ws, uint32_t wt)
2956 {
2957     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2958     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2959     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2960 
2961     pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2962     pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2963 }
2964 
2965 
/*
 * Unsigned minimum of two elements.
 *
 * Both arguments are reduced with UNSIGNED(x, df) and compared as unsigned
 * values.  The original (unmasked) arg1 is returned when its masked value
 * is strictly smaller; otherwise the original arg2 is returned.
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (UNSIGNED(arg1, df) < UNSIGNED(arg2, df)) {
        return arg1;
    }
    return arg2;
}
2972 
2973 void helper_msa_min_u_b(CPUMIPSState *env,
2974                         uint32_t wd, uint32_t ws, uint32_t wt)
2975 {
2976     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2977     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2978     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2979 
2980     pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2981     pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2982     pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2983     pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2984     pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2985     pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2986     pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2987     pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2988     pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2989     pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2990     pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2991     pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2992     pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2993     pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2994     pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2995     pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2996 }
2997 
2998 void helper_msa_min_u_h(CPUMIPSState *env,
2999                         uint32_t wd, uint32_t ws, uint32_t wt)
3000 {
3001     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3002     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3003     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3004 
3005     pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3006     pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3007     pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3008     pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3009     pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3010     pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3011     pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3012     pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3013 }
3014 
3015 void helper_msa_min_u_w(CPUMIPSState *env,
3016                         uint32_t wd, uint32_t ws, uint32_t wt)
3017 {
3018     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3019     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3020     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3021 
3022     pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3023     pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3024     pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3025     pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3026 }
3027 
3028 void helper_msa_min_u_d(CPUMIPSState *env,
3029                         uint32_t wd, uint32_t ws, uint32_t wt)
3030 {
3031     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3032     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3033     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3034 
3035     pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3036     pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3037 }
3038 
3039 
3040 /*
3041  * Int Modulo
3042  * ----------
3043  *
3044  * +---------------+----------------------------------------------------------+
3045  * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3046  * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3047  * | MOD_S.W       | Vector Signed Modulo (word)                              |
3048  * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3049  * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3050  * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3051  * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3052  * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3053  * +---------------+----------------------------------------------------------+
3054  */
3055 
/*
 * Signed remainder of arg1 divided by arg2.
 *
 * Two inputs are handled without executing the C '%' operator:
 *  - arg2 == 0 returns arg1 unchanged (avoids division by zero);
 *  - arg1 == DF_MIN_INT(df) with arg2 == -1 returns 0 (avoids the
 *    overflowing MIN / -1 case).
 * Every other input returns arg1 % arg2.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == 0) {
        return arg1;
    }
    if (arg2 == -1 && arg1 == DF_MIN_INT(df)) {
        return 0;
    }
    return arg1 % arg2;
}
3063 
3064 void helper_msa_mod_s_b(CPUMIPSState *env,
3065                         uint32_t wd, uint32_t ws, uint32_t wt)
3066 {
3067     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3068     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3069     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3070 
3071     pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3072     pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3073     pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3074     pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3075     pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3076     pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3077     pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3078     pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3079     pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3080     pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3081     pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3082     pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3083     pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3084     pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3085     pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3086     pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3087 }
3088 
3089 void helper_msa_mod_s_h(CPUMIPSState *env,
3090                         uint32_t wd, uint32_t ws, uint32_t wt)
3091 {
3092     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3093     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3094     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3095 
3096     pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3097     pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3098     pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3099     pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3100     pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3101     pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3102     pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3103     pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3104 }
3105 
3106 void helper_msa_mod_s_w(CPUMIPSState *env,
3107                         uint32_t wd, uint32_t ws, uint32_t wt)
3108 {
3109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3112 
3113     pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3114     pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3115     pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3116     pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3117 }
3118 
3119 void helper_msa_mod_s_d(CPUMIPSState *env,
3120                         uint32_t wd, uint32_t ws, uint32_t wt)
3121 {
3122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3124     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3125 
3126     pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3127     pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3128 }
3129 
/*
 * Unsigned remainder of arg1 divided by arg2.
 *
 * Both arguments are first reduced with UNSIGNED(x, df).  A zero divisor
 * returns the (masked) dividend unchanged instead of dividing; otherwise
 * the unsigned remainder is returned.
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t dividend = UNSIGNED(arg1, df);
    uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3136 
3137 void helper_msa_mod_u_b(CPUMIPSState *env,
3138                         uint32_t wd, uint32_t ws, uint32_t wt)
3139 {
3140     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3141     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3142     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3143 
3144     pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3145     pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3146     pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3147     pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3148     pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3149     pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3150     pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3151     pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3152     pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3153     pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3154     pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3155     pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3156     pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3157     pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3158     pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3159     pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3160 }
3161 
3162 void helper_msa_mod_u_h(CPUMIPSState *env,
3163                         uint32_t wd, uint32_t ws, uint32_t wt)
3164 {
3165     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3166     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3167     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3168 
3169     pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3170     pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3171     pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3172     pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3173     pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3174     pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3175     pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3176     pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3177 }
3178 
3179 void helper_msa_mod_u_w(CPUMIPSState *env,
3180                         uint32_t wd, uint32_t ws, uint32_t wt)
3181 {
3182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3185 
3186     pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3187     pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3188     pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3189     pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3190 }
3191 
3192 void helper_msa_mod_u_d(CPUMIPSState *env,
3193                         uint32_t wd, uint32_t ws, uint32_t wt)
3194 {
3195     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3196     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3197     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3198 
3199     pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3200     pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3201 }
3202 
3203 
3204 /*
3205  * Int Multiply
3206  * ------------
3207  *
3208  * +---------------+----------------------------------------------------------+
3209  * | MADDV.B       | Vector Multiply and Add (byte)                           |
3210  * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3211  * | MADDV.W       | Vector Multiply and Add (word)                           |
3212  * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3213  * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3214  * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3215  * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3216  * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3217  * | MULV.B        | Vector Multiply (byte)                                   |
3218  * | MULV.H        | Vector Multiply (halfword)                               |
3219  * | MULV.W        | Vector Multiply (word)                                   |
3220  * | MULV.D        | Vector Multiply (doubleword)                             |
3221  * +---------------+----------------------------------------------------------+
3222  */
3223 
/*
 * Multiply-accumulate: return dest + arg1 * arg2, wrapping modulo 2^64.
 * df is not used.
 *
 * The arithmetic is done in uint64_t so that a product or sum exceeding
 * the int64_t range wraps with defined behaviour instead of overflowing a
 * signed type.  The low 64 bits are identical to two's-complement signed
 * arithmetic, so callers that truncate the result to a narrower element
 * see the same value.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
}
3229 
3230 void helper_msa_maddv_b(CPUMIPSState *env,
3231                         uint32_t wd, uint32_t ws, uint32_t wt)
3232 {
3233     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3234     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3235     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3236 
3237     pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3238     pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3239     pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3240     pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3241     pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3242     pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3243     pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3244     pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3245     pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3246     pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3247     pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3248     pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3249     pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3250     pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3251     pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3252     pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3253 }
3254 
3255 void helper_msa_maddv_h(CPUMIPSState *env,
3256                         uint32_t wd, uint32_t ws, uint32_t wt)
3257 {
3258     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3259     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3260     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3261 
3262     pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3263     pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3264     pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3265     pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3266     pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3267     pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3268     pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3269     pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3270 }
3271 
3272 void helper_msa_maddv_w(CPUMIPSState *env,
3273                         uint32_t wd, uint32_t ws, uint32_t wt)
3274 {
3275     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3276     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3277     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3278 
3279     pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3280     pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3281     pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3282     pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3283 }
3284 
3285 void helper_msa_maddv_d(CPUMIPSState *env,
3286                         uint32_t wd, uint32_t ws, uint32_t wt)
3287 {
3288     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3289     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3290     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3291 
3292     pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3293     pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3294 }
3295 
/*
 * Multiply-subtract: return dest - arg1 * arg2, wrapping modulo 2^64.
 * df is not used.
 *
 * The arithmetic is done in uint64_t so that a product or difference
 * outside the int64_t range wraps with defined behaviour instead of
 * overflowing a signed type.  The low 64 bits are identical to
 * two's-complement signed arithmetic, so callers that truncate the result
 * to a narrower element see the same value.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
}
3301 
3302 void helper_msa_msubv_b(CPUMIPSState *env,
3303                         uint32_t wd, uint32_t ws, uint32_t wt)
3304 {
3305     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3306     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3307     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3308 
3309     pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3310     pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3311     pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3312     pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3313     pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3314     pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3315     pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3316     pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3317     pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3318     pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3319     pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3320     pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3321     pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3322     pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3323     pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3324     pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3325 }
3326 
3327 void helper_msa_msubv_h(CPUMIPSState *env,
3328                         uint32_t wd, uint32_t ws, uint32_t wt)
3329 {
3330     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3331     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3332     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3333 
3334     pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3335     pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3336     pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3337     pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3338     pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3339     pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3340     pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3341     pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3342 }
3343 
3344 void helper_msa_msubv_w(CPUMIPSState *env,
3345                         uint32_t wd, uint32_t ws, uint32_t wt)
3346 {
3347     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3348     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3349     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3350 
3351     pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3352     pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3353     pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3354     pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3355 }
3356 
3357 void helper_msa_msubv_d(CPUMIPSState *env,
3358                         uint32_t wd, uint32_t ws, uint32_t wt)
3359 {
3360     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3361     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3362     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3363 
3364     pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3365     pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3366 }
3367 
3368 
/*
 * Element multiply: returns the low 64 bits of arg1 * arg2.
 *
 * @df:   data format of the element; not used by the computation itself.
 * @arg1: first factor.
 * @arg2: second factor.
 *
 * The product is formed in uint64_t so that it wraps modulo 2^64 instead of
 * relying on signed overflow, which standard C leaves undefined.  The bit
 * pattern is the same one a wrapping signed multiply produces, so callers
 * that truncate the result to a narrower lane see no difference.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3373 
3374 void helper_msa_mulv_b(CPUMIPSState *env,
3375                        uint32_t wd, uint32_t ws, uint32_t wt)
3376 {
3377     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3378     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3379     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3380 
3381     pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3382     pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3383     pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3384     pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3385     pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3386     pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3387     pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3388     pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3389     pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3390     pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3391     pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3392     pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3393     pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3394     pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3395     pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3396     pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3397 }
3398 
3399 void helper_msa_mulv_h(CPUMIPSState *env,
3400                        uint32_t wd, uint32_t ws, uint32_t wt)
3401 {
3402     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3403     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3404     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3405 
3406     pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3407     pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3408     pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3409     pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3410     pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3411     pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3412     pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3413     pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3414 }
3415 
3416 void helper_msa_mulv_w(CPUMIPSState *env,
3417                        uint32_t wd, uint32_t ws, uint32_t wt)
3418 {
3419     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3420     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3421     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3422 
3423     pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3424     pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3425     pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3426     pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3427 }
3428 
3429 void helper_msa_mulv_d(CPUMIPSState *env,
3430                        uint32_t wd, uint32_t ws, uint32_t wt)
3431 {
3432     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3433     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3434     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3435 
3436     pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3437     pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3438 }
3439 
3440 
3441 /*
3442  * Int Subtract
3443  * ------------
3444  *
3445  * +---------------+----------------------------------------------------------+
3446  * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3447  * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3448  * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3449  * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3450  * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3451  * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3452  * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3453  * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3454  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3455  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3456  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3457  * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3458  * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3459  * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3460  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3461  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3462  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3463  * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3464  * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3465  * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3466  * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3467  * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3468  * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3469  * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3470  * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3471  * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3472  * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3473  * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3474  * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3475  * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3476  * | SUBV.B        | Vector Subtract (byte)                                   |
3477  * | SUBV.H        | Vector Subtract (halfword)                               |
3478  * | SUBV.W        | Vector Subtract (word)                                   |
3479  * | SUBV.D        | Vector Subtract (doubleword)                             |
3480  * +---------------+----------------------------------------------------------+
3481  */
3482 
3483 
/*
 * Absolute value of the signed difference of two elements.
 *
 * @df:   data format of the element; not used by the computation itself.
 * @arg1: first operand, compared as a signed value.
 * @arg2: second operand, compared as a signed value.
 *
 * Returns the larger operand minus the smaller one.  The subtraction is done
 * on uint64_t operands: the true difference of two int64_t values can need
 * 64 unsigned bits (e.g. INT64_MAX - INT64_MIN), which would overflow a
 * signed subtraction.  The returned bit pattern equals that unsigned
 * difference.
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed compare, unsigned (wrapping) subtract */
    return (arg1 < arg2) ?
        (int64_t)((uint64_t)arg2 - (uint64_t)arg1) :
        (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
3490 
3491 void helper_msa_asub_s_b(CPUMIPSState *env,
3492                          uint32_t wd, uint32_t ws, uint32_t wt)
3493 {
3494     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3495     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3496     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3497 
3498     pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3499     pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3500     pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3501     pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3502     pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3503     pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3504     pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3505     pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3506     pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3507     pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3508     pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3509     pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3510     pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3511     pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3512     pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3513     pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3514 }
3515 
3516 void helper_msa_asub_s_h(CPUMIPSState *env,
3517                          uint32_t wd, uint32_t ws, uint32_t wt)
3518 {
3519     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3520     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3521     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3522 
3523     pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3524     pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3525     pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3526     pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3527     pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3528     pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3529     pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3530     pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3531 }
3532 
3533 void helper_msa_asub_s_w(CPUMIPSState *env,
3534                          uint32_t wd, uint32_t ws, uint32_t wt)
3535 {
3536     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3537     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3538     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3539 
3540     pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3541     pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3542     pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3543     pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3544 }
3545 
3546 void helper_msa_asub_s_d(CPUMIPSState *env,
3547                          uint32_t wd, uint32_t ws, uint32_t wt)
3548 {
3549     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3550     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3551     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3552 
3553     pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3554     pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3555 }
3556 
3557 
/*
 * Absolute difference of two elements treated as unsigned values.
 *
 * Both operands are first masked with UNSIGNED(, df), i.e. reduced to the
 * DF_BITS(df) low bits; the smaller masked value is then subtracted from
 * the larger one.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare decides the direction of the subtraction */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3566 
3567 void helper_msa_asub_u_b(CPUMIPSState *env,
3568                          uint32_t wd, uint32_t ws, uint32_t wt)
3569 {
3570     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3571     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3572     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3573 
3574     pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3575     pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3576     pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3577     pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3578     pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3579     pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3580     pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3581     pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3582     pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3583     pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3584     pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3585     pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3586     pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3587     pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3588     pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3589     pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3590 }
3591 
3592 void helper_msa_asub_u_h(CPUMIPSState *env,
3593                          uint32_t wd, uint32_t ws, uint32_t wt)
3594 {
3595     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3596     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3597     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3598 
3599     pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3600     pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3601     pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3602     pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3603     pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3604     pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3605     pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3606     pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3607 }
3608 
3609 void helper_msa_asub_u_w(CPUMIPSState *env,
3610                          uint32_t wd, uint32_t ws, uint32_t wt)
3611 {
3612     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3613     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3614     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3615 
3616     pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3617     pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3618     pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3619     pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3620 }
3621 
3622 void helper_msa_asub_u_d(CPUMIPSState *env,
3623                          uint32_t wd, uint32_t ws, uint32_t wt)
3624 {
3625     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3626     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3627     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3628 
3629     pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3630     pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3631 }
3632 
3633 
/*
 * Signed horizontal subtract for one element: the value selected by
 * SIGNED_ODD(arg1, df) minus the value selected by SIGNED_EVEN(arg2, df).
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t difference = SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);

    return difference;
}
3638 
3639 void helper_msa_hsub_s_h(CPUMIPSState *env,
3640                          uint32_t wd, uint32_t ws, uint32_t wt)
3641 {
3642     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3643     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3644     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3645 
3646     pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3647     pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3648     pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3649     pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3650     pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3651     pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3652     pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3653     pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3654 }
3655 
3656 void helper_msa_hsub_s_w(CPUMIPSState *env,
3657                          uint32_t wd, uint32_t ws, uint32_t wt)
3658 {
3659     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3660     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3661     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3662 
3663     pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3664     pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3665     pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3666     pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3667 }
3668 
3669 void helper_msa_hsub_s_d(CPUMIPSState *env,
3670                          uint32_t wd, uint32_t ws, uint32_t wt)
3671 {
3672     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3673     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3674     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3675 
3676     pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3677     pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3678 }
3679 
3680 
/*
 * Unsigned horizontal subtract for one element: the value selected by
 * UNSIGNED_ODD(arg1, df) minus the value selected by
 * UNSIGNED_EVEN(arg2, df), returned as int64_t.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t difference = UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);

    return difference;
}
3685 
3686 void helper_msa_hsub_u_h(CPUMIPSState *env,
3687                          uint32_t wd, uint32_t ws, uint32_t wt)
3688 {
3689     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3690     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3691     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3692 
3693     pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3694     pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3695     pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3696     pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3697     pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3698     pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3699     pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3700     pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3701 }
3702 
3703 void helper_msa_hsub_u_w(CPUMIPSState *env,
3704                          uint32_t wd, uint32_t ws, uint32_t wt)
3705 {
3706     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3707     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3708     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3709 
3710     pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3711     pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3712     pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3713     pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3714 }
3715 
3716 void helper_msa_hsub_u_d(CPUMIPSState *env,
3717                          uint32_t wd, uint32_t ws, uint32_t wt)
3718 {
3719     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3720     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3721     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3722 
3723     pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3724     pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3725 }
3726 
3727 
/*
 * Signed saturating subtract of one element: returns arg1 - arg2, clamped
 * to the range [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The range check is done before subtracting, by moving arg2 to the other
 * side of the comparison.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg2 <= 0) {
        /* Subtracting a non-positive value can only exceed the upper bound. */
        if (arg1 < upper + arg2) {
            return arg1 - arg2;
        }
        return upper;
    }

    /* Subtracting a positive value can only fall below the lower bound. */
    if (lower + arg2 < arg1) {
        return arg1 - arg2;
    }
    return lower;
}
3738 
3739 void helper_msa_subs_s_b(CPUMIPSState *env,
3740                          uint32_t wd, uint32_t ws, uint32_t wt)
3741 {
3742     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3743     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3744     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3745 
3746     pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3747     pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3748     pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3749     pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3750     pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3751     pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3752     pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3753     pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3754     pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3755     pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3756     pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3757     pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3758     pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3759     pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3760     pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3761     pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3762 }
3763 
3764 void helper_msa_subs_s_h(CPUMIPSState *env,
3765                          uint32_t wd, uint32_t ws, uint32_t wt)
3766 {
3767     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3768     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3769     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3770 
3771     pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3772     pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3773     pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3774     pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3775     pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3776     pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3777     pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3778     pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3779 }
3780 
3781 void helper_msa_subs_s_w(CPUMIPSState *env,
3782                          uint32_t wd, uint32_t ws, uint32_t wt)
3783 {
3784     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3785     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3786     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3787 
3788     pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3789     pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3790     pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3791     pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3792 }
3793 
3794 void helper_msa_subs_s_d(CPUMIPSState *env,
3795                          uint32_t wd, uint32_t ws, uint32_t wt)
3796 {
3797     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3798     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3799     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3800 
3801     pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3802     pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3803 }
3804 
3805 
/*
 * Unsigned saturating subtract of one element.
 *
 * Both operands are masked with UNSIGNED(, df).  The result is their
 * difference when the first masked value is the larger one, and 0
 * otherwise.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t minuend = UNSIGNED(arg1, df);
    const uint64_t subtrahend = UNSIGNED(arg2, df);

    if (minuend <= subtrahend) {
        /* The difference would be zero or negative: saturate at 0. */
        return 0;
    }
    return minuend - subtrahend;
}
3812 
3813 void helper_msa_subs_u_b(CPUMIPSState *env,
3814                          uint32_t wd, uint32_t ws, uint32_t wt)
3815 {
3816     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3817     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3818     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3819 
3820     pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3821     pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3822     pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3823     pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3824     pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3825     pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3826     pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3827     pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3828     pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3829     pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3830     pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3831     pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3832     pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3833     pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3834     pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3835     pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3836 }
3837 
3838 void helper_msa_subs_u_h(CPUMIPSState *env,
3839                          uint32_t wd, uint32_t ws, uint32_t wt)
3840 {
3841     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3842     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3843     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3844 
3845     pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3846     pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3847     pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3848     pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3849     pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3850     pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3851     pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3852     pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3853 }
3854 
3855 void helper_msa_subs_u_w(CPUMIPSState *env,
3856                          uint32_t wd, uint32_t ws, uint32_t wt)
3857 {
3858     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3859     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3860     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3861 
3862     pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3863     pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3864     pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3865     pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3866 }
3867 
3868 void helper_msa_subs_u_d(CPUMIPSState *env,
3869                          uint32_t wd, uint32_t ws, uint32_t wt)
3870 {
3871     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3872     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3873     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3874 
3875     pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3876     pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3877 }
3878 
3879 
/*
 * Unsigned saturating subtract of a signed value from an unsigned value.
 *
 * @df:   data format; selects the element width used for masking and for
 *        the upper saturation bound DF_MAX_UINT(df).
 * @arg1: minuend, masked with UNSIGNED(, df) and treated as unsigned.
 * @arg2: subtrahend, treated as a signed value.
 *
 * For arg2 >= 0 the result is arg1 - arg2, saturated at 0.  For arg2 < 0
 * the magnitude of arg2 is added to arg1 and the result saturates at
 * DF_MAX_UINT(df).
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);
    uint64_t u_arg2;

    if (arg2 >= 0) {
        u_arg2 = (uint64_t)arg2;
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    }

    /*
     * Take the magnitude in the unsigned domain: negating INT64_MIN as a
     * signed value would overflow, while 0 - (uint64_t)arg2 is well defined
     * and yields the same bit pattern.
     */
    u_arg2 = 0 - (uint64_t)arg2;
    return (u_arg1 < max_uint - u_arg2) ?
        (int64_t)(u_arg1 + u_arg2) :
        (int64_t)max_uint;
}
3896 
3897 void helper_msa_subsus_u_b(CPUMIPSState *env,
3898                            uint32_t wd, uint32_t ws, uint32_t wt)
3899 {
3900     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3901     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3902     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3903 
3904     pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3905     pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3906     pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3907     pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3908     pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3909     pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3910     pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3911     pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3912     pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3913     pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3914     pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3915     pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3916     pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3917     pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3918     pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3919     pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3920 }
3921 
3922 void helper_msa_subsus_u_h(CPUMIPSState *env,
3923                            uint32_t wd, uint32_t ws, uint32_t wt)
3924 {
3925     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3926     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3927     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3928 
3929     pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3930     pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3931     pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3932     pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3933     pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3934     pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3935     pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3936     pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3937 }
3938 
3939 void helper_msa_subsus_u_w(CPUMIPSState *env,
3940                            uint32_t wd, uint32_t ws, uint32_t wt)
3941 {
3942     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3943     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3944     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3945 
3946     pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3947     pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3948     pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3949     pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3950 }
3951 
3952 void helper_msa_subsus_u_d(CPUMIPSState *env,
3953                            uint32_t wd, uint32_t ws, uint32_t wt)
3954 {
3955     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3956     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3957     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3958 
3959     pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3960     pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3961 }
3962 
3963 
/*
 * Signed saturating subtract of two unsigned values.
 *
 * Both operands are masked with UNSIGNED(, df) and compared as unsigned.
 * The signed difference arg1 - arg2 is returned when it lies strictly
 * inside the bounds; otherwise the result is DF_MAX_INT(df) (first operand
 * larger) or DF_MIN_INT(df) (first operand not larger).
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (lhs > rhs) {
        /* Positive difference: may exceed the signed maximum. */
        if (lhs - rhs < (uint64_t)upper) {
            return (int64_t)(lhs - rhs);
        }
        return upper;
    } else {
        /* Zero or negative difference: compare its magnitude to -lower. */
        if (rhs - lhs < (uint64_t)(-lower)) {
            return (int64_t)(lhs - rhs);
        }
        return lower;
    }
}
3980 
3981 void helper_msa_subsuu_s_b(CPUMIPSState *env,
3982                            uint32_t wd, uint32_t ws, uint32_t wt)
3983 {
3984     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3985     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3986     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3987 
3988     pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3989     pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3990     pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3991     pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3992     pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3993     pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3994     pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3995     pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3996     pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3997     pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3998     pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3999     pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
4000     pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
4001     pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
4002     pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4003     pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4004 }
4005 
4006 void helper_msa_subsuu_s_h(CPUMIPSState *env,
4007                            uint32_t wd, uint32_t ws, uint32_t wt)
4008 {
4009     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4010     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4011     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4012 
4013     pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4014     pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4015     pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4016     pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4017     pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4018     pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4019     pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4020     pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4021 }
4022 
4023 void helper_msa_subsuu_s_w(CPUMIPSState *env,
4024                            uint32_t wd, uint32_t ws, uint32_t wt)
4025 {
4026     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4027     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4028     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4029 
4030     pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4031     pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4032     pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4033     pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4034 }
4035 
4036 void helper_msa_subsuu_s_d(CPUMIPSState *env,
4037                            uint32_t wd, uint32_t ws, uint32_t wt)
4038 {
4039     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4040     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4041     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4042 
4043     pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4044     pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4045 }
4046 
4047 
/*
 * Per-element subtraction used by the helper_msa_subv_* functions:
 * returns arg1 - arg2.
 *
 * The difference is computed on the uint64_t representations so that it
 * wraps modulo 2^64.  Subtracting two int64_t values directly overflows for
 * inputs such as INT64_MIN - 1, which ISO C leaves undefined; the unsigned
 * form produces the same bit pattern without relying on compiler flags.
 * The callers store the result into a lane of the element width, so only
 * the low bits of the returned value are kept.
 *
 * @df is not used by the computation; it is kept so the signature matches
 * the other (df, arg1, arg2) element helpers.
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4052 
4053 void helper_msa_subv_b(CPUMIPSState *env,
4054                        uint32_t wd, uint32_t ws, uint32_t wt)
4055 {
4056     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4057     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4058     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4059 
4060     pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4061     pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4062     pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4063     pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4064     pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4065     pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4066     pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4067     pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4068     pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4069     pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4070     pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4071     pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4072     pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4073     pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4074     pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4075     pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4076 }
4077 
4078 void helper_msa_subv_h(CPUMIPSState *env,
4079                        uint32_t wd, uint32_t ws, uint32_t wt)
4080 {
4081     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4082     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4083     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4084 
4085     pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4086     pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4087     pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4088     pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4089     pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4090     pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4091     pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4092     pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4093 }
4094 
4095 void helper_msa_subv_w(CPUMIPSState *env,
4096                        uint32_t wd, uint32_t ws, uint32_t wt)
4097 {
4098     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4099     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4100     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4101 
4102     pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4103     pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4104     pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4105     pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4106 }
4107 
4108 void helper_msa_subv_d(CPUMIPSState *env,
4109                        uint32_t wd, uint32_t ws, uint32_t wt)
4110 {
4111     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4112     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4113     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4114 
4115     pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4116     pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4117 }
4118 
4119 
4120 /*
4121  * Interleave
4122  * ----------
4123  *
4124  * +---------------+----------------------------------------------------------+
4125  * | ILVEV.B       | Vector Interleave Even (byte)                            |
4126  * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4127  * | ILVEV.W       | Vector Interleave Even (word)                            |
4128  * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4129  * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4130  * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4131  * | ILVOD.W       | Vector Interleave Odd (word)                             |
4132  * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4133  * | ILVL.B        | Vector Interleave Left (byte)                            |
4134  * | ILVL.H        | Vector Interleave Left (halfword)                        |
4135  * | ILVL.W        | Vector Interleave Left (word)                            |
4136  * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4137  * | ILVR.B        | Vector Interleave Right (byte)                           |
4138  * | ILVR.H        | Vector Interleave Right (halfword)                       |
4139  * | ILVR.W        | Vector Interleave Right (word)                           |
4140  * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4141  * +---------------+----------------------------------------------------------+
4142  */
4143 
4144 
4145 void helper_msa_ilvev_b(CPUMIPSState *env,
4146                         uint32_t wd, uint32_t ws, uint32_t wt)
4147 {
4148     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4149     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4150     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4151 
4152 #if HOST_BIG_ENDIAN
4153     pwd->b[8]  = pws->b[9];
4154     pwd->b[9]  = pwt->b[9];
4155     pwd->b[10] = pws->b[11];
4156     pwd->b[11] = pwt->b[11];
4157     pwd->b[12] = pws->b[13];
4158     pwd->b[13] = pwt->b[13];
4159     pwd->b[14] = pws->b[15];
4160     pwd->b[15] = pwt->b[15];
4161     pwd->b[0]  = pws->b[1];
4162     pwd->b[1]  = pwt->b[1];
4163     pwd->b[2]  = pws->b[3];
4164     pwd->b[3]  = pwt->b[3];
4165     pwd->b[4]  = pws->b[5];
4166     pwd->b[5]  = pwt->b[5];
4167     pwd->b[6]  = pws->b[7];
4168     pwd->b[7]  = pwt->b[7];
4169 #else
4170     pwd->b[15] = pws->b[14];
4171     pwd->b[14] = pwt->b[14];
4172     pwd->b[13] = pws->b[12];
4173     pwd->b[12] = pwt->b[12];
4174     pwd->b[11] = pws->b[10];
4175     pwd->b[10] = pwt->b[10];
4176     pwd->b[9]  = pws->b[8];
4177     pwd->b[8]  = pwt->b[8];
4178     pwd->b[7]  = pws->b[6];
4179     pwd->b[6]  = pwt->b[6];
4180     pwd->b[5]  = pws->b[4];
4181     pwd->b[4]  = pwt->b[4];
4182     pwd->b[3]  = pws->b[2];
4183     pwd->b[2]  = pwt->b[2];
4184     pwd->b[1]  = pws->b[0];
4185     pwd->b[0]  = pwt->b[0];
4186 #endif
4187 }
4188 
4189 void helper_msa_ilvev_h(CPUMIPSState *env,
4190                         uint32_t wd, uint32_t ws, uint32_t wt)
4191 {
4192     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4193     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4194     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4195 
4196 #if HOST_BIG_ENDIAN
4197     pwd->h[4] = pws->h[5];
4198     pwd->h[5] = pwt->h[5];
4199     pwd->h[6] = pws->h[7];
4200     pwd->h[7] = pwt->h[7];
4201     pwd->h[0] = pws->h[1];
4202     pwd->h[1] = pwt->h[1];
4203     pwd->h[2] = pws->h[3];
4204     pwd->h[3] = pwt->h[3];
4205 #else
4206     pwd->h[7] = pws->h[6];
4207     pwd->h[6] = pwt->h[6];
4208     pwd->h[5] = pws->h[4];
4209     pwd->h[4] = pwt->h[4];
4210     pwd->h[3] = pws->h[2];
4211     pwd->h[2] = pwt->h[2];
4212     pwd->h[1] = pws->h[0];
4213     pwd->h[0] = pwt->h[0];
4214 #endif
4215 }
4216 
4217 void helper_msa_ilvev_w(CPUMIPSState *env,
4218                         uint32_t wd, uint32_t ws, uint32_t wt)
4219 {
4220     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4221     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4222     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4223 
4224 #if HOST_BIG_ENDIAN
4225     pwd->w[2] = pws->w[3];
4226     pwd->w[3] = pwt->w[3];
4227     pwd->w[0] = pws->w[1];
4228     pwd->w[1] = pwt->w[1];
4229 #else
4230     pwd->w[3] = pws->w[2];
4231     pwd->w[2] = pwt->w[2];
4232     pwd->w[1] = pws->w[0];
4233     pwd->w[0] = pwt->w[0];
4234 #endif
4235 }
4236 
4237 void helper_msa_ilvev_d(CPUMIPSState *env,
4238                         uint32_t wd, uint32_t ws, uint32_t wt)
4239 {
4240     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4241     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4242     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4243 
4244     pwd->d[1] = pws->d[0];
4245     pwd->d[0] = pwt->d[0];
4246 }
4247 
4248 
4249 void helper_msa_ilvod_b(CPUMIPSState *env,
4250                         uint32_t wd, uint32_t ws, uint32_t wt)
4251 {
4252     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4253     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4254     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4255 
4256 #if HOST_BIG_ENDIAN
4257     pwd->b[7]  = pwt->b[6];
4258     pwd->b[6]  = pws->b[6];
4259     pwd->b[5]  = pwt->b[4];
4260     pwd->b[4]  = pws->b[4];
4261     pwd->b[3]  = pwt->b[2];
4262     pwd->b[2]  = pws->b[2];
4263     pwd->b[1]  = pwt->b[0];
4264     pwd->b[0]  = pws->b[0];
4265     pwd->b[15] = pwt->b[14];
4266     pwd->b[14] = pws->b[14];
4267     pwd->b[13] = pwt->b[12];
4268     pwd->b[12] = pws->b[12];
4269     pwd->b[11] = pwt->b[10];
4270     pwd->b[10] = pws->b[10];
4271     pwd->b[9]  = pwt->b[8];
4272     pwd->b[8]  = pws->b[8];
4273 #else
4274     pwd->b[0]  = pwt->b[1];
4275     pwd->b[1]  = pws->b[1];
4276     pwd->b[2]  = pwt->b[3];
4277     pwd->b[3]  = pws->b[3];
4278     pwd->b[4]  = pwt->b[5];
4279     pwd->b[5]  = pws->b[5];
4280     pwd->b[6]  = pwt->b[7];
4281     pwd->b[7]  = pws->b[7];
4282     pwd->b[8]  = pwt->b[9];
4283     pwd->b[9]  = pws->b[9];
4284     pwd->b[10] = pwt->b[11];
4285     pwd->b[11] = pws->b[11];
4286     pwd->b[12] = pwt->b[13];
4287     pwd->b[13] = pws->b[13];
4288     pwd->b[14] = pwt->b[15];
4289     pwd->b[15] = pws->b[15];
4290 #endif
4291 }
4292 
4293 void helper_msa_ilvod_h(CPUMIPSState *env,
4294                         uint32_t wd, uint32_t ws, uint32_t wt)
4295 {
4296     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4297     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4298     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4299 
4300 #if HOST_BIG_ENDIAN
4301     pwd->h[3] = pwt->h[2];
4302     pwd->h[2] = pws->h[2];
4303     pwd->h[1] = pwt->h[0];
4304     pwd->h[0] = pws->h[0];
4305     pwd->h[7] = pwt->h[6];
4306     pwd->h[6] = pws->h[6];
4307     pwd->h[5] = pwt->h[4];
4308     pwd->h[4] = pws->h[4];
4309 #else
4310     pwd->h[0] = pwt->h[1];
4311     pwd->h[1] = pws->h[1];
4312     pwd->h[2] = pwt->h[3];
4313     pwd->h[3] = pws->h[3];
4314     pwd->h[4] = pwt->h[5];
4315     pwd->h[5] = pws->h[5];
4316     pwd->h[6] = pwt->h[7];
4317     pwd->h[7] = pws->h[7];
4318 #endif
4319 }
4320 
4321 void helper_msa_ilvod_w(CPUMIPSState *env,
4322                         uint32_t wd, uint32_t ws, uint32_t wt)
4323 {
4324     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4325     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4326     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4327 
4328 #if HOST_BIG_ENDIAN
4329     pwd->w[1] = pwt->w[0];
4330     pwd->w[0] = pws->w[0];
4331     pwd->w[3] = pwt->w[2];
4332     pwd->w[2] = pws->w[2];
4333 #else
4334     pwd->w[0] = pwt->w[1];
4335     pwd->w[1] = pws->w[1];
4336     pwd->w[2] = pwt->w[3];
4337     pwd->w[3] = pws->w[3];
4338 #endif
4339 }
4340 
4341 void helper_msa_ilvod_d(CPUMIPSState *env,
4342                         uint32_t wd, uint32_t ws, uint32_t wt)
4343 {
4344     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4345     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4346     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4347 
4348     pwd->d[0] = pwt->d[1];
4349     pwd->d[1] = pws->d[1];
4350 }
4351 
4352 
4353 void helper_msa_ilvl_b(CPUMIPSState *env,
4354                        uint32_t wd, uint32_t ws, uint32_t wt)
4355 {
4356     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4357     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4358     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4359 
4360 #if HOST_BIG_ENDIAN
4361     pwd->b[7]  = pwt->b[15];
4362     pwd->b[6]  = pws->b[15];
4363     pwd->b[5]  = pwt->b[14];
4364     pwd->b[4]  = pws->b[14];
4365     pwd->b[3]  = pwt->b[13];
4366     pwd->b[2]  = pws->b[13];
4367     pwd->b[1]  = pwt->b[12];
4368     pwd->b[0]  = pws->b[12];
4369     pwd->b[15] = pwt->b[11];
4370     pwd->b[14] = pws->b[11];
4371     pwd->b[13] = pwt->b[10];
4372     pwd->b[12] = pws->b[10];
4373     pwd->b[11] = pwt->b[9];
4374     pwd->b[10] = pws->b[9];
4375     pwd->b[9]  = pwt->b[8];
4376     pwd->b[8]  = pws->b[8];
4377 #else
4378     pwd->b[0]  = pwt->b[8];
4379     pwd->b[1]  = pws->b[8];
4380     pwd->b[2]  = pwt->b[9];
4381     pwd->b[3]  = pws->b[9];
4382     pwd->b[4]  = pwt->b[10];
4383     pwd->b[5]  = pws->b[10];
4384     pwd->b[6]  = pwt->b[11];
4385     pwd->b[7]  = pws->b[11];
4386     pwd->b[8]  = pwt->b[12];
4387     pwd->b[9]  = pws->b[12];
4388     pwd->b[10] = pwt->b[13];
4389     pwd->b[11] = pws->b[13];
4390     pwd->b[12] = pwt->b[14];
4391     pwd->b[13] = pws->b[14];
4392     pwd->b[14] = pwt->b[15];
4393     pwd->b[15] = pws->b[15];
4394 #endif
4395 }
4396 
4397 void helper_msa_ilvl_h(CPUMIPSState *env,
4398                        uint32_t wd, uint32_t ws, uint32_t wt)
4399 {
4400     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4401     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4402     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4403 
4404 #if HOST_BIG_ENDIAN
4405     pwd->h[3] = pwt->h[7];
4406     pwd->h[2] = pws->h[7];
4407     pwd->h[1] = pwt->h[6];
4408     pwd->h[0] = pws->h[6];
4409     pwd->h[7] = pwt->h[5];
4410     pwd->h[6] = pws->h[5];
4411     pwd->h[5] = pwt->h[4];
4412     pwd->h[4] = pws->h[4];
4413 #else
4414     pwd->h[0] = pwt->h[4];
4415     pwd->h[1] = pws->h[4];
4416     pwd->h[2] = pwt->h[5];
4417     pwd->h[3] = pws->h[5];
4418     pwd->h[4] = pwt->h[6];
4419     pwd->h[5] = pws->h[6];
4420     pwd->h[6] = pwt->h[7];
4421     pwd->h[7] = pws->h[7];
4422 #endif
4423 }
4424 
4425 void helper_msa_ilvl_w(CPUMIPSState *env,
4426                        uint32_t wd, uint32_t ws, uint32_t wt)
4427 {
4428     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4429     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4430     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4431 
4432 #if HOST_BIG_ENDIAN
4433     pwd->w[1] = pwt->w[3];
4434     pwd->w[0] = pws->w[3];
4435     pwd->w[3] = pwt->w[2];
4436     pwd->w[2] = pws->w[2];
4437 #else
4438     pwd->w[0] = pwt->w[2];
4439     pwd->w[1] = pws->w[2];
4440     pwd->w[2] = pwt->w[3];
4441     pwd->w[3] = pws->w[3];
4442 #endif
4443 }
4444 
4445 void helper_msa_ilvl_d(CPUMIPSState *env,
4446                        uint32_t wd, uint32_t ws, uint32_t wt)
4447 {
4448     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4449     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4450     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4451 
4452     pwd->d[0] = pwt->d[1];
4453     pwd->d[1] = pws->d[1];
4454 }
4455 
4456 
4457 void helper_msa_ilvr_b(CPUMIPSState *env,
4458                        uint32_t wd, uint32_t ws, uint32_t wt)
4459 {
4460     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4461     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4462     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4463 
4464 #if HOST_BIG_ENDIAN
4465     pwd->b[8]  = pws->b[0];
4466     pwd->b[9]  = pwt->b[0];
4467     pwd->b[10] = pws->b[1];
4468     pwd->b[11] = pwt->b[1];
4469     pwd->b[12] = pws->b[2];
4470     pwd->b[13] = pwt->b[2];
4471     pwd->b[14] = pws->b[3];
4472     pwd->b[15] = pwt->b[3];
4473     pwd->b[0]  = pws->b[4];
4474     pwd->b[1]  = pwt->b[4];
4475     pwd->b[2]  = pws->b[5];
4476     pwd->b[3]  = pwt->b[5];
4477     pwd->b[4]  = pws->b[6];
4478     pwd->b[5]  = pwt->b[6];
4479     pwd->b[6]  = pws->b[7];
4480     pwd->b[7]  = pwt->b[7];
4481 #else
4482     pwd->b[15] = pws->b[7];
4483     pwd->b[14] = pwt->b[7];
4484     pwd->b[13] = pws->b[6];
4485     pwd->b[12] = pwt->b[6];
4486     pwd->b[11] = pws->b[5];
4487     pwd->b[10] = pwt->b[5];
4488     pwd->b[9]  = pws->b[4];
4489     pwd->b[8]  = pwt->b[4];
4490     pwd->b[7]  = pws->b[3];
4491     pwd->b[6]  = pwt->b[3];
4492     pwd->b[5]  = pws->b[2];
4493     pwd->b[4]  = pwt->b[2];
4494     pwd->b[3]  = pws->b[1];
4495     pwd->b[2]  = pwt->b[1];
4496     pwd->b[1]  = pws->b[0];
4497     pwd->b[0]  = pwt->b[0];
4498 #endif
4499 }
4500 
4501 void helper_msa_ilvr_h(CPUMIPSState *env,
4502                        uint32_t wd, uint32_t ws, uint32_t wt)
4503 {
4504     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4505     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4506     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4507 
4508 #if HOST_BIG_ENDIAN
4509     pwd->h[4] = pws->h[0];
4510     pwd->h[5] = pwt->h[0];
4511     pwd->h[6] = pws->h[1];
4512     pwd->h[7] = pwt->h[1];
4513     pwd->h[0] = pws->h[2];
4514     pwd->h[1] = pwt->h[2];
4515     pwd->h[2] = pws->h[3];
4516     pwd->h[3] = pwt->h[3];
4517 #else
4518     pwd->h[7] = pws->h[3];
4519     pwd->h[6] = pwt->h[3];
4520     pwd->h[5] = pws->h[2];
4521     pwd->h[4] = pwt->h[2];
4522     pwd->h[3] = pws->h[1];
4523     pwd->h[2] = pwt->h[1];
4524     pwd->h[1] = pws->h[0];
4525     pwd->h[0] = pwt->h[0];
4526 #endif
4527 }
4528 
4529 void helper_msa_ilvr_w(CPUMIPSState *env,
4530                        uint32_t wd, uint32_t ws, uint32_t wt)
4531 {
4532     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4533     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4534     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4535 
4536 #if HOST_BIG_ENDIAN
4537     pwd->w[2] = pws->w[0];
4538     pwd->w[3] = pwt->w[0];
4539     pwd->w[0] = pws->w[1];
4540     pwd->w[1] = pwt->w[1];
4541 #else
4542     pwd->w[3] = pws->w[1];
4543     pwd->w[2] = pwt->w[1];
4544     pwd->w[1] = pws->w[0];
4545     pwd->w[0] = pwt->w[0];
4546 #endif
4547 }
4548 
4549 void helper_msa_ilvr_d(CPUMIPSState *env,
4550                        uint32_t wd, uint32_t ws, uint32_t wt)
4551 {
4552     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4553     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4554     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4555 
4556     pwd->d[1] = pws->d[0];
4557     pwd->d[0] = pwt->d[0];
4558 }
4559 
4560 
4561 /*
4562  * Logic
4563  * -----
4564  *
4565  * +---------------+----------------------------------------------------------+
4566  * | AND.V         | Vector Logical And                                       |
4567  * | NOR.V         | Vector Logical Negated Or                                |
4568  * | OR.V          | Vector Logical Or                                        |
4569  * | XOR.V         | Vector Logical Exclusive Or                              |
4570  * +---------------+----------------------------------------------------------+
4571  */
4572 
4573 
4574 void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4575 {
4576     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4577     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4578     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4579 
4580     pwd->d[0] = pws->d[0] & pwt->d[0];
4581     pwd->d[1] = pws->d[1] & pwt->d[1];
4582 }
4583 
4584 void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4585 {
4586     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4587     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4588     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4589 
4590     pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4591     pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4592 }
4593 
4594 void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4595 {
4596     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4597     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4598     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4599 
4600     pwd->d[0] = pws->d[0] | pwt->d[0];
4601     pwd->d[1] = pws->d[1] | pwt->d[1];
4602 }
4603 
4604 void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4605 {
4606     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4607     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4608     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4609 
4610     pwd->d[0] = pws->d[0] ^ pwt->d[0];
4611     pwd->d[1] = pws->d[1] ^ pwt->d[1];
4612 }
4613 
4614 
4615 /*
4616  * Move
4617  * ----
4618  *
4619  * +---------------+----------------------------------------------------------+
4620  * | MOVE.V        | Vector Move                                              |
4621  * +---------------+----------------------------------------------------------+
4622  */
4623 
4624 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4625 {
4626     pwd->d[0] = pws->d[0];
4627     pwd->d[1] = pws->d[1];
4628 }
4629 
4630 void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4631 {
4632     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4633     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4634 
4635     msa_move_v(pwd, pws);
4636 }
4637 
4638 
4639 /*
4640  * Pack
4641  * ----
4642  *
4643  * +---------------+----------------------------------------------------------+
4644  * | PCKEV.B       | Vector Pack Even (byte)                                  |
4645  * | PCKEV.H       | Vector Pack Even (halfword)                              |
4646  * | PCKEV.W       | Vector Pack Even (word)                                  |
4647  * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4648  * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4649  * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4650  * | PCKOD.W       | Vector Pack Odd (word)                                   |
4651  * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4652  * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4653  * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4654  * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4655  * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4656  * +---------------+----------------------------------------------------------+
4657  */
4658 
4659 
4660 void helper_msa_pckev_b(CPUMIPSState *env,
4661                         uint32_t wd, uint32_t ws, uint32_t wt)
4662 {
4663     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4664     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4665     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4666 
4667 #if HOST_BIG_ENDIAN
4668     pwd->b[8]  = pws->b[9];
4669     pwd->b[10] = pws->b[13];
4670     pwd->b[12] = pws->b[1];
4671     pwd->b[14] = pws->b[5];
4672     pwd->b[0]  = pwt->b[9];
4673     pwd->b[2]  = pwt->b[13];
4674     pwd->b[4]  = pwt->b[1];
4675     pwd->b[6]  = pwt->b[5];
4676     pwd->b[9]  = pws->b[11];
4677     pwd->b[13] = pws->b[3];
4678     pwd->b[1]  = pwt->b[11];
4679     pwd->b[5]  = pwt->b[3];
4680     pwd->b[11] = pws->b[15];
4681     pwd->b[3]  = pwt->b[15];
4682     pwd->b[15] = pws->b[7];
4683     pwd->b[7]  = pwt->b[7];
4684 #else
4685     pwd->b[15] = pws->b[14];
4686     pwd->b[13] = pws->b[10];
4687     pwd->b[11] = pws->b[6];
4688     pwd->b[9]  = pws->b[2];
4689     pwd->b[7]  = pwt->b[14];
4690     pwd->b[5]  = pwt->b[10];
4691     pwd->b[3]  = pwt->b[6];
4692     pwd->b[1]  = pwt->b[2];
4693     pwd->b[14] = pws->b[12];
4694     pwd->b[10] = pws->b[4];
4695     pwd->b[6]  = pwt->b[12];
4696     pwd->b[2]  = pwt->b[4];
4697     pwd->b[12] = pws->b[8];
4698     pwd->b[4]  = pwt->b[8];
4699     pwd->b[8]  = pws->b[0];
4700     pwd->b[0]  = pwt->b[0];
4701 #endif
4702 }
4703 
4704 void helper_msa_pckev_h(CPUMIPSState *env,
4705                         uint32_t wd, uint32_t ws, uint32_t wt)
4706 {
4707     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4708     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4709     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4710 
4711 #if HOST_BIG_ENDIAN
4712     pwd->h[4] = pws->h[5];
4713     pwd->h[6] = pws->h[1];
4714     pwd->h[0] = pwt->h[5];
4715     pwd->h[2] = pwt->h[1];
4716     pwd->h[5] = pws->h[7];
4717     pwd->h[1] = pwt->h[7];
4718     pwd->h[7] = pws->h[3];
4719     pwd->h[3] = pwt->h[3];
4720 #else
4721     pwd->h[7] = pws->h[6];
4722     pwd->h[5] = pws->h[2];
4723     pwd->h[3] = pwt->h[6];
4724     pwd->h[1] = pwt->h[2];
4725     pwd->h[6] = pws->h[4];
4726     pwd->h[2] = pwt->h[4];
4727     pwd->h[4] = pws->h[0];
4728     pwd->h[0] = pwt->h[0];
4729 #endif
4730 }
4731 
4732 void helper_msa_pckev_w(CPUMIPSState *env,
4733                         uint32_t wd, uint32_t ws, uint32_t wt)
4734 {
4735     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4736     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4737     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4738 
4739 #if HOST_BIG_ENDIAN
4740     pwd->w[2] = pws->w[3];
4741     pwd->w[0] = pwt->w[3];
4742     pwd->w[3] = pws->w[1];
4743     pwd->w[1] = pwt->w[1];
4744 #else
4745     pwd->w[3] = pws->w[2];
4746     pwd->w[1] = pwt->w[2];
4747     pwd->w[2] = pws->w[0];
4748     pwd->w[0] = pwt->w[0];
4749 #endif
4750 }
4751 
4752 void helper_msa_pckev_d(CPUMIPSState *env,
4753                         uint32_t wd, uint32_t ws, uint32_t wt)
4754 {
4755     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4756     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4757     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4758 
4759     pwd->d[1] = pws->d[0];
4760     pwd->d[0] = pwt->d[0];
4761 }
4762 
4763 
4764 void helper_msa_pckod_b(CPUMIPSState *env,
4765                         uint32_t wd, uint32_t ws, uint32_t wt)
4766 {
4767     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4768     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4769     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4770 
4771 #if HOST_BIG_ENDIAN
4772     pwd->b[7]  = pwt->b[6];
4773     pwd->b[5]  = pwt->b[2];
4774     pwd->b[3]  = pwt->b[14];
4775     pwd->b[1]  = pwt->b[10];
4776     pwd->b[15] = pws->b[6];
4777     pwd->b[13] = pws->b[2];
4778     pwd->b[11] = pws->b[14];
4779     pwd->b[9]  = pws->b[10];
4780     pwd->b[6]  = pwt->b[4];
4781     pwd->b[2]  = pwt->b[12];
4782     pwd->b[14] = pws->b[4];
4783     pwd->b[10] = pws->b[12];
4784     pwd->b[4]  = pwt->b[0];
4785     pwd->b[12] = pws->b[0];
4786     pwd->b[0]  = pwt->b[8];
4787     pwd->b[8]  = pws->b[8];
4788 #else
4789     pwd->b[0]  = pwt->b[1];
4790     pwd->b[2]  = pwt->b[5];
4791     pwd->b[4]  = pwt->b[9];
4792     pwd->b[6]  = pwt->b[13];
4793     pwd->b[8]  = pws->b[1];
4794     pwd->b[10] = pws->b[5];
4795     pwd->b[12] = pws->b[9];
4796     pwd->b[14] = pws->b[13];
4797     pwd->b[1]  = pwt->b[3];
4798     pwd->b[5]  = pwt->b[11];
4799     pwd->b[9]  = pws->b[3];
4800     pwd->b[13] = pws->b[11];
4801     pwd->b[3]  = pwt->b[7];
4802     pwd->b[11] = pws->b[7];
4803     pwd->b[7]  = pwt->b[15];
4804     pwd->b[15] = pws->b[15];
4805 #endif
4806 
4807 }
4808 
4809 void helper_msa_pckod_h(CPUMIPSState *env,
4810                         uint32_t wd, uint32_t ws, uint32_t wt)
4811 {
4812     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4813     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4814     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4815 
4816 #if HOST_BIG_ENDIAN
4817     pwd->h[3] = pwt->h[2];
4818     pwd->h[1] = pwt->h[6];
4819     pwd->h[7] = pws->h[2];
4820     pwd->h[5] = pws->h[6];
4821     pwd->h[2] = pwt->h[0];
4822     pwd->h[6] = pws->h[0];
4823     pwd->h[0] = pwt->h[4];
4824     pwd->h[4] = pws->h[4];
4825 #else
4826     pwd->h[0] = pwt->h[1];
4827     pwd->h[2] = pwt->h[5];
4828     pwd->h[4] = pws->h[1];
4829     pwd->h[6] = pws->h[5];
4830     pwd->h[1] = pwt->h[3];
4831     pwd->h[5] = pws->h[3];
4832     pwd->h[3] = pwt->h[7];
4833     pwd->h[7] = pws->h[7];
4834 #endif
4835 }
4836 
4837 void helper_msa_pckod_w(CPUMIPSState *env,
4838                         uint32_t wd, uint32_t ws, uint32_t wt)
4839 {
4840     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4841     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4842     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4843 
4844 #if HOST_BIG_ENDIAN
4845     pwd->w[1] = pwt->w[0];
4846     pwd->w[3] = pws->w[0];
4847     pwd->w[0] = pwt->w[2];
4848     pwd->w[2] = pws->w[2];
4849 #else
4850     pwd->w[0] = pwt->w[1];
4851     pwd->w[2] = pws->w[1];
4852     pwd->w[1] = pwt->w[3];
4853     pwd->w[3] = pws->w[3];
4854 #endif
4855 }
4856 
4857 void helper_msa_pckod_d(CPUMIPSState *env,
4858                         uint32_t wd, uint32_t ws, uint32_t wt)
4859 {
4860     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4861     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4862     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4863 
4864     pwd->d[0] = pwt->d[1];
4865     pwd->d[1] = pws->d[1];
4866 }
4867 
4868 
4869 /*
4870  * Shift
4871  * -----
4872  *
4873  * +---------------+----------------------------------------------------------+
4874  * | SLL.B         | Vector Shift Left (byte)                                 |
4875  * | SLL.H         | Vector Shift Left (halfword)                             |
4876  * | SLL.W         | Vector Shift Left (word)                                 |
4877  * | SLL.D         | Vector Shift Left (doubleword)                           |
4878  * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4879  * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4880  * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4881  * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4882  * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4883  * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4884  * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4885  * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4886  * | SRL.B         | Vector Shift Right Logical (byte)                        |
4887  * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4888  * | SRL.W         | Vector Shift Right Logical (word)                        |
4889  * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4890  * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4891  * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4892  * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4893  * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4894  * +---------------+----------------------------------------------------------+
4895  */
4896 
4897 
/*
 * Per-element shift left used by the helper_msa_sll_* functions: returns
 * arg1 shifted left by BIT_POSITION(arg2, df) bits.
 *
 * The shift is carried out on the uint64_t representation of arg1.
 * Left-shifting a negative signed value is undefined in ISO C, whereas the
 * unsigned shift yields the same bit pattern and is well defined for any
 * count below 64.  The callers store the result into a lane of the element
 * width, so only the low bits of the returned value are kept.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    return (int64_t)((uint64_t)arg1 << b_arg2);
}
4903 
4904 void helper_msa_sll_b(CPUMIPSState *env,
4905                       uint32_t wd, uint32_t ws, uint32_t wt)
4906 {
4907     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4908     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4909     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4910 
4911     pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4912     pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4913     pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4914     pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4915     pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4916     pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4917     pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4918     pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4919     pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4920     pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4921     pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4922     pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4923     pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4924     pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4925     pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4926     pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4927 }
4928 
4929 void helper_msa_sll_h(CPUMIPSState *env,
4930                       uint32_t wd, uint32_t ws, uint32_t wt)
4931 {
4932     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4933     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4934     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4935 
4936     pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4937     pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4938     pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4939     pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4940     pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4941     pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4942     pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4943     pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4944 }
4945 
4946 void helper_msa_sll_w(CPUMIPSState *env,
4947                       uint32_t wd, uint32_t ws, uint32_t wt)
4948 {
4949     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4950     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4951     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4952 
4953     pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4954     pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4955     pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4956     pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4957 }
4958 
4959 void helper_msa_sll_d(CPUMIPSState *env,
4960                       uint32_t wd, uint32_t ws, uint32_t wt)
4961 {
4962     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4963     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4964     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4965 
4966     pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4967     pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4968 }
4969 
4970 
/*
 * Shift right arithmetic: returns the signed value arg1 shifted right by
 * BIT_POSITION(arg2, df) bits.
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 >> shift;
}
4976 
4977 void helper_msa_sra_b(CPUMIPSState *env,
4978                       uint32_t wd, uint32_t ws, uint32_t wt)
4979 {
4980     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4981     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4982     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4983 
4984     pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4985     pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4986     pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4987     pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4988     pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4989     pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4990     pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4991     pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4992     pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4993     pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4994     pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4995     pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4996     pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4997     pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4998     pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4999     pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
5000 }
5001 
5002 void helper_msa_sra_h(CPUMIPSState *env,
5003                       uint32_t wd, uint32_t ws, uint32_t wt)
5004 {
5005     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5006     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5007     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5008 
5009     pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5010     pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5011     pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5012     pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5013     pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5014     pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5015     pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5016     pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5017 }
5018 
5019 void helper_msa_sra_w(CPUMIPSState *env,
5020                       uint32_t wd, uint32_t ws, uint32_t wt)
5021 {
5022     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5023     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5024     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5025 
5026     pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5027     pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5028     pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5029     pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5030 }
5031 
5032 void helper_msa_sra_d(CPUMIPSState *env,
5033                       uint32_t wd, uint32_t ws, uint32_t wt)
5034 {
5035     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5036     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5037     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5038 
5039     pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5040     pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5041 }
5042 
5043 
/*
 * Shift right arithmetic, rounded: shifts the signed value arg1 right by
 * BIT_POSITION(arg2, df) bits and adds the last bit that was shifted out.
 * A shift amount of zero returns arg1 unchanged.
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    int64_t round;

    if (shift == 0) {
        /* Nothing is shifted out, so there is no rounding bit to add. */
        return arg1;
    }

    round = (arg1 >> (shift - 1)) & 1;
    return (arg1 >> shift) + round;
}
5054 
5055 void helper_msa_srar_b(CPUMIPSState *env,
5056                        uint32_t wd, uint32_t ws, uint32_t wt)
5057 {
5058     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5059     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5060     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5061 
5062     pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5063     pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5064     pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5065     pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5066     pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5067     pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5068     pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5069     pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5070     pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5071     pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5072     pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5073     pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5074     pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5075     pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5076     pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5077     pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5078 }
5079 
5080 void helper_msa_srar_h(CPUMIPSState *env,
5081                        uint32_t wd, uint32_t ws, uint32_t wt)
5082 {
5083     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5084     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5085     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5086 
5087     pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5088     pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5089     pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5090     pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5091     pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5092     pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5093     pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5094     pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5095 }
5096 
5097 void helper_msa_srar_w(CPUMIPSState *env,
5098                        uint32_t wd, uint32_t ws, uint32_t wt)
5099 {
5100     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5101     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5102     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5103 
5104     pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5105     pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5106     pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5107     pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5108 }
5109 
5110 void helper_msa_srar_d(CPUMIPSState *env,
5111                        uint32_t wd, uint32_t ws, uint32_t wt)
5112 {
5113     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5114     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5115     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5116 
5117     pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5118     pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5119 }
5120 
5121 
/*
 * Shift right logical: arg1 is first masked to the element width with
 * UNSIGNED(), so the bits shifted in from the top are zero, and is then
 * shifted right by BIT_POSITION(arg2, df) bits.
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);

    return value >> shift;
}
5128 
5129 void helper_msa_srl_b(CPUMIPSState *env,
5130                       uint32_t wd, uint32_t ws, uint32_t wt)
5131 {
5132     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5133     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5134     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5135 
5136     pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5137     pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5138     pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5139     pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5140     pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5141     pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5142     pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5143     pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5144     pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5145     pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5146     pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5147     pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5148     pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5149     pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5150     pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5151     pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5152 }
5153 
5154 void helper_msa_srl_h(CPUMIPSState *env,
5155                       uint32_t wd, uint32_t ws, uint32_t wt)
5156 {
5157     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5158     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5159     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5160 
5161     pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5162     pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5163     pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5164     pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5165     pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5166     pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5167     pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5168     pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5169 }
5170 
5171 void helper_msa_srl_w(CPUMIPSState *env,
5172                       uint32_t wd, uint32_t ws, uint32_t wt)
5173 {
5174     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5175     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5176     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5177 
5178     pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5179     pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5180     pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5181     pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5182 }
5183 
5184 void helper_msa_srl_d(CPUMIPSState *env,
5185                       uint32_t wd, uint32_t ws, uint32_t wt)
5186 {
5187     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5188     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5189     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5190 
5191     pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5192     pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5193 }
5194 
5195 
/*
 * Shift right logical, rounded: arg1 is masked to the element width with
 * UNSIGNED(), shifted right by BIT_POSITION(arg2, df) bits, and the last
 * bit that was shifted out is added to the result.  A shift amount of zero
 * returns the masked value unchanged.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);
    uint64_t round;

    if (shift == 0) {
        /* Nothing is shifted out, so there is no rounding bit to add. */
        return value;
    }

    round = (value >> (shift - 1)) & 1;
    return (value >> shift) + round;
}
5207 
5208 void helper_msa_srlr_b(CPUMIPSState *env,
5209                        uint32_t wd, uint32_t ws, uint32_t wt)
5210 {
5211     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5212     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5213     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5214 
5215     pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5216     pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5217     pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5218     pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5219     pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5220     pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5221     pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5222     pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5223     pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5224     pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5225     pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5226     pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5227     pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5228     pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5229     pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5230     pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5231 }
5232 
5233 void helper_msa_srlr_h(CPUMIPSState *env,
5234                        uint32_t wd, uint32_t ws, uint32_t wt)
5235 {
5236     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5237     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5238     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5239 
5240     pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5241     pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5242     pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5243     pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5244     pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5245     pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5246     pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5247     pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5248 }
5249 
5250 void helper_msa_srlr_w(CPUMIPSState *env,
5251                        uint32_t wd, uint32_t ws, uint32_t wt)
5252 {
5253     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5254     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5255     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5256 
5257     pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5258     pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5259     pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5260     pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5261 }
5262 
5263 void helper_msa_srlr_d(CPUMIPSState *env,
5264                        uint32_t wd, uint32_t ws, uint32_t wt)
5265 {
5266     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5267     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5268     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5269 
5270     pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5271     pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5272 }
5273 
5274 
5275 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5276 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5277         uint32_t i8)                                                    \
5278 {                                                                       \
5279     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5280     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5281     uint32_t i;                                                         \
5282     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5283         DEST = OPERATION;                                               \
5284     }                                                                   \
5285 }
5286 
5287 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5288 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5289 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5290 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5291 
5292 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5293             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5294 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5295         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5296 
5297 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5298             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5299 MSA_FN_IMM8(bmzi_b, pwd->b[i],
5300         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5301 
5302 #define BIT_SELECT(dest, arg1, arg2, df) \
5303             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5304 MSA_FN_IMM8(bseli_b, pwd->b[i],
5305         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5306 
5307 #undef BIT_SELECT
5308 #undef BIT_MOVE_IF_ZERO
5309 #undef BIT_MOVE_IF_NOT_ZERO
5310 #undef MSA_FN_IMM8
5311 
5312 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5313 
5314 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5315                        uint32_t ws, uint32_t imm)
5316 {
5317     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5318     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5319     wr_t wx, *pwx = &wx;
5320     uint32_t i;
5321 
5322     switch (df) {
5323     case DF_BYTE:
5324         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5325             pwx->b[i] = pws->b[SHF_POS(i, imm)];
5326         }
5327         break;
5328     case DF_HALF:
5329         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5330             pwx->h[i] = pws->h[SHF_POS(i, imm)];
5331         }
5332         break;
5333     case DF_WORD:
5334         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5335             pwx->w[i] = pws->w[SHF_POS(i, imm)];
5336         }
5337         break;
5338     default:
5339         g_assert_not_reached();
5340     }
5341     msa_move_v(pwd, pwx);
5342 }
5343 
5344 #define MSA_BINOP_IMM_DF(helper, func)                                  \
5345 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5346                         uint32_t wd, uint32_t ws, int32_t u5)           \
5347 {                                                                       \
5348     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5349     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5350     uint32_t i;                                                         \
5351                                                                         \
5352     switch (df) {                                                       \
5353     case DF_BYTE:                                                       \
5354         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5355             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5356         }                                                               \
5357         break;                                                          \
5358     case DF_HALF:                                                       \
5359         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5360             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5361         }                                                               \
5362         break;                                                          \
5363     case DF_WORD:                                                       \
5364         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5365             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5366         }                                                               \
5367         break;                                                          \
5368     case DF_DOUBLE:                                                     \
5369         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5370             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5371         }                                                               \
5372         break;                                                          \
5373     default:                                                            \
5374         g_assert_not_reached();                                         \
5375     }                                                                   \
5376 }
5377 
5378 MSA_BINOP_IMM_DF(addvi, addv)
5379 MSA_BINOP_IMM_DF(subvi, subv)
5380 MSA_BINOP_IMM_DF(ceqi, ceq)
5381 MSA_BINOP_IMM_DF(clei_s, cle_s)
5382 MSA_BINOP_IMM_DF(clei_u, cle_u)
5383 MSA_BINOP_IMM_DF(clti_s, clt_s)
5384 MSA_BINOP_IMM_DF(clti_u, clt_u)
5385 MSA_BINOP_IMM_DF(maxi_s, max_s)
5386 MSA_BINOP_IMM_DF(maxi_u, max_u)
5387 MSA_BINOP_IMM_DF(mini_s, min_s)
5388 MSA_BINOP_IMM_DF(mini_u, min_u)
5389 #undef MSA_BINOP_IMM_DF
5390 
5391 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5392                        int32_t s10)
5393 {
5394     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5395     uint32_t i;
5396 
5397     switch (df) {
5398     case DF_BYTE:
5399         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5400             pwd->b[i] = (int8_t)s10;
5401         }
5402         break;
5403     case DF_HALF:
5404         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5405             pwd->h[i] = (int16_t)s10;
5406         }
5407         break;
5408     case DF_WORD:
5409         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5410             pwd->w[i] = (int32_t)s10;
5411         }
5412         break;
5413     case DF_DOUBLE:
5414         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5415             pwd->d[i] = (int64_t)s10;
5416         }
5417        break;
5418     default:
5419         g_assert_not_reached();
5420     }
5421 }
5422 
/*
 * Signed saturation: clamps arg to the range of an (m + 1)-bit signed
 * integer, i.e. [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].  df is not used.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    const int64_t lowest = M_MIN_INT(m + 1);
    const int64_t highest = M_MAX_INT(m + 1);

    if (arg < lowest) {
        return lowest;
    }
    if (arg > highest) {
        return highest;
    }
    return arg;
}
5429 
/*
 * Unsigned saturation: arg is masked to the element width selected by df
 * and then limited to the largest (m + 1)-bit unsigned value,
 * M_MAX_UINT(m + 1).
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t value = UNSIGNED(arg, df);
    const uint64_t limit = M_MAX_UINT(m + 1);

    if (value < limit) {
        return value;
    }
    return limit;
}
5436 
5437 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5438 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5439                        uint32_t ws, uint32_t u5)                        \
5440 {                                                                       \
5441     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5442     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5443     uint32_t i;                                                         \
5444                                                                         \
5445     switch (df) {                                                       \
5446     case DF_BYTE:                                                       \
5447         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5448             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5449         }                                                               \
5450         break;                                                          \
5451     case DF_HALF:                                                       \
5452         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5453             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5454         }                                                               \
5455         break;                                                          \
5456     case DF_WORD:                                                       \
5457         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5458             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5459         }                                                               \
5460         break;                                                          \
5461     case DF_DOUBLE:                                                     \
5462         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5463             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5464         }                                                               \
5465         break;                                                          \
5466     default:                                                            \
5467         g_assert_not_reached();                                         \
5468     }                                                                   \
5469 }
5470 
5471 MSA_BINOP_IMMU_DF(slli, sll)
5472 MSA_BINOP_IMMU_DF(srai, sra)
5473 MSA_BINOP_IMMU_DF(srli, srl)
5474 MSA_BINOP_IMMU_DF(bclri, bclr)
5475 MSA_BINOP_IMMU_DF(bseti, bset)
5476 MSA_BINOP_IMMU_DF(bnegi, bneg)
5477 MSA_BINOP_IMMU_DF(sat_s, sat_s)
5478 MSA_BINOP_IMMU_DF(sat_u, sat_u)
5479 MSA_BINOP_IMMU_DF(srari, srar)
5480 MSA_BINOP_IMMU_DF(srlri, srlr)
5481 #undef MSA_BINOP_IMMU_DF
5482 
5483 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5484 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5485                                   uint32_t wd, uint32_t ws, uint32_t u5) \
5486 {                                                                       \
5487     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5488     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5489     uint32_t i;                                                         \
5490                                                                         \
5491     switch (df) {                                                       \
5492     case DF_BYTE:                                                       \
5493         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5494             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5495                                             u5);                        \
5496         }                                                               \
5497         break;                                                          \
5498     case DF_HALF:                                                       \
5499         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5500             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5501                                             u5);                        \
5502         }                                                               \
5503         break;                                                          \
5504     case DF_WORD:                                                       \
5505         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5506             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5507                                             u5);                        \
5508         }                                                               \
5509         break;                                                          \
5510     case DF_DOUBLE:                                                     \
5511         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5512             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5513                                             u5);                        \
5514         }                                                               \
5515         break;                                                          \
5516     default:                                                            \
5517         g_assert_not_reached();                                         \
5518     }                                                                   \
5519 }
5520 
5521 MSA_TEROP_IMMU_DF(binsli, binsl)
5522 MSA_TEROP_IMMU_DF(binsri, binsr)
5523 #undef MSA_TEROP_IMMU_DF
5524 
5525 #define CONCATENATE_AND_SLIDE(s, k)             \
5526     do {                                        \
5527         for (i = 0; i < s; i++) {               \
5528             v[i]     = pws->b[s * k + i];       \
5529             v[i + s] = pwd->b[s * k + i];       \
5530         }                                       \
5531         for (i = 0; i < s; i++) {               \
5532             pwd->b[s * k + i] = v[i + n];       \
5533         }                                       \
5534     } while (0)
5535 
5536 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5537                               wr_t *pws, target_ulong rt)
5538 {
5539     uint32_t n = rt % DF_ELEMENTS(df);
5540     uint8_t v[64];
5541     uint32_t i, k;
5542 
5543     switch (df) {
5544     case DF_BYTE:
5545         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5546         break;
5547     case DF_HALF:
5548         for (k = 0; k < 2; k++) {
5549             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5550         }
5551         break;
5552     case DF_WORD:
5553         for (k = 0; k < 4; k++) {
5554             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5555         }
5556         break;
5557     case DF_DOUBLE:
5558         for (k = 0; k < 8; k++) {
5559             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5560         }
5561         break;
5562     default:
5563         g_assert_not_reached();
5564     }
5565 }
5566 
/*
 * Fixed-point multiply: returns (arg1 * arg2) >> (DF_BITS(df) - 1).
 * When both operands equal DF_MIN_INT(df) the result is pinned to
 * DF_MAX_INT(df) instead.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t q_min = DF_MIN_INT(df);
    const int64_t q_max = DF_MAX_INT(df);

    if (arg1 != q_min || arg2 != q_min) {
        return (arg1 * arg2) >> (DF_BITS(df) - 1);
    }

    /* Both operands are the most negative value: saturate. */
    return q_max;
}
5577 
/*
 * Fixed-point multiply with rounding: 1 << (DF_BITS(df) - 2) is added to
 * the product before it is shifted right by DF_BITS(df) - 1.  When both
 * operands equal DF_MIN_INT(df) the result is pinned to DF_MAX_INT(df)
 * instead.
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t q_min = DF_MIN_INT(df);
    const int64_t q_max = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);

    if (arg1 != q_min || arg2 != q_min) {
        return (arg1 * arg2 + rounding) >> (DF_BITS(df) - 1);
    }

    /* Both operands are the most negative value: saturate. */
    return q_max;
}
5589 
/*
 * MSA_BINOP_DF(func) generates the TCG helper helper_msa_<func>_df().
 *
 * The generated helper looks up the destination (wd) and the two source
 * (ws, wt) vector registers and, depending on the data format df, applies
 * the per-element function msa_<func>_df(df, a, b) to each pair of
 * corresponding source elements, storing the result in the matching
 * destination element.  The element loops are written out in full:
 * 16 byte, 8 halfword, 4 word or 2 doubleword operations.
 *
 * An unknown data format trips g_assert_not_reached().
 *
 * The macro is instantiated for mul_q and mulr_q and then undefined.
 */
#define MSA_BINOP_DF(func) \
void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
                                uint32_t wd, uint32_t ws, uint32_t wt)  \
{                                                                       \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
                                                                        \
    switch (df) {                                                       \
    case DF_BYTE:                                                       \
        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
        break;                                                          \
    case DF_HALF:                                                       \
        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
        break;                                                          \
    case DF_WORD:                                                       \
        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
        break;                                                          \
    case DF_DOUBLE:                                                     \
        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
        break;                                                          \
    default:                                                            \
        g_assert_not_reached();                                         \
    }                                                                   \
}

/* helper_msa_mul_q_df() and helper_msa_mulr_q_df() */
MSA_BINOP_DF(mul_q)
MSA_BINOP_DF(mulr_q)
#undef MSA_BINOP_DF
5645 
5646 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5647                        uint32_t ws, uint32_t rt)
5648 {
5649     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5650     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5651 
5652     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5653 }
5654 
5655 static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
5656                                     int64_t arg2)
5657 {
5658     int64_t q_prod, q_ret;
5659 
5660     int64_t q_max = DF_MAX_INT(df);
5661     int64_t q_min = DF_MIN_INT(df);
5662 
5663     q_prod = arg1 * arg2;
5664     q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod) >> (DF_BITS(df) - 1);
5665 
5666     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5667 }
5668 
5669 static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
5670                                     int64_t arg2)
5671 {
5672     int64_t q_prod, q_ret;
5673 
5674     int64_t q_max = DF_MAX_INT(df);
5675     int64_t q_min = DF_MIN_INT(df);
5676 
5677     q_prod = arg1 * arg2;
5678     q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod) >> (DF_BITS(df) - 1);
5679 
5680     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5681 }
5682 
5683 static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
5684                                      int64_t arg2)
5685 {
5686     int64_t q_prod, q_ret;
5687 
5688     int64_t q_max = DF_MAX_INT(df);
5689     int64_t q_min = DF_MIN_INT(df);
5690     int64_t r_bit = 1LL << (DF_BITS(df) - 2);
5691 
5692     q_prod = arg1 * arg2;
5693     q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);
5694 
5695     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5696 }
5697 
5698 static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
5699                                      int64_t arg2)
5700 {
5701     int64_t q_prod, q_ret;
5702 
5703     int64_t q_max = DF_MAX_INT(df);
5704     int64_t q_min = DF_MIN_INT(df);
5705     int64_t r_bit = 1LL << (DF_BITS(df) - 2);
5706 
5707     q_prod = arg1 * arg2;
5708     q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);
5709 
5710     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5711 }
5712 
/*
 * MSA_TEROP_DF(func) generates the TCG helper helper_msa_<func>_df() for
 * three-operand element operations.
 *
 * The generated helper looks up the vector registers wd, ws and wt and,
 * depending on the data format df, calls the per-element function
 * msa_<func>_df(df, dest, arg1, arg2) for every element.  The current
 * destination element is passed as "dest" and is then overwritten with
 * the result, i.e. wd is both an input and the output.  The element loops
 * are written out in full: 16 byte, 8 halfword, 4 word or 2 doubleword
 * operations.
 *
 * An unknown data format trips g_assert_not_reached().
 *
 * The macro is instantiated for binsl, binsr, madd_q, msub_q, maddr_q and
 * msubr_q and then undefined.
 */
#define MSA_TEROP_DF(func) \
void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
                                uint32_t ws, uint32_t wt)                     \
{                                                                             \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
                                                                              \
    switch (df) {                                                             \
    case DF_BYTE:                                                             \
        pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
                                             pwt->b[0]);                      \
        pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
                                             pwt->b[1]);                      \
        pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
                                             pwt->b[2]);                      \
        pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
                                             pwt->b[3]);                      \
        pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
                                             pwt->b[4]);                      \
        pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
                                             pwt->b[5]);                      \
        pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
                                             pwt->b[6]);                      \
        pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
                                             pwt->b[7]);                      \
        pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
                                             pwt->b[8]);                      \
        pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
                                             pwt->b[9]);                      \
        pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
                                             pwt->b[10]);                     \
        pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
                                             pwt->b[11]);                     \
        pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
                                             pwt->b[12]);                     \
        pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
                                             pwt->b[13]);                     \
        pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
                                             pwt->b[14]);                     \
        pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
                                             pwt->b[15]);                     \
        break;                                                                \
    case DF_HALF:                                                             \
        pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
        pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
        pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
        pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
        pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
        pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
        pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
        pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
        break;                                                                \
    case DF_WORD:                                                             \
        pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
        pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
        pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
        pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
        break;                                                                \
    case DF_DOUBLE:                                                           \
        pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
        pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
        break;                                                                \
    default:                                                                  \
        g_assert_not_reached();                                               \
    }                                                                         \
}

/* One helper per three-operand element function. */
MSA_TEROP_DF(binsl)
MSA_TEROP_DF(binsr)
MSA_TEROP_DF(madd_q)
MSA_TEROP_DF(msub_q)
MSA_TEROP_DF(maddr_q)
MSA_TEROP_DF(msubr_q)
#undef MSA_TEROP_DF
5788 
5789 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5790                                 wr_t *pws, target_ulong rt)
5791 {
5792     uint32_t n = rt % DF_ELEMENTS(df);
5793     uint32_t i;
5794 
5795     switch (df) {
5796     case DF_BYTE:
5797         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5798             pwd->b[i] = pws->b[n];
5799         }
5800         break;
5801     case DF_HALF:
5802         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5803             pwd->h[i] = pws->h[n];
5804         }
5805         break;
5806     case DF_WORD:
5807         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5808             pwd->w[i] = pws->w[n];
5809         }
5810         break;
5811     case DF_DOUBLE:
5812         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5813             pwd->d[i] = pws->d[n];
5814         }
5815        break;
5816     default:
5817         g_assert_not_reached();
5818     }
5819 }
5820 
5821 void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5822                          uint32_t ws, uint32_t rt)
5823 {
5824     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5825     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5826 
5827     msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5828 }
5829 
/*
 * Macro scaffolding used to generate helper_msa_vshf_df().
 *
 * MSA_FN_DF(FUNC) emits a helper that switches on the data format and
 * runs MSA_LOOP_{B,H,W,D}.  Each of those expands, via MSA_LOOP(), to a
 * for loop over i whose bound comes from MSA_LOOP_COND() and whose body
 * is MSA_DO() applied to the matching wr_t member (b, h, w or d).
 * MSA_DO and MSA_LOOP_COND are therefore the customisation points and
 * must be defined before MSA_FN_DF() is instantiated.
 */

/* Per-format loop body: MSA_DO applied to the wr_t member of that format. */
#define MSA_DO_B MSA_DO(b)
#define MSA_DO_H MSA_DO(h)
#define MSA_DO_W MSA_DO(w)
#define MSA_DO_D MSA_DO(d)

/* Per-format loop. */
#define MSA_LOOP_B MSA_LOOP(B)
#define MSA_LOOP_H MSA_LOOP(H)
#define MSA_LOOP_W MSA_LOOP(W)
#define MSA_LOOP_D MSA_LOOP(D)

/* Per-format loop bound. */
#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)

/* Relies on a variable named i being in scope at the expansion site. */
#define MSA_LOOP(DF) \
    do { \
        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
            MSA_DO_ ## DF; \
        } \
    } while (0)

/*
 * The generated helper computes into the temporary wx (through pwx) and
 * only afterwards hands it to msa_move_v(pwd, pwx), so MSA_DO may read
 * pwd, pws and pwt freely while the result is being built.
 */
#define MSA_FN_DF(FUNC)                                             \
void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
        uint32_t ws, uint32_t wt)                                   \
{                                                                   \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
    wr_t wx, *pwx = &wx;                                            \
    uint32_t i;                                                     \
    switch (df) {                                                   \
    case DF_BYTE:                                                   \
        MSA_LOOP_B;                                                 \
        break;                                                      \
    case DF_HALF:                                                   \
        MSA_LOOP_H;                                                 \
        break;                                                      \
    case DF_WORD:                                                   \
        MSA_LOOP_W;                                                 \
        break;                                                      \
    case DF_DOUBLE:                                                 \
        MSA_LOOP_D;                                                 \
        break;                                                      \
    default:                                                        \
        g_assert_not_reached();                                     \
    }                                                               \
    msa_move_v(pwd, pwx);                                           \
}

/*
 * Half-vector loop bound together with accessors for the lower ("R") and
 * upper ("L") half of a vector.  This MSA_LOOP_COND definition is removed
 * again right below without MSA_FN_DF() having been instantiated in
 * between; the R and L accessors stay defined.
 */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF) / 2)

#define Rb(pwr, i) (pwr->b[i])
#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
#define Rh(pwr, i) (pwr->h[i])
#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
#define Rw(pwr, i) (pwr->w[i])
#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
#define Rd(pwr, i) (pwr->d[i])
#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])

#undef MSA_LOOP_COND

/* Full-vector loop bound, used for vshf. */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF))

/*
 * Vector shuffle, one element:
 *  - the current destination element is the control value;
 *  - if either of its bits 7:6 is set (mask 0xc0), the result is 0;
 *  - otherwise k = (control & 0x3f) modulo twice the element count picks
 *    element k of wt when k is below the element count, and element
 *    (k - element count) of ws otherwise.
 */
#define MSA_DO(DF)                                                          \
    do {                                                                    \
        uint32_t n = DF_ELEMENTS(df);                                       \
        uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
        pwx->DF[i] =                                                        \
            (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
    } while (0)
/* Defines helper_msa_vshf_df(). */
MSA_FN_DF(vshf_df)
#undef MSA_DO
#undef MSA_LOOP_COND
#undef MSA_FN_DF
5908 
5909 
5910 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5911                         uint32_t ws, uint32_t n)
5912 {
5913     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5914     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5915 
5916     msa_sld_df(df, pwd, pws, n);
5917 }
5918 
5919 void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5920                           uint32_t ws, uint32_t n)
5921 {
5922     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5923     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5924 
5925     msa_splat_df(df, pwd, pws, n);
5926 }
5927 
5928 void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5929                          uint32_t ws, uint32_t n)
5930 {
5931     n %= 16;
5932 #if HOST_BIG_ENDIAN
5933     if (n < 8) {
5934         n = 8 - n - 1;
5935     } else {
5936         n = 24 - n - 1;
5937     }
5938 #endif
5939     env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5940 }
5941 
5942 void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5943                          uint32_t ws, uint32_t n)
5944 {
5945     n %= 8;
5946 #if HOST_BIG_ENDIAN
5947     if (n < 4) {
5948         n = 4 - n - 1;
5949     } else {
5950         n = 12 - n - 1;
5951     }
5952 #endif
5953     env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5954 }
5955 
5956 void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5957                          uint32_t ws, uint32_t n)
5958 {
5959     n %= 4;
5960 #if HOST_BIG_ENDIAN
5961     if (n < 2) {
5962         n = 2 - n - 1;
5963     } else {
5964         n = 6 - n - 1;
5965     }
5966 #endif
5967     env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5968 }
5969 
5970 void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5971                          uint32_t ws, uint32_t n)
5972 {
5973     n %= 2;
5974     env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5975 }
5976 
5977 void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5978                          uint32_t ws, uint32_t n)
5979 {
5980     n %= 16;
5981 #if HOST_BIG_ENDIAN
5982     if (n < 8) {
5983         n = 8 - n - 1;
5984     } else {
5985         n = 24 - n - 1;
5986     }
5987 #endif
5988     env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5989 }
5990 
5991 void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5992                          uint32_t ws, uint32_t n)
5993 {
5994     n %= 8;
5995 #if HOST_BIG_ENDIAN
5996     if (n < 4) {
5997         n = 4 - n - 1;
5998     } else {
5999         n = 12 - n - 1;
6000     }
6001 #endif
6002     env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6003 }
6004 
6005 void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6006                          uint32_t ws, uint32_t n)
6007 {
6008     n %= 4;
6009 #if HOST_BIG_ENDIAN
6010     if (n < 2) {
6011         n = 2 - n - 1;
6012     } else {
6013         n = 6 - n - 1;
6014     }
6015 #endif
6016     env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6017 }
6018 
6019 void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6020                           uint32_t rs_num, uint32_t n)
6021 {
6022     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6023     target_ulong rs = env->active_tc.gpr[rs_num];
6024     n %= 16;
6025 #if HOST_BIG_ENDIAN
6026     if (n < 8) {
6027         n = 8 - n - 1;
6028     } else {
6029         n = 24 - n - 1;
6030     }
6031 #endif
6032     pwd->b[n] = (int8_t)rs;
6033 }
6034 
6035 void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6036                           uint32_t rs_num, uint32_t n)
6037 {
6038     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6039     target_ulong rs = env->active_tc.gpr[rs_num];
6040     n %= 8;
6041 #if HOST_BIG_ENDIAN
6042     if (n < 4) {
6043         n = 4 - n - 1;
6044     } else {
6045         n = 12 - n - 1;
6046     }
6047 #endif
6048     pwd->h[n] = (int16_t)rs;
6049 }
6050 
6051 void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6052                           uint32_t rs_num, uint32_t n)
6053 {
6054     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6055     target_ulong rs = env->active_tc.gpr[rs_num];
6056     n %= 4;
6057 #if HOST_BIG_ENDIAN
6058     if (n < 2) {
6059         n = 2 - n - 1;
6060     } else {
6061         n = 6 - n - 1;
6062     }
6063 #endif
6064     pwd->w[n] = (int32_t)rs;
6065 }
6066 
6067 void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6068                           uint32_t rs_num, uint32_t n)
6069 {
6070     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6071     target_ulong rs = env->active_tc.gpr[rs_num];
6072     n %= 2;
6073     pwd->d[n] = (int64_t)rs;
6074 }
6075 
6076 void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6077                          uint32_t ws, uint32_t n)
6078 {
6079     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6080     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6081 
6082     switch (df) {
6083     case DF_BYTE:
6084         pwd->b[n] = (int8_t)pws->b[0];
6085         break;
6086     case DF_HALF:
6087         pwd->h[n] = (int16_t)pws->h[0];
6088         break;
6089     case DF_WORD:
6090         pwd->w[n] = (int32_t)pws->w[0];
6091         break;
6092     case DF_DOUBLE:
6093         pwd->d[n] = (int64_t)pws->d[0];
6094         break;
6095     default:
6096         g_assert_not_reached();
6097     }
6098 }
6099 
6100 void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6101 {
6102     switch (cd) {
6103     case 0:
6104         break;
6105     case 1:
6106         env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6107         restore_msa_fp_status(env);
6108         /* check exception */
6109         if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6110             & GET_FP_CAUSE(env->active_tc.msacsr)) {
6111             do_raise_exception(env, EXCP_MSAFPE, GETPC());
6112         }
6113         break;
6114     }
6115 }
6116 
6117 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6118 {
6119     switch (cs) {
6120     case 0:
6121         return env->msair;
6122     case 1:
6123         return env->active_tc.msacsr & MSACSR_MASK;
6124     }
6125     return 0;
6126 }
6127 
6128 void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6129                         uint32_t rs)
6130 {
6131     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6132     uint32_t i;
6133 
6134     switch (df) {
6135     case DF_BYTE:
6136         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6137             pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6138         }
6139         break;
6140     case DF_HALF:
6141         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6142             pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6143         }
6144         break;
6145     case DF_WORD:
6146         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6147             pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6148         }
6149         break;
6150     case DF_DOUBLE:
6151         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6152             pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6153         }
6154        break;
6155     default:
6156         g_assert_not_reached();
6157     }
6158 }
6159 
6160 
/*
 * The value 1.0 as raw IEEE 754 bit patterns: 0x3f800000 (single) and
 * 0x3ff0000000000000 (double).
 */
#define FLOAT_ONE32 make_float32(0x3f8 << 20)
#define FLOAT_ONE64 make_float64(0x3ffULL << 52)

/*
 * NaN patterns derived from the default NaN of the float_status s by
 * flipping the bits given in the XOR mask.  The value noted below each
 * macro is presumably the result for the expected default NaN encoding
 * (e.g. 0x7e00 ^ 0x0220 == 0x7c20) - TODO confirm against
 * float*_default_nan().
 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
        /* 0x7c20 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
        /* 0x7f800020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
        /* 0x7ff0000000000020 */
6170 
/* Reset the Cause field of MSACSR to 0. */
static inline void clear_msacsr_cause(CPUMIPSState *env)
{
    SET_FP_CAUSE(env->active_tc.msacsr, 0);
}
6175 
6176 static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6177 {
6178     if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6179             (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6180         UPDATE_FP_FLAGS(env->active_tc.msacsr,
6181                 GET_FP_CAUSE(env->active_tc.msacsr));
6182     } else {
6183         do_raise_exception(env, EXCP_MSAFPE, retaddr);
6184     }
6185 }
6186 
/*
 * Bit flags for the "action" argument of update_msacsr():
 *  CLEAR_FS_UNDERFLOW - when an output was flushed to zero, clear the
 *                       Underflow exception instead of setting it
 *  CLEAR_IS_INEXACT   - when an input was flushed to zero, clear the
 *                       Inexact exception instead of setting it
 *  RECIPROCAL_INEXACT - report Inexact only, unless Invalid or
 *                       Divide-by-zero was raised
 */
#define CLEAR_FS_UNDERFLOW 1
#define CLEAR_IS_INEXACT   2
#define RECIPROCAL_INEXACT 4
6191 
6192 
6193 static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6194 {
6195     int mips_xcpt = 0;
6196 
6197     if (ieee_xcpt & float_flag_invalid) {
6198         mips_xcpt |= FP_INVALID;
6199     }
6200     if (ieee_xcpt & float_flag_overflow) {
6201         mips_xcpt |= FP_OVERFLOW;
6202     }
6203     if (ieee_xcpt & float_flag_underflow) {
6204         mips_xcpt |= FP_UNDERFLOW;
6205     }
6206     if (ieee_xcpt & float_flag_divbyzero) {
6207         mips_xcpt |= FP_DIV0;
6208     }
6209     if (ieee_xcpt & float_flag_inexact) {
6210         mips_xcpt |= FP_INEXACT;
6211     }
6212 
6213     return mips_xcpt;
6214 }
6215 
6216 static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
6217 {
6218     int ieee_exception_flags;
6219     int mips_exception_flags = 0;
6220     int cause;
6221     int enable;
6222 
6223     ieee_exception_flags = get_float_exception_flags(
6224                                &env->active_tc.msa_fp_status);
6225 
6226     /* QEMU softfloat does not signal all underflow cases */
6227     if (denormal) {
6228         ieee_exception_flags |= float_flag_underflow;
6229     }
6230     if (ieee_exception_flags) {
6231         mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
6232     }
6233     enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6234 
6235     /* Set Inexact (I) when flushing inputs to zero */
6236     if ((ieee_exception_flags & float_flag_input_denormal_flushed) &&
6237             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6238         if (action & CLEAR_IS_INEXACT) {
6239             mips_exception_flags &= ~FP_INEXACT;
6240         } else {
6241             mips_exception_flags |= FP_INEXACT;
6242         }
6243     }
6244 
6245     /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
6246     if ((ieee_exception_flags & float_flag_output_denormal_flushed) &&
6247             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6248         mips_exception_flags |= FP_INEXACT;
6249         if (action & CLEAR_FS_UNDERFLOW) {
6250             mips_exception_flags &= ~FP_UNDERFLOW;
6251         } else {
6252             mips_exception_flags |= FP_UNDERFLOW;
6253         }
6254     }
6255 
6256     /* Set Inexact (I) when Overflow (O) is not enabled */
6257     if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
6258            (enable & FP_OVERFLOW) == 0) {
6259         mips_exception_flags |= FP_INEXACT;
6260     }
6261 
6262     /* Clear Exact Underflow when Underflow (U) is not enabled */
6263     if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
6264            (enable & FP_UNDERFLOW) == 0 &&
6265            (mips_exception_flags & FP_INEXACT) == 0) {
6266         mips_exception_flags &= ~FP_UNDERFLOW;
6267     }
6268 
6269     /*
6270      * Reciprocal operations set only Inexact when valid and not
6271      * divide by zero
6272      */
6273     if ((action & RECIPROCAL_INEXACT) &&
6274             (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
6275         mips_exception_flags = FP_INEXACT;
6276     }
6277 
6278     cause = mips_exception_flags & enable; /* all current enabled exceptions */
6279 
6280     if (cause == 0) {
6281         /*
6282          * No enabled exception, update the MSACSR Cause
6283          * with all current exceptions
6284          */
6285         SET_FP_CAUSE(env->active_tc.msacsr,
6286             (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6287     } else {
6288         /* Current exceptions are enabled */
6289         if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
6290             /*
6291              * Exception(s) will trap, update MSACSR Cause
6292              * with all enabled exceptions
6293              */
6294             SET_FP_CAUSE(env->active_tc.msacsr,
6295                 (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6296         }
6297     }
6298 
6299     return mips_exception_flags;
6300 }
6301 
6302 static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6303 {
6304     int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6305     return c & enable;
6306 }
6307 
6308 static inline float16 float16_from_float32(int32_t a, bool ieee,
6309                                            float_status *status)
6310 {
6311       float16 f_val;
6312 
6313       f_val = float32_to_float16((float32)a, ieee, status);
6314 
6315       return a < 0 ? (f_val | (1 << 15)) : f_val;
6316 }
6317 
6318 static inline float32 float32_from_float64(int64_t a, float_status *status)
6319 {
6320       float32 f_val;
6321 
6322       f_val = float64_to_float32((float64)a, status);
6323 
6324       return a < 0 ? (f_val | (1 << 31)) : f_val;
6325 }
6326 
6327 static inline float32 float32_from_float16(int16_t a, bool ieee,
6328                                            float_status *status)
6329 {
6330       float32 f_val;
6331 
6332       f_val = float16_to_float32((float16)a, ieee, status);
6333 
6334       return a < 0 ? (f_val | (1 << 31)) : f_val;
6335 }
6336 
6337 static inline float64 float64_from_float32(int32_t a, float_status *status)
6338 {
6339       float64 f_val;
6340 
6341       f_val = float32_to_float64((float64)a, status);
6342 
6343       return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6344 }
6345 
6346 static inline float32 float32_from_q16(int16_t a, float_status *status)
6347 {
6348     float32 f_val;
6349 
6350     /* conversion as integer and scaling */
6351     f_val = int32_to_float32(a, status);
6352     f_val = float32_scalbn(f_val, -15, status);
6353 
6354     return f_val;
6355 }
6356 
6357 static inline float64 float64_from_q32(int32_t a, float_status *status)
6358 {
6359     float64 f_val;
6360 
6361     /* conversion as integer and scaling */
6362     f_val = int32_to_float64(a, status);
6363     f_val = float64_scalbn(f_val, -31, status);
6364 
6365     return f_val;
6366 }
6367 
/*
 * Convert a float32 to a 16-bit fixed-point value, saturating at the
 * limits of int16_t.
 *
 * The input is scaled by 2^15 and converted to an integer.  The order of
 * the steps matters because the softfloat exception flags in *status are
 * inspected and rewritten between them:
 *  - NaN input: Invalid is raised and 0 is returned;
 *  - Underflow reported by the scaling or by the conversion is discarded;
 *  - overflow while scaling: Inexact is raised in addition and the result
 *    saturates according to the sign bit of the scaled value;
 *  - Invalid reported by the integer conversion is replaced by
 *    Overflow | Inexact and the result saturates the same way;
 *  - a converted value outside [-0x8000, 0x7fff] raises
 *    Overflow | Inexact and is clamped.
 */
static inline int16_t float32_to_q16(float32 a, float_status *status)
{
    int32_t q_val;
    int32_t q_min = 0xffff8000;
    int32_t q_max = 0x00007fff;

    int ieee_ex;

    if (float32_is_any_nan(a)) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    /* scaling */
    a = float32_scalbn(a, 15, status);

    /* Drop any underflow flag; all other flags are kept. */
    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
                             , status);

    if (ieee_ex & float_flag_overflow) {
        float_raise(float_flag_inexact, status);
        /* Casting the raw bit pattern to int32_t tests the sign bit. */
        return (int32_t)a < 0 ? q_min : q_max;
    }

    /* conversion to int */
    q_val = float32_to_int32(a, status);

    /* Again drop any underflow flag. */
    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
                             , status);

    if (ieee_ex & float_flag_invalid) {
        /* Report the failed conversion as overflow, not as invalid. */
        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
                               , status);
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int32_t)a < 0 ? q_min : q_max;
    }

    /* The conversion succeeded; clamp to the 16-bit range. */
    if (q_val < q_min) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int16_t)q_min;
    }

    if (q_max < q_val) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int16_t)q_max;
    }

    return (int16_t)q_val;
}
6419 
6420 static inline int32_t float64_to_q32(float64 a, float_status *status)
6421 {
6422     int64_t q_val;
6423     int64_t q_min = 0xffffffff80000000LL;
6424     int64_t q_max = 0x000000007fffffffLL;
6425 
6426     int ieee_ex;
6427 
6428     if (float64_is_any_nan(a)) {
6429         float_raise(float_flag_invalid, status);
6430         return 0;
6431     }
6432 
6433     /* scaling */
6434     a = float64_scalbn(a, 31, status);
6435 
6436     ieee_ex = get_float_exception_flags(status);
6437     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6438            , status);
6439 
6440     if (ieee_ex & float_flag_overflow) {
6441         float_raise(float_flag_inexact, status);
6442         return (int64_t)a < 0 ? q_min : q_max;
6443     }
6444 
6445     /* conversion to integer */
6446     q_val = float64_to_int64(a, status);
6447 
6448     ieee_ex = get_float_exception_flags(status);
6449     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6450            , status);
6451 
6452     if (ieee_ex & float_flag_invalid) {
6453         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6454                , status);
6455         float_raise(float_flag_overflow | float_flag_inexact, status);
6456         return (int64_t)a < 0 ? q_min : q_max;
6457     }
6458 
6459     if (q_val < q_min) {
6460         float_raise(float_flag_overflow | float_flag_inexact, status);
6461         return (int32_t)q_min;
6462     }
6463 
6464     if (q_max < q_val) {
6465         float_raise(float_flag_overflow | float_flag_inexact, status);
6466         return (int32_t)q_max;
6467     }
6468 
6469     return (int32_t)q_val;
6470 }
6471 
/*
 * Evaluate one floating-point comparison and store a mask in DEST.
 *
 * Requires a variable named "env" in the expanding scope.  The exception
 * flags of env->active_tc.msa_fp_status are cleared, then
 * float<BITS>_<OP>() is called on ARG1/ARG2 (the float<BITS>_<OP>_quiet()
 * variant when QUIET is non-zero).  DEST becomes M_MAX_UINT(BITS) when the
 * comparison is true and 0 otherwise.  update_msacsr() is then called with
 * CLEAR_IS_INEXACT; if get_enabled_exceptions() reports anything for its
 * result c, DEST is overwritten with FLOAT_SNAN<BITS>(status) whose low six
 * bits are replaced by c.
 *
 * The expansion declares locals named status, c and cond, so the arguments
 * must not use those names.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
        int64_t cond;                                                       \
        set_float_exception_flags(0, status);                               \
        if (!(QUIET)) {                                                     \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
6490 
/*
 * "Always false" comparison: runs the eq comparison through
 * MSA_FLOAT_COND (so its flag and exception handling still happens), then
 * turns an all-ones "true" mask into 0.  Any other value MSA_FLOAT_COND
 * left in DEST is kept.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
            DEST = 0;                                               \
        }                                                           \
    } while (0)
6498 
/*
 * Unordered-or-equal: evaluate the unordered comparison first and, only if
 * that left DEST at 0, evaluate the eq comparison into DEST.
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6506 
/*
 * Not-equal, built from two lt comparisons: ARG1 < ARG2 first and, only if
 * that left DEST at 0, ARG2 < ARG1.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6514 
/*
 * Unordered-or-not-equal: evaluate unordered, then ARG1 < ARG2, then
 * ARG2 < ARG1, stopping at the first comparison that leaves DEST non-zero.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
            if (DEST == 0) {                                        \
                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
            }                                                       \
        }                                                           \
    } while (0)
6525 
/*
 * Unordered-or-less-or-equal: evaluate the unordered comparison first and,
 * only if that left DEST at 0, evaluate the le comparison into DEST.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6533 
/*
 * Unordered-or-less-than: evaluate the unordered comparison first and,
 * only if that left DEST at 0, evaluate the lt comparison into DEST.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6541 
/*
 * Ordered test, built from two le comparisons: ARG1 <= ARG2 first and,
 * only if that left DEST at 0, ARG2 <= ARG1.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6549 
6550 static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6551                               wr_t *pwt, uint32_t df, int quiet,
6552                               uintptr_t retaddr)
6553 {
6554     wr_t wx, *pwx = &wx;
6555     uint32_t i;
6556 
6557     clear_msacsr_cause(env);
6558 
6559     switch (df) {
6560     case DF_WORD:
6561         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6562             MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6563         }
6564         break;
6565     case DF_DOUBLE:
6566         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6567             MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6568         }
6569         break;
6570     default:
6571         g_assert_not_reached();
6572     }
6573 
6574     check_msacsr_cause(env, retaddr);
6575 
6576     msa_move_v(pwd, pwx);
6577 }
6578 
6579 static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6580                               wr_t *pwt, uint32_t df, int quiet,
6581                               uintptr_t retaddr)
6582 {
6583     wr_t wx, *pwx = &wx;
6584     uint32_t i;
6585 
6586     clear_msacsr_cause(env);
6587 
6588     switch (df) {
6589     case DF_WORD:
6590         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6591             MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6592                     quiet);
6593         }
6594         break;
6595     case DF_DOUBLE:
6596         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6597             MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6598                     quiet);
6599         }
6600         break;
6601     default:
6602         g_assert_not_reached();
6603     }
6604 
6605     check_msacsr_cause(env, retaddr);
6606 
6607     msa_move_v(pwd, pwx);
6608 }
6609 
6610 static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6611                               wr_t *pwt, uint32_t df, int quiet,
6612                               uintptr_t retaddr)
6613 {
6614     wr_t wx, *pwx = &wx;
6615     uint32_t i;
6616 
6617     clear_msacsr_cause(env);
6618 
6619     switch (df) {
6620     case DF_WORD:
6621         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6622             MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6623         }
6624         break;
6625     case DF_DOUBLE:
6626         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6627             MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6628         }
6629         break;
6630     default:
6631         g_assert_not_reached();
6632     }
6633 
6634     check_msacsr_cause(env, retaddr);
6635 
6636     msa_move_v(pwd, pwx);
6637 }
6638 
6639 static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6640                                wr_t *pwt, uint32_t df, int quiet,
6641                                uintptr_t retaddr)
6642 {
6643     wr_t wx, *pwx = &wx;
6644     uint32_t i;
6645 
6646     clear_msacsr_cause(env);
6647 
6648     switch (df) {
6649     case DF_WORD:
6650         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6651             MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6652         }
6653         break;
6654     case DF_DOUBLE:
6655         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6656             MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6657         }
6658         break;
6659     default:
6660         g_assert_not_reached();
6661     }
6662 
6663     check_msacsr_cause(env, retaddr);
6664 
6665     msa_move_v(pwd, pwx);
6666 }
6667 
6668 static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6669                               wr_t *pwt, uint32_t df, int quiet,
6670                               uintptr_t retaddr)
6671 {
6672     wr_t wx, *pwx = &wx;
6673     uint32_t i;
6674 
6675     clear_msacsr_cause(env);
6676 
6677     switch (df) {
6678     case DF_WORD:
6679         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6680             MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6681         }
6682         break;
6683     case DF_DOUBLE:
6684         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6685             MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6686         }
6687         break;
6688     default:
6689         g_assert_not_reached();
6690     }
6691 
6692     check_msacsr_cause(env, retaddr);
6693 
6694     msa_move_v(pwd, pwx);
6695 }
6696 
6697 static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6698                                wr_t *pwt, uint32_t df, int quiet,
6699                                uintptr_t retaddr)
6700 {
6701     wr_t wx, *pwx = &wx;
6702     uint32_t i;
6703 
6704     clear_msacsr_cause(env);
6705 
6706     switch (df) {
6707     case DF_WORD:
6708         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6709             MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6710         }
6711         break;
6712     case DF_DOUBLE:
6713         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6714             MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6715         }
6716         break;
6717     default:
6718         g_assert_not_reached();
6719     }
6720 
6721     check_msacsr_cause(env, retaddr);
6722 
6723     msa_move_v(pwd, pwx);
6724 }
6725 
6726 static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6727                               wr_t *pwt, uint32_t df, int quiet,
6728                               uintptr_t retaddr)
6729 {
6730     wr_t wx, *pwx = &wx;
6731     uint32_t i;
6732 
6733     clear_msacsr_cause(env);
6734 
6735     switch (df) {
6736     case DF_WORD:
6737         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6738             MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6739         }
6740         break;
6741     case DF_DOUBLE:
6742         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6743             MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6744         }
6745         break;
6746     default:
6747         g_assert_not_reached();
6748     }
6749 
6750     check_msacsr_cause(env, retaddr);
6751 
6752     msa_move_v(pwd, pwx);
6753 }
6754 
6755 static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6756                                wr_t *pwt, uint32_t df, int quiet,
6757                                uintptr_t retaddr)
6758 {
6759     wr_t wx, *pwx = &wx;
6760     uint32_t i;
6761 
6762     clear_msacsr_cause(env);
6763 
6764     switch (df) {
6765     case DF_WORD:
6766         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6767             MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6768         }
6769         break;
6770     case DF_DOUBLE:
6771         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6772             MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6773         }
6774         break;
6775     default:
6776         g_assert_not_reached();
6777     }
6778 
6779     check_msacsr_cause(env, retaddr);
6780 
6781     msa_move_v(pwd, pwx);
6782 }
6783 
6784 static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6785                               wr_t *pwt, uint32_t df, int quiet,
6786                               uintptr_t retaddr)
6787 {
6788     wr_t wx, *pwx = &wx;
6789     uint32_t i;
6790 
6791     clear_msacsr_cause(env);
6792 
6793     switch (df) {
6794     case DF_WORD:
6795         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6796             MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6797         }
6798         break;
6799     case DF_DOUBLE:
6800         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6801             MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6802         }
6803         break;
6804     default:
6805         g_assert_not_reached();
6806     }
6807 
6808     check_msacsr_cause(env, retaddr);
6809 
6810     msa_move_v(pwd, pwx);
6811 }
6812 
6813 static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6814                                wr_t *pwt, uint32_t df, int quiet,
6815                                uintptr_t retaddr)
6816 {
6817     wr_t wx, *pwx = &wx;
6818     uint32_t i;
6819 
6820     clear_msacsr_cause(env);
6821 
6822     switch (df) {
6823     case DF_WORD:
6824         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6825             MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6826         }
6827         break;
6828     case DF_DOUBLE:
6829         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6830             MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6831         }
6832         break;
6833     default:
6834         g_assert_not_reached();
6835     }
6836 
6837     check_msacsr_cause(env, retaddr);
6838 
6839     msa_move_v(pwd, pwx);
6840 }
6841 
6842 static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6843                               wr_t *pwt, uint32_t df, int quiet,
6844                               uintptr_t retaddr)
6845 {
6846     wr_t wx, *pwx = &wx;
6847     uint32_t i;
6848 
6849     clear_msacsr_cause(env);
6850 
6851     switch (df) {
6852     case DF_WORD:
6853         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6854             MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6855         }
6856         break;
6857     case DF_DOUBLE:
6858         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6859             MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6860         }
6861         break;
6862     default:
6863         g_assert_not_reached();
6864     }
6865 
6866     check_msacsr_cause(env, retaddr);
6867 
6868     msa_move_v(pwd, pwx);
6869 }
6870 
6871 void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6872                         uint32_t ws, uint32_t wt)
6873 {
6874     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6875     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6876     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6877     compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6878 }
6879 
6880 void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6881                         uint32_t ws, uint32_t wt)
6882 {
6883     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6884     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6885     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6886     compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6887 }
6888 
6889 void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6890                         uint32_t ws, uint32_t wt)
6891 {
6892     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6893     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6894     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6895     compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6896 }
6897 
6898 void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6899                          uint32_t ws, uint32_t wt)
6900 {
6901     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6902     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6903     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6904     compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6905 }
6906 
6907 void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6908                         uint32_t ws, uint32_t wt)
6909 {
6910     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6911     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6912     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6913     compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6914 }
6915 
6916 void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6917                          uint32_t ws, uint32_t wt)
6918 {
6919     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6920     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6921     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6922     compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6923 }
6924 
6925 void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6926                         uint32_t ws, uint32_t wt)
6927 {
6928     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6929     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6930     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6931     compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6932 }
6933 
6934 void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6935                          uint32_t ws, uint32_t wt)
6936 {
6937     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6938     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6939     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6940     compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6941 }
6942 
6943 void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6944                         uint32_t ws, uint32_t wt)
6945 {
6946     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6947     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6948     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6949     compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6950 }
6951 
6952 void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6953                         uint32_t ws, uint32_t wt)
6954 {
6955     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6956     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6957     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6958     compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6959 }
6960 
6961 void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6962                         uint32_t ws, uint32_t wt)
6963 {
6964     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6965     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6966     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6967     compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6968 }
6969 
6970 void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6971                          uint32_t ws, uint32_t wt)
6972 {
6973     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6974     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6975     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6976     compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6977 }
6978 
6979 void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6980                         uint32_t ws, uint32_t wt)
6981 {
6982     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6983     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6984     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6985     compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6986 }
6987 
6988 void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6989                          uint32_t ws, uint32_t wt)
6990 {
6991     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6992     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6993     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6994     compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6995 }
6996 
6997 void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6998                         uint32_t ws, uint32_t wt)
6999 {
7000     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7001     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7002     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7003     compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7004 }
7005 
7006 void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7007                          uint32_t ws, uint32_t wt)
7008 {
7009     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7010     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7011     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7012     compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7013 }
7014 
7015 void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7016                         uint32_t ws, uint32_t wt)
7017 {
7018     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7019     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7020     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7021     compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7022 }
7023 
7024 void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7025                          uint32_t ws, uint32_t wt)
7026 {
7027     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7028     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7029     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7030     compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7031 }
7032 
7033 void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7034                         uint32_t ws, uint32_t wt)
7035 {
7036     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7037     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7038     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7039     compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7040 }
7041 
7042 void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7043                         uint32_t ws, uint32_t wt)
7044 {
7045     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7046     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7047     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7048     compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7049 }
7050 
7051 void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7052                          uint32_t ws, uint32_t wt)
7053 {
7054     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7055     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7056     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7057     compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7058 }
7059 
7060 void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7061                         uint32_t ws, uint32_t wt)
7062 {
7063     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7064     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7065     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7066     compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7067 }
7068 
/*
 * Stub predicates for the 16-bit format: both always evaluate to 0, so
 * IS_DENORMAL(x, 16) expands to (!0 && 0) and is always false.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/*
 * True when ARG, a float<BITS> value, is reported as zero-or-denormal but
 * not as zero by the float<BITS>_is_* predicates.
 */
#define IS_DENORMAL(ARG, BITS)                      \
    (!float ## BITS ## _is_zero(ARG)                \
    && float ## BITS ## _is_zero_or_denormal(ARG))
7075 
/*
 * Compute DEST = float<BITS>_<OP>(ARG1, ARG2) for one element.
 *
 * Requires a variable named "env" in the expanding scope.  The exception
 * flags of env->active_tc.msa_fp_status are cleared before the operation.
 * update_msacsr() is then called with IS_DENORMAL(DEST, BITS) as its last
 * argument; if get_enabled_exceptions() reports anything for its result c,
 * DEST is overwritten with FLOAT_SNAN<BITS>(status) whose low six bits are
 * replaced by c.
 *
 * The expansion declares locals named status and c, so the arguments must
 * not use those names.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7089 
7090 void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7091         uint32_t ws, uint32_t wt)
7092 {
7093     wr_t wx, *pwx = &wx;
7094     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7095     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7096     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7097     uint32_t i;
7098 
7099     clear_msacsr_cause(env);
7100 
7101     switch (df) {
7102     case DF_WORD:
7103         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7104             MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7105         }
7106         break;
7107     case DF_DOUBLE:
7108         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7109             MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7110         }
7111         break;
7112     default:
7113         g_assert_not_reached();
7114     }
7115 
7116     check_msacsr_cause(env, GETPC());
7117     msa_move_v(pwd, pwx);
7118 }
7119 
7120 void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7121         uint32_t ws, uint32_t wt)
7122 {
7123     wr_t wx, *pwx = &wx;
7124     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7125     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7126     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7127     uint32_t i;
7128 
7129     clear_msacsr_cause(env);
7130 
7131     switch (df) {
7132     case DF_WORD:
7133         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7134             MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7135         }
7136         break;
7137     case DF_DOUBLE:
7138         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7139             MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7140         }
7141         break;
7142     default:
7143         g_assert_not_reached();
7144     }
7145 
7146     check_msacsr_cause(env, GETPC());
7147     msa_move_v(pwd, pwx);
7148 }
7149 
7150 void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7151         uint32_t ws, uint32_t wt)
7152 {
7153     wr_t wx, *pwx = &wx;
7154     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7155     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7156     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7157     uint32_t i;
7158 
7159     clear_msacsr_cause(env);
7160 
7161     switch (df) {
7162     case DF_WORD:
7163         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7164             MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7165         }
7166         break;
7167     case DF_DOUBLE:
7168         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7169             MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7170         }
7171         break;
7172     default:
7173         g_assert_not_reached();
7174     }
7175 
7176     check_msacsr_cause(env, GETPC());
7177 
7178     msa_move_v(pwd, pwx);
7179 }
7180 
7181 void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7182         uint32_t ws, uint32_t wt)
7183 {
7184     wr_t wx, *pwx = &wx;
7185     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7186     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7187     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7188     uint32_t i;
7189 
7190     clear_msacsr_cause(env);
7191 
7192     switch (df) {
7193     case DF_WORD:
7194         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7195             MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7196         }
7197         break;
7198     case DF_DOUBLE:
7199         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7200             MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7201         }
7202         break;
7203     default:
7204         g_assert_not_reached();
7205     }
7206 
7207     check_msacsr_cause(env, GETPC());
7208 
7209     msa_move_v(pwd, pwx);
7210 }
7211 
/*
 * Compute one fused multiply-add element into DEST.
 *
 * Note the operand order: float<BITS>_muladd() receives ARG2 and ARG3 as
 * its first two operands and ARG1 as its third, with NEGATE passed through
 * as the flags argument.
 *
 * Requires a variable named "env" in the expanding scope.  The exception
 * flags of env->active_tc.msa_fp_status are cleared before the operation.
 * update_msacsr() is then called with IS_DENORMAL(DEST, BITS) as its last
 * argument; if get_enabled_exceptions() reports anything for its result c,
 * DEST is overwritten with FLOAT_SNAN<BITS>(status) whose low six bits are
 * replaced by c.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7225 
7226 void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7227         uint32_t ws, uint32_t wt)
7228 {
7229     wr_t wx, *pwx = &wx;
7230     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7231     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7232     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7233     uint32_t i;
7234 
7235     clear_msacsr_cause(env);
7236 
7237     switch (df) {
7238     case DF_WORD:
7239         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7240             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7241                            pws->w[i], pwt->w[i], 0, 32);
7242         }
7243         break;
7244     case DF_DOUBLE:
7245         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7246             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7247                            pws->d[i], pwt->d[i], 0, 64);
7248         }
7249         break;
7250     default:
7251         g_assert_not_reached();
7252     }
7253 
7254     check_msacsr_cause(env, GETPC());
7255 
7256     msa_move_v(pwd, pwx);
7257 }
7258 
7259 void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7260         uint32_t ws, uint32_t wt)
7261 {
7262     wr_t wx, *pwx = &wx;
7263     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7264     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7265     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7266     uint32_t i;
7267 
7268     clear_msacsr_cause(env);
7269 
7270     switch (df) {
7271     case DF_WORD:
7272         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7273             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7274                            pws->w[i], pwt->w[i],
7275                            float_muladd_negate_product, 32);
7276       }
7277       break;
7278     case DF_DOUBLE:
7279         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7280             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7281                            pws->d[i], pwt->d[i],
7282                            float_muladd_negate_product, 64);
7283         }
7284         break;
7285     default:
7286         g_assert_not_reached();
7287     }
7288 
7289     check_msacsr_cause(env, GETPC());
7290 
7291     msa_move_v(pwd, pwx);
7292 }
7293 
7294 void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7295         uint32_t ws, uint32_t wt)
7296 {
7297     wr_t wx, *pwx = &wx;
7298     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7299     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7300     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7301     uint32_t i;
7302 
7303     clear_msacsr_cause(env);
7304 
7305     switch (df) {
7306     case DF_WORD:
7307         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7308             MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7309                             pwt->w[i] >  0x200 ?  0x200 :
7310                             pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7311                             32);
7312         }
7313         break;
7314     case DF_DOUBLE:
7315         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7316             MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7317                             pwt->d[i] >  0x1000 ?  0x1000 :
7318                             pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7319                             64);
7320         }
7321         break;
7322     default:
7323         g_assert_not_reached();
7324     }
7325 
7326     check_msacsr_cause(env, GETPC());
7327 
7328     msa_move_v(pwd, pwx);
7329 }
7330 
/*
 * Apply the one-operand softfloat routine float<BITS>_<OP>(ARG, status) and
 * store the result in DEST.
 *
 * The softfloat exception flags are cleared before the call so that
 * update_msacsr() only sees what this operation raised.  If the returned
 * cause has an enabled exception, DEST is overwritten with the
 * signaling-NaN pattern whose low six bits are replaced by the cause bits.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG, fpst_);                        \
        cause_ = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));            \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7344 
7345 void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7346                          uint32_t ws, uint32_t wt)
7347 {
7348     wr_t wx, *pwx = &wx;
7349     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7350     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7351     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7352     uint32_t i;
7353 
7354     clear_msacsr_cause(env);
7355 
7356     switch (df) {
7357     case DF_WORD:
7358         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7359             /*
7360              * Half precision floats come in two formats: standard
7361              * IEEE and "ARM" format.  The latter gains extra exponent
7362              * range by omitting the NaN/Inf encodings.
7363              */
7364             bool ieee = true;
7365 
7366             MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7367             MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7368         }
7369         break;
7370     case DF_DOUBLE:
7371         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7372             MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7373             MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7374         }
7375         break;
7376     default:
7377         g_assert_not_reached();
7378     }
7379 
7380     check_msacsr_cause(env, GETPC());
7381     msa_move_v(pwd, pwx);
7382 }
7383 
/*
 * One-operand operation whose destination is narrower than its source:
 * float<BITS>_<OP>(ARG, status) is computed on a BITS-wide input and the
 * result is stored in an XBITS-wide DEST.
 *
 * update_msacsr() is called with the CLEAR_FS_UNDERFLOW action and without
 * a denormal indication.  If the returned cause has an enabled exception,
 * DEST is overwritten with the XBITS-wide signaling-NaN pattern whose low
 * six bits are replaced by the cause bits.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG, fpst_);                        \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## XBITS(fpst_) >> 6) << 6) | cause_;       \
        }                                                                   \
    } while (0)
7397 
7398 void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7399                        uint32_t ws, uint32_t wt)
7400 {
7401     wr_t wx, *pwx = &wx;
7402     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7403     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7404     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7405     uint32_t i;
7406 
7407     clear_msacsr_cause(env);
7408 
7409     switch (df) {
7410     case DF_WORD:
7411         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7412             MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7413             MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7414         }
7415         break;
7416     case DF_DOUBLE:
7417         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7418             MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7419             MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7420         }
7421         break;
7422     default:
7423         g_assert_not_reached();
7424     }
7425 
7426     check_msacsr_cause(env, GETPC());
7427 
7428     msa_move_v(pwd, pwx);
7429 }
7430 
/*
 * True when ARG1 is not a NaN of any kind and ARG2 is a quiet NaN.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * expression under any operator (for example a leading '!').
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7434 
/*
 * Two-operand min/max style operation: DEST = float<BITS>_<OP>(ARG1, ARG2).
 *
 * update_msacsr() is called with no action and no denormal indication.  If
 * the returned cause has an enabled exception, DEST is overwritten with the
 * signaling-NaN pattern whose low six bits are replaced by the cause bits.
 *
 * The macro's locals carry a trailing underscore so they do not shadow a
 * 'status' variable in the expanding function.  Expects a variable named
 * 'env' in the expanding scope.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, fpst_);                 \
        cause_ = update_msacsr(env, 0, 0);                                  \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7448 
/*
 * Magnitude-based min/max selection for one element.
 *
 * F and G are the two softfloat operation names (min/max or max/min).
 * When exactly one operand is a quiet NaN and the other is not a NaN, the
 * NaN is first replaced by the other operand.  Then three MSA_FLOAT_MAXOP
 * evaluations are made, in this order:
 *   F(S, T), G(S, T) and F(|S|, |T|).
 * X receives F(S, T) when |S| == |T| bitwise or when F(|S|, |T|) equals
 * |F(S, T)|; otherwise X receives G(S, T).
 */
#define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
    do {                                                            \
        uint## BITS ##_t lhs_ = _S, rhs_ = _T;                      \
        uint## BITS ##_t mag_lhs_, mag_rhs_;                        \
        uint## BITS ##_t by_f_, by_g_, mag_f_;                      \
                                                                    \
        if (NUMBER_QNAN_PAIR(lhs_, rhs_, BITS, STATUS)) {           \
            rhs_ = lhs_;                                            \
        } else if (NUMBER_QNAN_PAIR(rhs_, lhs_, BITS, STATUS)) {    \
            lhs_ = rhs_;                                            \
        }                                                           \
        mag_lhs_ = float## BITS ##_abs(lhs_);                       \
        mag_rhs_ = float## BITS ##_abs(rhs_);                       \
        MSA_FLOAT_MAXOP(by_f_, F, lhs_, rhs_, BITS);                \
        MSA_FLOAT_MAXOP(by_g_, G, lhs_, rhs_, BITS);                \
        MSA_FLOAT_MAXOP(mag_f_, F, mag_lhs_, mag_rhs_, BITS);       \
        if (mag_lhs_ == mag_rhs_ ||                                 \
            mag_f_ == float## BITS ##_abs(by_f_)) {                 \
            X = by_f_;                                              \
        } else {                                                    \
            X = by_g_;                                              \
        }                                                           \
    } while (0)
7466 
7467 void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7468         uint32_t ws, uint32_t wt)
7469 {
7470     float_status *status = &env->active_tc.msa_fp_status;
7471     wr_t wx, *pwx = &wx;
7472     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7473     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7474     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7475 
7476     clear_msacsr_cause(env);
7477 
7478     if (df == DF_WORD) {
7479 
7480         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7481             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7482         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7483             MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7484         } else {
7485             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7486         }
7487 
7488         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7489             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7490         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7491             MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7492         } else {
7493             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7494         }
7495 
7496         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7497             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7498         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7499             MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7500         } else {
7501             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7502         }
7503 
7504         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7505             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7506         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7507             MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7508         } else {
7509             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7510         }
7511 
7512     } else if (df == DF_DOUBLE) {
7513 
7514         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7515             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7516         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7517             MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7518         } else {
7519             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7520         }
7521 
7522         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7523             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7524         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7525             MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7526         } else {
7527             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7528         }
7529 
7530     } else {
7531 
7532         g_assert_not_reached();
7533 
7534     }
7535 
7536     check_msacsr_cause(env, GETPC());
7537 
7538     msa_move_v(pwd, pwx);
7539 }
7540 
7541 void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7542         uint32_t ws, uint32_t wt)
7543 {
7544     float_status *status = &env->active_tc.msa_fp_status;
7545     wr_t wx, *pwx = &wx;
7546     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7547     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7548     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7549 
7550     clear_msacsr_cause(env);
7551 
7552     if (df == DF_WORD) {
7553         FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7554         FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7555         FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7556         FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7557     } else if (df == DF_DOUBLE) {
7558         FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7559         FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7560     } else {
7561         g_assert_not_reached();
7562     }
7563 
7564     check_msacsr_cause(env, GETPC());
7565 
7566     msa_move_v(pwd, pwx);
7567 }
7568 
7569 void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7570         uint32_t ws, uint32_t wt)
7571 {
7572      float_status *status = &env->active_tc.msa_fp_status;
7573     wr_t wx, *pwx = &wx;
7574     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7575     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7576     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7577 
7578     clear_msacsr_cause(env);
7579 
7580     if (df == DF_WORD) {
7581 
7582         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7583             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7584         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7585             MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7586         } else {
7587             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7588         }
7589 
7590         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7591             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7592         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7593             MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7594         } else {
7595             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7596         }
7597 
7598         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7599             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7600         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7601             MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7602         } else {
7603             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7604         }
7605 
7606         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7607             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7608         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7609             MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7610         } else {
7611             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7612         }
7613 
7614     } else if (df == DF_DOUBLE) {
7615 
7616         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7617             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7618         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7619             MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7620         } else {
7621             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7622         }
7623 
7624         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7625             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7626         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7627             MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7628         } else {
7629             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7630         }
7631 
7632     } else {
7633 
7634         g_assert_not_reached();
7635 
7636     }
7637 
7638     check_msacsr_cause(env, GETPC());
7639 
7640     msa_move_v(pwd, pwx);
7641 }
7642 
7643 void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7644         uint32_t ws, uint32_t wt)
7645 {
7646     float_status *status = &env->active_tc.msa_fp_status;
7647     wr_t wx, *pwx = &wx;
7648     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7649     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7650     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7651 
7652     clear_msacsr_cause(env);
7653 
7654     if (df == DF_WORD) {
7655         FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7656         FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7657         FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7658         FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7659     } else if (df == DF_DOUBLE) {
7660         FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7661         FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7662     } else {
7663         g_assert_not_reached();
7664     }
7665 
7666     check_msacsr_cause(env, GETPC());
7667 
7668     msa_move_v(pwd, pwx);
7669 }
7670 
7671 void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7672         uint32_t wd, uint32_t ws)
7673 {
7674     float_status *status = &env->active_tc.msa_fp_status;
7675 
7676     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7677     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7678     if (df == DF_WORD) {
7679         pwd->w[0] = float_class_s(pws->w[0], status);
7680         pwd->w[1] = float_class_s(pws->w[1], status);
7681         pwd->w[2] = float_class_s(pws->w[2], status);
7682         pwd->w[3] = float_class_s(pws->w[3], status);
7683     } else if (df == DF_DOUBLE) {
7684         pwd->d[0] = float_class_d(pws->d[0], status);
7685         pwd->d[1] = float_class_d(pws->d[1], status);
7686     } else {
7687         g_assert_not_reached();
7688     }
7689 }
7690 
/*
 * One-operand operation whose result is forced to zero for NaN inputs:
 * DEST = float<BITS>_<OP>(ARG, status).
 *
 * update_msacsr() is called with the CLEAR_FS_UNDERFLOW action and without
 * a denormal indication.  If the returned cause has an enabled exception,
 * DEST is overwritten with the signaling-NaN pattern whose low six bits
 * are replaced by the cause bits.  Otherwise, if ARG is any kind of NaN,
 * DEST is set to 0.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG, fpst_);                        \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7706 
7707 void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7708                             uint32_t ws)
7709 {
7710     wr_t wx, *pwx = &wx;
7711     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7712     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7713     uint32_t i;
7714 
7715     clear_msacsr_cause(env);
7716 
7717     switch (df) {
7718     case DF_WORD:
7719         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7720             MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7721         }
7722         break;
7723     case DF_DOUBLE:
7724         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7725             MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7726         }
7727         break;
7728     default:
7729         g_assert_not_reached();
7730     }
7731 
7732     check_msacsr_cause(env, GETPC());
7733 
7734     msa_move_v(pwd, pwx);
7735 }
7736 
7737 void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7738                             uint32_t ws)
7739 {
7740     wr_t wx, *pwx = &wx;
7741     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7742     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7743     uint32_t i;
7744 
7745     clear_msacsr_cause(env);
7746 
7747     switch (df) {
7748     case DF_WORD:
7749         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7750             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7751         }
7752         break;
7753     case DF_DOUBLE:
7754         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7755             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7756         }
7757         break;
7758     default:
7759         g_assert_not_reached();
7760     }
7761 
7762     check_msacsr_cause(env, GETPC());
7763 
7764     msa_move_v(pwd, pwx);
7765 }
7766 
7767 void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7768                          uint32_t ws)
7769 {
7770     wr_t wx, *pwx = &wx;
7771     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7772     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7773     uint32_t i;
7774 
7775     clear_msacsr_cause(env);
7776 
7777     switch (df) {
7778     case DF_WORD:
7779         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7780             MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7781         }
7782         break;
7783     case DF_DOUBLE:
7784         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7785             MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7786         }
7787         break;
7788     default:
7789         g_assert_not_reached();
7790     }
7791 
7792     check_msacsr_cause(env, GETPC());
7793 
7794     msa_move_v(pwd, pwx);
7795 }
7796 
/*
 * Reciprocal of one element: DEST = FLOAT_ONE<BITS> / ARG, computed with
 * float<BITS>_div().
 *
 * ARG is evaluated exactly once, after the softfloat exception flags have
 * been cleared, and kept in a local.  Callers may pass an expression that
 * is itself a softfloat call (the reciprocal-square-root helper passes a
 * float<BITS>_sqrt() call); any flags it raises are still seen by
 * update_msacsr(), and the work is not repeated for the infinity test.
 *
 * update_msacsr() gets the RECIPROCAL_INEXACT action unless the operand is
 * an infinity or the quotient is a quiet NaN, in which case the action is
 * 0.  If the returned cause has an enabled exception, DEST is overwritten
 * with the signaling-NaN pattern whose low six bits are replaced by the
 * cause bits.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS arg_;                                                 \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        arg_ = (ARG);                                                       \
        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, arg_, status);  \
        c = update_msacsr(env, float ## BITS ## _is_infinity(arg_) ||       \
                          float ## BITS ## _is_quiet_nan(DEST, status) ?    \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7813 
7814 void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7815                           uint32_t ws)
7816 {
7817     wr_t wx, *pwx = &wx;
7818     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7819     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7820     uint32_t i;
7821 
7822     clear_msacsr_cause(env);
7823 
7824     switch (df) {
7825     case DF_WORD:
7826         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7827             MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7828                     &env->active_tc.msa_fp_status), 32);
7829         }
7830         break;
7831     case DF_DOUBLE:
7832         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7833             MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7834                     &env->active_tc.msa_fp_status), 64);
7835         }
7836         break;
7837     default:
7838         g_assert_not_reached();
7839     }
7840 
7841     check_msacsr_cause(env, GETPC());
7842 
7843     msa_move_v(pwd, pwx);
7844 }
7845 
7846 void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7847                         uint32_t ws)
7848 {
7849     wr_t wx, *pwx = &wx;
7850     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7851     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7852     uint32_t i;
7853 
7854     clear_msacsr_cause(env);
7855 
7856     switch (df) {
7857     case DF_WORD:
7858         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7859             MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7860         }
7861         break;
7862     case DF_DOUBLE:
7863         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7864             MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7865         }
7866         break;
7867     default:
7868         g_assert_not_reached();
7869     }
7870 
7871     check_msacsr_cause(env, GETPC());
7872 
7873     msa_move_v(pwd, pwx);
7874 }
7875 
7876 void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7877                          uint32_t ws)
7878 {
7879     wr_t wx, *pwx = &wx;
7880     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7881     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7882     uint32_t i;
7883 
7884     clear_msacsr_cause(env);
7885 
7886     switch (df) {
7887     case DF_WORD:
7888         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7889             MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7890         }
7891         break;
7892     case DF_DOUBLE:
7893         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7894             MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7895         }
7896         break;
7897     default:
7898         g_assert_not_reached();
7899     }
7900 
7901     check_msacsr_cause(env, GETPC());
7902 
7903     msa_move_v(pwd, pwx);
7904 }
7905 
/*
 * Base-2 logarithm rounded down to an integral value, for one element.
 *
 * Steps, in order:
 *  1. clear the softfloat exception flags;
 *  2. switch the rounding mode to float_round_down;
 *  3. DEST = round_to_int(log2(ARG));
 *  4. restore the rounding mode selected by the RM field of MSACSR;
 *  5. drop float_flag_inexact from the accumulated flags;
 *  6. fold the remaining flags into MSACSR with update_msacsr().
 * If the returned cause has an enabled exception, DEST is overwritten with
 * the signaling-NaN pattern whose low six bits are replaced by the cause
 * bits.
 *
 * Expects a variable named 'env' in the expanding scope.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int flags_;                                                         \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        set_float_rounding_mode(float_round_down, fpst_);                   \
        DEST = float ## BITS ## _ ## log2(ARG, fpst_);                      \
        DEST = float ## BITS ## _ ## round_to_int(DEST, fpst_);             \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                fpst_);                                     \
                                                                            \
        flags_ = get_float_exception_flags(fpst_) & (~float_flag_inexact);  \
        set_float_exception_flags(flags_, fpst_);                           \
                                                                            \
        cause_ = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));            \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7929 
7930 void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7931                          uint32_t ws)
7932 {
7933     wr_t wx, *pwx = &wx;
7934     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7935     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7936     uint32_t i;
7937 
7938     clear_msacsr_cause(env);
7939 
7940     switch (df) {
7941     case DF_WORD:
7942         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7943             MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7944         }
7945         break;
7946     case DF_DOUBLE:
7947         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7948             MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7949         }
7950         break;
7951     default:
7952         g_assert_not_reached();
7953     }
7954 
7955     check_msacsr_cause(env, GETPC());
7956 
7957     msa_move_v(pwd, pwx);
7958 }
7959 
7960 void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7961                           uint32_t ws)
7962 {
7963     wr_t wx, *pwx = &wx;
7964     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7965     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7966     uint32_t i;
7967 
7968     clear_msacsr_cause(env);
7969 
7970     switch (df) {
7971     case DF_WORD:
7972         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7973             /*
7974              * Half precision floats come in two formats: standard
7975              * IEEE and "ARM" format.  The latter gains extra exponent
7976              * range by omitting the NaN/Inf encodings.
7977              */
7978             bool ieee = true;
7979 
7980             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7981         }
7982         break;
7983     case DF_DOUBLE:
7984         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7985             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7986         }
7987         break;
7988     default:
7989         g_assert_not_reached();
7990     }
7991 
7992     check_msacsr_cause(env, GETPC());
7993     msa_move_v(pwd, pwx);
7994 }
7995 
7996 void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7997                           uint32_t ws)
7998 {
7999     wr_t wx, *pwx = &wx;
8000     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8001     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8002     uint32_t i;
8003 
8004     clear_msacsr_cause(env);
8005 
8006     switch (df) {
8007     case DF_WORD:
8008         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8009             /*
8010              * Half precision floats come in two formats: standard
8011              * IEEE and "ARM" format.  The latter gains extra exponent
8012              * range by omitting the NaN/Inf encodings.
8013              */
8014             bool ieee = true;
8015 
8016             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8017         }
8018         break;
8019     case DF_DOUBLE:
8020         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8021             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8022         }
8023         break;
8024     default:
8025         g_assert_not_reached();
8026     }
8027 
8028     check_msacsr_cause(env, GETPC());
8029     msa_move_v(pwd, pwx);
8030 }
8031 
8032 void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8033                         uint32_t ws)
8034 {
8035     wr_t wx, *pwx = &wx;
8036     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8037     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8038     uint32_t i;
8039 
8040     switch (df) {
8041     case DF_WORD:
8042         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8043             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8044         }
8045         break;
8046     case DF_DOUBLE:
8047         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8048             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8049         }
8050         break;
8051     default:
8052         g_assert_not_reached();
8053     }
8054 
8055     msa_move_v(pwd, pwx);
8056 }
8057 
8058 void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8059                         uint32_t ws)
8060 {
8061     wr_t wx, *pwx = &wx;
8062     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8063     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8064     uint32_t i;
8065 
8066     switch (df) {
8067     case DF_WORD:
8068         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8069             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8070         }
8071         break;
8072     case DF_DOUBLE:
8073         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8074             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8075         }
8076         break;
8077     default:
8078         g_assert_not_reached();
8079     }
8080 
8081     msa_move_v(pwd, pwx);
8082 }
8083 
8084 void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8085                            uint32_t ws)
8086 {
8087     wr_t wx, *pwx = &wx;
8088     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8089     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8090     uint32_t i;
8091 
8092     clear_msacsr_cause(env);
8093 
8094     switch (df) {
8095     case DF_WORD:
8096         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8097             MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8098         }
8099         break;
8100     case DF_DOUBLE:
8101         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8102             MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8103         }
8104         break;
8105     default:
8106         g_assert_not_reached();
8107     }
8108 
8109     check_msacsr_cause(env, GETPC());
8110 
8111     msa_move_v(pwd, pwx);
8112 }
8113 
8114 void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8115                            uint32_t ws)
8116 {
8117     wr_t wx, *pwx = &wx;
8118     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8119     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8120     uint32_t i;
8121 
8122     clear_msacsr_cause(env);
8123 
8124     switch (df) {
8125     case DF_WORD:
8126         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8127             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8128         }
8129         break;
8130     case DF_DOUBLE:
8131         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8132             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8133         }
8134         break;
8135     default:
8136         g_assert_not_reached();
8137     }
8138 
8139     check_msacsr_cause(env, GETPC());
8140 
8141     msa_move_v(pwd, pwx);
8142 }
8143 
/*
 * Aliases giving the int*_to_float* conversion functions a
 * float<BITS>_from_<type> spelling.  The FFINT helpers below pass
 * from_int32/from_uint32 (with 32) and from_int64/from_uint64 (with 64)
 * to MSA_FLOAT_UNOP; presumably that macro pastes those pieces together
 * into the names defined here -- confirm against its definition.
 */
#define float32_from_int32 int32_to_float32
#define float32_from_uint32 uint32_to_float32

#define float64_from_int64 int64_to_float64
#define float64_from_uint64 uint64_to_float64
8149 
8150 void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8151                            uint32_t ws)
8152 {
8153     wr_t wx, *pwx = &wx;
8154     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8155     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8156     uint32_t i;
8157 
8158     clear_msacsr_cause(env);
8159 
8160     switch (df) {
8161     case DF_WORD:
8162         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8163             MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8164         }
8165         break;
8166     case DF_DOUBLE:
8167         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8168             MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8169         }
8170         break;
8171     default:
8172         g_assert_not_reached();
8173     }
8174 
8175     check_msacsr_cause(env, GETPC());
8176 
8177     msa_move_v(pwd, pwx);
8178 }
8179 
8180 void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8181                            uint32_t ws)
8182 {
8183     wr_t wx, *pwx = &wx;
8184     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8185     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8186     uint32_t i;
8187 
8188     clear_msacsr_cause(env);
8189 
8190     switch (df) {
8191     case DF_WORD:
8192         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8193             MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8194         }
8195         break;
8196     case DF_DOUBLE:
8197         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8198             MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8199         }
8200         break;
8201     default:
8202         g_assert_not_reached();
8203     }
8204 
8205     check_msacsr_cause(env, GETPC());
8206 
8207     msa_move_v(pwd, pwx);
8208 }
8209 
/*
 * Width in bits of one element of data format df: 1 << (df + 3).
 *
 * NOTE(review): DF_BITS is already defined with this exact replacement
 * text near the top of the file.  A repeated #define is only valid while
 * both copies stay identical, so keep them in sync (or drop this one).
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Number of elements of data format df in one MSA_WRLEN-bit vector.
 *
 * NOTE(review): DF_ELEMENTS is used by helpers earlier in this file, so
 * this looks like a repeated definition as well -- confirm.
 */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8215 
/*
 * Byte-swap each of the four 16-bit lanes packed in x independently;
 * the lanes themselves keep their positions.
 */
static inline uint64_t bswap16x4(uint64_t x)
{
    /* Selects the low byte of every 16-bit lane. */
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;
    uint64_t low_to_high = (x & low_bytes) << 8;
    uint64_t high_to_low = (x >> 8) & low_bytes;

    return low_to_high | high_to_low;
}
8221 
/*
 * Byte-swap each of the two 32-bit lanes packed in x independently.
 *
 * A full 64-bit byte swap reverses the bytes inside each lane but also
 * exchanges the two lanes; rotating by 32 bits puts the lanes back.
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    uint64_t reversed = bswap64(x);

    return ror64(reversed, 32);
}
8226 
8227 void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8228                      target_ulong addr)
8229 {
8230     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8231     uintptr_t ra = GETPC();
8232     uint64_t d0, d1;
8233 
8234     /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8235     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8236     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8237     pwd->d[0] = d0;
8238     pwd->d[1] = d1;
8239 }
8240 
8241 void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8242                      target_ulong addr)
8243 {
8244     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8245     uintptr_t ra = GETPC();
8246     uint64_t d0, d1;
8247 
8248     /*
8249      * Load 8 bytes at a time.  Use little-endian load, then for
8250      * big-endian target, we must then swap the four halfwords.
8251      */
8252     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8253     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8254     if (mips_env_is_bigendian(env)) {
8255         d0 = bswap16x4(d0);
8256         d1 = bswap16x4(d1);
8257     }
8258     pwd->d[0] = d0;
8259     pwd->d[1] = d1;
8260 }
8261 
8262 void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8263                      target_ulong addr)
8264 {
8265     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8266     uintptr_t ra = GETPC();
8267     uint64_t d0, d1;
8268 
8269     /*
8270      * Load 8 bytes at a time.  Use little-endian load, then for
8271      * big-endian target, we must then bswap the two words.
8272      */
8273     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8274     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8275     if (mips_env_is_bigendian(env)) {
8276         d0 = bswap32x2(d0);
8277         d1 = bswap32x2(d1);
8278     }
8279     pwd->d[0] = d0;
8280     pwd->d[1] = d1;
8281 }
8282 
8283 void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8284                      target_ulong addr)
8285 {
8286     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8287     uintptr_t ra = GETPC();
8288     uint64_t d0, d1;
8289 
8290     d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8291     d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8292     pwd->d[0] = d0;
8293     pwd->d[1] = d1;
8294 }
8295 
/*
 * Non-zero when an access of MSA_WRLEN / 8 bytes starting at address x
 * extends past the end of the target page containing x: the offset of
 * the last byte within the page would be >= TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8298 
8299 static inline void ensure_writable_pages(CPUMIPSState *env,
8300                                          target_ulong addr,
8301                                          int mmu_idx,
8302                                          uintptr_t retaddr)
8303 {
8304     /* FIXME: Probe the actual accesses (pass and use a size) */
8305     if (unlikely(MSA_PAGESPAN(addr))) {
8306         /* first page */
8307         probe_write(env, addr, 0, mmu_idx, retaddr);
8308         /* second page */
8309         addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8310         probe_write(env, addr, 0, mmu_idx, retaddr);
8311     }
8312 }
8313 
8314 void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8315                      target_ulong addr)
8316 {
8317     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8318     int mmu_idx = mips_env_mmu_index(env);
8319     uintptr_t ra = GETPC();
8320 
8321     ensure_writable_pages(env, addr, mmu_idx, ra);
8322 
8323     /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8324     cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8325     cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
8326 }
8327 
8328 void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8329                      target_ulong addr)
8330 {
8331     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8332     int mmu_idx = mips_env_mmu_index(env);
8333     uintptr_t ra = GETPC();
8334     uint64_t d0, d1;
8335 
8336     ensure_writable_pages(env, addr, mmu_idx, ra);
8337 
8338     /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8339     d0 = pwd->d[0];
8340     d1 = pwd->d[1];
8341     if (mips_env_is_bigendian(env)) {
8342         d0 = bswap16x4(d0);
8343         d1 = bswap16x4(d1);
8344     }
8345     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8346     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8347 }
8348 
8349 void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8350                      target_ulong addr)
8351 {
8352     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8353     int mmu_idx = mips_env_mmu_index(env);
8354     uintptr_t ra = GETPC();
8355     uint64_t d0, d1;
8356 
8357     ensure_writable_pages(env, addr, mmu_idx, ra);
8358 
8359     /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8360     d0 = pwd->d[0];
8361     d1 = pwd->d[1];
8362     if (mips_env_is_bigendian(env)) {
8363         d0 = bswap32x2(d0);
8364         d1 = bswap32x2(d1);
8365     }
8366     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8367     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8368 }
8369 
8370 void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8371                      target_ulong addr)
8372 {
8373     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8374     int mmu_idx = mips_env_mmu_index(env);
8375     uintptr_t ra = GETPC();
8376 
8377     ensure_writable_pages(env, addr, mmu_idx, GETPC());
8378 
8379     cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8380     cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8381 }
8382