xref: /openbmc/qemu/target/mips/tcg/msa_helper.c (revision 1f97715c8390e582f154d8b579c70779bd8c9bdf)
1  /*
2   * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3   *
4   * Copyright (c) 2014 Imagination Technologies
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "cpu.h"
22  #include "internal.h"
23  #include "tcg/tcg.h"
24  #include "exec/exec-all.h"
25  #include "exec/cpu_ldst.h"
26  #include "exec/helper-proto.h"
27  #include "exec/memop.h"
28  #include "fpu/softfloat.h"
29  #include "fpu_helper.h"
30  
31  /* Data format min and max values */
32  #define DF_BITS(df) (1 << ((df) + 3))
33  
34  #define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
35  #define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
36  
37  #define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
38  #define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
39  
40  #define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
41  #define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
42  
43  #define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
44  #define SIGNED(x, df)                                                   \
45      ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
46  
47  /* Element-by-element access macros */
48  #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
49  
50  
51  
52  /*
53   * Bit Count
54   * ---------
55   *
56   * +---------------+----------------------------------------------------------+
57   * | NLOC.B        | Vector Leading Ones Count (byte)                         |
58   * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
59   * | NLOC.W        | Vector Leading Ones Count (word)                         |
60   * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
61   * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
62   * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
63   * | NLZC.W        | Vector Leading Zeros Count (word)                        |
64   * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
65   * | PCNT.B        | Vector Population Count (byte)                           |
66   * | PCNT.H        | Vector Population Count (halfword)                       |
67   * | PCNT.W        | Vector Population Count (word)                           |
68   * | PCNT.D        | Vector Population Count (doubleword)                     |
69   * +---------------+----------------------------------------------------------+
70   */
71  
72  static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
73  {
74      uint64_t x, y;
75      int n, c;
76  
77      x = UNSIGNED(arg, df);
78      n = DF_BITS(df);
79      c = DF_BITS(df) / 2;
80  
81      do {
82          y = x >> c;
83          if (y != 0) {
84              n = n - c;
85              x = y;
86          }
87          c = c >> 1;
88      } while (c != 0);
89  
90      return n - x;
91  }
92  
93  static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
94  {
95      return msa_nlzc_df(df, UNSIGNED((~arg), df));
96  }
97  
98  void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
99  {
100      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
101      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
102  
103      pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
104      pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
105      pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
106      pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
107      pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
108      pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
109      pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
110      pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
111      pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
112      pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
113      pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
114      pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
115      pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
116      pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
117      pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
118      pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
119  }
120  
121  void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
122  {
123      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
124      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
125  
126      pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
127      pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
128      pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
129      pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
130      pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
131      pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
132      pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
133      pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
134  }
135  
136  void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
137  {
138      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
139      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
140  
141      pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
142      pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
143      pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
144      pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
145  }
146  
147  void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
148  {
149      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
150      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
151  
152      pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
153      pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
154  }
155  
156  void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
157  {
158      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
159      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
160  
161      pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
162      pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
163      pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
164      pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
165      pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
166      pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
167      pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
168      pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
169      pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
170      pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
171      pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
172      pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
173      pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
174      pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
175      pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
176      pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
177  }
178  
179  void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
180  {
181      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
182      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
183  
184      pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
185      pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
186      pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
187      pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
188      pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
189      pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
190      pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
191      pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
192  }
193  
194  void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
195  {
196      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
197      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
198  
199      pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
200      pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
201      pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
202      pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
203  }
204  
205  void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
206  {
207      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
208      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
209  
210      pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
211      pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
212  }
213  
214  static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
215  {
216      uint64_t x;
217  
218      x = UNSIGNED(arg, df);
219  
220      x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
221      x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
222      x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
223      x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
224      x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
225      x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32));
226  
227      return x;
228  }
229  
230  void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
231  {
232      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
233      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
234  
235      pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
236      pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
237      pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
238      pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
239      pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
240      pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
241      pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
242      pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
243      pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
244      pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
245      pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
246      pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
247      pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
248      pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
249      pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
250      pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
251  }
252  
253  void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
254  {
255      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
256      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
257  
258      pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
259      pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
260      pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
261      pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
262      pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
263      pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
264      pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
265      pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
266  }
267  
268  void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
269  {
270      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
271      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
272  
273      pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
274      pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
275      pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
276      pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
277  }
278  
279  void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
280  {
281      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
282      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
283  
284      pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
285      pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
286  }
287  
288  
289  /*
290   * Bit Move
291   * --------
292   *
293   * +---------------+----------------------------------------------------------+
294   * | BINSL.B       | Vector Bit Insert Left (byte)                            |
295   * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
296   * | BINSL.W       | Vector Bit Insert Left (word)                            |
297   * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
298   * | BINSR.B       | Vector Bit Insert Right (byte)                           |
299   * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
300   * | BINSR.W       | Vector Bit Insert Right (word)                           |
301   * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
302   * | BMNZ.V        | Vector Bit Move If Not Zero                              |
303   * | BMZ.V         | Vector Bit Move If Zero                                  |
304   * | BSEL.V        | Vector Bit Select                                        |
305   * +---------------+----------------------------------------------------------+
306   */
307  
308  /* Data format bit position and unsigned values */
309  #define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
310  
311  static inline int64_t msa_binsl_df(uint32_t df,
312                                     int64_t dest, int64_t arg1, int64_t arg2)
313  {
314      uint64_t u_arg1 = UNSIGNED(arg1, df);
315      uint64_t u_dest = UNSIGNED(dest, df);
316      int32_t sh_d = BIT_POSITION(arg2, df) + 1;
317      int32_t sh_a = DF_BITS(df) - sh_d;
318      if (sh_d == DF_BITS(df)) {
319          return u_arg1;
320      } else {
321          return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
322                 UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
323      }
324  }
325  
326  void helper_msa_binsl_b(CPUMIPSState *env,
327                          uint32_t wd, uint32_t ws, uint32_t wt)
328  {
329      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
330      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
331      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
332  
333      pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
334      pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
335      pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
336      pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
337      pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
338      pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
339      pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
340      pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
341      pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
342      pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
343      pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
344      pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
345      pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
346      pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
347      pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
348      pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
349  }
350  
351  void helper_msa_binsl_h(CPUMIPSState *env,
352                          uint32_t wd, uint32_t ws, uint32_t wt)
353  {
354      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
355      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
356      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
357  
358      pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
359      pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
360      pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
361      pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
362      pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
363      pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
364      pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
365      pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
366  }
367  
368  void helper_msa_binsl_w(CPUMIPSState *env,
369                          uint32_t wd, uint32_t ws, uint32_t wt)
370  {
371      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
372      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
373      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
374  
375      pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
376      pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
377      pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
378      pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
379  }
380  
381  void helper_msa_binsl_d(CPUMIPSState *env,
382                          uint32_t wd, uint32_t ws, uint32_t wt)
383  {
384      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
385      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
386      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
387  
388      pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
389      pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
390  }
391  
392  static inline int64_t msa_binsr_df(uint32_t df,
393                                     int64_t dest, int64_t arg1, int64_t arg2)
394  {
395      uint64_t u_arg1 = UNSIGNED(arg1, df);
396      uint64_t u_dest = UNSIGNED(dest, df);
397      int32_t sh_d = BIT_POSITION(arg2, df) + 1;
398      int32_t sh_a = DF_BITS(df) - sh_d;
399      if (sh_d == DF_BITS(df)) {
400          return u_arg1;
401      } else {
402          return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
403                 UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
404      }
405  }
406  
407  void helper_msa_binsr_b(CPUMIPSState *env,
408                          uint32_t wd, uint32_t ws, uint32_t wt)
409  {
410      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
411      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
412      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
413  
414      pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
415      pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
416      pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
417      pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
418      pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
419      pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
420      pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
421      pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
422      pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
423      pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
424      pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
425      pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
426      pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
427      pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
428      pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
429      pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
430  }
431  
432  void helper_msa_binsr_h(CPUMIPSState *env,
433                          uint32_t wd, uint32_t ws, uint32_t wt)
434  {
435      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
436      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
437      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
438  
439      pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
440      pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
441      pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
442      pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
443      pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
444      pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
445      pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
446      pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
447  }
448  
449  void helper_msa_binsr_w(CPUMIPSState *env,
450                          uint32_t wd, uint32_t ws, uint32_t wt)
451  {
452      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
453      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
454      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
455  
456      pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
457      pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
458      pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
459      pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
460  }
461  
462  void helper_msa_binsr_d(CPUMIPSState *env,
463                          uint32_t wd, uint32_t ws, uint32_t wt)
464  {
465      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
466      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
467      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
468  
469      pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
470      pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
471  }
472  
473  void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
474  {
475      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
476      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
477      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
478  
479      pwd->d[0] = UNSIGNED(                                                     \
480          ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
481      pwd->d[1] = UNSIGNED(                                                     \
482          ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
483  }
484  
485  void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
486  {
487      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
488      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
489      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
490  
491      pwd->d[0] = UNSIGNED(                                                     \
492          ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
493      pwd->d[1] = UNSIGNED(                                                     \
494          ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
495  }
496  
497  void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
498  {
499      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
500      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
501      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
502  
503      pwd->d[0] = UNSIGNED(                                                     \
504          (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
505      pwd->d[1] = UNSIGNED(                                                     \
506          (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
507  }
508  
509  
510  /*
511   * Bit Set
512   * -------
513   *
514   * +---------------+----------------------------------------------------------+
515   * | BCLR.B        | Vector Bit Clear (byte)                                  |
516   * | BCLR.H        | Vector Bit Clear (halfword)                              |
517   * | BCLR.W        | Vector Bit Clear (word)                                  |
518   * | BCLR.D        | Vector Bit Clear (doubleword)                            |
519   * | BNEG.B        | Vector Bit Negate (byte)                                 |
520   * | BNEG.H        | Vector Bit Negate (halfword)                             |
521   * | BNEG.W        | Vector Bit Negate (word)                                 |
522   * | BNEG.D        | Vector Bit Negate (doubleword)                           |
523   * | BSET.B        | Vector Bit Set (byte)                                    |
524   * | BSET.H        | Vector Bit Set (halfword)                                |
525   * | BSET.W        | Vector Bit Set (word)                                    |
526   * | BSET.D        | Vector Bit Set (doubleword)                              |
527   * +---------------+----------------------------------------------------------+
528   */
529  
530  static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
531  {
532      int32_t b_arg2 = BIT_POSITION(arg2, df);
533      return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
534  }
535  
536  void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
537  {
538      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
539      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
540      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
541  
542      pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
543      pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
544      pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
545      pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
546      pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
547      pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
548      pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
549      pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
550      pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
551      pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
552      pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
553      pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
554      pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
555      pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
556      pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
557      pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
558  }
559  
560  void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
561  {
562      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
563      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
564      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
565  
566      pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
567      pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
568      pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
569      pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
570      pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
571      pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
572      pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
573      pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
574  }
575  
576  void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
577  {
578      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
579      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
580      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
581  
582      pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
583      pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
584      pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
585      pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
586  }
587  
588  void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
589  {
590      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
591      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
592      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
593  
594      pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
595      pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
596  }
597  
598  static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
599  {
600      int32_t b_arg2 = BIT_POSITION(arg2, df);
601      return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
602  }
603  
604  void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
605  {
606      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
607      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
608      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
609  
610      pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
611      pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
612      pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
613      pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
614      pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
615      pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
616      pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
617      pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
618      pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
619      pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
620      pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
621      pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
622      pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
623      pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
624      pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
625      pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
626  }
627  
628  void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
629  {
630      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
631      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
632      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
633  
634      pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
635      pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
636      pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
637      pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
638      pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
639      pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
640      pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
641      pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
642  }
643  
644  void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
645  {
646      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
647      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
648      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
649  
650      pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
651      pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
652      pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
653      pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
654  }
655  
656  void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
657  {
658      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
659      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
660      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
661  
662      pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
663      pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
664  }
665  
666  static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
667          int64_t arg2)
668  {
669      int32_t b_arg2 = BIT_POSITION(arg2, df);
670      return UNSIGNED(arg1 | (1LL << b_arg2), df);
671  }
672  
673  void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
674  {
675      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
676      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
677      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
678  
679      pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
680      pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
681      pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
682      pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
683      pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
684      pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
685      pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
686      pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
687      pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
688      pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
689      pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
690      pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
691      pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
692      pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
693      pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
694      pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
695  }
696  
697  void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
698  {
699      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
700      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
701      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
702  
703      pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
704      pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
705      pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
706      pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
707      pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
708      pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
709      pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
710      pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
711  }
712  
713  void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
714  {
715      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
716      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
717      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
718  
719      pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
720      pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
721      pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
722      pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
723  }
724  
725  void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
726  {
727      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
728      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
729      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
730  
731      pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
732      pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
733  }
734  
735  
736  /*
737   * Fixed Multiply
738   * --------------
739   *
740   * +---------------+----------------------------------------------------------+
741   * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
742   * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
743   * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
744   * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
745   * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
746   * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
747   * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
748   * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
749   * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
750   * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
751   * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
752   * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
753   * +---------------+----------------------------------------------------------+
754   */
755  
756  /* TODO: insert Fixed Multiply group helpers here */
757  
758  
759  /*
760   * Float Max Min
761   * -------------
762   *
763   * +---------------+----------------------------------------------------------+
764   * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
765   * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
766   * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
767   * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
768   * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
769   * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
770   * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
771   * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
772   * +---------------+----------------------------------------------------------+
773   */
774  
775  /* TODO: insert Float Max Min group helpers here */
776  
777  
778  /*
779   * Int Add
780   * -------
781   *
782   * +---------------+----------------------------------------------------------+
783   * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
784   * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
785   * | ADD_A.W       | Vector Add Absolute Values (word)                        |
786   * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
787   * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
788   * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
789   * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
790   * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
791   * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
792   * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
793   * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
794   * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
795   * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
796   * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
797   * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
798   * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
799   * | ADDV.B        | Vector Add (byte)                                        |
800   * | ADDV.H        | Vector Add (halfword)                                    |
801   * | ADDV.W        | Vector Add (word)                                        |
802   * | ADDV.D        | Vector Add (doubleword)                                  |
803   * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
804   * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
805   * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
806   * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
807   * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
808   * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
809   * +---------------+----------------------------------------------------------+
810   */
811  
812  
813  static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
814  {
815      uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
816      uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
817      return abs_arg1 + abs_arg2;
818  }
819  
820  void helper_msa_add_a_b(CPUMIPSState *env,
821                          uint32_t wd, uint32_t ws, uint32_t wt)
822  {
823      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
824      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
825      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
826  
827      pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
828      pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
829      pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
830      pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
831      pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
832      pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
833      pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
834      pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
835      pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
836      pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
837      pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
838      pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
839      pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
840      pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
841      pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
842      pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
843  }
844  
845  void helper_msa_add_a_h(CPUMIPSState *env,
846                          uint32_t wd, uint32_t ws, uint32_t wt)
847  {
848      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
849      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
850      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
851  
852      pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
853      pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
854      pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
855      pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
856      pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
857      pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
858      pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
859      pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
860  }
861  
862  void helper_msa_add_a_w(CPUMIPSState *env,
863                          uint32_t wd, uint32_t ws, uint32_t wt)
864  {
865      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
866      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
867      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
868  
869      pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
870      pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
871      pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
872      pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
873  }
874  
875  void helper_msa_add_a_d(CPUMIPSState *env,
876                          uint32_t wd, uint32_t ws, uint32_t wt)
877  {
878      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
879      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
880      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
881  
882      pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
883      pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
884  }
885  
886  
887  static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
888  {
889      uint64_t max_int = (uint64_t)DF_MAX_INT(df);
890      uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
891      uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
892      if (abs_arg1 > max_int || abs_arg2 > max_int) {
893          return (int64_t)max_int;
894      } else {
895          return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
896      }
897  }
898  
899  void helper_msa_adds_a_b(CPUMIPSState *env,
900                           uint32_t wd, uint32_t ws, uint32_t wt)
901  {
902      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
903      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
904      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
905  
906      pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
907      pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
908      pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
909      pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
910      pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
911      pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
912      pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
913      pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
914      pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
915      pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
916      pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
917      pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
918      pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
919      pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
920      pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
921      pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
922  }
923  
924  void helper_msa_adds_a_h(CPUMIPSState *env,
925                           uint32_t wd, uint32_t ws, uint32_t wt)
926  {
927      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
928      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
929      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
930  
931      pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
932      pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
933      pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
934      pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
935      pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
936      pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
937      pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
938      pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
939  }
940  
941  void helper_msa_adds_a_w(CPUMIPSState *env,
942                           uint32_t wd, uint32_t ws, uint32_t wt)
943  {
944      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
945      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
946      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
947  
948      pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
949      pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
950      pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
951      pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
952  }
953  
954  void helper_msa_adds_a_d(CPUMIPSState *env,
955                           uint32_t wd, uint32_t ws, uint32_t wt)
956  {
957      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
958      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
959      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
960  
961      pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
962      pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
963  }
964  
965  
966  static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
967  {
968      int64_t max_int = DF_MAX_INT(df);
969      int64_t min_int = DF_MIN_INT(df);
970      if (arg1 < 0) {
971          return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
972      } else {
973          return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
974      }
975  }
976  
977  void helper_msa_adds_s_b(CPUMIPSState *env,
978                           uint32_t wd, uint32_t ws, uint32_t wt)
979  {
980      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
981      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
982      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
983  
984      pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
985      pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
986      pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
987      pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
988      pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
989      pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
990      pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
991      pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
992      pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
993      pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
994      pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
995      pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
996      pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
997      pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
998      pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
999      pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1000  }
1001  
1002  void helper_msa_adds_s_h(CPUMIPSState *env,
1003                           uint32_t wd, uint32_t ws, uint32_t wt)
1004  {
1005      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1006      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1007      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1008  
1009      pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1010      pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1011      pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1012      pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1013      pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1014      pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1015      pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1016      pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1017  }
1018  
1019  void helper_msa_adds_s_w(CPUMIPSState *env,
1020                           uint32_t wd, uint32_t ws, uint32_t wt)
1021  {
1022      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1023      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1024      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1025  
1026      pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1027      pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1028      pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1029      pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1030  }
1031  
1032  void helper_msa_adds_s_d(CPUMIPSState *env,
1033                           uint32_t wd, uint32_t ws, uint32_t wt)
1034  {
1035      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1036      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1037      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1038  
1039      pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1040      pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1041  }
1042  
1043  
1044  static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
1045  {
1046      uint64_t max_uint = DF_MAX_UINT(df);
1047      uint64_t u_arg1 = UNSIGNED(arg1, df);
1048      uint64_t u_arg2 = UNSIGNED(arg2, df);
1049      return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
1050  }
1051  
1052  void helper_msa_adds_u_b(CPUMIPSState *env,
1053                           uint32_t wd, uint32_t ws, uint32_t wt)
1054  {
1055      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1056      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1057      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1058  
1059      pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1060      pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1061      pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1062      pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1063      pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1064      pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1065      pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1066      pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1067      pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1068      pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1069      pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1070      pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1071      pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1072      pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1073      pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1074      pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1075  }
1076  
1077  void helper_msa_adds_u_h(CPUMIPSState *env,
1078                           uint32_t wd, uint32_t ws, uint32_t wt)
1079  {
1080      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1081      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1082      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1083  
1084      pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1085      pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1086      pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1087      pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1088      pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1089      pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1090      pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1091      pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1092  }
1093  
1094  void helper_msa_adds_u_w(CPUMIPSState *env,
1095                           uint32_t wd, uint32_t ws, uint32_t wt)
1096  {
1097      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1098      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1099      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1100  
1101      pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1102      pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1103      pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1104      pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1105  }
1106  
1107  void helper_msa_adds_u_d(CPUMIPSState *env,
1108                           uint32_t wd, uint32_t ws, uint32_t wt)
1109  {
1110      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1111      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1112      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1113  
1114      pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1115      pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1116  }
1117  
1118  
1119  static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
1120  {
1121      return arg1 + arg2;
1122  }
1123  
1124  void helper_msa_addv_b(CPUMIPSState *env,
1125                         uint32_t wd, uint32_t ws, uint32_t wt)
1126  {
1127      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1128      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1129      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1130  
1131      pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1132      pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1133      pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1134      pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1135      pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1136      pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1137      pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1138      pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1139      pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1140      pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1141      pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1142      pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1143      pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1144      pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1145      pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1146      pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1147  }
1148  
1149  void helper_msa_addv_h(CPUMIPSState *env,
1150                         uint32_t wd, uint32_t ws, uint32_t wt)
1151  {
1152      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1153      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1154      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1155  
1156      pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1157      pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1158      pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1159      pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1160      pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1161      pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1162      pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1163      pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1164  }
1165  
1166  void helper_msa_addv_w(CPUMIPSState *env,
1167                         uint32_t wd, uint32_t ws, uint32_t wt)
1168  {
1169      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1170      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1171      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1172  
1173      pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1174      pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1175      pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1176      pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1177  }
1178  
1179  void helper_msa_addv_d(CPUMIPSState *env,
1180                         uint32_t wd, uint32_t ws, uint32_t wt)
1181  {
1182      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1183      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1184      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1185  
1186      pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1187      pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1188  }
1189  
1190  
1191  #define SIGNED_EVEN(a, df) \
1192          ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
1193  
1194  #define UNSIGNED_EVEN(a, df) \
1195          ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
1196  
1197  #define SIGNED_ODD(a, df) \
1198          ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1199  
1200  #define UNSIGNED_ODD(a, df) \
1201          ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1202  
1203  
1204  static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
1205  {
1206      return SIGNED_ODD(arg1, df) + SIGNED_EVEN(arg2, df);
1207  }
1208  
1209  void helper_msa_hadd_s_h(CPUMIPSState *env,
1210                           uint32_t wd, uint32_t ws, uint32_t wt)
1211  {
1212      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1213      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1214      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1215  
1216      pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1217      pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1218      pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1219      pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1220      pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1221      pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1222      pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1223      pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1224  }
1225  
1226  void helper_msa_hadd_s_w(CPUMIPSState *env,
1227                           uint32_t wd, uint32_t ws, uint32_t wt)
1228  {
1229      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1230      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1231      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1232  
1233      pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1234      pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1235      pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1236      pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1237  }
1238  
1239  void helper_msa_hadd_s_d(CPUMIPSState *env,
1240                           uint32_t wd, uint32_t ws, uint32_t wt)
1241  {
1242      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1243      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1244      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1245  
1246      pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1247      pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1248  }
1249  
1250  
1251  static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
1252  {
1253      return UNSIGNED_ODD(arg1, df) + UNSIGNED_EVEN(arg2, df);
1254  }
1255  
1256  void helper_msa_hadd_u_h(CPUMIPSState *env,
1257                           uint32_t wd, uint32_t ws, uint32_t wt)
1258  {
1259      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1260      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1261      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1262  
1263      pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1264      pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1265      pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1266      pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1267      pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1268      pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1269      pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1270      pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1271  }
1272  
1273  void helper_msa_hadd_u_w(CPUMIPSState *env,
1274                           uint32_t wd, uint32_t ws, uint32_t wt)
1275  {
1276      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1277      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1278      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1279  
1280      pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1281      pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1282      pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1283      pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1284  }
1285  
1286  void helper_msa_hadd_u_d(CPUMIPSState *env,
1287                           uint32_t wd, uint32_t ws, uint32_t wt)
1288  {
1289      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1290      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1291      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1292  
1293      pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1294      pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1295  }
1296  
1297  
1298  /*
1299   * Int Average
1300   * -----------
1301   *
1302   * +---------------+----------------------------------------------------------+
1303   * | AVE_S.B       | Vector Signed Average (byte)                             |
1304   * | AVE_S.H       | Vector Signed Average (halfword)                         |
1305   * | AVE_S.W       | Vector Signed Average (word)                             |
1306   * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1307   * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1308   * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1309   * | AVE_U.W       | Vector Unsigned Average (word)                           |
1310   * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1311   * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1312   * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1313   * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1314   * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1315   * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1316   * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1317   * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1318   * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1319   * +---------------+----------------------------------------------------------+
1320   */
1321  
1322  static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
1323  {
1324      /* signed shift */
1325      return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
1326  }
1327  
1328  void helper_msa_ave_s_b(CPUMIPSState *env,
1329                          uint32_t wd, uint32_t ws, uint32_t wt)
1330  {
1331      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1332      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1333      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1334  
1335      pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1336      pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1337      pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1338      pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1339      pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1340      pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1341      pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1342      pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1343      pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1344      pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1345      pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1346      pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1347      pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1348      pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1349      pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1350      pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1351  }
1352  
1353  void helper_msa_ave_s_h(CPUMIPSState *env,
1354                          uint32_t wd, uint32_t ws, uint32_t wt)
1355  {
1356      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1357      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1358      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1359  
1360      pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1361      pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1362      pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1363      pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1364      pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1365      pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1366      pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1367      pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1368  }
1369  
1370  void helper_msa_ave_s_w(CPUMIPSState *env,
1371                          uint32_t wd, uint32_t ws, uint32_t wt)
1372  {
1373      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1374      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1375      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1376  
1377      pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1378      pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1379      pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1380      pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1381  }
1382  
1383  void helper_msa_ave_s_d(CPUMIPSState *env,
1384                          uint32_t wd, uint32_t ws, uint32_t wt)
1385  {
1386      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1387      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1388      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1389  
1390      pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1391      pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1392  }
1393  
1394  static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
1395  {
1396      uint64_t u_arg1 = UNSIGNED(arg1, df);
1397      uint64_t u_arg2 = UNSIGNED(arg2, df);
1398      /* unsigned shift */
1399      return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
1400  }
1401  
1402  void helper_msa_ave_u_b(CPUMIPSState *env,
1403                          uint32_t wd, uint32_t ws, uint32_t wt)
1404  {
1405      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1406      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1407      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1408  
1409      pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1410      pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1411      pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1412      pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1413      pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1414      pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1415      pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1416      pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1417      pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1418      pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1419      pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1420      pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1421      pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1422      pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1423      pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1424      pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1425  }
1426  
1427  void helper_msa_ave_u_h(CPUMIPSState *env,
1428                          uint32_t wd, uint32_t ws, uint32_t wt)
1429  {
1430      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1431      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1432      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1433  
1434      pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1435      pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1436      pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1437      pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1438      pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1439      pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1440      pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1441      pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1442  }
1443  
1444  void helper_msa_ave_u_w(CPUMIPSState *env,
1445                          uint32_t wd, uint32_t ws, uint32_t wt)
1446  {
1447      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1448      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1449      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1450  
1451      pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1452      pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1453      pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1454      pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1455  }
1456  
1457  void helper_msa_ave_u_d(CPUMIPSState *env,
1458                          uint32_t wd, uint32_t ws, uint32_t wt)
1459  {
1460      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1461      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1462      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1463  
1464      pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1465      pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1466  }
1467  
1468  static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
1469  {
1470      /* signed shift */
1471      return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
1472  }
1473  
1474  void helper_msa_aver_s_b(CPUMIPSState *env,
1475                           uint32_t wd, uint32_t ws, uint32_t wt)
1476  {
1477      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1478      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1479      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1480  
1481      pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1482      pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1483      pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1484      pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1485      pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1486      pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1487      pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1488      pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1489      pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1490      pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1491      pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1492      pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1493      pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1494      pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1495      pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1496      pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1497  }
1498  
1499  void helper_msa_aver_s_h(CPUMIPSState *env,
1500                           uint32_t wd, uint32_t ws, uint32_t wt)
1501  {
1502      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1503      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1504      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1505  
1506      pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1507      pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1508      pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1509      pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1510      pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1511      pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1512      pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1513      pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1514  }
1515  
1516  void helper_msa_aver_s_w(CPUMIPSState *env,
1517                           uint32_t wd, uint32_t ws, uint32_t wt)
1518  {
1519      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1520      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1521      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1522  
1523      pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1524      pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1525      pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1526      pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1527  }
1528  
1529  void helper_msa_aver_s_d(CPUMIPSState *env,
1530                           uint32_t wd, uint32_t ws, uint32_t wt)
1531  {
1532      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1533      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1534      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1535  
1536      pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1537      pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1538  }
1539  
1540  static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
1541  {
1542      uint64_t u_arg1 = UNSIGNED(arg1, df);
1543      uint64_t u_arg2 = UNSIGNED(arg2, df);
1544      /* unsigned shift */
1545      return (u_arg1 >> 1) + (u_arg2 >> 1) + ((u_arg1 | u_arg2) & 1);
1546  }
1547  
1548  void helper_msa_aver_u_b(CPUMIPSState *env,
1549                           uint32_t wd, uint32_t ws, uint32_t wt)
1550  {
1551      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1552      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1553      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1554  
1555      pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1556      pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1557      pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1558      pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1559      pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1560      pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1561      pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1562      pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1563      pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1564      pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1565      pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1566      pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1567      pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1568      pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1569      pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1570      pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1571  }
1572  
1573  void helper_msa_aver_u_h(CPUMIPSState *env,
1574                           uint32_t wd, uint32_t ws, uint32_t wt)
1575  {
1576      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1577      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1578      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1579  
1580      pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1581      pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1582      pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1583      pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1584      pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1585      pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1586      pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1587      pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1588  }
1589  
1590  void helper_msa_aver_u_w(CPUMIPSState *env,
1591                           uint32_t wd, uint32_t ws, uint32_t wt)
1592  {
1593      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1594      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1595      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1596  
1597      pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1598      pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1599      pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1600      pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1601  }
1602  
1603  void helper_msa_aver_u_d(CPUMIPSState *env,
1604                           uint32_t wd, uint32_t ws, uint32_t wt)
1605  {
1606      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1607      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1608      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1609  
1610      pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1611      pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1612  }
1613  
1614  
1615  /*
1616   * Int Compare
1617   * -----------
1618   *
1619   * +---------------+----------------------------------------------------------+
1620   * | CEQ.B         | Vector Compare Equal (byte)                              |
1621   * | CEQ.H         | Vector Compare Equal (halfword)                          |
1622   * | CEQ.W         | Vector Compare Equal (word)                              |
1623   * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1624   * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1625   * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1626   * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1627   * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1628   * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1629   * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1630   * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1631   * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1632   * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1633   * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1634   * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1635   * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1636   * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1637   * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1638   * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1639   * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1640   * +---------------+----------------------------------------------------------+
1641   */
1642  
1643  static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
1644  {
1645      return arg1 == arg2 ? -1 : 0;
1646  }
1647  
1648  static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
1649  {
1650      return arg1 == arg2 ? -1 : 0;
1651  }
1652  
1653  void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1654  {
1655      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1656      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1657      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1658  
1659      pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1660      pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1661      pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1662      pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1663      pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1664      pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1665      pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1666      pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1667      pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1668      pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1669      pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1670      pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1671      pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1672      pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1673      pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1674      pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1675  }
1676  
1677  static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
1678  {
1679      return arg1 == arg2 ? -1 : 0;
1680  }
1681  
1682  void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1683  {
1684      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1685      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1686      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1687  
1688      pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1689      pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1690      pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1691      pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1692      pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1693      pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1694      pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1695      pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1696  }
1697  
1698  static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
1699  {
1700      return arg1 == arg2 ? -1 : 0;
1701  }
1702  
1703  void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1704  {
1705      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1706      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1707      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1708  
1709      pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1710      pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1711      pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1712      pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1713  }
1714  
1715  static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
1716  {
1717      return arg1 == arg2 ? -1 : 0;
1718  }
1719  
1720  void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1721  {
1722      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1723      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1724      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1725  
1726      pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1727      pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1728  }
1729  
1730  static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
1731  {
1732      return arg1 <= arg2 ? -1 : 0;
1733  }
1734  
1735  void helper_msa_cle_s_b(CPUMIPSState *env,
1736                          uint32_t wd, uint32_t ws, uint32_t wt)
1737  {
1738      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1739      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1740      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1741  
1742      pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1743      pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1744      pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1745      pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1746      pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1747      pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1748      pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1749      pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1750      pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1751      pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1752      pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1753      pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1754      pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1755      pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1756      pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1757      pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1758  }
1759  
1760  void helper_msa_cle_s_h(CPUMIPSState *env,
1761                          uint32_t wd, uint32_t ws, uint32_t wt)
1762  {
1763      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1764      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1765      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1766  
1767      pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1768      pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1769      pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1770      pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1771      pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1772      pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1773      pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1774      pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1775  }
1776  
1777  void helper_msa_cle_s_w(CPUMIPSState *env,
1778                          uint32_t wd, uint32_t ws, uint32_t wt)
1779  {
1780      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1781      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1782      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1783  
1784      pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1785      pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1786      pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1787      pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1788  }
1789  
1790  void helper_msa_cle_s_d(CPUMIPSState *env,
1791                          uint32_t wd, uint32_t ws, uint32_t wt)
1792  {
1793      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1794      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1795      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1796  
1797      pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1798      pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1799  }
1800  
1801  static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
1802  {
1803      uint64_t u_arg1 = UNSIGNED(arg1, df);
1804      uint64_t u_arg2 = UNSIGNED(arg2, df);
1805      return u_arg1 <= u_arg2 ? -1 : 0;
1806  }
1807  
1808  void helper_msa_cle_u_b(CPUMIPSState *env,
1809                          uint32_t wd, uint32_t ws, uint32_t wt)
1810  {
1811      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1812      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1813      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1814  
1815      pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1816      pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1817      pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1818      pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1819      pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1820      pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1821      pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1822      pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1823      pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1824      pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1825      pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1826      pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1827      pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1828      pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1829      pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1830      pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1831  }
1832  
1833  void helper_msa_cle_u_h(CPUMIPSState *env,
1834                          uint32_t wd, uint32_t ws, uint32_t wt)
1835  {
1836      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1837      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1838      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1839  
1840      pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1841      pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1842      pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1843      pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1844      pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1845      pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1846      pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1847      pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1848  }
1849  
1850  void helper_msa_cle_u_w(CPUMIPSState *env,
1851                          uint32_t wd, uint32_t ws, uint32_t wt)
1852  {
1853      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1854      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1855      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1856  
1857      pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1858      pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1859      pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1860      pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1861  }
1862  
1863  void helper_msa_cle_u_d(CPUMIPSState *env,
1864                          uint32_t wd, uint32_t ws, uint32_t wt)
1865  {
1866      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1867      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1868      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1869  
1870      pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1871      pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1872  }
1873  
1874  static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
1875  {
1876      return arg1 < arg2 ? -1 : 0;
1877  }
1878  
1879  static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
1880  {
1881      return arg1 < arg2 ? -1 : 0;
1882  }
1883  
1884  void helper_msa_clt_s_b(CPUMIPSState *env,
1885                          uint32_t wd, uint32_t ws, uint32_t wt)
1886  {
1887      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1888      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1889      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1890  
1891      pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1892      pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1893      pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1894      pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1895      pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1896      pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1897      pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1898      pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1899      pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1900      pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1901      pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1902      pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1903      pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1904      pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1905      pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1906      pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1907  }
1908  
1909  static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
1910  {
1911      return arg1 < arg2 ? -1 : 0;
1912  }
1913  
1914  void helper_msa_clt_s_h(CPUMIPSState *env,
1915                          uint32_t wd, uint32_t ws, uint32_t wt)
1916  {
1917      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1918      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1919      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1920  
1921      pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1922      pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1923      pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1924      pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1925      pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1926      pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1927      pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1928      pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1929  }
1930  
1931  static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
1932  {
1933      return arg1 < arg2 ? -1 : 0;
1934  }
1935  
1936  void helper_msa_clt_s_w(CPUMIPSState *env,
1937                          uint32_t wd, uint32_t ws, uint32_t wt)
1938  {
1939      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1940      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1941      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1942  
1943      pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1944      pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1945      pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1946      pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1947  }
1948  
1949  static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
1950  {
1951      return arg1 < arg2 ? -1 : 0;
1952  }
1953  
1954  void helper_msa_clt_s_d(CPUMIPSState *env,
1955                          uint32_t wd, uint32_t ws, uint32_t wt)
1956  {
1957      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1958      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1959      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1960  
1961      pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1962      pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1963  }
1964  
1965  static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
1966  {
1967      uint64_t u_arg1 = UNSIGNED(arg1, df);
1968      uint64_t u_arg2 = UNSIGNED(arg2, df);
1969      return u_arg1 < u_arg2 ? -1 : 0;
1970  }
1971  
1972  void helper_msa_clt_u_b(CPUMIPSState *env,
1973                          uint32_t wd, uint32_t ws, uint32_t wt)
1974  {
1975      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1976      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1977      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1978  
1979      pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1980      pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1981      pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1982      pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1983      pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1984      pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1985      pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1986      pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1987      pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1988      pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1989      pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1990      pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1991      pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1992      pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1993      pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1994      pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1995  }
1996  
1997  void helper_msa_clt_u_h(CPUMIPSState *env,
1998                          uint32_t wd, uint32_t ws, uint32_t wt)
1999  {
2000      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2001      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2002      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2003  
2004      pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2005      pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2006      pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2007      pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2008      pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2009      pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2010      pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2011      pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2012  }
2013  
2014  void helper_msa_clt_u_w(CPUMIPSState *env,
2015                          uint32_t wd, uint32_t ws, uint32_t wt)
2016  {
2017      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2018      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2019      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2020  
2021      pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2022      pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2023      pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2024      pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2025  }
2026  
2027  void helper_msa_clt_u_d(CPUMIPSState *env,
2028                          uint32_t wd, uint32_t ws, uint32_t wt)
2029  {
2030      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2031      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2032      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2033  
2034      pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2035      pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2036  }
2037  
2038  
2039  /*
2040   * Int Divide
2041   * ----------
2042   *
2043   * +---------------+----------------------------------------------------------+
2044   * | DIV_S.B       | Vector Signed Divide (byte)                              |
2045   * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2046   * | DIV_S.W       | Vector Signed Divide (word)                              |
2047   * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2048   * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2049   * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2050   * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2051   * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2052   * +---------------+----------------------------------------------------------+
2053   */
2054  
2055  
2056  static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2057  {
2058      if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
2059          return DF_MIN_INT(df);
2060      }
2061      return arg2 ? arg1 / arg2
2062                  : arg1 >= 0 ? -1 : 1;
2063  }
2064  
2065  void helper_msa_div_s_b(CPUMIPSState *env,
2066                          uint32_t wd, uint32_t ws, uint32_t wt)
2067  {
2068      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2069      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2070      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2071  
2072      pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2073      pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2074      pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2075      pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2076      pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2077      pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2078      pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2079      pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2080      pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2081      pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2082      pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2083      pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2084      pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2085      pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2086      pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2087      pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2088  }
2089  
2090  void helper_msa_div_s_h(CPUMIPSState *env,
2091                          uint32_t wd, uint32_t ws, uint32_t wt)
2092  {
2093      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2094      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2095      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2096  
2097      pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2098      pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2099      pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2100      pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2101      pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2102      pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2103      pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2104      pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2105  }
2106  
2107  void helper_msa_div_s_w(CPUMIPSState *env,
2108                          uint32_t wd, uint32_t ws, uint32_t wt)
2109  {
2110      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2111      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2112      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2113  
2114      pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2115      pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2116      pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2117      pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2118  }
2119  
2120  void helper_msa_div_s_d(CPUMIPSState *env,
2121                          uint32_t wd, uint32_t ws, uint32_t wt)
2122  {
2123      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2124      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2125      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2126  
2127      pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2128      pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2129  }
2130  
2131  static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2132  {
2133      uint64_t u_arg1 = UNSIGNED(arg1, df);
2134      uint64_t u_arg2 = UNSIGNED(arg2, df);
2135      return arg2 ? u_arg1 / u_arg2 : -1;
2136  }
2137  
2138  void helper_msa_div_u_b(CPUMIPSState *env,
2139                          uint32_t wd, uint32_t ws, uint32_t wt)
2140  {
2141      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2142      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2143      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2144  
2145      pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2146      pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2147      pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2148      pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2149      pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2150      pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2151      pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2152      pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2153      pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2154      pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2155      pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2156      pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2157      pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2158      pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2159      pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2160      pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2161  }
2162  
2163  void helper_msa_div_u_h(CPUMIPSState *env,
2164                          uint32_t wd, uint32_t ws, uint32_t wt)
2165  {
2166      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2167      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2168      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2169  
2170      pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2171      pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2172      pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2173      pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2174      pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2175      pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2176      pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2177      pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2178  }
2179  
2180  void helper_msa_div_u_w(CPUMIPSState *env,
2181                          uint32_t wd, uint32_t ws, uint32_t wt)
2182  {
2183      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2184      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2185      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2186  
2187      pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2188      pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2189      pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2190      pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2191  }
2192  
2193  void helper_msa_div_u_d(CPUMIPSState *env,
2194                          uint32_t wd, uint32_t ws, uint32_t wt)
2195  {
2196      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2197      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2198      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2199  
2200      pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2201      pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2202  }
2203  
2204  
2205  /*
2206   * Int Dot Product
2207   * ---------------
2208   *
2209   * +---------------+----------------------------------------------------------+
2210   * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2211   * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2212   * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2213   * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2214   * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2215   * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
2216   * | DPADD_S.H     | Vector Signed Dot Product (halfword)                     |
2217   * | DPADD_S.W     | Vector Signed Dot Product (word)                         |
2218   * | DPADD_S.D     | Vector Signed Dot Product (doubleword)                   |
2219   * | DPADD_U.H     | Vector Unsigned Dot Product (halfword)                   |
2220   * | DPADD_U.W     | Vector Unsigned Dot Product (word)                       |
2221   * | DPADD_U.D     | Vector Unsigned Dot Product (doubleword)                 |
2222   * | DPSUB_S.H     | Vector Signed Dot Product (halfword)                     |
2223   * | DPSUB_S.W     | Vector Signed Dot Product (word)                         |
2224   * | DPSUB_S.D     | Vector Signed Dot Product (doubleword)                   |
2225   * | DPSUB_U.H     | Vector Unsigned Dot Product (halfword)                   |
2226   * | DPSUB_U.W     | Vector Unsigned Dot Product (word)                       |
2227   * | DPSUB_U.D     | Vector Unsigned Dot Product (doubleword)                 |
2228   * +---------------+----------------------------------------------------------+
2229   */
2230  
2231  #define SIGNED_EXTRACT(e, o, a, df)     \
2232      do {                                \
2233          e = SIGNED_EVEN(a, df);         \
2234          o = SIGNED_ODD(a, df);          \
2235      } while (0)
2236  
2237  #define UNSIGNED_EXTRACT(e, o, a, df)   \
2238      do {                                \
2239          e = UNSIGNED_EVEN(a, df);       \
2240          o = UNSIGNED_ODD(a, df);        \
2241      } while (0)
2242  
2243  
2244  static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2245  {
2246      int64_t even_arg1;
2247      int64_t even_arg2;
2248      int64_t odd_arg1;
2249      int64_t odd_arg2;
2250      SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2251      SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2252      return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2253  }
2254  
2255  void helper_msa_dotp_s_h(CPUMIPSState *env,
2256                           uint32_t wd, uint32_t ws, uint32_t wt)
2257  {
2258      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2259      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2260      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2261  
2262      pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2263      pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2264      pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2265      pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2266      pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2267      pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2268      pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2269      pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2270  }
2271  
2272  void helper_msa_dotp_s_w(CPUMIPSState *env,
2273                           uint32_t wd, uint32_t ws, uint32_t wt)
2274  {
2275      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2276      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2277      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2278  
2279      pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2280      pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2281      pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2282      pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2283  }
2284  
2285  void helper_msa_dotp_s_d(CPUMIPSState *env,
2286                           uint32_t wd, uint32_t ws, uint32_t wt)
2287  {
2288      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2289      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2290      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2291  
2292      pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2293      pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2294  }
2295  
2296  
2297  static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2298  {
2299      int64_t even_arg1;
2300      int64_t even_arg2;
2301      int64_t odd_arg1;
2302      int64_t odd_arg2;
2303      UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2304      UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2305      return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2306  }
2307  
2308  void helper_msa_dotp_u_h(CPUMIPSState *env,
2309                           uint32_t wd, uint32_t ws, uint32_t wt)
2310  {
2311      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2312      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2313      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2314  
2315      pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2316      pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2317      pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2318      pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2319      pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2320      pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2321      pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2322      pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2323  }
2324  
2325  void helper_msa_dotp_u_w(CPUMIPSState *env,
2326                           uint32_t wd, uint32_t ws, uint32_t wt)
2327  {
2328      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2329      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2330      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2331  
2332      pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2333      pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2334      pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2335      pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2336  }
2337  
2338  void helper_msa_dotp_u_d(CPUMIPSState *env,
2339                           uint32_t wd, uint32_t ws, uint32_t wt)
2340  {
2341      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2342      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2343      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2344  
2345      pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2346      pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2347  }
2348  
2349  
2350  static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
2351                                       int64_t arg2)
2352  {
2353      int64_t even_arg1;
2354      int64_t even_arg2;
2355      int64_t odd_arg1;
2356      int64_t odd_arg2;
2357      SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2358      SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2359      return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2360  }
2361  
2362  void helper_msa_dpadd_s_h(CPUMIPSState *env,
2363                            uint32_t wd, uint32_t ws, uint32_t wt)
2364  {
2365      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2366      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2367      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2368  
2369      pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2370      pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2371      pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2372      pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2373      pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2374      pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2375      pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2376      pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2377  }
2378  
2379  void helper_msa_dpadd_s_w(CPUMIPSState *env,
2380                            uint32_t wd, uint32_t ws, uint32_t wt)
2381  {
2382      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2383      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2384      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2385  
2386      pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2387      pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2388      pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2389      pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2390  }
2391  
2392  void helper_msa_dpadd_s_d(CPUMIPSState *env,
2393                            uint32_t wd, uint32_t ws, uint32_t wt)
2394  {
2395      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2396      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2397      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2398  
2399      pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2400      pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2401  }
2402  
2403  
2404  static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
2405                                       int64_t arg2)
2406  {
2407      int64_t even_arg1;
2408      int64_t even_arg2;
2409      int64_t odd_arg1;
2410      int64_t odd_arg2;
2411      UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2412      UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2413      return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
2414  }
2415  
2416  void helper_msa_dpadd_u_h(CPUMIPSState *env,
2417                            uint32_t wd, uint32_t ws, uint32_t wt)
2418  {
2419      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2420      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2421      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2422  
2423      pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2424      pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2425      pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2426      pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2427      pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2428      pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2429      pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2430      pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2431  }
2432  
2433  void helper_msa_dpadd_u_w(CPUMIPSState *env,
2434                            uint32_t wd, uint32_t ws, uint32_t wt)
2435  {
2436      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2437      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2438      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2439  
2440      pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2441      pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2442      pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2443      pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2444  }
2445  
2446  void helper_msa_dpadd_u_d(CPUMIPSState *env,
2447                            uint32_t wd, uint32_t ws, uint32_t wt)
2448  {
2449      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2450      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2451      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2452  
2453      pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2454      pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2455  }
2456  
2457  
2458  static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
2459                                       int64_t arg2)
2460  {
2461      int64_t even_arg1;
2462      int64_t even_arg2;
2463      int64_t odd_arg1;
2464      int64_t odd_arg2;
2465      SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2466      SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2467      return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
2468  }
2469  
2470  void helper_msa_dpsub_s_h(CPUMIPSState *env,
2471                            uint32_t wd, uint32_t ws, uint32_t wt)
2472  {
2473      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2474      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2475      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2476  
2477      pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2478      pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2479      pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2480      pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2481      pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2482      pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2483      pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2484      pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2485  }
2486  
2487  void helper_msa_dpsub_s_w(CPUMIPSState *env,
2488                            uint32_t wd, uint32_t ws, uint32_t wt)
2489  {
2490      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2491      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2492      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2493  
2494      pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2495      pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2496      pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2497      pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2498  }
2499  
2500  void helper_msa_dpsub_s_d(CPUMIPSState *env,
2501                            uint32_t wd, uint32_t ws, uint32_t wt)
2502  {
2503      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2504      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2505      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2506  
2507      pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2508      pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2509  }
2510  
2511  
2512  static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
2513                                       int64_t arg2)
2514  {
2515      int64_t even_arg1;
2516      int64_t even_arg2;
2517      int64_t odd_arg1;
2518      int64_t odd_arg2;
2519      UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
2520      UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
2521      return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
2522  }
2523  
2524  void helper_msa_dpsub_u_h(CPUMIPSState *env,
2525                            uint32_t wd, uint32_t ws, uint32_t wt)
2526  {
2527      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2528      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2529      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2530  
2531      pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2532      pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2533      pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2534      pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2535      pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2536      pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2537      pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2538      pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2539  }
2540  
2541  void helper_msa_dpsub_u_w(CPUMIPSState *env,
2542                            uint32_t wd, uint32_t ws, uint32_t wt)
2543  {
2544      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2545      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2546      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2547  
2548      pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2549      pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2550      pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2551      pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2552  }
2553  
2554  void helper_msa_dpsub_u_d(CPUMIPSState *env,
2555                            uint32_t wd, uint32_t ws, uint32_t wt)
2556  {
2557      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2558      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2559      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2560  
2561      pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2562      pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2563  }
2564  
2565  
2566  /*
2567   * Int Max Min
2568   * -----------
2569   *
2570   * +---------------+----------------------------------------------------------+
2571   * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2572   * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2573   * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2574   * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2575   * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2576   * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2577   * | MAX_S.W       | Vector Signed Maximum (word)                             |
2578   * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2579   * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2580   * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2581   * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2582   * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2583   * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2584   * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2585   * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2586   * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2587   * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2588   * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2589   * | MIN_S.W       | Vector Signed Minimum (word)                             |
2590   * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2591   * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2592   * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2593   * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2594   * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2595   * +---------------+----------------------------------------------------------+
2596   */
2597  
2598  static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
2599  {
2600      uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
2601      uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
2602      return abs_arg1 > abs_arg2 ? arg1 : arg2;
2603  }
2604  
2605  void helper_msa_max_a_b(CPUMIPSState *env,
2606                          uint32_t wd, uint32_t ws, uint32_t wt)
2607  {
2608      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2609      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2610      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2611  
2612      pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2613      pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2614      pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2615      pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2616      pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2617      pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2618      pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2619      pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2620      pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2621      pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2622      pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2623      pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2624      pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2625      pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2626      pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2627      pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2628  }
2629  
2630  void helper_msa_max_a_h(CPUMIPSState *env,
2631                          uint32_t wd, uint32_t ws, uint32_t wt)
2632  {
2633      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2634      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2635      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2636  
2637      pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2638      pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2639      pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2640      pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2641      pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2642      pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2643      pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2644      pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2645  }
2646  
2647  void helper_msa_max_a_w(CPUMIPSState *env,
2648                          uint32_t wd, uint32_t ws, uint32_t wt)
2649  {
2650      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2651      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2652      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2653  
2654      pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2655      pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2656      pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2657      pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2658  }
2659  
2660  void helper_msa_max_a_d(CPUMIPSState *env,
2661                          uint32_t wd, uint32_t ws, uint32_t wt)
2662  {
2663      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2664      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2665      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2666  
2667      pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2668      pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2669  }
2670  
2671  
2672  static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2673  {
2674      return arg1 > arg2 ? arg1 : arg2;
2675  }
2676  
2677  void helper_msa_max_s_b(CPUMIPSState *env,
2678                          uint32_t wd, uint32_t ws, uint32_t wt)
2679  {
2680      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2681      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2682      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2683  
2684      pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2685      pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2686      pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2687      pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2688      pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2689      pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2690      pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2691      pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2692      pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2693      pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2694      pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2695      pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2696      pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2697      pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2698      pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2699      pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2700  }
2701  
2702  void helper_msa_max_s_h(CPUMIPSState *env,
2703                          uint32_t wd, uint32_t ws, uint32_t wt)
2704  {
2705      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2706      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2707      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2708  
2709      pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2710      pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2711      pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2712      pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2713      pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2714      pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2715      pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2716      pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2717  }
2718  
2719  void helper_msa_max_s_w(CPUMIPSState *env,
2720                          uint32_t wd, uint32_t ws, uint32_t wt)
2721  {
2722      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2723      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2724      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2725  
2726      pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2727      pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2728      pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2729      pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2730  }
2731  
2732  void helper_msa_max_s_d(CPUMIPSState *env,
2733                          uint32_t wd, uint32_t ws, uint32_t wt)
2734  {
2735      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2736      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2737      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2738  
2739      pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2740      pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2741  }
2742  
2743  
2744  static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2745  {
2746      uint64_t u_arg1 = UNSIGNED(arg1, df);
2747      uint64_t u_arg2 = UNSIGNED(arg2, df);
2748      return u_arg1 > u_arg2 ? arg1 : arg2;
2749  }
2750  
2751  void helper_msa_max_u_b(CPUMIPSState *env,
2752                          uint32_t wd, uint32_t ws, uint32_t wt)
2753  {
2754      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2755      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2756      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2757  
2758      pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2759      pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2760      pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2761      pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2762      pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2763      pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2764      pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2765      pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2766      pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2767      pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2768      pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2769      pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2770      pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2771      pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2772      pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2773      pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2774  }
2775  
2776  void helper_msa_max_u_h(CPUMIPSState *env,
2777                          uint32_t wd, uint32_t ws, uint32_t wt)
2778  {
2779      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2780      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2781      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2782  
2783      pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2784      pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2785      pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2786      pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2787      pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2788      pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2789      pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2790      pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2791  }
2792  
2793  void helper_msa_max_u_w(CPUMIPSState *env,
2794                          uint32_t wd, uint32_t ws, uint32_t wt)
2795  {
2796      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2797      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2798      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2799  
2800      pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2801      pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2802      pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2803      pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2804  }
2805  
2806  void helper_msa_max_u_d(CPUMIPSState *env,
2807                          uint32_t wd, uint32_t ws, uint32_t wt)
2808  {
2809      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2810      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2811      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2812  
2813      pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2814      pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2815  }
2816  
2817  
2818  static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
2819  {
2820      uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
2821      uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
2822      return abs_arg1 < abs_arg2 ? arg1 : arg2;
2823  }
2824  
2825  void helper_msa_min_a_b(CPUMIPSState *env,
2826                          uint32_t wd, uint32_t ws, uint32_t wt)
2827  {
2828      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2829      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2830      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2831  
2832      pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2833      pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2834      pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2835      pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2836      pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2837      pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2838      pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2839      pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2840      pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2841      pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2842      pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2843      pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2844      pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2845      pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2846      pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2847      pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2848  }
2849  
2850  void helper_msa_min_a_h(CPUMIPSState *env,
2851                          uint32_t wd, uint32_t ws, uint32_t wt)
2852  {
2853      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2854      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2855      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2856  
2857      pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2858      pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2859      pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2860      pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2861      pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2862      pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2863      pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2864      pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2865  }
2866  
2867  void helper_msa_min_a_w(CPUMIPSState *env,
2868                          uint32_t wd, uint32_t ws, uint32_t wt)
2869  {
2870      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2871      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2872      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2873  
2874      pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2875      pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2876      pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2877      pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2878  }
2879  
2880  void helper_msa_min_a_d(CPUMIPSState *env,
2881                          uint32_t wd, uint32_t ws, uint32_t wt)
2882  {
2883      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2884      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2885      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2886  
2887      pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2888      pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2889  }
2890  
2891  
2892  static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
2893  {
2894      return arg1 < arg2 ? arg1 : arg2;
2895  }
2896  
2897  void helper_msa_min_s_b(CPUMIPSState *env,
2898                          uint32_t wd, uint32_t ws, uint32_t wt)
2899  {
2900      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2901      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2902      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2903  
2904      pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2905      pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2906      pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2907      pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2908      pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2909      pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2910      pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2911      pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2912      pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2913      pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2914      pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2915      pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2916      pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2917      pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2918      pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2919      pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2920  }
2921  
2922  void helper_msa_min_s_h(CPUMIPSState *env,
2923                          uint32_t wd, uint32_t ws, uint32_t wt)
2924  {
2925      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2926      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2927      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2928  
2929      pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2930      pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2931      pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2932      pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2933      pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2934      pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2935      pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2936      pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2937  }
2938  
2939  void helper_msa_min_s_w(CPUMIPSState *env,
2940                          uint32_t wd, uint32_t ws, uint32_t wt)
2941  {
2942      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2943      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2944      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2945  
2946      pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2947      pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2948      pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2949      pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2950  }
2951  
2952  void helper_msa_min_s_d(CPUMIPSState *env,
2953                          uint32_t wd, uint32_t ws, uint32_t wt)
2954  {
2955      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2956      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2957      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2958  
2959      pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2960      pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2961  }
2962  
2963  
2964  static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
2965  {
2966      uint64_t u_arg1 = UNSIGNED(arg1, df);
2967      uint64_t u_arg2 = UNSIGNED(arg2, df);
2968      return u_arg1 < u_arg2 ? arg1 : arg2;
2969  }
2970  
2971  void helper_msa_min_u_b(CPUMIPSState *env,
2972                          uint32_t wd, uint32_t ws, uint32_t wt)
2973  {
2974      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2975      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2976      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2977  
2978      pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2979      pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2980      pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2981      pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2982      pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2983      pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2984      pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2985      pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2986      pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2987      pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2988      pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2989      pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2990      pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2991      pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2992      pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2993      pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2994  }
2995  
2996  void helper_msa_min_u_h(CPUMIPSState *env,
2997                          uint32_t wd, uint32_t ws, uint32_t wt)
2998  {
2999      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3000      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3001      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3002  
3003      pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3004      pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3005      pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3006      pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3007      pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3008      pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3009      pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3010      pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3011  }
3012  
3013  void helper_msa_min_u_w(CPUMIPSState *env,
3014                          uint32_t wd, uint32_t ws, uint32_t wt)
3015  {
3016      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3017      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3018      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3019  
3020      pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3021      pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3022      pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3023      pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3024  }
3025  
3026  void helper_msa_min_u_d(CPUMIPSState *env,
3027                          uint32_t wd, uint32_t ws, uint32_t wt)
3028  {
3029      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3030      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3031      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3032  
3033      pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3034      pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3035  }
3036  
3037  
3038  /*
3039   * Int Modulo
3040   * ----------
3041   *
3042   * +---------------+----------------------------------------------------------+
3043   * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3044   * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3045   * | MOD_S.W       | Vector Signed Modulo (word)                              |
3046   * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3047   * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3048   * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3049   * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3050   * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3051   * +---------------+----------------------------------------------------------+
3052   */
3053  
3054  static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
3055  {
3056      if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
3057          return 0;
3058      }
3059      return arg2 ? arg1 % arg2 : arg1;
3060  }
3061  
3062  void helper_msa_mod_s_b(CPUMIPSState *env,
3063                          uint32_t wd, uint32_t ws, uint32_t wt)
3064  {
3065      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3066      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3067      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3068  
3069      pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3070      pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3071      pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3072      pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3073      pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3074      pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3075      pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3076      pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3077      pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3078      pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3079      pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3080      pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3081      pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3082      pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3083      pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3084      pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3085  }
3086  
3087  void helper_msa_mod_s_h(CPUMIPSState *env,
3088                          uint32_t wd, uint32_t ws, uint32_t wt)
3089  {
3090      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3091      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3092      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3093  
3094      pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3095      pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3096      pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3097      pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3098      pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3099      pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3100      pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3101      pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3102  }
3103  
3104  void helper_msa_mod_s_w(CPUMIPSState *env,
3105                          uint32_t wd, uint32_t ws, uint32_t wt)
3106  {
3107      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3108      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3109      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3110  
3111      pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3112      pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3113      pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3114      pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3115  }
3116  
3117  void helper_msa_mod_s_d(CPUMIPSState *env,
3118                          uint32_t wd, uint32_t ws, uint32_t wt)
3119  {
3120      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3121      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3122      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3123  
3124      pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3125      pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3126  }
3127  
3128  static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
3129  {
3130      uint64_t u_arg1 = UNSIGNED(arg1, df);
3131      uint64_t u_arg2 = UNSIGNED(arg2, df);
3132      return u_arg2 ? u_arg1 % u_arg2 : u_arg1;
3133  }
3134  
3135  void helper_msa_mod_u_b(CPUMIPSState *env,
3136                          uint32_t wd, uint32_t ws, uint32_t wt)
3137  {
3138      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3139      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3140      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3141  
3142      pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3143      pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3144      pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3145      pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3146      pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3147      pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3148      pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3149      pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3150      pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3151      pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3152      pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3153      pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3154      pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3155      pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3156      pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3157      pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3158  }
3159  
3160  void helper_msa_mod_u_h(CPUMIPSState *env,
3161                          uint32_t wd, uint32_t ws, uint32_t wt)
3162  {
3163      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3164      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3165      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3166  
3167      pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3168      pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3169      pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3170      pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3171      pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3172      pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3173      pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3174      pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3175  }
3176  
3177  void helper_msa_mod_u_w(CPUMIPSState *env,
3178                          uint32_t wd, uint32_t ws, uint32_t wt)
3179  {
3180      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3181      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3182      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3183  
3184      pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3185      pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3186      pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3187      pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3188  }
3189  
3190  void helper_msa_mod_u_d(CPUMIPSState *env,
3191                          uint32_t wd, uint32_t ws, uint32_t wt)
3192  {
3193      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3194      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3195      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3196  
3197      pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3198      pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3199  }
3200  
3201  
3202  /*
3203   * Int Multiply
3204   * ------------
3205   *
3206   * +---------------+----------------------------------------------------------+
3207   * | MADDV.B       | Vector Multiply and Add (byte)                           |
3208   * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3209   * | MADDV.W       | Vector Multiply and Add (word)                           |
3210   * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3211   * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3212   * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3213   * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3214   * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3215   * | MULV.B        | Vector Multiply (byte)                                   |
3216   * | MULV.H        | Vector Multiply (halfword)                               |
3217   * | MULV.W        | Vector Multiply (word)                                   |
3218   * | MULV.D        | Vector Multiply (doubleword)                             |
3219   * +---------------+----------------------------------------------------------+
3220   */
3221  
3222  static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
3223                                     int64_t arg2)
3224  {
3225      return dest + arg1 * arg2;
3226  }
3227  
3228  void helper_msa_maddv_b(CPUMIPSState *env,
3229                          uint32_t wd, uint32_t ws, uint32_t wt)
3230  {
3231      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3232      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3233      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3234  
3235      pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3236      pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3237      pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3238      pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3239      pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3240      pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3241      pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3242      pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3243      pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3244      pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3245      pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3246      pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3247      pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3248      pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3249      pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3250      pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3251  }
3252  
3253  void helper_msa_maddv_h(CPUMIPSState *env,
3254                          uint32_t wd, uint32_t ws, uint32_t wt)
3255  {
3256      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3257      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3258      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3259  
3260      pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3261      pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3262      pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3263      pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3264      pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3265      pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3266      pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3267      pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3268  }
3269  
3270  void helper_msa_maddv_w(CPUMIPSState *env,
3271                          uint32_t wd, uint32_t ws, uint32_t wt)
3272  {
3273      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3274      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3275      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3276  
3277      pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3278      pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3279      pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3280      pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3281  }
3282  
3283  void helper_msa_maddv_d(CPUMIPSState *env,
3284                          uint32_t wd, uint32_t ws, uint32_t wt)
3285  {
3286      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3287      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3288      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3289  
3290      pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3291      pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3292  }
3293  
3294  static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
3295                                     int64_t arg2)
3296  {
3297      return dest - arg1 * arg2;
3298  }
3299  
3300  void helper_msa_msubv_b(CPUMIPSState *env,
3301                          uint32_t wd, uint32_t ws, uint32_t wt)
3302  {
3303      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3304      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3305      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3306  
3307      pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3308      pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3309      pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3310      pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3311      pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3312      pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3313      pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3314      pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3315      pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3316      pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3317      pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3318      pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3319      pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3320      pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3321      pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3322      pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3323  }
3324  
3325  void helper_msa_msubv_h(CPUMIPSState *env,
3326                          uint32_t wd, uint32_t ws, uint32_t wt)
3327  {
3328      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3329      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3330      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3331  
3332      pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3333      pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3334      pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3335      pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3336      pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3337      pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3338      pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3339      pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3340  }
3341  
3342  void helper_msa_msubv_w(CPUMIPSState *env,
3343                          uint32_t wd, uint32_t ws, uint32_t wt)
3344  {
3345      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3346      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3347      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3348  
3349      pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3350      pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3351      pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3352      pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3353  }
3354  
3355  void helper_msa_msubv_d(CPUMIPSState *env,
3356                          uint32_t wd, uint32_t ws, uint32_t wt)
3357  {
3358      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3359      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3360      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3361  
3362      pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3363      pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3364  }
3365  
3366  
3367  static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
3368  {
3369      return arg1 * arg2;
3370  }
3371  
3372  void helper_msa_mulv_b(CPUMIPSState *env,
3373                         uint32_t wd, uint32_t ws, uint32_t wt)
3374  {
3375      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3376      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3377      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3378  
3379      pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3380      pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3381      pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3382      pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3383      pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3384      pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3385      pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3386      pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3387      pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3388      pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3389      pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3390      pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3391      pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3392      pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3393      pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3394      pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3395  }
3396  
3397  void helper_msa_mulv_h(CPUMIPSState *env,
3398                         uint32_t wd, uint32_t ws, uint32_t wt)
3399  {
3400      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3401      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3402      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3403  
3404      pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3405      pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3406      pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3407      pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3408      pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3409      pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3410      pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3411      pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3412  }
3413  
3414  void helper_msa_mulv_w(CPUMIPSState *env,
3415                         uint32_t wd, uint32_t ws, uint32_t wt)
3416  {
3417      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3418      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3419      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3420  
3421      pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3422      pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3423      pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3424      pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3425  }
3426  
3427  void helper_msa_mulv_d(CPUMIPSState *env,
3428                         uint32_t wd, uint32_t ws, uint32_t wt)
3429  {
3430      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3431      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3432      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3433  
3434      pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3435      pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3436  }
3437  
3438  
3439  /*
3440   * Int Subtract
3441   * ------------
3442   *
3443   * +---------------+----------------------------------------------------------+
3444   * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3445   * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3446   * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3447   * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3448   * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3449   * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3450   * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3451   * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3452   * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3453   * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3454   * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3455   * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3456   * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3457   * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3458   * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3459   * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3460   * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3461   * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3462   * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3463   * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3464   * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3465   * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3466   * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3467   * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3468   * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3469   * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3470   * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3471   * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3472   * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3473   * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3474   * | SUBV.B        | Vector Subtract (byte)                                   |
3475   * | SUBV.H        | Vector Subtract (halfword)                               |
3476   * | SUBV.W        | Vector Subtract (word)                                   |
3477   * | SUBV.D        | Vector Subtract (doubleword)                             |
3478   * +---------------+----------------------------------------------------------+
3479   */
3480  
3481  
3482  static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
3483  {
3484      /* signed compare */
3485      return (arg1 < arg2) ?
3486          (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
3487  }
3488  
3489  void helper_msa_asub_s_b(CPUMIPSState *env,
3490                           uint32_t wd, uint32_t ws, uint32_t wt)
3491  {
3492      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3493      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3494      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3495  
3496      pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3497      pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3498      pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3499      pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3500      pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3501      pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3502      pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3503      pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3504      pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3505      pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3506      pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3507      pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3508      pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3509      pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3510      pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3511      pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3512  }
3513  
3514  void helper_msa_asub_s_h(CPUMIPSState *env,
3515                           uint32_t wd, uint32_t ws, uint32_t wt)
3516  {
3517      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3518      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3519      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3520  
3521      pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3522      pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3523      pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3524      pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3525      pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3526      pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3527      pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3528      pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3529  }
3530  
3531  void helper_msa_asub_s_w(CPUMIPSState *env,
3532                           uint32_t wd, uint32_t ws, uint32_t wt)
3533  {
3534      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3535      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3536      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3537  
3538      pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3539      pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3540      pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3541      pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3542  }
3543  
3544  void helper_msa_asub_s_d(CPUMIPSState *env,
3545                           uint32_t wd, uint32_t ws, uint32_t wt)
3546  {
3547      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3548      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3549      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3550  
3551      pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3552      pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3553  }
3554  
3555  
3556  static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
3557  {
3558      uint64_t u_arg1 = UNSIGNED(arg1, df);
3559      uint64_t u_arg2 = UNSIGNED(arg2, df);
3560      /* unsigned compare */
3561      return (u_arg1 < u_arg2) ?
3562          (uint64_t)(u_arg2 - u_arg1) : (uint64_t)(u_arg1 - u_arg2);
3563  }
3564  
3565  void helper_msa_asub_u_b(CPUMIPSState *env,
3566                           uint32_t wd, uint32_t ws, uint32_t wt)
3567  {
3568      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3569      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3570      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3571  
3572      pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3573      pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3574      pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3575      pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3576      pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3577      pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3578      pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3579      pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3580      pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3581      pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3582      pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3583      pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3584      pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3585      pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3586      pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3587      pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3588  }
3589  
3590  void helper_msa_asub_u_h(CPUMIPSState *env,
3591                           uint32_t wd, uint32_t ws, uint32_t wt)
3592  {
3593      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3594      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3595      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3596  
3597      pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3598      pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3599      pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3600      pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3601      pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3602      pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3603      pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3604      pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3605  }
3606  
3607  void helper_msa_asub_u_w(CPUMIPSState *env,
3608                           uint32_t wd, uint32_t ws, uint32_t wt)
3609  {
3610      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3611      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3612      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3613  
3614      pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3615      pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3616      pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3617      pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3618  }
3619  
3620  void helper_msa_asub_u_d(CPUMIPSState *env,
3621                           uint32_t wd, uint32_t ws, uint32_t wt)
3622  {
3623      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3624      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3625      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3626  
3627      pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3628      pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3629  }
3630  
3631  
3632  static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
3633  {
3634      return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);
3635  }
3636  
3637  void helper_msa_hsub_s_h(CPUMIPSState *env,
3638                           uint32_t wd, uint32_t ws, uint32_t wt)
3639  {
3640      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3641      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3642      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3643  
3644      pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3645      pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3646      pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3647      pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3648      pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3649      pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3650      pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3651      pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3652  }
3653  
3654  void helper_msa_hsub_s_w(CPUMIPSState *env,
3655                           uint32_t wd, uint32_t ws, uint32_t wt)
3656  {
3657      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3658      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3659      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3660  
3661      pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3662      pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3663      pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3664      pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3665  }
3666  
3667  void helper_msa_hsub_s_d(CPUMIPSState *env,
3668                           uint32_t wd, uint32_t ws, uint32_t wt)
3669  {
3670      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3671      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3672      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3673  
3674      pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3675      pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3676  }
3677  
3678  
3679  static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
3680  {
3681      return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);
3682  }
3683  
3684  void helper_msa_hsub_u_h(CPUMIPSState *env,
3685                           uint32_t wd, uint32_t ws, uint32_t wt)
3686  {
3687      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3688      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3689      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3690  
3691      pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3692      pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3693      pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3694      pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3695      pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3696      pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3697      pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3698      pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3699  }
3700  
3701  void helper_msa_hsub_u_w(CPUMIPSState *env,
3702                           uint32_t wd, uint32_t ws, uint32_t wt)
3703  {
3704      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3705      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3706      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3707  
3708      pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3709      pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3710      pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3711      pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3712  }
3713  
3714  void helper_msa_hsub_u_d(CPUMIPSState *env,
3715                           uint32_t wd, uint32_t ws, uint32_t wt)
3716  {
3717      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3718      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3719      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3720  
3721      pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3722      pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3723  }
3724  
3725  
3726  static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
3727  {
3728      int64_t max_int = DF_MAX_INT(df);
3729      int64_t min_int = DF_MIN_INT(df);
3730      if (arg2 > 0) {
3731          return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
3732      } else {
3733          return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
3734      }
3735  }
3736  
3737  void helper_msa_subs_s_b(CPUMIPSState *env,
3738                           uint32_t wd, uint32_t ws, uint32_t wt)
3739  {
3740      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3741      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3742      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3743  
3744      pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3745      pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3746      pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3747      pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3748      pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3749      pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3750      pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3751      pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3752      pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3753      pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3754      pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3755      pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3756      pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3757      pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3758      pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3759      pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3760  }
3761  
3762  void helper_msa_subs_s_h(CPUMIPSState *env,
3763                           uint32_t wd, uint32_t ws, uint32_t wt)
3764  {
3765      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3766      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3767      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3768  
3769      pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3770      pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3771      pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3772      pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3773      pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3774      pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3775      pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3776      pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3777  }
3778  
3779  void helper_msa_subs_s_w(CPUMIPSState *env,
3780                           uint32_t wd, uint32_t ws, uint32_t wt)
3781  {
3782      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3783      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3784      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3785  
3786      pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3787      pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3788      pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3789      pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3790  }
3791  
3792  void helper_msa_subs_s_d(CPUMIPSState *env,
3793                           uint32_t wd, uint32_t ws, uint32_t wt)
3794  {
3795      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3796      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3797      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3798  
3799      pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3800      pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3801  }
3802  
3803  
3804  static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
3805  {
3806      uint64_t u_arg1 = UNSIGNED(arg1, df);
3807      uint64_t u_arg2 = UNSIGNED(arg2, df);
3808      return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
3809  }
3810  
3811  void helper_msa_subs_u_b(CPUMIPSState *env,
3812                           uint32_t wd, uint32_t ws, uint32_t wt)
3813  {
3814      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3815      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3816      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3817  
3818      pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3819      pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3820      pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3821      pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3822      pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3823      pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3824      pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3825      pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3826      pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3827      pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3828      pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3829      pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3830      pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3831      pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3832      pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3833      pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3834  }
3835  
3836  void helper_msa_subs_u_h(CPUMIPSState *env,
3837                           uint32_t wd, uint32_t ws, uint32_t wt)
3838  {
3839      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3840      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3841      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3842  
3843      pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3844      pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3845      pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3846      pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3847      pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3848      pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3849      pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3850      pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3851  }
3852  
3853  void helper_msa_subs_u_w(CPUMIPSState *env,
3854                           uint32_t wd, uint32_t ws, uint32_t wt)
3855  {
3856      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3857      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3858      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3859  
3860      pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3861      pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3862      pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3863      pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3864  }
3865  
3866  void helper_msa_subs_u_d(CPUMIPSState *env,
3867                           uint32_t wd, uint32_t ws, uint32_t wt)
3868  {
3869      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3870      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3871      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3872  
3873      pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3874      pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3875  }
3876  
3877  
3878  static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
3879  {
3880      uint64_t u_arg1 = UNSIGNED(arg1, df);
3881      uint64_t max_uint = DF_MAX_UINT(df);
3882      if (arg2 >= 0) {
3883          uint64_t u_arg2 = (uint64_t)arg2;
3884          return (u_arg1 > u_arg2) ?
3885              (int64_t)(u_arg1 - u_arg2) :
3886              0;
3887      } else {
3888          uint64_t u_arg2 = (uint64_t)(-arg2);
3889          return (u_arg1 < max_uint - u_arg2) ?
3890              (int64_t)(u_arg1 + u_arg2) :
3891              (int64_t)max_uint;
3892      }
3893  }
3894  
3895  void helper_msa_subsus_u_b(CPUMIPSState *env,
3896                             uint32_t wd, uint32_t ws, uint32_t wt)
3897  {
3898      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3899      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3900      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3901  
3902      pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3903      pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3904      pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3905      pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3906      pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3907      pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3908      pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3909      pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3910      pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3911      pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3912      pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3913      pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3914      pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3915      pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3916      pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3917      pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3918  }
3919  
3920  void helper_msa_subsus_u_h(CPUMIPSState *env,
3921                             uint32_t wd, uint32_t ws, uint32_t wt)
3922  {
3923      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3924      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3925      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3926  
3927      pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3928      pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3929      pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3930      pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3931      pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3932      pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3933      pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3934      pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3935  }
3936  
3937  void helper_msa_subsus_u_w(CPUMIPSState *env,
3938                             uint32_t wd, uint32_t ws, uint32_t wt)
3939  {
3940      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3941      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3942      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3943  
3944      pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3945      pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3946      pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3947      pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3948  }
3949  
3950  void helper_msa_subsus_u_d(CPUMIPSState *env,
3951                             uint32_t wd, uint32_t ws, uint32_t wt)
3952  {
3953      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3954      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3955      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3956  
3957      pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3958      pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3959  }
3960  
3961  
3962  static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
3963  {
3964      uint64_t u_arg1 = UNSIGNED(arg1, df);
3965      uint64_t u_arg2 = UNSIGNED(arg2, df);
3966      int64_t max_int = DF_MAX_INT(df);
3967      int64_t min_int = DF_MIN_INT(df);
3968      if (u_arg1 > u_arg2) {
3969          return u_arg1 - u_arg2 < (uint64_t)max_int ?
3970              (int64_t)(u_arg1 - u_arg2) :
3971              max_int;
3972      } else {
3973          return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
3974              (int64_t)(u_arg1 - u_arg2) :
3975              min_int;
3976      }
3977  }
3978  
3979  void helper_msa_subsuu_s_b(CPUMIPSState *env,
3980                             uint32_t wd, uint32_t ws, uint32_t wt)
3981  {
3982      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3983      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3984      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3985  
3986      pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3987      pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3988      pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3989      pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3990      pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3991      pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3992      pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3993      pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3994      pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3995      pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3996      pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3997      pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3998      pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3999      pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
4000      pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4001      pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4002  }
4003  
4004  void helper_msa_subsuu_s_h(CPUMIPSState *env,
4005                             uint32_t wd, uint32_t ws, uint32_t wt)
4006  {
4007      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4008      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4009      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4010  
4011      pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4012      pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4013      pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4014      pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4015      pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4016      pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4017      pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4018      pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4019  }
4020  
4021  void helper_msa_subsuu_s_w(CPUMIPSState *env,
4022                             uint32_t wd, uint32_t ws, uint32_t wt)
4023  {
4024      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4025      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4026      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4027  
4028      pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4029      pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4030      pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4031      pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4032  }
4033  
4034  void helper_msa_subsuu_s_d(CPUMIPSState *env,
4035                             uint32_t wd, uint32_t ws, uint32_t wt)
4036  {
4037      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4038      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4039      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4040  
4041      pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4042      pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4043  }
4044  
4045  
4046  static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
4047  {
4048      return arg1 - arg2;
4049  }
4050  
4051  void helper_msa_subv_b(CPUMIPSState *env,
4052                         uint32_t wd, uint32_t ws, uint32_t wt)
4053  {
4054      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4055      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4056      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4057  
4058      pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4059      pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4060      pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4061      pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4062      pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4063      pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4064      pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4065      pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4066      pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4067      pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4068      pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4069      pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4070      pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4071      pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4072      pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4073      pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4074  }
4075  
4076  void helper_msa_subv_h(CPUMIPSState *env,
4077                         uint32_t wd, uint32_t ws, uint32_t wt)
4078  {
4079      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4080      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4081      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4082  
4083      pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4084      pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4085      pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4086      pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4087      pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4088      pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4089      pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4090      pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4091  }
4092  
4093  void helper_msa_subv_w(CPUMIPSState *env,
4094                         uint32_t wd, uint32_t ws, uint32_t wt)
4095  {
4096      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4097      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4098      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4099  
4100      pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4101      pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4102      pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4103      pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4104  }
4105  
4106  void helper_msa_subv_d(CPUMIPSState *env,
4107                         uint32_t wd, uint32_t ws, uint32_t wt)
4108  {
4109      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4110      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4111      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4112  
4113      pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4114      pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4115  }
4116  
4117  
4118  /*
4119   * Interleave
4120   * ----------
4121   *
4122   * +---------------+----------------------------------------------------------+
4123   * | ILVEV.B       | Vector Interleave Even (byte)                            |
4124   * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4125   * | ILVEV.W       | Vector Interleave Even (word)                            |
4126   * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4127   * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4128   * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4129   * | ILVOD.W       | Vector Interleave Odd (word)                             |
4130   * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4131   * | ILVL.B        | Vector Interleave Left (byte)                            |
4132   * | ILVL.H        | Vector Interleave Left (halfword)                        |
4133   * | ILVL.W        | Vector Interleave Left (word)                            |
4134   * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4135   * | ILVR.B        | Vector Interleave Right (byte)                           |
4136   * | ILVR.H        | Vector Interleave Right (halfword)                       |
4137   * | ILVR.W        | Vector Interleave Right (word)                           |
4138   * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4139   * +---------------+----------------------------------------------------------+
4140   */
4141  
4142  
4143  void helper_msa_ilvev_b(CPUMIPSState *env,
4144                          uint32_t wd, uint32_t ws, uint32_t wt)
4145  {
4146      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4147      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4148      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4149  
4150  #if HOST_BIG_ENDIAN
4151      pwd->b[8]  = pws->b[9];
4152      pwd->b[9]  = pwt->b[9];
4153      pwd->b[10] = pws->b[11];
4154      pwd->b[11] = pwt->b[11];
4155      pwd->b[12] = pws->b[13];
4156      pwd->b[13] = pwt->b[13];
4157      pwd->b[14] = pws->b[15];
4158      pwd->b[15] = pwt->b[15];
4159      pwd->b[0]  = pws->b[1];
4160      pwd->b[1]  = pwt->b[1];
4161      pwd->b[2]  = pws->b[3];
4162      pwd->b[3]  = pwt->b[3];
4163      pwd->b[4]  = pws->b[5];
4164      pwd->b[5]  = pwt->b[5];
4165      pwd->b[6]  = pws->b[7];
4166      pwd->b[7]  = pwt->b[7];
4167  #else
4168      pwd->b[15] = pws->b[14];
4169      pwd->b[14] = pwt->b[14];
4170      pwd->b[13] = pws->b[12];
4171      pwd->b[12] = pwt->b[12];
4172      pwd->b[11] = pws->b[10];
4173      pwd->b[10] = pwt->b[10];
4174      pwd->b[9]  = pws->b[8];
4175      pwd->b[8]  = pwt->b[8];
4176      pwd->b[7]  = pws->b[6];
4177      pwd->b[6]  = pwt->b[6];
4178      pwd->b[5]  = pws->b[4];
4179      pwd->b[4]  = pwt->b[4];
4180      pwd->b[3]  = pws->b[2];
4181      pwd->b[2]  = pwt->b[2];
4182      pwd->b[1]  = pws->b[0];
4183      pwd->b[0]  = pwt->b[0];
4184  #endif
4185  }
4186  
4187  void helper_msa_ilvev_h(CPUMIPSState *env,
4188                          uint32_t wd, uint32_t ws, uint32_t wt)
4189  {
4190      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4191      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4192      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4193  
4194  #if HOST_BIG_ENDIAN
4195      pwd->h[4] = pws->h[5];
4196      pwd->h[5] = pwt->h[5];
4197      pwd->h[6] = pws->h[7];
4198      pwd->h[7] = pwt->h[7];
4199      pwd->h[0] = pws->h[1];
4200      pwd->h[1] = pwt->h[1];
4201      pwd->h[2] = pws->h[3];
4202      pwd->h[3] = pwt->h[3];
4203  #else
4204      pwd->h[7] = pws->h[6];
4205      pwd->h[6] = pwt->h[6];
4206      pwd->h[5] = pws->h[4];
4207      pwd->h[4] = pwt->h[4];
4208      pwd->h[3] = pws->h[2];
4209      pwd->h[2] = pwt->h[2];
4210      pwd->h[1] = pws->h[0];
4211      pwd->h[0] = pwt->h[0];
4212  #endif
4213  }
4214  
4215  void helper_msa_ilvev_w(CPUMIPSState *env,
4216                          uint32_t wd, uint32_t ws, uint32_t wt)
4217  {
4218      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4219      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4220      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4221  
4222  #if HOST_BIG_ENDIAN
4223      pwd->w[2] = pws->w[3];
4224      pwd->w[3] = pwt->w[3];
4225      pwd->w[0] = pws->w[1];
4226      pwd->w[1] = pwt->w[1];
4227  #else
4228      pwd->w[3] = pws->w[2];
4229      pwd->w[2] = pwt->w[2];
4230      pwd->w[1] = pws->w[0];
4231      pwd->w[0] = pwt->w[0];
4232  #endif
4233  }
4234  
4235  void helper_msa_ilvev_d(CPUMIPSState *env,
4236                          uint32_t wd, uint32_t ws, uint32_t wt)
4237  {
4238      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4239      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4240      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4241  
4242      pwd->d[1] = pws->d[0];
4243      pwd->d[0] = pwt->d[0];
4244  }
4245  
4246  
4247  void helper_msa_ilvod_b(CPUMIPSState *env,
4248                          uint32_t wd, uint32_t ws, uint32_t wt)
4249  {
4250      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4251      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4252      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4253  
4254  #if HOST_BIG_ENDIAN
4255      pwd->b[7]  = pwt->b[6];
4256      pwd->b[6]  = pws->b[6];
4257      pwd->b[5]  = pwt->b[4];
4258      pwd->b[4]  = pws->b[4];
4259      pwd->b[3]  = pwt->b[2];
4260      pwd->b[2]  = pws->b[2];
4261      pwd->b[1]  = pwt->b[0];
4262      pwd->b[0]  = pws->b[0];
4263      pwd->b[15] = pwt->b[14];
4264      pwd->b[14] = pws->b[14];
4265      pwd->b[13] = pwt->b[12];
4266      pwd->b[12] = pws->b[12];
4267      pwd->b[11] = pwt->b[10];
4268      pwd->b[10] = pws->b[10];
4269      pwd->b[9]  = pwt->b[8];
4270      pwd->b[8]  = pws->b[8];
4271  #else
4272      pwd->b[0]  = pwt->b[1];
4273      pwd->b[1]  = pws->b[1];
4274      pwd->b[2]  = pwt->b[3];
4275      pwd->b[3]  = pws->b[3];
4276      pwd->b[4]  = pwt->b[5];
4277      pwd->b[5]  = pws->b[5];
4278      pwd->b[6]  = pwt->b[7];
4279      pwd->b[7]  = pws->b[7];
4280      pwd->b[8]  = pwt->b[9];
4281      pwd->b[9]  = pws->b[9];
4282      pwd->b[10] = pwt->b[11];
4283      pwd->b[11] = pws->b[11];
4284      pwd->b[12] = pwt->b[13];
4285      pwd->b[13] = pws->b[13];
4286      pwd->b[14] = pwt->b[15];
4287      pwd->b[15] = pws->b[15];
4288  #endif
4289  }
4290  
4291  void helper_msa_ilvod_h(CPUMIPSState *env,
4292                          uint32_t wd, uint32_t ws, uint32_t wt)
4293  {
4294      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4295      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4296      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4297  
4298  #if HOST_BIG_ENDIAN
4299      pwd->h[3] = pwt->h[2];
4300      pwd->h[2] = pws->h[2];
4301      pwd->h[1] = pwt->h[0];
4302      pwd->h[0] = pws->h[0];
4303      pwd->h[7] = pwt->h[6];
4304      pwd->h[6] = pws->h[6];
4305      pwd->h[5] = pwt->h[4];
4306      pwd->h[4] = pws->h[4];
4307  #else
4308      pwd->h[0] = pwt->h[1];
4309      pwd->h[1] = pws->h[1];
4310      pwd->h[2] = pwt->h[3];
4311      pwd->h[3] = pws->h[3];
4312      pwd->h[4] = pwt->h[5];
4313      pwd->h[5] = pws->h[5];
4314      pwd->h[6] = pwt->h[7];
4315      pwd->h[7] = pws->h[7];
4316  #endif
4317  }
4318  
4319  void helper_msa_ilvod_w(CPUMIPSState *env,
4320                          uint32_t wd, uint32_t ws, uint32_t wt)
4321  {
4322      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4323      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4324      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4325  
4326  #if HOST_BIG_ENDIAN
4327      pwd->w[1] = pwt->w[0];
4328      pwd->w[0] = pws->w[0];
4329      pwd->w[3] = pwt->w[2];
4330      pwd->w[2] = pws->w[2];
4331  #else
4332      pwd->w[0] = pwt->w[1];
4333      pwd->w[1] = pws->w[1];
4334      pwd->w[2] = pwt->w[3];
4335      pwd->w[3] = pws->w[3];
4336  #endif
4337  }
4338  
4339  void helper_msa_ilvod_d(CPUMIPSState *env,
4340                          uint32_t wd, uint32_t ws, uint32_t wt)
4341  {
4342      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4343      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4344      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4345  
4346      pwd->d[0] = pwt->d[1];
4347      pwd->d[1] = pws->d[1];
4348  }
4349  
4350  
4351  void helper_msa_ilvl_b(CPUMIPSState *env,
4352                         uint32_t wd, uint32_t ws, uint32_t wt)
4353  {
4354      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4355      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4356      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4357  
4358  #if HOST_BIG_ENDIAN
4359      pwd->b[7]  = pwt->b[15];
4360      pwd->b[6]  = pws->b[15];
4361      pwd->b[5]  = pwt->b[14];
4362      pwd->b[4]  = pws->b[14];
4363      pwd->b[3]  = pwt->b[13];
4364      pwd->b[2]  = pws->b[13];
4365      pwd->b[1]  = pwt->b[12];
4366      pwd->b[0]  = pws->b[12];
4367      pwd->b[15] = pwt->b[11];
4368      pwd->b[14] = pws->b[11];
4369      pwd->b[13] = pwt->b[10];
4370      pwd->b[12] = pws->b[10];
4371      pwd->b[11] = pwt->b[9];
4372      pwd->b[10] = pws->b[9];
4373      pwd->b[9]  = pwt->b[8];
4374      pwd->b[8]  = pws->b[8];
4375  #else
4376      pwd->b[0]  = pwt->b[8];
4377      pwd->b[1]  = pws->b[8];
4378      pwd->b[2]  = pwt->b[9];
4379      pwd->b[3]  = pws->b[9];
4380      pwd->b[4]  = pwt->b[10];
4381      pwd->b[5]  = pws->b[10];
4382      pwd->b[6]  = pwt->b[11];
4383      pwd->b[7]  = pws->b[11];
4384      pwd->b[8]  = pwt->b[12];
4385      pwd->b[9]  = pws->b[12];
4386      pwd->b[10] = pwt->b[13];
4387      pwd->b[11] = pws->b[13];
4388      pwd->b[12] = pwt->b[14];
4389      pwd->b[13] = pws->b[14];
4390      pwd->b[14] = pwt->b[15];
4391      pwd->b[15] = pws->b[15];
4392  #endif
4393  }
4394  
4395  void helper_msa_ilvl_h(CPUMIPSState *env,
4396                         uint32_t wd, uint32_t ws, uint32_t wt)
4397  {
4398      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4399      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4400      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4401  
4402  #if HOST_BIG_ENDIAN
4403      pwd->h[3] = pwt->h[7];
4404      pwd->h[2] = pws->h[7];
4405      pwd->h[1] = pwt->h[6];
4406      pwd->h[0] = pws->h[6];
4407      pwd->h[7] = pwt->h[5];
4408      pwd->h[6] = pws->h[5];
4409      pwd->h[5] = pwt->h[4];
4410      pwd->h[4] = pws->h[4];
4411  #else
4412      pwd->h[0] = pwt->h[4];
4413      pwd->h[1] = pws->h[4];
4414      pwd->h[2] = pwt->h[5];
4415      pwd->h[3] = pws->h[5];
4416      pwd->h[4] = pwt->h[6];
4417      pwd->h[5] = pws->h[6];
4418      pwd->h[6] = pwt->h[7];
4419      pwd->h[7] = pws->h[7];
4420  #endif
4421  }
4422  
4423  void helper_msa_ilvl_w(CPUMIPSState *env,
4424                         uint32_t wd, uint32_t ws, uint32_t wt)
4425  {
4426      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4427      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4428      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4429  
4430  #if HOST_BIG_ENDIAN
4431      pwd->w[1] = pwt->w[3];
4432      pwd->w[0] = pws->w[3];
4433      pwd->w[3] = pwt->w[2];
4434      pwd->w[2] = pws->w[2];
4435  #else
4436      pwd->w[0] = pwt->w[2];
4437      pwd->w[1] = pws->w[2];
4438      pwd->w[2] = pwt->w[3];
4439      pwd->w[3] = pws->w[3];
4440  #endif
4441  }
4442  
4443  void helper_msa_ilvl_d(CPUMIPSState *env,
4444                         uint32_t wd, uint32_t ws, uint32_t wt)
4445  {
4446      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4447      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4448      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4449  
4450      pwd->d[0] = pwt->d[1];
4451      pwd->d[1] = pws->d[1];
4452  }
4453  
4454  
4455  void helper_msa_ilvr_b(CPUMIPSState *env,
4456                         uint32_t wd, uint32_t ws, uint32_t wt)
4457  {
4458      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4459      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4460      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4461  
4462  #if HOST_BIG_ENDIAN
4463      pwd->b[8]  = pws->b[0];
4464      pwd->b[9]  = pwt->b[0];
4465      pwd->b[10] = pws->b[1];
4466      pwd->b[11] = pwt->b[1];
4467      pwd->b[12] = pws->b[2];
4468      pwd->b[13] = pwt->b[2];
4469      pwd->b[14] = pws->b[3];
4470      pwd->b[15] = pwt->b[3];
4471      pwd->b[0]  = pws->b[4];
4472      pwd->b[1]  = pwt->b[4];
4473      pwd->b[2]  = pws->b[5];
4474      pwd->b[3]  = pwt->b[5];
4475      pwd->b[4]  = pws->b[6];
4476      pwd->b[5]  = pwt->b[6];
4477      pwd->b[6]  = pws->b[7];
4478      pwd->b[7]  = pwt->b[7];
4479  #else
4480      pwd->b[15] = pws->b[7];
4481      pwd->b[14] = pwt->b[7];
4482      pwd->b[13] = pws->b[6];
4483      pwd->b[12] = pwt->b[6];
4484      pwd->b[11] = pws->b[5];
4485      pwd->b[10] = pwt->b[5];
4486      pwd->b[9]  = pws->b[4];
4487      pwd->b[8]  = pwt->b[4];
4488      pwd->b[7]  = pws->b[3];
4489      pwd->b[6]  = pwt->b[3];
4490      pwd->b[5]  = pws->b[2];
4491      pwd->b[4]  = pwt->b[2];
4492      pwd->b[3]  = pws->b[1];
4493      pwd->b[2]  = pwt->b[1];
4494      pwd->b[1]  = pws->b[0];
4495      pwd->b[0]  = pwt->b[0];
4496  #endif
4497  }
4498  
4499  void helper_msa_ilvr_h(CPUMIPSState *env,
4500                         uint32_t wd, uint32_t ws, uint32_t wt)
4501  {
4502      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4503      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4504      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4505  
4506  #if HOST_BIG_ENDIAN
4507      pwd->h[4] = pws->h[0];
4508      pwd->h[5] = pwt->h[0];
4509      pwd->h[6] = pws->h[1];
4510      pwd->h[7] = pwt->h[1];
4511      pwd->h[0] = pws->h[2];
4512      pwd->h[1] = pwt->h[2];
4513      pwd->h[2] = pws->h[3];
4514      pwd->h[3] = pwt->h[3];
4515  #else
4516      pwd->h[7] = pws->h[3];
4517      pwd->h[6] = pwt->h[3];
4518      pwd->h[5] = pws->h[2];
4519      pwd->h[4] = pwt->h[2];
4520      pwd->h[3] = pws->h[1];
4521      pwd->h[2] = pwt->h[1];
4522      pwd->h[1] = pws->h[0];
4523      pwd->h[0] = pwt->h[0];
4524  #endif
4525  }
4526  
4527  void helper_msa_ilvr_w(CPUMIPSState *env,
4528                         uint32_t wd, uint32_t ws, uint32_t wt)
4529  {
4530      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4531      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4532      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4533  
4534  #if HOST_BIG_ENDIAN
4535      pwd->w[2] = pws->w[0];
4536      pwd->w[3] = pwt->w[0];
4537      pwd->w[0] = pws->w[1];
4538      pwd->w[1] = pwt->w[1];
4539  #else
4540      pwd->w[3] = pws->w[1];
4541      pwd->w[2] = pwt->w[1];
4542      pwd->w[1] = pws->w[0];
4543      pwd->w[0] = pwt->w[0];
4544  #endif
4545  }
4546  
4547  void helper_msa_ilvr_d(CPUMIPSState *env,
4548                         uint32_t wd, uint32_t ws, uint32_t wt)
4549  {
4550      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4551      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4552      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4553  
4554      pwd->d[1] = pws->d[0];
4555      pwd->d[0] = pwt->d[0];
4556  }
4557  
4558  
4559  /*
4560   * Logic
4561   * -----
4562   *
4563   * +---------------+----------------------------------------------------------+
4564   * | AND.V         | Vector Logical And                                       |
4565   * | NOR.V         | Vector Logical Negated Or                                |
4566   * | OR.V          | Vector Logical Or                                        |
4567   * | XOR.V         | Vector Logical Exclusive Or                              |
4568   * +---------------+----------------------------------------------------------+
4569   */
4570  
4571  
4572  void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4573  {
4574      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4575      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4576      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4577  
4578      pwd->d[0] = pws->d[0] & pwt->d[0];
4579      pwd->d[1] = pws->d[1] & pwt->d[1];
4580  }
4581  
4582  void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4583  {
4584      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4585      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4586      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4587  
4588      pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4589      pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4590  }
4591  
4592  void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4593  {
4594      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4595      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4596      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4597  
4598      pwd->d[0] = pws->d[0] | pwt->d[0];
4599      pwd->d[1] = pws->d[1] | pwt->d[1];
4600  }
4601  
4602  void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4603  {
4604      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4605      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4606      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4607  
4608      pwd->d[0] = pws->d[0] ^ pwt->d[0];
4609      pwd->d[1] = pws->d[1] ^ pwt->d[1];
4610  }
4611  
4612  
4613  /*
4614   * Move
4615   * ----
4616   *
4617   * +---------------+----------------------------------------------------------+
4618   * | MOVE.V        | Vector Move                                              |
4619   * +---------------+----------------------------------------------------------+
4620   */
4621  
4622  static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4623  {
4624      pwd->d[0] = pws->d[0];
4625      pwd->d[1] = pws->d[1];
4626  }
4627  
4628  void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4629  {
4630      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4631      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4632  
4633      msa_move_v(pwd, pws);
4634  }
4635  
4636  
4637  /*
4638   * Pack
4639   * ----
4640   *
4641   * +---------------+----------------------------------------------------------+
4642   * | PCKEV.B       | Vector Pack Even (byte)                                  |
4643   * | PCKEV.H       | Vector Pack Even (halfword)                              |
4644   * | PCKEV.W       | Vector Pack Even (word)                                  |
4645   * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4646   * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4647   * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4648   * | PCKOD.W       | Vector Pack Odd (word)                                   |
4649   * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4650   * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4651   * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4652   * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4653   * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4654   * +---------------+----------------------------------------------------------+
4655   */
4656  
4657  
4658  void helper_msa_pckev_b(CPUMIPSState *env,
4659                          uint32_t wd, uint32_t ws, uint32_t wt)
4660  {
4661      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4662      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4663      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4664  
4665  #if HOST_BIG_ENDIAN
4666      pwd->b[8]  = pws->b[9];
4667      pwd->b[10] = pws->b[13];
4668      pwd->b[12] = pws->b[1];
4669      pwd->b[14] = pws->b[5];
4670      pwd->b[0]  = pwt->b[9];
4671      pwd->b[2]  = pwt->b[13];
4672      pwd->b[4]  = pwt->b[1];
4673      pwd->b[6]  = pwt->b[5];
4674      pwd->b[9]  = pws->b[11];
4675      pwd->b[13] = pws->b[3];
4676      pwd->b[1]  = pwt->b[11];
4677      pwd->b[5]  = pwt->b[3];
4678      pwd->b[11] = pws->b[15];
4679      pwd->b[3]  = pwt->b[15];
4680      pwd->b[15] = pws->b[7];
4681      pwd->b[7]  = pwt->b[7];
4682  #else
4683      pwd->b[15] = pws->b[14];
4684      pwd->b[13] = pws->b[10];
4685      pwd->b[11] = pws->b[6];
4686      pwd->b[9]  = pws->b[2];
4687      pwd->b[7]  = pwt->b[14];
4688      pwd->b[5]  = pwt->b[10];
4689      pwd->b[3]  = pwt->b[6];
4690      pwd->b[1]  = pwt->b[2];
4691      pwd->b[14] = pws->b[12];
4692      pwd->b[10] = pws->b[4];
4693      pwd->b[6]  = pwt->b[12];
4694      pwd->b[2]  = pwt->b[4];
4695      pwd->b[12] = pws->b[8];
4696      pwd->b[4]  = pwt->b[8];
4697      pwd->b[8]  = pws->b[0];
4698      pwd->b[0]  = pwt->b[0];
4699  #endif
4700  }
4701  
4702  void helper_msa_pckev_h(CPUMIPSState *env,
4703                          uint32_t wd, uint32_t ws, uint32_t wt)
4704  {
4705      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4706      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4707      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4708  
4709  #if HOST_BIG_ENDIAN
4710      pwd->h[4] = pws->h[5];
4711      pwd->h[6] = pws->h[1];
4712      pwd->h[0] = pwt->h[5];
4713      pwd->h[2] = pwt->h[1];
4714      pwd->h[5] = pws->h[7];
4715      pwd->h[1] = pwt->h[7];
4716      pwd->h[7] = pws->h[3];
4717      pwd->h[3] = pwt->h[3];
4718  #else
4719      pwd->h[7] = pws->h[6];
4720      pwd->h[5] = pws->h[2];
4721      pwd->h[3] = pwt->h[6];
4722      pwd->h[1] = pwt->h[2];
4723      pwd->h[6] = pws->h[4];
4724      pwd->h[2] = pwt->h[4];
4725      pwd->h[4] = pws->h[0];
4726      pwd->h[0] = pwt->h[0];
4727  #endif
4728  }
4729  
4730  void helper_msa_pckev_w(CPUMIPSState *env,
4731                          uint32_t wd, uint32_t ws, uint32_t wt)
4732  {
4733      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4734      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4735      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4736  
4737  #if HOST_BIG_ENDIAN
4738      pwd->w[2] = pws->w[3];
4739      pwd->w[0] = pwt->w[3];
4740      pwd->w[3] = pws->w[1];
4741      pwd->w[1] = pwt->w[1];
4742  #else
4743      pwd->w[3] = pws->w[2];
4744      pwd->w[1] = pwt->w[2];
4745      pwd->w[2] = pws->w[0];
4746      pwd->w[0] = pwt->w[0];
4747  #endif
4748  }
4749  
4750  void helper_msa_pckev_d(CPUMIPSState *env,
4751                          uint32_t wd, uint32_t ws, uint32_t wt)
4752  {
4753      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4754      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4755      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4756  
4757      pwd->d[1] = pws->d[0];
4758      pwd->d[0] = pwt->d[0];
4759  }
4760  
4761  
4762  void helper_msa_pckod_b(CPUMIPSState *env,
4763                          uint32_t wd, uint32_t ws, uint32_t wt)
4764  {
4765      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4766      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4767      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4768  
4769  #if HOST_BIG_ENDIAN
4770      pwd->b[7]  = pwt->b[6];
4771      pwd->b[5]  = pwt->b[2];
4772      pwd->b[3]  = pwt->b[14];
4773      pwd->b[1]  = pwt->b[10];
4774      pwd->b[15] = pws->b[6];
4775      pwd->b[13] = pws->b[2];
4776      pwd->b[11] = pws->b[14];
4777      pwd->b[9]  = pws->b[10];
4778      pwd->b[6]  = pwt->b[4];
4779      pwd->b[2]  = pwt->b[12];
4780      pwd->b[14] = pws->b[4];
4781      pwd->b[10] = pws->b[12];
4782      pwd->b[4]  = pwt->b[0];
4783      pwd->b[12] = pws->b[0];
4784      pwd->b[0]  = pwt->b[8];
4785      pwd->b[8]  = pws->b[8];
4786  #else
4787      pwd->b[0]  = pwt->b[1];
4788      pwd->b[2]  = pwt->b[5];
4789      pwd->b[4]  = pwt->b[9];
4790      pwd->b[6]  = pwt->b[13];
4791      pwd->b[8]  = pws->b[1];
4792      pwd->b[10] = pws->b[5];
4793      pwd->b[12] = pws->b[9];
4794      pwd->b[14] = pws->b[13];
4795      pwd->b[1]  = pwt->b[3];
4796      pwd->b[5]  = pwt->b[11];
4797      pwd->b[9]  = pws->b[3];
4798      pwd->b[13] = pws->b[11];
4799      pwd->b[3]  = pwt->b[7];
4800      pwd->b[11] = pws->b[7];
4801      pwd->b[7]  = pwt->b[15];
4802      pwd->b[15] = pws->b[15];
4803  #endif
4804  
4805  }
4806  
4807  void helper_msa_pckod_h(CPUMIPSState *env,
4808                          uint32_t wd, uint32_t ws, uint32_t wt)
4809  {
4810      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4811      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4812      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4813  
4814  #if HOST_BIG_ENDIAN
4815      pwd->h[3] = pwt->h[2];
4816      pwd->h[1] = pwt->h[6];
4817      pwd->h[7] = pws->h[2];
4818      pwd->h[5] = pws->h[6];
4819      pwd->h[2] = pwt->h[0];
4820      pwd->h[6] = pws->h[0];
4821      pwd->h[0] = pwt->h[4];
4822      pwd->h[4] = pws->h[4];
4823  #else
4824      pwd->h[0] = pwt->h[1];
4825      pwd->h[2] = pwt->h[5];
4826      pwd->h[4] = pws->h[1];
4827      pwd->h[6] = pws->h[5];
4828      pwd->h[1] = pwt->h[3];
4829      pwd->h[5] = pws->h[3];
4830      pwd->h[3] = pwt->h[7];
4831      pwd->h[7] = pws->h[7];
4832  #endif
4833  }
4834  
4835  void helper_msa_pckod_w(CPUMIPSState *env,
4836                          uint32_t wd, uint32_t ws, uint32_t wt)
4837  {
4838      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4839      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4840      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4841  
4842  #if HOST_BIG_ENDIAN
4843      pwd->w[1] = pwt->w[0];
4844      pwd->w[3] = pws->w[0];
4845      pwd->w[0] = pwt->w[2];
4846      pwd->w[2] = pws->w[2];
4847  #else
4848      pwd->w[0] = pwt->w[1];
4849      pwd->w[2] = pws->w[1];
4850      pwd->w[1] = pwt->w[3];
4851      pwd->w[3] = pws->w[3];
4852  #endif
4853  }
4854  
4855  void helper_msa_pckod_d(CPUMIPSState *env,
4856                          uint32_t wd, uint32_t ws, uint32_t wt)
4857  {
4858      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4859      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4860      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4861  
4862      pwd->d[0] = pwt->d[1];
4863      pwd->d[1] = pws->d[1];
4864  }
4865  
4866  
4867  /*
4868   * Shift
4869   * -----
4870   *
4871   * +---------------+----------------------------------------------------------+
4872   * | SLL.B         | Vector Shift Left (byte)                                 |
4873   * | SLL.H         | Vector Shift Left (halfword)                             |
4874   * | SLL.W         | Vector Shift Left (word)                                 |
4875   * | SLL.D         | Vector Shift Left (doubleword)                           |
4876   * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4877   * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4878   * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4879   * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4880   * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4881   * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4882   * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4883   * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4884   * | SRL.B         | Vector Shift Right Logical (byte)                        |
4885   * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4886   * | SRL.W         | Vector Shift Right Logical (word)                        |
4887   * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4888   * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4889   * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4890   * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4891   * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4892   * +---------------+----------------------------------------------------------+
4893   */
4894  
4895  
4896  static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
4897  {
4898      int32_t b_arg2 = BIT_POSITION(arg2, df);
4899      return arg1 << b_arg2;
4900  }
4901  
4902  void helper_msa_sll_b(CPUMIPSState *env,
4903                        uint32_t wd, uint32_t ws, uint32_t wt)
4904  {
4905      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4906      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4907      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4908  
4909      pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4910      pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4911      pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4912      pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4913      pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4914      pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4915      pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4916      pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4917      pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4918      pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4919      pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4920      pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4921      pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4922      pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4923      pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4924      pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4925  }
4926  
4927  void helper_msa_sll_h(CPUMIPSState *env,
4928                        uint32_t wd, uint32_t ws, uint32_t wt)
4929  {
4930      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4931      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4932      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4933  
4934      pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4935      pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4936      pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4937      pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4938      pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4939      pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4940      pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4941      pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4942  }
4943  
4944  void helper_msa_sll_w(CPUMIPSState *env,
4945                        uint32_t wd, uint32_t ws, uint32_t wt)
4946  {
4947      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4948      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4949      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4950  
4951      pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4952      pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4953      pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4954      pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4955  }
4956  
4957  void helper_msa_sll_d(CPUMIPSState *env,
4958                        uint32_t wd, uint32_t ws, uint32_t wt)
4959  {
4960      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4961      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4962      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4963  
4964      pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4965      pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4966  }
4967  
4968  
4969  static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
4970  {
4971      int32_t b_arg2 = BIT_POSITION(arg2, df);
4972      return arg1 >> b_arg2;
4973  }
4974  
4975  void helper_msa_sra_b(CPUMIPSState *env,
4976                        uint32_t wd, uint32_t ws, uint32_t wt)
4977  {
4978      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4979      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4980      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4981  
4982      pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4983      pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4984      pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4985      pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4986      pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4987      pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4988      pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4989      pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4990      pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4991      pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4992      pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4993      pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4994      pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4995      pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4996      pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4997      pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
4998  }
4999  
5000  void helper_msa_sra_h(CPUMIPSState *env,
5001                        uint32_t wd, uint32_t ws, uint32_t wt)
5002  {
5003      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5004      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5005      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5006  
5007      pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5008      pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5009      pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5010      pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5011      pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5012      pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5013      pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5014      pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5015  }
5016  
5017  void helper_msa_sra_w(CPUMIPSState *env,
5018                        uint32_t wd, uint32_t ws, uint32_t wt)
5019  {
5020      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5021      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5022      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5023  
5024      pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5025      pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5026      pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5027      pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5028  }
5029  
5030  void helper_msa_sra_d(CPUMIPSState *env,
5031                        uint32_t wd, uint32_t ws, uint32_t wt)
5032  {
5033      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5034      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5035      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5036  
5037      pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5038      pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5039  }
5040  
5041  
5042  static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
5043  {
5044      int32_t b_arg2 = BIT_POSITION(arg2, df);
5045      if (b_arg2 == 0) {
5046          return arg1;
5047      } else {
5048          int64_t r_bit = (arg1 >> (b_arg2 - 1)) & 1;
5049          return (arg1 >> b_arg2) + r_bit;
5050      }
5051  }
5052  
5053  void helper_msa_srar_b(CPUMIPSState *env,
5054                         uint32_t wd, uint32_t ws, uint32_t wt)
5055  {
5056      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5057      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5058      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5059  
5060      pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5061      pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5062      pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5063      pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5064      pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5065      pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5066      pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5067      pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5068      pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5069      pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5070      pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5071      pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5072      pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5073      pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5074      pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5075      pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5076  }
5077  
5078  void helper_msa_srar_h(CPUMIPSState *env,
5079                         uint32_t wd, uint32_t ws, uint32_t wt)
5080  {
5081      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5082      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5083      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5084  
5085      pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5086      pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5087      pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5088      pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5089      pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5090      pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5091      pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5092      pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5093  }
5094  
5095  void helper_msa_srar_w(CPUMIPSState *env,
5096                         uint32_t wd, uint32_t ws, uint32_t wt)
5097  {
5098      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5099      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5100      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5101  
5102      pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5103      pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5104      pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5105      pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5106  }
5107  
5108  void helper_msa_srar_d(CPUMIPSState *env,
5109                         uint32_t wd, uint32_t ws, uint32_t wt)
5110  {
5111      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5112      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5113      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5114  
5115      pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5116      pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5117  }
5118  
5119  
5120  static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
5121  {
5122      uint64_t u_arg1 = UNSIGNED(arg1, df);
5123      int32_t b_arg2 = BIT_POSITION(arg2, df);
5124      return u_arg1 >> b_arg2;
5125  }
5126  
5127  void helper_msa_srl_b(CPUMIPSState *env,
5128                        uint32_t wd, uint32_t ws, uint32_t wt)
5129  {
5130      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5131      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5132      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5133  
5134      pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5135      pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5136      pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5137      pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5138      pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5139      pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5140      pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5141      pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5142      pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5143      pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5144      pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5145      pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5146      pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5147      pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5148      pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5149      pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5150  }
5151  
5152  void helper_msa_srl_h(CPUMIPSState *env,
5153                        uint32_t wd, uint32_t ws, uint32_t wt)
5154  {
5155      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5156      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5157      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5158  
5159      pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5160      pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5161      pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5162      pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5163      pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5164      pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5165      pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5166      pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5167  }
5168  
5169  void helper_msa_srl_w(CPUMIPSState *env,
5170                        uint32_t wd, uint32_t ws, uint32_t wt)
5171  {
5172      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5173      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5174      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5175  
5176      pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5177      pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5178      pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5179      pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5180  }
5181  
5182  void helper_msa_srl_d(CPUMIPSState *env,
5183                        uint32_t wd, uint32_t ws, uint32_t wt)
5184  {
5185      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5186      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5187      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5188  
5189      pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5190      pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5191  }
5192  
5193  
5194  static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
5195  {
5196      uint64_t u_arg1 = UNSIGNED(arg1, df);
5197      int32_t b_arg2 = BIT_POSITION(arg2, df);
5198      if (b_arg2 == 0) {
5199          return u_arg1;
5200      } else {
5201          uint64_t r_bit = (u_arg1 >> (b_arg2 - 1)) & 1;
5202          return (u_arg1 >> b_arg2) + r_bit;
5203      }
5204  }
5205  
5206  void helper_msa_srlr_b(CPUMIPSState *env,
5207                         uint32_t wd, uint32_t ws, uint32_t wt)
5208  {
5209      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5210      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5211      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5212  
5213      pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5214      pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5215      pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5216      pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5217      pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5218      pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5219      pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5220      pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5221      pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5222      pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5223      pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5224      pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5225      pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5226      pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5227      pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5228      pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5229  }
5230  
5231  void helper_msa_srlr_h(CPUMIPSState *env,
5232                         uint32_t wd, uint32_t ws, uint32_t wt)
5233  {
5234      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5235      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5236      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5237  
5238      pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5239      pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5240      pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5241      pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5242      pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5243      pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5244      pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5245      pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5246  }
5247  
5248  void helper_msa_srlr_w(CPUMIPSState *env,
5249                         uint32_t wd, uint32_t ws, uint32_t wt)
5250  {
5251      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5252      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5253      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5254  
5255      pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5256      pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5257      pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5258      pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5259  }
5260  
5261  void helper_msa_srlr_d(CPUMIPSState *env,
5262                         uint32_t wd, uint32_t ws, uint32_t wt)
5263  {
5264      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5265      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5266      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5267  
5268      pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5269      pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5270  }
5271  
5272  
5273  #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5274  void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5275          uint32_t i8)                                                    \
5276  {                                                                       \
5277      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5278      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5279      uint32_t i;                                                         \
5280      for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5281          DEST = OPERATION;                                               \
5282      }                                                                   \
5283  }
5284  
5285  MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5286  MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5287  MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5288  MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5289  
5290  #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5291              UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5292  MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5293          BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5294  
5295  #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5296              UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5297  MSA_FN_IMM8(bmzi_b, pwd->b[i],
5298          BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5299  
5300  #define BIT_SELECT(dest, arg1, arg2, df) \
5301              UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5302  MSA_FN_IMM8(bseli_b, pwd->b[i],
5303          BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5304  
5305  #undef BIT_SELECT
5306  #undef BIT_MOVE_IF_ZERO
5307  #undef BIT_MOVE_IF_NOT_ZERO
5308  #undef MSA_FN_IMM8
5309  
5310  #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5311  
5312  void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5313                         uint32_t ws, uint32_t imm)
5314  {
5315      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5316      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5317      wr_t wx, *pwx = &wx;
5318      uint32_t i;
5319  
5320      switch (df) {
5321      case DF_BYTE:
5322          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5323              pwx->b[i] = pws->b[SHF_POS(i, imm)];
5324          }
5325          break;
5326      case DF_HALF:
5327          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5328              pwx->h[i] = pws->h[SHF_POS(i, imm)];
5329          }
5330          break;
5331      case DF_WORD:
5332          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5333              pwx->w[i] = pws->w[SHF_POS(i, imm)];
5334          }
5335          break;
5336      default:
5337          g_assert_not_reached();
5338      }
5339      msa_move_v(pwd, pwx);
5340  }
5341  
5342  #define MSA_BINOP_IMM_DF(helper, func)                                  \
5343  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5344                          uint32_t wd, uint32_t ws, int32_t u5)           \
5345  {                                                                       \
5346      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5347      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5348      uint32_t i;                                                         \
5349                                                                          \
5350      switch (df) {                                                       \
5351      case DF_BYTE:                                                       \
5352          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5353              pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5354          }                                                               \
5355          break;                                                          \
5356      case DF_HALF:                                                       \
5357          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5358              pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5359          }                                                               \
5360          break;                                                          \
5361      case DF_WORD:                                                       \
5362          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5363              pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5364          }                                                               \
5365          break;                                                          \
5366      case DF_DOUBLE:                                                     \
5367          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5368              pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5369          }                                                               \
5370          break;                                                          \
5371      default:                                                            \
5372          g_assert_not_reached();                                         \
5373      }                                                                   \
5374  }
5375  
5376  MSA_BINOP_IMM_DF(addvi, addv)
5377  MSA_BINOP_IMM_DF(subvi, subv)
5378  MSA_BINOP_IMM_DF(ceqi, ceq)
5379  MSA_BINOP_IMM_DF(clei_s, cle_s)
5380  MSA_BINOP_IMM_DF(clei_u, cle_u)
5381  MSA_BINOP_IMM_DF(clti_s, clt_s)
5382  MSA_BINOP_IMM_DF(clti_u, clt_u)
5383  MSA_BINOP_IMM_DF(maxi_s, max_s)
5384  MSA_BINOP_IMM_DF(maxi_u, max_u)
5385  MSA_BINOP_IMM_DF(mini_s, min_s)
5386  MSA_BINOP_IMM_DF(mini_u, min_u)
5387  #undef MSA_BINOP_IMM_DF
5388  
5389  void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5390                         int32_t s10)
5391  {
5392      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5393      uint32_t i;
5394  
5395      switch (df) {
5396      case DF_BYTE:
5397          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5398              pwd->b[i] = (int8_t)s10;
5399          }
5400          break;
5401      case DF_HALF:
5402          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5403              pwd->h[i] = (int16_t)s10;
5404          }
5405          break;
5406      case DF_WORD:
5407          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5408              pwd->w[i] = (int32_t)s10;
5409          }
5410          break;
5411      case DF_DOUBLE:
5412          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5413              pwd->d[i] = (int64_t)s10;
5414          }
5415         break;
5416      default:
5417          g_assert_not_reached();
5418      }
5419  }
5420  
5421  static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
5422  {
5423      return arg < M_MIN_INT(m + 1) ? M_MIN_INT(m + 1) :
5424                                      arg > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) :
5425                                                               arg;
5426  }
5427  
5428  static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
5429  {
5430      uint64_t u_arg = UNSIGNED(arg, df);
5431      return  u_arg < M_MAX_UINT(m + 1) ? u_arg :
5432                                          M_MAX_UINT(m + 1);
5433  }
5434  
5435  #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5436  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5437                         uint32_t ws, uint32_t u5)                        \
5438  {                                                                       \
5439      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5440      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5441      uint32_t i;                                                         \
5442                                                                          \
5443      switch (df) {                                                       \
5444      case DF_BYTE:                                                       \
5445          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5446              pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5447          }                                                               \
5448          break;                                                          \
5449      case DF_HALF:                                                       \
5450          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5451              pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5452          }                                                               \
5453          break;                                                          \
5454      case DF_WORD:                                                       \
5455          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5456              pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5457          }                                                               \
5458          break;                                                          \
5459      case DF_DOUBLE:                                                     \
5460          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5461              pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5462          }                                                               \
5463          break;                                                          \
5464      default:                                                            \
5465          g_assert_not_reached();                                         \
5466      }                                                                   \
5467  }
5468  
5469  MSA_BINOP_IMMU_DF(slli, sll)
5470  MSA_BINOP_IMMU_DF(srai, sra)
5471  MSA_BINOP_IMMU_DF(srli, srl)
5472  MSA_BINOP_IMMU_DF(bclri, bclr)
5473  MSA_BINOP_IMMU_DF(bseti, bset)
5474  MSA_BINOP_IMMU_DF(bnegi, bneg)
5475  MSA_BINOP_IMMU_DF(sat_s, sat_s)
5476  MSA_BINOP_IMMU_DF(sat_u, sat_u)
5477  MSA_BINOP_IMMU_DF(srari, srar)
5478  MSA_BINOP_IMMU_DF(srlri, srlr)
5479  #undef MSA_BINOP_IMMU_DF
5480  
5481  #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5482  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5483                                    uint32_t wd, uint32_t ws, uint32_t u5) \
5484  {                                                                       \
5485      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5486      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5487      uint32_t i;                                                         \
5488                                                                          \
5489      switch (df) {                                                       \
5490      case DF_BYTE:                                                       \
5491          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5492              pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5493                                              u5);                        \
5494          }                                                               \
5495          break;                                                          \
5496      case DF_HALF:                                                       \
5497          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5498              pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5499                                              u5);                        \
5500          }                                                               \
5501          break;                                                          \
5502      case DF_WORD:                                                       \
5503          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5504              pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5505                                              u5);                        \
5506          }                                                               \
5507          break;                                                          \
5508      case DF_DOUBLE:                                                     \
5509          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5510              pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5511                                              u5);                        \
5512          }                                                               \
5513          break;                                                          \
5514      default:                                                            \
5515          g_assert_not_reached();                                         \
5516      }                                                                   \
5517  }
5518  
5519  MSA_TEROP_IMMU_DF(binsli, binsl)
5520  MSA_TEROP_IMMU_DF(binsri, binsr)
5521  #undef MSA_TEROP_IMMU_DF
5522  
5523  #define CONCATENATE_AND_SLIDE(s, k)             \
5524      do {                                        \
5525          for (i = 0; i < s; i++) {               \
5526              v[i]     = pws->b[s * k + i];       \
5527              v[i + s] = pwd->b[s * k + i];       \
5528          }                                       \
5529          for (i = 0; i < s; i++) {               \
5530              pwd->b[s * k + i] = v[i + n];       \
5531          }                                       \
5532      } while (0)
5533  
5534  static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5535                                wr_t *pws, target_ulong rt)
5536  {
5537      uint32_t n = rt % DF_ELEMENTS(df);
5538      uint8_t v[64];
5539      uint32_t i, k;
5540  
5541      switch (df) {
5542      case DF_BYTE:
5543          CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5544          break;
5545      case DF_HALF:
5546          for (k = 0; k < 2; k++) {
5547              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5548          }
5549          break;
5550      case DF_WORD:
5551          for (k = 0; k < 4; k++) {
5552              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5553          }
5554          break;
5555      case DF_DOUBLE:
5556          for (k = 0; k < 8; k++) {
5557              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5558          }
5559          break;
5560      default:
5561          g_assert_not_reached();
5562      }
5563  }
5564  
5565  static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
5566  {
5567      int64_t q_min = DF_MIN_INT(df);
5568      int64_t q_max = DF_MAX_INT(df);
5569  
5570      if (arg1 == q_min && arg2 == q_min) {
5571          return q_max;
5572      }
5573      return (arg1 * arg2) >> (DF_BITS(df) - 1);
5574  }
5575  
5576  static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
5577  {
5578      int64_t q_min = DF_MIN_INT(df);
5579      int64_t q_max = DF_MAX_INT(df);
5580      int64_t r_bit = 1 << (DF_BITS(df) - 2);
5581  
5582      if (arg1 == q_min && arg2 == q_min) {
5583          return q_max;
5584      }
5585      return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
5586  }
5587  
5588  #define MSA_BINOP_DF(func) \
5589  void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5590                                  uint32_t wd, uint32_t ws, uint32_t wt)  \
5591  {                                                                       \
5592      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5593      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5594      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5595                                                                          \
5596      switch (df) {                                                       \
5597      case DF_BYTE:                                                       \
5598          pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5599          pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5600          pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5601          pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5602          pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5603          pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5604          pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5605          pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5606          pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5607          pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5608          pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5609          pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5610          pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5611          pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5612          pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5613          pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5614          break;                                                          \
5615      case DF_HALF:                                                       \
5616          pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5617          pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5618          pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5619          pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5620          pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5621          pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5622          pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5623          pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5624          break;                                                          \
5625      case DF_WORD:                                                       \
5626          pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5627          pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5628          pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5629          pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5630          break;                                                          \
5631      case DF_DOUBLE:                                                     \
5632          pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5633          pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5634          break;                                                          \
5635      default:                                                            \
5636          g_assert_not_reached();                                         \
5637      }                                                                   \
5638  }
5639  
5640  MSA_BINOP_DF(mul_q)
5641  MSA_BINOP_DF(mulr_q)
5642  #undef MSA_BINOP_DF
5643  
5644  void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5645                         uint32_t ws, uint32_t rt)
5646  {
5647      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5648      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5649  
5650      msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5651  }
5652  
5653  static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
5654                                      int64_t arg2)
5655  {
5656      int64_t q_prod, q_ret;
5657  
5658      int64_t q_max = DF_MAX_INT(df);
5659      int64_t q_min = DF_MIN_INT(df);
5660  
5661      q_prod = arg1 * arg2;
5662      q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod) >> (DF_BITS(df) - 1);
5663  
5664      return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5665  }
5666  
5667  static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
5668                                      int64_t arg2)
5669  {
5670      int64_t q_prod, q_ret;
5671  
5672      int64_t q_max = DF_MAX_INT(df);
5673      int64_t q_min = DF_MIN_INT(df);
5674  
5675      q_prod = arg1 * arg2;
5676      q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod) >> (DF_BITS(df) - 1);
5677  
5678      return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5679  }
5680  
5681  static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
5682                                       int64_t arg2)
5683  {
5684      int64_t q_prod, q_ret;
5685  
5686      int64_t q_max = DF_MAX_INT(df);
5687      int64_t q_min = DF_MIN_INT(df);
5688      int64_t r_bit = 1 << (DF_BITS(df) - 2);
5689  
5690      q_prod = arg1 * arg2;
5691      q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);
5692  
5693      return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5694  }
5695  
5696  static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
5697                                       int64_t arg2)
5698  {
5699      int64_t q_prod, q_ret;
5700  
5701      int64_t q_max = DF_MAX_INT(df);
5702      int64_t q_min = DF_MIN_INT(df);
5703      int64_t r_bit = 1 << (DF_BITS(df) - 2);
5704  
5705      q_prod = arg1 * arg2;
5706      q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);
5707  
5708      return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
5709  }
5710  
5711  #define MSA_TEROP_DF(func) \
5712  void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5713                                  uint32_t ws, uint32_t wt)                     \
5714  {                                                                             \
5715      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5716      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5717      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5718                                                                                \
5719      switch (df) {                                                             \
5720      case DF_BYTE:                                                             \
5721          pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5722                                               pwt->b[0]);                      \
5723          pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5724                                               pwt->b[1]);                      \
5725          pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5726                                               pwt->b[2]);                      \
5727          pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5728                                               pwt->b[3]);                      \
5729          pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5730                                               pwt->b[4]);                      \
5731          pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5732                                               pwt->b[5]);                      \
5733          pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5734                                               pwt->b[6]);                      \
5735          pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5736                                               pwt->b[7]);                      \
5737          pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5738                                               pwt->b[8]);                      \
5739          pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5740                                               pwt->b[9]);                      \
5741          pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5742                                               pwt->b[10]);                     \
5743          pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5744                                               pwt->b[11]);                     \
5745          pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5746                                               pwt->b[12]);                     \
5747          pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5748                                               pwt->b[13]);                     \
5749          pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5750                                               pwt->b[14]);                     \
5751          pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5752                                               pwt->b[15]);                     \
5753          break;                                                                \
5754      case DF_HALF:                                                             \
5755          pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5756          pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5757          pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5758          pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5759          pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5760          pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5761          pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5762          pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5763          break;                                                                \
5764      case DF_WORD:                                                             \
5765          pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5766          pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5767          pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5768          pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5769          break;                                                                \
5770      case DF_DOUBLE:                                                           \
5771          pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5772          pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5773          break;                                                                \
5774      default:                                                                  \
5775          g_assert_not_reached();                                               \
5776      }                                                                         \
5777  }
5778  
5779  MSA_TEROP_DF(binsl)
5780  MSA_TEROP_DF(binsr)
5781  MSA_TEROP_DF(madd_q)
5782  MSA_TEROP_DF(msub_q)
5783  MSA_TEROP_DF(maddr_q)
5784  MSA_TEROP_DF(msubr_q)
5785  #undef MSA_TEROP_DF
5786  
5787  static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5788                                  wr_t *pws, target_ulong rt)
5789  {
5790      uint32_t n = rt % DF_ELEMENTS(df);
5791      uint32_t i;
5792  
5793      switch (df) {
5794      case DF_BYTE:
5795          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5796              pwd->b[i] = pws->b[n];
5797          }
5798          break;
5799      case DF_HALF:
5800          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5801              pwd->h[i] = pws->h[n];
5802          }
5803          break;
5804      case DF_WORD:
5805          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5806              pwd->w[i] = pws->w[n];
5807          }
5808          break;
5809      case DF_DOUBLE:
5810          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5811              pwd->d[i] = pws->d[n];
5812          }
5813         break;
5814      default:
5815          g_assert_not_reached();
5816      }
5817  }
5818  
5819  void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5820                           uint32_t ws, uint32_t rt)
5821  {
5822      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5823      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5824  
5825      msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5826  }
5827  
5828  #define MSA_DO_B MSA_DO(b)
5829  #define MSA_DO_H MSA_DO(h)
5830  #define MSA_DO_W MSA_DO(w)
5831  #define MSA_DO_D MSA_DO(d)
5832  
5833  #define MSA_LOOP_B MSA_LOOP(B)
5834  #define MSA_LOOP_H MSA_LOOP(H)
5835  #define MSA_LOOP_W MSA_LOOP(W)
5836  #define MSA_LOOP_D MSA_LOOP(D)
5837  
5838  #define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
5839  #define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
5840  #define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
5841  #define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)
5842  
5843  #define MSA_LOOP(DF) \
5844      do { \
5845          for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
5846              MSA_DO_ ## DF; \
5847          } \
5848      } while (0)
5849  
5850  #define MSA_FN_DF(FUNC)                                             \
5851  void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5852          uint32_t ws, uint32_t wt)                                   \
5853  {                                                                   \
5854      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
5855      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
5856      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
5857      wr_t wx, *pwx = &wx;                                            \
5858      uint32_t i;                                                     \
5859      switch (df) {                                                   \
5860      case DF_BYTE:                                                   \
5861          MSA_LOOP_B;                                                 \
5862          break;                                                      \
5863      case DF_HALF:                                                   \
5864          MSA_LOOP_H;                                                 \
5865          break;                                                      \
5866      case DF_WORD:                                                   \
5867          MSA_LOOP_W;                                                 \
5868          break;                                                      \
5869      case DF_DOUBLE:                                                 \
5870          MSA_LOOP_D;                                                 \
5871          break;                                                      \
5872      default:                                                        \
5873          g_assert_not_reached();                                     \
5874      }                                                               \
5875      msa_move_v(pwd, pwx);                                           \
5876  }
5877  
5878  #define MSA_LOOP_COND(DF) \
5879              (DF_ELEMENTS(DF) / 2)
5880  
5881  #define Rb(pwr, i) (pwr->b[i])
5882  #define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
5883  #define Rh(pwr, i) (pwr->h[i])
5884  #define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
5885  #define Rw(pwr, i) (pwr->w[i])
5886  #define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
5887  #define Rd(pwr, i) (pwr->d[i])
5888  #define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
5889  
5890  #undef MSA_LOOP_COND
5891  
5892  #define MSA_LOOP_COND(DF) \
5893              (DF_ELEMENTS(DF))
5894  
5895  #define MSA_DO(DF)                                                          \
5896      do {                                                                    \
5897          uint32_t n = DF_ELEMENTS(df);                                       \
5898          uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
5899          pwx->DF[i] =                                                        \
5900              (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
5901      } while (0)
5902  MSA_FN_DF(vshf_df)
5903  #undef MSA_DO
5904  #undef MSA_LOOP_COND
5905  #undef MSA_FN_DF
5906  
5907  
5908  void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5909                          uint32_t ws, uint32_t n)
5910  {
5911      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5912      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5913  
5914      msa_sld_df(df, pwd, pws, n);
5915  }
5916  
5917  void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5918                            uint32_t ws, uint32_t n)
5919  {
5920      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5921      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5922  
5923      msa_splat_df(df, pwd, pws, n);
5924  }
5925  
5926  void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5927                           uint32_t ws, uint32_t n)
5928  {
5929      n %= 16;
5930  #if HOST_BIG_ENDIAN
5931      if (n < 8) {
5932          n = 8 - n - 1;
5933      } else {
5934          n = 24 - n - 1;
5935      }
5936  #endif
5937      env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5938  }
5939  
5940  void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5941                           uint32_t ws, uint32_t n)
5942  {
5943      n %= 8;
5944  #if HOST_BIG_ENDIAN
5945      if (n < 4) {
5946          n = 4 - n - 1;
5947      } else {
5948          n = 12 - n - 1;
5949      }
5950  #endif
5951      env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5952  }
5953  
5954  void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5955                           uint32_t ws, uint32_t n)
5956  {
5957      n %= 4;
5958  #if HOST_BIG_ENDIAN
5959      if (n < 2) {
5960          n = 2 - n - 1;
5961      } else {
5962          n = 6 - n - 1;
5963      }
5964  #endif
5965      env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5966  }
5967  
5968  void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5969                           uint32_t ws, uint32_t n)
5970  {
5971      n %= 2;
5972      env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5973  }
5974  
5975  void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5976                           uint32_t ws, uint32_t n)
5977  {
5978      n %= 16;
5979  #if HOST_BIG_ENDIAN
5980      if (n < 8) {
5981          n = 8 - n - 1;
5982      } else {
5983          n = 24 - n - 1;
5984      }
5985  #endif
5986      env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5987  }
5988  
5989  void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5990                           uint32_t ws, uint32_t n)
5991  {
5992      n %= 8;
5993  #if HOST_BIG_ENDIAN
5994      if (n < 4) {
5995          n = 4 - n - 1;
5996      } else {
5997          n = 12 - n - 1;
5998      }
5999  #endif
6000      env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6001  }
6002  
6003  void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6004                           uint32_t ws, uint32_t n)
6005  {
6006      n %= 4;
6007  #if HOST_BIG_ENDIAN
6008      if (n < 2) {
6009          n = 2 - n - 1;
6010      } else {
6011          n = 6 - n - 1;
6012      }
6013  #endif
6014      env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6015  }
6016  
6017  void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6018                            uint32_t rs_num, uint32_t n)
6019  {
6020      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6021      target_ulong rs = env->active_tc.gpr[rs_num];
6022      n %= 16;
6023  #if HOST_BIG_ENDIAN
6024      if (n < 8) {
6025          n = 8 - n - 1;
6026      } else {
6027          n = 24 - n - 1;
6028      }
6029  #endif
6030      pwd->b[n] = (int8_t)rs;
6031  }
6032  
6033  void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6034                            uint32_t rs_num, uint32_t n)
6035  {
6036      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6037      target_ulong rs = env->active_tc.gpr[rs_num];
6038      n %= 8;
6039  #if HOST_BIG_ENDIAN
6040      if (n < 4) {
6041          n = 4 - n - 1;
6042      } else {
6043          n = 12 - n - 1;
6044      }
6045  #endif
6046      pwd->h[n] = (int16_t)rs;
6047  }
6048  
6049  void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6050                            uint32_t rs_num, uint32_t n)
6051  {
6052      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6053      target_ulong rs = env->active_tc.gpr[rs_num];
6054      n %= 4;
6055  #if HOST_BIG_ENDIAN
6056      if (n < 2) {
6057          n = 2 - n - 1;
6058      } else {
6059          n = 6 - n - 1;
6060      }
6061  #endif
6062      pwd->w[n] = (int32_t)rs;
6063  }
6064  
6065  void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6066                            uint32_t rs_num, uint32_t n)
6067  {
6068      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6069      target_ulong rs = env->active_tc.gpr[rs_num];
6070      n %= 2;
6071      pwd->d[n] = (int64_t)rs;
6072  }
6073  
6074  void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6075                           uint32_t ws, uint32_t n)
6076  {
6077      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6078      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6079  
6080      switch (df) {
6081      case DF_BYTE:
6082          pwd->b[n] = (int8_t)pws->b[0];
6083          break;
6084      case DF_HALF:
6085          pwd->h[n] = (int16_t)pws->h[0];
6086          break;
6087      case DF_WORD:
6088          pwd->w[n] = (int32_t)pws->w[0];
6089          break;
6090      case DF_DOUBLE:
6091          pwd->d[n] = (int64_t)pws->d[0];
6092          break;
6093      default:
6094          g_assert_not_reached();
6095      }
6096  }
6097  
6098  void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6099  {
6100      switch (cd) {
6101      case 0:
6102          break;
6103      case 1:
6104          env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6105          restore_msa_fp_status(env);
6106          /* check exception */
6107          if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6108              & GET_FP_CAUSE(env->active_tc.msacsr)) {
6109              do_raise_exception(env, EXCP_MSAFPE, GETPC());
6110          }
6111          break;
6112      }
6113  }
6114  
6115  target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6116  {
6117      switch (cs) {
6118      case 0:
6119          return env->msair;
6120      case 1:
6121          return env->active_tc.msacsr & MSACSR_MASK;
6122      }
6123      return 0;
6124  }
6125  
6126  void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6127                          uint32_t rs)
6128  {
6129      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6130      uint32_t i;
6131  
6132      switch (df) {
6133      case DF_BYTE:
6134          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6135              pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6136          }
6137          break;
6138      case DF_HALF:
6139          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6140              pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6141          }
6142          break;
6143      case DF_WORD:
6144          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6145              pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6146          }
6147          break;
6148      case DF_DOUBLE:
6149          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6150              pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6151          }
6152         break;
6153      default:
6154          g_assert_not_reached();
6155      }
6156  }
6157  
6158  
6159  #define FLOAT_ONE32 make_float32(0x3f8 << 20)
6160  #define FLOAT_ONE64 make_float64(0x3ffULL << 52)
6161  
6162  #define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
6163          /* 0x7c20 */
6164  #define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
6165          /* 0x7f800020 */
6166  #define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
6167          /* 0x7ff0000000000020 */
6168  
6169  static inline void clear_msacsr_cause(CPUMIPSState *env)
6170  {
6171      SET_FP_CAUSE(env->active_tc.msacsr, 0);
6172  }
6173  
6174  static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6175  {
6176      if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6177              (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6178          UPDATE_FP_FLAGS(env->active_tc.msacsr,
6179                  GET_FP_CAUSE(env->active_tc.msacsr));
6180      } else {
6181          do_raise_exception(env, EXCP_MSAFPE, retaddr);
6182      }
6183  }
6184  
6185  /* Flush-to-zero use cases for update_msacsr() */
6186  #define CLEAR_FS_UNDERFLOW 1
6187  #define CLEAR_IS_INEXACT   2
6188  #define RECIPROCAL_INEXACT 4
6189  
6190  
6191  static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6192  {
6193      int mips_xcpt = 0;
6194  
6195      if (ieee_xcpt & float_flag_invalid) {
6196          mips_xcpt |= FP_INVALID;
6197      }
6198      if (ieee_xcpt & float_flag_overflow) {
6199          mips_xcpt |= FP_OVERFLOW;
6200      }
6201      if (ieee_xcpt & float_flag_underflow) {
6202          mips_xcpt |= FP_UNDERFLOW;
6203      }
6204      if (ieee_xcpt & float_flag_divbyzero) {
6205          mips_xcpt |= FP_DIV0;
6206      }
6207      if (ieee_xcpt & float_flag_inexact) {
6208          mips_xcpt |= FP_INEXACT;
6209      }
6210  
6211      return mips_xcpt;
6212  }
6213  
6214  static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
6215  {
6216      int ieee_exception_flags;
6217      int mips_exception_flags = 0;
6218      int cause;
6219      int enable;
6220  
6221      ieee_exception_flags = get_float_exception_flags(
6222                                 &env->active_tc.msa_fp_status);
6223  
6224      /* QEMU softfloat does not signal all underflow cases */
6225      if (denormal) {
6226          ieee_exception_flags |= float_flag_underflow;
6227      }
6228      if (ieee_exception_flags) {
6229          mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
6230      }
6231      enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6232  
6233      /* Set Inexact (I) when flushing inputs to zero */
6234      if ((ieee_exception_flags & float_flag_input_denormal) &&
6235              (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6236          if (action & CLEAR_IS_INEXACT) {
6237              mips_exception_flags &= ~FP_INEXACT;
6238          } else {
6239              mips_exception_flags |= FP_INEXACT;
6240          }
6241      }
6242  
6243      /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
6244      if ((ieee_exception_flags & float_flag_output_denormal) &&
6245              (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6246          mips_exception_flags |= FP_INEXACT;
6247          if (action & CLEAR_FS_UNDERFLOW) {
6248              mips_exception_flags &= ~FP_UNDERFLOW;
6249          } else {
6250              mips_exception_flags |= FP_UNDERFLOW;
6251          }
6252      }
6253  
6254      /* Set Inexact (I) when Overflow (O) is not enabled */
6255      if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
6256             (enable & FP_OVERFLOW) == 0) {
6257          mips_exception_flags |= FP_INEXACT;
6258      }
6259  
6260      /* Clear Exact Underflow when Underflow (U) is not enabled */
6261      if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
6262             (enable & FP_UNDERFLOW) == 0 &&
6263             (mips_exception_flags & FP_INEXACT) == 0) {
6264          mips_exception_flags &= ~FP_UNDERFLOW;
6265      }
6266  
6267      /*
6268       * Reciprocal operations set only Inexact when valid and not
6269       * divide by zero
6270       */
6271      if ((action & RECIPROCAL_INEXACT) &&
6272              (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
6273          mips_exception_flags = FP_INEXACT;
6274      }
6275  
6276      cause = mips_exception_flags & enable; /* all current enabled exceptions */
6277  
6278      if (cause == 0) {
6279          /*
6280           * No enabled exception, update the MSACSR Cause
6281           * with all current exceptions
6282           */
6283          SET_FP_CAUSE(env->active_tc.msacsr,
6284              (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6285      } else {
6286          /* Current exceptions are enabled */
6287          if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
6288              /*
6289               * Exception(s) will trap, update MSACSR Cause
6290               * with all enabled exceptions
6291               */
6292              SET_FP_CAUSE(env->active_tc.msacsr,
6293                  (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6294          }
6295      }
6296  
6297      return mips_exception_flags;
6298  }
6299  
6300  static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6301  {
6302      int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6303      return c & enable;
6304  }
6305  
6306  static inline float16 float16_from_float32(int32_t a, bool ieee,
6307                                             float_status *status)
6308  {
6309        float16 f_val;
6310  
6311        f_val = float32_to_float16((float32)a, ieee, status);
6312  
6313        return a < 0 ? (f_val | (1 << 15)) : f_val;
6314  }
6315  
6316  static inline float32 float32_from_float64(int64_t a, float_status *status)
6317  {
6318        float32 f_val;
6319  
6320        f_val = float64_to_float32((float64)a, status);
6321  
6322        return a < 0 ? (f_val | (1 << 31)) : f_val;
6323  }
6324  
6325  static inline float32 float32_from_float16(int16_t a, bool ieee,
6326                                             float_status *status)
6327  {
6328        float32 f_val;
6329  
6330        f_val = float16_to_float32((float16)a, ieee, status);
6331  
6332        return a < 0 ? (f_val | (1 << 31)) : f_val;
6333  }
6334  
6335  static inline float64 float64_from_float32(int32_t a, float_status *status)
6336  {
6337        float64 f_val;
6338  
6339        f_val = float32_to_float64((float64)a, status);
6340  
6341        return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6342  }
6343  
6344  static inline float32 float32_from_q16(int16_t a, float_status *status)
6345  {
6346      float32 f_val;
6347  
6348      /* conversion as integer and scaling */
6349      f_val = int32_to_float32(a, status);
6350      f_val = float32_scalbn(f_val, -15, status);
6351  
6352      return f_val;
6353  }
6354  
6355  static inline float64 float64_from_q32(int32_t a, float_status *status)
6356  {
6357      float64 f_val;
6358  
6359      /* conversion as integer and scaling */
6360      f_val = int32_to_float64(a, status);
6361      f_val = float64_scalbn(f_val, -31, status);
6362  
6363      return f_val;
6364  }
6365  
6366  static inline int16_t float32_to_q16(float32 a, float_status *status)
6367  {
6368      int32_t q_val;
6369      int32_t q_min = 0xffff8000;
6370      int32_t q_max = 0x00007fff;
6371  
6372      int ieee_ex;
6373  
6374      if (float32_is_any_nan(a)) {
6375          float_raise(float_flag_invalid, status);
6376          return 0;
6377      }
6378  
6379      /* scaling */
6380      a = float32_scalbn(a, 15, status);
6381  
6382      ieee_ex = get_float_exception_flags(status);
6383      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6384                               , status);
6385  
6386      if (ieee_ex & float_flag_overflow) {
6387          float_raise(float_flag_inexact, status);
6388          return (int32_t)a < 0 ? q_min : q_max;
6389      }
6390  
6391      /* conversion to int */
6392      q_val = float32_to_int32(a, status);
6393  
6394      ieee_ex = get_float_exception_flags(status);
6395      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6396                               , status);
6397  
6398      if (ieee_ex & float_flag_invalid) {
6399          set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6400                                 , status);
6401          float_raise(float_flag_overflow | float_flag_inexact, status);
6402          return (int32_t)a < 0 ? q_min : q_max;
6403      }
6404  
6405      if (q_val < q_min) {
6406          float_raise(float_flag_overflow | float_flag_inexact, status);
6407          return (int16_t)q_min;
6408      }
6409  
6410      if (q_max < q_val) {
6411          float_raise(float_flag_overflow | float_flag_inexact, status);
6412          return (int16_t)q_max;
6413      }
6414  
6415      return (int16_t)q_val;
6416  }
6417  
6418  static inline int32_t float64_to_q32(float64 a, float_status *status)
6419  {
6420      int64_t q_val;
6421      int64_t q_min = 0xffffffff80000000LL;
6422      int64_t q_max = 0x000000007fffffffLL;
6423  
6424      int ieee_ex;
6425  
6426      if (float64_is_any_nan(a)) {
6427          float_raise(float_flag_invalid, status);
6428          return 0;
6429      }
6430  
6431      /* scaling */
6432      a = float64_scalbn(a, 31, status);
6433  
6434      ieee_ex = get_float_exception_flags(status);
6435      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6436             , status);
6437  
6438      if (ieee_ex & float_flag_overflow) {
6439          float_raise(float_flag_inexact, status);
6440          return (int64_t)a < 0 ? q_min : q_max;
6441      }
6442  
6443      /* conversion to integer */
6444      q_val = float64_to_int64(a, status);
6445  
6446      ieee_ex = get_float_exception_flags(status);
6447      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6448             , status);
6449  
6450      if (ieee_ex & float_flag_invalid) {
6451          set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6452                 , status);
6453          float_raise(float_flag_overflow | float_flag_inexact, status);
6454          return (int64_t)a < 0 ? q_min : q_max;
6455      }
6456  
6457      if (q_val < q_min) {
6458          float_raise(float_flag_overflow | float_flag_inexact, status);
6459          return (int32_t)q_min;
6460      }
6461  
6462      if (q_max < q_val) {
6463          float_raise(float_flag_overflow | float_flag_inexact, status);
6464          return (int32_t)q_max;
6465      }
6466  
6467      return (int32_t)q_val;
6468  }
6469  
6470  #define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
6471      do {                                                                    \
6472          float_status *status = &env->active_tc.msa_fp_status;               \
6473          int c;                                                              \
6474          int64_t cond;                                                       \
6475          set_float_exception_flags(0, status);                               \
6476          if (!QUIET) {                                                       \
6477              cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
6478          } else {                                                            \
6479              cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
6480          }                                                                   \
6481          DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
6482          c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
6483                                                                              \
6484          if (get_enabled_exceptions(env, c)) {                               \
6485              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
6486          }                                                                   \
6487      } while (0)
6488  
6489  #define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
6490      do {                                                            \
6491          MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
6492          if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
6493              DEST = 0;                                               \
6494          }                                                           \
6495      } while (0)
6496  
6497  #define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
6498      do {                                                            \
6499          MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
6500          if (DEST == 0) {                                            \
6501              MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
6502          }                                                           \
6503      } while (0)
6504  
6505  #define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
6506      do {                                                            \
6507          MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
6508          if (DEST == 0) {                                            \
6509              MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
6510          }                                                           \
6511      } while (0)
6512  
6513  #define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
6514      do {                                                            \
6515          MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
6516          if (DEST == 0) {                                            \
6517              MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
6518              if (DEST == 0) {                                        \
6519                  MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
6520              }                                                       \
6521          }                                                           \
6522      } while (0)
6523  
6524  #define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
6525      do {                                                            \
6526          MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
6527          if (DEST == 0) {                                            \
6528              MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
6529          }                                                           \
6530      } while (0)
6531  
6532  #define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
6533      do {                                                            \
6534          MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
6535          if (DEST == 0) {                                            \
6536              MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
6537          }                                                           \
6538      } while (0)
6539  
6540  #define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
6541      do {                                                            \
6542          MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
6543          if (DEST == 0) {                                            \
6544              MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
6545          }                                                           \
6546      } while (0)
6547  
6548  static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6549                                wr_t *pwt, uint32_t df, int quiet,
6550                                uintptr_t retaddr)
6551  {
6552      wr_t wx, *pwx = &wx;
6553      uint32_t i;
6554  
6555      clear_msacsr_cause(env);
6556  
6557      switch (df) {
6558      case DF_WORD:
6559          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6560              MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6561          }
6562          break;
6563      case DF_DOUBLE:
6564          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6565              MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6566          }
6567          break;
6568      default:
6569          g_assert_not_reached();
6570      }
6571  
6572      check_msacsr_cause(env, retaddr);
6573  
6574      msa_move_v(pwd, pwx);
6575  }
6576  
6577  static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6578                                wr_t *pwt, uint32_t df, int quiet,
6579                                uintptr_t retaddr)
6580  {
6581      wr_t wx, *pwx = &wx;
6582      uint32_t i;
6583  
6584      clear_msacsr_cause(env);
6585  
6586      switch (df) {
6587      case DF_WORD:
6588          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6589              MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6590                      quiet);
6591          }
6592          break;
6593      case DF_DOUBLE:
6594          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6595              MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6596                      quiet);
6597          }
6598          break;
6599      default:
6600          g_assert_not_reached();
6601      }
6602  
6603      check_msacsr_cause(env, retaddr);
6604  
6605      msa_move_v(pwd, pwx);
6606  }
6607  
6608  static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6609                                wr_t *pwt, uint32_t df, int quiet,
6610                                uintptr_t retaddr)
6611  {
6612      wr_t wx, *pwx = &wx;
6613      uint32_t i;
6614  
6615      clear_msacsr_cause(env);
6616  
6617      switch (df) {
6618      case DF_WORD:
6619          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6620              MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6621          }
6622          break;
6623      case DF_DOUBLE:
6624          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6625              MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6626          }
6627          break;
6628      default:
6629          g_assert_not_reached();
6630      }
6631  
6632      check_msacsr_cause(env, retaddr);
6633  
6634      msa_move_v(pwd, pwx);
6635  }
6636  
6637  static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6638                                 wr_t *pwt, uint32_t df, int quiet,
6639                                 uintptr_t retaddr)
6640  {
6641      wr_t wx, *pwx = &wx;
6642      uint32_t i;
6643  
6644      clear_msacsr_cause(env);
6645  
6646      switch (df) {
6647      case DF_WORD:
6648          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6649              MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6650          }
6651          break;
6652      case DF_DOUBLE:
6653          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6654              MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6655          }
6656          break;
6657      default:
6658          g_assert_not_reached();
6659      }
6660  
6661      check_msacsr_cause(env, retaddr);
6662  
6663      msa_move_v(pwd, pwx);
6664  }
6665  
6666  static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6667                                wr_t *pwt, uint32_t df, int quiet,
6668                                uintptr_t retaddr)
6669  {
6670      wr_t wx, *pwx = &wx;
6671      uint32_t i;
6672  
6673      clear_msacsr_cause(env);
6674  
6675      switch (df) {
6676      case DF_WORD:
6677          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6678              MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6679          }
6680          break;
6681      case DF_DOUBLE:
6682          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6683              MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6684          }
6685          break;
6686      default:
6687          g_assert_not_reached();
6688      }
6689  
6690      check_msacsr_cause(env, retaddr);
6691  
6692      msa_move_v(pwd, pwx);
6693  }
6694  
6695  static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6696                                 wr_t *pwt, uint32_t df, int quiet,
6697                                 uintptr_t retaddr)
6698  {
6699      wr_t wx, *pwx = &wx;
6700      uint32_t i;
6701  
6702      clear_msacsr_cause(env);
6703  
6704      switch (df) {
6705      case DF_WORD:
6706          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6707              MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6708          }
6709          break;
6710      case DF_DOUBLE:
6711          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6712              MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6713          }
6714          break;
6715      default:
6716          g_assert_not_reached();
6717      }
6718  
6719      check_msacsr_cause(env, retaddr);
6720  
6721      msa_move_v(pwd, pwx);
6722  }
6723  
6724  static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6725                                wr_t *pwt, uint32_t df, int quiet,
6726                                uintptr_t retaddr)
6727  {
6728      wr_t wx, *pwx = &wx;
6729      uint32_t i;
6730  
6731      clear_msacsr_cause(env);
6732  
6733      switch (df) {
6734      case DF_WORD:
6735          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6736              MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6737          }
6738          break;
6739      case DF_DOUBLE:
6740          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6741              MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6742          }
6743          break;
6744      default:
6745          g_assert_not_reached();
6746      }
6747  
6748      check_msacsr_cause(env, retaddr);
6749  
6750      msa_move_v(pwd, pwx);
6751  }
6752  
6753  static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6754                                 wr_t *pwt, uint32_t df, int quiet,
6755                                 uintptr_t retaddr)
6756  {
6757      wr_t wx, *pwx = &wx;
6758      uint32_t i;
6759  
6760      clear_msacsr_cause(env);
6761  
6762      switch (df) {
6763      case DF_WORD:
6764          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6765              MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6766          }
6767          break;
6768      case DF_DOUBLE:
6769          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6770              MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6771          }
6772          break;
6773      default:
6774          g_assert_not_reached();
6775      }
6776  
6777      check_msacsr_cause(env, retaddr);
6778  
6779      msa_move_v(pwd, pwx);
6780  }
6781  
6782  static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6783                                wr_t *pwt, uint32_t df, int quiet,
6784                                uintptr_t retaddr)
6785  {
6786      wr_t wx, *pwx = &wx;
6787      uint32_t i;
6788  
6789      clear_msacsr_cause(env);
6790  
6791      switch (df) {
6792      case DF_WORD:
6793          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6794              MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6795          }
6796          break;
6797      case DF_DOUBLE:
6798          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6799              MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6800          }
6801          break;
6802      default:
6803          g_assert_not_reached();
6804      }
6805  
6806      check_msacsr_cause(env, retaddr);
6807  
6808      msa_move_v(pwd, pwx);
6809  }
6810  
6811  static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6812                                 wr_t *pwt, uint32_t df, int quiet,
6813                                 uintptr_t retaddr)
6814  {
6815      wr_t wx, *pwx = &wx;
6816      uint32_t i;
6817  
6818      clear_msacsr_cause(env);
6819  
6820      switch (df) {
6821      case DF_WORD:
6822          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6823              MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6824          }
6825          break;
6826      case DF_DOUBLE:
6827          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6828              MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6829          }
6830          break;
6831      default:
6832          g_assert_not_reached();
6833      }
6834  
6835      check_msacsr_cause(env, retaddr);
6836  
6837      msa_move_v(pwd, pwx);
6838  }
6839  
6840  static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6841                                wr_t *pwt, uint32_t df, int quiet,
6842                                uintptr_t retaddr)
6843  {
6844      wr_t wx, *pwx = &wx;
6845      uint32_t i;
6846  
6847      clear_msacsr_cause(env);
6848  
6849      switch (df) {
6850      case DF_WORD:
6851          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6852              MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6853          }
6854          break;
6855      case DF_DOUBLE:
6856          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6857              MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6858          }
6859          break;
6860      default:
6861          g_assert_not_reached();
6862      }
6863  
6864      check_msacsr_cause(env, retaddr);
6865  
6866      msa_move_v(pwd, pwx);
6867  }
6868  
6869  void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6870                          uint32_t ws, uint32_t wt)
6871  {
6872      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6873      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6874      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6875      compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6876  }
6877  
6878  void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6879                          uint32_t ws, uint32_t wt)
6880  {
6881      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6882      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6883      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6884      compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6885  }
6886  
6887  void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6888                          uint32_t ws, uint32_t wt)
6889  {
6890      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6891      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6892      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6893      compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6894  }
6895  
6896  void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6897                           uint32_t ws, uint32_t wt)
6898  {
6899      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6900      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6901      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6902      compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6903  }
6904  
6905  void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6906                          uint32_t ws, uint32_t wt)
6907  {
6908      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6909      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6910      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6911      compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6912  }
6913  
6914  void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6915                           uint32_t ws, uint32_t wt)
6916  {
6917      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6918      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6919      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6920      compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6921  }
6922  
6923  void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6924                          uint32_t ws, uint32_t wt)
6925  {
6926      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6927      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6928      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6929      compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6930  }
6931  
6932  void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6933                           uint32_t ws, uint32_t wt)
6934  {
6935      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6936      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6937      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6938      compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6939  }
6940  
6941  void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6942                          uint32_t ws, uint32_t wt)
6943  {
6944      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6945      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6946      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6947      compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6948  }
6949  
6950  void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6951                          uint32_t ws, uint32_t wt)
6952  {
6953      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6954      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6955      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6956      compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6957  }
6958  
6959  void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6960                          uint32_t ws, uint32_t wt)
6961  {
6962      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6963      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6964      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6965      compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6966  }
6967  
6968  void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6969                           uint32_t ws, uint32_t wt)
6970  {
6971      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6972      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6973      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6974      compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6975  }
6976  
6977  void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6978                          uint32_t ws, uint32_t wt)
6979  {
6980      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6981      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6982      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6983      compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6984  }
6985  
6986  void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6987                           uint32_t ws, uint32_t wt)
6988  {
6989      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6990      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6991      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6992      compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6993  }
6994  
6995  void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6996                          uint32_t ws, uint32_t wt)
6997  {
6998      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6999      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7000      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7001      compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7002  }
7003  
7004  void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7005                           uint32_t ws, uint32_t wt)
7006  {
7007      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7008      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7009      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7010      compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7011  }
7012  
7013  void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7014                          uint32_t ws, uint32_t wt)
7015  {
7016      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7017      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7018      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7019      compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7020  }
7021  
7022  void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7023                           uint32_t ws, uint32_t wt)
7024  {
7025      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7026      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7027      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7028      compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7029  }
7030  
7031  void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7032                          uint32_t ws, uint32_t wt)
7033  {
7034      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7035      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7036      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7037      compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7038  }
7039  
7040  void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7041                          uint32_t ws, uint32_t wt)
7042  {
7043      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7044      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7045      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7046      compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7047  }
7048  
7049  void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7050                           uint32_t ws, uint32_t wt)
7051  {
7052      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7053      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7054      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7055      compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7056  }
7057  
7058  void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7059                          uint32_t ws, uint32_t wt)
7060  {
7061      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7062      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7063      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7064      compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7065  }
7066  
7067  #define float16_is_zero(ARG) 0
7068  #define float16_is_zero_or_denormal(ARG) 0
7069  
7070  #define IS_DENORMAL(ARG, BITS)                      \
7071      (!float ## BITS ## _is_zero(ARG)                \
7072      && float ## BITS ## _is_zero_or_denormal(ARG))
7073  
7074  #define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
7075      do {                                                                    \
7076          float_status *status = &env->active_tc.msa_fp_status;               \
7077          int c;                                                              \
7078                                                                              \
7079          set_float_exception_flags(0, status);                               \
7080          DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
7081          c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7082                                                                              \
7083          if (get_enabled_exceptions(env, c)) {                               \
7084              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7085          }                                                                   \
7086      } while (0)
7087  
7088  void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7089          uint32_t ws, uint32_t wt)
7090  {
7091      wr_t wx, *pwx = &wx;
7092      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7093      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7094      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7095      uint32_t i;
7096  
7097      clear_msacsr_cause(env);
7098  
7099      switch (df) {
7100      case DF_WORD:
7101          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7102              MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7103          }
7104          break;
7105      case DF_DOUBLE:
7106          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7107              MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7108          }
7109          break;
7110      default:
7111          g_assert_not_reached();
7112      }
7113  
7114      check_msacsr_cause(env, GETPC());
7115      msa_move_v(pwd, pwx);
7116  }
7117  
7118  void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7119          uint32_t ws, uint32_t wt)
7120  {
7121      wr_t wx, *pwx = &wx;
7122      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7123      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7124      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7125      uint32_t i;
7126  
7127      clear_msacsr_cause(env);
7128  
7129      switch (df) {
7130      case DF_WORD:
7131          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7132              MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7133          }
7134          break;
7135      case DF_DOUBLE:
7136          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7137              MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7138          }
7139          break;
7140      default:
7141          g_assert_not_reached();
7142      }
7143  
7144      check_msacsr_cause(env, GETPC());
7145      msa_move_v(pwd, pwx);
7146  }
7147  
7148  void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7149          uint32_t ws, uint32_t wt)
7150  {
7151      wr_t wx, *pwx = &wx;
7152      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7153      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7154      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7155      uint32_t i;
7156  
7157      clear_msacsr_cause(env);
7158  
7159      switch (df) {
7160      case DF_WORD:
7161          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7162              MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7163          }
7164          break;
7165      case DF_DOUBLE:
7166          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7167              MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7168          }
7169          break;
7170      default:
7171          g_assert_not_reached();
7172      }
7173  
7174      check_msacsr_cause(env, GETPC());
7175  
7176      msa_move_v(pwd, pwx);
7177  }
7178  
7179  void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7180          uint32_t ws, uint32_t wt)
7181  {
7182      wr_t wx, *pwx = &wx;
7183      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7184      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7185      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7186      uint32_t i;
7187  
7188      clear_msacsr_cause(env);
7189  
7190      switch (df) {
7191      case DF_WORD:
7192          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7193              MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7194          }
7195          break;
7196      case DF_DOUBLE:
7197          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7198              MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7199          }
7200          break;
7201      default:
7202          g_assert_not_reached();
7203      }
7204  
7205      check_msacsr_cause(env, GETPC());
7206  
7207      msa_move_v(pwd, pwx);
7208  }
7209  
7210  #define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
7211      do {                                                                    \
7212          float_status *status = &env->active_tc.msa_fp_status;               \
7213          int c;                                                              \
7214                                                                              \
7215          set_float_exception_flags(0, status);                               \
7216          DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
7217          c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7218                                                                              \
7219          if (get_enabled_exceptions(env, c)) {                               \
7220              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7221          }                                                                   \
7222      } while (0)
7223  
7224  void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7225          uint32_t ws, uint32_t wt)
7226  {
7227      wr_t wx, *pwx = &wx;
7228      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7229      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7230      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7231      uint32_t i;
7232  
7233      clear_msacsr_cause(env);
7234  
7235      switch (df) {
7236      case DF_WORD:
7237          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7238              MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7239                             pws->w[i], pwt->w[i], 0, 32);
7240          }
7241          break;
7242      case DF_DOUBLE:
7243          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7244              MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7245                             pws->d[i], pwt->d[i], 0, 64);
7246          }
7247          break;
7248      default:
7249          g_assert_not_reached();
7250      }
7251  
7252      check_msacsr_cause(env, GETPC());
7253  
7254      msa_move_v(pwd, pwx);
7255  }
7256  
7257  void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7258          uint32_t ws, uint32_t wt)
7259  {
7260      wr_t wx, *pwx = &wx;
7261      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7262      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7263      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7264      uint32_t i;
7265  
7266      clear_msacsr_cause(env);
7267  
7268      switch (df) {
7269      case DF_WORD:
7270          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7271              MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7272                             pws->w[i], pwt->w[i],
7273                             float_muladd_negate_product, 32);
7274        }
7275        break;
7276      case DF_DOUBLE:
7277          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7278              MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7279                             pws->d[i], pwt->d[i],
7280                             float_muladd_negate_product, 64);
7281          }
7282          break;
7283      default:
7284          g_assert_not_reached();
7285      }
7286  
7287      check_msacsr_cause(env, GETPC());
7288  
7289      msa_move_v(pwd, pwx);
7290  }
7291  
7292  void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7293          uint32_t ws, uint32_t wt)
7294  {
7295      wr_t wx, *pwx = &wx;
7296      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7297      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7298      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7299      uint32_t i;
7300  
7301      clear_msacsr_cause(env);
7302  
7303      switch (df) {
7304      case DF_WORD:
7305          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7306              MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7307                              pwt->w[i] >  0x200 ?  0x200 :
7308                              pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7309                              32);
7310          }
7311          break;
7312      case DF_DOUBLE:
7313          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7314              MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7315                              pwt->d[i] >  0x1000 ?  0x1000 :
7316                              pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7317                              64);
7318          }
7319          break;
7320      default:
7321          g_assert_not_reached();
7322      }
7323  
7324      check_msacsr_cause(env, GETPC());
7325  
7326      msa_move_v(pwd, pwx);
7327  }
7328  
7329  #define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
7330      do {                                                                    \
7331          float_status *status = &env->active_tc.msa_fp_status;               \
7332          int c;                                                              \
7333                                                                              \
7334          set_float_exception_flags(0, status);                               \
7335          DEST = float ## BITS ## _ ## OP(ARG, status);                       \
7336          c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7337                                                                              \
7338          if (get_enabled_exceptions(env, c)) {                               \
7339              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7340          }                                                                   \
7341      } while (0)
7342  
7343  void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7344                           uint32_t ws, uint32_t wt)
7345  {
7346      wr_t wx, *pwx = &wx;
7347      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7348      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7349      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7350      uint32_t i;
7351  
7352      clear_msacsr_cause(env);
7353  
7354      switch (df) {
7355      case DF_WORD:
7356          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7357              /*
7358               * Half precision floats come in two formats: standard
7359               * IEEE and "ARM" format.  The latter gains extra exponent
7360               * range by omitting the NaN/Inf encodings.
7361               */
7362              bool ieee = true;
7363  
7364              MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7365              MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7366          }
7367          break;
7368      case DF_DOUBLE:
7369          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7370              MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7371              MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7372          }
7373          break;
7374      default:
7375          g_assert_not_reached();
7376      }
7377  
7378      check_msacsr_cause(env, GETPC());
7379      msa_move_v(pwd, pwx);
7380  }
7381  
7382  #define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
7383      do {                                                                    \
7384          float_status *status = &env->active_tc.msa_fp_status;               \
7385          int c;                                                              \
7386                                                                              \
7387          set_float_exception_flags(0, status);                               \
7388          DEST = float ## BITS ## _ ## OP(ARG, status);                       \
7389          c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
7390                                                                              \
7391          if (get_enabled_exceptions(env, c)) {                               \
7392              DEST = ((FLOAT_SNAN ## XBITS(status) >> 6) << 6) | c;           \
7393          }                                                                   \
7394      } while (0)
7395  
7396  void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7397                         uint32_t ws, uint32_t wt)
7398  {
7399      wr_t wx, *pwx = &wx;
7400      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7401      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7402      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7403      uint32_t i;
7404  
7405      clear_msacsr_cause(env);
7406  
7407      switch (df) {
7408      case DF_WORD:
7409          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7410              MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7411              MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7412          }
7413          break;
7414      case DF_DOUBLE:
7415          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7416              MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7417              MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7418          }
7419          break;
7420      default:
7421          g_assert_not_reached();
7422      }
7423  
7424      check_msacsr_cause(env, GETPC());
7425  
7426      msa_move_v(pwd, pwx);
7427  }
7428  
7429  #define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
7430      !float ## BITS ## _is_any_nan(ARG1)                 \
7431      && float ## BITS ## _is_quiet_nan(ARG2, STATUS)
7432  
7433  #define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
7434      do {                                                                    \
7435          float_status *status_ = &env->active_tc.msa_fp_status;              \
7436          int c;                                                              \
7437                                                                              \
7438          set_float_exception_flags(0, status_);                              \
7439          DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status_);               \
7440          c = update_msacsr(env, 0, 0);                                       \
7441                                                                              \
7442          if (get_enabled_exceptions(env, c)) {                               \
7443              DEST = ((FLOAT_SNAN ## BITS(status_) >> 6) << 6) | c;           \
7444          }                                                                   \
7445      } while (0)
7446  
7447  #define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
7448      do {                                                            \
7449          uint## BITS ##_t S = _S, T = _T;                            \
7450          uint## BITS ##_t as, at, xs, xt, xd;                        \
7451          if (NUMBER_QNAN_PAIR(S, T, BITS, STATUS)) {                 \
7452              T = S;                                                  \
7453          }                                                           \
7454          else if (NUMBER_QNAN_PAIR(T, S, BITS, STATUS)) {            \
7455              S = T;                                                  \
7456          }                                                           \
7457          as = float## BITS ##_abs(S);                                \
7458          at = float## BITS ##_abs(T);                                \
7459          MSA_FLOAT_MAXOP(xs, F,  S,  T, BITS);                       \
7460          MSA_FLOAT_MAXOP(xt, G,  S,  T, BITS);                       \
7461          MSA_FLOAT_MAXOP(xd, F, as, at, BITS);                       \
7462          X = (as == at || xd == float## BITS ##_abs(xs)) ? xs : xt;  \
7463      } while (0)
7464  
7465  void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7466          uint32_t ws, uint32_t wt)
7467  {
7468      float_status *status = &env->active_tc.msa_fp_status;
7469      wr_t wx, *pwx = &wx;
7470      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7471      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7472      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7473  
7474      clear_msacsr_cause(env);
7475  
7476      if (df == DF_WORD) {
7477  
7478          if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7479              MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7480          } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7481              MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7482          } else {
7483              MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7484          }
7485  
7486          if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7487              MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7488          } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7489              MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7490          } else {
7491              MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7492          }
7493  
7494          if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7495              MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7496          } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7497              MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7498          } else {
7499              MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7500          }
7501  
7502          if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7503              MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7504          } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7505              MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7506          } else {
7507              MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7508          }
7509  
7510      } else if (df == DF_DOUBLE) {
7511  
7512          if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7513              MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7514          } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7515              MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7516          } else {
7517              MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7518          }
7519  
7520          if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7521              MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7522          } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7523              MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7524          } else {
7525              MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7526          }
7527  
7528      } else {
7529  
7530          g_assert_not_reached();
7531  
7532      }
7533  
7534      check_msacsr_cause(env, GETPC());
7535  
7536      msa_move_v(pwd, pwx);
7537  }
7538  
7539  void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7540          uint32_t ws, uint32_t wt)
7541  {
7542      float_status *status = &env->active_tc.msa_fp_status;
7543      wr_t wx, *pwx = &wx;
7544      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7545      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7546      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7547  
7548      clear_msacsr_cause(env);
7549  
7550      if (df == DF_WORD) {
7551          FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7552          FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7553          FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7554          FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7555      } else if (df == DF_DOUBLE) {
7556          FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7557          FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7558      } else {
7559          g_assert_not_reached();
7560      }
7561  
7562      check_msacsr_cause(env, GETPC());
7563  
7564      msa_move_v(pwd, pwx);
7565  }
7566  
7567  void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7568          uint32_t ws, uint32_t wt)
7569  {
7570       float_status *status = &env->active_tc.msa_fp_status;
7571      wr_t wx, *pwx = &wx;
7572      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7573      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7574      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7575  
7576      clear_msacsr_cause(env);
7577  
7578      if (df == DF_WORD) {
7579  
7580          if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7581              MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7582          } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7583              MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7584          } else {
7585              MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7586          }
7587  
7588          if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7589              MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7590          } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7591              MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7592          } else {
7593              MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7594          }
7595  
7596          if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7597              MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7598          } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7599              MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7600          } else {
7601              MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7602          }
7603  
7604          if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7605              MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7606          } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7607              MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7608          } else {
7609              MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7610          }
7611  
7612      } else if (df == DF_DOUBLE) {
7613  
7614          if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7615              MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7616          } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7617              MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7618          } else {
7619              MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7620          }
7621  
7622          if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7623              MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7624          } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7625              MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7626          } else {
7627              MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7628          }
7629  
7630      } else {
7631  
7632          g_assert_not_reached();
7633  
7634      }
7635  
7636      check_msacsr_cause(env, GETPC());
7637  
7638      msa_move_v(pwd, pwx);
7639  }
7640  
7641  void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7642          uint32_t ws, uint32_t wt)
7643  {
7644      float_status *status = &env->active_tc.msa_fp_status;
7645      wr_t wx, *pwx = &wx;
7646      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7647      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7648      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7649  
7650      clear_msacsr_cause(env);
7651  
7652      if (df == DF_WORD) {
7653          FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7654          FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7655          FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7656          FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7657      } else if (df == DF_DOUBLE) {
7658          FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7659          FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7660      } else {
7661          g_assert_not_reached();
7662      }
7663  
7664      check_msacsr_cause(env, GETPC());
7665  
7666      msa_move_v(pwd, pwx);
7667  }
7668  
7669  void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7670          uint32_t wd, uint32_t ws)
7671  {
7672      float_status *status = &env->active_tc.msa_fp_status;
7673  
7674      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7675      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7676      if (df == DF_WORD) {
7677          pwd->w[0] = float_class_s(pws->w[0], status);
7678          pwd->w[1] = float_class_s(pws->w[1], status);
7679          pwd->w[2] = float_class_s(pws->w[2], status);
7680          pwd->w[3] = float_class_s(pws->w[3], status);
7681      } else if (df == DF_DOUBLE) {
7682          pwd->d[0] = float_class_d(pws->d[0], status);
7683          pwd->d[1] = float_class_d(pws->d[1], status);
7684      } else {
7685          g_assert_not_reached();
7686      }
7687  }
7688  
7689  #define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
7690      do {                                                                    \
7691          float_status *status = &env->active_tc.msa_fp_status;               \
7692          int c;                                                              \
7693                                                                              \
7694          set_float_exception_flags(0, status);                               \
7695          DEST = float ## BITS ## _ ## OP(ARG, status);                       \
7696          c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
7697                                                                              \
7698          if (get_enabled_exceptions(env, c)) {                               \
7699              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7700          } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
7701              DEST = 0;                                                       \
7702          }                                                                   \
7703      } while (0)
7704  
7705  void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7706                              uint32_t ws)
7707  {
7708      wr_t wx, *pwx = &wx;
7709      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7710      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7711      uint32_t i;
7712  
7713      clear_msacsr_cause(env);
7714  
7715      switch (df) {
7716      case DF_WORD:
7717          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7718              MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7719          }
7720          break;
7721      case DF_DOUBLE:
7722          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7723              MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7724          }
7725          break;
7726      default:
7727          g_assert_not_reached();
7728      }
7729  
7730      check_msacsr_cause(env, GETPC());
7731  
7732      msa_move_v(pwd, pwx);
7733  }
7734  
7735  void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7736                              uint32_t ws)
7737  {
7738      wr_t wx, *pwx = &wx;
7739      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7740      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7741      uint32_t i;
7742  
7743      clear_msacsr_cause(env);
7744  
7745      switch (df) {
7746      case DF_WORD:
7747          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7748              MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7749          }
7750          break;
7751      case DF_DOUBLE:
7752          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7753              MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7754          }
7755          break;
7756      default:
7757          g_assert_not_reached();
7758      }
7759  
7760      check_msacsr_cause(env, GETPC());
7761  
7762      msa_move_v(pwd, pwx);
7763  }
7764  
7765  void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7766                           uint32_t ws)
7767  {
7768      wr_t wx, *pwx = &wx;
7769      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7770      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7771      uint32_t i;
7772  
7773      clear_msacsr_cause(env);
7774  
7775      switch (df) {
7776      case DF_WORD:
7777          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7778              MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7779          }
7780          break;
7781      case DF_DOUBLE:
7782          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7783              MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7784          }
7785          break;
7786      default:
7787          g_assert_not_reached();
7788      }
7789  
7790      check_msacsr_cause(env, GETPC());
7791  
7792      msa_move_v(pwd, pwx);
7793  }
7794  
7795  #define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
7796      do {                                                                    \
7797          float_status *status = &env->active_tc.msa_fp_status;               \
7798          int c;                                                              \
7799                                                                              \
7800          set_float_exception_flags(0, status);                               \
7801          DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG, status);   \
7802          c = update_msacsr(env, float ## BITS ## _is_infinity(ARG) ||        \
7803                            float ## BITS ## _is_quiet_nan(DEST, status) ?    \
7804                            0 : RECIPROCAL_INEXACT,                           \
7805                            IS_DENORMAL(DEST, BITS));                         \
7806                                                                              \
7807          if (get_enabled_exceptions(env, c)) {                               \
7808              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7809          }                                                                   \
7810      } while (0)
7811  
7812  void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7813                            uint32_t ws)
7814  {
7815      wr_t wx, *pwx = &wx;
7816      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7817      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7818      uint32_t i;
7819  
7820      clear_msacsr_cause(env);
7821  
7822      switch (df) {
7823      case DF_WORD:
7824          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7825              MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7826                      &env->active_tc.msa_fp_status), 32);
7827          }
7828          break;
7829      case DF_DOUBLE:
7830          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7831              MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7832                      &env->active_tc.msa_fp_status), 64);
7833          }
7834          break;
7835      default:
7836          g_assert_not_reached();
7837      }
7838  
7839      check_msacsr_cause(env, GETPC());
7840  
7841      msa_move_v(pwd, pwx);
7842  }
7843  
7844  void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7845                          uint32_t ws)
7846  {
7847      wr_t wx, *pwx = &wx;
7848      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7849      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7850      uint32_t i;
7851  
7852      clear_msacsr_cause(env);
7853  
7854      switch (df) {
7855      case DF_WORD:
7856          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7857              MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7858          }
7859          break;
7860      case DF_DOUBLE:
7861          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7862              MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7863          }
7864          break;
7865      default:
7866          g_assert_not_reached();
7867      }
7868  
7869      check_msacsr_cause(env, GETPC());
7870  
7871      msa_move_v(pwd, pwx);
7872  }
7873  
7874  void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7875                           uint32_t ws)
7876  {
7877      wr_t wx, *pwx = &wx;
7878      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7879      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7880      uint32_t i;
7881  
7882      clear_msacsr_cause(env);
7883  
7884      switch (df) {
7885      case DF_WORD:
7886          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7887              MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7888          }
7889          break;
7890      case DF_DOUBLE:
7891          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7892              MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7893          }
7894          break;
7895      default:
7896          g_assert_not_reached();
7897      }
7898  
7899      check_msacsr_cause(env, GETPC());
7900  
7901      msa_move_v(pwd, pwx);
7902  }
7903  
7904  #define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
7905      do {                                                                    \
7906          float_status *status = &env->active_tc.msa_fp_status;               \
7907          int c;                                                              \
7908                                                                              \
7909          set_float_exception_flags(0, status);                               \
7910          set_float_rounding_mode(float_round_down, status);                  \
7911          DEST = float ## BITS ## _ ## log2(ARG, status);                     \
7912          DEST = float ## BITS ## _ ## round_to_int(DEST, status);            \
7913          set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
7914                                           MSACSR_RM_MASK) >> MSACSR_RM],     \
7915                                  status);                                    \
7916                                                                              \
7917          set_float_exception_flags(get_float_exception_flags(status) &       \
7918                                    (~float_flag_inexact),                    \
7919                                    status);                                  \
7920                                                                              \
7921          c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7922                                                                              \
7923          if (get_enabled_exceptions(env, c)) {                               \
7924              DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7925          }                                                                   \
7926      } while (0)
7927  
7928  void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7929                           uint32_t ws)
7930  {
7931      wr_t wx, *pwx = &wx;
7932      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7933      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7934      uint32_t i;
7935  
7936      clear_msacsr_cause(env);
7937  
7938      switch (df) {
7939      case DF_WORD:
7940          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7941              MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7942          }
7943          break;
7944      case DF_DOUBLE:
7945          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7946              MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7947          }
7948          break;
7949      default:
7950          g_assert_not_reached();
7951      }
7952  
7953      check_msacsr_cause(env, GETPC());
7954  
7955      msa_move_v(pwd, pwx);
7956  }
7957  
7958  void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7959                            uint32_t ws)
7960  {
7961      wr_t wx, *pwx = &wx;
7962      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7963      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7964      uint32_t i;
7965  
7966      clear_msacsr_cause(env);
7967  
7968      switch (df) {
7969      case DF_WORD:
7970          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7971              /*
7972               * Half precision floats come in two formats: standard
7973               * IEEE and "ARM" format.  The latter gains extra exponent
7974               * range by omitting the NaN/Inf encodings.
7975               */
7976              bool ieee = true;
7977  
7978              MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7979          }
7980          break;
7981      case DF_DOUBLE:
7982          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7983              MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7984          }
7985          break;
7986      default:
7987          g_assert_not_reached();
7988      }
7989  
7990      check_msacsr_cause(env, GETPC());
7991      msa_move_v(pwd, pwx);
7992  }
7993  
7994  void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7995                            uint32_t ws)
7996  {
7997      wr_t wx, *pwx = &wx;
7998      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7999      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8000      uint32_t i;
8001  
8002      clear_msacsr_cause(env);
8003  
8004      switch (df) {
8005      case DF_WORD:
8006          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8007              /*
8008               * Half precision floats come in two formats: standard
8009               * IEEE and "ARM" format.  The latter gains extra exponent
8010               * range by omitting the NaN/Inf encodings.
8011               */
8012              bool ieee = true;
8013  
8014              MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8015          }
8016          break;
8017      case DF_DOUBLE:
8018          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8019              MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8020          }
8021          break;
8022      default:
8023          g_assert_not_reached();
8024      }
8025  
8026      check_msacsr_cause(env, GETPC());
8027      msa_move_v(pwd, pwx);
8028  }
8029  
8030  void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8031                          uint32_t ws)
8032  {
8033      wr_t wx, *pwx = &wx;
8034      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8035      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8036      uint32_t i;
8037  
8038      switch (df) {
8039      case DF_WORD:
8040          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8041              MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8042          }
8043          break;
8044      case DF_DOUBLE:
8045          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8046              MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8047          }
8048          break;
8049      default:
8050          g_assert_not_reached();
8051      }
8052  
8053      msa_move_v(pwd, pwx);
8054  }
8055  
8056  void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8057                          uint32_t ws)
8058  {
8059      wr_t wx, *pwx = &wx;
8060      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8061      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8062      uint32_t i;
8063  
8064      switch (df) {
8065      case DF_WORD:
8066          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8067              MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8068          }
8069          break;
8070      case DF_DOUBLE:
8071          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8072              MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8073          }
8074          break;
8075      default:
8076          g_assert_not_reached();
8077      }
8078  
8079      msa_move_v(pwd, pwx);
8080  }
8081  
8082  void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8083                             uint32_t ws)
8084  {
8085      wr_t wx, *pwx = &wx;
8086      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8087      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8088      uint32_t i;
8089  
8090      clear_msacsr_cause(env);
8091  
8092      switch (df) {
8093      case DF_WORD:
8094          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8095              MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8096          }
8097          break;
8098      case DF_DOUBLE:
8099          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8100              MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8101          }
8102          break;
8103      default:
8104          g_assert_not_reached();
8105      }
8106  
8107      check_msacsr_cause(env, GETPC());
8108  
8109      msa_move_v(pwd, pwx);
8110  }
8111  
8112  void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8113                             uint32_t ws)
8114  {
8115      wr_t wx, *pwx = &wx;
8116      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8117      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8118      uint32_t i;
8119  
8120      clear_msacsr_cause(env);
8121  
8122      switch (df) {
8123      case DF_WORD:
8124          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8125              MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8126          }
8127          break;
8128      case DF_DOUBLE:
8129          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8130              MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8131          }
8132          break;
8133      default:
8134          g_assert_not_reached();
8135      }
8136  
8137      check_msacsr_cause(env, GETPC());
8138  
8139      msa_move_v(pwd, pwx);
8140  }
8141  
8142  #define float32_from_int32 int32_to_float32
8143  #define float32_from_uint32 uint32_to_float32
8144  
8145  #define float64_from_int64 int64_to_float64
8146  #define float64_from_uint64 uint64_to_float64
8147  
8148  void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8149                             uint32_t ws)
8150  {
8151      wr_t wx, *pwx = &wx;
8152      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8153      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8154      uint32_t i;
8155  
8156      clear_msacsr_cause(env);
8157  
8158      switch (df) {
8159      case DF_WORD:
8160          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8161              MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8162          }
8163          break;
8164      case DF_DOUBLE:
8165          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8166              MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8167          }
8168          break;
8169      default:
8170          g_assert_not_reached();
8171      }
8172  
8173      check_msacsr_cause(env, GETPC());
8174  
8175      msa_move_v(pwd, pwx);
8176  }
8177  
8178  void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8179                             uint32_t ws)
8180  {
8181      wr_t wx, *pwx = &wx;
8182      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8183      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8184      uint32_t i;
8185  
8186      clear_msacsr_cause(env);
8187  
8188      switch (df) {
8189      case DF_WORD:
8190          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8191              MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8192          }
8193          break;
8194      case DF_DOUBLE:
8195          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8196              MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8197          }
8198          break;
8199      default:
8200          g_assert_not_reached();
8201      }
8202  
8203      check_msacsr_cause(env, GETPC());
8204  
8205      msa_move_v(pwd, pwx);
8206  }
8207  
8208  /* Data format min and max values */
8209  #define DF_BITS(df) (1 << ((df) + 3))
8210  
8211  /* Element-by-element access macros */
8212  #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8213  
8214  #if !defined(CONFIG_USER_ONLY)
8215  #define MEMOP_IDX(DF)                                                   \
8216      MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
8217                                   mips_env_mmu_index(env));
8218  #else
8219  #define MEMOP_IDX(DF)
8220  #endif
8221  
8222  #if TARGET_BIG_ENDIAN
8223  static inline uint64_t bswap16x4(uint64_t x)
8224  {
8225      uint64_t m = 0x00ff00ff00ff00ffull;
8226      return ((x & m) << 8) | ((x >> 8) & m);
8227  }
8228  
8229  static inline uint64_t bswap32x2(uint64_t x)
8230  {
8231      return ror64(bswap64(x), 32);
8232  }
8233  #endif
8234  
8235  void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8236                       target_ulong addr)
8237  {
8238      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8239      uintptr_t ra = GETPC();
8240      uint64_t d0, d1;
8241  
8242      /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8243      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8244      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8245      pwd->d[0] = d0;
8246      pwd->d[1] = d1;
8247  }
8248  
8249  void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8250                       target_ulong addr)
8251  {
8252      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8253      uintptr_t ra = GETPC();
8254      uint64_t d0, d1;
8255  
8256      /*
8257       * Load 8 bytes at a time.  Use little-endian load, then for
8258       * big-endian target, we must then swap the four halfwords.
8259       */
8260      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8261      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8262  #if TARGET_BIG_ENDIAN
8263      d0 = bswap16x4(d0);
8264      d1 = bswap16x4(d1);
8265  #endif
8266      pwd->d[0] = d0;
8267      pwd->d[1] = d1;
8268  }
8269  
8270  void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8271                       target_ulong addr)
8272  {
8273      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8274      uintptr_t ra = GETPC();
8275      uint64_t d0, d1;
8276  
8277      /*
8278       * Load 8 bytes at a time.  Use little-endian load, then for
8279       * big-endian target, we must then bswap the two words.
8280       */
8281      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8282      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8283  #if TARGET_BIG_ENDIAN
8284      d0 = bswap32x2(d0);
8285      d1 = bswap32x2(d1);
8286  #endif
8287      pwd->d[0] = d0;
8288      pwd->d[1] = d1;
8289  }
8290  
8291  void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8292                       target_ulong addr)
8293  {
8294      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8295      uintptr_t ra = GETPC();
8296      uint64_t d0, d1;
8297  
8298      d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8299      d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8300      pwd->d[0] = d0;
8301      pwd->d[1] = d1;
8302  }
8303  
8304  #define MSA_PAGESPAN(x) \
8305          ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8306  
8307  static inline void ensure_writable_pages(CPUMIPSState *env,
8308                                           target_ulong addr,
8309                                           int mmu_idx,
8310                                           uintptr_t retaddr)
8311  {
8312      /* FIXME: Probe the actual accesses (pass and use a size) */
8313      if (unlikely(MSA_PAGESPAN(addr))) {
8314          /* first page */
8315          probe_write(env, addr, 0, mmu_idx, retaddr);
8316          /* second page */
8317          addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8318          probe_write(env, addr, 0, mmu_idx, retaddr);
8319      }
8320  }
8321  
8322  void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8323                       target_ulong addr)
8324  {
8325      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8326      int mmu_idx = mips_env_mmu_index(env);
8327      uintptr_t ra = GETPC();
8328  
8329      ensure_writable_pages(env, addr, mmu_idx, ra);
8330  
8331      /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8332      cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8333      cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
8334  }
8335  
8336  void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8337                       target_ulong addr)
8338  {
8339      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8340      int mmu_idx = mips_env_mmu_index(env);
8341      uintptr_t ra = GETPC();
8342      uint64_t d0, d1;
8343  
8344      ensure_writable_pages(env, addr, mmu_idx, ra);
8345  
8346      /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8347      d0 = pwd->d[0];
8348      d1 = pwd->d[1];
8349  #if TARGET_BIG_ENDIAN
8350      d0 = bswap16x4(d0);
8351      d1 = bswap16x4(d1);
8352  #endif
8353      cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8354      cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8355  }
8356  
8357  void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8358                       target_ulong addr)
8359  {
8360      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8361      int mmu_idx = mips_env_mmu_index(env);
8362      uintptr_t ra = GETPC();
8363      uint64_t d0, d1;
8364  
8365      ensure_writable_pages(env, addr, mmu_idx, ra);
8366  
8367      /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8368      d0 = pwd->d[0];
8369      d1 = pwd->d[1];
8370  #if TARGET_BIG_ENDIAN
8371      d0 = bswap32x2(d0);
8372      d1 = bswap32x2(d1);
8373  #endif
8374      cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8375      cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8376  }
8377  
8378  void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8379                       target_ulong addr)
8380  {
8381      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8382      int mmu_idx = mips_env_mmu_index(env);
8383      uintptr_t ra = GETPC();
8384  
8385      ensure_writable_pages(env, addr, mmu_idx, GETPC());
8386  
8387      cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8388      cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8389  }
8390