xref: /openbmc/qemu/target/ppc/int_helper.c (revision 93b799fafd9170da3a79a533ea6f73a18de82e22)
1  /*
2   *  PowerPC integer and vector emulation helpers for QEMU.
3   *
4   *  Copyright (c) 2003-2007 Jocelyn Mayer
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "cpu.h"
22  #include "internal.h"
23  #include "qemu/host-utils.h"
24  #include "qemu/log.h"
25  #include "exec/helper-proto.h"
26  #include "crypto/aes.h"
27  #include "crypto/aes-round.h"
28  #include "crypto/clmul.h"
29  #include "fpu/softfloat.h"
30  #include "qapi/error.h"
31  #include "qemu/guest-random.h"
32  #include "tcg/tcg-gvec-desc.h"
33  
34  #include "helper_regs.h"
35  /*****************************************************************************/
36  /* Fixed point operations helpers */
37  
38  static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
39  {
40      if (unlikely(ov)) {
41          env->so = env->ov = env->ov32 = 1;
42      } else {
43          env->ov = env->ov32 = 0;
44      }
45  }
46  
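/*
 * Added commentary: the "divide extended" helpers below take a double-width
 * dividend formed by placing RA in the high half and zeros in the low half,
 * e.g. DIVWEU computes ((uint64_t)RA << 32) / RB.  When the quotient does not
 * fit in the result width, the value returned is undefined and, if OE is set,
 * OV/OV32/SO are updated via helper_update_ov_legacy().
 */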
47  target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb,
48                             uint32_t oe)
49  {
50      uint64_t rt = 0;
51      int overflow = 0;
52  
53      uint64_t dividend = (uint64_t)ra << 32;
54      uint64_t divisor = (uint32_t)rb;
55  
56      if (unlikely(divisor == 0)) {
57          overflow = 1;
58      } else {
59          rt = dividend / divisor;
60          overflow = rt > UINT32_MAX;
61      }
62  
63      if (unlikely(overflow)) {
64          rt = 0; /* Undefined */
65      }
66  
67      if (oe) {
68          helper_update_ov_legacy(env, overflow);
69      }
70  
71      return (target_ulong)rt;
72  }
73  
74  target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb,
75                            uint32_t oe)
76  {
77      int64_t rt = 0;
78      int overflow = 0;
79  
80      int64_t dividend = (int64_t)ra << 32;
81      int64_t divisor = (int64_t)((int32_t)rb);
82  
83      if (unlikely((divisor == 0) ||
84                   ((divisor == -1ull) && (dividend == INT64_MIN)))) {
85          overflow = 1;
86      } else {
87          rt = dividend / divisor;
88          overflow = rt != (int32_t)rt;
89      }
90  
91      if (unlikely(overflow)) {
92          rt = 0; /* Undefined */
93      }
94  
95      if (oe) {
96          helper_update_ov_legacy(env, overflow);
97      }
98  
99      return (target_ulong)rt;
100  }
101  
102  #if defined(TARGET_PPC64)
103  
104  uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
105  {
106      uint64_t rt = 0;
107      int overflow = 0;
108  
109      if (unlikely(rb == 0 || ra >= rb)) {
110          overflow = 1;
111          rt = 0; /* Undefined */
112      } else {
113          divu128(&rt, &ra, rb);
114      }
115  
116      if (oe) {
117          helper_update_ov_legacy(env, overflow);
118      }
119  
120      return rt;
121  }
122  
123  uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
124  {
125      uint64_t rt = 0;
126      int64_t ra = (int64_t)rau;
127      int64_t rb = (int64_t)rbu;
128      int overflow = 0;
129  
130      if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
131          overflow = 1;
132          rt = 0; /* Undefined */
133      } else {
134          divs128(&rt, &ra, rb);
135      }
136  
137      if (oe) {
138          helper_update_ov_legacy(env, overflow);
139      }
140  
141      return rt;
142  }
143  
144  #endif
145  
146  
147  #if defined(TARGET_PPC64)
148  /* if x = 0xab, returns 0xabababababababab */
149  #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
150  
151  /*
152   * Subtract 1 from each byte, AND with the inverse of the original value,
153   * and check whether the MSB is set in each byte.
154   * e.g. for a zero byte: ((0x00 - 0x01) & ~(0x00)) & 0x80
155   *                     = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
156   */
157  #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
158  
159  /* When you XOR the pattern and there is a match, that byte will be zero */
160  #define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
161  
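/*
 * Illustrative example (added commentary, values chosen arbitrarily): with
 * ra = 0xab and rb = 0x0012ab3456789abc, rb ^ pattern(0xab) turns the
 * matching byte into 0x00, haszero() flags it, and helper_CMPEQB() below
 * returns CRF_GT.
 */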
162  uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb)
163  {
164      return hasvalue(rb, ra) ? CRF_GT : 0;
165  }
166  
167  #undef pattern
168  #undef haszero
169  #undef hasvalue
170  
171  /*
172   * Return a random number.
173   */
174  uint64_t helper_DARN32(void)
175  {
176      Error *err = NULL;
177      uint32_t ret;
178  
179      if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
180          qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
181                        error_get_pretty(err));
182          error_free(err);
183          return -1;
184      }
185  
186      return ret;
187  }
188  
189  uint64_t helper_DARN64(void)
190  {
191      Error *err = NULL;
192      uint64_t ret;
193  
194      if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
195          qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
196                        error_get_pretty(err));
197          error_free(err);
198          return -1;
199      }
200  
201      return ret;
202  }
203  
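/*
 * Bit permute doubleword (added commentary): for i = 0..7, the i-th
 * least-significant byte of RS selects a bit of RB by big-endian bit number
 * (PPC_BIT); bit i of the result is set iff that index is below 64 and the
 * selected bit of RB is set.
 */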
204  uint64_t helper_BPERMD(uint64_t rs, uint64_t rb)
205  {
206      int i;
207      uint64_t ra = 0;
208  
209      for (i = 0; i < 8; i++) {
210          int index = (rs >> (i * 8)) & 0xFF;
211          if (index < 64) {
212              if (rb & PPC_BIT(index)) {
213                  ra |= 1 << i;
214              }
215          }
216      }
217      return ra;
218  }
219  
220  #endif
221  
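/*
 * cmpb (added commentary): each byte of the result is set to 0xFF where the
 * corresponding bytes of RS and RB are equal, and to 0x00 where they differ.
 */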
222  target_ulong helper_CMPB(target_ulong rs, target_ulong rb)
223  {
224      target_ulong mask = 0xff;
225      target_ulong ra = 0;
226      int i;
227  
228      for (i = 0; i < sizeof(target_ulong); i++) {
229          if ((rs & mask) == (rb & mask)) {
230              ra |= mask;
231          }
232          mask <<= 8;
233      }
234      return ra;
235  }
236  
237  /* shift right arithmetic helper */
238  target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
239                           target_ulong shift)
240  {
241      int32_t ret;
242  
243      if (likely(!(shift & 0x20))) {
244          if (likely((uint32_t)shift != 0)) {
245              shift &= 0x1f;
246              ret = (int32_t)value >> shift;
247              if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
248                  env->ca32 = env->ca = 0;
249              } else {
250                  env->ca32 = env->ca = 1;
251              }
252          } else {
253              ret = (int32_t)value;
254              env->ca32 = env->ca = 0;
255          }
256      } else {
257          ret = (int32_t)value >> 31;
258          env->ca32 = env->ca = (ret != 0);
259      }
260      return (target_long)ret;
261  }
262  
263  #if defined(TARGET_PPC64)
264  target_ulong helper_srad(CPUPPCState *env, target_ulong value,
265                           target_ulong shift)
266  {
267      int64_t ret;
268  
269      if (likely(!(shift & 0x40))) {
270          if (likely((uint64_t)shift != 0)) {
271              shift &= 0x3f;
272              ret = (int64_t)value >> shift;
273              if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
274                  env->ca32 = env->ca = 0;
275              } else {
276                  env->ca32 = env->ca = 1;
277              }
278          } else {
279              ret = (int64_t)value;
280              env->ca32 = env->ca = 0;
281          }
282      } else {
283          ret = (int64_t)value >> 63;
284          env->ca32 = env->ca = (ret != 0);
285      }
286      return ret;
287  }
288  #endif
289  
290  #if defined(TARGET_PPC64)
291  target_ulong helper_POPCNTB(target_ulong val)
292  {
293      /* Note that we don't fold past bytes */
294      val = (val & 0x5555555555555555ULL) + ((val >>  1) &
295                                             0x5555555555555555ULL);
296      val = (val & 0x3333333333333333ULL) + ((val >>  2) &
297                                             0x3333333333333333ULL);
298      val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
299                                             0x0f0f0f0f0f0f0f0fULL);
300      return val;
301  }
302  
303  target_ulong helper_POPCNTW(target_ulong val)
304  {
305      /* Note that we don't fold past words.  */
306      val = (val & 0x5555555555555555ULL) + ((val >>  1) &
307                                             0x5555555555555555ULL);
308      val = (val & 0x3333333333333333ULL) + ((val >>  2) &
309                                             0x3333333333333333ULL);
310      val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
311                                             0x0f0f0f0f0f0f0f0fULL);
312      val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
313                                             0x00ff00ff00ff00ffULL);
314      val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
315                                             0x0000ffff0000ffffULL);
316      return val;
317  }
318  #else
319  target_ulong helper_POPCNTB(target_ulong val)
320  {
321      /* Note that we don't fold past bytes */
322      val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
323      val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
324      val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
325      return val;
326  }
327  #endif
328  
329  uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
330  {
331      /*
332       * Instead of processing the mask bit-by-bit from the most significant to
333       * the least significant bit, as described in PowerISA, we'll handle it in
334       * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
335       * between ctz and cto, we negate the mask at the end of each iteration.
336       */
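    /*
     * Illustrative example (added commentary, not from the ISA text): with
     * mask = 0x00000000FFFFFFFFull the mask=1 bits are already the low half,
     * so the result equals src; a sparse mask instead gathers the selected
     * bits of src contiguously at the low end and the remaining bits at the
     * high end, each group keeping its original order.
     */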
337      target_ulong m, left = 0, right = 0;
338      unsigned int n, i = 64;
339      bool bit = false; /* tracks if we are processing zeros or ones */
340  
341      if (mask == 0 || mask == -1) {
342          return src;
343      }
344  
345      /* Processes the mask in blocks, from LSB to MSB */
346      while (i) {
347          /* Find how many bits we should take */
348          n = ctz64(mask);
349          if (n > i) {
350              n = i;
351          }
352  
353          /*
354           * Extract the 'n' trailing bits of src and put them in the leading
355           * 'n' bits of 'right' or 'left', pushing down the previously
356           * extracted values.
357           */
358          m = (1ll << n) - 1;
359          if (bit) {
360              right = ror64(right | (src & m), n);
361          } else {
362              left = ror64(left | (src & m), n);
363          }
364  
365          /*
366           * Discards the processed bits from 'src' and 'mask'. Note that we are
367           * removing 'n' trailing zeros from 'mask', but the logical shift will
368           * add 'n' leading zeros back, so the population count of 'mask' is kept
369           * the same.
370           */
371          src >>= n;
372          mask >>= n;
373          i -= n;
374          bit = !bit;
375          mask = ~mask;
376      }
377  
378      /*
379       * At the end, 'right' has been ror'ed by ctpop(mask) bits in total. To put
380       * it back in place, we shift it right by a further 64 - ctpop(mask) bits.
381       */
382      if (bit) {
383          n = ctpop64(mask);
384      } else {
385          n = 64 - ctpop64(mask);
386      }
387  
388      return left | (right >> n);
389  }
390  
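/*
 * Parallel bit deposit/extract (added commentary): PDEPD scatters the low
 * ctpop(mask) bits of src to the set positions of mask; PEXTD gathers the
 * bits of src at the set positions of mask into the low bits of the result.
 * For instance, with mask = 0x0101010101010101ull, PDEPD places src bit i
 * into result bit 8*i, and PEXTD performs the inverse gather.
 */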
391  uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
392  {
393      int i, o;
394      uint64_t result = 0;
395  
396      if (mask == -1) {
397          return src;
398      }
399  
400      for (i = 0; mask != 0; i++) {
401          o = ctz64(mask);
402          mask &= mask - 1;
403          result |= ((src >> i) & 1) << o;
404      }
405  
406      return result;
407  }
408  
409  uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
410  {
411      int i, o;
412      uint64_t result = 0;
413  
414      if (mask == -1) {
415          return src;
416      }
417  
418      for (o = 0; mask != 0; o++) {
419          i = ctz64(mask);
420          mask &= mask - 1;
421          result |= ((src >> i) & 1) << o;
422      }
423  
424      return result;
425  }
426  
427  /*****************************************************************************/
428  /* Altivec extension helpers */
429  #if HOST_BIG_ENDIAN
430  #define VECTOR_FOR_INORDER_I(index, element)                    \
431      for (index = 0; index < ARRAY_SIZE(r->element); index++)
432  #else
433  #define VECTOR_FOR_INORDER_I(index, element)                    \
434      for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
435  #endif
436  
437  /* Saturating arithmetic helpers.  */
438  #define SATCVT(from, to, from_type, to_type, min, max)          \
439      static inline to_type cvt##from##to(from_type x, int *sat)  \
440      {                                                           \
441          to_type r;                                              \
442                                                                  \
443          if (x < (from_type)min) {                               \
444              r = min;                                            \
445              *sat = 1;                                           \
446          } else if (x > (from_type)max) {                        \
447              r = max;                                            \
448              *sat = 1;                                           \
449          } else {                                                \
450              r = x;                                              \
451          }                                                       \
452          return r;                                               \
453      }
454  #define SATCVTU(from, to, from_type, to_type, min, max)         \
455      static inline to_type cvt##from##to(from_type x, int *sat)  \
456      {                                                           \
457          to_type r;                                              \
458                                                                  \
459          if (x > (from_type)max) {                               \
460              r = max;                                            \
461              *sat = 1;                                           \
462          } else {                                                \
463              r = x;                                              \
464          }                                                       \
465          return r;                                               \
466      }
467  SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
468  SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
469  SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
470  
471  SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
472  SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
473  SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
474  SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
475  SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
476  SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
477  #undef SATCVT
478  #undef SATCVTU
479  
480  void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
481  {
482      ppc_store_vscr(env, vscr);
483  }
484  
485  uint32_t helper_mfvscr(CPUPPCState *env)
486  {
487      return ppc_get_vscr(env);
488  }
489  
490  static inline void set_vscr_sat(CPUPPCState *env)
491  {
492      /* The choice of non-zero value is arbitrary.  */
493      env->vscr_sat.u32[0] = 1;
494  }
495  
496  /* vprtybq */
497  void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
498  {
499      uint64_t res = b->u64[0] ^ b->u64[1];
500      res ^= res >> 32;
501      res ^= res >> 16;
502      res ^= res >> 8;
503      r->VsrD(1) = res & 1;
504      r->VsrD(0) = 0;
505  }
506  
507  #define VARITHFP(suffix, func)                                          \
508      void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
509                            ppc_avr_t *b)                                 \
510      {                                                                   \
511          int i;                                                          \
512                                                                          \
513          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
514              r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
515          }                                                               \
516      }
517  VARITHFP(addfp, float32_add)
518  VARITHFP(subfp, float32_sub)
519  VARITHFP(minfp, float32_min)
520  VARITHFP(maxfp, float32_max)
521  #undef VARITHFP
522  
523  #define VARITHFPFMA(suffix, type)                                       \
524      void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
525                             ppc_avr_t *b, ppc_avr_t *c)                  \
526      {                                                                   \
527          int i;                                                          \
528          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
529              r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
530                                         type, &env->vec_status);         \
531          }                                                               \
532      }
533  VARITHFPFMA(maddfp, 0);
534  VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
535  #undef VARITHFPFMA
536  
537  #define VARITHSAT_CASE(type, op, cvt, element)                          \
538      {                                                                   \
539          type result = (type)a->element[i] op (type)b->element[i];       \
540          r->element[i] = cvt(result, &sat);                              \
541      }
542  
543  #define VARITHSAT_DO(name, op, optype, cvt, element)                    \
544      void helper_V##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
545                          ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
546      {                                                                   \
547          int sat = 0;                                                    \
548          int i;                                                          \
549                                                                          \
550          for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
551              VARITHSAT_CASE(optype, op, cvt, element);                   \
552          }                                                               \
553          if (sat) {                                                      \
554              vscr_sat->u32[0] = 1;                                       \
555          }                                                               \
556      }
557  #define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
558      VARITHSAT_DO(ADDS##suffix##S, +, optype, cvt, element)      \
559      VARITHSAT_DO(SUBS##suffix##S, -, optype, cvt, element)
560  #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
561      VARITHSAT_DO(ADDU##suffix##S, +, optype, cvt, element)      \
562      VARITHSAT_DO(SUBU##suffix##S, -, optype, cvt, element)
563  VARITHSAT_SIGNED(B, s8, int16_t, cvtshsb)
564  VARITHSAT_SIGNED(H, s16, int32_t, cvtswsh)
565  VARITHSAT_SIGNED(W, s32, int64_t, cvtsdsw)
566  VARITHSAT_UNSIGNED(B, u8, uint16_t, cvtshub)
567  VARITHSAT_UNSIGNED(H, u16, uint32_t, cvtswuh)
568  VARITHSAT_UNSIGNED(W, u32, uint64_t, cvtsduw)
569  #undef VARITHSAT_CASE
570  #undef VARITHSAT_DO
571  #undef VARITHSAT_SIGNED
572  #undef VARITHSAT_UNSIGNED
573  
574  #define VAVG(name, element, etype)                                          \
575      void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
576      {                                                                       \
577          int i;                                                              \
578                                                                              \
579          for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
580              etype x = (etype)a->element[i] + (etype)b->element[i] + 1;      \
581              r->element[i] = x >> 1;                                         \
582          }                                                                   \
583      }
584  
585  VAVG(VAVGSB, s8, int16_t)
586  VAVG(VAVGUB, u8, uint16_t)
587  VAVG(VAVGSH, s16, int32_t)
588  VAVG(VAVGUH, u16, uint32_t)
589  VAVG(VAVGSW, s32, int64_t)
590  VAVG(VAVGUW, u32, uint64_t)
591  #undef VAVG
592  
593  #define VABSDU(name, element)                                           \
594  void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
595  {                                                                       \
596      int i;                                                              \
597                                                                          \
598      for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
599          r->element[i] = (a->element[i] > b->element[i]) ?               \
600              (a->element[i] - b->element[i]) :                           \
601              (b->element[i] - a->element[i]);                            \
602      }                                                                   \
603  }
604  
605  /*
606   * VABSDU - Vector absolute difference unsigned
607   *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
608   *   element - element type to access from vector
609   */
610  VABSDU(VABSDUB, u8)
611  VABSDU(VABSDUH, u16)
612  VABSDU(VABSDUW, u32)
613  #undef VABSDU
614  
615  #define VCF(suffix, cvt, element)                                       \
616      void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
617                              ppc_avr_t *b, uint32_t uim)                 \
618      {                                                                   \
619          int i;                                                          \
620                                                                          \
621          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
622              float32 t = cvt(b->element[i], &env->vec_status);           \
623              r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
624          }                                                               \
625      }
626  VCF(ux, uint32_to_float32, u32)
627  VCF(sx, int32_to_float32, s32)
628  #undef VCF
629  
630  #define VCMPNEZ(NAME, ELEM) \
631  void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
632  {                                                                           \
633      for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
634          t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
635                        (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
636      }                                                                       \
637  }
638  VCMPNEZ(VCMPNEZB, u8)
639  VCMPNEZ(VCMPNEZH, u16)
640  VCMPNEZ(VCMPNEZW, u32)
641  #undef VCMPNEZ
642  
643  #define VCMPFP_DO(suffix, compare, order, record)                       \
644      void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
645                               ppc_avr_t *a, ppc_avr_t *b)                \
646      {                                                                   \
647          uint32_t ones = (uint32_t)-1;                                   \
648          uint32_t all = ones;                                            \
649          uint32_t none = 0;                                              \
650          int i;                                                          \
651                                                                          \
652          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
653              uint32_t result;                                            \
654              FloatRelation rel =                                         \
655                  float32_compare_quiet(a->f32[i], b->f32[i],             \
656                                        &env->vec_status);                \
657              if (rel == float_relation_unordered) {                      \
658                  result = 0;                                             \
659              } else if (rel compare order) {                             \
660                  result = ones;                                          \
661              } else {                                                    \
662                  result = 0;                                             \
663              }                                                           \
664              r->u32[i] = result;                                         \
665              all &= result;                                              \
666              none |= result;                                             \
667          }                                                               \
668          if (record) {                                                   \
669              env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
670          }                                                               \
671      }
672  #define VCMPFP(suffix, compare, order)          \
673      VCMPFP_DO(suffix, compare, order, 0)        \
674      VCMPFP_DO(suffix##_dot, compare, order, 1)
675  VCMPFP(eqfp, ==, float_relation_equal)
676  VCMPFP(gefp, !=, float_relation_less)
677  VCMPFP(gtfp, ==, float_relation_greater)
678  #undef VCMPFP_DO
679  #undef VCMPFP
680  
681  static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
682                                      ppc_avr_t *a, ppc_avr_t *b, int record)
683  {
684      int i;
685      int all_in = 0;
686  
687      for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
688          FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
689                                                       &env->vec_status);
690          if (le_rel == float_relation_unordered) {
691              r->u32[i] = 0xc0000000;
692              all_in = 1;
693          } else {
694              float32 bneg = float32_chs(b->f32[i]);
695              FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
696                                                           &env->vec_status);
697              int le = le_rel != float_relation_greater;
698              int ge = ge_rel != float_relation_less;
699  
700              r->u32[i] = ((!le) << 31) | ((!ge) << 30);
701              all_in |= (!le | !ge);
702          }
703      }
704      if (record) {
705          env->crf[6] = (all_in == 0) << 1;
706      }
707  }
708  
helper_vcmpbfp(CPUPPCState * env,ppc_avr_t * r,ppc_avr_t * a,ppc_avr_t * b)709  void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
710  {
711      vcmpbfp_internal(env, r, a, b, 0);
712  }
713  
helper_vcmpbfp_dot(CPUPPCState * env,ppc_avr_t * r,ppc_avr_t * a,ppc_avr_t * b)714  void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
715                          ppc_avr_t *b)
716  {
717      vcmpbfp_internal(env, r, a, b, 1);
718  }
719  
720  #define VCT(suffix, satcvt, element)                                    \
721      void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
722                              ppc_avr_t *b, uint32_t uim)                 \
723      {                                                                   \
724          int i;                                                          \
725          int sat = 0;                                                    \
726          float_status s = env->vec_status;                               \
727                                                                          \
728          set_float_rounding_mode(float_round_to_zero, &s);               \
729          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
730              if (float32_is_any_nan(b->f32[i])) {                        \
731                  r->element[i] = 0;                                      \
732              } else {                                                    \
733                  float64 t = float32_to_float64(b->f32[i], &s);          \
734                  int64_t j;                                              \
735                                                                          \
736                  t = float64_scalbn(t, uim, &s);                         \
737                  j = float64_to_int64(t, &s);                            \
738                  r->element[i] = satcvt(j, &sat);                        \
739              }                                                           \
740          }                                                               \
741          if (sat) {                                                      \
742              set_vscr_sat(env);                                          \
743          }                                                               \
744      }
745  VCT(uxs, cvtsduw, u32)
746  VCT(sxs, cvtsdsw, s32)
747  #undef VCT
748  
749  typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
750  
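/*
 * Added commentary: each ger_rank<N> helper computes a dot product of the N
 * packed sub-elements of one 32-bit word of the two source operands (4-bit,
 * 8-bit or 16-bit lanes respectively), with the PMSK bits in 'mask' selecting
 * which partial products contribute to the sum.
 */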
751  static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
752  {
753      int64_t psum = 0;
754      for (int i = 0; i < 8; i++, mask >>= 1) {
755          if (mask & 1) {
756              psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
757          }
758      }
759      return psum;
760  }
761  
762  static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
763  {
764      int64_t psum = 0;
765      for (int i = 0; i < 4; i++, mask >>= 1) {
766          if (mask & 1) {
767              psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
768          }
769      }
770      return psum;
771  }
772  
773  static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
774  {
775      int64_t psum = 0;
776      for (int i = 0; i < 2; i++, mask >>= 1) {
777          if (mask & 1) {
778              psum += (int64_t)sextract32(a, 16 * i, 16) *
779                               sextract32(b, 16 * i, 16);
780          }
781      }
782      return psum;
783  }
784  
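/*
 * Added commentary: xviger() is the common loop for the MMA xvi*ger* helpers
 * below. Accumulator element (i, j) receives the masked dot product of word i
 * of 'a' and word j of 'b' (gated by the XMSK/YMSK bits), optionally added to
 * the previous accumulator value ('acc') and optionally saturated to the
 * int32_t range ('sat').
 */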
785  static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t  *at,
786                     uint32_t mask, bool sat, bool acc, do_ger ger)
787  {
788      uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
789              xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
790              ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
791      uint8_t xmsk_bit, ymsk_bit;
792      int64_t psum;
793      int i, j;
794      for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
795          for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
796              if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
797                  psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
798                  if (acc) {
799                      psum += at[i].VsrSW(j);
800                  }
801                  if (sat && psum > INT32_MAX) {
802                      set_vscr_sat(env);
803                      at[i].VsrSW(j) = INT32_MAX;
804                  } else if (sat && psum < INT32_MIN) {
805                      set_vscr_sat(env);
806                      at[i].VsrSW(j) = INT32_MIN;
807                  } else {
808                      at[i].VsrSW(j) = (int32_t) psum;
809                  }
810              } else {
811                  at[i].VsrSW(j) = 0;
812              }
813          }
814      }
815  }
816  
817  QEMU_FLATTEN
818  void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
819                       ppc_acc_t *at, uint32_t mask)
820  {
821      xviger(env, a, b, at, mask, false, false, ger_rank8);
822  }
823  
824  QEMU_FLATTEN
825  void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
826                         ppc_acc_t *at, uint32_t mask)
827  {
828      xviger(env, a, b, at, mask, false, true, ger_rank8);
829  }
830  
831  QEMU_FLATTEN
832  void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
833                       ppc_acc_t *at, uint32_t mask)
834  {
835      xviger(env, a, b, at, mask, false, false, ger_rank4);
836  }
837  
838  QEMU_FLATTEN
839  void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
840                         ppc_acc_t *at, uint32_t mask)
841  {
842      xviger(env, a, b, at, mask, false, true, ger_rank4);
843  }
844  
845  QEMU_FLATTEN
846  void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
847                          ppc_acc_t *at, uint32_t mask)
848  {
849      xviger(env, a, b, at, mask, true, true, ger_rank4);
850  }
851  
852  QEMU_FLATTEN
853  void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
854                        ppc_acc_t *at, uint32_t mask)
855  {
856      xviger(env, a, b, at, mask, false, false, ger_rank2);
857  }
858  
859  QEMU_FLATTEN
860  void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
861                         ppc_acc_t *at, uint32_t mask)
862  {
863      xviger(env, a, b, at, mask, true, false, ger_rank2);
864  }
865  
866  QEMU_FLATTEN
867  void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
868                          ppc_acc_t *at, uint32_t mask)
869  {
870      xviger(env, a, b, at, mask, false, true, ger_rank2);
871  }
872  
873  QEMU_FLATTEN
874  void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
875                           ppc_acc_t *at, uint32_t mask)
876  {
877      xviger(env, a, b, at, mask, true, true, ger_rank2);
878  }
879  
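/*
 * Added commentary: vclzlsbb/vctzlsbb count, starting from the most
 * significant (resp. least significant) byte of the vector, how many
 * consecutive bytes have a clear least-significant bit before the first byte
 * whose bit 0 is set.
 */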
880  target_ulong helper_vclzlsbb(ppc_avr_t *r)
881  {
882      target_ulong count = 0;
883      int i;
884      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
885          if (r->VsrB(i) & 0x01) {
886              break;
887          }
888          count++;
889      }
890      return count;
891  }
892  
893  target_ulong helper_vctzlsbb(ppc_avr_t *r)
894  {
895      target_ulong count = 0;
896      int i;
897      for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
898          if (r->VsrB(i) & 0x01) {
899              break;
900          }
901          count++;
902      }
903      return count;
904  }
905  
906  void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
907                        ppc_avr_t *b, ppc_avr_t *c)
908  {
909      int sat = 0;
910      int i;
911  
912      for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
913          int32_t prod = a->s16[i] * b->s16[i];
914          int32_t t = (int32_t)c->s16[i] + (prod >> 15);
915  
916          r->s16[i] = cvtswsh(t, &sat);
917      }
918  
919      if (sat) {
920          set_vscr_sat(env);
921      }
922  }
923  
924  void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
925                         ppc_avr_t *b, ppc_avr_t *c)
926  {
927      int sat = 0;
928      int i;
929  
930      for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
931          int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
932          int32_t t = (int32_t)c->s16[i] + (prod >> 15);
933          r->s16[i] = cvtswsh(t, &sat);
934      }
935  
936      if (sat) {
937          set_vscr_sat(env);
938      }
939  }
940  
941  void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
942                        uint32_t v)
943  {
944      int i;
945  
946      for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
947          int32_t prod = a->s16[i] * b->s16[i];
948          r->s16[i] = (int16_t) (prod + c->s16[i]);
949      }
950  }
951  
952  #define VMRG_DO(name, element, access, ofs)                                  \
953      void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
954      {                                                                        \
955          ppc_avr_t result;                                                    \
956          int i, half = ARRAY_SIZE(r->element) / 2;                            \
957                                                                               \
958          for (i = 0; i < half; i++) {                                         \
959              result.access(i * 2 + 0) = a->access(i + ofs);                   \
960              result.access(i * 2 + 1) = b->access(i + ofs);                   \
961          }                                                                    \
962          *r = result;                                                         \
963      }
964  
965  #define VMRG(suffix, element, access)          \
966      VMRG_DO(mrgl##suffix, element, access, half)   \
967      VMRG_DO(mrgh##suffix, element, access, 0)
968  VMRG(b, u8, VsrB)
969  VMRG(h, u16, VsrH)
970  VMRG(w, u32, VsrW)
971  #undef VMRG_DO
972  #undef VMRG
973  
974  void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
975  {
976      int32_t prod[16];
977      int i;
978  
979      for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
980          prod[i] = (int32_t)a->s8[i] * b->u8[i];
981      }
982  
983      VECTOR_FOR_INORDER_I(i, s32) {
984          r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
985              prod[4 * i + 2] + prod[4 * i + 3];
986      }
987  }
988  
989  void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
990  {
991      int32_t prod[8];
992      int i;
993  
994      for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
995          prod[i] = a->s16[i] * b->s16[i];
996      }
997  
998      VECTOR_FOR_INORDER_I(i, s32) {
999          r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1000      }
1001  }
1002  
1003  void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1004                       ppc_avr_t *b, ppc_avr_t *c)
1005  {
1006      int32_t prod[8];
1007      int i;
1008      int sat = 0;
1009  
1010      for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1011          prod[i] = (int32_t)a->s16[i] * b->s16[i];
1012      }
1013  
1014      VECTOR_FOR_INORDER_I(i, s32) {
1015          int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016  
1017          r->u32[i] = cvtsdsw(t, &sat);
1018      }
1019  
1020      if (sat) {
1021          set_vscr_sat(env);
1022      }
1023  }
1024  
1025  void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1026  {
1027      uint16_t prod[16];
1028      int i;
1029  
1030      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1031          prod[i] = a->u8[i] * b->u8[i];
1032      }
1033  
1034      VECTOR_FOR_INORDER_I(i, u32) {
1035          r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1036              prod[4 * i + 2] + prod[4 * i + 3];
1037      }
1038  }
1039  
1040  void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1041  {
1042      uint32_t prod[8];
1043      int i;
1044  
1045      for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1046          prod[i] = a->u16[i] * b->u16[i];
1047      }
1048  
1049      VECTOR_FOR_INORDER_I(i, u32) {
1050          r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1051      }
1052  }
1053  
1054  void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1055                       ppc_avr_t *b, ppc_avr_t *c)
1056  {
1057      uint32_t prod[8];
1058      int i;
1059      int sat = 0;
1060  
1061      for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1062          prod[i] = a->u16[i] * b->u16[i];
1063      }
1064  
1065      VECTOR_FOR_INORDER_I(i, s32) {
1066          uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1067  
1068          r->u32[i] = cvtuduw(t, &sat);
1069      }
1070  
1071      if (sat) {
1072          set_vscr_sat(env);
1073      }
1074  }
1075  
1076  #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
1077      void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
1078      {                                                                   \
1079          int i;                                                          \
1080                                                                          \
1081          for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
1082              r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
1083                                       (cast)b->mul_access(i);            \
1084          }                                                               \
1085      }
1086  
1087  #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
1088      void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
1089      {                                                                   \
1090          int i;                                                          \
1091                                                                          \
1092          for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
1093              r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
1094                                       (cast)b->mul_access(i + 1);        \
1095          }                                                               \
1096      }
1097  
1098  #define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
1099      VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast)  \
1100      VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1101  VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1102  VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1103  VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1104  VMUL(UB, u8, VsrB, VsrH, uint16_t)
1105  VMUL(UH, u16, VsrH, VsrW, uint32_t)
1106  VMUL(UW, u32, VsrW, VsrD, uint64_t)
1107  #undef VMUL_DO_EVN
1108  #undef VMUL_DO_ODD
1109  #undef VMUL
1110  
1111  void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1112                      target_ulong uim)
1113  {
1114      int i, idx;
1115      ppc_vsr_t tmp = { .u64 = {0, 0} };
1116  
1117      for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1118          if ((pcv->VsrB(i) >> 5) == uim) {
1119              idx = pcv->VsrB(i) & 0x1f;
1120              if (idx < ARRAY_SIZE(t->u8)) {
1121                  tmp.VsrB(i) = s0->VsrB(idx);
1122              } else {
1123                  tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1124              }
1125          }
1126      }
1127  
1128      *t = tmp;
1129  }
1130  
1131  void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1132  {
1133      Int128 neg1 = int128_makes64(-1);
1134      Int128 int128_min = int128_make128(0, INT64_MIN);
1135      if (likely(int128_nz(b->s128) &&
1136                (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1137          t->s128 = int128_divs(a->s128, b->s128);
1138      } else {
1139          t->s128 = a->s128; /* Undefined behavior */
1140      }
1141  }
1142  
1143  void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1144  {
1145      if (int128_nz(b->s128)) {
1146          t->s128 = int128_divu(a->s128, b->s128);
1147      } else {
1148          t->s128 = a->s128; /* Undefined behavior */
1149      }
1150  }
1151  
1152  void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1153  {
1154      int i;
1155      int64_t high;
1156      uint64_t low;
1157      for (i = 0; i < 2; i++) {
1158          high = a->s64[i];
1159          low = 0;
1160          if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1161              t->s64[i] = a->s64[i]; /* Undefined behavior */
1162          } else {
1163              divs128(&low, &high, b->s64[i]);
1164              t->s64[i] = low;
1165          }
1166      }
1167  }
1168  
1169  void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1170  {
1171      int i;
1172      uint64_t high, low;
1173      for (i = 0; i < 2; i++) {
1174          high = a->u64[i];
1175          low = 0;
1176          if (unlikely(!b->u64[i])) {
1177              t->u64[i] = a->u64[i]; /* Undefined behavior */
1178          } else {
1179              divu128(&low, &high, b->u64[i]);
1180              t->u64[i] = low;
1181          }
1182      }
1183  }
1184  
1185  void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1186  {
1187      Int128 high, low;
1188      Int128 int128_min = int128_make128(0, INT64_MIN);
1189      Int128 neg1 = int128_makes64(-1);
1190  
1191      high = a->s128;
1192      low = int128_zero();
1193      if (unlikely(!int128_nz(b->s128) ||
1194                   (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1195          t->s128 = a->s128; /* Undefined behavior */
1196      } else {
1197          divs256(&low, &high, b->s128);
1198          t->s128 = low;
1199      }
1200  }
1201  
1202  void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1203  {
1204      Int128 high, low;
1205  
1206      high = a->s128;
1207      low = int128_zero();
1208      if (unlikely(!int128_nz(b->s128))) {
1209          t->s128 = a->s128; /* Undefined behavior */
1210      } else {
1211          divu256(&low, &high, b->s128);
1212          t->s128 = low;
1213      }
1214  }
1215  
1216  void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1217  {
1218      Int128 neg1 = int128_makes64(-1);
1219      Int128 int128_min = int128_make128(0, INT64_MIN);
1220      if (likely(int128_nz(b->s128) &&
1221                (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1222          t->s128 = int128_rems(a->s128, b->s128);
1223      } else {
1224          t->s128 = int128_zero(); /* Undefined behavior */
1225      }
1226  }
1227  
1228  void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1229  {
1230      if (likely(int128_nz(b->s128))) {
1231          t->s128 = int128_remu(a->s128, b->s128);
1232      } else {
1233          t->s128 = int128_zero(); /* Undefined behavior */
1234      }
1235  }
1236  
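/*
 * Added commentary: vperm selects each result byte from the 32-byte
 * concatenation of 'a' and 'b' using the low five bits of the corresponding
 * byte of 'c'; vpermr (helper_VPERMR below) indexes the selected source half
 * from the opposite end (15 - index).
 */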
1237  void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1238  {
1239      ppc_avr_t result;
1240      int i;
1241  
1242      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1243          int s = c->VsrB(i) & 0x1f;
1244          int index = s & 0xf;
1245  
1246          if (s & 0x10) {
1247              result.VsrB(i) = b->VsrB(index);
1248          } else {
1249              result.VsrB(i) = a->VsrB(index);
1250          }
1251      }
1252      *r = result;
1253  }
1254  
1255  void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1256  {
1257      ppc_avr_t result;
1258      int i;
1259  
1260      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1261          int s = c->VsrB(i) & 0x1f;
1262          int index = 15 - (s & 0xf);
1263  
1264          if (s & 0x10) {
1265              result.VsrB(i) = a->VsrB(index);
1266          } else {
1267              result.VsrB(i) = b->VsrB(index);
1268          }
1269      }
1270      *r = result;
1271  }
1272  
1273  #define XXGENPCV_BE_EXP(NAME, SZ) \
1274  void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1275  {                                                                   \
1276      ppc_vsr_t tmp;                                                  \
1277                                                                      \
1278      /* Initialize tmp with the result of an all-zeros mask */       \
1279      tmp.VsrD(0) = 0x1011121314151617;                               \
1280      tmp.VsrD(1) = 0x18191A1B1C1D1E1F;                               \
1281                                                                      \
1282      /* Iterate over the most significant byte of each element */    \
1283      for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1284          if (b->VsrB(i) & 0x80) {                                    \
1285              /* Update each byte of the element */                   \
1286              for (int k = 0; k < SZ; k++) {                          \
1287                  tmp.VsrB(i + k) = j + k;                            \
1288              }                                                       \
1289              j += SZ;                                                \
1290          }                                                           \
1291      }                                                               \
1292                                                                      \
1293      *t = tmp;                                                       \
1294  }
1295  
1296  #define XXGENPCV_BE_COMP(NAME, SZ) \
1297  void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1298  {                                                                   \
1299      ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
1300                                                                      \
1301      /* Iterate over the most significant byte of each element */    \
1302      for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1303          if (b->VsrB(i) & 0x80) {                                    \
1304              /* Update each byte of the element */                   \
1305              for (int k = 0; k < SZ; k++) {                          \
1306                  tmp.VsrB(j + k) = i + k;                            \
1307              }                                                       \
1308              j += SZ;                                                \
1309          }                                                           \
1310      }                                                               \
1311                                                                      \
1312      *t = tmp;                                                       \
1313  }
1314  
1315  #define XXGENPCV_LE_EXP(NAME, SZ) \
1316  void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1317  {                                                                   \
1318      ppc_vsr_t tmp;                                                  \
1319                                                                      \
1320      /* Initialize tmp with the result of an all-zeros mask */       \
1321      tmp.VsrD(0) = 0x1F1E1D1C1B1A1918;                               \
1322      tmp.VsrD(1) = 0x1716151413121110;                               \
1323                                                                      \
1324      /* Iterate over the most significant byte of each element */    \
1325      for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1326          /* Reverse indexing of "i" */                               \
1327          const int idx = ARRAY_SIZE(b->u8) - i - SZ;                 \
1328          if (b->VsrB(idx) & 0x80) {                                  \
1329              /* Update each byte of the element */                   \
1330              for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
1331                  tmp.VsrB(idx + rk) = j + k;                         \
1332              }                                                       \
1333              j += SZ;                                                \
1334          }                                                           \
1335      }                                                               \
1336                                                                      \
1337      *t = tmp;                                                       \
1338  }
1339  
1340  #define XXGENPCV_LE_COMP(NAME, SZ) \
1341  void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1342  {                                                                   \
1343      ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
1344                                                                      \
1345      /* Iterate over the most significant byte of each element */    \
1346      for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1347          if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) {           \
1348              /* Update each byte of the element */                   \
1349              for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
1350                  /* Reverse indexing of "j" */                       \
1351                  const int idx = ARRAY_SIZE(b->u8) - j - SZ;         \
1352                  tmp.VsrB(idx + rk) = i + k;                         \
1353              }                                                       \
1354              j += SZ;                                                \
1355          }                                                           \
1356      }                                                               \
1357                                                                      \
1358      *t = tmp;                                                       \
1359  }
1360  
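/*
 * Instantiate the big/little-endian, expanded/compressed xxgenpcv helpers
 * for byte, halfword, word and doubleword element sizes.
 */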
1361  #define XXGENPCV(NAME, SZ) \
1362      XXGENPCV_BE_EXP(NAME, SZ)  \
1363      XXGENPCV_BE_COMP(NAME, SZ) \
1364      XXGENPCV_LE_EXP(NAME, SZ)  \
1365      XXGENPCV_LE_COMP(NAME, SZ) \
1366  
1367  XXGENPCV(XXGENPCVBM, 1)
1368  XXGENPCV(XXGENPCVHM, 2)
1369  XXGENPCV(XXGENPCVWM, 4)
1370  XXGENPCV(XXGENPCVDM, 8)
1371  
1372  #undef XXGENPCV_BE_EXP
1373  #undef XXGENPCV_BE_COMP
1374  #undef XXGENPCV_LE_EXP
1375  #undef XXGENPCV_LE_COMP
1376  #undef XXGENPCV
1377  
1378  #if HOST_BIG_ENDIAN
1379  #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1380  #define VBPERMD_INDEX(i) (i)
1381  #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1382  #else
1383  #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1384  #define VBPERMD_INDEX(i) (1 - i)
1385  #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1386  #endif
1387  #define EXTRACT_BIT(avr, i, index) \
1388          (extract64((avr)->VsrD(i), 63 - index, 1))
1389  
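/*
 * Vector Bit Permute: each index byte of b selects one bit of a; selected
 * bits are gathered per doubleword (vbpermd) or into the most significant
 * doubleword (vbpermq).  Out-of-range indices contribute a zero bit.
 */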
1390  void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1391  {
1392      int i, j;
1393      ppc_avr_t result = { .u64 = { 0, 0 } };
1394      VECTOR_FOR_INORDER_I(i, u64) {
1395          for (j = 0; j < 8; j++) {
1396              int index = VBPERMQ_INDEX(b, (i * 8) + j);
1397              if (index < 64 && EXTRACT_BIT(a, i, index)) {
1398                  result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1399              }
1400          }
1401      }
1402      *r = result;
1403  }
1404  
1405  void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1406  {
1407      int i;
1408      uint64_t perm = 0;
1409  
1410      VECTOR_FOR_INORDER_I(i, u8) {
1411          int index = VBPERMQ_INDEX(b, i);
1412  
1413          if (index < 128) {
1414              uint64_t mask = (1ull << (63 - (index & 0x3F)));
1415              if (a->u64[VBPERMQ_DW(index)] & mask) {
1416                  perm |= (0x8000 >> i);
1417              }
1418          }
1419      }
1420  
1421      r->VsrD(0) = perm;
1422      r->VsrD(1) = 0;
1423  }
1424  
1425  #undef VBPERMQ_INDEX
1426  #undef VBPERMQ_DW
1427  
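/*
 * vpmsum[bhw]/VPMSUMD: Vector Polynomial Multiply-Sum, a carry-less (GF(2))
 * multiplication of element pairs whose products are XORed together.
 */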
1428  /*
1429   * There is no carry across the two doublewords, so their order does
1430   * not matter.  Nor is there partial overlap between registers.
1431   */
1432  void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1433  {
1434      for (int i = 0; i < 2; ++i) {
1435          uint64_t aa = a->u64[i], bb = b->u64[i];
1436          r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb);
1437      }
1438  }
1439  
1440  void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1441  {
1442      for (int i = 0; i < 2; ++i) {
1443          uint64_t aa = a->u64[i], bb = b->u64[i];
1444          r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb);
1445      }
1446  }
1447  
1448  void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1449  {
1450      for (int i = 0; i < 2; ++i) {
1451          uint64_t aa = a->u64[i], bb = b->u64[i];
1452          r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32);
1453      }
1454  }
1455  
1456  void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1457  {
1458      Int128 e = clmul_64(a->u64[0], b->u64[0]);
1459      Int128 o = clmul_64(a->u64[1], b->u64[1]);
1460      r->s128 = int128_xor(e, o);
1461  }
1462  
1463  #if HOST_BIG_ENDIAN
1464  #define PKBIG 1
1465  #else
1466  #define PKBIG 0
1467  #endif
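/*
 * vpkpx: pack 32-bit pixels into 16-bit 1:5:5:5 pixels, keeping the low bit
 * of the most significant byte and the top five bits of the other bytes.
 */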
1468  void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1469  {
1470      int i, j;
1471      ppc_avr_t result;
1472  #if HOST_BIG_ENDIAN
1473      const ppc_avr_t *x[2] = { a, b };
1474  #else
1475      const ppc_avr_t *x[2] = { b, a };
1476  #endif
1477  
1478      VECTOR_FOR_INORDER_I(i, u64) {
1479          VECTOR_FOR_INORDER_I(j, u32) {
1480              uint32_t e = x[i]->u32[j];
1481  
1482              result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1483                                       ((e >> 6) & 0x3e0) |
1484                                       ((e >> 3) & 0x1f));
1485          }
1486      }
1487      *r = result;
1488  }
1489  
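/*
 * VPK: pack the elements of two source vectors into one vector of elements
 * half as wide; the saturating variants record any saturation in VSCR[SAT].
 */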
1490  #define VPK(suffix, from, to, cvt, dosat)                               \
1491      void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1492                              ppc_avr_t *a, ppc_avr_t *b)                 \
1493      {                                                                   \
1494          int i;                                                          \
1495          int sat = 0;                                                    \
1496          ppc_avr_t result;                                               \
1497          ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1498          ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1499                                                                          \
1500          VECTOR_FOR_INORDER_I(i, from) {                                 \
1501              result.to[i] = cvt(a0->from[i], &sat);                      \
1502              result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1503          }                                                               \
1504          *r = result;                                                    \
1505          if (dosat && sat) {                                             \
1506              set_vscr_sat(env);                                          \
1507          }                                                               \
1508      }
1509  #define I(x, y) (x)
1510  VPK(shss, s16, s8, cvtshsb, 1)
1511  VPK(shus, s16, u8, cvtshub, 1)
1512  VPK(swss, s32, s16, cvtswsh, 1)
1513  VPK(swus, s32, u16, cvtswuh, 1)
1514  VPK(sdss, s64, s32, cvtsdsw, 1)
1515  VPK(sdus, s64, u32, cvtsduw, 1)
1516  VPK(uhus, u16, u8, cvtuhub, 1)
1517  VPK(uwus, u32, u16, cvtuwuh, 1)
1518  VPK(udus, u64, u32, cvtuduw, 1)
1519  VPK(uhum, u16, u8, I, 0)
1520  VPK(uwum, u32, u16, I, 0)
1521  VPK(udum, u64, u32, I, 0)
1522  #undef I
1523  #undef VPK
1524  #undef PKBIG
1525  
1526  void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1527  {
1528      int i;
1529  
1530      for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1531          r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1532      }
1533  }
1534  
1535  #define VRFI(suffix, rounding)                                  \
1536      void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1537                               ppc_avr_t *b)                      \
1538      {                                                           \
1539          int i;                                                  \
1540          float_status s = env->vec_status;                       \
1541                                                                  \
1542          set_float_rounding_mode(rounding, &s);                  \
1543          for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
1544              r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
1545          }                                                       \
1546      }
1547  VRFI(n, float_round_nearest_even)
1548  VRFI(m, float_round_down)
1549  VRFI(p, float_round_up)
1550  VRFI(z, float_round_to_zero)
1551  #undef VRFI
1552  
1553  void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1554  {
1555      int i;
1556  
1557      for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1558          float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1559  
1560          r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1561      }
1562  }
1563  
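/*
 * VRL[DW]MI rotates each element left by the amount in src2 and inserts it
 * under the begin/end mask also encoded in src2; VRL[DW]NM only applies the
 * mask (AND) to the rotated value.
 */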
1564  #define VRLMI(name, size, element, insert)                                  \
1565  void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1566  {                                                                           \
1567      int i;                                                                  \
1568      for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
1569          uint##size##_t src1 = a->element[i];                                \
1570          uint##size##_t src2 = b->element[i];                                \
1571          uint##size##_t src3 = r->element[i];                                \
1572          uint##size##_t begin, end, shift, mask, rot_val;                    \
1573                                                                              \
1574          shift = extract##size(src2, 0, 6);                                  \
1575          end   = extract##size(src2, 8, 6);                                  \
1576          begin = extract##size(src2, 16, 6);                                 \
1577          rot_val = rol##size(src1, shift);                                   \
1578          mask = mask_u##size(begin, end);                                    \
1579          if (insert) {                                                       \
1580              r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
1581          } else {                                                            \
1582              r->element[i] = (rot_val & mask);                               \
1583          }                                                                   \
1584      }                                                                       \
1585  }
1586  
1587  VRLMI(VRLDMI, 64, u64, 1);
1588  VRLMI(VRLWMI, 32, u32, 1);
1589  VRLMI(VRLDNM, 64, u64, 0);
1590  VRLMI(VRLWNM, 32, u32, 0);
1591  
1592  void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1593  {
1594      int i;
1595  
1596      for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1597          r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1598      }
1599  }
1600  
1601  void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1602  {
1603      int i;
1604  
1605      for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1606          r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1607      }
1608  }
1609  
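/*
 * vextu{b,h,w}{l,r}x: extract an unsigned element at the byte offset held
 * in ra, counted from the left (lx) or the right (rx) end of the vector.
 */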
1610  #define VEXTU_X_DO(name, size, left)                            \
1611  target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1612  {                                                               \
1613      int index = (a & 0xf) * 8;                                  \
1614      if (left) {                                                 \
1615          index = 128 - index - size;                             \
1616      }                                                           \
1617      return int128_getlo(int128_rshift(b->s128, index)) &        \
1618          MAKE_64BIT_MASK(0, size);                               \
1619  }
1620  VEXTU_X_DO(vextublx,  8, 1)
1621  VEXTU_X_DO(vextuhlx, 16, 1)
1622  VEXTU_X_DO(vextuwlx, 32, 1)
1623  VEXTU_X_DO(vextubrx,  8, 0)
1624  VEXTU_X_DO(vextuhrx, 16, 0)
1625  VEXTU_X_DO(vextuwrx, 32, 0)
1626  #undef VEXTU_X_DO
1627  
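/*
 * vslv/vsrv: shift each byte by the per-byte amount in b, pulling in bits
 * from the adjacent source byte.
 */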
1628  void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1629  {
1630      int i;
1631      unsigned int shift, bytes, size;
1632  
1633      size = ARRAY_SIZE(r->u8);
1634      for (i = 0; i < size; i++) {
1635          shift = b->VsrB(i) & 0x7;             /* extract shift value */
1636          bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
1637              (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1638          r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
1639      }
1640  }
1641  
1642  void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1643  {
1644      int i;
1645      unsigned int shift, bytes;
1646  
1647      /*
1648       * Iterate in reverse order, as the destination and source register
1649       * can be the same.  Modifying the result in place saves a temporary,
1650       * and reverse order guarantees that computed bytes are not fed back.
1651       */
1652      for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1653          shift = b->VsrB(i) & 0x7;               /* extract shift value */
1654          bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1655                                                  /* extract adjacent bytes */
1656          r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
1657      }
1658  }
1659  
1660  void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1661  {
1662      int sh = shift & 0xf;
1663      int i;
1664      ppc_avr_t result;
1665  
1666      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1667          int index = sh + i;
1668          if (index > 0xf) {
1669              result.VsrB(i) = b->VsrB(index - 0x10);
1670          } else {
1671              result.VsrB(i) = a->VsrB(index);
1672          }
1673      }
1674      *r = result;
1675  }
1676  
1677  void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1678  {
1679      int sh = (b->VsrB(0xf) >> 3) & 0xf;
1680  
1681  #if HOST_BIG_ENDIAN
1682      memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1683      memset(&r->u8[16 - sh], 0, sh);
1684  #else
1685      memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1686      memset(&r->u8[0], 0, sh);
1687  #endif
1688  }
1689  
1690  #if HOST_BIG_ENDIAN
1691  #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1692  #else
1693  #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1694  #endif
1695  
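/*
 * VINS[BHWD]LX: insert a GPR value of the given width into the vector at a
 * byte index taken from another GPR; an out-of-range index only logs a
 * guest error and leaves the target register unchanged.
 */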
1696  #define VINSX(SUFFIX, TYPE) \
1697  void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
1698                                           uint64_t val, target_ulong index)     \
1699  {                                                                              \
1700      const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
1701      target_long idx = index;                                                   \
1702                                                                                 \
1703      if (idx < 0 || idx > maxidx) {                                             \
1704          idx =  idx < 0 ? sizeof(TYPE) - idx : idx;                             \
1705          qemu_log_mask(LOG_GUEST_ERROR,                                         \
1706              "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx   \
1707              ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);         \
1708      } else {                                                                   \
1709          TYPE src = val;                                                        \
1710          memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));           \
1711      }                                                                          \
1712  }
1713  VINSX(B, uint8_t)
1714  VINSX(H, uint16_t)
1715  VINSX(W, uint32_t)
1716  VINSX(D, uint64_t)
1717  #undef ELEM_ADDR
1718  #undef VINSX
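/*
 * VEXTDU[BHW]VLX/VEXTDDVLX: extract an element from the concatenation of
 * vrA and vrB at the byte index in a GPR, placing it right-justified in
 * doubleword 0 of the target; invalid indices yield a zero result and log a
 * guest error.
 */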
1719  #if HOST_BIG_ENDIAN
1720  #define VEXTDVLX(NAME, SIZE) \
1721  void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1722                     target_ulong index)                                         \
1723  {                                                                              \
1724      const target_long idx = index;                                             \
1725      ppc_avr_t tmp[2] = { *a, *b };                                             \
1726      memset(t, 0, sizeof(*t));                                                  \
1727      if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1728          memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1729      } else {                                                                   \
1730          qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1731                        TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1732                        env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1733      }                                                                          \
1734  }
1735  #else
1736  #define VEXTDVLX(NAME, SIZE) \
1737  void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1738                     target_ulong index)                                         \
1739  {                                                                              \
1740      const target_long idx = index;                                             \
1741      ppc_avr_t tmp[2] = { *b, *a };                                             \
1742      memset(t, 0, sizeof(*t));                                                  \
1743      if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1744          memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
1745                 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
1746      } else {                                                                   \
1747          qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1748                        TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1749                        env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1750      }                                                                          \
1751  }
1752  #endif
1753  VEXTDVLX(VEXTDUBVLX, 1)
1754  VEXTDVLX(VEXTDUHVLX, 2)
1755  VEXTDVLX(VEXTDUWVLX, 4)
1756  VEXTDVLX(VEXTDDVLX, 8)
1757  #undef VEXTDVLX
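/*
 * vextract{ub,uh,uw,d}: extract the element at the given byte offset and
 * place it right-justified in doubleword 0 of the result, zeroing the rest.
 */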
1758  #if HOST_BIG_ENDIAN
1759  #define VEXTRACT(suffix, element)                                            \
1760      void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1761      {                                                                        \
1762          uint32_t es = sizeof(r->element[0]);                                 \
1763          memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1764          memset(&r->u8[8], 0, 8);                                             \
1765          memset(&r->u8[0], 0, 8 - es);                                        \
1766      }
1767  #else
1768  #define VEXTRACT(suffix, element)                                            \
1769      void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1770      {                                                                        \
1771          uint32_t es = sizeof(r->element[0]);                                 \
1772          uint32_t s = (16 - index) - es;                                      \
1773          memmove(&r->u8[8], &b->u8[s], es);                                   \
1774          memset(&r->u8[0], 0, 8);                                             \
1775          memset(&r->u8[8 + es], 0, 8 - es);                                   \
1776      }
1777  #endif
1778  VEXTRACT(ub, u8)
1779  VEXTRACT(uh, u16)
1780  VEXTRACT(uw, u32)
1781  VEXTRACT(d, u64)
1782  #undef VEXTRACT
1783  
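/*
 * VSTRI[BH][LR]: Vector String Isolate; copy elements up to the first zero
 * element, clear the remainder, and return 0b0010 for the CR field if a
 * terminating zero element was found.
 */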
1784  #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1785  uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1786  {                                                   \
1787      int i, idx, crf = 0;                            \
1788                                                      \
1789      for (i = 0; i < NUM_ELEMS; i++) {               \
1790          idx = LEFT ? i : NUM_ELEMS - i - 1;         \
1791          if (b->Vsr##ELEM(idx)) {                    \
1792              t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx);  \
1793          } else {                                    \
1794              crf = 0b0010;                           \
1795              break;                                  \
1796          }                                           \
1797      }                                               \
1798                                                      \
1799      for (; i < NUM_ELEMS; i++) {                    \
1800          idx = LEFT ? i : NUM_ELEMS - i - 1;         \
1801          t->Vsr##ELEM(idx) = 0;                      \
1802      }                                               \
1803                                                      \
1804      return crf;                                     \
1805  }
1806  VSTRI(VSTRIBL, B, 16, true)
1807  VSTRI(VSTRIBR, B, 16, false)
1808  VSTRI(VSTRIHL, H, 8, true)
1809  VSTRI(VSTRIHR, H, 8, false)
1810  #undef VSTRI
1811  
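/*
 * XXEXTRACTUW copies the word starting at byte offset "index" of xb into
 * word element 1 of xt and zeroes the rest; XXINSERTW is the inverse
 * insertion into a copy of xt.
 */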
1812  void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1813  {
1814      ppc_vsr_t t = { };
1815      size_t es = sizeof(uint32_t);
1816      uint32_t ext_index;
1817      int i;
1818  
1819      ext_index = index;
1820      for (i = 0; i < es; i++, ext_index++) {
1821          t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1822      }
1823  
1824      *xt = t;
1825  }
1826  
1827  void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1828  {
1829      ppc_vsr_t t = *xt;
1830      size_t es = sizeof(uint32_t);
1831      int ins_index, i = 0;
1832  
1833      ins_index = index;
1834      for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1835          t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1836      }
1837  
1838      *xt = t;
1839  }
1840  
1841  void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1842                     uint32_t desc)
1843  {
1844      /*
1845       * Instead of processing imm bit-by-bit, we'll skip the computation of
1846       * conjunctions whose corresponding bit is unset.
1847       */
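    /*
     * For example, imm = 0x01 keeps only the A & B & C conjunction, while
     * imm = 0x80 keeps only ~A & ~B & ~C.
     */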
1848      int bit, imm = simd_data(desc);
1849      Int128 conj, disj = int128_zero();
1850  
1851      /* Iterate over set bits from the least to the most significant bit */
1852      while (imm) {
1853          /*
1854           * Get the next bit to be processed with ctzl. Invert the result of
1855           * ctzl to match the indexing used by PowerISA.
1856           */
1857          bit = 7 - ctzl(imm);
1858          if (bit & 0x4) {
1859              conj = a->s128;
1860          } else {
1861              conj = int128_not(a->s128);
1862          }
1863          if (bit & 0x2) {
1864              conj = int128_and(conj, b->s128);
1865          } else {
1866              conj = int128_and(conj, int128_not(b->s128));
1867          }
1868          if (bit & 0x1) {
1869              conj = int128_and(conj, c->s128);
1870          } else {
1871              conj = int128_and(conj, int128_not(c->s128));
1872          }
1873          disj = int128_or(disj, conj);
1874  
1875          /* Unset the least significant bit that is set */
1876          imm &= imm - 1;
1877      }
1878  
1879      t->s128 = disj;
1880  }
1881  
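/*
 * XXBLENDV[BHWD]: for each element, select b when the sign bit of the
 * corresponding element of c is set, otherwise select a.
 */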
1882  #define XXBLEND(name, sz) \
1883  void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b,  \
1884                                   ppc_avr_t *c, uint32_t desc)               \
1885  {                                                                           \
1886      for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
1887          t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
1888              b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
1889      }                                                                       \
1890  }
1891  XXBLEND(B, 8)
1892  XXBLEND(H, 16)
1893  XXBLEND(W, 32)
1894  XXBLEND(D, 64)
1895  #undef XXBLEND
1896  
1897  void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1898  {
1899      int sh = (b->VsrB(0xf) >> 3) & 0xf;
1900  
1901  #if HOST_BIG_ENDIAN
1902      memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1903      memset(&r->u8[0], 0, sh);
1904  #else
1905      memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1906      memset(&r->u8[16 - sh], 0, sh);
1907  #endif
1908  }
1909  
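/*
 * vsum*: sum groups of elements of a (seeded from an element of b) into
 * fewer, wider accumulators, saturating to the destination element size
 * and setting VSCR[SAT] on overflow.
 */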
1910  void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1911  {
1912      int64_t t;
1913      int i, upper;
1914      ppc_avr_t result;
1915      int sat = 0;
1916  
1917      upper = ARRAY_SIZE(r->s32) - 1;
1918      t = (int64_t)b->VsrSW(upper);
1919      for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1920          t += a->VsrSW(i);
1921          result.VsrSW(i) = 0;
1922      }
1923      result.VsrSW(upper) = cvtsdsw(t, &sat);
1924      *r = result;
1925  
1926      if (sat) {
1927          set_vscr_sat(env);
1928      }
1929  }
1930  
1931  void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1932  {
1933      int i, j, upper;
1934      ppc_avr_t result;
1935      int sat = 0;
1936  
1937      upper = 1;
1938      for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1939          int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1940  
1941          result.VsrD(i) = 0;
1942          for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1943              t += a->VsrSW(2 * i + j);
1944          }
1945          result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1946      }
1947  
1948      *r = result;
1949      if (sat) {
1950          set_vscr_sat(env);
1951      }
1952  }
1953  
1954  void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1955  {
1956      int i, j;
1957      int sat = 0;
1958  
1959      for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1960          int64_t t = (int64_t)b->s32[i];
1961  
1962          for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1963              t += a->s8[4 * i + j];
1964          }
1965          r->s32[i] = cvtsdsw(t, &sat);
1966      }
1967  
1968      if (sat) {
1969          set_vscr_sat(env);
1970      }
1971  }
1972  
1973  void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1974  {
1975      int sat = 0;
1976      int i;
1977  
1978      for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1979          int64_t t = (int64_t)b->s32[i];
1980  
1981          t += a->s16[2 * i] + a->s16[2 * i + 1];
1982          r->s32[i] = cvtsdsw(t, &sat);
1983      }
1984  
1985      if (sat) {
1986          set_vscr_sat(env);
1987      }
1988  }
1989  
1990  void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1991  {
1992      int i, j;
1993      int sat = 0;
1994  
1995      for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1996          uint64_t t = (uint64_t)b->u32[i];
1997  
1998          for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1999              t += a->u8[4 * i + j];
2000          }
2001          r->u32[i] = cvtuduw(t, &sat);
2002      }
2003  
2004      if (sat) {
2005          set_vscr_sat(env);
2006      }
2007  }
2008  
2009  #if HOST_BIG_ENDIAN
2010  #define UPKHI 1
2011  #define UPKLO 0
2012  #else
2013  #define UPKHI 0
2014  #define UPKLO 1
2015  #endif
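/*
 * VUPKPX expands packed 1:5:5:5 pixels back to 32-bit pixels; VUPK
 * sign-extends the high or low half of the packed source into elements
 * twice as wide.
 */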
2016  #define VUPKPX(suffix, hi)                                              \
2017      void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2018      {                                                                   \
2019          int i;                                                          \
2020          ppc_avr_t result;                                               \
2021                                                                          \
2022          for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2023              uint16_t _e = b->u16[hi ? i : i + 4];                       \
2024              uint8_t _a = (_e >> 15) ? 0xff : 0;                         \
2025              uint8_t _r = (_e >> 10) & 0x1f;                             \
2026              uint8_t _g = (_e >> 5) & 0x1f;                              \
2027              uint8_t _b = _e & 0x1f;                                     \
2028                                                                          \
2029              result.u32[i] = (_a << 24) | (_r << 16) | (_g << 8) | _b;   \
2030          }                                                               \
2031          *r = result;                                                    \
2032      }
2033  VUPKPX(lpx, UPKLO)
2034  VUPKPX(hpx, UPKHI)
2035  #undef VUPKPX
2036  
2037  #define VUPK(suffix, unpacked, packee, hi)                              \
2038      void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2039      {                                                                   \
2040          int i;                                                          \
2041          ppc_avr_t result;                                               \
2042                                                                          \
2043          if (hi) {                                                       \
2044              for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2045                  result.unpacked[i] = b->packee[i];                      \
2046              }                                                           \
2047          } else {                                                        \
2048              for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2049                   i++) {                                                 \
2050                  result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2051              }                                                           \
2052          }                                                               \
2053          *r = result;                                                    \
2054      }
2055  VUPK(hsb, s16, s8, UPKHI)
2056  VUPK(hsh, s32, s16, UPKHI)
2057  VUPK(hsw, s64, s32, UPKHI)
2058  VUPK(lsb, s16, s8, UPKLO)
2059  VUPK(lsh, s32, s16, UPKLO)
2060  VUPK(lsw, s64, s32, UPKLO)
2061  #undef VUPK
2062  #undef UPKHI
2063  #undef UPKLO
2064  
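/*
 * Per-element count-leading-zeros, count-trailing-zeros and population
 * count helpers, generated for each element size below.
 */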
2065  #define VGENERIC_DO(name, element)                                      \
2066      void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2067      {                                                                   \
2068          int i;                                                          \
2069                                                                          \
2070          for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2071              r->element[i] = name(b->element[i]);                        \
2072          }                                                               \
2073      }
2074  
2075  #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2076  #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2077  
2078  VGENERIC_DO(clzb, u8)
2079  VGENERIC_DO(clzh, u16)
2080  
2081  #undef clzb
2082  #undef clzh
2083  
2084  #define ctzb(v) ((v) ? ctz32(v) : 8)
2085  #define ctzh(v) ((v) ? ctz32(v) : 16)
2086  #define ctzw(v) ctz32((v))
2087  #define ctzd(v) ctz64((v))
2088  
2089  VGENERIC_DO(ctzb, u8)
2090  VGENERIC_DO(ctzh, u16)
2091  VGENERIC_DO(ctzw, u32)
2092  VGENERIC_DO(ctzd, u64)
2093  
2094  #undef ctzb
2095  #undef ctzh
2096  #undef ctzw
2097  #undef ctzd
2098  
2099  #define popcntb(v) ctpop8(v)
2100  #define popcnth(v) ctpop16(v)
2101  #define popcntw(v) ctpop32(v)
2102  #define popcntd(v) ctpop64(v)
2103  
2104  VGENERIC_DO(popcntb, u8)
2105  VGENERIC_DO(popcnth, u16)
2106  VGENERIC_DO(popcntw, u32)
2107  VGENERIC_DO(popcntd, u64)
2108  
2109  #undef popcntb
2110  #undef popcnth
2111  #undef popcntw
2112  #undef popcntd
2113  
2114  #undef VGENERIC_DO
2115  
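/*
 * Quadword (128-bit) add/subtract helpers: the UQM forms produce the
 * modulo-2^128 result, the E forms add a carry/borrow taken from the least
 * significant bit of c, and the C/EC forms return the carry out in the low
 * doubleword of the result.
 */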
2116  void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2117  {
2118      r->s128 = int128_add(a->s128, b->s128);
2119  }
2120  
2121  void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2122  {
2123      r->s128 = int128_add(int128_add(a->s128, b->s128),
2124                           int128_make64(int128_getlo(c->s128) & 1));
2125  }
2126  
2127  void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2128  {
2129      r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
2130      r->VsrD(0) = 0;
2131  }
2132  
2133  void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2134  {
2135      bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2136           carry_in = int128_getlo(c->s128) & 1;
2137  
2138      if (!carry_out && carry_in) {
2139          carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2140                      int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
2141      }
2142  
2143      r->VsrD(0) = 0;
2144      r->VsrD(1) = carry_out;
2145  }
2146  
2147  void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2148  {
2149      r->s128 = int128_sub(a->s128, b->s128);
2150  }
2151  
2152  void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2153  {
2154      r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2155                           int128_make64(int128_getlo(c->s128) & 1));
2156  }
2157  
2158  void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2159  {
2160      Int128 tmp = int128_not(b->s128);
2161  
2162      r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2163                   int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
2164      r->VsrD(0) = 0;
2165  }
2166  
2167  void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2168  {
2169      Int128 tmp = int128_not(b->s128);
2170      bool carry_out = int128_ult(int128_not(a->s128), tmp),
2171           carry_in = int128_getlo(c->s128) & 1;
2172  
2173      r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2174                                                       int128_makes64(-1)));
2175      r->VsrD(0) = 0;
2176  }
2177  
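/*
 * Helpers for the signed packed decimal (BCD) instructions.  An operand
 * holds 31 decimal digits in 4-bit nibbles with the sign code in the least
 * significant nibble; the macros below name the valid sign codes and map
 * digit numbers to byte positions.
 */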
2178  #define BCD_PLUS_PREF_1 0xC
2179  #define BCD_PLUS_PREF_2 0xF
2180  #define BCD_PLUS_ALT_1  0xA
2181  #define BCD_NEG_PREF    0xD
2182  #define BCD_NEG_ALT     0xB
2183  #define BCD_PLUS_ALT_2  0xE
2184  #define NATIONAL_PLUS   0x2B
2185  #define NATIONAL_NEG    0x2D
2186  
2187  #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2188  
2189  static int bcd_get_sgn(ppc_avr_t *bcd)
2190  {
2191      switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2192      case BCD_PLUS_PREF_1:
2193      case BCD_PLUS_PREF_2:
2194      case BCD_PLUS_ALT_1:
2195      case BCD_PLUS_ALT_2:
2196      {
2197          return 1;
2198      }
2199  
2200      case BCD_NEG_PREF:
2201      case BCD_NEG_ALT:
2202      {
2203          return -1;
2204      }
2205  
2206      default:
2207      {
2208          return 0;
2209      }
2210      }
2211  }
2212  
2213  static int bcd_preferred_sgn(int sgn, int ps)
2214  {
2215      if (sgn >= 0) {
2216          return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2217      } else {
2218          return BCD_NEG_PREF;
2219      }
2220  }
2221  
2222  static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2223  {
2224      uint8_t result;
2225      if (n & 1) {
2226          result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2227      } else {
2228          result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2229      }
2230  
2231      if (unlikely(result > 9)) {
2232          *invalid = true;
2233      }
2234      return result;
2235  }
2236  
2237  static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2238  {
2239      if (n & 1) {
2240          bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2241          bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2242      } else {
2243          bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2244          bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2245      }
2246  }
2247  
2248  static bool bcd_is_valid(ppc_avr_t *bcd)
2249  {
2250      int i;
2251      int invalid = 0;
2252  
2253      if (bcd_get_sgn(bcd) == 0) {
2254          return false;
2255      }
2256  
2257      for (i = 1; i < 32; i++) {
2258          bcd_get_digit(bcd, i, &invalid);
2259          if (unlikely(invalid)) {
2260              return false;
2261          }
2262      }
2263      return true;
2264  }
2265  
2266  static int bcd_cmp_zero(ppc_avr_t *bcd)
2267  {
2268      if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2269          return CRF_EQ;
2270      } else {
2271          return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2272      }
2273  }
2274  
2275  static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2276  {
2277      return reg->VsrH(7 - n);
2278  }
2279  
2280  static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2281  {
2282      reg->VsrH(7 - n) = val;
2283  }
2284  
2285  static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2286  {
2287      int i;
2288      int invalid = 0;
2289      for (i = 31; i > 0; i--) {
2290          uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2291          uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2292          if (unlikely(invalid)) {
2293              return 0; /* doesn't matter */
2294          } else if (dig_a > dig_b) {
2295              return 1;
2296          } else if (dig_a < dig_b) {
2297              return -1;
2298          }
2299      }
2300  
2301      return 0;
2302  }
2303  
2304  static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2305                         int *overflow)
2306  {
2307      int carry = 0;
2308      int i;
2309      int is_zero = 1;
2310  
2311      for (i = 1; i <= 31; i++) {
2312          uint8_t digit = bcd_get_digit(a, i, invalid) +
2313                          bcd_get_digit(b, i, invalid) + carry;
2314          is_zero &= (digit == 0);
2315          if (digit > 9) {
2316              carry = 1;
2317              digit -= 10;
2318          } else {
2319              carry = 0;
2320          }
2321  
2322          bcd_put_digit(t, digit, i);
2323      }
2324  
2325      *overflow = carry;
2326      return is_zero;
2327  }
2328  
2329  static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2330                         int *overflow)
2331  {
2332      int carry = 0;
2333      int i;
2334  
2335      for (i = 1; i <= 31; i++) {
2336          uint8_t digit = bcd_get_digit(a, i, invalid) -
2337                          bcd_get_digit(b, i, invalid) + carry;
2338          if (digit & 0x80) {
2339              carry = -1;
2340              digit += 10;
2341          } else {
2342              carry = 0;
2343          }
2344  
2345          bcd_put_digit(t, digit, i);
2346      }
2347  
2348      *overflow = carry;
2349  }
2350  
2351  uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2352  {
2353  
2354      int sgna = bcd_get_sgn(a);
2355      int sgnb = bcd_get_sgn(b);
2356      int invalid = (sgna == 0) || (sgnb == 0);
2357      int overflow = 0;
2358      int zero = 0;
2359      uint32_t cr = 0;
2360      ppc_avr_t result = { .u64 = { 0, 0 } };
2361  
2362      if (!invalid) {
2363          if (sgna == sgnb) {
2364              result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2365              zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2366              cr = (sgna > 0) ? CRF_GT : CRF_LT;
2367          } else {
2368              int magnitude = bcd_cmp_mag(a, b);
2369              if (magnitude > 0) {
2370                  result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2371                  bcd_sub_mag(&result, a, b, &invalid, &overflow);
2372                  cr = (sgna > 0) ? CRF_GT : CRF_LT;
2373              } else if (magnitude < 0) {
2374                  result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2375                  bcd_sub_mag(&result, b, a, &invalid, &overflow);
2376                  cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2377              } else {
2378                  result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2379                  cr = CRF_EQ;
2380              }
2381          }
2382      }
2383  
2384      if (unlikely(invalid)) {
2385          result.VsrD(0) = result.VsrD(1) = -1;
2386          cr = CRF_SO;
2387      } else if (overflow) {
2388          cr |= CRF_SO;
2389      } else if (zero) {
2390          cr |= CRF_EQ;
2391      }
2392  
2393      *r = result;
2394  
2395      return cr;
2396  }
2397  
2398  uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2399  {
2400      ppc_avr_t bcopy = *b;
2401      int sgnb = bcd_get_sgn(b);
2402      if (sgnb < 0) {
2403          bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2404      } else if (sgnb > 0) {
2405          bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2406      }
2407      /* else invalid ... defer to bcdadd code for proper handling */
2408  
2409      return helper_bcdadd(r, a, &bcopy, ps);
2410  }
2411  
2412  uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2413  {
2414      int i;
2415      int cr = 0;
2416      uint16_t national = 0;
2417      uint16_t sgnb = get_national_digit(b, 0);
2418      ppc_avr_t ret = { .u64 = { 0, 0 } };
2419      int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2420  
2421      for (i = 1; i < 8; i++) {
2422          national = get_national_digit(b, i);
2423          if (unlikely(national < 0x30 || national > 0x39)) {
2424              invalid = 1;
2425              break;
2426          }
2427  
2428          bcd_put_digit(&ret, national & 0xf, i);
2429      }
2430  
2431      if (sgnb == NATIONAL_PLUS) {
2432          bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2433      } else {
2434          bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2435      }
2436  
2437      cr = bcd_cmp_zero(&ret);
2438  
2439      if (unlikely(invalid)) {
2440          cr = CRF_SO;
2441      }
2442  
2443      *r = ret;
2444  
2445      return cr;
2446  }
2447  
2448  uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2449  {
2450      int i;
2451      int cr = 0;
2452      int sgnb = bcd_get_sgn(b);
2453      int invalid = (sgnb == 0);
2454      ppc_avr_t ret = { .u64 = { 0, 0 } };
2455  
2456      int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2457  
2458      for (i = 1; i < 8; i++) {
2459          set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2460  
2461          if (unlikely(invalid)) {
2462              break;
2463          }
2464      }
2465      set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2466  
2467      cr = bcd_cmp_zero(b);
2468  
2469      if (ox_flag) {
2470          cr |= CRF_SO;
2471      }
2472  
2473      if (unlikely(invalid)) {
2474          cr = CRF_SO;
2475      }
2476  
2477      *r = ret;
2478  
2479      return cr;
2480  }
2481  
2482  uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2483  {
2484      int i;
2485      int cr = 0;
2486      int invalid = 0;
2487      int zone_digit = 0;
2488      int zone_lead = ps ? 0xF : 0x3;
2489      int digit = 0;
2490      ppc_avr_t ret = { .u64 = { 0, 0 } };
2491      int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2492  
2493      if (unlikely((sgnb < 0xA) && ps)) {
2494          invalid = 1;
2495      }
2496  
2497      for (i = 0; i < 16; i++) {
2498          zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2499          digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2500          if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2501              invalid = 1;
2502              break;
2503          }
2504  
2505          bcd_put_digit(&ret, digit, i + 1);
2506      }
2507  
2508      if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2509              (!ps && (sgnb & 0x4))) {
2510          bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2511      } else {
2512          bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2513      }
2514  
2515      cr = bcd_cmp_zero(&ret);
2516  
2517      if (unlikely(invalid)) {
2518          cr = CRF_SO;
2519      }
2520  
2521      *r = ret;
2522  
2523      return cr;
2524  }
2525  
2526  uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2527  {
2528      int i;
2529      int cr = 0;
2530      uint8_t digit = 0;
2531      int sgnb = bcd_get_sgn(b);
2532      int zone_lead = (ps) ? 0xF0 : 0x30;
2533      int invalid = (sgnb == 0);
2534      ppc_avr_t ret = { .u64 = { 0, 0 } };
2535  
2536      int ox_flag = ((b->VsrD(0) >> 4) != 0);
2537  
2538      for (i = 0; i < 16; i++) {
2539          digit = bcd_get_digit(b, i + 1, &invalid);
2540  
2541          if (unlikely(invalid)) {
2542              break;
2543          }
2544  
2545          ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2546      }
2547  
2548      if (ps) {
2549          bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2550      } else {
2551          bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2552      }
2553  
2554      cr = bcd_cmp_zero(b);
2555  
2556      if (ox_flag) {
2557          cr |= CRF_SO;
2558      }
2559  
2560      if (unlikely(invalid)) {
2561          cr = CRF_SO;
2562      }
2563  
2564      *r = ret;
2565  
2566      return cr;
2567  }
2568  
2569  /**
2570   * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2571   *
2572   * Returns:
2573   * > 0 if ahi|alo > bhi|blo,
2574   * 0 if ahi|alo == bhi|blo,
2575   * < 0 if ahi|alo < bhi|blo
2576   */
2577  static inline int ucmp128(uint64_t alo, uint64_t ahi,
2578                            uint64_t blo, uint64_t bhi)
2579  {
2580      return (ahi == bhi) ?
2581          (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2582          (ahi > bhi ? 1 : -1);
2583  }
2584  
2585  uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2586  {
2587      int i;
2588      int cr;
2589      uint64_t lo_value;
2590      uint64_t hi_value;
2591      uint64_t rem;
2592      ppc_avr_t ret = { .u64 = { 0, 0 } };
2593  
2594      if (b->VsrSD(0) < 0) {
2595          lo_value = -b->VsrSD(1);
2596          hi_value = ~b->VsrD(0) + !lo_value;
2597          bcd_put_digit(&ret, 0xD, 0);
2598  
2599          cr = CRF_LT;
2600      } else {
2601          lo_value = b->VsrD(1);
2602          hi_value = b->VsrD(0);
2603          bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2604  
2605          if (hi_value == 0 && lo_value == 0) {
2606              cr = CRF_EQ;
2607          } else {
2608              cr = CRF_GT;
2609          }
2610      }
2611  
2612      /*
2613       * Check src limits: abs(src) <= 10^31 - 1
2614       *
2615       * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2616       */
2617      if (ucmp128(lo_value, hi_value,
2618                  0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2619          cr |= CRF_SO;
2620  
2621          /*
2622           * According to the ISA, if src wouldn't fit in the destination
2623           * register, the result is undefined.
2624           * In that case, we leave r unchanged.
2625           */
2626      } else {
2627          rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2628  
2629          for (i = 1; i < 16; rem /= 10, i++) {
2630              bcd_put_digit(&ret, rem % 10, i);
2631          }
2632  
2633          for (; i < 32; lo_value /= 10, i++) {
2634              bcd_put_digit(&ret, lo_value % 10, i);
2635          }
2636  
2637          *r = ret;
2638      }
2639  
2640      return cr;
2641  }
2642  
2643  uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2644  {
2645      uint8_t i;
2646      int cr;
2647      uint64_t carry;
2648      uint64_t unused;
2649      uint64_t lo_value;
2650      uint64_t hi_value = 0;
2651      int sgnb = bcd_get_sgn(b);
2652      int invalid = (sgnb == 0);
2653  
2654      lo_value = bcd_get_digit(b, 31, &invalid);
2655      for (i = 30; i > 0; i--) {
2656          mulu64(&lo_value, &carry, lo_value, 10ULL);
2657          mulu64(&hi_value, &unused, hi_value, 10ULL);
2658          lo_value += bcd_get_digit(b, i, &invalid);
2659          hi_value += carry;
2660  
2661          if (unlikely(invalid)) {
2662              break;
2663          }
2664      }
2665  
2666      if (sgnb == -1) {
2667          r->VsrSD(1) = -lo_value;
2668          r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2669      } else {
2670          r->VsrSD(1) = lo_value;
2671          r->VsrSD(0) = hi_value;
2672      }
2673  
2674      cr = bcd_cmp_zero(b);
2675  
2676      if (unlikely(invalid)) {
2677          cr = CRF_SO;
2678      }
2679  
2680      return cr;
2681  }
2682  
2683  uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2684  {
2685      int i;
2686      int invalid = 0;
2687  
2688      if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2689          return CRF_SO;
2690      }
2691  
2692      *r = *a;
2693      bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2694  
2695      for (i = 1; i < 32; i++) {
2696          bcd_get_digit(a, i, &invalid);
2697          bcd_get_digit(b, i, &invalid);
2698          if (unlikely(invalid)) {
2699              return CRF_SO;
2700          }
2701      }
2702  
2703      return bcd_cmp_zero(r);
2704  }
2705  
2706  uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2707  {
2708      int sgnb = bcd_get_sgn(b);
2709  
2710      *r = *b;
2711      bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2712  
2713      if (bcd_is_valid(b) == false) {
2714          return CRF_SO;
2715      }
2716  
2717      return bcd_cmp_zero(r);
2718  }
2719  
2720  uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2721  {
2722      int cr;
2723      int i = a->VsrSB(7);
2724      bool ox_flag = false;
2725      int sgnb = bcd_get_sgn(b);
2726      ppc_avr_t ret = *b;
2727      ret.VsrD(1) &= ~0xf;
2728  
2729      if (bcd_is_valid(b) == false) {
2730          return CRF_SO;
2731      }
2732  
2733      if (unlikely(i > 31)) {
2734          i = 31;
2735      } else if (unlikely(i < -31)) {
2736          i = -31;
2737      }
2738  
2739      if (i > 0) {
2740          ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2741      } else {
2742          urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2743      }
2744      bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2745  
2746      *r = ret;
2747  
2748      cr = bcd_cmp_zero(r);
2749      if (ox_flag) {
2750          cr |= CRF_SO;
2751      }
2752  
2753      return cr;
2754  }
2755  
2756  uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2757  {
2758      int cr;
2759      int i;
2760      int invalid = 0;
2761      bool ox_flag = false;
2762      ppc_avr_t ret = *b;
2763  
2764      for (i = 0; i < 32; i++) {
2765          bcd_get_digit(b, i, &invalid);
2766  
2767          if (unlikely(invalid)) {
2768              return CRF_SO;
2769          }
2770      }
2771  
2772      i = a->VsrSB(7);
2773      if (i >= 32) {
2774          ox_flag = true;
2775          ret.VsrD(1) = ret.VsrD(0) = 0;
2776      } else if (i <= -32) {
2777          ret.VsrD(1) = ret.VsrD(0) = 0;
2778      } else if (i > 0) {
2779          ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2780      } else {
2781          urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2782      }
2783      *r = ret;
2784  
2785      cr = bcd_cmp_zero(r);
2786      if (ox_flag) {
2787          cr |= CRF_SO;
2788      }
2789  
2790      return cr;
2791  }
2792  
2793  uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2794  {
2795      int cr;
2796      int unused = 0;
2797      int invalid = 0;
2798      bool ox_flag = false;
2799      int sgnb = bcd_get_sgn(b);
2800      ppc_avr_t ret = *b;
2801      ret.VsrD(1) &= ~0xf;
2802  
2803      int i = a->VsrSB(7);
2804      ppc_avr_t bcd_one;
2805  
2806      bcd_one.VsrD(0) = 0;
2807      bcd_one.VsrD(1) = 0x10;
2808  
2809      if (bcd_is_valid(b) == false) {
2810          return CRF_SO;
2811      }
2812  
2813      if (unlikely(i > 31)) {
2814          i = 31;
2815      } else if (unlikely(i < -31)) {
2816          i = -31;
2817      }
2818  
2819      if (i > 0) {
2820          ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2821      } else {
2822          urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2823  
2824          if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2825              bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2826          }
2827      }
2828      bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2829  
2830      cr = bcd_cmp_zero(&ret);
2831      if (ox_flag) {
2832          cr |= CRF_SO;
2833      }
2834      *r = ret;
2835  
2836      return cr;
2837  }
2838  
2839  uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2840  {
2841      uint64_t mask;
2842      uint32_t ox_flag = 0;
2843      int i = a->VsrSH(3) + 1;
2844      ppc_avr_t ret = *b;
2845  
2846      if (bcd_is_valid(b) == false) {
2847          return CRF_SO;
2848      }
2849  
2850      if (i > 16 && i < 32) {
2851          mask = (uint64_t)-1 >> (128 - i * 4);
2852          if (ret.VsrD(0) & ~mask) {
2853              ox_flag = CRF_SO;
2854          }
2855  
2856          ret.VsrD(0) &= mask;
2857      } else if (i >= 0 && i <= 16) {
2858          mask = (uint64_t)-1 >> (64 - i * 4);
2859          if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2860              ox_flag = CRF_SO;
2861          }
2862  
2863          ret.VsrD(1) &= mask;
2864          ret.VsrD(0) = 0;
2865      }
2866      bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2867      *r = ret;
2868  
2869      return bcd_cmp_zero(&ret) | ox_flag;
2870  }
2871  
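/*
 * bcdutrunc. - Decimal Unsigned Truncate. Same as bcdtrunc. but for the
 * unsigned BCD value in b (no sign nibble, so ps is unused).
 */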
2872  uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2873  {
2874      int i;
2875      uint64_t mask;
2876      uint32_t ox_flag = 0;
2877      int invalid = 0;
2878      ppc_avr_t ret = *b;
2879  
2880      for (i = 0; i < 32; i++) {
2881          bcd_get_digit(b, i, &invalid);
2882  
2883          if (unlikely(invalid)) {
2884              return CRF_SO;
2885          }
2886      }
2887  
2888      i = a->VsrSH(3);
2889      if (i > 16 && i < 33) {
2890          mask = (uint64_t)-1 >> (128 - i * 4);
2891          if (ret.VsrD(0) & ~mask) {
2892              ox_flag = CRF_SO;
2893          }
2894  
2895          ret.VsrD(0) &= mask;
2896      } else if (i > 0 && i <= 16) {
2897          mask = (uint64_t)-1 >> (64 - i * 4);
2898          if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2899              ox_flag = CRF_SO;
2900          }
2901  
2902          ret.VsrD(1) &= mask;
2903          ret.VsrD(0) = 0;
2904      } else if (i == 0) {
2905          if (ret.VsrD(0) || ret.VsrD(1)) {
2906              ox_flag = CRF_SO;
2907          }
2908          ret.VsrD(0) = ret.VsrD(1) = 0;
2909      }
2910  
2911      *r = ret;
2912      if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2913          return ox_flag | CRF_EQ;
2914      }
2915  
2916      return ox_flag | CRF_GT;
2917  }
2918  
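/* vsbox - apply the AES SubBytes transformation to each byte of a */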
2919  void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2920  {
2921      int i;
2922      VECTOR_FOR_INORDER_I(i, u8) {
2923          r->u8[i] = AES_sbox[a->u8[i]];
2924      }
2925  }
2926  
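/*
 * vcipher - one full AES encryption round (SubBytes, ShiftRows, MixColumns,
 * AddRoundKey) of state a with round key b
 */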
2927  void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2928  {
2929      AESState *ad = (AESState *)r;
2930      AESState *st = (AESState *)a;
2931      AESState *rk = (AESState *)b;
2932  
2933      aesenc_SB_SR_MC_AK(ad, st, rk, true);
2934  }
2935  
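/*
 * vcipherlast - final AES encryption round: as vcipher but without
 * MixColumns
 */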
2936  void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2937  {
2938      aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
2939  }
2940  
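/*
 * vncipher - one AES decryption round: InvShiftRows and InvSubBytes on a,
 * AddRoundKey with b, then InvMixColumns
 */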
2941  void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2942  {
2943      AESState *ad = (AESState *)r;
2944      AESState *st = (AESState *)a;
2945      AESState *rk = (AESState *)b;
2946  
2947      aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
2948  }
2949  
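/*
 * vncipherlast - final AES decryption round: as vncipher but without
 * InvMixColumns
 */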
2950  void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2951  {
2952      aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
2953  }
2954  
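/*
 * vshasigmaw - SHA-256 sigma functions. Bit 0x10 of st_six selects between
 * the lower-case sigma (message schedule) and upper-case Sigma (compression)
 * functions; the low four bits select, per word element, whether the 0- or
 * 1-variant is computed.
 */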
2955  void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2956  {
2957      int st = (st_six & 0x10) != 0;
2958      int six = st_six & 0xF;
2959      int i;
2960  
2961      for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2962          if (st == 0) {
2963              if ((six & (0x8 >> i)) == 0) {
2964                  r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2965                               ror32(a->VsrW(i), 18) ^
2966                               (a->VsrW(i) >> 3);
2967              } else { /* six.bit[i] == 1 */
2968                  r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2969                               ror32(a->VsrW(i), 19) ^
2970                               (a->VsrW(i) >> 10);
2971              }
2972          } else { /* st == 1 */
2973              if ((six & (0x8 >> i)) == 0) {
2974                  r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2975                               ror32(a->VsrW(i), 13) ^
2976                               ror32(a->VsrW(i), 22);
2977              } else { /* six.bit[i] == 1 */
2978                  r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2979                               ror32(a->VsrW(i), 11) ^
2980                               ror32(a->VsrW(i), 25);
2981              }
2982          }
2983      }
2984  }
2985  
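/*
 * vshasigmad - SHA-512 sigma functions, as vshasigmaw but on the two
 * doubleword elements; only every other selector bit of st_six is used.
 */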
2986  void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2987  {
2988      int st = (st_six & 0x10) != 0;
2989      int six = st_six & 0xF;
2990      int i;
2991  
2992      for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2993          if (st == 0) {
2994              if ((six & (0x8 >> (2 * i))) == 0) {
2995                  r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2996                               ror64(a->VsrD(i), 8) ^
2997                               (a->VsrD(i) >> 7);
2998              } else { /* six.bit[2*i] == 1 */
2999                  r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3000                               ror64(a->VsrD(i), 61) ^
3001                               (a->VsrD(i) >> 6);
3002              }
3003          } else { /* st == 1 */
3004              if ((six & (0x8 >> (2 * i))) == 0) {
3005                  r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3006                               ror64(a->VsrD(i), 34) ^
3007                               ror64(a->VsrD(i), 39);
3008              } else { /* six.bit[2*i] == 1 */
3009                  r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3010                               ror64(a->VsrD(i), 18) ^
3011                               ror64(a->VsrD(i), 41);
3012              }
3013          }
3014      }
3015  }
3016  
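/*
 * vpermxor - each result byte is the XOR of the byte of a selected by the
 * high nibble of the corresponding control byte in c and the byte of b
 * selected by its low nibble
 */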
3017  void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3018  {
3019      ppc_avr_t result;
3020      int i;
3021  
3022      for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3023          int indexA = c->VsrB(i) >> 4;
3024          int indexB = c->VsrB(i) & 0xF;
3025  
3026          result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3027      }
3028      *r = result;
3029  }
3030  
3031  #undef VECTOR_FOR_INORDER_I
3032  
3033  /*****************************************************************************/
3034  /* SPE extension helpers */
3035  /* Use a table to make this quicker */
3036  static const uint8_t hbrev[16] = {
3037      0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3038      0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3039  };
3040  
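/* Bit-reverse a byte and a 32-bit word; used by helper_brinc below */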
3041  static inline uint8_t byte_reverse(uint8_t val)
3042  {
3043      return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3044  }
3045  
3046  static inline uint32_t word_reverse(uint32_t val)
3047  {
3048      return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3049          (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3050  }
3051  
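/*
 * brinc - bit-reversed increment (FFT-style addressing): the low MASKBITS
 * bits of arg1 selected by the mask in arg2 are incremented in bit-reversed
 * order; the remaining bits of arg1 are passed through unchanged.
 */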
3052  #define MASKBITS 16 /* Arbitrary value; the real width is implementation dependent */
3053  target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3054  {
3055      uint32_t a, b, d, mask;
3056  
3057      mask = UINT32_MAX >> (32 - MASKBITS);
3058      a = arg1 & mask;
3059      b = arg2 & mask;
3060      d = word_reverse(1 + word_reverse(a | ~b));
3061      return (arg1 & ~mask) | (d & b);
3062  }
3063  
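/* Count leading bits equal to the sign bit (backs SPE evcntlsw) */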
3064  uint32_t helper_cntlsw32(uint32_t val)
3065  {
3066      if (val & 0x80000000) {
3067          return clz32(~val);
3068      } else {
3069          return clz32(val);
3070      }
3071  }
3072  
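/* Count leading zero bits (backs SPE evcntlzw) */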
3073  uint32_t helper_cntlzw32(uint32_t val)
3074  {
3075      return clz32(val);
3076  }
3077  
3078  /* 440 specific */
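/*
 * dlmzb - Determine Leftmost Zero Byte: scan the eight bytes high:low for a
 * zero byte, write the 1-based index of the leftmost zero byte (8 if there
 * is none) into the low bits of XER, and optionally record in CR0 whether it
 * was found in the high word, the low word, or not at all.
 */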
3079  target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3080                            target_ulong low, uint32_t update_Rc)
3081  {
3082      target_ulong mask;
3083      int i;
3084  
3085      i = 1;
3086      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3087          if ((high & mask) == 0) {
3088              if (update_Rc) {
3089                  env->crf[0] = 0x4;
3090              }
3091              goto done;
3092          }
3093          i++;
3094      }
3095      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3096          if ((low & mask) == 0) {
3097              if (update_Rc) {
3098                  env->crf[0] = 0x8;
3099              }
3100              goto done;
3101          }
3102          i++;
3103      }
3104      i = 8;
3105      if (update_Rc) {
3106          env->crf[0] = 0x2;
3107      }
3108   done:
3109      env->xer = (env->xer & ~0x7F) | i;
3110      if (update_Rc) {
3111          env->crf[0] |= xer_so;
3112      }
3113      return i;
3114  }
3115