xref: /openbmc/qemu/target/arm/tcg/helper-a64.c (revision 10eb3721fccd06e81bf36ebdf39dc1504eba9beb)
1  /*
2   *  AArch64 specific helpers
3   *
4   *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "qemu/units.h"
22  #include "cpu.h"
23  #include "gdbstub/helpers.h"
24  #include "exec/helper-proto.h"
25  #include "qemu/host-utils.h"
26  #include "qemu/log.h"
27  #include "qemu/main-loop.h"
28  #include "qemu/bitops.h"
29  #include "internals.h"
30  #include "qemu/crc32c.h"
31  #include "exec/exec-all.h"
32  #include "exec/cpu_ldst.h"
33  #include "qemu/int128.h"
34  #include "qemu/atomic128.h"
35  #include "fpu/softfloat.h"
36  #include <zlib.h> /* for crc32 */
37  
38  /* C2.4.7 Multiply and divide */
39  /* special cases for 0 and LLONG_MIN are mandated by the standard */
40  uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
41  {
42      if (den == 0) {
43          return 0;
44      }
45      return num / den;
46  }
47  
48  int64_t HELPER(sdiv64)(int64_t num, int64_t den)
49  {
50      if (den == 0) {
51          return 0;
52      }
53      if (num == LLONG_MIN && den == -1) {
54          return LLONG_MIN;
55      }
56      return num / den;
57  }
58  
59  uint64_t HELPER(rbit64)(uint64_t x)
60  {
61      return revbit64(x);
62  }
63  
64  void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
65  {
66      update_spsel(env, imm);
67  }
68  
69  void HELPER(msr_set_allint_el1)(CPUARMState *env)
70  {
71      /* ALLINT update to PSTATE. */
72      if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
73          raise_exception_ra(env, EXCP_UDEF,
74                             syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
75                             GETPC());
76      }
77  
78      env->pstate |= PSTATE_ALLINT;
79  }
80  
81  static void daif_check(CPUARMState *env, uint32_t op,
82                         uint32_t imm, uintptr_t ra)
83  {
84      /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set.  */
85      if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
86          raise_exception_ra(env, EXCP_UDEF,
87                             syn_aa64_sysregtrap(0, extract32(op, 0, 3),
88                                                 extract32(op, 3, 3), 4,
89                                                 imm, 0x1f, 0),
90                             exception_target_el(env), ra);
91      }
92  }
93  
94  void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
95  {
96      daif_check(env, 0x1e, imm, GETPC());
97      env->daif |= (imm << 6) & PSTATE_DAIF;
98      arm_rebuild_hflags(env);
99  }
100  
101  void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
102  {
103      daif_check(env, 0x1f, imm, GETPC());
104      env->daif &= ~((imm << 6) & PSTATE_DAIF);
105      arm_rebuild_hflags(env);
106  }
107  
108  /* Convert a softfloat float_relation_* value (as returned by
109   * the float*_compare functions) to the correct ARM
110   * NZCV flag state.
111   */
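/* For example, an "equal" result yields NZCV = 0110 and an "unordered"
 * result yields NZCV = 0011, matching the AArch64 FCMP flag encodings.
 */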
112  static inline uint32_t float_rel_to_flags(int res)
113  {
114      uint64_t flags;
115      switch (res) {
116      case float_relation_equal:
117          flags = PSTATE_Z | PSTATE_C;
118          break;
119      case float_relation_less:
120          flags = PSTATE_N;
121          break;
122      case float_relation_greater:
123          flags = PSTATE_C;
124          break;
125      case float_relation_unordered:
126      default:
127          flags = PSTATE_C | PSTATE_V;
128          break;
129      }
130      return flags;
131  }
132  
133  uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
134  {
135      return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
136  }
137  
138  uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
139  {
140      return float_rel_to_flags(float16_compare(x, y, fp_status));
141  }
142  
143  uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
144  {
145      return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
146  }
147  
148  uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
149  {
150      return float_rel_to_flags(float32_compare(x, y, fp_status));
151  }
152  
153  uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
154  {
155      return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
156  }
157  
158  uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
159  {
160      return float_rel_to_flags(float64_compare(x, y, fp_status));
161  }
162  
163  float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
164  {
165      float_status *fpst = fpstp;
166  
167      a = float32_squash_input_denormal(a, fpst);
168      b = float32_squash_input_denormal(b, fpst);
169  
170      if ((float32_is_zero(a) && float32_is_infinity(b)) ||
171          (float32_is_infinity(a) && float32_is_zero(b))) {
172          /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
173          return make_float32((1U << 30) |
174                              ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
175      }
176      return float32_mul(a, b, fpst);
177  }
178  
179  float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
180  {
181      float_status *fpst = fpstp;
182  
183      a = float64_squash_input_denormal(a, fpst);
184      b = float64_squash_input_denormal(b, fpst);
185  
186      if ((float64_is_zero(a) && float64_is_infinity(b)) ||
187          (float64_is_infinity(a) && float64_is_zero(b))) {
188          /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
189          return make_float64((1ULL << 62) |
190                              ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
191      }
192      return float64_mul(a, b, fpst);
193  }
194  
195  /* 64bit/double versions of the neon float compare functions */
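/* Each helper returns 0 or all-ones: the softfloat predicate gives 0/1
 * and the negation turns "true" into 0xffffffffffffffff as Neon expects.
 */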
196  uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
197  {
198      float_status *fpst = fpstp;
199      return -float64_eq_quiet(a, b, fpst);
200  }
201  
202  uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
203  {
204      float_status *fpst = fpstp;
205      return -float64_le(b, a, fpst);
206  }
207  
208  uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
209  {
210      float_status *fpst = fpstp;
211      return -float64_lt(b, a, fpst);
212  }
213  
214  /* Reciprocal step and sqrt step. Note that unlike the A32/T32
215   * versions, these do a fully fused multiply-add or
216   * multiply-add-and-halve.
217   */
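/* In pseudocode terms, FRECPS returns 2.0 - (a * b) and FRSQRTS returns
 * (3.0 - (a * b)) / 2.0, with the negation of 'a', the multiply-add and
 * (for FRSQRTS) the halving all performed as one fused operation.
 */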
218  
219  uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
220  {
221      float_status *fpst = fpstp;
222  
223      a = float16_squash_input_denormal(a, fpst);
224      b = float16_squash_input_denormal(b, fpst);
225  
226      a = float16_chs(a);
227      if ((float16_is_infinity(a) && float16_is_zero(b)) ||
228          (float16_is_infinity(b) && float16_is_zero(a))) {
229          return float16_two;
230      }
231      return float16_muladd(a, b, float16_two, 0, fpst);
232  }
233  
234  float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
235  {
236      float_status *fpst = fpstp;
237  
238      a = float32_squash_input_denormal(a, fpst);
239      b = float32_squash_input_denormal(b, fpst);
240  
241      a = float32_chs(a);
242      if ((float32_is_infinity(a) && float32_is_zero(b)) ||
243          (float32_is_infinity(b) && float32_is_zero(a))) {
244          return float32_two;
245      }
246      return float32_muladd(a, b, float32_two, 0, fpst);
247  }
248  
249  float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
250  {
251      float_status *fpst = fpstp;
252  
253      a = float64_squash_input_denormal(a, fpst);
254      b = float64_squash_input_denormal(b, fpst);
255  
256      a = float64_chs(a);
257      if ((float64_is_infinity(a) && float64_is_zero(b)) ||
258          (float64_is_infinity(b) && float64_is_zero(a))) {
259          return float64_two;
260      }
261      return float64_muladd(a, b, float64_two, 0, fpst);
262  }
263  
264  uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
265  {
266      float_status *fpst = fpstp;
267  
268      a = float16_squash_input_denormal(a, fpst);
269      b = float16_squash_input_denormal(b, fpst);
270  
271      a = float16_chs(a);
272      if ((float16_is_infinity(a) && float16_is_zero(b)) ||
273          (float16_is_infinity(b) && float16_is_zero(a))) {
274          return float16_one_point_five;
275      }
276      return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
277  }
278  
279  float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
280  {
281      float_status *fpst = fpstp;
282  
283      a = float32_squash_input_denormal(a, fpst);
284      b = float32_squash_input_denormal(b, fpst);
285  
286      a = float32_chs(a);
287      if ((float32_is_infinity(a) && float32_is_zero(b)) ||
288          (float32_is_infinity(b) && float32_is_zero(a))) {
289          return float32_one_point_five;
290      }
291      return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
292  }
293  
294  float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
295  {
296      float_status *fpst = fpstp;
297  
298      a = float64_squash_input_denormal(a, fpst);
299      b = float64_squash_input_denormal(b, fpst);
300  
301      a = float64_chs(a);
302      if ((float64_is_infinity(a) && float64_is_zero(b)) ||
303          (float64_is_infinity(b) && float64_is_zero(a))) {
304          return float64_one_point_five;
305      }
306      return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
307  }
308  
309  /* Pairwise long add: add pairs of adjacent elements into
310   * double-width elements in the result (e.g. _s8 is an 8x8->16 op)
311   */
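/* The _s8 variant below sign-extends every byte lane to 16 bits without
 * splitting the vector apart: per lane it computes (b ^ 0x80) - 0x80,
 * forcing bit 15 high before the subtraction (and flipping it back
 * afterwards) so that no lane's borrow can spill into its neighbour.
 * E.g. a lane holding 0xff becomes 0xffff (-1), while 0x7f stays 0x007f.
 */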
312  uint64_t HELPER(neon_addlp_s8)(uint64_t a)
313  {
314      uint64_t nsignmask = 0x0080008000800080ULL;
315      uint64_t wsignmask = 0x8000800080008000ULL;
316      uint64_t elementmask = 0x00ff00ff00ff00ffULL;
317      uint64_t tmp1, tmp2;
318      uint64_t res, signres;
319  
320      /* Extract odd elements, sign extend each to a 16 bit field */
321      tmp1 = a & elementmask;
322      tmp1 ^= nsignmask;
323      tmp1 |= wsignmask;
324      tmp1 = (tmp1 - nsignmask) ^ wsignmask;
325      /* Ditto for the even elements */
326      tmp2 = (a >> 8) & elementmask;
327      tmp2 ^= nsignmask;
328      tmp2 |= wsignmask;
329      tmp2 = (tmp2 - nsignmask) ^ wsignmask;
330  
331      /* calculate the result by summing bits 0..14, 16..22, etc,
332       * and then adjusting the sign bits 15, 23, etc manually.
333       * This ensures the addition can't overflow the 16 bit field.
334       */
335      signres = (tmp1 ^ tmp2) & wsignmask;
336      res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
337      res ^= signres;
338  
339      return res;
340  }
341  
342  uint64_t HELPER(neon_addlp_u8)(uint64_t a)
343  {
344      uint64_t tmp;
345  
346      tmp = a & 0x00ff00ff00ff00ffULL;
347      tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
348      return tmp;
349  }
350  
351  uint64_t HELPER(neon_addlp_s16)(uint64_t a)
352  {
353      int32_t reslo, reshi;
354  
355      reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
356      reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
357  
358      return (uint32_t)reslo | (((uint64_t)reshi) << 32);
359  }
360  
361  uint64_t HELPER(neon_addlp_u16)(uint64_t a)
362  {
363      uint64_t tmp;
364  
365      tmp = a & 0x0000ffff0000ffffULL;
366      tmp += (a >> 16) & 0x0000ffff0000ffffULL;
367      return tmp;
368  }
369  
370  /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
371  uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
372  {
373      float_status *fpst = fpstp;
374      uint16_t val16, sbit;
375      int16_t exp;
376  
377      if (float16_is_any_nan(a)) {
378          float16 nan = a;
379          if (float16_is_signaling_nan(a, fpst)) {
380              float_raise(float_flag_invalid, fpst);
381              if (!fpst->default_nan_mode) {
382                  nan = float16_silence_nan(a, fpst);
383              }
384          }
385          if (fpst->default_nan_mode) {
386              nan = float16_default_nan(fpst);
387          }
388          return nan;
389      }
390  
391      a = float16_squash_input_denormal(a, fpst);
392  
393      val16 = float16_val(a);
394      sbit = 0x8000 & val16;
395      exp = extract32(val16, 10, 5);
396  
397      if (exp == 0) {
398          return make_float16(deposit32(sbit, 10, 5, 0x1e));
399      } else {
400          return make_float16(deposit32(sbit, 10, 5, ~exp));
401      }
402  }
403  
404  float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
405  {
406      float_status *fpst = fpstp;
407      uint32_t val32, sbit;
408      int32_t exp;
409  
410      if (float32_is_any_nan(a)) {
411          float32 nan = a;
412          if (float32_is_signaling_nan(a, fpst)) {
413              float_raise(float_flag_invalid, fpst);
414              if (!fpst->default_nan_mode) {
415                  nan = float32_silence_nan(a, fpst);
416              }
417          }
418          if (fpst->default_nan_mode) {
419              nan = float32_default_nan(fpst);
420          }
421          return nan;
422      }
423  
424      a = float32_squash_input_denormal(a, fpst);
425  
426      val32 = float32_val(a);
427      sbit = 0x80000000ULL & val32;
428      exp = extract32(val32, 23, 8);
429  
430      if (exp == 0) {
431          return make_float32(sbit | (0xfe << 23));
432      } else {
433          return make_float32(sbit | (~exp & 0xff) << 23);
434      }
435  }
436  
437  float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
438  {
439      float_status *fpst = fpstp;
440      uint64_t val64, sbit;
441      int64_t exp;
442  
443      if (float64_is_any_nan(a)) {
444          float64 nan = a;
445          if (float64_is_signaling_nan(a, fpst)) {
446              float_raise(float_flag_invalid, fpst);
447              if (!fpst->default_nan_mode) {
448                  nan = float64_silence_nan(a, fpst);
449              }
450          }
451          if (fpst->default_nan_mode) {
452              nan = float64_default_nan(fpst);
453          }
454          return nan;
455      }
456  
457      a = float64_squash_input_denormal(a, fpst);
458  
459      val64 = float64_val(a);
460      sbit = 0x8000000000000000ULL & val64;
461      exp = extract64(float64_val(a), 52, 11);
462  
463      if (exp == 0) {
464          return make_float64(sbit | (0x7feULL << 52));
465      } else {
466          return make_float64(sbit | (~exp & 0x7ffULL) << 52);
467      }
468  }
469  
470  float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
471  {
472      /* Von Neumann rounding is implemented by using round-to-zero
473       * and then setting the LSB of the result if Inexact was raised.
474       */
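    /* This is the "round to odd" behaviour used by FCVTXN: forcing the
     * result odd whenever any precision was lost avoids double-rounding
     * error if the value is later rounded again to a narrower format.
     */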
475      float32 r;
476      float_status *fpst = &env->vfp.fp_status;
477      float_status tstat = *fpst;
478      int exflags;
479  
480      set_float_rounding_mode(float_round_to_zero, &tstat);
481      set_float_exception_flags(0, &tstat);
482      r = float64_to_float32(a, &tstat);
483      exflags = get_float_exception_flags(&tstat);
484      if (exflags & float_flag_inexact) {
485          r = make_float32(float32_val(r) | 1);
486      }
487      exflags |= get_float_exception_flags(fpst);
488      set_float_exception_flags(exflags, fpst);
489      return r;
490  }
491  
492  /* 64-bit versions of the CRC helpers. Note that although the operation
493   * (and the prototypes of crc32c() and crc32()) mean that only the bottom
494   * 32 bits of the accumulator and result are used, we pass and return
495   * uint64_t for convenience of the generated code. Unlike the 32-bit
496   * instruction set versions, val may genuinely have 64 bits of data in it.
497   * The upper bytes of val (above the number specified by 'bytes') must have
498   * been zeroed out by the caller.
499   */
500  uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
501  {
502      uint8_t buf[8];
503  
504      stq_le_p(buf, val);
505  
506      /* zlib crc32 converts the accumulator and output to one's complement.  */
507      return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
508  }
509  
510  uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
511  {
512      uint8_t buf[8];
513  
514      stq_le_p(buf, val);
515  
516      /* Linux crc32c converts the output to one's complement.  */
517      return crc32c(acc, buf, bytes) ^ 0xffffffff;
518  }
519  
520  /*
521   * AdvSIMD half-precision
522   */
523  
524  #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
525  
526  #define ADVSIMD_HALFOP(name) \
527  uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
528  { \
529      float_status *fpst = fpstp; \
530      return float16_ ## name(a, b, fpst);    \
531  }
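/* As an illustration, ADVSIMD_HALFOP(add) expands (via HELPER()) to:
 *
 *   uint32_t helper_advsimd_addh(uint32_t a, uint32_t b, void *fpstp)
 *   {
 *       float_status *fpst = fpstp;
 *       return float16_add(a, b, fpst);
 *   }
 */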
532  
533  ADVSIMD_HALFOP(add)
534  ADVSIMD_HALFOP(sub)
535  ADVSIMD_HALFOP(mul)
536  ADVSIMD_HALFOP(div)
537  ADVSIMD_HALFOP(min)
538  ADVSIMD_HALFOP(max)
539  ADVSIMD_HALFOP(minnum)
540  ADVSIMD_HALFOP(maxnum)
541  
542  #define ADVSIMD_TWOHALFOP(name)                                         \
543  uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
544  { \
545      float16  a1, a2, b1, b2;                        \
546      uint32_t r1, r2;                                \
547      float_status *fpst = fpstp;                     \
548      a1 = extract32(two_a, 0, 16);                   \
549      a2 = extract32(two_a, 16, 16);                  \
550      b1 = extract32(two_b, 0, 16);                   \
551      b2 = extract32(two_b, 16, 16);                  \
552      r1 = float16_ ## name(a1, b1, fpst);            \
553      r2 = float16_ ## name(a2, b2, fpst);            \
554      return deposit32(r1, 16, 16, r2);               \
555  }
556  
557  ADVSIMD_TWOHALFOP(add)
558  ADVSIMD_TWOHALFOP(sub)
559  ADVSIMD_TWOHALFOP(mul)
560  ADVSIMD_TWOHALFOP(div)
561  ADVSIMD_TWOHALFOP(min)
562  ADVSIMD_TWOHALFOP(max)
563  ADVSIMD_TWOHALFOP(minnum)
564  ADVSIMD_TWOHALFOP(maxnum)
565  
566  /* Data processing - scalar floating-point and advanced SIMD */
567  static float16 float16_mulx(float16 a, float16 b, void *fpstp)
568  {
569      float_status *fpst = fpstp;
570  
571      a = float16_squash_input_denormal(a, fpst);
572      b = float16_squash_input_denormal(b, fpst);
573  
574      if ((float16_is_zero(a) && float16_is_infinity(b)) ||
575          (float16_is_infinity(a) && float16_is_zero(b))) {
576          /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
577          return make_float16((1U << 14) |
578                              ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
579      }
580      return float16_mul(a, b, fpst);
581  }
582  
583  ADVSIMD_HALFOP(mulx)
584  ADVSIMD_TWOHALFOP(mulx)
585  
586  /* fused multiply-accumulate */
587  uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
588                                   void *fpstp)
589  {
590      float_status *fpst = fpstp;
591      return float16_muladd(a, b, c, 0, fpst);
592  }
593  
594  uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
595                                    uint32_t two_c, void *fpstp)
596  {
597      float_status *fpst = fpstp;
598      float16  a1, a2, b1, b2, c1, c2;
599      uint32_t r1, r2;
600      a1 = extract32(two_a, 0, 16);
601      a2 = extract32(two_a, 16, 16);
602      b1 = extract32(two_b, 0, 16);
603      b2 = extract32(two_b, 16, 16);
604      c1 = extract32(two_c, 0, 16);
605      c2 = extract32(two_c, 16, 16);
606      r1 = float16_muladd(a1, b1, c1, 0, fpst);
607      r2 = float16_muladd(a2, b2, c2, 0, fpst);
608      return deposit32(r1, 16, 16, r2);
609  }
610  
611  /*
612   * Floating point comparisons produce an integer result. Softfloat
613   * routines return float_relation types which we convert to the 0/-1
614   * values that Neon requires.
615   */
616  
617  #define ADVSIMD_CMPRES(test) ((test) ? 0xffff : 0)
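/* All-ones in a 16-bit lane is 0xffff, i.e. -1 when the lane is viewed
 * as a signed value, hence the "true" result of 0xffff here.
 */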
618  
619  uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
620  {
621      float_status *fpst = fpstp;
622      int compare = float16_compare_quiet(a, b, fpst);
623      return ADVSIMD_CMPRES(compare == float_relation_equal);
624  }
625  
626  uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
627  {
628      float_status *fpst = fpstp;
629      int compare = float16_compare(a, b, fpst);
630      return ADVSIMD_CMPRES(compare == float_relation_greater ||
631                            compare == float_relation_equal);
632  }
633  
634  uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
635  {
636      float_status *fpst = fpstp;
637      int compare = float16_compare(a, b, fpst);
638      return ADVSIMD_CMPRES(compare == float_relation_greater);
639  }
640  
641  uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
642  {
643      float_status *fpst = fpstp;
644      float16 f0 = float16_abs(a);
645      float16 f1 = float16_abs(b);
646      int compare = float16_compare(f0, f1, fpst);
647      return ADVSIMD_CMPRES(compare == float_relation_greater ||
648                            compare == float_relation_equal);
649  }
650  
651  uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
652  {
653      float_status *fpst = fpstp;
654      float16 f0 = float16_abs(a);
655      float16 f1 = float16_abs(b);
656      int compare = float16_compare(f0, f1, fpst);
657      return ADVSIMD_CMPRES(compare == float_relation_greater);
658  }
659  
660  /* round to integral */
661  uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
662  {
663      return float16_round_to_int(x, fp_status);
664  }
665  
666  uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
667  {
668      int old_flags = get_float_exception_flags(fp_status), new_flags;
669      float16 ret;
670  
671      ret = float16_round_to_int(x, fp_status);
672  
673      /* Suppress any inexact exceptions the conversion produced */
674      if (!(old_flags & float_flag_inexact)) {
675          new_flags = get_float_exception_flags(fp_status);
676          set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
677      }
678  
679      return ret;
680  }
681  
682  /*
683   * Half-precision floating point conversion functions
684   *
685   * There are a multitude of conversion functions with various
686   * different rounding modes. This is dealt with by the calling code
687   * setting the mode appropriately before calling the helper.
688   */
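/* Note that these wrappers return 0 for a NaN input (while raising
 * Invalid), as the Arm pseudocode requires, rather than relying on
 * whatever saturated value the softfloat conversion would produce.
 */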
689  
690  uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
691  {
692      float_status *fpst = fpstp;
693  
694      /* Invalid if we are passed a NaN */
695      if (float16_is_any_nan(a)) {
696          float_raise(float_flag_invalid, fpst);
697          return 0;
698      }
699      return float16_to_int16(a, fpst);
700  }
701  
702  uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
703  {
704      float_status *fpst = fpstp;
705  
706      /* Invalid if we are passed a NaN */
707      if (float16_is_any_nan(a)) {
708          float_raise(float_flag_invalid, fpst);
709          return 0;
710      }
711      return float16_to_uint16(a, fpst);
712  }
713  
714  static int el_from_spsr(uint32_t spsr)
715  {
716      /* Return the exception level that this SPSR is requesting a return to,
717       * or -1 if it is invalid (an illegal return)
718       */
719      if (spsr & PSTATE_nRW) {
720          switch (spsr & CPSR_M) {
721          case ARM_CPU_MODE_USR:
722              return 0;
723          case ARM_CPU_MODE_HYP:
724              return 2;
725          case ARM_CPU_MODE_FIQ:
726          case ARM_CPU_MODE_IRQ:
727          case ARM_CPU_MODE_SVC:
728          case ARM_CPU_MODE_ABT:
729          case ARM_CPU_MODE_UND:
730          case ARM_CPU_MODE_SYS:
731              return 1;
732          case ARM_CPU_MODE_MON:
733              /* Returning to Mon from AArch64 is never possible,
734               * so this is an illegal return.
735               */
736          default:
737              return -1;
738          }
739      } else {
740          if (extract32(spsr, 1, 1)) {
741              /* Return with reserved M[1] bit set */
742              return -1;
743          }
744          if (extract32(spsr, 0, 4) == 1) {
745              /* return to EL0 with M[0] bit set */
746              return -1;
747          }
748          return extract32(spsr, 2, 2);
749      }
750  }
751  
752  static void cpsr_write_from_spsr_elx(CPUARMState *env,
753                                       uint32_t val)
754  {
755      uint32_t mask;
756  
757      /* Save SPSR_ELx.SS into PSTATE. */
758      env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
759      val &= ~PSTATE_SS;
760  
761      /* Move DIT to the correct location for CPSR */
762      if (val & PSTATE_DIT) {
763          val &= ~PSTATE_DIT;
764          val |= CPSR_DIT;
765      }
766  
767      mask = aarch32_cpsr_valid_mask(env->features, \
768          &env_archcpu(env)->isar);
769      cpsr_write(env, val, mask, CPSRWriteRaw);
770  }
771  
772  void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
773  {
774      int cur_el = arm_current_el(env);
775      unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
776      uint32_t spsr = env->banked_spsr[spsr_idx];
777      int new_el;
778      bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;
779  
780      aarch64_save_sp(env, cur_el);
781  
782      arm_clear_exclusive(env);
783  
784      /* We must squash the PSTATE.SS bit to zero unless both of the
785       * following hold:
786       *  1. debug exceptions are currently disabled
787       *  2. singlestep will be active in the EL we return to
788       * We check 1 here and 2 after we've done the pstate/cpsr write() to
789       * transition to the EL we're going to.
790       */
791      if (arm_generate_debug_exceptions(env)) {
792          spsr &= ~PSTATE_SS;
793      }
794  
795      /*
796       * FEAT_RME forbids return from EL3 with an invalid security state.
797       * We don't need an explicit check for FEAT_RME here because we enforce
798       * in scr_write() that you can't set the NSE bit without it.
799       */
800      if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
801          goto illegal_return;
802      }
803  
804      new_el = el_from_spsr(spsr);
805      if (new_el == -1) {
806          goto illegal_return;
807      }
808      if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
809          /* Disallow return to an EL which is unimplemented or higher
810           * than the current one.
811           */
812          goto illegal_return;
813      }
814  
815      if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
816          /* Return to an EL which is configured for a different register width */
817          goto illegal_return;
818      }
819  
820      if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
821          goto illegal_return;
822      }
823  
824      bql_lock();
825      arm_call_pre_el_change_hook(env_archcpu(env));
826      bql_unlock();
827  
828      if (!return_to_aa64) {
829          env->aarch64 = false;
830          /* We do a raw CPSR write because aarch64_sync_64_to_32()
831           * will sort the register banks out for us, and we've already
832           * caught all the bad-mode cases in el_from_spsr().
833           */
834          cpsr_write_from_spsr_elx(env, spsr);
835          if (!arm_singlestep_active(env)) {
836              env->pstate &= ~PSTATE_SS;
837          }
838          aarch64_sync_64_to_32(env);
839  
840          if (spsr & CPSR_T) {
841              env->regs[15] = new_pc & ~0x1;
842          } else {
843              env->regs[15] = new_pc & ~0x3;
844          }
845          helper_rebuild_hflags_a32(env, new_el);
846          qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
847                        "AArch32 EL%d PC 0x%" PRIx32 "\n",
848                        cur_el, new_el, env->regs[15]);
849      } else {
850          int tbii;
851  
852          env->aarch64 = true;
853          spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
854          pstate_write(env, spsr);
855          if (!arm_singlestep_active(env)) {
856              env->pstate &= ~PSTATE_SS;
857          }
858          aarch64_restore_sp(env, new_el);
859          helper_rebuild_hflags_a64(env, new_el);
860  
861          /*
862           * Apply TBI to the exception return address.  We had to delay this
863           * until after we selected the new EL, so that we could select the
864           * correct TBI+TBID bits.  This is made easier by waiting until after
865           * the hflags rebuild, since we can pull the composite TBII field
866           * from there.
867           */
868          tbii = EX_TBFLAG_A64(env->hflags, TBII);
869          if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
870              /* TBI is enabled. */
871              int core_mmu_idx = arm_env_mmu_index(env);
872              if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
873                  new_pc = sextract64(new_pc, 0, 56);
874              } else {
875                  new_pc = extract64(new_pc, 0, 56);
876              }
877          }
878          env->pc = new_pc;
879  
880          qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
881                        "AArch64 EL%d PC 0x%" PRIx64 "\n",
882                        cur_el, new_el, env->pc);
883      }
884  
885      /*
886       * Note that cur_el can never be 0.  If new_el is 0, then
887       * el0_a64 is return_to_aa64, else el0_a64 is ignored.
888       */
889      aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
890  
891      bql_lock();
892      arm_call_el_change_hook(env_archcpu(env));
893      bql_unlock();
894  
895      return;
896  
897  illegal_return:
898      /* Illegal return events of various kinds have architecturally
899       * mandated behaviour:
900       * restore NZCV and DAIF from SPSR_ELx
901       * set PSTATE.IL
902       * restore PC from ELR_ELx
903       * no change to exception level, execution state or stack pointer
904       */
905      env->pstate |= PSTATE_IL;
906      env->pc = new_pc;
907      spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
908      spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
909      pstate_write(env, spsr);
910      if (!arm_singlestep_active(env)) {
911          env->pstate &= ~PSTATE_SS;
912      }
913      helper_rebuild_hflags_a64(env, cur_el);
914      qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
915                    "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
916  }
917  
918  /*
919   * Square Root and Reciprocal square root
920   */
921  
922  uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
923  {
924      float_status *s = fpstp;
925  
926      return float16_sqrt(a, s);
927  }
928  
929  void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
930  {
931      uintptr_t ra = GETPC();
932  
933      /*
934       * Implement DC ZVA, which zeroes a fixed-length block of memory.
935       * Note that we do not implement the (architecturally mandated)
936       * alignment fault for attempts to use this on Device memory
937       * (which matches the usual QEMU behaviour of not implementing either
938       * alignment faults or any memory attribute handling).
939       */
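    /*
     * The block length is 4 << dcz_blocksize bytes (dcz_blocksize is the
     * log2 word count reported in DCZID_EL0.BS); with the common value
     * of 4 that is a 64-byte block.
     */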
940      int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
941      uint64_t vaddr = vaddr_in & ~(blocklen - 1);
942      int mmu_idx = arm_env_mmu_index(env);
943      void *mem;
944  
945      /*
946       * Trapless lookup.  In addition to actual invalid page, may
947       * return NULL for I/O, watchpoints, clean pages, etc.
948       */
949      mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
950  
951  #ifndef CONFIG_USER_ONLY
952      if (unlikely(!mem)) {
953          /*
954           * Trap if accessing an invalid page.  DC_ZVA requires that we supply
955           * the original pointer for an invalid page.  But watchpoints require
956           * that we probe the actual space.  So do both.
957           */
958          (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
959          mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
960  
961          if (unlikely(!mem)) {
962              /*
963               * The only remaining reason for mem == NULL is I/O.
964               * Just do a series of byte writes as the architecture demands.
965               */
966              for (int i = 0; i < blocklen; i++) {
967                  cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
968              }
969              return;
970          }
971      }
972  #endif
973  
974      set_helper_retaddr(ra);
975      memset(mem, 0, blocklen);
976      clear_helper_retaddr();
977  }
978  
979  void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
980                                uint32_t access_type, uint32_t mmu_idx)
981  {
982      arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
983                                  mmu_idx, GETPC());
984  }
985  
986  /* Memory operations (memset, memmove, memcpy) */
987  
988  /*
989   * Return true if the CPY* and SET* insns can execute; compare
990   * pseudocode CheckMOPSEnabled(), though we refactor it a little.
991   */
992  static bool mops_enabled(CPUARMState *env)
993  {
994      int el = arm_current_el(env);
995  
996      if (el < 2 &&
997          (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
998          !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
999          return false;
1000      }
1001  
1002      if (el == 0) {
1003          if (!el_is_in_host(env, 0)) {
1004              return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
1005          } else {
1006              return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
1007          }
1008      }
1009      return true;
1010  }
1011  
1012  static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
1013  {
1014      if (!mops_enabled(env)) {
1015          raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
1016                             exception_target_el(env), ra);
1017      }
1018  }
1019  
1020  /*
1021   * Return the target exception level for an exception due
1022   * to mismatched arguments in a FEAT_MOPS copy or set.
1023   * Compare pseudocode MismatchedCpySetTargetEL()
1024   */
1025  static int mops_mismatch_exception_target_el(CPUARMState *env)
1026  {
1027      int el = arm_current_el(env);
1028  
1029      if (el > 1) {
1030          return el;
1031      }
1032      if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
1033          return 2;
1034      }
1035      if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
1036          return 2;
1037      }
1038      return 1;
1039  }
1040  
1041  /*
1042   * Check whether an M or E instruction was executed with a CF value
1043   * indicating the wrong option for this implementation.
1044   * Assumes we are always Option A.
1045   */
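/*
 * The prologue insn records which option the implementation used in
 * PSTATE.C (0 for Option A, 1 for Option B), so a non-zero C here means
 * the register state was set up for the option we do not implement.
 */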
1046  static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
1047                                      uintptr_t ra)
1048  {
1049      if (env->CF != 0) {
1050          syndrome |= 1 << 17; /* Set the wrong-option bit */
1051          raise_exception_ra(env, EXCP_UDEF, syndrome,
1052                             mops_mismatch_exception_target_el(env), ra);
1053      }
1054  }
1055  
1056  /*
1057   * Return the maximum number of bytes we can transfer starting at addr
1058   * without crossing a page boundary.
1059   */
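/*
 * For example, assuming 4 KiB target pages: page_limit(0x1000) == 0x1000
 * (a whole page is available) and page_limit(0x1fff) == 1.
 */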
1060  static uint64_t page_limit(uint64_t addr)
1061  {
1062      return TARGET_PAGE_ALIGN(addr + 1) - addr;
1063  }
1064  
1065  /*
1066   * Return the number of bytes we can copy starting from addr and working
1067   * backwards without crossing a page boundary.
1068   */
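/*
 * For example, assuming 4 KiB target pages: page_limit_rev(0x1fff) == 0x1000
 * and page_limit_rev(0x1000) == 1.
 */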
1069  static uint64_t page_limit_rev(uint64_t addr)
1070  {
1071      return (addr & ~TARGET_PAGE_MASK) + 1;
1072  }
1073  
1074  /*
1075   * Perform part of a memory set on an area of guest memory starting at
1076   * toaddr (a dirty address) and extending for setsize bytes.
1077   *
1078   * Returns the number of bytes actually set, which might be less than
1079   * setsize; the caller should loop until the whole set has been done.
1080   * The caller should ensure that the guest registers are correct
1081   * for the possibility that the first byte of the set encounters
1082   * an exception or watchpoint. We guarantee not to take any faults
1083   * for bytes other than the first.
1084   */
1085  static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
1086                           uint64_t setsize, uint32_t data, int memidx,
1087                           uint32_t *mtedesc, uintptr_t ra)
1088  {
1089      void *mem;
1090  
1091      setsize = MIN(setsize, page_limit(toaddr));
1092      if (*mtedesc) {
1093          uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
1094          if (mtesize == 0) {
1095              /* Trap, or not. All CPU state is up to date */
1096              mte_check_fail(env, *mtedesc, toaddr, ra);
1097              /* Continue, with no further MTE checks required */
1098              *mtedesc = 0;
1099          } else {
1100              /* Advance to the end, or to the tag mismatch */
1101              setsize = MIN(setsize, mtesize);
1102          }
1103      }
1104  
1105      toaddr = useronly_clean_ptr(toaddr);
1106      /*
1107       * Trapless lookup: returns NULL for invalid page, I/O,
1108       * watchpoints, clean pages, etc.
1109       */
1110      mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);
1111  
1112  #ifndef CONFIG_USER_ONLY
1113      if (unlikely(!mem)) {
1114          /*
1115           * Slow-path: just do one byte write. This will handle the
1116           * watchpoint, invalid page, etc handling correctly.
1117           * For clean code pages, the next iteration will see
1118           * the page dirty and will use the fast path.
1119           */
1120          cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
1121          return 1;
1122      }
1123  #endif
1124      /* Easy case: just memset the host memory */
1125      set_helper_retaddr(ra);
1126      memset(mem, data, setsize);
1127      clear_helper_retaddr();
1128      return setsize;
1129  }
1130  
1131  /*
1132   * Similar, but setting tags. The architecture requires us to do this
1133   * in 16-byte chunks. SETP accesses are not tag checked; they set
1134   * the tags.
1135   */
1136  static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
1137                                uint64_t setsize, uint32_t data, int memidx,
1138                                uint32_t *mtedesc, uintptr_t ra)
1139  {
1140      void *mem;
1141      uint64_t cleanaddr;
1142  
1143      setsize = MIN(setsize, page_limit(toaddr));
1144  
1145      cleanaddr = useronly_clean_ptr(toaddr);
1146      /*
1147       * Trapless lookup: returns NULL for invalid page, I/O,
1148       * watchpoints, clean pages, etc.
1149       */
1150      mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);
1151  
1152  #ifndef CONFIG_USER_ONLY
1153      if (unlikely(!mem)) {
1154          /*
1155           * Slow-path: just do one write. This will handle the
1156           * watchpoint, invalid page, etc handling correctly.
1157           * The architecture requires that we do 16 bytes at a time,
1158           * and we know both ptr and size are 16 byte aligned.
1159           * For clean code pages, the next iteration will see
1160           * the page dirty and will use the fast path.
1161           */
1162          uint64_t repldata = data * 0x0101010101010101ULL;
1163          MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
1164          cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
1165          mte_mops_set_tags(env, toaddr, 16, *mtedesc);
1166          return 16;
1167      }
1168  #endif
1169      /* Easy case: just memset the host memory */
1170      set_helper_retaddr(ra);
1171      memset(mem, data, setsize);
1172      clear_helper_retaddr();
1173      mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
1174      return setsize;
1175  }
1176  
1177  typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
1178                          uint64_t setsize, uint32_t data,
1179                          int memidx, uint32_t *mtedesc, uintptr_t ra);
1180  
1181  /* Extract register numbers from a MOPS exception syndrome value */
1182  static int mops_destreg(uint32_t syndrome)
1183  {
1184      return extract32(syndrome, 10, 5);
1185  }
1186  
1187  static int mops_srcreg(uint32_t syndrome)
1188  {
1189      return extract32(syndrome, 5, 5);
1190  }
1191  
1192  static int mops_sizereg(uint32_t syndrome)
1193  {
1194      return extract32(syndrome, 0, 5);
1195  }
1196  
1197  /*
1198   * Return true if TCMA and TBI bits mean we need to do MTE checks.
1199   * We only need to do this once per MOPS insn, not for every page.
1200   */
1201  static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
1202  {
1203      int bit55 = extract64(ptr, 55, 1);
1204  
1205      /*
1206       * Note that tbi_check() returns true for "access checked" but
1207       * tcma_check() returns true for "access unchecked".
1208       */
1209      if (!tbi_check(desc, bit55)) {
1210          return false;
1211      }
1212      return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
1213  }
1214  
1215  /* Take an exception if the SETG addr/size are not granule aligned */
1216  static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
1217                                   uint32_t memidx, uintptr_t ra)
1218  {
1219      if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
1220          !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
1221          arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
1222                                      memidx, ra);
1223  
1224      }
1225  }
1226  
1227  static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
1228  {
1229      /*
1230       * Runtime equivalent of cpu_reg() -- return the CPU register value,
1231       * for contexts when index 31 means XZR (not SP).
1232       */
1233      return reg == 31 ? 0 : env->xregs[reg];
1234  }
1235  
1236  /*
1237   * For the Memory Set operation, our implementation chooses
1238   * always to use "option A", where we update Xd to the final
1239   * address in the SETP insn, and set Xn to be -(bytes remaining).
1240   * On SETM and SETE insns we only need update Xn.
1241   *
1242   * @env: CPU
1243   * @syndrome: syndrome value for mismatch exceptions
1244   * (also contains the register numbers we need to use)
1245   * @mtedesc: MTE descriptor word
1246   * @stepfn: function which does a single part of the set operation
1247   * @is_setg: true if this is the tag-setting SETG variant
1248   */
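/*
 * As an illustration of the Option A register state: a SETP with
 * Xd = 0x8000 and Xn = 0x100 completes with Xd = 0x8100 (the final
 * address, i.e. original Xd + Xn) and Xn = -(bytes still to set),
 * which is the form do_setm() and do_sete() below then consume.
 */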
1249  static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
1250                      StepFn *stepfn, bool is_setg, uintptr_t ra)
1251  {
1252      /* Prologue: we choose to do up to the next page boundary */
1253      int rd = mops_destreg(syndrome);
1254      int rs = mops_srcreg(syndrome);
1255      int rn = mops_sizereg(syndrome);
1256      uint8_t data = arm_reg_or_xzr(env, rs);
1257      uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
1258      uint64_t toaddr = env->xregs[rd];
1259      uint64_t setsize = env->xregs[rn];
1260      uint64_t stagesetsize, step;
1261  
1262      check_mops_enabled(env, ra);
1263  
1264      if (setsize > INT64_MAX) {
1265          setsize = INT64_MAX;
1266          if (is_setg) {
1267              setsize &= ~0xf;
1268          }
1269      }
1270  
1271      if (unlikely(is_setg)) {
1272          check_setg_alignment(env, toaddr, setsize, memidx, ra);
1273      } else if (!mte_checks_needed(toaddr, mtedesc)) {
1274          mtedesc = 0;
1275      }
1276  
1277      stagesetsize = MIN(setsize, page_limit(toaddr));
1278      while (stagesetsize) {
1279          env->xregs[rd] = toaddr;
1280          env->xregs[rn] = setsize;
1281          step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
1282          toaddr += step;
1283          setsize -= step;
1284          stagesetsize -= step;
1285      }
1286      /* Insn completed, so update registers to the Option A format */
1287      env->xregs[rd] = toaddr + setsize;
1288      env->xregs[rn] = -setsize;
1289  
1290      /* Set NZCV = 0000 to indicate we are an Option A implementation */
1291      env->NF = 0;
1292      env->ZF = 1; /* our env->ZF encoding is inverted */
1293      env->CF = 0;
1294      env->VF = 0;
1295      return;
1296  }
1297  
1298  void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1299  {
1300      do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
1301  }
1302  
1303  void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1304  {
1305      do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
1306  }
1307  
1308  static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
1309                      StepFn *stepfn, bool is_setg, uintptr_t ra)
1310  {
1311      /* Main: we choose to do all the full-page chunks */
1312      CPUState *cs = env_cpu(env);
1313      int rd = mops_destreg(syndrome);
1314      int rs = mops_srcreg(syndrome);
1315      int rn = mops_sizereg(syndrome);
1316      uint8_t data = arm_reg_or_xzr(env, rs);
1317      uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
1318      uint64_t setsize = -env->xregs[rn];
1319      uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
1320      uint64_t step, stagesetsize;
1321  
1322      check_mops_enabled(env, ra);
1323  
1324      /*
1325       * We're allowed to NOP out "no data to copy" before the consistency
1326       * checks; we choose to do so.
1327       */
1328      if (env->xregs[rn] == 0) {
1329          return;
1330      }
1331  
1332      check_mops_wrong_option(env, syndrome, ra);
1333  
1334      /*
1335       * Our implementation will work fine even if we have an unaligned
1336       * destination address, and because we update Xn every time around
1337       * the loop below and the return value from stepfn() may be less
1338       * than requested, we might find toaddr is unaligned. So we don't
1339       * have an IMPDEF check for alignment here.
1340       */
1341  
1342      if (unlikely(is_setg)) {
1343          check_setg_alignment(env, toaddr, setsize, memidx, ra);
1344      } else if (!mte_checks_needed(toaddr, mtedesc)) {
1345          mtedesc = 0;
1346      }
1347  
1348      /* Do the actual memset: we leave the last partial page to SETE */
1349      stagesetsize = setsize & TARGET_PAGE_MASK;
1350      while (stagesetsize > 0) {
1351          step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
1352          toaddr += step;
1353          setsize -= step;
1354          stagesetsize -= step;
1355          env->xregs[rn] = -setsize;
1356          if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
1357              cpu_loop_exit_restore(cs, ra);
1358          }
1359      }
1360  }
1361  
1362  void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1363  {
1364      do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
1365  }
1366  
1367  void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1368  {
1369      do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
1370  }
1371  
1372  static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
1373                      StepFn *stepfn, bool is_setg, uintptr_t ra)
1374  {
1375      /* Epilogue: do the last partial page */
1376      int rd = mops_destreg(syndrome);
1377      int rs = mops_srcreg(syndrome);
1378      int rn = mops_sizereg(syndrome);
1379      uint8_t data = arm_reg_or_xzr(env, rs);
1380      uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
1381      uint64_t setsize = -env->xregs[rn];
1382      uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
1383      uint64_t step;
1384  
1385      check_mops_enabled(env, ra);
1386  
1387      /*
1388       * We're allowed to NOP out "no data to copy" before the consistency
1389       * checks; we choose to do so.
1390       */
1391      if (setsize == 0) {
1392          return;
1393      }
1394  
1395      check_mops_wrong_option(env, syndrome, ra);
1396  
1397      /*
1398       * Our implementation has no address alignment requirements, but
1399       * we do want to enforce the "less than a page" size requirement,
1400       * so we don't need to have the "check for interrupts" here.
1401       */
1402      if (setsize >= TARGET_PAGE_SIZE) {
1403          raise_exception_ra(env, EXCP_UDEF, syndrome,
1404                             mops_mismatch_exception_target_el(env), ra);
1405      }
1406  
1407      if (unlikely(is_setg)) {
1408          check_setg_alignment(env, toaddr, setsize, memidx, ra);
1409      } else if (!mte_checks_needed(toaddr, mtedesc)) {
1410          mtedesc = 0;
1411      }
1412  
1413      /* Do the actual memset */
1414      while (setsize > 0) {
1415          step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
1416          toaddr += step;
1417          setsize -= step;
1418          env->xregs[rn] = -setsize;
1419      }
1420  }
1421  
1422  void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1423  {
1424      do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
1425  }
1426  
1427  void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
1428  {
1429      do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
1430  }
1431  
1432  /*
1433   * Perform part of a memory copy from the guest memory at fromaddr
1434   * and extending for copysize bytes, to the guest memory at
1435   * toaddr. Both addresses are dirty.
1436   *
1437   * Returns the number of bytes actually copied, which might be less than
1438   * copysize; the caller should loop until the whole copy has been done.
1439   * The caller should ensure that the guest registers are correct
1440   * for the possibility that the first byte of the copy encounters
1441   * an exception or watchpoint. We guarantee not to take any faults
1442   * for bytes other than the first.
1443   */
1444  static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
1445                            uint64_t copysize, int wmemidx, int rmemidx,
1446                            uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
1447  {
1448      void *rmem;
1449      void *wmem;
1450  
1451      /* Don't cross a page boundary on either source or destination */
1452      copysize = MIN(copysize, page_limit(toaddr));
1453      copysize = MIN(copysize, page_limit(fromaddr));
1454      /*
1455       * Handle MTE tag checks: either handle the tag mismatch for byte 0,
1456       * or else copy up to but not including the byte with the mismatch.
1457       */
1458      if (*rdesc) {
1459          uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
1460          if (mtesize == 0) {
1461              mte_check_fail(env, *rdesc, fromaddr, ra);
1462              *rdesc = 0;
1463          } else {
1464              copysize = MIN(copysize, mtesize);
1465          }
1466      }
1467      if (*wdesc) {
1468          uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
1469          if (mtesize == 0) {
1470              mte_check_fail(env, *wdesc, toaddr, ra);
1471              *wdesc = 0;
1472          } else {
1473              copysize = MIN(copysize, mtesize);
1474          }
1475      }
1476  
1477      toaddr = useronly_clean_ptr(toaddr);
1478      fromaddr = useronly_clean_ptr(fromaddr);
1479      /* Trapless lookup of whether we can get a host memory pointer */
1480      wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
1481      rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
1482  
1483  #ifndef CONFIG_USER_ONLY
1484      /*
1485       * If we don't have host memory for both source and dest then just
1486       * do a single byte copy. This will handle watchpoints, invalid pages,
1487       * etc correctly. For clean code pages, the next iteration will see
1488       * the page dirty and will use the fast path.
1489       */
1490      if (unlikely(!rmem || !wmem)) {
1491          uint8_t byte;
1492          if (rmem) {
1493              byte = *(uint8_t *)rmem;
1494          } else {
1495              byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
1496          }
1497          if (wmem) {
1498              *(uint8_t *)wmem = byte;
1499          } else {
1500              cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
1501          }
1502          return 1;
1503      }
1504  #endif
1505      /* Easy case: just memmove the host memory */
1506      set_helper_retaddr(ra);
1507      memmove(wmem, rmem, copysize);
1508      clear_helper_retaddr();
1509      return copysize;
1510  }
1511  
1512  /*
1513   * Do part of a backwards memory copy. Here toaddr and fromaddr point
1514   * to the *last* byte to be copied.
1515   */
1516  static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
1517                                uint64_t fromaddr,
1518                                uint64_t copysize, int wmemidx, int rmemidx,
1519                                uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
1520  {
1521      void *rmem;
1522      void *wmem;
1523  
1524      /* Don't cross a page boundary on either source or destination */
1525      copysize = MIN(copysize, page_limit_rev(toaddr));
1526      copysize = MIN(copysize, page_limit_rev(fromaddr));
1527  
1528      /*
1529       * Handle MTE tag checks: either handle the tag mismatch for byte 0,
1530       * or else copy up to but not including the byte with the mismatch.
1531       */
1532      if (*rdesc) {
1533          uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
1534          if (mtesize == 0) {
1535              mte_check_fail(env, *rdesc, fromaddr, ra);
1536              *rdesc = 0;
1537          } else {
1538              copysize = MIN(copysize, mtesize);
1539          }
1540      }
1541      if (*wdesc) {
1542          uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
1543          if (mtesize == 0) {
1544              mte_check_fail(env, *wdesc, toaddr, ra);
1545              *wdesc = 0;
1546          } else {
1547              copysize = MIN(copysize, mtesize);
1548          }
1549      }
1550  
1551      toaddr = useronly_clean_ptr(toaddr);
1552      fromaddr = useronly_clean_ptr(fromaddr);
1553      /* Trapless lookup of whether we can get a host memory pointer */
1554      wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
1555      rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
1556  
1557  #ifndef CONFIG_USER_ONLY
1558      /*
1559       * If we don't have host memory for both source and dest then just
1560       * do a single byte copy. This will handle watchpoints, invalid pages,
1561       * etc correctly. For clean code pages, the next iteration will see
1562       * the page dirty and will use the fast path.
1563       */
1564      if (unlikely(!rmem || !wmem)) {
1565          uint8_t byte;
1566          if (rmem) {
1567              byte = *(uint8_t *)rmem;
1568          } else {
1569              byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
1570          }
1571          if (wmem) {
1572              *(uint8_t *)wmem = byte;
1573          } else {
1574              cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
1575          }
1576          return 1;
1577      }
1578  #endif
1579      /*
1580       * Easy case: just memmove the host memory. Note that wmem and
1581       * rmem here point to the *last* byte to copy.
1582       */
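          /* (e.g. with copysize == 4, bytes wmem[-3] through wmem[0] are written) */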
1583      set_helper_retaddr(ra);
1584      memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
1585      clear_helper_retaddr();
1586      return copysize;
1587  }
1588  
1589  /*
1590   * For the Memory Copy operation, our implementation always chooses
1591   * to use "option A": we update Xd and Xs to the final addresses in the
1592   * CPYP insn, and then in CPYM and CPYE we only need to update Xn.
1593   *
1594   * @env: CPU
1595   * @syndrome: syndrome value for mismatch exceptions
1596   * (also contains the register numbers we need to use)
1597   * @wdesc: MTE descriptor for the writes (destination)
1598   * @rdesc: MTE descriptor for the reads (source)
1599   * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
1600   */
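 /*
  * Illustrative sketch (our own numbers, not from the architecture text):
  * a forwards CPYP with Xd = 0x2000, Xs = 0x1000, Xn = 0x3000 leaves
  * Xd = 0x5000 and Xs = 0x4000 (one past the end of each buffer) and
  * Xn = -(bytes still to copy), so CPYM and CPYE recover the current
  * addresses as Xd + Xn and Xs + Xn and only ever have to update Xn.
  */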
1601  static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1602                      uint32_t rdesc, uint32_t move, uintptr_t ra)
1603  {
1604      int rd = mops_destreg(syndrome);
1605      int rs = mops_srcreg(syndrome);
1606      int rn = mops_sizereg(syndrome);
1607      uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
1608      uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
1609      bool forwards = true;
1610      uint64_t toaddr = env->xregs[rd];
1611      uint64_t fromaddr = env->xregs[rs];
1612      uint64_t copysize = env->xregs[rn];
1613      uint64_t stagecopysize, step;
1614  
1615      check_mops_enabled(env, ra);
1616  
1617  
1618      if (move) {
1619          /*
1620           * Copy backwards if necessary. The direction for a non-overlapping
1621           * copy is IMPDEF; we choose forwards.
1622           */
1623          if (copysize > 0x007FFFFFFFFFFFFFULL) {
1624              copysize = 0x007FFFFFFFFFFFFFULL;
1625          }
1626          uint64_t fs = extract64(fromaddr, 0, 56);
1627          uint64_t ts = extract64(toaddr, 0, 56);
1628          uint64_t fe = extract64(fromaddr + copysize, 0, 56);
1629  
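              /*
               * Example: fromaddr = 0x1000, toaddr = 0x1004, copysize = 0x10
               * gives fs = 0x1000, ts = 0x1004 and fe = 0x1010, so fs < ts
               * and fe > ts: the destination starts inside the source range,
               * a forwards copy would overwrite source bytes before reading
               * them, and we must copy backwards.
               */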
1630          if (fs < ts && fe > ts) {
1631              forwards = false;
1632          }
1633      } else {
1634          if (copysize > INT64_MAX) {
1635              copysize = INT64_MAX;
1636          }
1637      }
1638  
1639      if (!mte_checks_needed(fromaddr, rdesc)) {
1640          rdesc = 0;
1641      }
1642      if (!mte_checks_needed(toaddr, wdesc)) {
1643          wdesc = 0;
1644      }
1645  
1646      if (forwards) {
1647          stagecopysize = MIN(copysize, page_limit(toaddr));
1648          stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
1649          while (stagecopysize) {
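                  /*
                   * Keep the guest-visible registers describing the work still
                   * to be done, so that if copy_step() faults or hits a
                   * watchpoint on its first byte the insn can simply restart.
                   */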
1650              env->xregs[rd] = toaddr;
1651              env->xregs[rs] = fromaddr;
1652              env->xregs[rn] = copysize;
1653              step = copy_step(env, toaddr, fromaddr, stagecopysize,
1654                               wmemidx, rmemidx, &wdesc, &rdesc, ra);
1655              toaddr += step;
1656              fromaddr += step;
1657              copysize -= step;
1658              stagecopysize -= step;
1659          }
1660          /* Insn completed, so update registers to the Option A format */
1661          env->xregs[rd] = toaddr + copysize;
1662          env->xregs[rs] = fromaddr + copysize;
1663          env->xregs[rn] = -copysize;
1664      } else {
1665          /*
1666           * In a reverse copy the to and from addrs in Xs and Xd are the start
1667           * of the range, but it's more convenient for us to work with pointers
1668           * to the last byte being copied.
1669           */
1670          toaddr += copysize - 1;
1671          fromaddr += copysize - 1;
1672          stagecopysize = MIN(copysize, page_limit_rev(toaddr));
1673          stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
1674          while (stagecopysize) {
1675              env->xregs[rn] = copysize;
1676              step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
1677                                   wmemidx, rmemidx, &wdesc, &rdesc, ra);
1678              copysize -= step;
1679              stagecopysize -= step;
1680              toaddr -= step;
1681              fromaddr -= step;
1682          }
1683          /*
1684           * Insn completed, so update registers to the Option A format.
1685           * For a reverse copy this is no different to the CPYP input format.
1686           */
1687          env->xregs[rn] = copysize;
1688      }
1689  
1690      /* Set NZCV = 0000 to indicate we are an Option A implementation */
1691      env->NF = 0;
1692      env->ZF = 1; /* our env->ZF encoding is inverted */
1693      env->CF = 0;
1694      env->VF = 0;
1695      return;
1696  }
1697  
1698  void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1699                    uint32_t rdesc)
1700  {
1701      do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
1702  }
1703  
1704  void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1705                     uint32_t rdesc)
1706  {
1707      do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
1708  }
1709  
1710  static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1711                      uint32_t rdesc, uint32_t move, uintptr_t ra)
1712  {
1713      /* Main: we choose to copy until less than a page remains */
1714      CPUState *cs = env_cpu(env);
1715      int rd = mops_destreg(syndrome);
1716      int rs = mops_srcreg(syndrome);
1717      int rn = mops_sizereg(syndrome);
1718      uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
1719      uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
1720      bool forwards = true;
1721      uint64_t toaddr, fromaddr, copysize, step;
1722  
1723      check_mops_enabled(env, ra);
1724  
1725      /* We choose to NOP out "no data to copy" before consistency checks */
1726      if (env->xregs[rn] == 0) {
1727          return;
1728      }
1729  
1730      check_mops_wrong_option(env, syndrome, ra);
1731  
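          /*
           * Under our Option A convention Xn was negated by CPYP for a
           * forwards copy, so a negative Xn here means "copy forwards";
           * CPYF (!move) always copies forwards.
           */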
1732      if (move) {
1733          forwards = (int64_t)env->xregs[rn] < 0;
1734      }
1735  
1736      if (forwards) {
1737          toaddr = env->xregs[rd] + env->xregs[rn];
1738          fromaddr = env->xregs[rs] + env->xregs[rn];
1739          copysize = -env->xregs[rn];
1740      } else {
1741          copysize = env->xregs[rn];
1742          /* This toaddr and fromaddr point to the *last* byte to copy */
1743          toaddr = env->xregs[rd] + copysize - 1;
1744          fromaddr = env->xregs[rs] + copysize - 1;
1745      }
1746  
1747      if (!mte_checks_needed(fromaddr, rdesc)) {
1748          rdesc = 0;
1749      }
1750      if (!mte_checks_needed(toaddr, wdesc)) {
1751          wdesc = 0;
1752      }
1753  
1754      /* Our implementation has no particular parameter requirements for CPYM */
1755  
1756      /* Do the actual memmove */
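          /*
           * Copy in chunks, updating Xn after every step so the architectural
           * state stays correct, and drop back to the main loop between
           * chunks if an interrupt or other exit has been requested; the insn
           * is then re-executed and continues where it left off.
           */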
1757      if (forwards) {
1758          while (copysize >= TARGET_PAGE_SIZE) {
1759              step = copy_step(env, toaddr, fromaddr, copysize,
1760                               wmemidx, rmemidx, &wdesc, &rdesc, ra);
1761              toaddr += step;
1762              fromaddr += step;
1763              copysize -= step;
1764              env->xregs[rn] = -copysize;
1765              if (copysize >= TARGET_PAGE_SIZE &&
1766                  unlikely(cpu_loop_exit_requested(cs))) {
1767                  cpu_loop_exit_restore(cs, ra);
1768              }
1769          }
1770      } else {
1771          while (copysize >= TARGET_PAGE_SIZE) {
1772              step = copy_step_rev(env, toaddr, fromaddr, copysize,
1773                                   wmemidx, rmemidx, &wdesc, &rdesc, ra);
1774              toaddr -= step;
1775              fromaddr -= step;
1776              copysize -= step;
1777              env->xregs[rn] = copysize;
1778              if (copysize >= TARGET_PAGE_SIZE &&
1779                  unlikely(cpu_loop_exit_requested(cs))) {
1780                  cpu_loop_exit_restore(cs, ra);
1781              }
1782          }
1783      }
1784  }
1785  
1786  void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1787                    uint32_t rdesc)
1788  {
1789      do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
1790  }
1791  
1792  void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1793                     uint32_t rdesc)
1794  {
1795      do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
1796  }
1797  
1798  static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1799                      uint32_t rdesc, uint32_t move, uintptr_t ra)
1800  {
1801      /* Epilogue: do the last partial page */
1802      int rd = mops_destreg(syndrome);
1803      int rs = mops_srcreg(syndrome);
1804      int rn = mops_sizereg(syndrome);
1805      uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
1806      uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
1807      bool forwards = true;
1808      uint64_t toaddr, fromaddr, copysize, step;
1809  
1810      check_mops_enabled(env, ra);
1811  
1812      /* We choose to NOP out "no data to copy" before consistency checks */
1813      if (env->xregs[rn] == 0) {
1814          return;
1815      }
1816  
1817      check_mops_wrong_option(env, syndrome, ra);
1818  
1819      if (move) {
1820          forwards = (int64_t)env->xregs[rn] < 0;
1821      }
1822  
1823      if (forwards) {
1824          toaddr = env->xregs[rd] + env->xregs[rn];
1825          fromaddr = env->xregs[rs] + env->xregs[rn];
1826          copysize = -env->xregs[rn];
1827      } else {
1828          copysize = env->xregs[rn];
1829          /* This toaddr and fromaddr point to the *last* byte to copy */
1830          toaddr = env->xregs[rd] + copysize - 1;
1831          fromaddr = env->xregs[rs] + copysize - 1;
1832      }
1833  
1834      if (!mte_checks_needed(fromaddr, rdesc)) {
1835          rdesc = 0;
1836      }
1837      if (!mte_checks_needed(toaddr, wdesc)) {
1838          wdesc = 0;
1839      }
1840  
1841      /* Check the size; we don't want to have to do a check-for-interrupts */
1842      if (copysize >= TARGET_PAGE_SIZE) {
1843          raise_exception_ra(env, EXCP_UDEF, syndrome,
1844                             mops_mismatch_exception_target_el(env), ra);
1845      }
1846  
1847      /* Do the actual memmove */
1848      if (forwards) {
1849          while (copysize > 0) {
1850              step = copy_step(env, toaddr, fromaddr, copysize,
1851                               wmemidx, rmemidx, &wdesc, &rdesc, ra);
1852              toaddr += step;
1853              fromaddr += step;
1854              copysize -= step;
1855              env->xregs[rn] = -copysize;
1856          }
1857      } else {
1858          while (copysize > 0) {
1859              step = copy_step_rev(env, toaddr, fromaddr, copysize,
1860                                   wmemidx, rmemidx, &wdesc, &rdesc, ra);
1861              toaddr -= step;
1862              fromaddr -= step;
1863              copysize -= step;
1864              env->xregs[rn] = copysize;
1865          }
1866      }
1867  }
1868  
1869  void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1870                    uint32_t rdesc)
1871  {
1872      do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
1873  }
1874  
1875  void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
1876                     uint32_t rdesc)
1877  {
1878      do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
1879  }
1880  
1881  static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
1882  {
1883  #ifdef CONFIG_USER_ONLY
1884      return page_get_flags(addr) & PAGE_BTI;
1885  #else
1886      CPUTLBEntryFull *full;
1887      void *host;
1888      int mmu_idx = cpu_mmu_index(env_cpu(env), true);
1889      int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
1890                                    false, &host, &full, ra);
1891  
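          /*
           * With the nonfault argument false, probe_access_full() raises any
           * fault itself rather than returning an invalid entry, so the
           * TLB_INVALID_MASK flag should never be set here.
           */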
1892      assert(!(flags & TLB_INVALID_MASK));
1893      return full->extra.arm.guarded;
1894  #endif
1895  }
1896  
1897  void HELPER(guarded_page_check)(CPUARMState *env)
1898  {
1899      /*
1900       * We have already verified that BTI is enabled, and that the
1901       * instruction at PC is not acceptable for the BTYPE.  This is always at
1902       * the beginning of a block, so PC is always up-to-date and
1903       * no unwind is required.
1904       */
1905      if (is_guarded_page(env, env->pc, 0)) {
1906          raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
1907                          exception_target_el(env));
1908      }
1909  }
1910  
1911  void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
1912  {
1913      /*
1914       * We have already checked for branch via x16 and x17.
1915       * What remains for choosing BTYPE is checking for a guarded page.
1916       */
1917      env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
1918  }
1919