/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
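        /* (1U << 30 is 0x40000000, the IEEE-754 encoding of 2.0f) */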
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
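    /*
     * Sign extension uses the usual (v ^ 0x80) - 0x80 trick, applied to all
     * four fields at once: setting bit 15 of each field before the
     * subtraction stops the borrow from propagating into the neighbouring
     * field, and the final XOR removes that bias again.
     */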
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
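/*
 * FPRecpX keeps the sign, zeroes the fraction and replaces the exponent
 * field with its bitwise NOT (special-casing a zero exponent so the result
 * stays finite), giving a cheap power-of-two rescaling of the operand.
 */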
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
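    /* (This rounding mode is also known as "round to odd".) */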
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
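/* e.g. ADVSIMD_HELPER(add, h) expands to helper_advsimd_addh() */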

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst); \
}

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    float_status *fpst = fpstp; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
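/* 0xffff is "all ones" in a 16-bit lane, i.e. the -1 mentioned above */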

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = cpu_mmu_index(env, false);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
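    /* dcz_blocksize is log2 of the block size in words, so this is in bytes */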
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = cpu_mmu_index(env, false);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        uintptr_t ra = GETPC();

        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    memset(mem, 0, blocklen);
}

void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
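 * For example, an addr 4 bytes below the next page boundary yields 4.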
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    memset(mem, data, setsize);
    return setsize;
}

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks. SETP accesses are not tag checked; they set
 * the tags.
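 * (The 16-byte chunk size matches the MTE tag granule: one allocation
 * tag covers 16 bytes of memory.)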
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    memset(mem, data, setsize);
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
     */
    if (!tbi_check(desc, bit55)) {
        return false;
    }
    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
}

/* Take an exception if the SETG addr/size are not granule aligned */
static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
                                 uint32_t memidx, uintptr_t ra)
{
    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    memidx, ra);
    }
}

static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
{
    /*
     * Runtime equivalent of cpu_reg() -- return the CPU register value,
     * for contexts when index 31 means XZR (not SP).
     */
    return reg == 31 ? 0 : env->xregs[reg];
}

/*
 * For the Memory Set operation, our implementation chooses
 * always to use "option A", where we update Xd to the final
 * address in the SETP insn, and set Xn to be -(bytes remaining).
 * On SETM and SETE insns we only need update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 *            (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
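 *
 * For example, a SETP with Xd = 0x8000 and Xn = 0x200 finishes with
 * Xd = 0x8200 and Xn = -(number of bytes still to be set).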
 */
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr. Both addresses are dirty.
 *
 * Returns the number of bytes actually copied, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    memmove(wmem, rmem, copysize);
    return copysize;
}

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    return copysize;
}

/*
 * for the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 *            (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);

    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
         */
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}