/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
#include "vec_internal.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}
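
/*
 * Illustrative note: the special cases above mirror the architected
 * UDIV/SDIV behaviour -- dividing by zero returns zero rather than
 * trapping, and LLONG_MIN / -1 returns LLONG_MIN.  In plain C both
 * would be undefined behaviour, e.g. helper_sdiv64(INT64_MIN, -1)
 * must yield INT64_MIN rather than overflow, hence the explicit checks.
 */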

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}
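
/*
 * Illustration of the "imm << 6" above: the DAIFSet/DAIFClr immediate
 * carries D, A, I, F in bits [3:0], while PSTATE keeps the same flags
 * in bits [9:6].  For example "MSR DAIFSet, #0b0011" masks IRQ and FIQ
 * by setting PSTATE.I (bit 7) and PSTATE.F (bit 6).
 */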

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}
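
/*
 * For reference, this yields the architected FCMP/FCMPE flag results:
 * equal -> NZCV 0110, less than -> 1000, greater than -> 0010, and an
 * unordered comparison (at least one NaN operand) -> 0011.
 */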

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}
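
/*
 * A note on the magic numbers above: 1U << 30 is 0x40000000, the IEEE
 * single-precision encoding of +2.0, and 1ULL << 62 is likewise +2.0
 * in double precision.  OR-ing in the XOR of the two sign bits gives
 * the +/-2.0 that FMULX returns for the 0 * infinity cases instead of
 * the default NaN an ordinary multiply would produce.
 */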

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_lt(b, a, fpst);
}

/*
 * Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
 */
#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
    { \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
        a = FLOATTYPE ## _ ## CHSFN(a); \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _two; \
        } \
        return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
    }

DO_RECPS(recpsf_f16, uint32_t, float16, chs)
DO_RECPS(recpsf_f32, float32, float32, chs)
DO_RECPS(recpsf_f64, float64, float64, chs)
DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)

#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
    { \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
        a = FLOATTYPE ## _ ## CHSFN(a); \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _one_point_five; \
        } \
        return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
                                           -1, 0, fpst); \
    } \

DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
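
/*
 * As a reading aid: because CHSFN negates the first operand, the
 * muladd calls above compute 2.0 - (a * b) for the FRECPS helpers and
 * (3.0 - (a * b)) / 2.0 for the FRSQRTS helpers (the scalbn exponent
 * of -1 does the halving).  These are the correction factors of one
 * Newton-Raphson iteration for refining 1/x and 1/sqrt(x) estimates.
 */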

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}
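
/*
 * Worked example for the single-precision helper: 1.0f is 0x3f800000,
 * so exp is 0x7f; inverting gives 0x80 and a result of 0x40000000,
 * i.e. FRECPX(1.0) == 2.0.  The fraction bits of the result are always
 * zero, so the result is a power of two with roughly the magnitude of
 * the input's reciprocal.
 */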

float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}
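
/*
 * Round-to-odd ("von Neumann rounding") forces the low bit of an
 * inexact result to 1.  FCVTXN uses it so that a subsequent
 * float32 -> float16 narrowing cannot double-round: the sticky low
 * bit keeps enough information for the second, correctly rounded
 * conversion.
 */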

/*
 * AH=1 min/max have some odd special cases:
 * comparing two zeroes (regardless of sign), (NaN, anything),
 * or (anything, NaN) should return the second argument (possibly
 * squashed to zero).
 * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
 */
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
    { \
        bool save; \
        CTYPE r; \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
            return b; \
        } \
        if (FLOATTYPE ## _is_any_nan(a) || \
            FLOATTYPE ## _is_any_nan(b)) { \
            float_raise(float_flag_invalid, fpst); \
            return b; \
        } \
        save = get_flush_to_zero(fpst); \
        set_flush_to_zero(false, fpst); \
        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
        set_flush_to_zero(save, fpst); \
        return r; \
    }

AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \
                                  float_status *fpst) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}
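
/*
 * Layout note for the "2h" helpers above: the two half-precision
 * elements travel packed in a single 32-bit value, element 0 in
 * bits [15:0] and element 1 in bits [31:16], and the results are
 * repacked the same way with deposit32().
 */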

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}
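
/*
 * For the AArch64 (nRW clear) case above, SPSR_ELx.M decodes as
 * M[3:2] = target EL and M[0] = stack pointer selection, with M[1]
 * reserved; e.g. M == 0b0101 is EL1h, a return to EL1 using SP_EL1.
 */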

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    ARMCPU *cpu = env_archcpu(env);
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
        /* Return to AArch32 when CPU is AArch64-only */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(cpu);
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address.  We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits.  This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0.  If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(cpu);
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}
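
/*
 * A note on the TBI handling in the exception return above: when
 * top-byte-ignore is enabled for the selected address range, bits
 * [63:56] of the return address are not part of the virtual address.
 * Regimes with two VA ranges (e.g. EL1&0) sign-extend from bit 55 so
 * the address stays in the correct half of the address space;
 * single-range regimes zero-extend instead.
 */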

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup.  In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
         * the original pointer for an invalid page.  But watchpoints require
         * that we probe the actual space.  So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}
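
/*
 * Sizing example: dcz_blocksize is the log2 block size in 32-bit words
 * (as reported in DCZID_EL0.BS), so the usual value of 4 gives
 * blocklen = 4 << 4 = 64 bytes, and vaddr_in is rounded down to a
 * 64-byte boundary before the block is zeroed.
 */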

void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}
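
/*
 * Assuming 4KiB target pages as an example: page_limit(0xffc) is
 * TARGET_PAGE_ALIGN(0xffd) - 0xffc = 0x1000 - 0xffc = 4 bytes before
 * the boundary, while page_limit_rev(0x1003) is (0x1003 & 0xfff) + 1 =
 * 4 bytes available when copying backwards down to the page start.
 */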

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint.  We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    return setsize;
}

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks. SETP accesses are not tag checked; they set
 * the tags.
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
     */
    if (!tbi_check(desc, bit55)) {
        return false;
    }
    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
}

/* Take an exception if the SETG addr/size are not granule aligned */
static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
                                 uint32_t memidx, uintptr_t ra)
{
    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    memidx, ra);

    }
}

static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
{
    /*
     * Runtime equivalent of cpu_reg() -- return the CPU register value,
     * for contexts when index 31 means XZR (not SP).
     */
    return reg == 31 ? 0 : env->xregs[reg];
}

/*
 * For the Memory Set operation, our implementation chooses
 * always to use "option A", where we update Xd to the final
 * address in the SETP insn, and set Xn to be -(bytes remaining).
 * On SETM and SETE insns we only need update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
 */
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}
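
/*
 * Worked example of the Option A convention, assuming 4KiB pages:
 * entering SETP with Xd = 0xff0 and Xn = 0x30, the prologue sets the
 * 0x10 bytes up to the page boundary and exits with Xd = 0x1020 (the
 * final address, 0xff0 + 0x30) and Xn = -0x20 (minus the bytes still
 * to do); SETM and SETE then walk Xn back up towards zero.
 */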

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr. Both addresses are dirty.
 *
 * Returns the number of bytes actually set, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint.  We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    set_helper_retaddr(ra);
    memmove(wmem, rmem, copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    set_helper_retaddr(ra);
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * For the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);


    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
         */
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}
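
/*
 * A note on the direction choice in do_cpyp() above: only CPY (move
 * semantics) ever copies backwards, and only when the ranges overlap
 * with the source starting below the destination, since a forwards
 * copy would overwrite source bytes before they had been read.  The
 * overlap test uses extract64(..., 0, 56) so that it is done on the
 * low 56 bits of each address and differing top (tag) bytes do not
 * affect the decision.
 */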

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE.  This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}