Lines Matching +full:width +full:- +full:mm
23 #include "exec/page-protection.h"
24 #include "exec/helper-proto.h"
26 #include "exec/tlb-flags.h"
27 #include "tcg/tcg-gvec-desc.h"
32 #include "accel/tcg/cpu-ldst.h"
33 #include "accel/tcg/helper-retaddr.h"
34 #include "accel/tcg/cpu-ops.h"
37 #include "user/page-protection.h"
60 flags |= ((d & (g & -g)) != 0) << 31;
82 flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */
90 flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0);
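As an aside, a minimal standalone sketch of the flag accumulation these lines rely on (not from sve_helper.c; the helper name is made up, and the C-flag handling of the original is omitted). The key trick is that g & -g isolates the lowest set bit of the governing predicate, so (d & (g & -g)) != 0 tests the first active element:

#include <stdint.h>
#include <stdio.h>

/* Accumulate N (bit 31), Z (bit 1) and a "first word seen" marker (bit 2)
   over one 64-bit predicate word, in the spirit of iter_predtest_fwd. */
static uint32_t predtest_word(uint64_t d, uint64_t g, uint32_t flags)
{
    if (g) {
        if (!(flags & 4)) {
            /* g & -g isolates the first active element of this word. */
            flags |= (uint32_t)((d & (g & -g)) != 0) << 31;
            flags |= 4;
        }
        /* Z accumulates "some active element of D is set". */
        flags |= (uint32_t)((d & g) != 0) << 1;
    }
    return flags;
}

int main(void)
{
    printf("%08x\n", predtest_word(0x1, 0x3, 0));  /* first active set -> N */
    return 0;
}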
101 /* The same for a multi-word predicate. */
128 return -(uint64_t)(byte & 1);
170 /* Fully general three-operand expander, controlled by a predicate.
171 * This is complicated by the host-endian storage of the register file.
187 TYPE mm = *(TYPE *)(vm + H(i)); \
188 *(TYPE *)(vd + H(i)) = OP(nn, mm); \
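A small standalone illustration of the "host-endian storage" point made in the comment above (not from sve_helper.c): the Z registers are stored as arrays of host-order uint64_t, so the byte that holds architectural element 0 depends on the host, which is what the H() index macros compensate for.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Element i of this 8 x 8-bit "register" has value i in the LE layout. */
    uint64_t word = 0x0706050403020100ull;
    uint8_t bytes[8];

    memcpy(bytes, &word, sizeof(word));
    /* On a little-endian host bytes[0] is 0; on a big-endian host it is 7,
       which is why byte indices get XOR-adjusted there. */
    printf("host byte 0 holds element %u\n", bytes[0]);
    return 0;
}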
195 /* Similarly, specialized for 64-bit operands. */
204 TYPE nn = n[i], mm = m[i]; \
205 d[i] = OP(nn, mm); \
216 #define DO_SUB(N, M) (N - M)
219 #define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
225 * zero and signed division of INT_MIN by -1. Both of these
227 * We special case all signed divisions by -1 to avoid having
230 #define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M)
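A hedged, self-contained sketch of the special-casing in the DO_SDIV macro above (the function name is made up): division by zero yields 0, and any division by -1 is folded into a wrapping negation so the INT_MIN / -1 case neither traps nor overflows.

#include <stdint.h>
#include <stdio.h>

static int32_t sdiv32(int32_t n, int32_t m)
{
    if (m == 0) {
        return 0;
    }
    if (m == -1) {
        /* Wrapping negate: INT32_MIN stays INT32_MIN instead of overflowing. */
        return (int32_t)(0u - (uint32_t)n);
    }
    return n / m;
}

int main(void)
{
    printf("%d %d %d\n", sdiv32(7, 0), sdiv32(INT32_MIN, -1), sdiv32(7, -2));
    return 0;
}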
347 #define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1))
523 #define DO_HSUB_BHS(n, m) (((int64_t)n - m) >> 1)
524 #define DO_HSUB_D(n, m) ((n >> 1) - (m >> 1) - (~n & m & 1))
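The 64-bit halving subtract above avoids a 65-bit intermediate by using (n >> 1) - (m >> 1) - (~n & m & 1): the low bits only ever contribute -1, and they do so exactly when n is even and m is odd. A brute-force check over small values, as an illustration (assumes arithmetic right shift of negative integers, as QEMU itself does):

#include <assert.h>
#include <stdint.h>

static int64_t hsub_narrow(int64_t n, int64_t m)
{
    return (n >> 1) - (m >> 1) - (~n & m & 1);
}

int main(void)
{
    for (int64_t n = -8; n <= 8; n++) {
        for (int64_t m = -8; m <= 8; m++) {
            /* The widened form cannot overflow for these inputs. */
            assert(hsub_narrow(n, m) == ((n - m) >> 1));
        }
    }
    return 0;
}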
570 #define DO_SQSUB_B(n, m) do_ssat_b((int64_t)n - m)
571 #define DO_SQSUB_H(n, m) do_ssat_h((int64_t)n - m)
572 #define DO_SQSUB_S(n, m) do_ssat_s((int64_t)n - m)
576 int64_t r = n - m;
589 #define DO_UQSUB_B(n, m) do_usat_b((int64_t)n - m)
590 #define DO_UQSUB_H(n, m) do_usat_h((int64_t)n - m)
591 #define DO_UQSUB_S(n, m) do_usat_s((int64_t)n - m)
595 return n > m ? n - m : 0;
612 /* Note that m - abs(n) cannot underflow. */
615 if (m > -n) {
644 return n < -m ? 0 : r;
660 * If the slot I is odd, the elements from VM {I-1, I}.
686 /* Similarly, specialized for 64-bit operands. */
779 /* Three-operand expander, controlled by a predicate, in which the
780 * third operand is "wide". That is, for D = N op M, the same 64-bit
789 TYPEW mm = *(TYPEW *)(vm + i); \
793 *(TYPE *)(vd + H(i)) = OP(nn, mm); \
814 /* Fully general two-operand expander, controlled by a predicate.
832 /* Similarly, specialized for 64-bit operands. */
847 #define DO_CLS_B(N) (clrsb32(N) - 24)
848 #define DO_CLS_H(N) (clrsb32(N) - 16)
855 #define DO_CLZ_B(N) (clz32(N) - 24)
856 #define DO_CLZ_H(N) (clz32(N) - 16)
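The "- 24" and "- 16" corrections above exist because a leading-zero (or sign-bit) count computed with a 32-bit primitive over-counts a narrower operand by the padding bits. A one-line illustration, using the GCC/Clang builtin __builtin_clz as a stand-in for clz32:

#include <stdio.h>

int main(void)
{
    unsigned char x = 0x10;                          /* 3 leading zeros within 8 bits */
    printf("%d\n", __builtin_clz((unsigned)x) - 24); /* prints 3 */
    return 0;
}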
875 #define DO_FABS(N) (N & ((__typeof(N))-1 >> 1))
889 #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
931 #define DO_ABS(N) (N < 0 ? -N : N)
938 #define DO_NEG(N) (-N)
976 ({ __typeof(X) x_ = (X), min_ = 1ull << (sizeof(X) * 8 - 1); \
977 x_ >= 0 ? x_ : x_ == min_ ? -min_ - 1 : -x_; })
985 ({ __typeof(X) x_ = (X), min_ = 1ull << (sizeof(X) * 8 - 1); \
986 x_ == min_ ? -min_ - 1 : -x_; })
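The expressions above clamp the one value with no representable negation: a saturating absolute value or negate maps INT_MIN to INT_MAX instead of wrapping back to INT_MIN. Spelled out for 32 bits in a standalone sketch (illustrative, not one of the helpers):

#include <stdint.h>
#include <stdio.h>

static int32_t sqneg32(int32_t x)
{
    return x == INT32_MIN ? INT32_MAX : -x;
}

int main(void)
{
    printf("%d %d\n", sqneg32(5), sqneg32(INT32_MIN));  /* -5 2147483647 */
    return 0;
}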
996 /* Three-operand expander, unpredicated, in which the third operand is "wide".
1003 TYPEW mm = *(TYPEW *)(vm + i); \
1006 *(TYPE *)(vd + H(i)) = OP(nn, mm); \
1039 * Three-operand expander, unpredicated, in which the two inputs are
1050 TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
1051 *(TYPEW *)(vd + HW(i)) = OP(nn, mm); \
1119 TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
1120 *(TYPEW *)(vd + HW(i)) = OP(nn, mm); \
1150 TYPE mm = *(TYPE *)(vm + H(i + sel2)); \
1151 *(TYPE *)(vd + H(i + sel1)) = OP(nn, mm); \
1169 TYPEW mm = *(TYPEN *)(vm + HN(i + sel1)); \
1171 *(TYPEW *)(vd + HW(i)) = OP(nn, mm) + aa; \
1191 #define DO_NMUL(N, M) -(N * M)
1255 uint32_t inv = -extract32(desc, SIMD_DATA_SHIFT + 1, 1);
1263 /* Compute and store the entire 33-bit result at once. */
1272 uint64_t inv = -(uint64_t)extract32(desc, SIMD_DATA_SHIFT + 1, 1);
1293 TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
1295 *(TYPEW *)(vd + HW(i)) = SUM_OP(aa, DMUL_OP(nn, mm)); \
1333 #define DO_CMLA(N, M, A, S) (A + (N * M) * (S ? -1 : 1))
1425 int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
1440 int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
1456 int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
1476 int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
1495 TYPE mm = m[i]; \
1497 d[i + j] = OP(n[i + j], mm, a[i + j]); \
1531 TYPEW mm = *(TYPEN *)(vm + HN(i + idx)); \
1535 *(TYPEW *)(vd + HW(i + j)) = OP(nn, mm, aa); \
1547 #define DO_MLS(N, M, A) (A - N * M)
1577 TYPEW mm = *(TYPEN *)(vm + HN(i + idx)); \
1580 *(TYPEW *)(vd + HW(i + j)) = OP(nn, mm); \
1602 TYPE mm = *(TYPE *)(vm + i); \
1603 *(TYPE *)(vd + i) = OP(nn, mm, sizeof(TYPE) * 8); \
1733 /* Two-operand reduction expander, controlled by a predicate.
1735 * sign-extension. E.g. for SMAX, TYPERED must be signed,
1736 * but TYPERET must be unsigned so that e.g. a 32-bit value
1737 * is not sign-extended to the ABI uint64_t return type.
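An illustration of why the reduction type and the return type differ, as the comment above explains (the function name here is hypothetical, not one of the DO_VPZ helpers): reduce in the signed element type, then convert through the unsigned type of the same width so the uint64_t ABI return value is zero-extended rather than sign-extended.

#include <stdint.h>
#include <stdio.h>

static uint64_t smax_reduce_bytes(const int8_t *n, size_t len)
{
    int8_t r = INT8_MIN;

    for (size_t i = 0; i < len; i++) {
        r = n[i] > r ? n[i] : r;
    }
    return (uint8_t)r;   /* e.g. -1 returns as 0xff, not 0xffffffffffffffff */
}

int main(void)
{
    int8_t v[3] = { -5, -1, -128 };
    printf("%#llx\n", (unsigned long long)smax_reduce_bytes(v, 3));  /* 0xff */
    return 0;
}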
1786 DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND)
1787 DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND)
1788 DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND)
1789 DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND)
1815 DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN)
1816 DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN)
1817 DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN)
1818 DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
1826 TYPE tmp[16 / sizeof(TYPE)] = { [0 ... 16 / sizeof(TYPE) - 1] = INIT }; \
1862 DO_VPQ(sve2p1_uminqv_b, uint8_t, H1, -1, DO_MIN)
1863 DO_VPQ(sve2p1_uminqv_h, uint16_t, H2, -1, DO_MIN)
1864 DO_VPQ(sve2p1_uminqv_s, uint32_t, H4, -1, DO_MIN)
1865 DO_VPQ(sve2p1_uminqv_d, uint64_t, H8, -1, DO_MIN)
1880 #define DO_SUBR(X, Y) (Y - X)
1937 DO_LOGIC_QV(sve2p1_andqv, b, -1, DO_AND, DO_ORC)
1938 DO_LOGIC_QV(sve2p1_andqv, h, -1, DO_AND, DO_ORC)
1939 DO_LOGIC_QV(sve2p1_andqv, s, -1, DO_AND, DO_ORC)
1940 DO_LOGIC_QV(sve2p1_andqv, d, -1, DO_AND, DO_ORC)
1963 indication; e.g. not found for esz=3 is -8. */
1970 uint64_t this_g = g[--i] & mask;
1972 return i * 64 + (63 - clz64(this_g));
1975 return (intptr_t)-1 << esz;
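A sketch of the backward scan these lines come from (not the real helper, which also masks the predicate by the element-size pattern first): return the bit index of the last set bit across an array of predicate words, or a negative marker when nothing is set, e.g. -8 for esz=3 as the comment notes. __builtin_clzll stands in for clz64.

#include <stdint.h>
#include <stdio.h>

static intptr_t last_set_bit(const uint64_t *g, intptr_t words, int esz)
{
    for (intptr_t i = words; i > 0; ) {
        uint64_t w = g[--i];
        if (w) {
            return i * 64 + (63 - __builtin_clzll(w));
        }
    }
    return -(intptr_t)(1 << esz);   /* "not found": -8 when esz == 3 */
}

int main(void)
{
    uint64_t g[2] = { 0x81, 0 };
    printf("%ld %ld\n", (long)last_set_bit(g, 2, 3), (long)last_set_bit(g + 1, 1, 3));
    return 0;
}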
1992 this_d |= this_g & -this_g;
2016 uint64_t mask = -1;
2019 mask = ~((1ull << (next & 63)) - 1);
2020 next &= -64;
2025 next = (next & -64) + ctz64(this_g);
2029 mask = -1;
2053 uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
2065 uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
2077 uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
2094 d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
2098 /* Three-operand expander, immediate operand, controlled by a predicate.
2117 /* Similarly, specialized for 64-bit operands. */
2138 when N is negative, add 2**M-1. */
2139 #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M)
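A worked example of the bias the DO_ASRD comment describes (illustrative, assuming arithmetic shift of negative ints as the helpers do): a plain arithmetic shift rounds toward minus infinity, while ASRD wants truncation toward zero, hence adding 2**M - 1 to negative inputs first.

#include <stdio.h>

static int asrd32(int n, int m)
{
    return (n + (n < 0 ? (1 << m) - 1 : 0)) >> m;
}

int main(void)
{
    printf("%d %d\n", -5 >> 1, asrd32(-5, 1));  /* -3 (floor) vs -2 (toward zero) */
    return 0;
}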
2319 TYPEW mm = *(TYPEW *)(vm + i); \
2320 *(TYPEW *)(vd + i) = (TYPEN)OP(nn, mm, SHIFT); \
2330 TYPEW mm = *(TYPEW *)(vm + HW(i)); \
2331 *(TYPEN *)(vd + HN(i + sizeof(TYPEN))) = OP(nn, mm, SHIFT); \
2336 #define DO_RADDHN(N, M, SH) ((N + M + ((__typeof(N))1 << (SH - 1))) >> SH)
2337 #define DO_SUBHN(N, M, SH) ((N - M) >> SH)
2338 #define DO_RSUBHN(N, M, SH) ((N - M + ((__typeof(N))1 << (SH - 1))) >> SH)
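A small illustration of the rounding term in DO_RADDHN/DO_RSUBHN above (standalone, for the 16-bit to 8-bit case, SH = 8): adding half of the weight of the discarded bits before shifting rounds the narrowed high half to nearest instead of truncating.

#include <stdint.h>
#include <stdio.h>

static uint8_t raddhn16(uint16_t n, uint16_t m)
{
    return (uint8_t)((n + m + (1u << 7)) >> 8);
}

int main(void)
{
    /* Truncated high half would be 1; the rounded form gives 2. */
    printf("%u vs %u\n", (0x017f + 0x0001) >> 8, raddhn16(0x017f, 0x0001));
    return 0;
}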
2379 /* Fully general four-operand expander, controlled by a predicate.
2391 TYPE mm = *(TYPE *)(vm + H(i)); \
2393 *(TYPE *)(vd + H(i)) = OP(aa, nn, mm); \
2400 /* Similarly, specialized for 64-bit operands. */
2410 TYPE aa = a[i], nn = n[i], mm = m[i]; \
2411 d[i] = OP(aa, nn, mm); \
2417 #define DO_MLS(A, N, M) (A - N * M)
2518 /* These constants are cut-and-paste directly from the ARM pseudocode. */
2538 /* These constants are cut-and-paste directly from the ARM pseudocode. */
2570 /* These constants are cut-and-paste directly from the ARM pseudocode. */
2613 uint16_t mm = m[i];
2614 if (mm & 1) {
2617 if (mm & 2) {
2631 uint32_t mm = m[i];
2632 if (mm & 1) {
2635 if (mm & 2) {
2649 uint64_t mm = m[i];
2650 if (mm & 1) {
2653 if (mm & 2) {
2753 uint64_t mm, uint32_t desc)
2759 mm = dup_const(MO_8, mm);
2763 d[i] = (mm & pp) | (nn & ~pp);
2768 uint64_t mm, uint32_t desc)
2774 mm = dup_const(MO_16, mm);
2778 d[i] = (mm & pp) | (nn & ~pp);
2783 uint64_t mm, uint32_t desc)
2789 mm = dup_const(MO_32, mm);
2793 d[i] = (mm & pp) | (nn & ~pp);
2798 uint64_t mm, uint32_t desc)
2806 d[i] = (pg[H1(i)] & 1 ? mm : nn);
2857 /* Big-endian hosts need to frob the byte indices. If the copy
2858 * happens to be 8-byte aligned, then no frobbing necessary.
2882 i -= 4;
2896 i -= 2;
2909 i -= 1;
2962 size_t n_siz = opr_sz - n_ofs;
2983 swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \
2997 for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
3008 for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
3019 for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
3030 for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
3052 depositn(d->p, e * ESIZE, 1, extractn(s->d, elements * idx + e, 1)); \
3078 depositn(d->d, elements * idx + e, 1, extractn(s->p, e * ESIZE, 1)); \
3132 index -= nelem; \
3170 if (unlikely(vn - vd < opr_sz)) { \
3188 /* Mask of bits included in the even numbered predicates of width esz.
3190 * same pattern out to 16-bit units.
3200 /* Zero-extend units of 2**N bits to units of 2**(N+1) bits.
3203 * section 7-2 Shuffling Bits.
3210 for (i = 4; i >= n; i--) {
3220 * section 7-2 Shuffling Bits, where it is called an inverse half shuffle.
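For reference, the esz=0 step of the shuffle described above, shown on 8 bits in a standalone sketch (masks are the standard interleave constants; compress_bits performs the inverse transformation, per Hacker's Delight "Shuffling Bits"):

#include <stdint.h>
#include <stdio.h>

/* Spread the low byte of x so that bit i lands at bit 2*i with zeros between. */
static uint32_t spread8(uint32_t x)
{
    x &= 0xff;
    x = (x | (x << 4)) & 0x0f0f;
    x = (x | (x << 2)) & 0x3333;
    x = (x | (x << 1)) & 0x5555;
    return x;
}

int main(void)
{
    printf("%#x %#x\n", spread8(0xff), spread8(0x0b));  /* 0x5555 0x45 */
    return 0;
}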
3245 uint64_t mm = *(uint64_t *)vm;
3249 mm = extract64(mm, high * half, half);
3251 mm = expand_bits(mm, esz);
3252 d[0] = nn | (mm << esize);
3276 uint64_t mm = m[H4(high + i)];
3279 mm = expand_bits(mm, esz);
3280 d[i] = nn | (mm << esize);
3288 uint16_t mm = m[H1(high + i)];
3291 mm = expand_bits(mm, esz);
3292 d16[H2(i)] = nn | (mm << esize);
3315 if ((vm - vd) < (uintptr_t)oprsz) {
3388 uint64_t mm = (m[i] & mask) << shl;
3389 d[i] = nn + mm;
3399 for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
3411 for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
3425 l = reverse_bits_64(l << (64 - 8 * oprsz), esz);
3429 intptr_t ih = oprsz - 8 - i;
3438 intptr_t ih = H1(oprsz - 1 - i);
3466 if ((vn - vd) < (uintptr_t)oprsz) {
3502 if (unlikely((vn - vd) < (uintptr_t)oprsz)) { \
3505 if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
3514 memset(vd + oprsz - 16, 0, 16); \
3531 if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
3539 p -= oprsz; \
3605 memset(vd + oprsz - 16, 0, 16); \
3655 * indication; e.g. not found for esz=3 is -8.
3677 for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) {
3692 last_i = last_i * 8 + 63 - clz64(last_g);
3693 len = last_i - first_i + (1 << esz);
3699 swap_memmove(vd + len, vm, opr_sz * 8 - len);
3710 uint64_t nn = n[i], mm = m[i];
3712 d[i] = (nn & pp) | (mm & ~pp);
3724 uint64_t nn = n[i], mm = m[i];
3726 d[i] = (nn & pp) | (mm & ~pp);
3738 uint64_t nn = n[i], mm = m[i];
3740 d[i] = (nn & pp) | (mm & ~pp);
3752 uint64_t nn = n[i], mm = m[i];
3753 d[i] = (pg[H1(i)] & 1 ? nn : mm);
3787 * a scalar output, and also handles the byte-ordering of sub-uint64_t
3799 i -= sizeof(TYPE), out <<= sizeof(TYPE); \
3801 TYPE mm = *(TYPE *)(vm + H(i)); \
3802 out |= nn OP mm; \
3867 TYPEW mm = *(TYPEW *)(vm + i - 8); \
3869 i -= sizeof(TYPE), out <<= sizeof(TYPE); \
3871 out |= nn OP mm; \
3940 TYPE mm = simd_data(desc); \
3945 i -= sizeof(TYPE), out <<= sizeof(TYPE); \
3947 out |= nn OP mm; \
4027 for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) {
4053 b = b & -b; /* first such */
4055 b = b | (b - 1); /* break after same */
4057 b = b - 1; /* break before same */
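The three expressions above, spelled out on a toy predicate word as a standalone illustration: b & -b isolates the first active element, b | (b - 1) keeps everything up to and including it ("break after"), and b - 1 keeps everything strictly before it ("break before").

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t b = 0x28;   /* active elements at bits 3 and 5 */
    printf("%#llx %#llx %#llx\n",
           (unsigned long long)(b & -b),
           (unsigned long long)(b | (b - 1)),
           (unsigned long long)(b - 1));   /* 0x8 0x2f 0x27 */
    return 0;
}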
4244 flags = iter_predtest_fwd(d->p[i], -1, flags);
4247 uint64_t mask = ~(-1ULL << (8 * (oprsz & 7)));
4248 flags = iter_predtest_fwd(d->p[i], mask, flags);
4284 count = maxelem - count;
4301 count = elements - count;
4341 d->p[i] = esz_mask;
4344 d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
4377 do_whilel(&d[1], esz_mask, count - oprbits, oprbits);
4404 uint32_t i, invcount = oprbits - count;
4408 d->p[i] = bits;
4412 d->p[i] = bits & MAKE_64BIT_MASK(0, oprbits & 63);
4445 do_whileg(&d[0], esz_mask, count - oprbits, oprbits);
4472 * little to gain with a more complex non-recursive form.
4569 float16 mm = *(float16 *)(vm + H1_2(i));
4570 result = float16_add(result, mm, status);
4589 float32 mm = *(float32 *)(vm + H1_2(i));
4590 result = float32_add(result, mm, status);
4615 /* Fully general three-operand expander, controlled by a predicate,
4625 uint64_t pg = g[(i - 1) >> 6]; \
4627 i -= sizeof(TYPE); \
4630 TYPE mm = *(TYPE *)(vm + H(i)); \
4631 *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
4743 /* Three-operand expander, with one scalar operand, controlled by
4752 TYPE mm = scalar; \
4754 uint64_t pg = g[(i - 1) >> 6]; \
4756 i -= sizeof(TYPE); \
4759 *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
4820 /* Fully general two-operand expander, controlled by a predicate,
4830 uint64_t pg = g[(i - 1) >> 6]; \
4832 i -= sizeof(TYPE); \
5026 /* denormal: bias - fractional_zeros */
5027 return -15 - clz32(frac);
5037 /* normal: exp - bias */
5038 return exp - 15;
5054 /* denormal: bias - fractional_zeros */
5055 return -127 - clz32(frac);
5065 /* normal: exp - bias */
5066 return exp - 127;
5082 /* denormal: bias - fractional_zeros */
5083 return -1023 - clz64(frac);
5093 /* normal: exp - bias */
5094 return exp - 1023;
5115 uint64_t pg = g[(i - 1) >> 6];
5117 i -= 2;
5184 uint64_t pg = g[(i - 1) >> 6];
5186 i -= 2;
5253 uint64_t pg = g[(i - 1) >> 6];
5255 i -= 4;
5322 uint64_t pg = g[(i - 1) >> 6];
5324 i -= 8;
5383 /* Two operand floating-point comparison controlled by a predicate.
5392 intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
5397 i -= sizeof(TYPE), out <<= sizeof(TYPE); \
5400 TYPE mm = *(TYPE *)(vm + H(i)); \
5401 out |= OP(TYPE, nn, mm, status); \
5404 d[j--] = out; \
5447 /* One operand floating-point comparison against zero, controlled
5454 intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
5459 i -= sizeof(TYPE), out <<= sizeof(TYPE); \
5465 d[j--] = out; \
5488 /* FP Trig Multiply-Add. */
5503 float16 mm = m[i];
5507 if (float16_is_neg(mm)) {
5511 mm = float16_abs(mm);
5515 d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
5534 float32 mm = m[i];
5538 if (float32_is_neg(mm)) {
5542 mm = float32_abs(mm);
5546 d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
5569 float64 mm = m[i];
5573 if (float64_is_neg(mm)) {
5577 mm = float64_abs(mm);
5581 d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
5598 uint64_t pg = g[(i - 1) >> 6];
5603 j = i - sizeof(float16);
5604 i -= 2 * sizeof(float16);
5636 uint64_t pg = g[(i - 1) >> 6];
5641 j = i - sizeof(float32);
5642 i -= 2 * sizeof(float32);
5674 uint64_t pg = g[(i - 1) >> 6];
5679 j = i - sizeof(float64);
5680 i -= 2 * sizeof(float64);
5725 uint64_t pg = g[(i - 1) >> 6];
5730 j = i - sizeof(float16);
5731 i -= 2 * sizeof(float16);
5775 uint64_t pg = g[(i - 1) >> 6];
5780 j = i - sizeof(float32);
5781 i -= 2 * sizeof(float32);
5825 uint64_t pg = g[(i - 1) >> 6];
5830 j = i - sizeof(float64);
5831 i -= 2 * sizeof(float64);
5878 reg_off &= -64;
5896 * Resolve the guest virtual address to info->host and info->flags.
5910 * User-only currently always issues with TBI. See the comment
5915 * We currently always enable TBI for user-only, and do not provide
5923 &info->host, retaddr);
5927 &info->host, &full, retaddr);
5929 info->flags = flags;
5937 memset(&info->attrs, 0, sizeof(info->attrs));
5939 info->tagged = (flags & PAGE_ANON) && (flags & PAGE_MTE);
5941 info->attrs = full->attrs;
5942 info->tagged = full->extra.arm.pte_attrs == 0xf0;
5945 /* Ensure that info->host[] is relative to addr, not addr + mem_off. */
5946 info->host -= mem_off;
5960 intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split;
5965 /* Set all of the element indices to -1, and the TLB data to 0. */
5966 memset(info, -1, offsetof(SVEContLdSt, page));
5967 memset(info->page, 0, sizeof(info->page));
5974 reg_off_last = i * 64 + 63 - clz64(pg);
5987 info->reg_off_first[0] = reg_off_first;
5988 info->mem_off_first[0] = (reg_off_first >> esz) * msize;
5991 page_split = -(addr | TARGET_PAGE_MASK);
5994 info->reg_off_last[0] = reg_off_last;
5998 info->page_split = page_split;
6006 * active element is the one that's split, this value remains -1.
6010 info->reg_off_last[0] = reg_off_split - esize;
6017 info->reg_off_split = reg_off_split;
6018 info->mem_off_split = mem_off_split;
6035 info->reg_off_first[1] = reg_off_split;
6036 info->mem_off_first[1] = (reg_off_split >> esz) * msize;
6037 info->reg_off_last[1] = reg_off_last;
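As a side note on the page_split computation seen above: -(addr | TARGET_PAGE_MASK) is the number of bytes from addr to the end of its page, because OR-ing in the mask sets every bit above the page offset and the two's-complement negation turns the offset into page_size - offset. A standalone illustration with a 4 KiB page size assumed:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t page_mask = ~(uint64_t)0xfff;   /* illustrative 4 KiB pages */
    uint64_t addr = 0x12345f80;
    uint64_t to_page_end = -(addr | page_mask);

    printf("%llu\n", (unsigned long long)to_page_end);  /* 0x1000 - 0xf80 = 128 */
    return 0;
}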
6042 * Resolve the guest virtual addresses to info->page[].
6051 int mem_off = info->mem_off_first[0];
6055 if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off,
6061 if (likely(info->page_split < 0)) {
6070 if (info->mem_off_split >= 0) {
6075 mem_off = info->page_split;
6078 * of the vector, then: For first-fault we should continue
6079 * to generate faults for the second page. For no-fault,
6082 if (info->mem_off_first[0] < info->mem_off_split) {
6091 mem_off = info->mem_off_first[1];
6094 * so we're out of first-fault territory.
6099 have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off,
6111 int flags0 = info->page[0].flags;
6112 int flags1 = info->page[1].flags;
6119 info->page[0].flags = flags0 & ~TLB_WATCHPOINT;
6120 info->page[1].flags = flags1 & ~TLB_WATCHPOINT;
6123 mem_off = info->mem_off_first[0];
6124 reg_off = info->reg_off_first[0];
6125 reg_last = info->reg_off_last[0];
6132 msize, info->page[0].attrs,
6141 mem_off = info->mem_off_split;
6144 info->page[0].attrs, wp_access, retaddr);
6147 mem_off = info->mem_off_first[1];
6149 reg_off = info->reg_off_first[1];
6150 reg_last = info->reg_off_last[1];
6157 msize, info->page[1].attrs,
6175 if (info->page[0].tagged) {
6176 mem_off = info->mem_off_first[0];
6177 reg_off = info->reg_off_first[0];
6178 reg_last = info->reg_off_split;
6180 reg_last = info->reg_off_last[0];
6195 mem_off = info->mem_off_first[1];
6196 if (mem_off >= 0 && info->page[1].tagged) {
6197 reg_off = info->reg_off_first[1];
6198 reg_last = info->reg_off_last[1];
6214 * Common helper for all contiguous 1,2,3,4-register predicated loads.
6234 memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max);
6290 memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max);
6298 memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max);
6313 host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
6325 * Use the slow path to manage the cross-page misalignment.
6332 tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
6350 host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
6512 * Load contiguous data, first-fault and no-fault.
6514 * For user-only, we control the race between page_check_range and
6527 uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p;
6539 * Common helper for all contiguous no-fault and first-fault loads.
6549 void *vd = &env->vfp.zregs[rd];
6584 /* Trapping mte check for the first-fault element. */
6596 * Use the slow path for cross-page handling.
6605 swap_memzero(vd + reg_off, reg_max - reg_off);
6633 * Use the slow path for cross-page handling.
6644 * Per the MemSingleNF pseudocode, a no-fault load from Device memory
6645 * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead.
6698 * As an implementation choice, decline to handle a cross-page element
6716 * be low frequency as the guest walks through memory -- the next
6845 * Common helper for all contiguous 1,2,3,4-register predicated stores.
6907 tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
6930 host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
6942 * Use the slow path to manage the cross-page misalignment.
6949 tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
6967 host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
7080 * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
7131 target_ulong in_page = -(addr | TARGET_PAGE_MASK);
7185 * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
7309 * Common helpers for all gather first-faulting loads.
7355 swap_memzero(vd + reg_off, reg_max - reg_off);
7365 in_page = -(addr | TARGET_PAGE_MASK);
7410 * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
7540 target_ulong in_page = -(addr | TARGET_PAGE_MASK);
7583 * as a first-level check against the predicate, since only enabled
7584 * elements have non-null host addresses.
7612 * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
7714 intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split;
7720 /* Set all of the element indices to -1, and the TLB data to 0. */
7721 memset(info, -1, offsetof(SVEContLdSt, page));
7722 memset(info->page, 0, sizeof(info->page));
7729 reg_off_last = reg_max * N - b_stride;
7735 reg_off_last = MIN(b_count - esize, reg_max * N - b_stride);
7738 info->reg_off_first[0] = reg_off_first;
7739 info->mem_off_first[0] = reg_off_first;
7741 page_split = -(addr | TARGET_PAGE_MASK);
7744 info->reg_off_last[0] = reg_off_last;
7748 info->page_split = page_split;
7754 * active element is the one that's split, this value remains -1.
7758 info->reg_off_last[0] = ROUND_DOWN(reg_off_split - esize, b_stride);
7762 if (page_split & (esize - 1)) {
7764 if ((reg_off_split & (b_stride - 1)) == 0) {
7765 info->reg_off_split = reg_off_split;
7766 info->mem_off_split = reg_off_split;
7777 info->reg_off_first[1] = reg_off_split;
7778 info->mem_off_first[1] = reg_off_split;
7779 info->reg_off_last[1] = reg_off_last;
7790 int flags0 = info->page[0].flags;
7791 int flags1 = info->page[1].flags;
7798 info->page[0].flags = flags0 & ~TLB_WATCHPOINT;
7799 info->page[1].flags = flags1 & ~TLB_WATCHPOINT;
7802 count_off = info->reg_off_first[0];
7803 count_last = info->reg_off_split;
7805 count_last = info->reg_off_last[0];
7809 esize, info->page[0].attrs, wp_access, ra);
7814 count_off = info->reg_off_first[1];
7816 count_last = info->reg_off_last[1];
7819 esize, info->page[1].attrs,
7837 * - first iteration hits addr + off, as required,
7838 * - second iteration hits ALIGN_UP(addr, 16),
7839 * - other iterations advance addr by 16.
7844 if (info->page[0].tagged) {
7845 count_off = info->reg_off_first[0];
7846 count_last = info->reg_off_split;
7848 count_last = info->reg_off_last[0];
7857 count_off = info->reg_off_first[1];
7858 if (count_off >= 0 && info->page[1].tagged) {
7859 count_last = info->reg_off_last[1];
7933 reg_last = MIN(count_last - count_off, reg_max - esize);
7964 reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize);
7977 * Use the slow path to manage the cross-page misalignment.
7997 reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize);
8101 reg_last = MIN(count_last - count_off, reg_max - esize);
8124 reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize);
8137 * Use the slow path to manage the cross-page misalignment.
8157 reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize);
8259 uint64_t signs = ones << (bits - 1);
8265 cmp0 = (cmp0 - ones) & ~cmp0;
8266 cmp1 = (cmp1 - ones) & ~cmp1;
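These lines use the classic SWAR zero-detection trick: after XOR-ing the value being searched for into each lane (so matching lanes become zero), (v - 0x01..01) & ~v & 0x80..80 sets the sign bit of every lane that held zero. A standalone illustration (note the well-known caveat that borrow propagation can also flag an 0x01 lane sitting directly above a zero lane, which users of the trick must tolerate or mask):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t ones  = 0x0101010101010101ull;
    uint64_t signs = 0x8080808080808080ull;
    uint64_t v = 0x1200ff0034005600ull;          /* zero bytes in lanes 0, 2, 4, 6 */
    uint64_t hit = (v - ones) & ~v & signs;

    printf("%#llx\n", (unsigned long long)hit);  /* 0x80008000800080 */
    return 0;
}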
8449 int shl = 8 - shr;
8463 int shl = 16 - shr;
8565 uint64_t pg = g[(i - 1) >> 6]; \
8567 i -= sizeof(TYPEW); \
8587 uint64_t pg = g[(i - 1) >> 6]; \
8589 i -= sizeof(TYPEW); \
8618 int b_count = (p.count << v_esz) - vl * part;
8625 do_whileg(vd, mask, vl - b_count, vl);