
/*
 * M-profile MVE Operations
 */

#include "exec/helper-proto.h"
#include "accel/tcg/cpu-ldst.h"
/* In mve_eci_mask(): */
    if ((env->condexec_bits & 0xf) != 0) {
        /* ... */
    }
    eci = env->condexec_bits >> 4;
/* In mve_element_mask(): */
    /*
     * ...
     * (3) low-overhead-branch tail predication will mask out part
     * ...
     * We combine all these into a 16-bit result with the same semantics
     * ...
     * 8-bit vector ops will look at all bits of the result;
     * 16-bit ops will look at bits 0, 2, 4, ...;
     * 32-bit ops will look at bits 0, 4, 8 and 12.
     * ...
     * the 4-bit slice of the mask corresponding to a single beat.
     */
    uint16_t mask = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);

    if (!(env->v7m.vpr & R_V7M_VPR_MASK01_MASK)) {
        /* ... */
    }
    if (!(env->v7m.vpr & R_V7M_VPR_MASK23_MASK)) {
        /* ... */
    }
    if (env->v7m.ltpsize < 4 &&
        env->regs[14] <= (1 << (4 - env->v7m.ltpsize))) {
        /* ... */
    }
    /* ... */
    int masklen = env->regs[14] << env->v7m.ltpsize;
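/*
 * Hedged sketch (not from the file): how ops of different element
 * sizes consume the 16-bit mask described above. Each mask bit covers
 * one byte lane, so element e of an esize-byte op tests bit
 * (e * esize): esize 1 looks at every bit, esize 2 at bits 0, 2, 4,
 * ..., esize 4 at bits 0, 4, 8, 12. The helper name is hypothetical.
 */
static inline bool sketch_element_active(uint16_t mask, int e, int esize)
{
    return (mask >> (e * esize)) & 1;
}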
/* In mve_advance_vpt(): */
    uint32_t vpr = env->v7m.vpr;
    /* ... */
    if ((env->condexec_bits & 0xf) == 0) {
        env->condexec_bits = (env->condexec_bits == (ECI_A0A1A2B0 << 4)) ?
            /* ... */;
    }
    /* ... */
    env->v7m.vpr = vpr;
/*
 * 64-bit accesses are slightly different: they are done as two 32-bit
 * accesses ... and with a single 32-bit offset in the first of the two
 * Qm elements. ... stored in the even-beat element.
 */
            m[H4(e & ~1)] = addr - 4; /* write back the first access's address */ \
    /* ... */
            m[H4(e & ~1)] = addr - 4;                                   \
    /* ... one 32-bit memory access per beat. */
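/*
 * Sketch (assumptions flagged): a 64-bit gather element loaded as two
 * 32-bit accesses at addr and addr + 4, with the single 32-bit offset
 * taken from the even-numbered Qm element. ld32 stands in for the
 * real per-beat load; little-endian combination assumed.
 */
static inline uint64_t sketch_gather_ld64(uint32_t addr,
                                          uint32_t (*ld32)(uint32_t))
{
    uint32_t lo = ld32(addr);        /* even beat */
    uint32_t hi = ld32(addr + 4);    /* odd beat */
    return ((uint64_t)hi << 32) | lo;
}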
    for (e = 3; e >= 0; e--) {                                          \
    /* ... */
    for (e = 3; e >= 0; e--) {                                          \
    /* ... */
    for (e = 1; e >= 0; e--) {                                          \
    /*
     * ... into the 32-bit value, so we only need to write the 32-bit
     * ...
     */
#define DO_CLS_B(N)   (clrsb32(N) - 24)
#define DO_CLS_H(N)   (clrsb32(N) - 16)

#define DO_CLZ_B(N)   (clz32(N) - 24)
#define DO_CLZ_H(N)   (clz32(N) - 16)

#define DO_ABS(N)  ((N) < 0 ? -(N) : (N))
#define DO_NEG(N)  (-(N))
    /* ... All these insns work at 64-bit widths. */

/* provide unsigned 2-op helpers for all sizes */
/* ... */
/* provide signed 2-op helpers for all sizes */

/*
 * "Long" operations where two half-sized inputs (taken from either the
 * top or the bottom of the input vector) produce a double-width result.
 * ...
 */
        /* latch any accumulated saturation into FPSCR.QC: */
        env->vfp.qc[0] = qc;                                            \

/* provide unsigned 2-op helpers for all sizes */
/* ... */
/* provide signed 2-op helpers for all sizes */
#define DO_SUB(N, M) ((N) - (M))
/* ... */
#define DO_ABD(N, M)  ((N) >= (M) ? (N) - (M) : (M) - (N))

    return ((uint64_t)n - m) >> 1;   /* in do_vhsub_u() */
    return ((int64_t)n - m) >> 1;    /* in do_vhsub_s() */
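/*
 * Sketch of the halving-subtract trick above: do the subtraction at
 * 64 bits so the borrow is preserved, shift right once, and let the
 * truncation back to the element type produce the halved result,
 * e.g. VHSUB.U8 of 1 - 2 yields 0xff (-1/2, floored). The
 * instantiation below is hypothetical.
 */
static inline uint8_t sketch_vhsub_u8(uint8_t n, uint8_t m)
{
    return ((uint64_t)n - m) >> 1;
}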
    /* In do_vadc(): store C, clear NZV */
    env->vfp.fpsr &= ~FPSR_NZCV_MASK;
    env->vfp.fpsr |= carry_in * FPSR_C;

    /* In the VADC/VSBC HELPER()s: */
    bool carry_in = env->vfp.fpsr & FPSR_C;
    /* ... */
    bool carry_in = env->vfp.fpsr & FPSR_C;
    do_vadc(env, vd, vn, vm, -1, carry_in, false);
    /* ... */
    do_vadc(env, vd, vn, vm, -1, 1, true);
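/*
 * Sketch of the identity the subtract forms rely on when passing
 * inv == -1 above: in two's complement, n - m - (1 - carry) equals
 * n + ~m + carry, so a single add-with-carry routine serves both
 * VADC and VSBC. Hypothetical 32-bit scalar version:
 */
static inline uint32_t sketch_sbc32(uint32_t n, uint32_t m,
                                    bool carry_in, bool *carry_out)
{
    uint64_t r = (uint64_t)n + (uint32_t)~m + carry_in;
    *carry_out = r >> 32;
    return (uint32_t)r;
}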
            r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)]);             \
#define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
#define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
#define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)

#define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
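/*
 * Hedged sketch in the style of the do_sat_bhw() calls above (not the
 * file's code): the widened result is clamped to the element's range
 * and the saturation flag is latched through *s.
 */
static inline int64_t sketch_sat(int64_t val, int64_t min, int64_t max,
                                 bool *s)
{
    if (val < min) {
        *s = true;
        return min;
    }
    if (val > max) {
        *s = true;
        return max;
    }
    return val;
}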
1247 * "shift by esize-1", adjusting the QRDMULH rounding constant to match.
1321 * (A * B - C * D) etc for VQDMLSDH. in DO_2OP_SAT_S()
1335 m[H##ESIZE(e - XCHG)], \ in DO_2OP_SAT_S()
1336 n[H##ESIZE(e + (1 - 2 * XCHG))], \ in DO_2OP_SAT_S()
1337 m[H##ESIZE(e + (1 - XCHG))], \ in DO_2OP_SAT_S()
1344 env->vfp.qc[0] = qc; \ in DO_2OP_SAT_S()
    /* ... bring it back into the non-saturated range. ... (in do_vqdmladh_w()) */

    /* In do_vqdmlsdh_b() / do_vqdmlsdh_h(): */
    int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7);
    int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 15);
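/*
 * Sketch of how the lines above complete: the doubled dot-product
 * difference plus a rounding constant at bit (esize - 1) is clamped
 * at double width and the high half kept. Reuses the sketch_sat()
 * clamp sketched earlier; not necessarily the file's exact code.
 */
static inline int8_t sketch_vqdmlsdh_b(int8_t a, int8_t b, int8_t c,
                                       int8_t d, int round, bool *sat)
{
    int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7);
    return sketch_sat(r, INT16_MIN, INT16_MAX, sat) >> 8;
}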
        env->vfp.qc[0] = qc;                                            \
    /* ... */
        env->vfp.qc[0] = qc;                                            \

/* provide unsigned 2-op scalar helpers for all sizes */

    /* ... bring it back into the non-saturated range. ... (in do_vqdmlah_w()) */
    /*
     * ... whether to propagate a saturation indication into FPSCR.QC -- for
     * the 16x16->32 case we must check only the bit corresponding to the T or B
     * half that we used, but for the 32x32->64 case we propagate if the mask
     * ...
     */
        env->vfp.qc[0] = qc;                                            \
    n >>= 8 - m;     /* in do_vbrsrb() */
    n >>= 16 - m;    /* in do_vbrsrh() */
    n >>= 32 - m;    /* in do_vbrsrw() */
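/*
 * Sketch of the VBRSR trick behind the shifts above: reversing the
 * low m bits of n is a full-width bit reverse followed by a right
 * shift that drops the (width - m) bits above the reversed field.
 * The portable reverse loop stands in for the real revbit8().
 */
static inline uint8_t sketch_brsr8(uint8_t n, unsigned m)
{
    uint8_t r = 0;
    for (unsigned i = 0; i < 8; i++) {
        r = (r << 1) | ((n >> i) & 1);   /* bit-reverse all 8 bits */
    }
    return m < 8 ? r >> (8 - m) : r;     /* m == 0 yields 0 */
}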
                (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)];    \
    /* ... */
DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
    /* ... */
                n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)];             \
    /* ... */
DO_DAV_S(vmlsdav, false, +=, -=)
DO_DAV_S(vmlsdavx, true, +=, -=)
/*
 * ... this is implemented with a 72-bit internal accumulator value of which
 * ...
 * use 128-bit arithmetic -- we can do this because the 72-bit accumulator
 * is squashed back into 64 bits after each beat.
 */
                    mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)];        \
                    /* ... */
                        mul = -mul;                                     \
    m = -m;
    /* ... */
    m = -m;     /* in do_mina() */

        /* absolute difference: */
        uint32_t r = n0 >= m0 ? (n0 - m0) : (m0 - n0);                  \
        env->vfp.qc[0] = qc;                                            \

/* provide unsigned 2-op shift helpers for all sizes */
/* Shift-and-insert; we always work with 64 bits at a time */
    /*
     * ... this because it would try to shift by an out-of-range
     * ...
     */
#define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT))
#define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT))
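/*
 * Sketch of a shift-and-insert step using SHL_MASK above: for a
 * 32-bit VSLI-style insert (assuming 0 <= shift < 32), only the bits
 * the left shift can write come from the source; the rest keep the
 * old destination value. The function name is hypothetical.
 */
static inline uint32_t sketch_vsli32(uint32_t d, uint32_t m, unsigned shift)
{
    uint32_t mask = ~0u << shift;            /* == SHL_MASK(32, shift) */
    return (d & ~mask) | ((m << shift) & mask);
}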
/*
 * Long shifts taking half-sized inputs from top or bottom of the input
 * vector and producing a double-width result. ESIZE, TYPE are for
 * the input ...
 * ... because the long shift is strictly left-only.
 */
        env->vfp.qc[0] = qc;                                            \
    /* ... */
        env->vfp.qc[0] = qc;                                            \

    /*
     * For each 32-bit element, we shift it left, bringing in the
     * ...
     */
            rdm = d[H4(e)] >> (32 - shift);
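/*
 * Sketch of the per-element VSHLC step implied above: each 32-bit
 * element shifts left, the bits it pushes out become the carry-in
 * for the next element, and the final carry becomes the new RdM.
 * Assumes 0 < shift < 32; the helper name is hypothetical.
 */
static inline uint32_t sketch_shlc_step(uint32_t *elem, uint32_t carry_in,
                                        unsigned shift)
{
    uint32_t carry_out = *elem >> (32 - shift);
    *elem = (*elem << shift) | carry_in;
    return carry_out;
}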
    /* Scalar shift HELPER()s (a negated count selects a right shift): */
    return do_sqrshl_d(n, -(int8_t)shift, false, NULL);
    return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
    return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
    return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF);
    return do_uqrshl_d(n, (int8_t)shift, true, &env->QF);
/* Operate on 64-bit values, but saturate at 48 bits */
    /* In do_sqrshl48_d(): */
    if (shift <= -48) {
        /* ... */
    }
    /* ... */
        src >>= -shift - 1;
    /* ... */
        val = src >> -shift;

/* Operate on 64-bit values, but saturate at 48 bits */
    /* In do_uqrshl48_d(): */
    if (shift <= -(48 + round)) {
        /* ... */
    }
        val = src >> (-shift - 1);
    /* ... */
        val = src >> -shift;
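/*
 * Sketch of the 48-bit saturation idea (left shifts only, no
 * rounding; the real helpers also handle right shifts): compute at
 * 64 bits, then clamp to [0, 2^48 - 1], latching the flag. Assumes
 * shift < 48; the helper name is hypothetical.
 */
static inline uint64_t sketch_uqshl48(uint64_t src, unsigned shift, bool *sat)
{
    uint64_t val = src << shift;
    uint64_t extval = val & MAKE_64BIT_MASK(0, 48);
    if ((val >> shift) != src || val != extval) {
        *sat = true;
        return MAKE_64BIT_MASK(0, 48);   /* saturate to 2^48 - 1 */
    }
    return val;
}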
    return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF);
    return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);

    return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
    return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
    return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF);
    return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF);

    offset -= imm;      /* in do_sub_wrap() */
    /*
     * ... P0 bits for non-executed beats (where eci_mask is 0) are unchanged.
     */
        env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) |           \
    /* ... */
        env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) |           \

    uint16_t p0 = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);

    /*
     * This insn is itself subject to predication and to beat-wise execution,
     * ...
     */
    uint16_t beatpred = ~env->v7m.vpr & mask;
    env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (beatpred & eci_mask);

    /*
     * ... ltpmask in mve_element_mask(), but we have pre-calculated
     * ...
     */
    env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask);
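/*
 * Sketch of the recurring VPR update pattern above (hypothetical
 * helper name): new P0 bits are taken only for beats that actually
 * executed (eci_mask bit set); all other bits keep their old value.
 */
static inline uint32_t sketch_merge_vpr(uint32_t vpr, uint32_t newmask,
                                        uint32_t eci_mask)
{
    return (vpr & ~eci_mask) | (newmask & eci_mask);
}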
        env->vfp.qc[0] = qc;                                            \

#define DO_VQNEG_B(N, SATP) do_sat_bhs(-(int64_t)N, INT8_MIN, INT8_MAX, SATP)
#define DO_VQNEG_H(N, SATP) do_sat_bhs(-(int64_t)N, INT16_MIN, INT16_MAX, SATP)
#define DO_VQNEG_W(N, SATP) do_sat_bhs(-(int64_t)N, INT32_MIN, INT32_MAX, SATP)
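/*
 * Sketch: the widening in the DO_VQNEG_* macros matters because
 * -INT8_MIN does not fit in int8_t; negating at 64 bits and clamping
 * (sketch_sat() again) gives INT8_MAX with the flag set.
 */
static inline int8_t sketch_vqneg8(int8_t n, bool *satp)
{
    return sketch_sat(-(int64_t)n, INT8_MIN, INT8_MAX, satp);
}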
/*
 * 2-operand floating point. Note that if an element is partially
 * predicated we must do the FP operation to update the non-predicated
 * ...
 */
        fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
    /* ... */
        fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
        /* ... */
            r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)], fpst);       \
#define DO_VFMA(OP, ESIZE, TYPE, CHS)                                   \
    /* ... */
        fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
        /* ... */
        if (CHS) { /* CHS: flip the sign of one multiplicand (the VFMS forms) */ \
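/*
 * Sketch of the CHS ("change sign") trick: the fused
 * multiply-subtract forms reuse the multiply-add path after flipping
 * the sign of one multiplicand. float16_chs() and float16_muladd()
 * are QEMU softfloat routines; the wrapper name is hypothetical.
 */
static inline float16 sketch_fms_h(float16 d, float16 n, float16 m,
                                   float_status *fpst)
{
    return float16_muladd(float16_chs(n), m, d, 0, fpst);
}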
        fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
    /*
     * Each FP helper macro selects its float_status the same way, and
     * the FP compare/VPT macros merge their predicate result under
     * eci_mask (repeated occurrences collapsed):
     */
        fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
    /* ... */
        env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) |           \
    /* In do_vcvt_sh(): */
    bool ieee = !(env->vfp.fpcr & FPCR_AHP);
    /* ... */
    float_status *base_fpst = &env->vfp.fp_status[FPST_STD];

    /* In do_vcvt_hs(): */
    bool ieee = !(env->vfp.fpcr & FPCR_AHP);
    /* ... */
    float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
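/*
 * Sketch of the per-element conversion behind do_vcvt_hs(): the
 * FPCR.AHP bit (clear means IEEE half precision) becomes the 'ieee'
 * flag of the softfloat conversion. float16_to_float32() is the real
 * softfloat routine; the wrapper name is hypothetical.
 */
static inline float32 sketch_cvt_h_to_s(float16 h, bool ieee,
                                        float_status *fpst)
{
    return float16_to_float32(h, ieee, fpst);
}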
        fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \