/*
 * AArch64 generic vector expansion
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"


static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* RAX1 (SHA3): d = n ^ rol64(m, 1), per 64-bit element. */
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

/* XAR: d = ror(n ^ m, shift), per element. */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* EOR3: d = n ^ m ^ a, a three-way exclusive or. */
void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* BCAX: d = n ^ (m & ~a), bit clear and exclusive or. */
void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/*
 * Set @res to the correctly saturated result.
 * Set @qc non-zero if saturation occurred.
 */
void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_smin_i64(res, t, max);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 max = tcg_constant_i64(INT64_MAX);
    TCGv_i64 t = tcg_temp_new_i64();

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_i64(t, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_i64(t, t, b);
    tcg_gen_add_i64(res, a, t);

    tcg_gen_xor_i64(t, t, b);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec max =
        tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
    TCGv_vec u = tcg_temp_new_vec_matching(t);

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_vec(vece, u, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_vec(vece, u, u, b);
    tcg_gen_add_vec(vece, t, u, a);

    /* Compute QC by comparing the adjusted @b. */
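    /*
     * The constrained addend @u equals @b exactly when the umin above did
     * not clamp it, so nonzero bits in (u ^ b) flag a saturated result.
     */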
    tcg_gen_xor_vec(vece, u, u, b);
    tcg_gen_or_vec(vece, qc, qc, u);
}

void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_suqadd_vec,
          .fni8 = gen_suqadd_d,
          .fno = gen_helper_gvec_suqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, max);
    tcg_gen_smax_i64(res, res, zero);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGv_i64 tneg = tcg_temp_new_i64();
    TCGv_i64 tpos = tcg_temp_new_i64();
    TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_add_i64(tmp, a, b);

    /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);

    /* If @b is negative, saturate if a < -b, ie subtraction is negative. */
    tcg_gen_neg_i64(tneg, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);

    /* Select correct result from sign of @b. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec u = tcg_temp_new_vec_matching(t);
    TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);

    /* Compute unsigned saturation of add for +b and sub for -b. */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, u, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Select the correct result depending on the sign of b. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);

    /* Compute QC by comparing against the non-saturated result. */
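    /*
     * The wrapping sum (a + b) computed below differs from the saturated
     * result @t exactly when saturation occurred, so its xor with @t
     * leaves nonzero bits only in that case.
     */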
    tcg_gen_add_vec(vece, u, a, b);
    tcg_gen_xor_vec(vece, u, u, t);
    tcg_gen_or_vec(vece, qc, qc, u);
}

void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_add_vec,
        INDEX_op_usadd_vec, INDEX_op_ussub_vec,
        INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_usqadd_vec,
          .fni8 = gen_usqadd_d,
          .fno = gen_helper_gvec_usqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
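
/*
 * Usage sketch (illustrative only, not the actual decode path): a caller
 * in translate-a64.c would typically expand a saturating accumulate with
 * the usual full-register helpers, roughly as
 *
 *     gen_gvec_usqadd_qc(vece,
 *                        vec_full_reg_offset(s, rd),
 *                        vec_full_reg_offset(s, rd),
 *                        vec_full_reg_offset(s, rn),
 *                        opr_sz, vec_full_reg_size(s));
 *
 * where @s, @rd, @rn and @opr_sz (the operation size in bytes) belong to
 * the hypothetical caller.  Register @rd is both accumulator input and
 * destination, and QC is updated through the vfp.qc offset wired in by
 * the write_aofs plumbing above.
 */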