1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2018 Linaro, Inc. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "tcg/tcg.h" 23 #include "tcg/tcg-op.h" 24 #include "tcg/tcg-mo.h" 25 26 /* Reduce the number of ifdefs below. This assumes that all uses of 27 TCGV_HIGH and TCGV_LOW are properly protected by a conditional that 28 the compiler can eliminate. */ 29 #if TCG_TARGET_REG_BITS == 64 30 extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); 31 extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); 32 #define TCGV_LOW TCGV_LOW_link_error 33 #define TCGV_HIGH TCGV_HIGH_link_error 34 #endif 35 36 /* 37 * Vector optional opcode tracking. 38 * Except for the basic logical operations (and, or, xor), and 39 * data movement (mov, ld, st, dupi), many vector opcodes are 40 * optional and may not be supported on the host. Thank Intel 41 * for the irregularity in their instruction set. 42 * 43 * The gvec expanders allow custom vector operations to be composed, 44 * generally via the .fniv callback in the GVecGen* structures. At 45 * the same time, in deciding whether to use this hook we need to 46 * know if the host supports the required operations. This is 47 * presented as an array of opcodes, terminated by 0. Each opcode 48 * is assumed to be expanded with the given VECE. 49 * 50 * For debugging, we want to validate this array. Therefore, when 51 * tcg_ctx->vec_opt_opc is non-NULL, the tcg_gen_*_vec expanders 52 * will validate that their opcode is present in the list. 53 */ 54 #ifdef CONFIG_DEBUG_TCG 55 void tcg_assert_listed_vecop(TCGOpcode op) 56 { 57 const TCGOpcode *p = tcg_ctx->vecop_list; 58 if (p) { 59 for (; *p; ++p) { 60 if (*p == op) { 61 return; 62 } 63 } 64 g_assert_not_reached(); 65 } 66 } 67 #endif 68 69 bool tcg_can_emit_vecop_list(const TCGOpcode *list, 70 TCGType type, unsigned vece) 71 { 72 if (list == NULL) { 73 return true; 74 } 75 76 for (; *list; ++list) { 77 TCGOpcode opc = *list; 78 79 #ifdef CONFIG_DEBUG_TCG 80 switch (opc) { 81 case INDEX_op_and_vec: 82 case INDEX_op_or_vec: 83 case INDEX_op_xor_vec: 84 case INDEX_op_mov_vec: 85 case INDEX_op_dup_vec: 86 case INDEX_op_dupi_vec: 87 case INDEX_op_dup2_vec: 88 case INDEX_op_ld_vec: 89 case INDEX_op_st_vec: 90 case INDEX_op_bitsel_vec: 91 /* These opcodes are mandatory and should not be listed. */ 92 g_assert_not_reached(); 93 case INDEX_op_not_vec: 94 /* These opcodes have generic expansions using the above. */ 95 g_assert_not_reached(); 96 default: 97 break; 98 } 99 #endif 100 101 if (tcg_can_emit_vec_op(opc, type, vece)) { 102 continue; 103 } 104 105 /* 106 * The opcode list is created by front ends based on what they 107 * actually invoke. We must mirror the logic in the routines 108 * below for generic expansions using other opcodes. 109 */ 110 switch (opc) { 111 case INDEX_op_neg_vec: 112 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) { 113 continue; 114 } 115 break; 116 case INDEX_op_abs_vec: 117 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece) 118 && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0 119 || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0 120 || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) { 121 continue; 122 } 123 break; 124 case INDEX_op_cmpsel_vec: 125 case INDEX_op_smin_vec: 126 case INDEX_op_smax_vec: 127 case INDEX_op_umin_vec: 128 case INDEX_op_umax_vec: 129 if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { 130 continue; 131 } 132 break; 133 default: 134 break; 135 } 136 return false; 137 } 138 return true; 139 } 140 141 void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) 142 { 143 TCGOp *op = tcg_emit_op(opc); 144 TCGOP_VECL(op) = type - TCG_TYPE_V64; 145 TCGOP_VECE(op) = vece; 146 op->args[0] = r; 147 op->args[1] = a; 148 } 149 150 void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, 151 TCGArg r, TCGArg a, TCGArg b) 152 { 153 TCGOp *op = tcg_emit_op(opc); 154 TCGOP_VECL(op) = type - TCG_TYPE_V64; 155 TCGOP_VECE(op) = vece; 156 op->args[0] = r; 157 op->args[1] = a; 158 op->args[2] = b; 159 } 160 161 void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, 162 TCGArg r, TCGArg a, TCGArg b, TCGArg c) 163 { 164 TCGOp *op = tcg_emit_op(opc); 165 TCGOP_VECL(op) = type - TCG_TYPE_V64; 166 TCGOP_VECE(op) = vece; 167 op->args[0] = r; 168 op->args[1] = a; 169 op->args[2] = b; 170 op->args[3] = c; 171 } 172 173 static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, 174 TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) 175 { 176 TCGOp *op = tcg_emit_op(opc); 177 TCGOP_VECL(op) = type - TCG_TYPE_V64; 178 TCGOP_VECE(op) = vece; 179 op->args[0] = r; 180 op->args[1] = a; 181 op->args[2] = b; 182 op->args[3] = c; 183 op->args[4] = d; 184 op->args[5] = e; 185 } 186 187 static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a) 188 { 189 TCGTemp *rt = tcgv_vec_temp(r); 190 TCGTemp *at = tcgv_vec_temp(a); 191 TCGType type = rt->base_type; 192 193 /* Must enough inputs for the output. */ 194 tcg_debug_assert(at->base_type >= type); 195 vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at)); 196 } 197 198 static void vec_gen_op3(TCGOpcode opc, unsigned vece, 199 TCGv_vec r, TCGv_vec a, TCGv_vec b) 200 { 201 TCGTemp *rt = tcgv_vec_temp(r); 202 TCGTemp *at = tcgv_vec_temp(a); 203 TCGTemp *bt = tcgv_vec_temp(b); 204 TCGType type = rt->base_type; 205 206 /* Must enough inputs for the output. */ 207 tcg_debug_assert(at->base_type >= type); 208 tcg_debug_assert(bt->base_type >= type); 209 vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt)); 210 } 211 212 void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a) 213 { 214 if (r != a) { 215 vec_gen_op2(INDEX_op_mov_vec, 0, r, a); 216 } 217 } 218 219 #define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32) 220 221 static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a) 222 { 223 TCGTemp *rt = tcgv_vec_temp(r); 224 vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a); 225 } 226 227 TCGv_vec tcg_const_zeros_vec(TCGType type) 228 { 229 TCGv_vec ret = tcg_temp_new_vec(type); 230 do_dupi_vec(ret, MO_REG, 0); 231 return ret; 232 } 233 234 TCGv_vec tcg_const_ones_vec(TCGType type) 235 { 236 TCGv_vec ret = tcg_temp_new_vec(type); 237 do_dupi_vec(ret, MO_REG, -1); 238 return ret; 239 } 240 241 TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m) 242 { 243 TCGTemp *t = tcgv_vec_temp(m); 244 return tcg_const_zeros_vec(t->base_type); 245 } 246 247 TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m) 248 { 249 TCGTemp *t = tcgv_vec_temp(m); 250 return tcg_const_ones_vec(t->base_type); 251 } 252 253 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a) 254 { 255 if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) { 256 do_dupi_vec(r, MO_32, a); 257 } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) { 258 do_dupi_vec(r, MO_64, a); 259 } else { 260 TCGv_i64 c = tcg_const_i64(a); 261 tcg_gen_dup_i64_vec(MO_64, r, c); 262 tcg_temp_free_i64(c); 263 } 264 } 265 266 void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a) 267 { 268 do_dupi_vec(r, MO_REG, dup_const(MO_32, a)); 269 } 270 271 void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a) 272 { 273 do_dupi_vec(r, MO_REG, dup_const(MO_16, a)); 274 } 275 276 void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a) 277 { 278 do_dupi_vec(r, MO_REG, dup_const(MO_8, a)); 279 } 280 281 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a) 282 { 283 do_dupi_vec(r, MO_REG, dup_const(vece, a)); 284 } 285 286 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a) 287 { 288 TCGArg ri = tcgv_vec_arg(r); 289 TCGTemp *rt = arg_temp(ri); 290 TCGType type = rt->base_type; 291 292 if (TCG_TARGET_REG_BITS == 64) { 293 TCGArg ai = tcgv_i64_arg(a); 294 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 295 } else if (vece == MO_64) { 296 TCGArg al = tcgv_i32_arg(TCGV_LOW(a)); 297 TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a)); 298 vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah); 299 } else { 300 TCGArg ai = tcgv_i32_arg(TCGV_LOW(a)); 301 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 302 } 303 } 304 305 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a) 306 { 307 TCGArg ri = tcgv_vec_arg(r); 308 TCGArg ai = tcgv_i32_arg(a); 309 TCGTemp *rt = arg_temp(ri); 310 TCGType type = rt->base_type; 311 312 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 313 } 314 315 void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b, 316 tcg_target_long ofs) 317 { 318 TCGArg ri = tcgv_vec_arg(r); 319 TCGArg bi = tcgv_ptr_arg(b); 320 TCGTemp *rt = arg_temp(ri); 321 TCGType type = rt->base_type; 322 323 vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs); 324 } 325 326 static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o) 327 { 328 TCGArg ri = tcgv_vec_arg(r); 329 TCGArg bi = tcgv_ptr_arg(b); 330 TCGTemp *rt = arg_temp(ri); 331 TCGType type = rt->base_type; 332 333 vec_gen_3(opc, type, 0, ri, bi, o); 334 } 335 336 void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 337 { 338 vec_gen_ldst(INDEX_op_ld_vec, r, b, o); 339 } 340 341 void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 342 { 343 vec_gen_ldst(INDEX_op_st_vec, r, b, o); 344 } 345 346 void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type) 347 { 348 TCGArg ri = tcgv_vec_arg(r); 349 TCGArg bi = tcgv_ptr_arg(b); 350 TCGTemp *rt = arg_temp(ri); 351 TCGType type = rt->base_type; 352 353 tcg_debug_assert(low_type >= TCG_TYPE_V64); 354 tcg_debug_assert(low_type <= type); 355 vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o); 356 } 357 358 void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 359 { 360 vec_gen_op3(INDEX_op_and_vec, 0, r, a, b); 361 } 362 363 void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 364 { 365 vec_gen_op3(INDEX_op_or_vec, 0, r, a, b); 366 } 367 368 void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 369 { 370 vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b); 371 } 372 373 void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 374 { 375 if (TCG_TARGET_HAS_andc_vec) { 376 vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b); 377 } else { 378 TCGv_vec t = tcg_temp_new_vec_matching(r); 379 tcg_gen_not_vec(0, t, b); 380 tcg_gen_and_vec(0, r, a, t); 381 tcg_temp_free_vec(t); 382 } 383 } 384 385 void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 386 { 387 if (TCG_TARGET_HAS_orc_vec) { 388 vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b); 389 } else { 390 TCGv_vec t = tcg_temp_new_vec_matching(r); 391 tcg_gen_not_vec(0, t, b); 392 tcg_gen_or_vec(0, r, a, t); 393 tcg_temp_free_vec(t); 394 } 395 } 396 397 void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 398 { 399 /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */ 400 tcg_gen_and_vec(0, r, a, b); 401 tcg_gen_not_vec(0, r, r); 402 } 403 404 void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 405 { 406 /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */ 407 tcg_gen_or_vec(0, r, a, b); 408 tcg_gen_not_vec(0, r, r); 409 } 410 411 void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 412 { 413 /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */ 414 tcg_gen_xor_vec(0, r, a, b); 415 tcg_gen_not_vec(0, r, r); 416 } 417 418 static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) 419 { 420 TCGTemp *rt = tcgv_vec_temp(r); 421 TCGTemp *at = tcgv_vec_temp(a); 422 TCGArg ri = temp_arg(rt); 423 TCGArg ai = temp_arg(at); 424 TCGType type = rt->base_type; 425 int can; 426 427 tcg_debug_assert(at->base_type >= type); 428 tcg_assert_listed_vecop(opc); 429 can = tcg_can_emit_vec_op(opc, type, vece); 430 if (can > 0) { 431 vec_gen_2(opc, type, vece, ri, ai); 432 } else if (can < 0) { 433 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 434 tcg_expand_vec_op(opc, type, vece, ri, ai); 435 tcg_swap_vecop_list(hold_list); 436 } else { 437 return false; 438 } 439 return true; 440 } 441 442 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 443 { 444 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 445 446 if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) { 447 TCGv_vec t = tcg_const_ones_vec_matching(r); 448 tcg_gen_xor_vec(0, r, a, t); 449 tcg_temp_free_vec(t); 450 } 451 tcg_swap_vecop_list(hold_list); 452 } 453 454 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 455 { 456 const TCGOpcode *hold_list; 457 458 tcg_assert_listed_vecop(INDEX_op_neg_vec); 459 hold_list = tcg_swap_vecop_list(NULL); 460 461 if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) { 462 TCGv_vec t = tcg_const_zeros_vec_matching(r); 463 tcg_gen_sub_vec(vece, r, t, a); 464 tcg_temp_free_vec(t); 465 } 466 tcg_swap_vecop_list(hold_list); 467 } 468 469 void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 470 { 471 const TCGOpcode *hold_list; 472 473 tcg_assert_listed_vecop(INDEX_op_abs_vec); 474 hold_list = tcg_swap_vecop_list(NULL); 475 476 if (!do_op2(vece, r, a, INDEX_op_abs_vec)) { 477 TCGType type = tcgv_vec_temp(r)->base_type; 478 TCGv_vec t = tcg_temp_new_vec(type); 479 480 tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)); 481 if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) { 482 tcg_gen_neg_vec(vece, t, a); 483 tcg_gen_smax_vec(vece, r, a, t); 484 } else { 485 if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) { 486 tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1); 487 } else { 488 do_dupi_vec(t, MO_REG, 0); 489 tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t); 490 } 491 tcg_gen_xor_vec(vece, r, a, t); 492 tcg_gen_sub_vec(vece, r, r, t); 493 } 494 495 tcg_temp_free_vec(t); 496 } 497 tcg_swap_vecop_list(hold_list); 498 } 499 500 static void do_shifti(TCGOpcode opc, unsigned vece, 501 TCGv_vec r, TCGv_vec a, int64_t i) 502 { 503 TCGTemp *rt = tcgv_vec_temp(r); 504 TCGTemp *at = tcgv_vec_temp(a); 505 TCGArg ri = temp_arg(rt); 506 TCGArg ai = temp_arg(at); 507 TCGType type = rt->base_type; 508 int can; 509 510 tcg_debug_assert(at->base_type == type); 511 tcg_debug_assert(i >= 0 && i < (8 << vece)); 512 tcg_assert_listed_vecop(opc); 513 514 if (i == 0) { 515 tcg_gen_mov_vec(r, a); 516 return; 517 } 518 519 can = tcg_can_emit_vec_op(opc, type, vece); 520 if (can > 0) { 521 vec_gen_3(opc, type, vece, ri, ai, i); 522 } else { 523 /* We leave the choice of expansion via scalar or vector shift 524 to the target. Often, but not always, dupi can feed a vector 525 shift easier than a scalar. */ 526 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 527 tcg_debug_assert(can < 0); 528 tcg_expand_vec_op(opc, type, vece, ri, ai, i); 529 tcg_swap_vecop_list(hold_list); 530 } 531 } 532 533 void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 534 { 535 do_shifti(INDEX_op_shli_vec, vece, r, a, i); 536 } 537 538 void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 539 { 540 do_shifti(INDEX_op_shri_vec, vece, r, a, i); 541 } 542 543 void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 544 { 545 do_shifti(INDEX_op_sari_vec, vece, r, a, i); 546 } 547 548 void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 549 { 550 do_shifti(INDEX_op_rotli_vec, vece, r, a, i); 551 } 552 553 void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 554 { 555 int bits = 8 << vece; 556 tcg_debug_assert(i >= 0 && i < bits); 557 do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); 558 } 559 560 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, 561 TCGv_vec r, TCGv_vec a, TCGv_vec b) 562 { 563 TCGTemp *rt = tcgv_vec_temp(r); 564 TCGTemp *at = tcgv_vec_temp(a); 565 TCGTemp *bt = tcgv_vec_temp(b); 566 TCGArg ri = temp_arg(rt); 567 TCGArg ai = temp_arg(at); 568 TCGArg bi = temp_arg(bt); 569 TCGType type = rt->base_type; 570 int can; 571 572 tcg_debug_assert(at->base_type >= type); 573 tcg_debug_assert(bt->base_type >= type); 574 tcg_assert_listed_vecop(INDEX_op_cmp_vec); 575 can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); 576 if (can > 0) { 577 vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 578 } else { 579 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 580 tcg_debug_assert(can < 0); 581 tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 582 tcg_swap_vecop_list(hold_list); 583 } 584 } 585 586 static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, 587 TCGv_vec b, TCGOpcode opc) 588 { 589 TCGTemp *rt = tcgv_vec_temp(r); 590 TCGTemp *at = tcgv_vec_temp(a); 591 TCGTemp *bt = tcgv_vec_temp(b); 592 TCGArg ri = temp_arg(rt); 593 TCGArg ai = temp_arg(at); 594 TCGArg bi = temp_arg(bt); 595 TCGType type = rt->base_type; 596 int can; 597 598 tcg_debug_assert(at->base_type >= type); 599 tcg_debug_assert(bt->base_type >= type); 600 tcg_assert_listed_vecop(opc); 601 can = tcg_can_emit_vec_op(opc, type, vece); 602 if (can > 0) { 603 vec_gen_3(opc, type, vece, ri, ai, bi); 604 } else if (can < 0) { 605 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 606 tcg_expand_vec_op(opc, type, vece, ri, ai, bi); 607 tcg_swap_vecop_list(hold_list); 608 } else { 609 return false; 610 } 611 return true; 612 } 613 614 static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a, 615 TCGv_vec b, TCGOpcode opc) 616 { 617 bool ok = do_op3(vece, r, a, b, opc); 618 tcg_debug_assert(ok); 619 } 620 621 void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 622 { 623 do_op3_nofail(vece, r, a, b, INDEX_op_add_vec); 624 } 625 626 void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 627 { 628 do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec); 629 } 630 631 void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 632 { 633 do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec); 634 } 635 636 void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 637 { 638 do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec); 639 } 640 641 void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 642 { 643 do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec); 644 } 645 646 void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 647 { 648 do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec); 649 } 650 651 void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 652 { 653 do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec); 654 } 655 656 static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, 657 TCGv_vec b, TCGOpcode opc, TCGCond cond) 658 { 659 if (!do_op3(vece, r, a, b, opc)) { 660 tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); 661 } 662 } 663 664 void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 665 { 666 do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT); 667 } 668 669 void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 670 { 671 do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU); 672 } 673 674 void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 675 { 676 do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT); 677 } 678 679 void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 680 { 681 do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU); 682 } 683 684 void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 685 { 686 do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec); 687 } 688 689 void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 690 { 691 do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec); 692 } 693 694 void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 695 { 696 do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec); 697 } 698 699 void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 700 { 701 do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec); 702 } 703 704 void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 705 { 706 do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec); 707 } 708 709 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, 710 TCGv_i32 s, TCGOpcode opc) 711 { 712 TCGTemp *rt = tcgv_vec_temp(r); 713 TCGTemp *at = tcgv_vec_temp(a); 714 TCGTemp *st = tcgv_i32_temp(s); 715 TCGArg ri = temp_arg(rt); 716 TCGArg ai = temp_arg(at); 717 TCGArg si = temp_arg(st); 718 TCGType type = rt->base_type; 719 int can; 720 721 tcg_debug_assert(at->base_type >= type); 722 tcg_assert_listed_vecop(opc); 723 can = tcg_can_emit_vec_op(opc, type, vece); 724 if (can > 0) { 725 vec_gen_3(opc, type, vece, ri, ai, si); 726 } else if (can < 0) { 727 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 728 tcg_expand_vec_op(opc, type, vece, ri, ai, si); 729 tcg_swap_vecop_list(hold_list); 730 } else { 731 g_assert_not_reached(); 732 } 733 } 734 735 void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 736 { 737 do_shifts(vece, r, a, b, INDEX_op_shls_vec); 738 } 739 740 void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 741 { 742 do_shifts(vece, r, a, b, INDEX_op_shrs_vec); 743 } 744 745 void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 746 { 747 do_shifts(vece, r, a, b, INDEX_op_sars_vec); 748 } 749 750 void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) 751 { 752 do_shifts(vece, r, a, s, INDEX_op_rotls_vec); 753 } 754 755 void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, 756 TCGv_vec b, TCGv_vec c) 757 { 758 TCGTemp *rt = tcgv_vec_temp(r); 759 TCGTemp *at = tcgv_vec_temp(a); 760 TCGTemp *bt = tcgv_vec_temp(b); 761 TCGTemp *ct = tcgv_vec_temp(c); 762 TCGType type = rt->base_type; 763 764 tcg_debug_assert(at->base_type >= type); 765 tcg_debug_assert(bt->base_type >= type); 766 tcg_debug_assert(ct->base_type >= type); 767 768 if (TCG_TARGET_HAS_bitsel_vec) { 769 vec_gen_4(INDEX_op_bitsel_vec, type, MO_8, 770 temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct)); 771 } else { 772 TCGv_vec t = tcg_temp_new_vec(type); 773 tcg_gen_and_vec(MO_8, t, a, b); 774 tcg_gen_andc_vec(MO_8, r, c, a); 775 tcg_gen_or_vec(MO_8, r, r, t); 776 tcg_temp_free_vec(t); 777 } 778 } 779 780 void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, 781 TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d) 782 { 783 TCGTemp *rt = tcgv_vec_temp(r); 784 TCGTemp *at = tcgv_vec_temp(a); 785 TCGTemp *bt = tcgv_vec_temp(b); 786 TCGTemp *ct = tcgv_vec_temp(c); 787 TCGTemp *dt = tcgv_vec_temp(d); 788 TCGArg ri = temp_arg(rt); 789 TCGArg ai = temp_arg(at); 790 TCGArg bi = temp_arg(bt); 791 TCGArg ci = temp_arg(ct); 792 TCGArg di = temp_arg(dt); 793 TCGType type = rt->base_type; 794 const TCGOpcode *hold_list; 795 int can; 796 797 tcg_debug_assert(at->base_type >= type); 798 tcg_debug_assert(bt->base_type >= type); 799 tcg_debug_assert(ct->base_type >= type); 800 tcg_debug_assert(dt->base_type >= type); 801 802 tcg_assert_listed_vecop(INDEX_op_cmpsel_vec); 803 hold_list = tcg_swap_vecop_list(NULL); 804 can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece); 805 806 if (can > 0) { 807 vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond); 808 } else if (can < 0) { 809 tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece, 810 ri, ai, bi, ci, di, cond); 811 } else { 812 TCGv_vec t = tcg_temp_new_vec(type); 813 tcg_gen_cmp_vec(cond, vece, t, a, b); 814 tcg_gen_bitsel_vec(vece, r, t, c, d); 815 tcg_temp_free_vec(t); 816 } 817 tcg_swap_vecop_list(hold_list); 818 } 819