/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "tcg-op.h"
#include "tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
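/*
 * Illustrative sketch (not part of this file) of how a front end
 * supplies such a list.  The names gen_foo_vec, gen_helper_foo and
 * foo_list are hypothetical; the .fniv/.fno/.opt_opc fields follow
 * the GVecGen3 structure declared in tcg-op-gvec.h:
 *
 *     static void gen_foo_vec(unsigned vece, TCGv_vec d,
 *                             TCGv_vec a, TCGv_vec b)
 *     {
 *         tcg_gen_ssadd_vec(vece, d, a, b);
 *     }
 *
 *     static const TCGOpcode foo_list[] = { INDEX_op_ssadd_vec, 0 };
 *     static const GVecGen3 foo_op = {
 *         .fniv = gen_foo_vec,
 *         .fno = gen_helper_foo,      .. out-of-line helper fallback
 *         .opt_opc = foo_list,
 *         .vece = MO_8,
 *     };
 *
 * The gvec expander installs foo_list (via tcg_swap_vecop_list) around
 * the .fniv call, so the CONFIG_DEBUG_TCG check below can verify that
 * the expander emits only the opcodes it declared.
 */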
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
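/*
 * Worked example of the mirroring above: a front end that lists only
 * INDEX_op_neg_vec is still accepted on a host that lacks neg_vec but
 * supports sub_vec, because tcg_gen_neg_vec below expands to
 * r = 0 - a.  (The list name and type/vece values are illustrative.)
 *
 *     static const TCGOpcode neg_list[] = { INDEX_op_neg_vec, 0 };
 *     bool ok = tcg_can_emit_vecop_list(neg_list, TCG_TYPE_V128, MO_32);
 */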
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)

static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
        do_dupi_vec(r, MO_64, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(vece, a));
}
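/*
 * dup_const replicates the low 2**vece bytes of a constant across all
 * 64 bits, so a single TCGArg can carry any element size, e.g.:
 *
 *     dup_const(MO_8,  0x1f)       == 0x1f1f1f1f1f1f1f1full
 *     dup_const(MO_16, 0x1234)     == 0x1234123412341234ull
 *     dup_const(MO_32, 0xdeadbeef) == 0xdeadbeefdeadbeefull
 */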
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend
       that supports it.  */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend
       that supports it.  */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend
       that supports it.  */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}
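/*
 * tcg_can_emit_vec_op returns a tri-state result: positive when the
 * host supports the opcode directly, negative when the backend can
 * expand it via tcg_expand_vec_op, and zero when it is unsupported.
 * The do_op2/do_op3 helpers below emit, expand, or report failure
 * accordingly.  The vecop list is swapped out around the expansion
 * because the backend's replacement opcodes are not constrained by
 * the front end's list.
 */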
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                do_dupi_vec(t, MO_REG, 0);
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
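/*
 * The sign-mask expansion above computes t = (a < 0 ? -1 : 0), via
 * either an arithmetic shift by the element's sign-bit position or a
 * comparison with zero, and then r = (a ^ t) - t.  Worked example at
 * MO_8: a = -5 gives t = 0xff, so (a ^ t) = 4 and 4 - (-1) = 5;
 * a = 5 gives t = 0, leaving r = 5 unchanged.
 */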
static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
}
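/*
 * Min/max reduce to a comparison plus select when the host lacks the
 * direct opcode: e.g. smin(a, b) below becomes
 * cmpsel(LT, a, b, a, b), i.e. r = (a < b ? a : b) per element.
 */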
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc_s);
    hold_list = tcg_swap_vecop_list(NULL);

    can = tcg_can_emit_vec_op(opc_s, type, vece);
    if (can > 0) {
        vec_gen_3(opc_s, type, vece, ri, ai, si);
    } else if (can < 0) {
        tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
    } else {
        TCGv_vec vec_s = tcg_temp_new_vec(type);

        if (vece == MO_64) {
            TCGv_i64 s64 = tcg_temp_new_i64();
            tcg_gen_extu_i32_i64(s64, s);
            tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
            tcg_temp_free_i64(s64);
        } else {
            tcg_gen_dup_i32_vec(vece, vec_s, s);
        }
        do_op3_nofail(vece, r, a, vec_s, opc_v);
        tcg_temp_free_vec(vec_s);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}
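/*
 * The fallback above composes the bitwise select
 *     r = (a & b) | (~a & c)
 * so each result bit comes from b where the selector a has a 1 bit
 * and from c where it has a 0 bit.  Computing t = a & b first keeps
 * the expansion correct even when r aliases one of the inputs.
 */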
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
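/*
 * Per element, tcg_gen_cmpsel_vec computes r = cond(a, b) ? c : d.
 * The fallback above makes that structure explicit: the comparison
 * leaves an all-ones/all-zeros mask in t, which bitsel_vec then uses
 * to choose between c and d.
 */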