1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2018 Linaro, Inc. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "tcg/tcg.h" 23 #include "tcg/tcg-op.h" 24 #include "tcg/tcg-mo.h" 25 26 /* Reduce the number of ifdefs below. This assumes that all uses of 27 TCGV_HIGH and TCGV_LOW are properly protected by a conditional that 28 the compiler can eliminate. */ 29 #if TCG_TARGET_REG_BITS == 64 30 extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); 31 extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); 32 #define TCGV_LOW TCGV_LOW_link_error 33 #define TCGV_HIGH TCGV_HIGH_link_error 34 #endif 35 36 /* 37 * Vector optional opcode tracking. 38 * Except for the basic logical operations (and, or, xor), and 39 * data movement (mov, ld, st, dupi), many vector opcodes are 40 * optional and may not be supported on the host. Thank Intel 41 * for the irregularity in their instruction set. 42 * 43 * The gvec expanders allow custom vector operations to be composed, 44 * generally via the .fniv callback in the GVecGen* structures. At 45 * the same time, in deciding whether to use this hook we need to 46 * know if the host supports the required operations. This is 47 * presented as an array of opcodes, terminated by 0. Each opcode 48 * is assumed to be expanded with the given VECE. 49 * 50 * For debugging, we want to validate this array. Therefore, when 51 * tcg_ctx->vec_opt_opc is non-NULL, the tcg_gen_*_vec expanders 52 * will validate that their opcode is present in the list. 53 */ 54 #ifdef CONFIG_DEBUG_TCG 55 void tcg_assert_listed_vecop(TCGOpcode op) 56 { 57 const TCGOpcode *p = tcg_ctx->vecop_list; 58 if (p) { 59 for (; *p; ++p) { 60 if (*p == op) { 61 return; 62 } 63 } 64 g_assert_not_reached(); 65 } 66 } 67 #endif 68 69 bool tcg_can_emit_vecop_list(const TCGOpcode *list, 70 TCGType type, unsigned vece) 71 { 72 if (list == NULL) { 73 return true; 74 } 75 76 for (; *list; ++list) { 77 TCGOpcode opc = *list; 78 79 #ifdef CONFIG_DEBUG_TCG 80 switch (opc) { 81 case INDEX_op_and_vec: 82 case INDEX_op_or_vec: 83 case INDEX_op_xor_vec: 84 case INDEX_op_mov_vec: 85 case INDEX_op_dup_vec: 86 case INDEX_op_dupi_vec: 87 case INDEX_op_dup2_vec: 88 case INDEX_op_ld_vec: 89 case INDEX_op_st_vec: 90 case INDEX_op_bitsel_vec: 91 /* These opcodes are mandatory and should not be listed. */ 92 g_assert_not_reached(); 93 case INDEX_op_not_vec: 94 /* These opcodes have generic expansions using the above. */ 95 g_assert_not_reached(); 96 default: 97 break; 98 } 99 #endif 100 101 if (tcg_can_emit_vec_op(opc, type, vece)) { 102 continue; 103 } 104 105 /* 106 * The opcode list is created by front ends based on what they 107 * actually invoke. We must mirror the logic in the routines 108 * below for generic expansions using other opcodes. 109 */ 110 switch (opc) { 111 case INDEX_op_neg_vec: 112 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) { 113 continue; 114 } 115 break; 116 case INDEX_op_abs_vec: 117 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece) 118 && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0 119 || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0 120 || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) { 121 continue; 122 } 123 break; 124 case INDEX_op_cmpsel_vec: 125 case INDEX_op_smin_vec: 126 case INDEX_op_smax_vec: 127 case INDEX_op_umin_vec: 128 case INDEX_op_umax_vec: 129 if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { 130 continue; 131 } 132 break; 133 default: 134 break; 135 } 136 return false; 137 } 138 return true; 139 } 140 141 void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) 142 { 143 TCGOp *op = tcg_emit_op(opc); 144 TCGOP_VECL(op) = type - TCG_TYPE_V64; 145 TCGOP_VECE(op) = vece; 146 op->args[0] = r; 147 op->args[1] = a; 148 } 149 150 void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, 151 TCGArg r, TCGArg a, TCGArg b) 152 { 153 TCGOp *op = tcg_emit_op(opc); 154 TCGOP_VECL(op) = type - TCG_TYPE_V64; 155 TCGOP_VECE(op) = vece; 156 op->args[0] = r; 157 op->args[1] = a; 158 op->args[2] = b; 159 } 160 161 void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, 162 TCGArg r, TCGArg a, TCGArg b, TCGArg c) 163 { 164 TCGOp *op = tcg_emit_op(opc); 165 TCGOP_VECL(op) = type - TCG_TYPE_V64; 166 TCGOP_VECE(op) = vece; 167 op->args[0] = r; 168 op->args[1] = a; 169 op->args[2] = b; 170 op->args[3] = c; 171 } 172 173 static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, 174 TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) 175 { 176 TCGOp *op = tcg_emit_op(opc); 177 TCGOP_VECL(op) = type - TCG_TYPE_V64; 178 TCGOP_VECE(op) = vece; 179 op->args[0] = r; 180 op->args[1] = a; 181 op->args[2] = b; 182 op->args[3] = c; 183 op->args[4] = d; 184 op->args[5] = e; 185 } 186 187 static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a) 188 { 189 TCGTemp *rt = tcgv_vec_temp(r); 190 TCGTemp *at = tcgv_vec_temp(a); 191 TCGType type = rt->base_type; 192 193 /* Must enough inputs for the output. */ 194 tcg_debug_assert(at->base_type >= type); 195 vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at)); 196 } 197 198 static void vec_gen_op3(TCGOpcode opc, unsigned vece, 199 TCGv_vec r, TCGv_vec a, TCGv_vec b) 200 { 201 TCGTemp *rt = tcgv_vec_temp(r); 202 TCGTemp *at = tcgv_vec_temp(a); 203 TCGTemp *bt = tcgv_vec_temp(b); 204 TCGType type = rt->base_type; 205 206 /* Must enough inputs for the output. */ 207 tcg_debug_assert(at->base_type >= type); 208 tcg_debug_assert(bt->base_type >= type); 209 vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt)); 210 } 211 212 void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a) 213 { 214 if (r != a) { 215 vec_gen_op2(INDEX_op_mov_vec, 0, r, a); 216 } 217 } 218 219 #define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32) 220 221 static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a) 222 { 223 TCGTemp *rt = tcgv_vec_temp(r); 224 vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a); 225 } 226 227 TCGv_vec tcg_const_zeros_vec(TCGType type) 228 { 229 TCGv_vec ret = tcg_temp_new_vec(type); 230 do_dupi_vec(ret, MO_REG, 0); 231 return ret; 232 } 233 234 TCGv_vec tcg_const_ones_vec(TCGType type) 235 { 236 TCGv_vec ret = tcg_temp_new_vec(type); 237 do_dupi_vec(ret, MO_REG, -1); 238 return ret; 239 } 240 241 TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m) 242 { 243 TCGTemp *t = tcgv_vec_temp(m); 244 return tcg_const_zeros_vec(t->base_type); 245 } 246 247 TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m) 248 { 249 TCGTemp *t = tcgv_vec_temp(m); 250 return tcg_const_ones_vec(t->base_type); 251 } 252 253 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a) 254 { 255 if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) { 256 do_dupi_vec(r, MO_32, a); 257 } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) { 258 do_dupi_vec(r, MO_64, a); 259 } else { 260 TCGv_i64 c = tcg_const_i64(a); 261 tcg_gen_dup_i64_vec(MO_64, r, c); 262 tcg_temp_free_i64(c); 263 } 264 } 265 266 void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a) 267 { 268 do_dupi_vec(r, MO_REG, dup_const(MO_32, a)); 269 } 270 271 void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a) 272 { 273 do_dupi_vec(r, MO_REG, dup_const(MO_16, a)); 274 } 275 276 void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a) 277 { 278 do_dupi_vec(r, MO_REG, dup_const(MO_8, a)); 279 } 280 281 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a) 282 { 283 do_dupi_vec(r, MO_REG, dup_const(vece, a)); 284 } 285 286 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a) 287 { 288 TCGArg ri = tcgv_vec_arg(r); 289 TCGTemp *rt = arg_temp(ri); 290 TCGType type = rt->base_type; 291 292 if (TCG_TARGET_REG_BITS == 64) { 293 TCGArg ai = tcgv_i64_arg(a); 294 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 295 } else if (vece == MO_64) { 296 TCGArg al = tcgv_i32_arg(TCGV_LOW(a)); 297 TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a)); 298 vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah); 299 } else { 300 TCGArg ai = tcgv_i32_arg(TCGV_LOW(a)); 301 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 302 } 303 } 304 305 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a) 306 { 307 TCGArg ri = tcgv_vec_arg(r); 308 TCGArg ai = tcgv_i32_arg(a); 309 TCGTemp *rt = arg_temp(ri); 310 TCGType type = rt->base_type; 311 312 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 313 } 314 315 void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b, 316 tcg_target_long ofs) 317 { 318 TCGArg ri = tcgv_vec_arg(r); 319 TCGArg bi = tcgv_ptr_arg(b); 320 TCGTemp *rt = arg_temp(ri); 321 TCGType type = rt->base_type; 322 323 vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs); 324 } 325 326 static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o) 327 { 328 TCGArg ri = tcgv_vec_arg(r); 329 TCGArg bi = tcgv_ptr_arg(b); 330 TCGTemp *rt = arg_temp(ri); 331 TCGType type = rt->base_type; 332 333 vec_gen_3(opc, type, 0, ri, bi, o); 334 } 335 336 void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 337 { 338 vec_gen_ldst(INDEX_op_ld_vec, r, b, o); 339 } 340 341 void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 342 { 343 vec_gen_ldst(INDEX_op_st_vec, r, b, o); 344 } 345 346 void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type) 347 { 348 TCGArg ri = tcgv_vec_arg(r); 349 TCGArg bi = tcgv_ptr_arg(b); 350 TCGTemp *rt = arg_temp(ri); 351 TCGType type = rt->base_type; 352 353 tcg_debug_assert(low_type >= TCG_TYPE_V64); 354 tcg_debug_assert(low_type <= type); 355 vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o); 356 } 357 358 void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 359 { 360 vec_gen_op3(INDEX_op_and_vec, 0, r, a, b); 361 } 362 363 void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 364 { 365 vec_gen_op3(INDEX_op_or_vec, 0, r, a, b); 366 } 367 368 void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 369 { 370 vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b); 371 } 372 373 void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 374 { 375 if (TCG_TARGET_HAS_andc_vec) { 376 vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b); 377 } else { 378 TCGv_vec t = tcg_temp_new_vec_matching(r); 379 tcg_gen_not_vec(0, t, b); 380 tcg_gen_and_vec(0, r, a, t); 381 tcg_temp_free_vec(t); 382 } 383 } 384 385 void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 386 { 387 if (TCG_TARGET_HAS_orc_vec) { 388 vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b); 389 } else { 390 TCGv_vec t = tcg_temp_new_vec_matching(r); 391 tcg_gen_not_vec(0, t, b); 392 tcg_gen_or_vec(0, r, a, t); 393 tcg_temp_free_vec(t); 394 } 395 } 396 397 void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 398 { 399 /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */ 400 tcg_gen_and_vec(0, r, a, b); 401 tcg_gen_not_vec(0, r, r); 402 } 403 404 void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 405 { 406 /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */ 407 tcg_gen_or_vec(0, r, a, b); 408 tcg_gen_not_vec(0, r, r); 409 } 410 411 void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 412 { 413 /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */ 414 tcg_gen_xor_vec(0, r, a, b); 415 tcg_gen_not_vec(0, r, r); 416 } 417 418 static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) 419 { 420 TCGTemp *rt = tcgv_vec_temp(r); 421 TCGTemp *at = tcgv_vec_temp(a); 422 TCGArg ri = temp_arg(rt); 423 TCGArg ai = temp_arg(at); 424 TCGType type = rt->base_type; 425 int can; 426 427 tcg_debug_assert(at->base_type >= type); 428 tcg_assert_listed_vecop(opc); 429 can = tcg_can_emit_vec_op(opc, type, vece); 430 if (can > 0) { 431 vec_gen_2(opc, type, vece, ri, ai); 432 } else if (can < 0) { 433 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 434 tcg_expand_vec_op(opc, type, vece, ri, ai); 435 tcg_swap_vecop_list(hold_list); 436 } else { 437 return false; 438 } 439 return true; 440 } 441 442 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 443 { 444 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 445 446 if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) { 447 TCGv_vec t = tcg_const_ones_vec_matching(r); 448 tcg_gen_xor_vec(0, r, a, t); 449 tcg_temp_free_vec(t); 450 } 451 tcg_swap_vecop_list(hold_list); 452 } 453 454 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 455 { 456 const TCGOpcode *hold_list; 457 458 tcg_assert_listed_vecop(INDEX_op_neg_vec); 459 hold_list = tcg_swap_vecop_list(NULL); 460 461 if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) { 462 TCGv_vec t = tcg_const_zeros_vec_matching(r); 463 tcg_gen_sub_vec(vece, r, t, a); 464 tcg_temp_free_vec(t); 465 } 466 tcg_swap_vecop_list(hold_list); 467 } 468 469 void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 470 { 471 const TCGOpcode *hold_list; 472 473 tcg_assert_listed_vecop(INDEX_op_abs_vec); 474 hold_list = tcg_swap_vecop_list(NULL); 475 476 if (!do_op2(vece, r, a, INDEX_op_abs_vec)) { 477 TCGType type = tcgv_vec_temp(r)->base_type; 478 TCGv_vec t = tcg_temp_new_vec(type); 479 480 tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)); 481 if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) { 482 tcg_gen_neg_vec(vece, t, a); 483 tcg_gen_smax_vec(vece, r, a, t); 484 } else { 485 if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) { 486 tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1); 487 } else { 488 do_dupi_vec(t, MO_REG, 0); 489 tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t); 490 } 491 tcg_gen_xor_vec(vece, r, a, t); 492 tcg_gen_sub_vec(vece, r, r, t); 493 } 494 495 tcg_temp_free_vec(t); 496 } 497 tcg_swap_vecop_list(hold_list); 498 } 499 500 static void do_shifti(TCGOpcode opc, unsigned vece, 501 TCGv_vec r, TCGv_vec a, int64_t i) 502 { 503 TCGTemp *rt = tcgv_vec_temp(r); 504 TCGTemp *at = tcgv_vec_temp(a); 505 TCGArg ri = temp_arg(rt); 506 TCGArg ai = temp_arg(at); 507 TCGType type = rt->base_type; 508 int can; 509 510 tcg_debug_assert(at->base_type == type); 511 tcg_debug_assert(i >= 0 && i < (8 << vece)); 512 tcg_assert_listed_vecop(opc); 513 514 if (i == 0) { 515 tcg_gen_mov_vec(r, a); 516 return; 517 } 518 519 can = tcg_can_emit_vec_op(opc, type, vece); 520 if (can > 0) { 521 vec_gen_3(opc, type, vece, ri, ai, i); 522 } else { 523 /* We leave the choice of expansion via scalar or vector shift 524 to the target. Often, but not always, dupi can feed a vector 525 shift easier than a scalar. */ 526 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 527 tcg_debug_assert(can < 0); 528 tcg_expand_vec_op(opc, type, vece, ri, ai, i); 529 tcg_swap_vecop_list(hold_list); 530 } 531 } 532 533 void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 534 { 535 do_shifti(INDEX_op_shli_vec, vece, r, a, i); 536 } 537 538 void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 539 { 540 do_shifti(INDEX_op_shri_vec, vece, r, a, i); 541 } 542 543 void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 544 { 545 do_shifti(INDEX_op_sari_vec, vece, r, a, i); 546 } 547 548 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, 549 TCGv_vec r, TCGv_vec a, TCGv_vec b) 550 { 551 TCGTemp *rt = tcgv_vec_temp(r); 552 TCGTemp *at = tcgv_vec_temp(a); 553 TCGTemp *bt = tcgv_vec_temp(b); 554 TCGArg ri = temp_arg(rt); 555 TCGArg ai = temp_arg(at); 556 TCGArg bi = temp_arg(bt); 557 TCGType type = rt->base_type; 558 int can; 559 560 tcg_debug_assert(at->base_type >= type); 561 tcg_debug_assert(bt->base_type >= type); 562 tcg_assert_listed_vecop(INDEX_op_cmp_vec); 563 can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); 564 if (can > 0) { 565 vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 566 } else { 567 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 568 tcg_debug_assert(can < 0); 569 tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 570 tcg_swap_vecop_list(hold_list); 571 } 572 } 573 574 static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, 575 TCGv_vec b, TCGOpcode opc) 576 { 577 TCGTemp *rt = tcgv_vec_temp(r); 578 TCGTemp *at = tcgv_vec_temp(a); 579 TCGTemp *bt = tcgv_vec_temp(b); 580 TCGArg ri = temp_arg(rt); 581 TCGArg ai = temp_arg(at); 582 TCGArg bi = temp_arg(bt); 583 TCGType type = rt->base_type; 584 int can; 585 586 tcg_debug_assert(at->base_type >= type); 587 tcg_debug_assert(bt->base_type >= type); 588 tcg_assert_listed_vecop(opc); 589 can = tcg_can_emit_vec_op(opc, type, vece); 590 if (can > 0) { 591 vec_gen_3(opc, type, vece, ri, ai, bi); 592 } else if (can < 0) { 593 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 594 tcg_expand_vec_op(opc, type, vece, ri, ai, bi); 595 tcg_swap_vecop_list(hold_list); 596 } else { 597 return false; 598 } 599 return true; 600 } 601 602 static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a, 603 TCGv_vec b, TCGOpcode opc) 604 { 605 bool ok = do_op3(vece, r, a, b, opc); 606 tcg_debug_assert(ok); 607 } 608 609 void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 610 { 611 do_op3_nofail(vece, r, a, b, INDEX_op_add_vec); 612 } 613 614 void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 615 { 616 do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec); 617 } 618 619 void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 620 { 621 do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec); 622 } 623 624 void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 625 { 626 do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec); 627 } 628 629 void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 630 { 631 do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec); 632 } 633 634 void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 635 { 636 do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec); 637 } 638 639 void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 640 { 641 do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec); 642 } 643 644 static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, 645 TCGv_vec b, TCGOpcode opc, TCGCond cond) 646 { 647 if (!do_op3(vece, r, a, b, opc)) { 648 tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); 649 } 650 } 651 652 void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 653 { 654 do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT); 655 } 656 657 void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 658 { 659 do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU); 660 } 661 662 void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 663 { 664 do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT); 665 } 666 667 void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 668 { 669 do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU); 670 } 671 672 void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 673 { 674 do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec); 675 } 676 677 void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 678 { 679 do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec); 680 } 681 682 void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 683 { 684 do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec); 685 } 686 687 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, 688 TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v) 689 { 690 TCGTemp *rt = tcgv_vec_temp(r); 691 TCGTemp *at = tcgv_vec_temp(a); 692 TCGTemp *st = tcgv_i32_temp(s); 693 TCGArg ri = temp_arg(rt); 694 TCGArg ai = temp_arg(at); 695 TCGArg si = temp_arg(st); 696 TCGType type = rt->base_type; 697 const TCGOpcode *hold_list; 698 int can; 699 700 tcg_debug_assert(at->base_type >= type); 701 tcg_assert_listed_vecop(opc_s); 702 hold_list = tcg_swap_vecop_list(NULL); 703 704 can = tcg_can_emit_vec_op(opc_s, type, vece); 705 if (can > 0) { 706 vec_gen_3(opc_s, type, vece, ri, ai, si); 707 } else if (can < 0) { 708 tcg_expand_vec_op(opc_s, type, vece, ri, ai, si); 709 } else { 710 TCGv_vec vec_s = tcg_temp_new_vec(type); 711 712 if (vece == MO_64) { 713 TCGv_i64 s64 = tcg_temp_new_i64(); 714 tcg_gen_extu_i32_i64(s64, s); 715 tcg_gen_dup_i64_vec(MO_64, vec_s, s64); 716 tcg_temp_free_i64(s64); 717 } else { 718 tcg_gen_dup_i32_vec(vece, vec_s, s); 719 } 720 do_op3_nofail(vece, r, a, vec_s, opc_v); 721 tcg_temp_free_vec(vec_s); 722 } 723 tcg_swap_vecop_list(hold_list); 724 } 725 726 void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 727 { 728 do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec); 729 } 730 731 void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 732 { 733 do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec); 734 } 735 736 void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 737 { 738 do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); 739 } 740 741 void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, 742 TCGv_vec b, TCGv_vec c) 743 { 744 TCGTemp *rt = tcgv_vec_temp(r); 745 TCGTemp *at = tcgv_vec_temp(a); 746 TCGTemp *bt = tcgv_vec_temp(b); 747 TCGTemp *ct = tcgv_vec_temp(c); 748 TCGType type = rt->base_type; 749 750 tcg_debug_assert(at->base_type >= type); 751 tcg_debug_assert(bt->base_type >= type); 752 tcg_debug_assert(ct->base_type >= type); 753 754 if (TCG_TARGET_HAS_bitsel_vec) { 755 vec_gen_4(INDEX_op_bitsel_vec, type, MO_8, 756 temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct)); 757 } else { 758 TCGv_vec t = tcg_temp_new_vec(type); 759 tcg_gen_and_vec(MO_8, t, a, b); 760 tcg_gen_andc_vec(MO_8, r, c, a); 761 tcg_gen_or_vec(MO_8, r, r, t); 762 tcg_temp_free_vec(t); 763 } 764 } 765 766 void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, 767 TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d) 768 { 769 TCGTemp *rt = tcgv_vec_temp(r); 770 TCGTemp *at = tcgv_vec_temp(a); 771 TCGTemp *bt = tcgv_vec_temp(b); 772 TCGTemp *ct = tcgv_vec_temp(c); 773 TCGTemp *dt = tcgv_vec_temp(d); 774 TCGArg ri = temp_arg(rt); 775 TCGArg ai = temp_arg(at); 776 TCGArg bi = temp_arg(bt); 777 TCGArg ci = temp_arg(ct); 778 TCGArg di = temp_arg(dt); 779 TCGType type = rt->base_type; 780 const TCGOpcode *hold_list; 781 int can; 782 783 tcg_debug_assert(at->base_type >= type); 784 tcg_debug_assert(bt->base_type >= type); 785 tcg_debug_assert(ct->base_type >= type); 786 tcg_debug_assert(dt->base_type >= type); 787 788 tcg_assert_listed_vecop(INDEX_op_cmpsel_vec); 789 hold_list = tcg_swap_vecop_list(NULL); 790 can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece); 791 792 if (can > 0) { 793 vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond); 794 } else if (can < 0) { 795 tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece, 796 ri, ai, bi, ci, di, cond); 797 } else { 798 TCGv_vec t = tcg_temp_new_vec(type); 799 tcg_gen_cmp_vec(cond, vece, t, a, b); 800 tcg_gen_bitsel_vec(vece, r, t, c, d); 801 tcg_temp_free_vec(t); 802 } 803 tcg_swap_vecop_list(hold_list); 804 } 805