/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "tcg-internal.h"


/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
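/*
 * For illustration, a front end pairing a .fniv expander with the
 * optional opcodes that expander emits might look like the sketch
 * below.  The GVecGen2 fields are real; gen_example_vec and
 * gen_helper_example are hypothetical names.
 *
 *     static const TCGOpcode vecop_list[] = {
 *         INDEX_op_shri_vec, INDEX_op_sub_vec, 0
 *     };
 *     static const GVecGen2 g = {
 *         .fniv = gen_example_vec,       (emits shri_vec and sub_vec)
 *         .fno = gen_helper_example,     (out-of-line helper fallback)
 *         .opt_opc = vecop_list,
 *         .vece = MO_32,
 *     };
 *
 * The terminating 0 is required: tcg_can_emit_vecop_list() and the
 * debug check below both walk the array until they find it.
 */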
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc, 2);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc, 3);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc, 4);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc, 6);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nand_vec) {
        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
    } else {
        tcg_gen_and_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nor_vec) {
        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
    } else {
        tcg_gen_or_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_eqv_vec) {
        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
    } else {
        tcg_gen_xor_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        /* Drop the vecop list while expanding generically, so that the
           opcodes used by the expansion are not themselves validated.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
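/*
 * The sign-mask path above relies on the identity
 *     t = a >> (bits - 1)     (arithmetic shift: -1 if a < 0, else 0)
 *     abs(a) = (a ^ t) - t
 * For example, at MO_8 with a = -5: t = -1, a ^ t = 4, and 4 - (-1) = 5.
 * When sari is not available either, cmp(a < 0) produces the same
 * all-ones/all-zeros mask for t.
 */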

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    /* Rotate right by i is rotate left by bits - i.  */
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}
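/*
 * Worked example of the two saturation identities above, at MO_8
 * (element maximum 255):
 *
 *     usadd(200, 100): ~100 = 155; min(200, 155) = 155; 155 + 100 = 255,
 *     so the true sum 300 is clamped to the maximum.
 *
 *     ussub(100, 200): max(100, 200) = 200; 200 - 200 = 0,
 *     so the negative difference is clamped to zero.
 */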

static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        /* bitsel(a, b, c) = (a & b) | (~a & c) */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        /* cmpsel(cond, a, b, c, d) = bitsel(cmp(cond, a, b), c, d) */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}