1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2018 Linaro, Inc. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "tcg/tcg.h" 22 #include "tcg/tcg-temp-internal.h" 23 #include "tcg/tcg-op.h" 24 #include "tcg/tcg-mo.h" 25 #include "tcg-internal.h" 26 27 28 /* Reduce the number of ifdefs below. This assumes that all uses of 29 TCGV_HIGH and TCGV_LOW are properly protected by a conditional that 30 the compiler can eliminate. */ 31 #if TCG_TARGET_REG_BITS == 64 32 extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); 33 extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); 34 #define TCGV_LOW TCGV_LOW_link_error 35 #define TCGV_HIGH TCGV_HIGH_link_error 36 #endif 37 38 /* 39 * Vector optional opcode tracking. 40 * Except for the basic logical operations (and, or, xor), and 41 * data movement (mov, ld, st, dupi), many vector opcodes are 42 * optional and may not be supported on the host. Thank Intel 43 * for the irregularity in their instruction set. 44 * 45 * The gvec expanders allow custom vector operations to be composed, 46 * generally via the .fniv callback in the GVecGen* structures. At 47 * the same time, in deciding whether to use this hook we need to 48 * know if the host supports the required operations. This is 49 * presented as an array of opcodes, terminated by 0. Each opcode 50 * is assumed to be expanded with the given VECE. 51 * 52 * For debugging, we want to validate this array. Therefore, when 53 * tcg_ctx->vec_opt_opc is non-NULL, the tcg_gen_*_vec expanders 54 * will validate that their opcode is present in the list. 55 */ 56 #ifdef CONFIG_DEBUG_TCG 57 void tcg_assert_listed_vecop(TCGOpcode op) 58 { 59 const TCGOpcode *p = tcg_ctx->vecop_list; 60 if (p) { 61 for (; *p; ++p) { 62 if (*p == op) { 63 return; 64 } 65 } 66 g_assert_not_reached(); 67 } 68 } 69 #endif 70 71 bool tcg_can_emit_vecop_list(const TCGOpcode *list, 72 TCGType type, unsigned vece) 73 { 74 if (list == NULL) { 75 return true; 76 } 77 78 for (; *list; ++list) { 79 TCGOpcode opc = *list; 80 81 #ifdef CONFIG_DEBUG_TCG 82 switch (opc) { 83 case INDEX_op_and_vec: 84 case INDEX_op_or_vec: 85 case INDEX_op_xor_vec: 86 case INDEX_op_mov_vec: 87 case INDEX_op_dup_vec: 88 case INDEX_op_dup2_vec: 89 case INDEX_op_ld_vec: 90 case INDEX_op_st_vec: 91 case INDEX_op_bitsel_vec: 92 /* These opcodes are mandatory and should not be listed. */ 93 g_assert_not_reached(); 94 case INDEX_op_not_vec: 95 /* These opcodes have generic expansions using the above. */ 96 g_assert_not_reached(); 97 default: 98 break; 99 } 100 #endif 101 102 if (tcg_can_emit_vec_op(opc, type, vece)) { 103 continue; 104 } 105 106 /* 107 * The opcode list is created by front ends based on what they 108 * actually invoke. We must mirror the logic in the routines 109 * below for generic expansions using other opcodes. 110 */ 111 switch (opc) { 112 case INDEX_op_neg_vec: 113 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) { 114 continue; 115 } 116 break; 117 case INDEX_op_abs_vec: 118 if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece) 119 && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0 120 || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0 121 || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) { 122 continue; 123 } 124 break; 125 case INDEX_op_usadd_vec: 126 if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) || 127 tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { 128 continue; 129 } 130 break; 131 case INDEX_op_ussub_vec: 132 if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) || 133 tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { 134 continue; 135 } 136 break; 137 case INDEX_op_cmpsel_vec: 138 case INDEX_op_smin_vec: 139 case INDEX_op_smax_vec: 140 case INDEX_op_umin_vec: 141 case INDEX_op_umax_vec: 142 if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { 143 continue; 144 } 145 break; 146 default: 147 break; 148 } 149 return false; 150 } 151 return true; 152 } 153 154 void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) 155 { 156 TCGOp *op = tcg_emit_op(opc, 2); 157 TCGOP_VECL(op) = type - TCG_TYPE_V64; 158 TCGOP_VECE(op) = vece; 159 op->args[0] = r; 160 op->args[1] = a; 161 } 162 163 void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, 164 TCGArg r, TCGArg a, TCGArg b) 165 { 166 TCGOp *op = tcg_emit_op(opc, 3); 167 TCGOP_VECL(op) = type - TCG_TYPE_V64; 168 TCGOP_VECE(op) = vece; 169 op->args[0] = r; 170 op->args[1] = a; 171 op->args[2] = b; 172 } 173 174 void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, 175 TCGArg r, TCGArg a, TCGArg b, TCGArg c) 176 { 177 TCGOp *op = tcg_emit_op(opc, 4); 178 TCGOP_VECL(op) = type - TCG_TYPE_V64; 179 TCGOP_VECE(op) = vece; 180 op->args[0] = r; 181 op->args[1] = a; 182 op->args[2] = b; 183 op->args[3] = c; 184 } 185 186 static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, 187 TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) 188 { 189 TCGOp *op = tcg_emit_op(opc, 6); 190 TCGOP_VECL(op) = type - TCG_TYPE_V64; 191 TCGOP_VECE(op) = vece; 192 op->args[0] = r; 193 op->args[1] = a; 194 op->args[2] = b; 195 op->args[3] = c; 196 op->args[4] = d; 197 op->args[5] = e; 198 } 199 200 static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a) 201 { 202 TCGTemp *rt = tcgv_vec_temp(r); 203 TCGTemp *at = tcgv_vec_temp(a); 204 TCGType type = rt->base_type; 205 206 /* Must enough inputs for the output. */ 207 tcg_debug_assert(at->base_type >= type); 208 vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at)); 209 } 210 211 static void vec_gen_op3(TCGOpcode opc, unsigned vece, 212 TCGv_vec r, TCGv_vec a, TCGv_vec b) 213 { 214 TCGTemp *rt = tcgv_vec_temp(r); 215 TCGTemp *at = tcgv_vec_temp(a); 216 TCGTemp *bt = tcgv_vec_temp(b); 217 TCGType type = rt->base_type; 218 219 /* Must enough inputs for the output. */ 220 tcg_debug_assert(at->base_type >= type); 221 tcg_debug_assert(bt->base_type >= type); 222 vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt)); 223 } 224 225 void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a) 226 { 227 if (r != a) { 228 vec_gen_op2(INDEX_op_mov_vec, 0, r, a); 229 } 230 } 231 232 TCGv_vec tcg_const_zeros_vec(TCGType type) 233 { 234 TCGv_vec ret = tcg_temp_new_vec(type); 235 tcg_gen_dupi_vec(MO_64, ret, 0); 236 return ret; 237 } 238 239 TCGv_vec tcg_const_ones_vec(TCGType type) 240 { 241 TCGv_vec ret = tcg_temp_new_vec(type); 242 tcg_gen_dupi_vec(MO_64, ret, -1); 243 return ret; 244 } 245 246 TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m) 247 { 248 TCGTemp *t = tcgv_vec_temp(m); 249 return tcg_const_zeros_vec(t->base_type); 250 } 251 252 TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m) 253 { 254 TCGTemp *t = tcgv_vec_temp(m); 255 return tcg_const_ones_vec(t->base_type); 256 } 257 258 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a) 259 { 260 TCGTemp *rt = tcgv_vec_temp(r); 261 tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a)); 262 } 263 264 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a) 265 { 266 TCGArg ri = tcgv_vec_arg(r); 267 TCGTemp *rt = arg_temp(ri); 268 TCGType type = rt->base_type; 269 270 if (TCG_TARGET_REG_BITS == 64) { 271 TCGArg ai = tcgv_i64_arg(a); 272 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 273 } else if (vece == MO_64) { 274 TCGArg al = tcgv_i32_arg(TCGV_LOW(a)); 275 TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a)); 276 vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah); 277 } else { 278 TCGArg ai = tcgv_i32_arg(TCGV_LOW(a)); 279 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 280 } 281 } 282 283 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a) 284 { 285 TCGArg ri = tcgv_vec_arg(r); 286 TCGArg ai = tcgv_i32_arg(a); 287 TCGTemp *rt = arg_temp(ri); 288 TCGType type = rt->base_type; 289 290 vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); 291 } 292 293 void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b, 294 tcg_target_long ofs) 295 { 296 TCGArg ri = tcgv_vec_arg(r); 297 TCGArg bi = tcgv_ptr_arg(b); 298 TCGTemp *rt = arg_temp(ri); 299 TCGType type = rt->base_type; 300 301 vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs); 302 } 303 304 static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o) 305 { 306 TCGArg ri = tcgv_vec_arg(r); 307 TCGArg bi = tcgv_ptr_arg(b); 308 TCGTemp *rt = arg_temp(ri); 309 TCGType type = rt->base_type; 310 311 vec_gen_3(opc, type, 0, ri, bi, o); 312 } 313 314 void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 315 { 316 vec_gen_ldst(INDEX_op_ld_vec, r, b, o); 317 } 318 319 void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) 320 { 321 vec_gen_ldst(INDEX_op_st_vec, r, b, o); 322 } 323 324 void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type) 325 { 326 TCGArg ri = tcgv_vec_arg(r); 327 TCGArg bi = tcgv_ptr_arg(b); 328 TCGTemp *rt = arg_temp(ri); 329 TCGType type = rt->base_type; 330 331 tcg_debug_assert(low_type >= TCG_TYPE_V64); 332 tcg_debug_assert(low_type <= type); 333 vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o); 334 } 335 336 void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 337 { 338 vec_gen_op3(INDEX_op_and_vec, 0, r, a, b); 339 } 340 341 void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 342 { 343 vec_gen_op3(INDEX_op_or_vec, 0, r, a, b); 344 } 345 346 void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 347 { 348 vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b); 349 } 350 351 void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 352 { 353 if (TCG_TARGET_HAS_andc_vec) { 354 vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b); 355 } else { 356 TCGv_vec t = tcg_temp_new_vec_matching(r); 357 tcg_gen_not_vec(0, t, b); 358 tcg_gen_and_vec(0, r, a, t); 359 tcg_temp_free_vec(t); 360 } 361 } 362 363 void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 364 { 365 if (TCG_TARGET_HAS_orc_vec) { 366 vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b); 367 } else { 368 TCGv_vec t = tcg_temp_new_vec_matching(r); 369 tcg_gen_not_vec(0, t, b); 370 tcg_gen_or_vec(0, r, a, t); 371 tcg_temp_free_vec(t); 372 } 373 } 374 375 void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 376 { 377 if (TCG_TARGET_HAS_nand_vec) { 378 vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b); 379 } else { 380 tcg_gen_and_vec(0, r, a, b); 381 tcg_gen_not_vec(0, r, r); 382 } 383 } 384 385 void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 386 { 387 if (TCG_TARGET_HAS_nor_vec) { 388 vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b); 389 } else { 390 tcg_gen_or_vec(0, r, a, b); 391 tcg_gen_not_vec(0, r, r); 392 } 393 } 394 395 void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 396 { 397 if (TCG_TARGET_HAS_eqv_vec) { 398 vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b); 399 } else { 400 tcg_gen_xor_vec(0, r, a, b); 401 tcg_gen_not_vec(0, r, r); 402 } 403 } 404 405 static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) 406 { 407 TCGTemp *rt = tcgv_vec_temp(r); 408 TCGTemp *at = tcgv_vec_temp(a); 409 TCGArg ri = temp_arg(rt); 410 TCGArg ai = temp_arg(at); 411 TCGType type = rt->base_type; 412 int can; 413 414 tcg_debug_assert(at->base_type >= type); 415 tcg_assert_listed_vecop(opc); 416 can = tcg_can_emit_vec_op(opc, type, vece); 417 if (can > 0) { 418 vec_gen_2(opc, type, vece, ri, ai); 419 } else if (can < 0) { 420 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 421 tcg_expand_vec_op(opc, type, vece, ri, ai); 422 tcg_swap_vecop_list(hold_list); 423 } else { 424 return false; 425 } 426 return true; 427 } 428 429 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 430 { 431 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 432 433 if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) { 434 TCGv_vec t = tcg_const_ones_vec_matching(r); 435 tcg_gen_xor_vec(0, r, a, t); 436 tcg_temp_free_vec(t); 437 } 438 tcg_swap_vecop_list(hold_list); 439 } 440 441 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 442 { 443 const TCGOpcode *hold_list; 444 445 tcg_assert_listed_vecop(INDEX_op_neg_vec); 446 hold_list = tcg_swap_vecop_list(NULL); 447 448 if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) { 449 TCGv_vec t = tcg_const_zeros_vec_matching(r); 450 tcg_gen_sub_vec(vece, r, t, a); 451 tcg_temp_free_vec(t); 452 } 453 tcg_swap_vecop_list(hold_list); 454 } 455 456 void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a) 457 { 458 const TCGOpcode *hold_list; 459 460 tcg_assert_listed_vecop(INDEX_op_abs_vec); 461 hold_list = tcg_swap_vecop_list(NULL); 462 463 if (!do_op2(vece, r, a, INDEX_op_abs_vec)) { 464 TCGType type = tcgv_vec_temp(r)->base_type; 465 TCGv_vec t = tcg_temp_new_vec(type); 466 467 tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)); 468 if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) { 469 tcg_gen_neg_vec(vece, t, a); 470 tcg_gen_smax_vec(vece, r, a, t); 471 } else { 472 if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) { 473 tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1); 474 } else { 475 tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, 476 tcg_constant_vec(type, vece, 0)); 477 } 478 tcg_gen_xor_vec(vece, r, a, t); 479 tcg_gen_sub_vec(vece, r, r, t); 480 } 481 482 tcg_temp_free_vec(t); 483 } 484 tcg_swap_vecop_list(hold_list); 485 } 486 487 static void do_shifti(TCGOpcode opc, unsigned vece, 488 TCGv_vec r, TCGv_vec a, int64_t i) 489 { 490 TCGTemp *rt = tcgv_vec_temp(r); 491 TCGTemp *at = tcgv_vec_temp(a); 492 TCGArg ri = temp_arg(rt); 493 TCGArg ai = temp_arg(at); 494 TCGType type = rt->base_type; 495 int can; 496 497 tcg_debug_assert(at->base_type == type); 498 tcg_debug_assert(i >= 0 && i < (8 << vece)); 499 tcg_assert_listed_vecop(opc); 500 501 if (i == 0) { 502 tcg_gen_mov_vec(r, a); 503 return; 504 } 505 506 can = tcg_can_emit_vec_op(opc, type, vece); 507 if (can > 0) { 508 vec_gen_3(opc, type, vece, ri, ai, i); 509 } else { 510 /* We leave the choice of expansion via scalar or vector shift 511 to the target. Often, but not always, dupi can feed a vector 512 shift easier than a scalar. */ 513 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 514 tcg_debug_assert(can < 0); 515 tcg_expand_vec_op(opc, type, vece, ri, ai, i); 516 tcg_swap_vecop_list(hold_list); 517 } 518 } 519 520 void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 521 { 522 do_shifti(INDEX_op_shli_vec, vece, r, a, i); 523 } 524 525 void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 526 { 527 do_shifti(INDEX_op_shri_vec, vece, r, a, i); 528 } 529 530 void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 531 { 532 do_shifti(INDEX_op_sari_vec, vece, r, a, i); 533 } 534 535 void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 536 { 537 do_shifti(INDEX_op_rotli_vec, vece, r, a, i); 538 } 539 540 void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) 541 { 542 int bits = 8 << vece; 543 tcg_debug_assert(i >= 0 && i < bits); 544 do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); 545 } 546 547 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, 548 TCGv_vec r, TCGv_vec a, TCGv_vec b) 549 { 550 TCGTemp *rt = tcgv_vec_temp(r); 551 TCGTemp *at = tcgv_vec_temp(a); 552 TCGTemp *bt = tcgv_vec_temp(b); 553 TCGArg ri = temp_arg(rt); 554 TCGArg ai = temp_arg(at); 555 TCGArg bi = temp_arg(bt); 556 TCGType type = rt->base_type; 557 int can; 558 559 tcg_debug_assert(at->base_type >= type); 560 tcg_debug_assert(bt->base_type >= type); 561 tcg_assert_listed_vecop(INDEX_op_cmp_vec); 562 can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); 563 if (can > 0) { 564 vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 565 } else { 566 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 567 tcg_debug_assert(can < 0); 568 tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); 569 tcg_swap_vecop_list(hold_list); 570 } 571 } 572 573 static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, 574 TCGv_vec b, TCGOpcode opc) 575 { 576 TCGTemp *rt = tcgv_vec_temp(r); 577 TCGTemp *at = tcgv_vec_temp(a); 578 TCGTemp *bt = tcgv_vec_temp(b); 579 TCGArg ri = temp_arg(rt); 580 TCGArg ai = temp_arg(at); 581 TCGArg bi = temp_arg(bt); 582 TCGType type = rt->base_type; 583 int can; 584 585 tcg_debug_assert(at->base_type >= type); 586 tcg_debug_assert(bt->base_type >= type); 587 tcg_assert_listed_vecop(opc); 588 can = tcg_can_emit_vec_op(opc, type, vece); 589 if (can > 0) { 590 vec_gen_3(opc, type, vece, ri, ai, bi); 591 } else if (can < 0) { 592 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 593 tcg_expand_vec_op(opc, type, vece, ri, ai, bi); 594 tcg_swap_vecop_list(hold_list); 595 } else { 596 return false; 597 } 598 return true; 599 } 600 601 static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a, 602 TCGv_vec b, TCGOpcode opc) 603 { 604 bool ok = do_op3(vece, r, a, b, opc); 605 tcg_debug_assert(ok); 606 } 607 608 void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 609 { 610 do_op3_nofail(vece, r, a, b, INDEX_op_add_vec); 611 } 612 613 void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 614 { 615 do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec); 616 } 617 618 void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 619 { 620 do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec); 621 } 622 623 void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 624 { 625 do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec); 626 } 627 628 void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 629 { 630 if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) { 631 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 632 TCGv_vec t = tcg_temp_new_vec_matching(r); 633 634 /* usadd(a, b) = min(a, ~b) + b */ 635 tcg_gen_not_vec(vece, t, b); 636 tcg_gen_umin_vec(vece, t, t, a); 637 tcg_gen_add_vec(vece, r, t, b); 638 639 tcg_temp_free_vec(t); 640 tcg_swap_vecop_list(hold_list); 641 } 642 } 643 644 void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 645 { 646 do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec); 647 } 648 649 void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 650 { 651 if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) { 652 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 653 TCGv_vec t = tcg_temp_new_vec_matching(r); 654 655 /* ussub(a, b) = max(a, b) - b */ 656 tcg_gen_umax_vec(vece, t, a, b); 657 tcg_gen_sub_vec(vece, r, t, b); 658 659 tcg_temp_free_vec(t); 660 tcg_swap_vecop_list(hold_list); 661 } 662 } 663 664 static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, 665 TCGv_vec b, TCGOpcode opc, TCGCond cond) 666 { 667 if (!do_op3(vece, r, a, b, opc)) { 668 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 669 tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); 670 tcg_swap_vecop_list(hold_list); 671 } 672 } 673 674 void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 675 { 676 do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT); 677 } 678 679 void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 680 { 681 do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU); 682 } 683 684 void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 685 { 686 do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT); 687 } 688 689 void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 690 { 691 do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU); 692 } 693 694 void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 695 { 696 do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec); 697 } 698 699 void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 700 { 701 do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec); 702 } 703 704 void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 705 { 706 do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec); 707 } 708 709 void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 710 { 711 do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec); 712 } 713 714 void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) 715 { 716 do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec); 717 } 718 719 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, 720 TCGv_i32 s, TCGOpcode opc) 721 { 722 TCGTemp *rt = tcgv_vec_temp(r); 723 TCGTemp *at = tcgv_vec_temp(a); 724 TCGTemp *st = tcgv_i32_temp(s); 725 TCGArg ri = temp_arg(rt); 726 TCGArg ai = temp_arg(at); 727 TCGArg si = temp_arg(st); 728 TCGType type = rt->base_type; 729 int can; 730 731 tcg_debug_assert(at->base_type >= type); 732 tcg_assert_listed_vecop(opc); 733 can = tcg_can_emit_vec_op(opc, type, vece); 734 if (can > 0) { 735 vec_gen_3(opc, type, vece, ri, ai, si); 736 } else if (can < 0) { 737 const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); 738 tcg_expand_vec_op(opc, type, vece, ri, ai, si); 739 tcg_swap_vecop_list(hold_list); 740 } else { 741 g_assert_not_reached(); 742 } 743 } 744 745 void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 746 { 747 do_shifts(vece, r, a, b, INDEX_op_shls_vec); 748 } 749 750 void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 751 { 752 do_shifts(vece, r, a, b, INDEX_op_shrs_vec); 753 } 754 755 void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) 756 { 757 do_shifts(vece, r, a, b, INDEX_op_sars_vec); 758 } 759 760 void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) 761 { 762 do_shifts(vece, r, a, s, INDEX_op_rotls_vec); 763 } 764 765 void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, 766 TCGv_vec b, TCGv_vec c) 767 { 768 TCGTemp *rt = tcgv_vec_temp(r); 769 TCGTemp *at = tcgv_vec_temp(a); 770 TCGTemp *bt = tcgv_vec_temp(b); 771 TCGTemp *ct = tcgv_vec_temp(c); 772 TCGType type = rt->base_type; 773 774 tcg_debug_assert(at->base_type >= type); 775 tcg_debug_assert(bt->base_type >= type); 776 tcg_debug_assert(ct->base_type >= type); 777 778 if (TCG_TARGET_HAS_bitsel_vec) { 779 vec_gen_4(INDEX_op_bitsel_vec, type, MO_8, 780 temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct)); 781 } else { 782 TCGv_vec t = tcg_temp_new_vec(type); 783 tcg_gen_and_vec(MO_8, t, a, b); 784 tcg_gen_andc_vec(MO_8, r, c, a); 785 tcg_gen_or_vec(MO_8, r, r, t); 786 tcg_temp_free_vec(t); 787 } 788 } 789 790 void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, 791 TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d) 792 { 793 TCGTemp *rt = tcgv_vec_temp(r); 794 TCGTemp *at = tcgv_vec_temp(a); 795 TCGTemp *bt = tcgv_vec_temp(b); 796 TCGTemp *ct = tcgv_vec_temp(c); 797 TCGTemp *dt = tcgv_vec_temp(d); 798 TCGArg ri = temp_arg(rt); 799 TCGArg ai = temp_arg(at); 800 TCGArg bi = temp_arg(bt); 801 TCGArg ci = temp_arg(ct); 802 TCGArg di = temp_arg(dt); 803 TCGType type = rt->base_type; 804 const TCGOpcode *hold_list; 805 int can; 806 807 tcg_debug_assert(at->base_type >= type); 808 tcg_debug_assert(bt->base_type >= type); 809 tcg_debug_assert(ct->base_type >= type); 810 tcg_debug_assert(dt->base_type >= type); 811 812 tcg_assert_listed_vecop(INDEX_op_cmpsel_vec); 813 hold_list = tcg_swap_vecop_list(NULL); 814 can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece); 815 816 if (can > 0) { 817 vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond); 818 } else if (can < 0) { 819 tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece, 820 ri, ai, bi, ci, di, cond); 821 } else { 822 TCGv_vec t = tcg_temp_new_vec(type); 823 tcg_gen_cmp_vec(cond, vece, t, a, b); 824 tcg_gen_bitsel_vec(vece, r, t, c, d); 825 tcg_temp_free_vec(t); 826 } 827 tcg_swap_vecop_list(hold_list); 828 } 829