1 // SPDX-License-Identifier: GPL-2.0 2 /* visemul.c: Emulation of VIS instructions. 3 * 4 * Copyright (C) 2006 David S. Miller (davem@davemloft.net) 5 */ 6 #include <linux/kernel.h> 7 #include <linux/errno.h> 8 #include <linux/thread_info.h> 9 #include <linux/perf_event.h> 10 11 #include <asm/ptrace.h> 12 #include <asm/pstate.h> 13 #include <asm/fpumacro.h> 14 #include <linux/uaccess.h> 15 #include <asm/cacheflush.h> 16 17 /* OPF field of various VIS instructions. */ 18 19 /* 000111011 - four 16-bit packs */ 20 #define FPACK16_OPF 0x03b 21 22 /* 000111010 - two 32-bit packs */ 23 #define FPACK32_OPF 0x03a 24 25 /* 000111101 - four 16-bit packs */ 26 #define FPACKFIX_OPF 0x03d 27 28 /* 001001101 - four 16-bit expands */ 29 #define FEXPAND_OPF 0x04d 30 31 /* 001001011 - two 32-bit merges */ 32 #define FPMERGE_OPF 0x04b 33 34 /* 000110001 - 8-by-16-bit partitioned product */ 35 #define FMUL8x16_OPF 0x031 36 37 /* 000110011 - 8-by-16-bit upper alpha partitioned product */ 38 #define FMUL8x16AU_OPF 0x033 39 40 /* 000110101 - 8-by-16-bit lower alpha partitioned product */ 41 #define FMUL8x16AL_OPF 0x035 42 43 /* 000110110 - upper 8-by-16-bit partitioned product */ 44 #define FMUL8SUx16_OPF 0x036 45 46 /* 000110111 - lower 8-by-16-bit partitioned product */ 47 #define FMUL8ULx16_OPF 0x037 48 49 /* 000111000 - upper 8-by-16-bit partitioned product */ 50 #define FMULD8SUx16_OPF 0x038 51 52 /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ 53 #define FMULD8ULx16_OPF 0x039 54 55 /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ 56 #define FCMPGT16_OPF 0x028 57 58 /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ 59 #define FCMPGT32_OPF 0x02c 60 61 /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ 62 #define FCMPLE16_OPF 0x020 63 64 /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ 65 #define FCMPLE32_OPF 0x024 66 67 /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ 68 #define FCMPNE16_OPF 0x022 69 70 /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ 71 #define FCMPNE32_OPF 0x026 72 73 /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ 74 #define FCMPEQ16_OPF 0x02a 75 76 /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ 77 #define FCMPEQ32_OPF 0x02e 78 79 /* 000000000 - Eight 8-bit edge boundary processing */ 80 #define EDGE8_OPF 0x000 81 82 /* 000000001 - Eight 8-bit edge boundary processing, no CC */ 83 #define EDGE8N_OPF 0x001 84 85 /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ 86 #define EDGE8L_OPF 0x002 87 88 /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ 89 #define EDGE8LN_OPF 0x003 90 91 /* 000000100 - Four 16-bit edge boundary processing */ 92 #define EDGE16_OPF 0x004 93 94 /* 000000101 - Four 16-bit edge boundary processing, no CC */ 95 #define EDGE16N_OPF 0x005 96 97 /* 000000110 - Four 16-bit edge boundary processing, little-endian */ 98 #define EDGE16L_OPF 0x006 99 100 /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ 101 #define EDGE16LN_OPF 0x007 102 103 /* 000001000 - Two 32-bit edge boundary processing */ 104 #define EDGE32_OPF 0x008 105 106 /* 000001001 - Two 32-bit edge boundary processing, no CC */ 107 #define EDGE32N_OPF 0x009 108 109 /* 000001010 - Two 32-bit edge boundary processing, little-endian */ 110 #define EDGE32L_OPF 0x00a 111 112 /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ 113 #define EDGE32LN_OPF 0x00b 114 115 /* 000111110 - distance between 8 8-bit components */ 116 #define PDIST_OPF 0x03e 117 118 /* 000010000 - convert 8-bit 3-D address to blocked byte address */ 119 #define ARRAY8_OPF 0x010 120 121 /* 000010010 - convert 16-bit 3-D address to blocked byte address */ 122 #define ARRAY16_OPF 0x012 123 124 /* 000010100 - convert 32-bit 3-D address to blocked byte address */ 125 #define ARRAY32_OPF 0x014 126 127 /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ 128 #define BMASK_OPF 0x019 129 130 /* 001001100 - Permute bytes as specified by GSR.MASK */ 131 #define BSHUFFLE_OPF 0x04c 132 133 #define VIS_OPF_SHIFT 5 134 #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) 135 136 #define RS1(INSN) (((INSN) >> 14) & 0x1f) 137 #define RS2(INSN) (((INSN) >> 0) & 0x1f) 138 #define RD(INSN) (((INSN) >> 25) & 0x1f) 139 140 static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, 141 unsigned int rd, int from_kernel) 142 { 143 if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { 144 if (from_kernel != 0) 145 __asm__ __volatile__("flushw"); 146 else 147 flushw_user(); 148 } 149 } 150 151 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) 152 { 153 unsigned long value, fp; 154 155 if (reg < 16) 156 return (!reg ? 0 : regs->u_regs[reg]); 157 158 fp = regs->u_regs[UREG_FP]; 159 160 if (regs->tstate & TSTATE_PRIV) { 161 struct reg_window *win; 162 win = (struct reg_window *)(fp + STACK_BIAS); 163 value = win->locals[reg - 16]; 164 } else if (!test_thread_64bit_stack(fp)) { 165 struct reg_window32 __user *win32; 166 win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 167 get_user(value, &win32->locals[reg - 16]); 168 } else { 169 struct reg_window __user *win; 170 win = (struct reg_window __user *)(fp + STACK_BIAS); 171 get_user(value, &win->locals[reg - 16]); 172 } 173 return value; 174 } 175 176 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, 177 struct pt_regs *regs) 178 { 179 unsigned long fp = regs->u_regs[UREG_FP]; 180 181 BUG_ON(reg < 16); 182 BUG_ON(regs->tstate & TSTATE_PRIV); 183 184 if (!test_thread_64bit_stack(fp)) { 185 struct reg_window32 __user *win32; 186 win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 187 return (unsigned long __user *)&win32->locals[reg - 16]; 188 } else { 189 struct reg_window __user *win; 190 win = (struct reg_window __user *)(fp + STACK_BIAS); 191 return &win->locals[reg - 16]; 192 } 193 } 194 195 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, 196 struct pt_regs *regs) 197 { 198 BUG_ON(reg >= 16); 199 BUG_ON(regs->tstate & TSTATE_PRIV); 200 201 return ®s->u_regs[reg]; 202 } 203 204 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) 205 { 206 if (rd < 16) { 207 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); 208 209 *rd_kern = val; 210 } else { 211 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); 212 213 if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) 214 __put_user((u32)val, (u32 __user *)rd_user); 215 else 216 __put_user(val, rd_user); 217 } 218 } 219 220 static inline unsigned long fpd_regval(struct fpustate *f, 221 unsigned int insn_regnum) 222 { 223 insn_regnum = (((insn_regnum & 1) << 5) | 224 (insn_regnum & 0x1e)); 225 226 return *(unsigned long *) &f->regs[insn_regnum]; 227 } 228 229 static inline unsigned long *fpd_regaddr(struct fpustate *f, 230 unsigned int insn_regnum) 231 { 232 insn_regnum = (((insn_regnum & 1) << 5) | 233 (insn_regnum & 0x1e)); 234 235 return (unsigned long *) &f->regs[insn_regnum]; 236 } 237 238 static inline unsigned int fps_regval(struct fpustate *f, 239 unsigned int insn_regnum) 240 { 241 return f->regs[insn_regnum]; 242 } 243 244 static inline unsigned int *fps_regaddr(struct fpustate *f, 245 unsigned int insn_regnum) 246 { 247 return &f->regs[insn_regnum]; 248 } 249 250 struct edge_tab { 251 u16 left, right; 252 }; 253 static struct edge_tab edge8_tab[8] = { 254 { 0xff, 0x80 }, 255 { 0x7f, 0xc0 }, 256 { 0x3f, 0xe0 }, 257 { 0x1f, 0xf0 }, 258 { 0x0f, 0xf8 }, 259 { 0x07, 0xfc }, 260 { 0x03, 0xfe }, 261 { 0x01, 0xff }, 262 }; 263 static struct edge_tab edge8_tab_l[8] = { 264 { 0xff, 0x01 }, 265 { 0xfe, 0x03 }, 266 { 0xfc, 0x07 }, 267 { 0xf8, 0x0f }, 268 { 0xf0, 0x1f }, 269 { 0xe0, 0x3f }, 270 { 0xc0, 0x7f }, 271 { 0x80, 0xff }, 272 }; 273 static struct edge_tab edge16_tab[4] = { 274 { 0xf, 0x8 }, 275 { 0x7, 0xc }, 276 { 0x3, 0xe }, 277 { 0x1, 0xf }, 278 }; 279 static struct edge_tab edge16_tab_l[4] = { 280 { 0xf, 0x1 }, 281 { 0xe, 0x3 }, 282 { 0xc, 0x7 }, 283 { 0x8, 0xf }, 284 }; 285 static struct edge_tab edge32_tab[2] = { 286 { 0x3, 0x2 }, 287 { 0x1, 0x3 }, 288 }; 289 static struct edge_tab edge32_tab_l[2] = { 290 { 0x3, 0x1 }, 291 { 0x2, 0x3 }, 292 }; 293 294 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) 295 { 296 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; 297 u16 left, right; 298 299 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 300 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); 301 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); 302 303 if (test_thread_flag(TIF_32BIT)) { 304 rs1 = rs1 & 0xffffffff; 305 rs2 = rs2 & 0xffffffff; 306 } 307 switch (opf) { 308 default: 309 case EDGE8_OPF: 310 case EDGE8N_OPF: 311 left = edge8_tab[rs1 & 0x7].left; 312 right = edge8_tab[rs2 & 0x7].right; 313 break; 314 case EDGE8L_OPF: 315 case EDGE8LN_OPF: 316 left = edge8_tab_l[rs1 & 0x7].left; 317 right = edge8_tab_l[rs2 & 0x7].right; 318 break; 319 320 case EDGE16_OPF: 321 case EDGE16N_OPF: 322 left = edge16_tab[(rs1 >> 1) & 0x3].left; 323 right = edge16_tab[(rs2 >> 1) & 0x3].right; 324 break; 325 326 case EDGE16L_OPF: 327 case EDGE16LN_OPF: 328 left = edge16_tab_l[(rs1 >> 1) & 0x3].left; 329 right = edge16_tab_l[(rs2 >> 1) & 0x3].right; 330 break; 331 332 case EDGE32_OPF: 333 case EDGE32N_OPF: 334 left = edge32_tab[(rs1 >> 2) & 0x1].left; 335 right = edge32_tab[(rs2 >> 2) & 0x1].right; 336 break; 337 338 case EDGE32L_OPF: 339 case EDGE32LN_OPF: 340 left = edge32_tab_l[(rs1 >> 2) & 0x1].left; 341 right = edge32_tab_l[(rs2 >> 2) & 0x1].right; 342 break; 343 } 344 345 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) 346 rd_val = right & left; 347 else 348 rd_val = left; 349 350 store_reg(regs, rd_val, RD(insn)); 351 352 switch (opf) { 353 case EDGE8_OPF: 354 case EDGE8L_OPF: 355 case EDGE16_OPF: 356 case EDGE16L_OPF: 357 case EDGE32_OPF: 358 case EDGE32L_OPF: { 359 unsigned long ccr, tstate; 360 361 __asm__ __volatile__("subcc %1, %2, %%g0\n\t" 362 "rd %%ccr, %0" 363 : "=r" (ccr) 364 : "r" (orig_rs1), "r" (orig_rs2) 365 : "cc"); 366 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); 367 regs->tstate = tstate | (ccr << 32UL); 368 } 369 } 370 } 371 372 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) 373 { 374 unsigned long rs1, rs2, rd_val; 375 unsigned int bits, bits_mask; 376 377 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 378 rs1 = fetch_reg(RS1(insn), regs); 379 rs2 = fetch_reg(RS2(insn), regs); 380 381 bits = (rs2 > 5 ? 5 : rs2); 382 bits_mask = (1UL << bits) - 1UL; 383 384 rd_val = ((((rs1 >> 11) & 0x3) << 0) | 385 (((rs1 >> 33) & 0x3) << 2) | 386 (((rs1 >> 55) & 0x1) << 4) | 387 (((rs1 >> 13) & 0xf) << 5) | 388 (((rs1 >> 35) & 0xf) << 9) | 389 (((rs1 >> 56) & 0xf) << 13) | 390 (((rs1 >> 17) & bits_mask) << 17) | 391 (((rs1 >> 39) & bits_mask) << (17 + bits)) | 392 (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); 393 394 switch (opf) { 395 case ARRAY16_OPF: 396 rd_val <<= 1; 397 break; 398 399 case ARRAY32_OPF: 400 rd_val <<= 2; 401 } 402 403 store_reg(regs, rd_val, RD(insn)); 404 } 405 406 static void bmask(struct pt_regs *regs, unsigned int insn) 407 { 408 unsigned long rs1, rs2, rd_val, gsr; 409 410 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 411 rs1 = fetch_reg(RS1(insn), regs); 412 rs2 = fetch_reg(RS2(insn), regs); 413 rd_val = rs1 + rs2; 414 415 store_reg(regs, rd_val, RD(insn)); 416 417 gsr = current_thread_info()->gsr[0] & 0xffffffff; 418 gsr |= rd_val << 32UL; 419 current_thread_info()->gsr[0] = gsr; 420 } 421 422 static void bshuffle(struct pt_regs *regs, unsigned int insn) 423 { 424 struct fpustate *f = FPUSTATE; 425 unsigned long rs1, rs2, rd_val; 426 unsigned long bmask, i; 427 428 bmask = current_thread_info()->gsr[0] >> 32UL; 429 430 rs1 = fpd_regval(f, RS1(insn)); 431 rs2 = fpd_regval(f, RS2(insn)); 432 433 rd_val = 0UL; 434 for (i = 0; i < 8; i++) { 435 unsigned long which = (bmask >> (i * 4)) & 0xf; 436 unsigned long byte; 437 438 if (which < 8) 439 byte = (rs1 >> (which * 8)) & 0xff; 440 else 441 byte = (rs2 >> ((which-8)*8)) & 0xff; 442 rd_val |= (byte << (i * 8)); 443 } 444 445 *fpd_regaddr(f, RD(insn)) = rd_val; 446 } 447 448 static void pdist(struct pt_regs *regs, unsigned int insn) 449 { 450 struct fpustate *f = FPUSTATE; 451 unsigned long rs1, rs2, *rd, rd_val; 452 unsigned long i; 453 454 rs1 = fpd_regval(f, RS1(insn)); 455 rs2 = fpd_regval(f, RS2(insn)); 456 rd = fpd_regaddr(f, RD(insn)); 457 458 rd_val = *rd; 459 460 for (i = 0; i < 8; i++) { 461 s16 s1, s2; 462 463 s1 = (rs1 >> (56 - (i * 8))) & 0xff; 464 s2 = (rs2 >> (56 - (i * 8))) & 0xff; 465 466 /* Absolute value of difference. */ 467 s1 -= s2; 468 if (s1 < 0) 469 s1 = ~s1 + 1; 470 471 rd_val += s1; 472 } 473 474 *rd = rd_val; 475 } 476 477 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) 478 { 479 struct fpustate *f = FPUSTATE; 480 unsigned long rs1, rs2, gsr, scale, rd_val; 481 482 gsr = current_thread_info()->gsr[0]; 483 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); 484 switch (opf) { 485 case FPACK16_OPF: { 486 unsigned long byte; 487 488 rs2 = fpd_regval(f, RS2(insn)); 489 rd_val = 0; 490 for (byte = 0; byte < 4; byte++) { 491 unsigned int val; 492 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; 493 int scaled = src << scale; 494 int from_fixed = scaled >> 7; 495 496 val = ((from_fixed < 0) ? 497 0 : 498 (from_fixed > 255) ? 499 255 : from_fixed); 500 501 rd_val |= (val << (8 * byte)); 502 } 503 *fps_regaddr(f, RD(insn)) = rd_val; 504 break; 505 } 506 507 case FPACK32_OPF: { 508 unsigned long word; 509 510 rs1 = fpd_regval(f, RS1(insn)); 511 rs2 = fpd_regval(f, RS2(insn)); 512 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); 513 for (word = 0; word < 2; word++) { 514 unsigned long val; 515 s32 src = (rs2 >> (word * 32UL)); 516 s64 scaled = src << scale; 517 s64 from_fixed = scaled >> 23; 518 519 val = ((from_fixed < 0) ? 520 0 : 521 (from_fixed > 255) ? 522 255 : from_fixed); 523 524 rd_val |= (val << (32 * word)); 525 } 526 *fpd_regaddr(f, RD(insn)) = rd_val; 527 break; 528 } 529 530 case FPACKFIX_OPF: { 531 unsigned long word; 532 533 rs2 = fpd_regval(f, RS2(insn)); 534 535 rd_val = 0; 536 for (word = 0; word < 2; word++) { 537 long val; 538 s32 src = (rs2 >> (word * 32UL)); 539 s64 scaled = src << scale; 540 s64 from_fixed = scaled >> 16; 541 542 val = ((from_fixed < -32768) ? 543 -32768 : 544 (from_fixed > 32767) ? 545 32767 : from_fixed); 546 547 rd_val |= ((val & 0xffff) << (word * 16)); 548 } 549 *fps_regaddr(f, RD(insn)) = rd_val; 550 break; 551 } 552 553 case FEXPAND_OPF: { 554 unsigned long byte; 555 556 rs2 = fps_regval(f, RS2(insn)); 557 558 rd_val = 0; 559 for (byte = 0; byte < 4; byte++) { 560 unsigned long val; 561 u8 src = (rs2 >> (byte * 8)) & 0xff; 562 563 val = src << 4; 564 565 rd_val |= (val << (byte * 16)); 566 } 567 *fpd_regaddr(f, RD(insn)) = rd_val; 568 break; 569 } 570 571 case FPMERGE_OPF: { 572 rs1 = fps_regval(f, RS1(insn)); 573 rs2 = fps_regval(f, RS2(insn)); 574 575 rd_val = (((rs2 & 0x000000ff) << 0) | 576 ((rs1 & 0x000000ff) << 8) | 577 ((rs2 & 0x0000ff00) << 8) | 578 ((rs1 & 0x0000ff00) << 16) | 579 ((rs2 & 0x00ff0000) << 16) | 580 ((rs1 & 0x00ff0000) << 24) | 581 ((rs2 & 0xff000000) << 24) | 582 ((rs1 & 0xff000000) << 32)); 583 *fpd_regaddr(f, RD(insn)) = rd_val; 584 break; 585 } 586 } 587 } 588 589 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) 590 { 591 struct fpustate *f = FPUSTATE; 592 unsigned long rs1, rs2, rd_val; 593 594 switch (opf) { 595 case FMUL8x16_OPF: { 596 unsigned long byte; 597 598 rs1 = fps_regval(f, RS1(insn)); 599 rs2 = fpd_regval(f, RS2(insn)); 600 601 rd_val = 0; 602 for (byte = 0; byte < 4; byte++) { 603 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; 604 s16 src2 = (rs2 >> (byte * 16)) & 0xffff; 605 u32 prod = src1 * src2; 606 u16 scaled = ((prod & 0x00ffff00) >> 8); 607 608 /* Round up. */ 609 if (prod & 0x80) 610 scaled++; 611 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 612 } 613 614 *fpd_regaddr(f, RD(insn)) = rd_val; 615 break; 616 } 617 618 case FMUL8x16AU_OPF: 619 case FMUL8x16AL_OPF: { 620 unsigned long byte; 621 s16 src2; 622 623 rs1 = fps_regval(f, RS1(insn)); 624 rs2 = fps_regval(f, RS2(insn)); 625 626 rd_val = 0; 627 src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); 628 for (byte = 0; byte < 4; byte++) { 629 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; 630 u32 prod = src1 * src2; 631 u16 scaled = ((prod & 0x00ffff00) >> 8); 632 633 /* Round up. */ 634 if (prod & 0x80) 635 scaled++; 636 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 637 } 638 639 *fpd_regaddr(f, RD(insn)) = rd_val; 640 break; 641 } 642 643 case FMUL8SUx16_OPF: 644 case FMUL8ULx16_OPF: { 645 unsigned long byte, ushift; 646 647 rs1 = fpd_regval(f, RS1(insn)); 648 rs2 = fpd_regval(f, RS2(insn)); 649 650 rd_val = 0; 651 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; 652 for (byte = 0; byte < 4; byte++) { 653 u16 src1; 654 s16 src2; 655 u32 prod; 656 u16 scaled; 657 658 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); 659 src2 = ((rs2 >> (16 * byte)) & 0xffff); 660 prod = src1 * src2; 661 scaled = ((prod & 0x00ffff00) >> 8); 662 663 /* Round up. */ 664 if (prod & 0x80) 665 scaled++; 666 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 667 } 668 669 *fpd_regaddr(f, RD(insn)) = rd_val; 670 break; 671 } 672 673 case FMULD8SUx16_OPF: 674 case FMULD8ULx16_OPF: { 675 unsigned long byte, ushift; 676 677 rs1 = fps_regval(f, RS1(insn)); 678 rs2 = fps_regval(f, RS2(insn)); 679 680 rd_val = 0; 681 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; 682 for (byte = 0; byte < 2; byte++) { 683 u16 src1; 684 s16 src2; 685 u32 prod; 686 u16 scaled; 687 688 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); 689 src2 = ((rs2 >> (16 * byte)) & 0xffff); 690 prod = src1 * src2; 691 scaled = ((prod & 0x00ffff00) >> 8); 692 693 /* Round up. */ 694 if (prod & 0x80) 695 scaled++; 696 rd_val |= ((scaled & 0xffffUL) << 697 ((byte * 32UL) + 7UL)); 698 } 699 *fpd_regaddr(f, RD(insn)) = rd_val; 700 break; 701 } 702 } 703 } 704 705 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) 706 { 707 struct fpustate *f = FPUSTATE; 708 unsigned long rs1, rs2, rd_val, i; 709 710 rs1 = fpd_regval(f, RS1(insn)); 711 rs2 = fpd_regval(f, RS2(insn)); 712 713 rd_val = 0; 714 715 switch (opf) { 716 case FCMPGT16_OPF: 717 for (i = 0; i < 4; i++) { 718 s16 a = (rs1 >> (i * 16)) & 0xffff; 719 s16 b = (rs2 >> (i * 16)) & 0xffff; 720 721 if (a > b) 722 rd_val |= 8 >> i; 723 } 724 break; 725 726 case FCMPGT32_OPF: 727 for (i = 0; i < 2; i++) { 728 s32 a = (rs1 >> (i * 32)) & 0xffffffff; 729 s32 b = (rs2 >> (i * 32)) & 0xffffffff; 730 731 if (a > b) 732 rd_val |= 2 >> i; 733 } 734 break; 735 736 case FCMPLE16_OPF: 737 for (i = 0; i < 4; i++) { 738 s16 a = (rs1 >> (i * 16)) & 0xffff; 739 s16 b = (rs2 >> (i * 16)) & 0xffff; 740 741 if (a <= b) 742 rd_val |= 8 >> i; 743 } 744 break; 745 746 case FCMPLE32_OPF: 747 for (i = 0; i < 2; i++) { 748 s32 a = (rs1 >> (i * 32)) & 0xffffffff; 749 s32 b = (rs2 >> (i * 32)) & 0xffffffff; 750 751 if (a <= b) 752 rd_val |= 2 >> i; 753 } 754 break; 755 756 case FCMPNE16_OPF: 757 for (i = 0; i < 4; i++) { 758 s16 a = (rs1 >> (i * 16)) & 0xffff; 759 s16 b = (rs2 >> (i * 16)) & 0xffff; 760 761 if (a != b) 762 rd_val |= 8 >> i; 763 } 764 break; 765 766 case FCMPNE32_OPF: 767 for (i = 0; i < 2; i++) { 768 s32 a = (rs1 >> (i * 32)) & 0xffffffff; 769 s32 b = (rs2 >> (i * 32)) & 0xffffffff; 770 771 if (a != b) 772 rd_val |= 2 >> i; 773 } 774 break; 775 776 case FCMPEQ16_OPF: 777 for (i = 0; i < 4; i++) { 778 s16 a = (rs1 >> (i * 16)) & 0xffff; 779 s16 b = (rs2 >> (i * 16)) & 0xffff; 780 781 if (a == b) 782 rd_val |= 8 >> i; 783 } 784 break; 785 786 case FCMPEQ32_OPF: 787 for (i = 0; i < 2; i++) { 788 s32 a = (rs1 >> (i * 32)) & 0xffffffff; 789 s32 b = (rs2 >> (i * 32)) & 0xffffffff; 790 791 if (a == b) 792 rd_val |= 2 >> i; 793 } 794 break; 795 } 796 797 maybe_flush_windows(0, 0, RD(insn), 0); 798 store_reg(regs, rd_val, RD(insn)); 799 } 800 801 /* Emulate the VIS instructions which are not implemented in 802 * hardware on Niagara. 803 */ 804 int vis_emul(struct pt_regs *regs, unsigned int insn) 805 { 806 unsigned long pc = regs->tpc; 807 unsigned int opf; 808 809 BUG_ON(regs->tstate & TSTATE_PRIV); 810 811 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); 812 813 if (test_thread_flag(TIF_32BIT)) 814 pc = (u32)pc; 815 816 if (get_user(insn, (u32 __user *) pc)) 817 return -EFAULT; 818 819 save_and_clear_fpu(); 820 821 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; 822 switch (opf) { 823 default: 824 return -EINVAL; 825 826 /* Pixel Formatting Instructions. */ 827 case FPACK16_OPF: 828 case FPACK32_OPF: 829 case FPACKFIX_OPF: 830 case FEXPAND_OPF: 831 case FPMERGE_OPF: 832 pformat(regs, insn, opf); 833 break; 834 835 /* Partitioned Multiply Instructions */ 836 case FMUL8x16_OPF: 837 case FMUL8x16AU_OPF: 838 case FMUL8x16AL_OPF: 839 case FMUL8SUx16_OPF: 840 case FMUL8ULx16_OPF: 841 case FMULD8SUx16_OPF: 842 case FMULD8ULx16_OPF: 843 pmul(regs, insn, opf); 844 break; 845 846 /* Pixel Compare Instructions */ 847 case FCMPGT16_OPF: 848 case FCMPGT32_OPF: 849 case FCMPLE16_OPF: 850 case FCMPLE32_OPF: 851 case FCMPNE16_OPF: 852 case FCMPNE32_OPF: 853 case FCMPEQ16_OPF: 854 case FCMPEQ32_OPF: 855 pcmp(regs, insn, opf); 856 break; 857 858 /* Edge Handling Instructions */ 859 case EDGE8_OPF: 860 case EDGE8N_OPF: 861 case EDGE8L_OPF: 862 case EDGE8LN_OPF: 863 case EDGE16_OPF: 864 case EDGE16N_OPF: 865 case EDGE16L_OPF: 866 case EDGE16LN_OPF: 867 case EDGE32_OPF: 868 case EDGE32N_OPF: 869 case EDGE32L_OPF: 870 case EDGE32LN_OPF: 871 edge(regs, insn, opf); 872 break; 873 874 /* Pixel Component Distance */ 875 case PDIST_OPF: 876 pdist(regs, insn); 877 break; 878 879 /* Three-Dimensional Array Addressing Instructions */ 880 case ARRAY8_OPF: 881 case ARRAY16_OPF: 882 case ARRAY32_OPF: 883 array(regs, insn, opf); 884 break; 885 886 /* Byte Mask and Shuffle Instructions */ 887 case BMASK_OPF: 888 bmask(regs, insn); 889 break; 890 891 case BSHUFFLE_OPF: 892 bshuffle(regs, insn); 893 break; 894 } 895 896 regs->tpc = regs->tnpc; 897 regs->tnpc += 4; 898 return 0; 899 } 900