1 /* visemul.c: Emulation of VIS instructions. 2 * 3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net) 4 */ 5 #include <linux/kernel.h> 6 #include <linux/errno.h> 7 #include <linux/thread_info.h> 8 #include <linux/perf_event.h> 9 10 #include <asm/ptrace.h> 11 #include <asm/pstate.h> 12 #include <asm/system.h> 13 #include <asm/fpumacro.h> 14 #include <asm/uaccess.h> 15 16 /* OPF field of various VIS instructions. */ 17 18 /* 000111011 - four 16-bit packs */ 19 #define FPACK16_OPF 0x03b 20 21 /* 000111010 - two 32-bit packs */ 22 #define FPACK32_OPF 0x03a 23 24 /* 000111101 - four 16-bit packs */ 25 #define FPACKFIX_OPF 0x03d 26 27 /* 001001101 - four 16-bit expands */ 28 #define FEXPAND_OPF 0x04d 29 30 /* 001001011 - two 32-bit merges */ 31 #define FPMERGE_OPF 0x04b 32 33 /* 000110001 - 8-by-16-bit partitoned product */ 34 #define FMUL8x16_OPF 0x031 35 36 /* 000110011 - 8-by-16-bit upper alpha partitioned product */ 37 #define FMUL8x16AU_OPF 0x033 38 39 /* 000110101 - 8-by-16-bit lower alpha partitioned product */ 40 #define FMUL8x16AL_OPF 0x035 41 42 /* 000110110 - upper 8-by-16-bit partitioned product */ 43 #define FMUL8SUx16_OPF 0x036 44 45 /* 000110111 - lower 8-by-16-bit partitioned product */ 46 #define FMUL8ULx16_OPF 0x037 47 48 /* 000111000 - upper 8-by-16-bit partitioned product */ 49 #define FMULD8SUx16_OPF 0x038 50 51 /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ 52 #define FMULD8ULx16_OPF 0x039 53 54 /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ 55 #define FCMPGT16_OPF 0x028 56 57 /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ 58 #define FCMPGT32_OPF 0x02c 59 60 /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ 61 #define FCMPLE16_OPF 0x020 62 63 /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ 64 #define FCMPLE32_OPF 0x024 65 66 /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ 67 #define FCMPNE16_OPF 0x022 68 69 /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ 70 #define FCMPNE32_OPF 0x026 71 72 /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ 73 #define FCMPEQ16_OPF 0x02a 74 75 /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ 76 #define FCMPEQ32_OPF 0x02e 77 78 /* 000000000 - Eight 8-bit edge boundary processing */ 79 #define EDGE8_OPF 0x000 80 81 /* 000000001 - Eight 8-bit edge boundary processing, no CC */ 82 #define EDGE8N_OPF 0x001 83 84 /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ 85 #define EDGE8L_OPF 0x002 86 87 /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ 88 #define EDGE8LN_OPF 0x003 89 90 /* 000000100 - Four 16-bit edge boundary processing */ 91 #define EDGE16_OPF 0x004 92 93 /* 000000101 - Four 16-bit edge boundary processing, no CC */ 94 #define EDGE16N_OPF 0x005 95 96 /* 000000110 - Four 16-bit edge boundary processing, little-endian */ 97 #define EDGE16L_OPF 0x006 98 99 /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ 100 #define EDGE16LN_OPF 0x007 101 102 /* 000001000 - Two 32-bit edge boundary processing */ 103 #define EDGE32_OPF 0x008 104 105 /* 000001001 - Two 32-bit edge boundary processing, no CC */ 106 #define EDGE32N_OPF 0x009 107 108 /* 000001010 - Two 32-bit edge boundary processing, little-endian */ 109 #define EDGE32L_OPF 0x00a 110 111 /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ 112 #define EDGE32LN_OPF 0x00b 113 114 /* 000111110 - distance between 8 8-bit components */ 115 #define PDIST_OPF 0x03e 116 117 /* 000010000 - convert 8-bit 3-D address to blocked byte address */ 118 #define ARRAY8_OPF 0x010 119 120 /* 000010010 - convert 16-bit 3-D address to blocked byte address */ 121 #define ARRAY16_OPF 0x012 122 123 /* 000010100 - convert 32-bit 3-D address to blocked byte address */ 124 #define ARRAY32_OPF 0x014 125 126 /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ 127 #define BMASK_OPF 0x019 128 129 /* 001001100 - Permute bytes as specified by GSR.MASK */ 130 #define BSHUFFLE_OPF 0x04c 131 132 #define VIS_OPF_SHIFT 5 133 #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) 134 135 #define RS1(INSN) (((INSN) >> 14) & 0x1f) 136 #define RS2(INSN) (((INSN) >> 0) & 0x1f) 137 #define RD(INSN) (((INSN) >> 25) & 0x1f) 138 139 static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, 140 unsigned int rd, int from_kernel) 141 { 142 if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { 143 if (from_kernel != 0) 144 __asm__ __volatile__("flushw"); 145 else 146 flushw_user(); 147 } 148 } 149 150 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) 151 { 152 unsigned long value; 153 154 if (reg < 16) 155 return (!reg ? 0 : regs->u_regs[reg]); 156 if (regs->tstate & TSTATE_PRIV) { 157 struct reg_window *win; 158 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); 159 value = win->locals[reg - 16]; 160 } else if (test_thread_flag(TIF_32BIT)) { 161 struct reg_window32 __user *win32; 162 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 163 get_user(value, &win32->locals[reg - 16]); 164 } else { 165 struct reg_window __user *win; 166 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 167 get_user(value, &win->locals[reg - 16]); 168 } 169 return value; 170 } 171 172 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, 173 struct pt_regs *regs) 174 { 175 BUG_ON(reg < 16); 176 BUG_ON(regs->tstate & TSTATE_PRIV); 177 178 if (test_thread_flag(TIF_32BIT)) { 179 struct reg_window32 __user *win32; 180 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 181 return (unsigned long __user *)&win32->locals[reg - 16]; 182 } else { 183 struct reg_window __user *win; 184 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 185 return &win->locals[reg - 16]; 186 } 187 } 188 189 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, 190 struct pt_regs *regs) 191 { 192 BUG_ON(reg >= 16); 193 BUG_ON(regs->tstate & TSTATE_PRIV); 194 195 return ®s->u_regs[reg]; 196 } 197 198 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) 199 { 200 if (rd < 16) { 201 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); 202 203 *rd_kern = val; 204 } else { 205 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); 206 207 if (test_thread_flag(TIF_32BIT)) 208 __put_user((u32)val, (u32 __user *)rd_user); 209 else 210 __put_user(val, rd_user); 211 } 212 } 213 214 static inline unsigned long fpd_regval(struct fpustate *f, 215 unsigned int insn_regnum) 216 { 217 insn_regnum = (((insn_regnum & 1) << 5) | 218 (insn_regnum & 0x1e)); 219 220 return *(unsigned long *) &f->regs[insn_regnum]; 221 } 222 223 static inline unsigned long *fpd_regaddr(struct fpustate *f, 224 unsigned int insn_regnum) 225 { 226 insn_regnum = (((insn_regnum & 1) << 5) | 227 (insn_regnum & 0x1e)); 228 229 return (unsigned long *) &f->regs[insn_regnum]; 230 } 231 232 static inline unsigned int fps_regval(struct fpustate *f, 233 unsigned int insn_regnum) 234 { 235 return f->regs[insn_regnum]; 236 } 237 238 static inline unsigned int *fps_regaddr(struct fpustate *f, 239 unsigned int insn_regnum) 240 { 241 return &f->regs[insn_regnum]; 242 } 243 244 struct edge_tab { 245 u16 left, right; 246 }; 247 static struct edge_tab edge8_tab[8] = { 248 { 0xff, 0x80 }, 249 { 0x7f, 0xc0 }, 250 { 0x3f, 0xe0 }, 251 { 0x1f, 0xf0 }, 252 { 0x0f, 0xf8 }, 253 { 0x07, 0xfc }, 254 { 0x03, 0xfe }, 255 { 0x01, 0xff }, 256 }; 257 static struct edge_tab edge8_tab_l[8] = { 258 { 0xff, 0x01 }, 259 { 0xfe, 0x03 }, 260 { 0xfc, 0x07 }, 261 { 0xf8, 0x0f }, 262 { 0xf0, 0x1f }, 263 { 0xe0, 0x3f }, 264 { 0xc0, 0x7f }, 265 { 0x80, 0xff }, 266 }; 267 static struct edge_tab edge16_tab[4] = { 268 { 0xf, 0x8 }, 269 { 0x7, 0xc }, 270 { 0x3, 0xe }, 271 { 0x1, 0xf }, 272 }; 273 static struct edge_tab edge16_tab_l[4] = { 274 { 0xf, 0x1 }, 275 { 0xe, 0x3 }, 276 { 0xc, 0x7 }, 277 { 0x8, 0xf }, 278 }; 279 static struct edge_tab edge32_tab[2] = { 280 { 0x3, 0x2 }, 281 { 0x1, 0x3 }, 282 }; 283 static struct edge_tab edge32_tab_l[2] = { 284 { 0x3, 0x1 }, 285 { 0x2, 0x3 }, 286 }; 287 288 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) 289 { 290 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; 291 u16 left, right; 292 293 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 294 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); 295 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); 296 297 if (test_thread_flag(TIF_32BIT)) { 298 rs1 = rs1 & 0xffffffff; 299 rs2 = rs2 & 0xffffffff; 300 } 301 switch (opf) { 302 default: 303 case EDGE8_OPF: 304 case EDGE8N_OPF: 305 left = edge8_tab[rs1 & 0x7].left; 306 right = edge8_tab[rs2 & 0x7].right; 307 break; 308 case EDGE8L_OPF: 309 case EDGE8LN_OPF: 310 left = edge8_tab_l[rs1 & 0x7].left; 311 right = edge8_tab_l[rs2 & 0x7].right; 312 break; 313 314 case EDGE16_OPF: 315 case EDGE16N_OPF: 316 left = edge16_tab[(rs1 >> 1) & 0x3].left; 317 right = edge16_tab[(rs2 >> 1) & 0x3].right; 318 break; 319 320 case EDGE16L_OPF: 321 case EDGE16LN_OPF: 322 left = edge16_tab_l[(rs1 >> 1) & 0x3].left; 323 right = edge16_tab_l[(rs2 >> 1) & 0x3].right; 324 break; 325 326 case EDGE32_OPF: 327 case EDGE32N_OPF: 328 left = edge32_tab[(rs1 >> 2) & 0x1].left; 329 right = edge32_tab[(rs2 >> 2) & 0x1].right; 330 break; 331 332 case EDGE32L_OPF: 333 case EDGE32LN_OPF: 334 left = edge32_tab_l[(rs1 >> 2) & 0x1].left; 335 right = edge32_tab_l[(rs2 >> 2) & 0x1].right; 336 break; 337 }; 338 339 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) 340 rd_val = right & left; 341 else 342 rd_val = left; 343 344 store_reg(regs, rd_val, RD(insn)); 345 346 switch (opf) { 347 case EDGE8_OPF: 348 case EDGE8L_OPF: 349 case EDGE16_OPF: 350 case EDGE16L_OPF: 351 case EDGE32_OPF: 352 case EDGE32L_OPF: { 353 unsigned long ccr, tstate; 354 355 __asm__ __volatile__("subcc %1, %2, %%g0\n\t" 356 "rd %%ccr, %0" 357 : "=r" (ccr) 358 : "r" (orig_rs1), "r" (orig_rs2) 359 : "cc"); 360 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); 361 regs->tstate = tstate | (ccr << 32UL); 362 } 363 }; 364 } 365 366 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) 367 { 368 unsigned long rs1, rs2, rd_val; 369 unsigned int bits, bits_mask; 370 371 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 372 rs1 = fetch_reg(RS1(insn), regs); 373 rs2 = fetch_reg(RS2(insn), regs); 374 375 bits = (rs2 > 5 ? 5 : rs2); 376 bits_mask = (1UL << bits) - 1UL; 377 378 rd_val = ((((rs1 >> 11) & 0x3) << 0) | 379 (((rs1 >> 33) & 0x3) << 2) | 380 (((rs1 >> 55) & 0x1) << 4) | 381 (((rs1 >> 13) & 0xf) << 5) | 382 (((rs1 >> 35) & 0xf) << 9) | 383 (((rs1 >> 56) & 0xf) << 13) | 384 (((rs1 >> 17) & bits_mask) << 17) | 385 (((rs1 >> 39) & bits_mask) << (17 + bits)) | 386 (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); 387 388 switch (opf) { 389 case ARRAY16_OPF: 390 rd_val <<= 1; 391 break; 392 393 case ARRAY32_OPF: 394 rd_val <<= 2; 395 }; 396 397 store_reg(regs, rd_val, RD(insn)); 398 } 399 400 static void bmask(struct pt_regs *regs, unsigned int insn) 401 { 402 unsigned long rs1, rs2, rd_val, gsr; 403 404 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); 405 rs1 = fetch_reg(RS1(insn), regs); 406 rs2 = fetch_reg(RS2(insn), regs); 407 rd_val = rs1 + rs2; 408 409 store_reg(regs, rd_val, RD(insn)); 410 411 gsr = current_thread_info()->gsr[0] & 0xffffffff; 412 gsr |= rd_val << 32UL; 413 current_thread_info()->gsr[0] = gsr; 414 } 415 416 static void bshuffle(struct pt_regs *regs, unsigned int insn) 417 { 418 struct fpustate *f = FPUSTATE; 419 unsigned long rs1, rs2, rd_val; 420 unsigned long bmask, i; 421 422 bmask = current_thread_info()->gsr[0] >> 32UL; 423 424 rs1 = fpd_regval(f, RS1(insn)); 425 rs2 = fpd_regval(f, RS2(insn)); 426 427 rd_val = 0UL; 428 for (i = 0; i < 8; i++) { 429 unsigned long which = (bmask >> (i * 4)) & 0xf; 430 unsigned long byte; 431 432 if (which < 8) 433 byte = (rs1 >> (which * 8)) & 0xff; 434 else 435 byte = (rs2 >> ((which-8)*8)) & 0xff; 436 rd_val |= (byte << (i * 8)); 437 } 438 439 *fpd_regaddr(f, RD(insn)) = rd_val; 440 } 441 442 static void pdist(struct pt_regs *regs, unsigned int insn) 443 { 444 struct fpustate *f = FPUSTATE; 445 unsigned long rs1, rs2, *rd, rd_val; 446 unsigned long i; 447 448 rs1 = fpd_regval(f, RS1(insn)); 449 rs2 = fpd_regval(f, RS2(insn)); 450 rd = fpd_regaddr(f, RD(insn)); 451 452 rd_val = *rd; 453 454 for (i = 0; i < 8; i++) { 455 s16 s1, s2; 456 457 s1 = (rs1 >> (56 - (i * 8))) & 0xff; 458 s2 = (rs2 >> (56 - (i * 8))) & 0xff; 459 460 /* Absolute value of difference. */ 461 s1 -= s2; 462 if (s1 < 0) 463 s1 = ~s1 + 1; 464 465 rd_val += s1; 466 } 467 468 *rd = rd_val; 469 } 470 471 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) 472 { 473 struct fpustate *f = FPUSTATE; 474 unsigned long rs1, rs2, gsr, scale, rd_val; 475 476 gsr = current_thread_info()->gsr[0]; 477 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); 478 switch (opf) { 479 case FPACK16_OPF: { 480 unsigned long byte; 481 482 rs2 = fpd_regval(f, RS2(insn)); 483 rd_val = 0; 484 for (byte = 0; byte < 4; byte++) { 485 unsigned int val; 486 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; 487 int scaled = src << scale; 488 int from_fixed = scaled >> 7; 489 490 val = ((from_fixed < 0) ? 491 0 : 492 (from_fixed > 255) ? 493 255 : from_fixed); 494 495 rd_val |= (val << (8 * byte)); 496 } 497 *fps_regaddr(f, RD(insn)) = rd_val; 498 break; 499 } 500 501 case FPACK32_OPF: { 502 unsigned long word; 503 504 rs1 = fpd_regval(f, RS1(insn)); 505 rs2 = fpd_regval(f, RS2(insn)); 506 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); 507 for (word = 0; word < 2; word++) { 508 unsigned long val; 509 s32 src = (rs2 >> (word * 32UL)); 510 s64 scaled = src << scale; 511 s64 from_fixed = scaled >> 23; 512 513 val = ((from_fixed < 0) ? 514 0 : 515 (from_fixed > 255) ? 516 255 : from_fixed); 517 518 rd_val |= (val << (32 * word)); 519 } 520 *fpd_regaddr(f, RD(insn)) = rd_val; 521 break; 522 } 523 524 case FPACKFIX_OPF: { 525 unsigned long word; 526 527 rs2 = fpd_regval(f, RS2(insn)); 528 529 rd_val = 0; 530 for (word = 0; word < 2; word++) { 531 long val; 532 s32 src = (rs2 >> (word * 32UL)); 533 s64 scaled = src << scale; 534 s64 from_fixed = scaled >> 16; 535 536 val = ((from_fixed < -32768) ? 537 -32768 : 538 (from_fixed > 32767) ? 539 32767 : from_fixed); 540 541 rd_val |= ((val & 0xffff) << (word * 16)); 542 } 543 *fps_regaddr(f, RD(insn)) = rd_val; 544 break; 545 } 546 547 case FEXPAND_OPF: { 548 unsigned long byte; 549 550 rs2 = fps_regval(f, RS2(insn)); 551 552 rd_val = 0; 553 for (byte = 0; byte < 4; byte++) { 554 unsigned long val; 555 u8 src = (rs2 >> (byte * 8)) & 0xff; 556 557 val = src << 4; 558 559 rd_val |= (val << (byte * 16)); 560 } 561 *fpd_regaddr(f, RD(insn)) = rd_val; 562 break; 563 } 564 565 case FPMERGE_OPF: { 566 rs1 = fps_regval(f, RS1(insn)); 567 rs2 = fps_regval(f, RS2(insn)); 568 569 rd_val = (((rs2 & 0x000000ff) << 0) | 570 ((rs1 & 0x000000ff) << 8) | 571 ((rs2 & 0x0000ff00) << 8) | 572 ((rs1 & 0x0000ff00) << 16) | 573 ((rs2 & 0x00ff0000) << 16) | 574 ((rs1 & 0x00ff0000) << 24) | 575 ((rs2 & 0xff000000) << 24) | 576 ((rs1 & 0xff000000) << 32)); 577 *fpd_regaddr(f, RD(insn)) = rd_val; 578 break; 579 } 580 }; 581 } 582 583 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) 584 { 585 struct fpustate *f = FPUSTATE; 586 unsigned long rs1, rs2, rd_val; 587 588 switch (opf) { 589 case FMUL8x16_OPF: { 590 unsigned long byte; 591 592 rs1 = fps_regval(f, RS1(insn)); 593 rs2 = fpd_regval(f, RS2(insn)); 594 595 rd_val = 0; 596 for (byte = 0; byte < 4; byte++) { 597 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; 598 s16 src2 = (rs2 >> (byte * 16)) & 0xffff; 599 u32 prod = src1 * src2; 600 u16 scaled = ((prod & 0x00ffff00) >> 8); 601 602 /* Round up. */ 603 if (prod & 0x80) 604 scaled++; 605 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 606 } 607 608 *fpd_regaddr(f, RD(insn)) = rd_val; 609 break; 610 } 611 612 case FMUL8x16AU_OPF: 613 case FMUL8x16AL_OPF: { 614 unsigned long byte; 615 s16 src2; 616 617 rs1 = fps_regval(f, RS1(insn)); 618 rs2 = fps_regval(f, RS2(insn)); 619 620 rd_val = 0; 621 src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); 622 for (byte = 0; byte < 4; byte++) { 623 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; 624 u32 prod = src1 * src2; 625 u16 scaled = ((prod & 0x00ffff00) >> 8); 626 627 /* Round up. */ 628 if (prod & 0x80) 629 scaled++; 630 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 631 } 632 633 *fpd_regaddr(f, RD(insn)) = rd_val; 634 break; 635 } 636 637 case FMUL8SUx16_OPF: 638 case FMUL8ULx16_OPF: { 639 unsigned long byte, ushift; 640 641 rs1 = fpd_regval(f, RS1(insn)); 642 rs2 = fpd_regval(f, RS2(insn)); 643 644 rd_val = 0; 645 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; 646 for (byte = 0; byte < 4; byte++) { 647 u16 src1; 648 s16 src2; 649 u32 prod; 650 u16 scaled; 651 652 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); 653 src2 = ((rs2 >> (16 * byte)) & 0xffff); 654 prod = src1 * src2; 655 scaled = ((prod & 0x00ffff00) >> 8); 656 657 /* Round up. */ 658 if (prod & 0x80) 659 scaled++; 660 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); 661 } 662 663 *fpd_regaddr(f, RD(insn)) = rd_val; 664 break; 665 } 666 667 case FMULD8SUx16_OPF: 668 case FMULD8ULx16_OPF: { 669 unsigned long byte, ushift; 670 671 rs1 = fps_regval(f, RS1(insn)); 672 rs2 = fps_regval(f, RS2(insn)); 673 674 rd_val = 0; 675 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; 676 for (byte = 0; byte < 2; byte++) { 677 u16 src1; 678 s16 src2; 679 u32 prod; 680 u16 scaled; 681 682 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); 683 src2 = ((rs2 >> (16 * byte)) & 0xffff); 684 prod = src1 * src2; 685 scaled = ((prod & 0x00ffff00) >> 8); 686 687 /* Round up. */ 688 if (prod & 0x80) 689 scaled++; 690 rd_val |= ((scaled & 0xffffUL) << 691 ((byte * 32UL) + 7UL)); 692 } 693 *fpd_regaddr(f, RD(insn)) = rd_val; 694 break; 695 } 696 }; 697 } 698 699 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) 700 { 701 struct fpustate *f = FPUSTATE; 702 unsigned long rs1, rs2, rd_val, i; 703 704 rs1 = fpd_regval(f, RS1(insn)); 705 rs2 = fpd_regval(f, RS2(insn)); 706 707 rd_val = 0; 708 709 switch (opf) { 710 case FCMPGT16_OPF: 711 for (i = 0; i < 4; i++) { 712 s16 a = (rs1 >> (i * 16)) & 0xffff; 713 s16 b = (rs2 >> (i * 16)) & 0xffff; 714 715 if (a > b) 716 rd_val |= 1 << i; 717 } 718 break; 719 720 case FCMPGT32_OPF: 721 for (i = 0; i < 2; i++) { 722 s32 a = (rs1 >> (i * 32)) & 0xffff; 723 s32 b = (rs2 >> (i * 32)) & 0xffff; 724 725 if (a > b) 726 rd_val |= 1 << i; 727 } 728 break; 729 730 case FCMPLE16_OPF: 731 for (i = 0; i < 4; i++) { 732 s16 a = (rs1 >> (i * 16)) & 0xffff; 733 s16 b = (rs2 >> (i * 16)) & 0xffff; 734 735 if (a <= b) 736 rd_val |= 1 << i; 737 } 738 break; 739 740 case FCMPLE32_OPF: 741 for (i = 0; i < 2; i++) { 742 s32 a = (rs1 >> (i * 32)) & 0xffff; 743 s32 b = (rs2 >> (i * 32)) & 0xffff; 744 745 if (a <= b) 746 rd_val |= 1 << i; 747 } 748 break; 749 750 case FCMPNE16_OPF: 751 for (i = 0; i < 4; i++) { 752 s16 a = (rs1 >> (i * 16)) & 0xffff; 753 s16 b = (rs2 >> (i * 16)) & 0xffff; 754 755 if (a != b) 756 rd_val |= 1 << i; 757 } 758 break; 759 760 case FCMPNE32_OPF: 761 for (i = 0; i < 2; i++) { 762 s32 a = (rs1 >> (i * 32)) & 0xffff; 763 s32 b = (rs2 >> (i * 32)) & 0xffff; 764 765 if (a != b) 766 rd_val |= 1 << i; 767 } 768 break; 769 770 case FCMPEQ16_OPF: 771 for (i = 0; i < 4; i++) { 772 s16 a = (rs1 >> (i * 16)) & 0xffff; 773 s16 b = (rs2 >> (i * 16)) & 0xffff; 774 775 if (a == b) 776 rd_val |= 1 << i; 777 } 778 break; 779 780 case FCMPEQ32_OPF: 781 for (i = 0; i < 2; i++) { 782 s32 a = (rs1 >> (i * 32)) & 0xffff; 783 s32 b = (rs2 >> (i * 32)) & 0xffff; 784 785 if (a == b) 786 rd_val |= 1 << i; 787 } 788 break; 789 }; 790 791 maybe_flush_windows(0, 0, RD(insn), 0); 792 store_reg(regs, rd_val, RD(insn)); 793 } 794 795 /* Emulate the VIS instructions which are not implemented in 796 * hardware on Niagara. 797 */ 798 int vis_emul(struct pt_regs *regs, unsigned int insn) 799 { 800 unsigned long pc = regs->tpc; 801 unsigned int opf; 802 803 BUG_ON(regs->tstate & TSTATE_PRIV); 804 805 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 806 807 if (test_thread_flag(TIF_32BIT)) 808 pc = (u32)pc; 809 810 if (get_user(insn, (u32 __user *) pc)) 811 return -EFAULT; 812 813 save_and_clear_fpu(); 814 815 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; 816 switch (opf) { 817 default: 818 return -EINVAL; 819 820 /* Pixel Formatting Instructions. */ 821 case FPACK16_OPF: 822 case FPACK32_OPF: 823 case FPACKFIX_OPF: 824 case FEXPAND_OPF: 825 case FPMERGE_OPF: 826 pformat(regs, insn, opf); 827 break; 828 829 /* Partitioned Multiply Instructions */ 830 case FMUL8x16_OPF: 831 case FMUL8x16AU_OPF: 832 case FMUL8x16AL_OPF: 833 case FMUL8SUx16_OPF: 834 case FMUL8ULx16_OPF: 835 case FMULD8SUx16_OPF: 836 case FMULD8ULx16_OPF: 837 pmul(regs, insn, opf); 838 break; 839 840 /* Pixel Compare Instructions */ 841 case FCMPGT16_OPF: 842 case FCMPGT32_OPF: 843 case FCMPLE16_OPF: 844 case FCMPLE32_OPF: 845 case FCMPNE16_OPF: 846 case FCMPNE32_OPF: 847 case FCMPEQ16_OPF: 848 case FCMPEQ32_OPF: 849 pcmp(regs, insn, opf); 850 break; 851 852 /* Edge Handling Instructions */ 853 case EDGE8_OPF: 854 case EDGE8N_OPF: 855 case EDGE8L_OPF: 856 case EDGE8LN_OPF: 857 case EDGE16_OPF: 858 case EDGE16N_OPF: 859 case EDGE16L_OPF: 860 case EDGE16LN_OPF: 861 case EDGE32_OPF: 862 case EDGE32N_OPF: 863 case EDGE32L_OPF: 864 case EDGE32LN_OPF: 865 edge(regs, insn, opf); 866 break; 867 868 /* Pixel Component Distance */ 869 case PDIST_OPF: 870 pdist(regs, insn); 871 break; 872 873 /* Three-Dimensional Array Addressing Instructions */ 874 case ARRAY8_OPF: 875 case ARRAY16_OPF: 876 case ARRAY32_OPF: 877 array(regs, insn, opf); 878 break; 879 880 /* Byte Mask and Shuffle Instructions */ 881 case BMASK_OPF: 882 bmask(regs, insn); 883 break; 884 885 case BSHUFFLE_OPF: 886 bshuffle(regs, insn); 887 break; 888 }; 889 890 regs->tpc = regs->tnpc; 891 regs->tnpc += 4; 892 return 0; 893 } 894