1 /* 2 * arch/powerpc/math-emu/math_efp.c 3 * 4 * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc. 5 * 6 * Author: Ebony Zhu, <ebony.zhu@freescale.com> 7 * Yu Liu, <yu.liu@freescale.com> 8 * 9 * Derived from arch/alpha/math-emu/math.c 10 * arch/powerpc/math-emu/math.c 11 * 12 * Description: 13 * This file is the exception handler to make E500 SPE instructions 14 * fully comply with IEEE-754 floating point standard. 15 * 16 * This program is free software; you can redistribute it and/or 17 * modify it under the terms of the GNU General Public License 18 * as published by the Free Software Foundation; either version 19 * 2 of the License, or (at your option) any later version. 20 */ 21 22 #include <linux/types.h> 23 #include <linux/prctl.h> 24 25 #include <linux/uaccess.h> 26 #include <asm/reg.h> 27 28 #define FP_EX_BOOKE_E500_SPE 29 #include <asm/sfp-machine.h> 30 31 #include <math-emu/soft-fp.h> 32 #include <math-emu/single.h> 33 #include <math-emu/double.h> 34 35 #define EFAPU 0x4 36 37 #define VCT 0x4 38 #define SPFP 0x6 39 #define DPFP 0x7 40 41 #define EFSADD 0x2c0 42 #define EFSSUB 0x2c1 43 #define EFSABS 0x2c4 44 #define EFSNABS 0x2c5 45 #define EFSNEG 0x2c6 46 #define EFSMUL 0x2c8 47 #define EFSDIV 0x2c9 48 #define EFSCMPGT 0x2cc 49 #define EFSCMPLT 0x2cd 50 #define EFSCMPEQ 0x2ce 51 #define EFSCFD 0x2cf 52 #define EFSCFSI 0x2d1 53 #define EFSCTUI 0x2d4 54 #define EFSCTSI 0x2d5 55 #define EFSCTUF 0x2d6 56 #define EFSCTSF 0x2d7 57 #define EFSCTUIZ 0x2d8 58 #define EFSCTSIZ 0x2da 59 60 #define EVFSADD 0x280 61 #define EVFSSUB 0x281 62 #define EVFSABS 0x284 63 #define EVFSNABS 0x285 64 #define EVFSNEG 0x286 65 #define EVFSMUL 0x288 66 #define EVFSDIV 0x289 67 #define EVFSCMPGT 0x28c 68 #define EVFSCMPLT 0x28d 69 #define EVFSCMPEQ 0x28e 70 #define EVFSCTUI 0x294 71 #define EVFSCTSI 0x295 72 #define EVFSCTUF 0x296 73 #define EVFSCTSF 0x297 74 #define EVFSCTUIZ 0x298 75 #define EVFSCTSIZ 0x29a 76 77 #define EFDADD 0x2e0 78 #define EFDSUB 0x2e1 79 #define EFDABS 0x2e4 80 #define EFDNABS 0x2e5 81 #define EFDNEG 0x2e6 82 #define EFDMUL 0x2e8 83 #define EFDDIV 0x2e9 84 #define EFDCTUIDZ 0x2ea 85 #define EFDCTSIDZ 0x2eb 86 #define EFDCMPGT 0x2ec 87 #define EFDCMPLT 0x2ed 88 #define EFDCMPEQ 0x2ee 89 #define EFDCFS 0x2ef 90 #define EFDCTUI 0x2f4 91 #define EFDCTSI 0x2f5 92 #define EFDCTUF 0x2f6 93 #define EFDCTSF 0x2f7 94 #define EFDCTUIZ 0x2f8 95 #define EFDCTSIZ 0x2fa 96 97 #define AB 2 98 #define XA 3 99 #define XB 4 100 #define XCR 5 101 #define NOTYPE 0 102 103 #define SIGN_BIT_S (1UL << 31) 104 #define SIGN_BIT_D (1ULL << 63) 105 #define FP_EX_MASK (FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \ 106 FP_EX_UNDERFLOW | FP_EX_OVERFLOW) 107 108 static int have_e500_cpu_a005_erratum; 109 110 union dw_union { 111 u64 dp[1]; 112 u32 wp[2]; 113 }; 114 115 static unsigned long insn_type(unsigned long speinsn) 116 { 117 unsigned long ret = NOTYPE; 118 119 switch (speinsn & 0x7ff) { 120 case EFSABS: ret = XA; break; 121 case EFSADD: ret = AB; break; 122 case EFSCFD: ret = XB; break; 123 case EFSCMPEQ: ret = XCR; break; 124 case EFSCMPGT: ret = XCR; break; 125 case EFSCMPLT: ret = XCR; break; 126 case EFSCTSF: ret = XB; break; 127 case EFSCTSI: ret = XB; break; 128 case EFSCTSIZ: ret = XB; break; 129 case EFSCTUF: ret = XB; break; 130 case EFSCTUI: ret = XB; break; 131 case EFSCTUIZ: ret = XB; break; 132 case EFSDIV: ret = AB; break; 133 case EFSMUL: ret = AB; break; 134 case EFSNABS: ret = XA; break; 135 case EFSNEG: ret = XA; break; 136 case EFSSUB: ret = AB; break; 137 case EFSCFSI: ret = XB; break; 138 139 case EVFSABS: ret = XA; break; 140 case EVFSADD: ret = AB; break; 141 case EVFSCMPEQ: ret = XCR; break; 142 case EVFSCMPGT: ret = XCR; break; 143 case EVFSCMPLT: ret = XCR; break; 144 case EVFSCTSF: ret = XB; break; 145 case EVFSCTSI: ret = XB; break; 146 case EVFSCTSIZ: ret = XB; break; 147 case EVFSCTUF: ret = XB; break; 148 case EVFSCTUI: ret = XB; break; 149 case EVFSCTUIZ: ret = XB; break; 150 case EVFSDIV: ret = AB; break; 151 case EVFSMUL: ret = AB; break; 152 case EVFSNABS: ret = XA; break; 153 case EVFSNEG: ret = XA; break; 154 case EVFSSUB: ret = AB; break; 155 156 case EFDABS: ret = XA; break; 157 case EFDADD: ret = AB; break; 158 case EFDCFS: ret = XB; break; 159 case EFDCMPEQ: ret = XCR; break; 160 case EFDCMPGT: ret = XCR; break; 161 case EFDCMPLT: ret = XCR; break; 162 case EFDCTSF: ret = XB; break; 163 case EFDCTSI: ret = XB; break; 164 case EFDCTSIDZ: ret = XB; break; 165 case EFDCTSIZ: ret = XB; break; 166 case EFDCTUF: ret = XB; break; 167 case EFDCTUI: ret = XB; break; 168 case EFDCTUIDZ: ret = XB; break; 169 case EFDCTUIZ: ret = XB; break; 170 case EFDDIV: ret = AB; break; 171 case EFDMUL: ret = AB; break; 172 case EFDNABS: ret = XA; break; 173 case EFDNEG: ret = XA; break; 174 case EFDSUB: ret = AB; break; 175 } 176 177 return ret; 178 } 179 180 int do_spe_mathemu(struct pt_regs *regs) 181 { 182 FP_DECL_EX; 183 int IR, cmp; 184 185 unsigned long type, func, fc, fa, fb, src, speinsn; 186 union dw_union vc, va, vb; 187 188 if (get_user(speinsn, (unsigned int __user *) regs->nip)) 189 return -EFAULT; 190 if ((speinsn >> 26) != EFAPU) 191 return -EINVAL; /* not an spe instruction */ 192 193 type = insn_type(speinsn); 194 if (type == NOTYPE) 195 goto illegal; 196 197 func = speinsn & 0x7ff; 198 fc = (speinsn >> 21) & 0x1f; 199 fa = (speinsn >> 16) & 0x1f; 200 fb = (speinsn >> 11) & 0x1f; 201 src = (speinsn >> 5) & 0x7; 202 203 vc.wp[0] = current->thread.evr[fc]; 204 vc.wp[1] = regs->gpr[fc]; 205 va.wp[0] = current->thread.evr[fa]; 206 va.wp[1] = regs->gpr[fa]; 207 vb.wp[0] = current->thread.evr[fb]; 208 vb.wp[1] = regs->gpr[fb]; 209 210 __FPU_FPSCR = mfspr(SPRN_SPEFSCR); 211 212 pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); 213 pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); 214 pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); 215 pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); 216 217 switch (src) { 218 case SPFP: { 219 FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); 220 221 switch (type) { 222 case AB: 223 case XCR: 224 FP_UNPACK_SP(SA, va.wp + 1); 225 case XB: 226 FP_UNPACK_SP(SB, vb.wp + 1); 227 break; 228 case XA: 229 FP_UNPACK_SP(SA, va.wp + 1); 230 break; 231 } 232 233 pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c); 234 pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c); 235 236 switch (func) { 237 case EFSABS: 238 vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; 239 goto update_regs; 240 241 case EFSNABS: 242 vc.wp[1] = va.wp[1] | SIGN_BIT_S; 243 goto update_regs; 244 245 case EFSNEG: 246 vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; 247 goto update_regs; 248 249 case EFSADD: 250 FP_ADD_S(SR, SA, SB); 251 goto pack_s; 252 253 case EFSSUB: 254 FP_SUB_S(SR, SA, SB); 255 goto pack_s; 256 257 case EFSMUL: 258 FP_MUL_S(SR, SA, SB); 259 goto pack_s; 260 261 case EFSDIV: 262 FP_DIV_S(SR, SA, SB); 263 goto pack_s; 264 265 case EFSCMPEQ: 266 cmp = 0; 267 goto cmp_s; 268 269 case EFSCMPGT: 270 cmp = 1; 271 goto cmp_s; 272 273 case EFSCMPLT: 274 cmp = -1; 275 goto cmp_s; 276 277 case EFSCTSF: 278 case EFSCTUF: 279 if (SB_c == FP_CLS_NAN) { 280 vc.wp[1] = 0; 281 FP_SET_EXCEPTION(FP_EX_INVALID); 282 } else { 283 SB_e += (func == EFSCTSF ? 31 : 32); 284 FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, 285 (func == EFSCTSF)); 286 } 287 goto update_regs; 288 289 case EFSCFD: { 290 FP_DECL_D(DB); 291 FP_CLEAR_EXCEPTIONS; 292 FP_UNPACK_DP(DB, vb.dp); 293 294 pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", 295 DB_s, DB_f1, DB_f0, DB_e, DB_c); 296 297 FP_CONV(S, D, 1, 2, SR, DB); 298 goto pack_s; 299 } 300 301 case EFSCTSI: 302 case EFSCTUI: 303 if (SB_c == FP_CLS_NAN) { 304 vc.wp[1] = 0; 305 FP_SET_EXCEPTION(FP_EX_INVALID); 306 } else { 307 FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, 308 ((func & 0x3) != 0)); 309 } 310 goto update_regs; 311 312 case EFSCTSIZ: 313 case EFSCTUIZ: 314 if (SB_c == FP_CLS_NAN) { 315 vc.wp[1] = 0; 316 FP_SET_EXCEPTION(FP_EX_INVALID); 317 } else { 318 FP_TO_INT_S(vc.wp[1], SB, 32, 319 ((func & 0x3) != 0)); 320 } 321 goto update_regs; 322 323 default: 324 goto illegal; 325 } 326 break; 327 328 pack_s: 329 pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c); 330 331 FP_PACK_SP(vc.wp + 1, SR); 332 goto update_regs; 333 334 cmp_s: 335 FP_CMP_S(IR, SA, SB, 3); 336 if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB))) 337 FP_SET_EXCEPTION(FP_EX_INVALID); 338 if (IR == cmp) { 339 IR = 0x4; 340 } else { 341 IR = 0; 342 } 343 goto update_ccr; 344 } 345 346 case DPFP: { 347 FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); 348 349 switch (type) { 350 case AB: 351 case XCR: 352 FP_UNPACK_DP(DA, va.dp); 353 case XB: 354 FP_UNPACK_DP(DB, vb.dp); 355 break; 356 case XA: 357 FP_UNPACK_DP(DA, va.dp); 358 break; 359 } 360 361 pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n", 362 DA_s, DA_f1, DA_f0, DA_e, DA_c); 363 pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", 364 DB_s, DB_f1, DB_f0, DB_e, DB_c); 365 366 switch (func) { 367 case EFDABS: 368 vc.dp[0] = va.dp[0] & ~SIGN_BIT_D; 369 goto update_regs; 370 371 case EFDNABS: 372 vc.dp[0] = va.dp[0] | SIGN_BIT_D; 373 goto update_regs; 374 375 case EFDNEG: 376 vc.dp[0] = va.dp[0] ^ SIGN_BIT_D; 377 goto update_regs; 378 379 case EFDADD: 380 FP_ADD_D(DR, DA, DB); 381 goto pack_d; 382 383 case EFDSUB: 384 FP_SUB_D(DR, DA, DB); 385 goto pack_d; 386 387 case EFDMUL: 388 FP_MUL_D(DR, DA, DB); 389 goto pack_d; 390 391 case EFDDIV: 392 FP_DIV_D(DR, DA, DB); 393 goto pack_d; 394 395 case EFDCMPEQ: 396 cmp = 0; 397 goto cmp_d; 398 399 case EFDCMPGT: 400 cmp = 1; 401 goto cmp_d; 402 403 case EFDCMPLT: 404 cmp = -1; 405 goto cmp_d; 406 407 case EFDCTSF: 408 case EFDCTUF: 409 if (DB_c == FP_CLS_NAN) { 410 vc.wp[1] = 0; 411 FP_SET_EXCEPTION(FP_EX_INVALID); 412 } else { 413 DB_e += (func == EFDCTSF ? 31 : 32); 414 FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, 415 (func == EFDCTSF)); 416 } 417 goto update_regs; 418 419 case EFDCFS: { 420 FP_DECL_S(SB); 421 FP_CLEAR_EXCEPTIONS; 422 FP_UNPACK_SP(SB, vb.wp + 1); 423 424 pr_debug("SB: %ld %08lx %ld (%ld)\n", 425 SB_s, SB_f, SB_e, SB_c); 426 427 FP_CONV(D, S, 2, 1, DR, SB); 428 goto pack_d; 429 } 430 431 case EFDCTUIDZ: 432 case EFDCTSIDZ: 433 if (DB_c == FP_CLS_NAN) { 434 vc.dp[0] = 0; 435 FP_SET_EXCEPTION(FP_EX_INVALID); 436 } else { 437 FP_TO_INT_D(vc.dp[0], DB, 64, 438 ((func & 0x1) == 0)); 439 } 440 goto update_regs; 441 442 case EFDCTUI: 443 case EFDCTSI: 444 if (DB_c == FP_CLS_NAN) { 445 vc.wp[1] = 0; 446 FP_SET_EXCEPTION(FP_EX_INVALID); 447 } else { 448 FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, 449 ((func & 0x3) != 0)); 450 } 451 goto update_regs; 452 453 case EFDCTUIZ: 454 case EFDCTSIZ: 455 if (DB_c == FP_CLS_NAN) { 456 vc.wp[1] = 0; 457 FP_SET_EXCEPTION(FP_EX_INVALID); 458 } else { 459 FP_TO_INT_D(vc.wp[1], DB, 32, 460 ((func & 0x3) != 0)); 461 } 462 goto update_regs; 463 464 default: 465 goto illegal; 466 } 467 break; 468 469 pack_d: 470 pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n", 471 DR_s, DR_f1, DR_f0, DR_e, DR_c); 472 473 FP_PACK_DP(vc.dp, DR); 474 goto update_regs; 475 476 cmp_d: 477 FP_CMP_D(IR, DA, DB, 3); 478 if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) 479 FP_SET_EXCEPTION(FP_EX_INVALID); 480 if (IR == cmp) { 481 IR = 0x4; 482 } else { 483 IR = 0; 484 } 485 goto update_ccr; 486 487 } 488 489 case VCT: { 490 FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0); 491 FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1); 492 int IR0, IR1; 493 494 switch (type) { 495 case AB: 496 case XCR: 497 FP_UNPACK_SP(SA0, va.wp); 498 FP_UNPACK_SP(SA1, va.wp + 1); 499 case XB: 500 FP_UNPACK_SP(SB0, vb.wp); 501 FP_UNPACK_SP(SB1, vb.wp + 1); 502 break; 503 case XA: 504 FP_UNPACK_SP(SA0, va.wp); 505 FP_UNPACK_SP(SA1, va.wp + 1); 506 break; 507 } 508 509 pr_debug("SA0: %ld %08lx %ld (%ld)\n", 510 SA0_s, SA0_f, SA0_e, SA0_c); 511 pr_debug("SA1: %ld %08lx %ld (%ld)\n", 512 SA1_s, SA1_f, SA1_e, SA1_c); 513 pr_debug("SB0: %ld %08lx %ld (%ld)\n", 514 SB0_s, SB0_f, SB0_e, SB0_c); 515 pr_debug("SB1: %ld %08lx %ld (%ld)\n", 516 SB1_s, SB1_f, SB1_e, SB1_c); 517 518 switch (func) { 519 case EVFSABS: 520 vc.wp[0] = va.wp[0] & ~SIGN_BIT_S; 521 vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; 522 goto update_regs; 523 524 case EVFSNABS: 525 vc.wp[0] = va.wp[0] | SIGN_BIT_S; 526 vc.wp[1] = va.wp[1] | SIGN_BIT_S; 527 goto update_regs; 528 529 case EVFSNEG: 530 vc.wp[0] = va.wp[0] ^ SIGN_BIT_S; 531 vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; 532 goto update_regs; 533 534 case EVFSADD: 535 FP_ADD_S(SR0, SA0, SB0); 536 FP_ADD_S(SR1, SA1, SB1); 537 goto pack_vs; 538 539 case EVFSSUB: 540 FP_SUB_S(SR0, SA0, SB0); 541 FP_SUB_S(SR1, SA1, SB1); 542 goto pack_vs; 543 544 case EVFSMUL: 545 FP_MUL_S(SR0, SA0, SB0); 546 FP_MUL_S(SR1, SA1, SB1); 547 goto pack_vs; 548 549 case EVFSDIV: 550 FP_DIV_S(SR0, SA0, SB0); 551 FP_DIV_S(SR1, SA1, SB1); 552 goto pack_vs; 553 554 case EVFSCMPEQ: 555 cmp = 0; 556 goto cmp_vs; 557 558 case EVFSCMPGT: 559 cmp = 1; 560 goto cmp_vs; 561 562 case EVFSCMPLT: 563 cmp = -1; 564 goto cmp_vs; 565 566 case EVFSCTUF: 567 case EVFSCTSF: 568 if (SB0_c == FP_CLS_NAN) { 569 vc.wp[0] = 0; 570 FP_SET_EXCEPTION(FP_EX_INVALID); 571 } else { 572 SB0_e += (func == EVFSCTSF ? 31 : 32); 573 FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, 574 (func == EVFSCTSF)); 575 } 576 if (SB1_c == FP_CLS_NAN) { 577 vc.wp[1] = 0; 578 FP_SET_EXCEPTION(FP_EX_INVALID); 579 } else { 580 SB1_e += (func == EVFSCTSF ? 31 : 32); 581 FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, 582 (func == EVFSCTSF)); 583 } 584 goto update_regs; 585 586 case EVFSCTUI: 587 case EVFSCTSI: 588 if (SB0_c == FP_CLS_NAN) { 589 vc.wp[0] = 0; 590 FP_SET_EXCEPTION(FP_EX_INVALID); 591 } else { 592 FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, 593 ((func & 0x3) != 0)); 594 } 595 if (SB1_c == FP_CLS_NAN) { 596 vc.wp[1] = 0; 597 FP_SET_EXCEPTION(FP_EX_INVALID); 598 } else { 599 FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, 600 ((func & 0x3) != 0)); 601 } 602 goto update_regs; 603 604 case EVFSCTUIZ: 605 case EVFSCTSIZ: 606 if (SB0_c == FP_CLS_NAN) { 607 vc.wp[0] = 0; 608 FP_SET_EXCEPTION(FP_EX_INVALID); 609 } else { 610 FP_TO_INT_S(vc.wp[0], SB0, 32, 611 ((func & 0x3) != 0)); 612 } 613 if (SB1_c == FP_CLS_NAN) { 614 vc.wp[1] = 0; 615 FP_SET_EXCEPTION(FP_EX_INVALID); 616 } else { 617 FP_TO_INT_S(vc.wp[1], SB1, 32, 618 ((func & 0x3) != 0)); 619 } 620 goto update_regs; 621 622 default: 623 goto illegal; 624 } 625 break; 626 627 pack_vs: 628 pr_debug("SR0: %ld %08lx %ld (%ld)\n", 629 SR0_s, SR0_f, SR0_e, SR0_c); 630 pr_debug("SR1: %ld %08lx %ld (%ld)\n", 631 SR1_s, SR1_f, SR1_e, SR1_c); 632 633 FP_PACK_SP(vc.wp, SR0); 634 FP_PACK_SP(vc.wp + 1, SR1); 635 goto update_regs; 636 637 cmp_vs: 638 { 639 int ch, cl; 640 641 FP_CMP_S(IR0, SA0, SB0, 3); 642 FP_CMP_S(IR1, SA1, SB1, 3); 643 if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0))) 644 FP_SET_EXCEPTION(FP_EX_INVALID); 645 if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1))) 646 FP_SET_EXCEPTION(FP_EX_INVALID); 647 ch = (IR0 == cmp) ? 1 : 0; 648 cl = (IR1 == cmp) ? 1 : 0; 649 IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) | 650 ((ch & cl) << 0); 651 goto update_ccr; 652 } 653 } 654 default: 655 return -EINVAL; 656 } 657 658 update_ccr: 659 regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2)); 660 regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2)); 661 662 update_regs: 663 /* 664 * If the "invalid" exception sticky bit was set by the 665 * processor for non-finite input, but was not set before the 666 * instruction being emulated, clear it. Likewise for the 667 * "underflow" bit, which may have been set by the processor 668 * for exact underflow, not just inexact underflow when the 669 * flag should be set for IEEE 754 semantics. Other sticky 670 * exceptions will only be set by the processor when they are 671 * correct according to IEEE 754 semantics, and we must not 672 * clear sticky bits that were already set before the emulated 673 * instruction as they represent the user-visible sticky 674 * exception status. "inexact" traps to kernel are not 675 * required for IEEE semantics and are not enabled by default, 676 * so the "inexact" sticky bit may have been set by a previous 677 * instruction without the kernel being aware of it. 678 */ 679 __FPU_FPSCR 680 &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last; 681 __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK); 682 mtspr(SPRN_SPEFSCR, __FPU_FPSCR); 683 current->thread.spefscr_last = __FPU_FPSCR; 684 685 current->thread.evr[fc] = vc.wp[0]; 686 regs->gpr[fc] = vc.wp[1]; 687 688 pr_debug("ccr = %08lx\n", regs->ccr); 689 pr_debug("cur exceptions = %08x spefscr = %08lx\n", 690 FP_CUR_EXCEPTIONS, __FPU_FPSCR); 691 pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); 692 pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); 693 pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); 694 695 if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { 696 if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO) 697 && (current->thread.fpexc_mode & PR_FP_EXC_DIV)) 698 return 1; 699 if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW) 700 && (current->thread.fpexc_mode & PR_FP_EXC_OVF)) 701 return 1; 702 if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW) 703 && (current->thread.fpexc_mode & PR_FP_EXC_UND)) 704 return 1; 705 if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) 706 && (current->thread.fpexc_mode & PR_FP_EXC_RES)) 707 return 1; 708 if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID) 709 && (current->thread.fpexc_mode & PR_FP_EXC_INV)) 710 return 1; 711 } 712 return 0; 713 714 illegal: 715 if (have_e500_cpu_a005_erratum) { 716 /* according to e500 cpu a005 erratum, reissue efp inst */ 717 regs->nip -= 4; 718 pr_debug("re-issue efp inst: %08lx\n", speinsn); 719 return 0; 720 } 721 722 printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn); 723 return -ENOSYS; 724 } 725 726 int speround_handler(struct pt_regs *regs) 727 { 728 union dw_union fgpr; 729 int s_lo, s_hi; 730 int lo_inexact, hi_inexact; 731 int fp_result; 732 unsigned long speinsn, type, fb, fc, fptype, func; 733 734 if (get_user(speinsn, (unsigned int __user *) regs->nip)) 735 return -EFAULT; 736 if ((speinsn >> 26) != 4) 737 return -EINVAL; /* not an spe instruction */ 738 739 func = speinsn & 0x7ff; 740 type = insn_type(func); 741 if (type == XCR) return -ENOSYS; 742 743 __FPU_FPSCR = mfspr(SPRN_SPEFSCR); 744 pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); 745 746 fptype = (speinsn >> 5) & 0x7; 747 748 /* No need to round if the result is exact */ 749 lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX); 750 hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH); 751 if (!(lo_inexact || (hi_inexact && fptype == VCT))) 752 return 0; 753 754 fc = (speinsn >> 21) & 0x1f; 755 s_lo = regs->gpr[fc] & SIGN_BIT_S; 756 s_hi = current->thread.evr[fc] & SIGN_BIT_S; 757 fgpr.wp[0] = current->thread.evr[fc]; 758 fgpr.wp[1] = regs->gpr[fc]; 759 760 fb = (speinsn >> 11) & 0x1f; 761 switch (func) { 762 case EFSCTUIZ: 763 case EFSCTSIZ: 764 case EVFSCTUIZ: 765 case EVFSCTSIZ: 766 case EFDCTUIDZ: 767 case EFDCTSIDZ: 768 case EFDCTUIZ: 769 case EFDCTSIZ: 770 /* 771 * These instructions always round to zero, 772 * independent of the rounding mode. 773 */ 774 return 0; 775 776 case EFSCTUI: 777 case EFSCTUF: 778 case EVFSCTUI: 779 case EVFSCTUF: 780 case EFDCTUI: 781 case EFDCTUF: 782 fp_result = 0; 783 s_lo = 0; 784 s_hi = 0; 785 break; 786 787 case EFSCTSI: 788 case EFSCTSF: 789 fp_result = 0; 790 /* Recover the sign of a zero result if possible. */ 791 if (fgpr.wp[1] == 0) 792 s_lo = regs->gpr[fb] & SIGN_BIT_S; 793 break; 794 795 case EVFSCTSI: 796 case EVFSCTSF: 797 fp_result = 0; 798 /* Recover the sign of a zero result if possible. */ 799 if (fgpr.wp[1] == 0) 800 s_lo = regs->gpr[fb] & SIGN_BIT_S; 801 if (fgpr.wp[0] == 0) 802 s_hi = current->thread.evr[fb] & SIGN_BIT_S; 803 break; 804 805 case EFDCTSI: 806 case EFDCTSF: 807 fp_result = 0; 808 s_hi = s_lo; 809 /* Recover the sign of a zero result if possible. */ 810 if (fgpr.wp[1] == 0) 811 s_hi = current->thread.evr[fb] & SIGN_BIT_S; 812 break; 813 814 default: 815 fp_result = 1; 816 break; 817 } 818 819 pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); 820 821 switch (fptype) { 822 /* Since SPE instructions on E500 core can handle round to nearest 823 * and round toward zero with IEEE-754 complied, we just need 824 * to handle round toward +Inf and round toward -Inf by software. 825 */ 826 case SPFP: 827 if ((FP_ROUNDMODE) == FP_RND_PINF) { 828 if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */ 829 } else { /* round to -Inf */ 830 if (s_lo) { 831 if (fp_result) 832 fgpr.wp[1]++; /* Z < 0, choose Z2 */ 833 else 834 fgpr.wp[1]--; /* Z < 0, choose Z2 */ 835 } 836 } 837 break; 838 839 case DPFP: 840 if (FP_ROUNDMODE == FP_RND_PINF) { 841 if (!s_hi) { 842 if (fp_result) 843 fgpr.dp[0]++; /* Z > 0, choose Z1 */ 844 else 845 fgpr.wp[1]++; /* Z > 0, choose Z1 */ 846 } 847 } else { /* round to -Inf */ 848 if (s_hi) { 849 if (fp_result) 850 fgpr.dp[0]++; /* Z < 0, choose Z2 */ 851 else 852 fgpr.wp[1]--; /* Z < 0, choose Z2 */ 853 } 854 } 855 break; 856 857 case VCT: 858 if (FP_ROUNDMODE == FP_RND_PINF) { 859 if (lo_inexact && !s_lo) 860 fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ 861 if (hi_inexact && !s_hi) 862 fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ 863 } else { /* round to -Inf */ 864 if (lo_inexact && s_lo) { 865 if (fp_result) 866 fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ 867 else 868 fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ 869 } 870 if (hi_inexact && s_hi) { 871 if (fp_result) 872 fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ 873 else 874 fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ 875 } 876 } 877 break; 878 879 default: 880 return -EINVAL; 881 } 882 883 current->thread.evr[fc] = fgpr.wp[0]; 884 regs->gpr[fc] = fgpr.wp[1]; 885 886 pr_debug(" to fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); 887 888 if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) 889 return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0; 890 return 0; 891 } 892 893 int __init spe_mathemu_init(void) 894 { 895 u32 pvr, maj, min; 896 897 pvr = mfspr(SPRN_PVR); 898 899 if ((PVR_VER(pvr) == PVR_VER_E500V1) || 900 (PVR_VER(pvr) == PVR_VER_E500V2)) { 901 maj = PVR_MAJ(pvr); 902 min = PVR_MIN(pvr); 903 904 /* 905 * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1 906 * need cpu a005 errata workaround 907 */ 908 switch (maj) { 909 case 1: 910 if (min < 1) 911 have_e500_cpu_a005_erratum = 1; 912 break; 913 case 2: 914 if (min < 3) 915 have_e500_cpu_a005_erratum = 1; 916 break; 917 case 3: 918 case 4: 919 case 5: 920 if (min < 1) 921 have_e500_cpu_a005_erratum = 1; 922 break; 923 default: 924 break; 925 } 926 } 927 928 return 0; 929 } 930 931 module_init(spe_mathemu_init); 932