1 /* 2 * Save/restore floating point context for signal handlers. 3 * 4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka 5 * 6 * This file is subject to the terms and conditions of the GNU General Public 7 * License. See the file "COPYING" in the main directory of this archive 8 * for more details. 9 * 10 * FIXME! These routines can be optimized in big endian case. 11 */ 12 #include <linux/sched.h> 13 #include <linux/signal.h> 14 #include <asm/processor.h> 15 #include <asm/io.h> 16 #include <asm/fpu.h> 17 18 /* The PR (precision) bit in the FP Status Register must be clear when 19 * an frchg instruction is executed, otherwise the instruction is undefined. 20 * Executing frchg with PR set causes a trap on some SH4 implementations. 21 */ 22 23 #define FPSCR_RCHG 0x00000000 24 25 26 /* 27 * Save FPU registers onto task structure. 28 * Assume called with FPU enabled (SR.FD=0). 29 */ 30 void 31 save_fpu(struct task_struct *tsk, struct pt_regs *regs) 32 { 33 unsigned long dummy; 34 35 clear_tsk_thread_flag(tsk, TIF_USEDFPU); 36 enable_fpu(); 37 asm volatile("sts.l fpul, @-%0\n\t" 38 "sts.l fpscr, @-%0\n\t" 39 "fmov.s fr15, @-%0\n\t" 40 "fmov.s fr14, @-%0\n\t" 41 "fmov.s fr13, @-%0\n\t" 42 "fmov.s fr12, @-%0\n\t" 43 "fmov.s fr11, @-%0\n\t" 44 "fmov.s fr10, @-%0\n\t" 45 "fmov.s fr9, @-%0\n\t" 46 "fmov.s fr8, @-%0\n\t" 47 "fmov.s fr7, @-%0\n\t" 48 "fmov.s fr6, @-%0\n\t" 49 "fmov.s fr5, @-%0\n\t" 50 "fmov.s fr4, @-%0\n\t" 51 "fmov.s fr3, @-%0\n\t" 52 "fmov.s fr2, @-%0\n\t" 53 "fmov.s fr1, @-%0\n\t" 54 "fmov.s fr0, @-%0\n\t" 55 "lds %3, fpscr\n\t" 56 : "=r" (dummy) 57 : "0" ((char *)(&tsk->thread.fpu.hard.status)), 58 "r" (FPSCR_RCHG), 59 "r" (FPSCR_INIT) 60 : "memory"); 61 62 disable_fpu(); 63 release_fpu(regs); 64 } 65 66 static void 67 restore_fpu(struct task_struct *tsk) 68 { 69 unsigned long dummy; 70 71 enable_fpu(); 72 asm volatile("fmov.s @%0+, fr0\n\t" 73 "fmov.s @%0+, fr1\n\t" 74 "fmov.s @%0+, fr2\n\t" 75 "fmov.s @%0+, fr3\n\t" 76 "fmov.s @%0+, fr4\n\t" 77 "fmov.s @%0+, fr5\n\t" 78 "fmov.s @%0+, fr6\n\t" 79 "fmov.s @%0+, fr7\n\t" 80 "fmov.s @%0+, fr8\n\t" 81 "fmov.s @%0+, fr9\n\t" 82 "fmov.s @%0+, fr10\n\t" 83 "fmov.s @%0+, fr11\n\t" 84 "fmov.s @%0+, fr12\n\t" 85 "fmov.s @%0+, fr13\n\t" 86 "fmov.s @%0+, fr14\n\t" 87 "fmov.s @%0+, fr15\n\t" 88 "lds.l @%0+, fpscr\n\t" 89 "lds.l @%0+, fpul\n\t" 90 : "=r" (dummy) 91 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) 92 : "memory"); 93 disable_fpu(); 94 } 95 96 /* 97 * Load the FPU with signalling NANS. This bit pattern we're using 98 * has the property that no matter wether considered as single or as 99 * double precission represents signaling NANS. 100 */ 101 102 static void 103 fpu_init(void) 104 { 105 enable_fpu(); 106 asm volatile("lds %0, fpul\n\t" 107 "fsts fpul, fr0\n\t" 108 "fsts fpul, fr1\n\t" 109 "fsts fpul, fr2\n\t" 110 "fsts fpul, fr3\n\t" 111 "fsts fpul, fr4\n\t" 112 "fsts fpul, fr5\n\t" 113 "fsts fpul, fr6\n\t" 114 "fsts fpul, fr7\n\t" 115 "fsts fpul, fr8\n\t" 116 "fsts fpul, fr9\n\t" 117 "fsts fpul, fr10\n\t" 118 "fsts fpul, fr11\n\t" 119 "fsts fpul, fr12\n\t" 120 "fsts fpul, fr13\n\t" 121 "fsts fpul, fr14\n\t" 122 "fsts fpul, fr15\n\t" 123 "lds %2, fpscr\n\t" 124 : /* no output */ 125 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 126 disable_fpu(); 127 } 128 129 /* 130 * Emulate arithmetic ops on denormalized number for some FPU insns. 131 */ 132 133 /* denormalized float * float */ 134 static int denormal_mulf(int hx, int hy) 135 { 136 unsigned int ix, iy; 137 unsigned long long m, n; 138 int exp, w; 139 140 ix = hx & 0x7fffffff; 141 iy = hy & 0x7fffffff; 142 if (iy < 0x00800000 || ix == 0) 143 return ((hx ^ hy) & 0x80000000); 144 145 exp = (iy & 0x7f800000) >> 23; 146 ix &= 0x007fffff; 147 iy = (iy & 0x007fffff) | 0x00800000; 148 m = (unsigned long long)ix * iy; 149 n = m; 150 w = -1; 151 while (n) { n >>= 1; w++; } 152 153 /* FIXME: use guard bits */ 154 exp += w - 126 - 46; 155 if (exp > 0) 156 ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23); 157 else if (exp + 22 >= 0) 158 ix = (int) (m >> (w - 22 - exp)) & 0x007fffff; 159 else 160 ix = 0; 161 162 ix |= (hx ^ hy) & 0x80000000; 163 return ix; 164 } 165 166 /* denormalized double * double */ 167 static void mult64(unsigned long long x, unsigned long long y, 168 unsigned long long *highp, unsigned long long *lowp) 169 { 170 unsigned long long sub0, sub1, sub2, sub3; 171 unsigned long long high, low; 172 173 sub0 = (x >> 32) * (unsigned long) (y >> 32); 174 sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32); 175 sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL); 176 sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL); 177 low = sub3; 178 high = 0LL; 179 sub3 += (sub1 << 32); 180 if (low > sub3) 181 high++; 182 low = sub3; 183 sub3 += (sub2 << 32); 184 if (low > sub3) 185 high++; 186 low = sub3; 187 high += (sub1 >> 32) + (sub2 >> 32); 188 high += sub0; 189 *lowp = low; 190 *highp = high; 191 } 192 193 static inline long long rshift64(unsigned long long mh, 194 unsigned long long ml, int n) 195 { 196 if (n >= 64) 197 return mh >> (n - 64); 198 return (mh << (64 - n)) | (ml >> n); 199 } 200 201 static long long denormal_muld(long long hx, long long hy) 202 { 203 unsigned long long ix, iy; 204 unsigned long long mh, ml, nh, nl; 205 int exp, w; 206 207 ix = hx & 0x7fffffffffffffffLL; 208 iy = hy & 0x7fffffffffffffffLL; 209 if (iy < 0x0010000000000000LL || ix == 0) 210 return ((hx ^ hy) & 0x8000000000000000LL); 211 212 exp = (iy & 0x7ff0000000000000LL) >> 52; 213 ix &= 0x000fffffffffffffLL; 214 iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL; 215 mult64(ix, iy, &mh, &ml); 216 nh = mh; 217 nl = ml; 218 w = -1; 219 if (nh) { 220 while (nh) { nh >>= 1; w++;} 221 w += 64; 222 } else 223 while (nl) { nl >>= 1; w++;} 224 225 /* FIXME: use guard bits */ 226 exp += w - 1022 - 52 * 2; 227 if (exp > 0) 228 ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL) 229 | ((long long)exp << 52); 230 else if (exp + 51 >= 0) 231 ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL; 232 else 233 ix = 0; 234 235 ix |= (hx ^ hy) & 0x8000000000000000LL; 236 return ix; 237 } 238 239 /* ix - iy where iy: denormal and ix, iy >= 0 */ 240 static int denormal_subf1(unsigned int ix, unsigned int iy) 241 { 242 int frac; 243 int exp; 244 245 if (ix < 0x00800000) 246 return ix - iy; 247 248 exp = (ix & 0x7f800000) >> 23; 249 if (exp - 1 > 31) 250 return ix; 251 iy >>= exp - 1; 252 if (iy == 0) 253 return ix; 254 255 frac = (ix & 0x007fffff) | 0x00800000; 256 frac -= iy; 257 while (frac < 0x00800000) { 258 if (--exp == 0) 259 return frac; 260 frac <<= 1; 261 } 262 263 return (exp << 23) | (frac & 0x007fffff); 264 } 265 266 /* ix + iy where iy: denormal and ix, iy >= 0 */ 267 static int denormal_addf1(unsigned int ix, unsigned int iy) 268 { 269 int frac; 270 int exp; 271 272 if (ix < 0x00800000) 273 return ix + iy; 274 275 exp = (ix & 0x7f800000) >> 23; 276 if (exp - 1 > 31) 277 return ix; 278 iy >>= exp - 1; 279 if (iy == 0) 280 return ix; 281 282 frac = (ix & 0x007fffff) | 0x00800000; 283 frac += iy; 284 if (frac >= 0x01000000) { 285 frac >>= 1; 286 ++exp; 287 } 288 289 return (exp << 23) | (frac & 0x007fffff); 290 } 291 292 static int denormal_addf(int hx, int hy) 293 { 294 unsigned int ix, iy; 295 int sign; 296 297 if ((hx ^ hy) & 0x80000000) { 298 sign = hx & 0x80000000; 299 ix = hx & 0x7fffffff; 300 iy = hy & 0x7fffffff; 301 if (iy < 0x00800000) { 302 ix = denormal_subf1(ix, iy); 303 if (ix < 0) { 304 ix = -ix; 305 sign ^= 0x80000000; 306 } 307 } else { 308 ix = denormal_subf1(iy, ix); 309 sign ^= 0x80000000; 310 } 311 } else { 312 sign = hx & 0x80000000; 313 ix = hx & 0x7fffffff; 314 iy = hy & 0x7fffffff; 315 if (iy < 0x00800000) 316 ix = denormal_addf1(ix, iy); 317 else 318 ix = denormal_addf1(iy, ix); 319 } 320 321 return sign | ix; 322 } 323 324 /* ix - iy where iy: denormal and ix, iy >= 0 */ 325 static long long denormal_subd1(unsigned long long ix, unsigned long long iy) 326 { 327 long long frac; 328 int exp; 329 330 if (ix < 0x0010000000000000LL) 331 return ix - iy; 332 333 exp = (ix & 0x7ff0000000000000LL) >> 52; 334 if (exp - 1 > 63) 335 return ix; 336 iy >>= exp - 1; 337 if (iy == 0) 338 return ix; 339 340 frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; 341 frac -= iy; 342 while (frac < 0x0010000000000000LL) { 343 if (--exp == 0) 344 return frac; 345 frac <<= 1; 346 } 347 348 return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL); 349 } 350 351 /* ix + iy where iy: denormal and ix, iy >= 0 */ 352 static long long denormal_addd1(unsigned long long ix, unsigned long long iy) 353 { 354 long long frac; 355 long long exp; 356 357 if (ix < 0x0010000000000000LL) 358 return ix + iy; 359 360 exp = (ix & 0x7ff0000000000000LL) >> 52; 361 if (exp - 1 > 63) 362 return ix; 363 iy >>= exp - 1; 364 if (iy == 0) 365 return ix; 366 367 frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; 368 frac += iy; 369 if (frac >= 0x0020000000000000LL) { 370 frac >>= 1; 371 ++exp; 372 } 373 374 return (exp << 52) | (frac & 0x000fffffffffffffLL); 375 } 376 377 static long long denormal_addd(long long hx, long long hy) 378 { 379 unsigned long long ix, iy; 380 long long sign; 381 382 if ((hx ^ hy) & 0x8000000000000000LL) { 383 sign = hx & 0x8000000000000000LL; 384 ix = hx & 0x7fffffffffffffffLL; 385 iy = hy & 0x7fffffffffffffffLL; 386 if (iy < 0x0010000000000000LL) { 387 ix = denormal_subd1(ix, iy); 388 if (ix < 0) { 389 ix = -ix; 390 sign ^= 0x8000000000000000LL; 391 } 392 } else { 393 ix = denormal_subd1(iy, ix); 394 sign ^= 0x8000000000000000LL; 395 } 396 } else { 397 sign = hx & 0x8000000000000000LL; 398 ix = hx & 0x7fffffffffffffffLL; 399 iy = hy & 0x7fffffffffffffffLL; 400 if (iy < 0x0010000000000000LL) 401 ix = denormal_addd1(ix, iy); 402 else 403 ix = denormal_addd1(iy, ix); 404 } 405 406 return sign | ix; 407 } 408 409 /** 410 * denormal_to_double - Given denormalized float number, 411 * store double float 412 * 413 * @fpu: Pointer to sh_fpu_hard structure 414 * @n: Index to FP register 415 */ 416 static void 417 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) 418 { 419 unsigned long du, dl; 420 unsigned long x = fpu->fpul; 421 int exp = 1023 - 126; 422 423 if (x != 0 && (x & 0x7f800000) == 0) { 424 du = (x & 0x80000000); 425 while ((x & 0x00800000) == 0) { 426 x <<= 1; 427 exp--; 428 } 429 x &= 0x007fffff; 430 du |= (exp << 20) | (x >> 3); 431 dl = x << 29; 432 433 fpu->fp_regs[n] = du; 434 fpu->fp_regs[n+1] = dl; 435 } 436 } 437 438 /** 439 * ieee_fpe_handler - Handle denormalized number exception 440 * 441 * @regs: Pointer to register structure 442 * 443 * Returns 1 when it's handled (should not cause exception). 444 */ 445 static int 446 ieee_fpe_handler (struct pt_regs *regs) 447 { 448 unsigned short insn = *(unsigned short *) regs->pc; 449 unsigned short finsn; 450 unsigned long nextpc; 451 int nib[4] = { 452 (insn >> 12) & 0xf, 453 (insn >> 8) & 0xf, 454 (insn >> 4) & 0xf, 455 insn & 0xf}; 456 457 if (nib[0] == 0xb || 458 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ 459 regs->pr = regs->pc + 4; 460 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ 461 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); 462 finsn = *(unsigned short *) (regs->pc + 2); 463 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ 464 if (regs->sr & 1) 465 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 466 else 467 nextpc = regs->pc + 4; 468 finsn = *(unsigned short *) (regs->pc + 2); 469 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ 470 if (regs->sr & 1) 471 nextpc = regs->pc + 4; 472 else 473 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 474 finsn = *(unsigned short *) (regs->pc + 2); 475 } else if (nib[0] == 0x4 && nib[3] == 0xb && 476 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ 477 nextpc = regs->regs[nib[1]]; 478 finsn = *(unsigned short *) (regs->pc + 2); 479 } else if (nib[0] == 0x0 && nib[3] == 0x3 && 480 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ 481 nextpc = regs->pc + 4 + regs->regs[nib[1]]; 482 finsn = *(unsigned short *) (regs->pc + 2); 483 } else if (insn == 0x000b) { /* rts */ 484 nextpc = regs->pr; 485 finsn = *(unsigned short *) (regs->pc + 2); 486 } else { 487 nextpc = regs->pc + 2; 488 finsn = insn; 489 } 490 491 #define FPSCR_FPU_ERROR (1 << 17) 492 493 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 494 struct task_struct *tsk = current; 495 496 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) { 497 /* FPU error */ 498 denormal_to_double (&tsk->thread.fpu.hard, 499 (finsn >> 8) & 0xf); 500 } else 501 return 0; 502 503 regs->pc = nextpc; 504 return 1; 505 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */ 506 struct task_struct *tsk = current; 507 int fpscr; 508 int n, m, prec; 509 unsigned int hx, hy; 510 511 n = (finsn >> 8) & 0xf; 512 m = (finsn >> 4) & 0xf; 513 hx = tsk->thread.fpu.hard.fp_regs[n]; 514 hy = tsk->thread.fpu.hard.fp_regs[m]; 515 fpscr = tsk->thread.fpu.hard.fpscr; 516 prec = fpscr & (1 << 19); 517 518 if ((fpscr & FPSCR_FPU_ERROR) 519 && (prec && ((hx & 0x7fffffff) < 0x00100000 520 || (hy & 0x7fffffff) < 0x00100000))) { 521 long long llx, lly; 522 523 /* FPU error because of denormal */ 524 llx = ((long long) hx << 32) 525 | tsk->thread.fpu.hard.fp_regs[n+1]; 526 lly = ((long long) hy << 32) 527 | tsk->thread.fpu.hard.fp_regs[m+1]; 528 if ((hx & 0x7fffffff) >= 0x00100000) 529 llx = denormal_muld(lly, llx); 530 else 531 llx = denormal_muld(llx, lly); 532 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 533 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 534 } else if ((fpscr & FPSCR_FPU_ERROR) 535 && (!prec && ((hx & 0x7fffffff) < 0x00800000 536 || (hy & 0x7fffffff) < 0x00800000))) { 537 /* FPU error because of denormal */ 538 if ((hx & 0x7fffffff) >= 0x00800000) 539 hx = denormal_mulf(hy, hx); 540 else 541 hx = denormal_mulf(hx, hy); 542 tsk->thread.fpu.hard.fp_regs[n] = hx; 543 } else 544 return 0; 545 546 regs->pc = nextpc; 547 return 1; 548 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */ 549 struct task_struct *tsk = current; 550 int fpscr; 551 int n, m, prec; 552 unsigned int hx, hy; 553 554 n = (finsn >> 8) & 0xf; 555 m = (finsn >> 4) & 0xf; 556 hx = tsk->thread.fpu.hard.fp_regs[n]; 557 hy = tsk->thread.fpu.hard.fp_regs[m]; 558 fpscr = tsk->thread.fpu.hard.fpscr; 559 prec = fpscr & (1 << 19); 560 561 if ((fpscr & FPSCR_FPU_ERROR) 562 && (prec && ((hx & 0x7fffffff) < 0x00100000 563 || (hy & 0x7fffffff) < 0x00100000))) { 564 long long llx, lly; 565 566 /* FPU error because of denormal */ 567 llx = ((long long) hx << 32) 568 | tsk->thread.fpu.hard.fp_regs[n+1]; 569 lly = ((long long) hy << 32) 570 | tsk->thread.fpu.hard.fp_regs[m+1]; 571 if ((finsn & 0xf00f) == 0xf000) 572 llx = denormal_addd(llx, lly); 573 else 574 llx = denormal_addd(llx, lly ^ (1LL << 63)); 575 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 576 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 577 } else if ((fpscr & FPSCR_FPU_ERROR) 578 && (!prec && ((hx & 0x7fffffff) < 0x00800000 579 || (hy & 0x7fffffff) < 0x00800000))) { 580 /* FPU error because of denormal */ 581 if ((finsn & 0xf00f) == 0xf000) 582 hx = denormal_addf(hx, hy); 583 else 584 hx = denormal_addf(hx, hy ^ 0x80000000); 585 tsk->thread.fpu.hard.fp_regs[n] = hx; 586 } else 587 return 0; 588 589 regs->pc = nextpc; 590 return 1; 591 } 592 593 return 0; 594 } 595 596 BUILD_TRAP_HANDLER(fpu_error) 597 { 598 struct task_struct *tsk = current; 599 TRAP_HANDLER_DECL; 600 601 save_fpu(tsk, regs); 602 if (ieee_fpe_handler(regs)) { 603 tsk->thread.fpu.hard.fpscr &= 604 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 605 grab_fpu(regs); 606 restore_fpu(tsk); 607 set_tsk_thread_flag(tsk, TIF_USEDFPU); 608 return; 609 } 610 611 force_sig(SIGFPE, tsk); 612 } 613 614 BUILD_TRAP_HANDLER(fpu_state_restore) 615 { 616 struct task_struct *tsk = current; 617 TRAP_HANDLER_DECL; 618 619 grab_fpu(regs); 620 if (!user_mode(regs)) { 621 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); 622 return; 623 } 624 625 if (used_math()) { 626 /* Using the FPU again. */ 627 restore_fpu(tsk); 628 } else { 629 /* First time FPU user. */ 630 fpu_init(); 631 set_used_math(); 632 } 633 set_tsk_thread_flag(tsk, TIF_USEDFPU); 634 } 635