1*74d99a5eSPaul Mundt /* 2*74d99a5eSPaul Mundt * Save/restore floating point context for signal handlers. 3*74d99a5eSPaul Mundt * 4*74d99a5eSPaul Mundt * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka 5*74d99a5eSPaul Mundt * 6*74d99a5eSPaul Mundt * This file is subject to the terms and conditions of the GNU General Public 7*74d99a5eSPaul Mundt * License. See the file "COPYING" in the main directory of this archive 8*74d99a5eSPaul Mundt * for more details. 9*74d99a5eSPaul Mundt * 10*74d99a5eSPaul Mundt * FIXME! These routines can be optimized in big endian case. 11*74d99a5eSPaul Mundt */ 12*74d99a5eSPaul Mundt #include <linux/sched.h> 13*74d99a5eSPaul Mundt #include <linux/signal.h> 14*74d99a5eSPaul Mundt #include <asm/processor.h> 15*74d99a5eSPaul Mundt #include <asm/io.h> 16*74d99a5eSPaul Mundt 17*74d99a5eSPaul Mundt /* The PR (precision) bit in the FP Status Register must be clear when 18*74d99a5eSPaul Mundt * an frchg instruction is executed, otherwise the instruction is undefined. 19*74d99a5eSPaul Mundt * Executing frchg with PR set causes a trap on some SH4 implementations. 20*74d99a5eSPaul Mundt */ 21*74d99a5eSPaul Mundt 22*74d99a5eSPaul Mundt #define FPSCR_RCHG 0x00000000 23*74d99a5eSPaul Mundt 24*74d99a5eSPaul Mundt 25*74d99a5eSPaul Mundt /* 26*74d99a5eSPaul Mundt * Save FPU registers onto task structure. 27*74d99a5eSPaul Mundt * Assume called with FPU enabled (SR.FD=0). 
28*74d99a5eSPaul Mundt */ 29*74d99a5eSPaul Mundt void 30*74d99a5eSPaul Mundt save_fpu(struct task_struct *tsk, struct pt_regs *regs) 31*74d99a5eSPaul Mundt { 32*74d99a5eSPaul Mundt unsigned long dummy; 33*74d99a5eSPaul Mundt 34*74d99a5eSPaul Mundt clear_tsk_thread_flag(tsk, TIF_USEDFPU); 35*74d99a5eSPaul Mundt enable_fpu(); 36*74d99a5eSPaul Mundt asm volatile("sts.l fpul, @-%0\n\t" 37*74d99a5eSPaul Mundt "sts.l fpscr, @-%0\n\t" 38*74d99a5eSPaul Mundt "fmov.s fr15, @-%0\n\t" 39*74d99a5eSPaul Mundt "fmov.s fr14, @-%0\n\t" 40*74d99a5eSPaul Mundt "fmov.s fr13, @-%0\n\t" 41*74d99a5eSPaul Mundt "fmov.s fr12, @-%0\n\t" 42*74d99a5eSPaul Mundt "fmov.s fr11, @-%0\n\t" 43*74d99a5eSPaul Mundt "fmov.s fr10, @-%0\n\t" 44*74d99a5eSPaul Mundt "fmov.s fr9, @-%0\n\t" 45*74d99a5eSPaul Mundt "fmov.s fr8, @-%0\n\t" 46*74d99a5eSPaul Mundt "fmov.s fr7, @-%0\n\t" 47*74d99a5eSPaul Mundt "fmov.s fr6, @-%0\n\t" 48*74d99a5eSPaul Mundt "fmov.s fr5, @-%0\n\t" 49*74d99a5eSPaul Mundt "fmov.s fr4, @-%0\n\t" 50*74d99a5eSPaul Mundt "fmov.s fr3, @-%0\n\t" 51*74d99a5eSPaul Mundt "fmov.s fr2, @-%0\n\t" 52*74d99a5eSPaul Mundt "fmov.s fr1, @-%0\n\t" 53*74d99a5eSPaul Mundt "fmov.s fr0, @-%0\n\t" 54*74d99a5eSPaul Mundt "lds %3, fpscr\n\t" 55*74d99a5eSPaul Mundt : "=r" (dummy) 56*74d99a5eSPaul Mundt : "0" ((char *)(&tsk->thread.fpu.hard.status)), 57*74d99a5eSPaul Mundt "r" (FPSCR_RCHG), 58*74d99a5eSPaul Mundt "r" (FPSCR_INIT) 59*74d99a5eSPaul Mundt : "memory"); 60*74d99a5eSPaul Mundt 61*74d99a5eSPaul Mundt disable_fpu(); 62*74d99a5eSPaul Mundt release_fpu(regs); 63*74d99a5eSPaul Mundt } 64*74d99a5eSPaul Mundt 65*74d99a5eSPaul Mundt static void 66*74d99a5eSPaul Mundt restore_fpu(struct task_struct *tsk) 67*74d99a5eSPaul Mundt { 68*74d99a5eSPaul Mundt unsigned long dummy; 69*74d99a5eSPaul Mundt 70*74d99a5eSPaul Mundt enable_fpu(); 71*74d99a5eSPaul Mundt asm volatile("fmov.s @%0+, fr0\n\t" 72*74d99a5eSPaul Mundt "fmov.s @%0+, fr1\n\t" 73*74d99a5eSPaul Mundt "fmov.s @%0+, fr2\n\t" 74*74d99a5eSPaul Mundt "fmov.s 
@%0+, fr3\n\t" 75*74d99a5eSPaul Mundt "fmov.s @%0+, fr4\n\t" 76*74d99a5eSPaul Mundt "fmov.s @%0+, fr5\n\t" 77*74d99a5eSPaul Mundt "fmov.s @%0+, fr6\n\t" 78*74d99a5eSPaul Mundt "fmov.s @%0+, fr7\n\t" 79*74d99a5eSPaul Mundt "fmov.s @%0+, fr8\n\t" 80*74d99a5eSPaul Mundt "fmov.s @%0+, fr9\n\t" 81*74d99a5eSPaul Mundt "fmov.s @%0+, fr10\n\t" 82*74d99a5eSPaul Mundt "fmov.s @%0+, fr11\n\t" 83*74d99a5eSPaul Mundt "fmov.s @%0+, fr12\n\t" 84*74d99a5eSPaul Mundt "fmov.s @%0+, fr13\n\t" 85*74d99a5eSPaul Mundt "fmov.s @%0+, fr14\n\t" 86*74d99a5eSPaul Mundt "fmov.s @%0+, fr15\n\t" 87*74d99a5eSPaul Mundt "lds.l @%0+, fpscr\n\t" 88*74d99a5eSPaul Mundt "lds.l @%0+, fpul\n\t" 89*74d99a5eSPaul Mundt : "=r" (dummy) 90*74d99a5eSPaul Mundt : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) 91*74d99a5eSPaul Mundt : "memory"); 92*74d99a5eSPaul Mundt disable_fpu(); 93*74d99a5eSPaul Mundt } 94*74d99a5eSPaul Mundt 95*74d99a5eSPaul Mundt /* 96*74d99a5eSPaul Mundt * Load the FPU with signalling NANS. This bit pattern we're using 97*74d99a5eSPaul Mundt * has the property that no matter wether considered as single or as 98*74d99a5eSPaul Mundt * double precission represents signaling NANS. 
99*74d99a5eSPaul Mundt */ 100*74d99a5eSPaul Mundt 101*74d99a5eSPaul Mundt static void 102*74d99a5eSPaul Mundt fpu_init(void) 103*74d99a5eSPaul Mundt { 104*74d99a5eSPaul Mundt enable_fpu(); 105*74d99a5eSPaul Mundt asm volatile("lds %0, fpul\n\t" 106*74d99a5eSPaul Mundt "fsts fpul, fr0\n\t" 107*74d99a5eSPaul Mundt "fsts fpul, fr1\n\t" 108*74d99a5eSPaul Mundt "fsts fpul, fr2\n\t" 109*74d99a5eSPaul Mundt "fsts fpul, fr3\n\t" 110*74d99a5eSPaul Mundt "fsts fpul, fr4\n\t" 111*74d99a5eSPaul Mundt "fsts fpul, fr5\n\t" 112*74d99a5eSPaul Mundt "fsts fpul, fr6\n\t" 113*74d99a5eSPaul Mundt "fsts fpul, fr7\n\t" 114*74d99a5eSPaul Mundt "fsts fpul, fr8\n\t" 115*74d99a5eSPaul Mundt "fsts fpul, fr9\n\t" 116*74d99a5eSPaul Mundt "fsts fpul, fr10\n\t" 117*74d99a5eSPaul Mundt "fsts fpul, fr11\n\t" 118*74d99a5eSPaul Mundt "fsts fpul, fr12\n\t" 119*74d99a5eSPaul Mundt "fsts fpul, fr13\n\t" 120*74d99a5eSPaul Mundt "fsts fpul, fr14\n\t" 121*74d99a5eSPaul Mundt "fsts fpul, fr15\n\t" 122*74d99a5eSPaul Mundt "lds %2, fpscr\n\t" 123*74d99a5eSPaul Mundt : /* no output */ 124*74d99a5eSPaul Mundt : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 125*74d99a5eSPaul Mundt disable_fpu(); 126*74d99a5eSPaul Mundt } 127*74d99a5eSPaul Mundt 128*74d99a5eSPaul Mundt /* 129*74d99a5eSPaul Mundt * Emulate arithmetic ops on denormalized number for some FPU insns. 
/*
 * Emulate arithmetic ops on denormalized number for some FPU insns.
 *
 * The helpers below work on raw IEEE 754 bit patterns held in integers.
 * Results are truncated, not rounded (see the FIXMEs about guard bits).
 */

/* Index of the most significant set bit of @v, or -1 when @v is zero. */
static int denormal_msb(unsigned long long v)
{
	int w = -1;

	while (v) {
		v >>= 1;
		w++;
	}
	return w;
}

/*
 * denormalized float * float
 *
 * @hx: single-precision bit pattern of the denormalized (or zero) operand
 * @hy: single-precision bit pattern of the other operand
 *
 * Returns the bit pattern of the product.  A signed zero is returned when
 * @hx is zero or when @hy is itself below the smallest normal number.
 * The fraction of @hx is used without an implicit leading one, so callers
 * must pass the denormal operand first.
 */
static int denormal_mulf(int hx, int hy)
{
	unsigned int ix, iy;
	unsigned long long m;
	int exp, w;

	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;
	if (iy < 0x00800000 || ix == 0)
		return ((hx ^ hy) & 0x80000000);

	exp = (iy & 0x7f800000) >> 23;
	ix &= 0x007fffff;
	iy = (iy & 0x007fffff) | 0x00800000;	/* restore the hidden bit */
	m = (unsigned long long)ix * iy;
	w = denormal_msb(m);

	/* FIXME: use guard bits */
	/*
	 * Biased exponent of the product once its leading one (bit w of
	 * m) is taken as the hidden bit: both fractions carry 23
	 * fractional bits (46 in total) and a denormal's exponent is -126.
	 */
	exp += w - 126 - 46;
	if (exp > 0)
		/* normal result: leading one moves to bit 23 and is dropped */
		ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23);
	else if (exp + 22 >= 0)
		/* denormal result: leading one lands at bit 22 + exp */
		ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
	else
		/* underflows below the smallest denormal */
		ix = 0;

	ix |= (hx ^ hy) & 0x80000000;
	return ix;
}

/*
 * denormalized double * double
 *
 * mult64() forms the full 128-bit product of @x and @y from four 32x32
 * partial products and stores the upper and lower 64 bits through @highp
 * and @lowp.
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	unsigned long long sub0, sub1, sub2, sub3;
	unsigned long long high, low;

	sub0 = (x >> 32) * (unsigned long) (y >> 32);
	sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
	sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
	sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);

	/*
	 * Add the low halves of the two cross products into the low word;
	 * an unsigned wrap-around (sum smaller than before) is a carry
	 * into the high word.
	 */
	low = sub3;
	high = 0LL;
	sub3 += (sub1 << 32);
	if (low > sub3)
		high++;
	low = sub3;
	sub3 += (sub2 << 32);
	if (low > sub3)
		high++;
	low = sub3;

	high += (sub1 >> 32) + (sub2 >> 32);
	high += sub0;
	*lowp = low;
	*highp = high;
}

/*
 * Return the low 64 bits of the 128-bit value @mh:@ml shifted right by
 * @n bits.
 *
 * A shift count of zero (or less) returns @ml unchanged and a count of
 * 128 or more returns 0; both are handled explicitly because shifting a
 * 64-bit value by its full width or more is undefined in C.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}

/*
 * Double-precision counterpart of denormal_mulf().
 *
 * @hx: double-precision bit pattern of the denormalized (or zero) operand
 * @hy: double-precision bit pattern of the other operand
 *
 * Returns the bit pattern of the product; a signed zero is returned when
 * @hx is zero or @hy is below the smallest normal number.
 */
static long long denormal_muld(long long hx, long long hy)
{
	unsigned long long ix, iy;
	unsigned long long mh, ml;
	int exp, w;

	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;
	if (iy < 0x0010000000000000LL || ix == 0)
		return ((hx ^ hy) & 0x8000000000000000LL);

	exp = (iy & 0x7ff0000000000000LL) >> 52;
	ix &= 0x000fffffffffffffLL;
	iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(ix, iy, &mh, &ml);

	/* position of the leading one within the 128-bit product */
	if (mh)
		w = denormal_msb(mh) + 64;
	else
		w = denormal_msb(ml);

	/* FIXME: use guard bits */
	exp += w - 1022 - 52 * 2;
	if (exp > 0)
		ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
			| ((long long)exp << 52);
	else if (exp + 51 >= 0)
		ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
	else
		ix = 0;

	ix |= (hx ^ hy) & 0x8000000000000000LL;
	return ix;
}

/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are single-precision magnitudes (sign bit clear).  When
 * @ix is denormal too, the plain integer difference is returned and is
 * negative if @iy is the larger one; the caller has to deal with that.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp - 1 > 31)
		return ix;	/* iy would be shifted out entirely */
	iy >>= exp - 1;		/* align iy with ix's exponent */
	if (iy == 0)
		return ix;

	frac = (ix & 0x007fffff) | 0x00800000;
	frac -= iy;
	/* renormalize; stop early if the result itself becomes denormal */
	while (frac < 0x00800000) {
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return (exp << 23) | (frac & 0x007fffff);
}
264*74d99a5eSPaul Mundt 265*74d99a5eSPaul Mundt /* ix + iy where iy: denormal and ix, iy >= 0 */ 266*74d99a5eSPaul Mundt static int denormal_addf1(unsigned int ix, unsigned int iy) 267*74d99a5eSPaul Mundt { 268*74d99a5eSPaul Mundt int frac; 269*74d99a5eSPaul Mundt int exp; 270*74d99a5eSPaul Mundt 271*74d99a5eSPaul Mundt if (ix < 0x00800000) 272*74d99a5eSPaul Mundt return ix + iy; 273*74d99a5eSPaul Mundt 274*74d99a5eSPaul Mundt exp = (ix & 0x7f800000) >> 23; 275*74d99a5eSPaul Mundt if (exp - 1 > 31) 276*74d99a5eSPaul Mundt return ix; 277*74d99a5eSPaul Mundt iy >>= exp - 1; 278*74d99a5eSPaul Mundt if (iy == 0) 279*74d99a5eSPaul Mundt return ix; 280*74d99a5eSPaul Mundt 281*74d99a5eSPaul Mundt frac = (ix & 0x007fffff) | 0x00800000; 282*74d99a5eSPaul Mundt frac += iy; 283*74d99a5eSPaul Mundt if (frac >= 0x01000000) { 284*74d99a5eSPaul Mundt frac >>= 1; 285*74d99a5eSPaul Mundt ++exp; 286*74d99a5eSPaul Mundt } 287*74d99a5eSPaul Mundt 288*74d99a5eSPaul Mundt return (exp << 23) | (frac & 0x007fffff); 289*74d99a5eSPaul Mundt } 290*74d99a5eSPaul Mundt 291*74d99a5eSPaul Mundt static int denormal_addf(int hx, int hy) 292*74d99a5eSPaul Mundt { 293*74d99a5eSPaul Mundt unsigned int ix, iy; 294*74d99a5eSPaul Mundt int sign; 295*74d99a5eSPaul Mundt 296*74d99a5eSPaul Mundt if ((hx ^ hy) & 0x80000000) { 297*74d99a5eSPaul Mundt sign = hx & 0x80000000; 298*74d99a5eSPaul Mundt ix = hx & 0x7fffffff; 299*74d99a5eSPaul Mundt iy = hy & 0x7fffffff; 300*74d99a5eSPaul Mundt if (iy < 0x00800000) { 301*74d99a5eSPaul Mundt ix = denormal_subf1(ix, iy); 302*74d99a5eSPaul Mundt if (ix < 0) { 303*74d99a5eSPaul Mundt ix = -ix; 304*74d99a5eSPaul Mundt sign ^= 0x80000000; 305*74d99a5eSPaul Mundt } 306*74d99a5eSPaul Mundt } else { 307*74d99a5eSPaul Mundt ix = denormal_subf1(iy, ix); 308*74d99a5eSPaul Mundt sign ^= 0x80000000; 309*74d99a5eSPaul Mundt } 310*74d99a5eSPaul Mundt } else { 311*74d99a5eSPaul Mundt sign = hx & 0x80000000; 312*74d99a5eSPaul Mundt ix = hx & 0x7fffffff; 313*74d99a5eSPaul Mundt iy = 
hy & 0x7fffffff; 314*74d99a5eSPaul Mundt if (iy < 0x00800000) 315*74d99a5eSPaul Mundt ix = denormal_addf1(ix, iy); 316*74d99a5eSPaul Mundt else 317*74d99a5eSPaul Mundt ix = denormal_addf1(iy, ix); 318*74d99a5eSPaul Mundt } 319*74d99a5eSPaul Mundt 320*74d99a5eSPaul Mundt return sign | ix; 321*74d99a5eSPaul Mundt } 322*74d99a5eSPaul Mundt 323*74d99a5eSPaul Mundt /* ix - iy where iy: denormal and ix, iy >= 0 */ 324*74d99a5eSPaul Mundt static long long denormal_subd1(unsigned long long ix, unsigned long long iy) 325*74d99a5eSPaul Mundt { 326*74d99a5eSPaul Mundt long long frac; 327*74d99a5eSPaul Mundt int exp; 328*74d99a5eSPaul Mundt 329*74d99a5eSPaul Mundt if (ix < 0x0010000000000000LL) 330*74d99a5eSPaul Mundt return ix - iy; 331*74d99a5eSPaul Mundt 332*74d99a5eSPaul Mundt exp = (ix & 0x7ff0000000000000LL) >> 52; 333*74d99a5eSPaul Mundt if (exp - 1 > 63) 334*74d99a5eSPaul Mundt return ix; 335*74d99a5eSPaul Mundt iy >>= exp - 1; 336*74d99a5eSPaul Mundt if (iy == 0) 337*74d99a5eSPaul Mundt return ix; 338*74d99a5eSPaul Mundt 339*74d99a5eSPaul Mundt frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; 340*74d99a5eSPaul Mundt frac -= iy; 341*74d99a5eSPaul Mundt while (frac < 0x0010000000000000LL) { 342*74d99a5eSPaul Mundt if (--exp == 0) 343*74d99a5eSPaul Mundt return frac; 344*74d99a5eSPaul Mundt frac <<= 1; 345*74d99a5eSPaul Mundt } 346*74d99a5eSPaul Mundt 347*74d99a5eSPaul Mundt return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL); 348*74d99a5eSPaul Mundt } 349*74d99a5eSPaul Mundt 350*74d99a5eSPaul Mundt /* ix + iy where iy: denormal and ix, iy >= 0 */ 351*74d99a5eSPaul Mundt static long long denormal_addd1(unsigned long long ix, unsigned long long iy) 352*74d99a5eSPaul Mundt { 353*74d99a5eSPaul Mundt long long frac; 354*74d99a5eSPaul Mundt long long exp; 355*74d99a5eSPaul Mundt 356*74d99a5eSPaul Mundt if (ix < 0x0010000000000000LL) 357*74d99a5eSPaul Mundt return ix + iy; 358*74d99a5eSPaul Mundt 359*74d99a5eSPaul Mundt exp = (ix & 0x7ff0000000000000LL) >> 
52; 360*74d99a5eSPaul Mundt if (exp - 1 > 63) 361*74d99a5eSPaul Mundt return ix; 362*74d99a5eSPaul Mundt iy >>= exp - 1; 363*74d99a5eSPaul Mundt if (iy == 0) 364*74d99a5eSPaul Mundt return ix; 365*74d99a5eSPaul Mundt 366*74d99a5eSPaul Mundt frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; 367*74d99a5eSPaul Mundt frac += iy; 368*74d99a5eSPaul Mundt if (frac >= 0x0020000000000000LL) { 369*74d99a5eSPaul Mundt frac >>= 1; 370*74d99a5eSPaul Mundt ++exp; 371*74d99a5eSPaul Mundt } 372*74d99a5eSPaul Mundt 373*74d99a5eSPaul Mundt return (exp << 52) | (frac & 0x000fffffffffffffLL); 374*74d99a5eSPaul Mundt } 375*74d99a5eSPaul Mundt 376*74d99a5eSPaul Mundt static long long denormal_addd(long long hx, long long hy) 377*74d99a5eSPaul Mundt { 378*74d99a5eSPaul Mundt unsigned long long ix, iy; 379*74d99a5eSPaul Mundt long long sign; 380*74d99a5eSPaul Mundt 381*74d99a5eSPaul Mundt if ((hx ^ hy) & 0x8000000000000000LL) { 382*74d99a5eSPaul Mundt sign = hx & 0x8000000000000000LL; 383*74d99a5eSPaul Mundt ix = hx & 0x7fffffffffffffffLL; 384*74d99a5eSPaul Mundt iy = hy & 0x7fffffffffffffffLL; 385*74d99a5eSPaul Mundt if (iy < 0x0010000000000000LL) { 386*74d99a5eSPaul Mundt ix = denormal_subd1(ix, iy); 387*74d99a5eSPaul Mundt if (ix < 0) { 388*74d99a5eSPaul Mundt ix = -ix; 389*74d99a5eSPaul Mundt sign ^= 0x8000000000000000LL; 390*74d99a5eSPaul Mundt } 391*74d99a5eSPaul Mundt } else { 392*74d99a5eSPaul Mundt ix = denormal_subd1(iy, ix); 393*74d99a5eSPaul Mundt sign ^= 0x8000000000000000LL; 394*74d99a5eSPaul Mundt } 395*74d99a5eSPaul Mundt } else { 396*74d99a5eSPaul Mundt sign = hx & 0x8000000000000000LL; 397*74d99a5eSPaul Mundt ix = hx & 0x7fffffffffffffffLL; 398*74d99a5eSPaul Mundt iy = hy & 0x7fffffffffffffffLL; 399*74d99a5eSPaul Mundt if (iy < 0x0010000000000000LL) 400*74d99a5eSPaul Mundt ix = denormal_addd1(ix, iy); 401*74d99a5eSPaul Mundt else 402*74d99a5eSPaul Mundt ix = denormal_addd1(iy, ix); 403*74d99a5eSPaul Mundt } 404*74d99a5eSPaul Mundt 405*74d99a5eSPaul Mundt return 
sign | ix; 406*74d99a5eSPaul Mundt } 407*74d99a5eSPaul Mundt 408*74d99a5eSPaul Mundt /** 409*74d99a5eSPaul Mundt * denormal_to_double - Given denormalized float number, 410*74d99a5eSPaul Mundt * store double float 411*74d99a5eSPaul Mundt * 412*74d99a5eSPaul Mundt * @fpu: Pointer to sh_fpu_hard structure 413*74d99a5eSPaul Mundt * @n: Index to FP register 414*74d99a5eSPaul Mundt */ 415*74d99a5eSPaul Mundt static void 416*74d99a5eSPaul Mundt denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) 417*74d99a5eSPaul Mundt { 418*74d99a5eSPaul Mundt unsigned long du, dl; 419*74d99a5eSPaul Mundt unsigned long x = fpu->fpul; 420*74d99a5eSPaul Mundt int exp = 1023 - 126; 421*74d99a5eSPaul Mundt 422*74d99a5eSPaul Mundt if (x != 0 && (x & 0x7f800000) == 0) { 423*74d99a5eSPaul Mundt du = (x & 0x80000000); 424*74d99a5eSPaul Mundt while ((x & 0x00800000) == 0) { 425*74d99a5eSPaul Mundt x <<= 1; 426*74d99a5eSPaul Mundt exp--; 427*74d99a5eSPaul Mundt } 428*74d99a5eSPaul Mundt x &= 0x007fffff; 429*74d99a5eSPaul Mundt du |= (exp << 20) | (x >> 3); 430*74d99a5eSPaul Mundt dl = x << 29; 431*74d99a5eSPaul Mundt 432*74d99a5eSPaul Mundt fpu->fp_regs[n] = du; 433*74d99a5eSPaul Mundt fpu->fp_regs[n+1] = dl; 434*74d99a5eSPaul Mundt } 435*74d99a5eSPaul Mundt } 436*74d99a5eSPaul Mundt 437*74d99a5eSPaul Mundt /** 438*74d99a5eSPaul Mundt * ieee_fpe_handler - Handle denormalized number exception 439*74d99a5eSPaul Mundt * 440*74d99a5eSPaul Mundt * @regs: Pointer to register structure 441*74d99a5eSPaul Mundt * 442*74d99a5eSPaul Mundt * Returns 1 when it's handled (should not cause exception). 
443*74d99a5eSPaul Mundt */ 444*74d99a5eSPaul Mundt static int 445*74d99a5eSPaul Mundt ieee_fpe_handler (struct pt_regs *regs) 446*74d99a5eSPaul Mundt { 447*74d99a5eSPaul Mundt unsigned short insn = *(unsigned short *) regs->pc; 448*74d99a5eSPaul Mundt unsigned short finsn; 449*74d99a5eSPaul Mundt unsigned long nextpc; 450*74d99a5eSPaul Mundt int nib[4] = { 451*74d99a5eSPaul Mundt (insn >> 12) & 0xf, 452*74d99a5eSPaul Mundt (insn >> 8) & 0xf, 453*74d99a5eSPaul Mundt (insn >> 4) & 0xf, 454*74d99a5eSPaul Mundt insn & 0xf}; 455*74d99a5eSPaul Mundt 456*74d99a5eSPaul Mundt if (nib[0] == 0xb || 457*74d99a5eSPaul Mundt (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ 458*74d99a5eSPaul Mundt regs->pr = regs->pc + 4; 459*74d99a5eSPaul Mundt if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ 460*74d99a5eSPaul Mundt nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); 461*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 462*74d99a5eSPaul Mundt } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ 463*74d99a5eSPaul Mundt if (regs->sr & 1) 464*74d99a5eSPaul Mundt nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 465*74d99a5eSPaul Mundt else 466*74d99a5eSPaul Mundt nextpc = regs->pc + 4; 467*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 468*74d99a5eSPaul Mundt } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ 469*74d99a5eSPaul Mundt if (regs->sr & 1) 470*74d99a5eSPaul Mundt nextpc = regs->pc + 4; 471*74d99a5eSPaul Mundt else 472*74d99a5eSPaul Mundt nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 473*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 474*74d99a5eSPaul Mundt } else if (nib[0] == 0x4 && nib[3] == 0xb && 475*74d99a5eSPaul Mundt (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ 476*74d99a5eSPaul Mundt nextpc = regs->regs[nib[1]]; 477*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 478*74d99a5eSPaul Mundt } else if (nib[0] == 0x0 && nib[3] == 0x3 && 
479*74d99a5eSPaul Mundt (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ 480*74d99a5eSPaul Mundt nextpc = regs->pc + 4 + regs->regs[nib[1]]; 481*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 482*74d99a5eSPaul Mundt } else if (insn == 0x000b) { /* rts */ 483*74d99a5eSPaul Mundt nextpc = regs->pr; 484*74d99a5eSPaul Mundt finsn = *(unsigned short *) (regs->pc + 2); 485*74d99a5eSPaul Mundt } else { 486*74d99a5eSPaul Mundt nextpc = regs->pc + 2; 487*74d99a5eSPaul Mundt finsn = insn; 488*74d99a5eSPaul Mundt } 489*74d99a5eSPaul Mundt 490*74d99a5eSPaul Mundt #define FPSCR_FPU_ERROR (1 << 17) 491*74d99a5eSPaul Mundt 492*74d99a5eSPaul Mundt if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 493*74d99a5eSPaul Mundt struct task_struct *tsk = current; 494*74d99a5eSPaul Mundt 495*74d99a5eSPaul Mundt if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) { 496*74d99a5eSPaul Mundt /* FPU error */ 497*74d99a5eSPaul Mundt denormal_to_double (&tsk->thread.fpu.hard, 498*74d99a5eSPaul Mundt (finsn >> 8) & 0xf); 499*74d99a5eSPaul Mundt } else 500*74d99a5eSPaul Mundt return 0; 501*74d99a5eSPaul Mundt 502*74d99a5eSPaul Mundt regs->pc = nextpc; 503*74d99a5eSPaul Mundt return 1; 504*74d99a5eSPaul Mundt } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */ 505*74d99a5eSPaul Mundt struct task_struct *tsk = current; 506*74d99a5eSPaul Mundt int fpscr; 507*74d99a5eSPaul Mundt int n, m, prec; 508*74d99a5eSPaul Mundt unsigned int hx, hy; 509*74d99a5eSPaul Mundt 510*74d99a5eSPaul Mundt n = (finsn >> 8) & 0xf; 511*74d99a5eSPaul Mundt m = (finsn >> 4) & 0xf; 512*74d99a5eSPaul Mundt hx = tsk->thread.fpu.hard.fp_regs[n]; 513*74d99a5eSPaul Mundt hy = tsk->thread.fpu.hard.fp_regs[m]; 514*74d99a5eSPaul Mundt fpscr = tsk->thread.fpu.hard.fpscr; 515*74d99a5eSPaul Mundt prec = fpscr & (1 << 19); 516*74d99a5eSPaul Mundt 517*74d99a5eSPaul Mundt if ((fpscr & FPSCR_FPU_ERROR) 518*74d99a5eSPaul Mundt && (prec && ((hx & 0x7fffffff) < 0x00100000 519*74d99a5eSPaul Mundt || (hy & 0x7fffffff) < 
0x00100000))) { 520*74d99a5eSPaul Mundt long long llx, lly; 521*74d99a5eSPaul Mundt 522*74d99a5eSPaul Mundt /* FPU error because of denormal */ 523*74d99a5eSPaul Mundt llx = ((long long) hx << 32) 524*74d99a5eSPaul Mundt | tsk->thread.fpu.hard.fp_regs[n+1]; 525*74d99a5eSPaul Mundt lly = ((long long) hy << 32) 526*74d99a5eSPaul Mundt | tsk->thread.fpu.hard.fp_regs[m+1]; 527*74d99a5eSPaul Mundt if ((hx & 0x7fffffff) >= 0x00100000) 528*74d99a5eSPaul Mundt llx = denormal_muld(lly, llx); 529*74d99a5eSPaul Mundt else 530*74d99a5eSPaul Mundt llx = denormal_muld(llx, lly); 531*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 532*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 533*74d99a5eSPaul Mundt } else if ((fpscr & FPSCR_FPU_ERROR) 534*74d99a5eSPaul Mundt && (!prec && ((hx & 0x7fffffff) < 0x00800000 535*74d99a5eSPaul Mundt || (hy & 0x7fffffff) < 0x00800000))) { 536*74d99a5eSPaul Mundt /* FPU error because of denormal */ 537*74d99a5eSPaul Mundt if ((hx & 0x7fffffff) >= 0x00800000) 538*74d99a5eSPaul Mundt hx = denormal_mulf(hy, hx); 539*74d99a5eSPaul Mundt else 540*74d99a5eSPaul Mundt hx = denormal_mulf(hx, hy); 541*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n] = hx; 542*74d99a5eSPaul Mundt } else 543*74d99a5eSPaul Mundt return 0; 544*74d99a5eSPaul Mundt 545*74d99a5eSPaul Mundt regs->pc = nextpc; 546*74d99a5eSPaul Mundt return 1; 547*74d99a5eSPaul Mundt } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */ 548*74d99a5eSPaul Mundt struct task_struct *tsk = current; 549*74d99a5eSPaul Mundt int fpscr; 550*74d99a5eSPaul Mundt int n, m, prec; 551*74d99a5eSPaul Mundt unsigned int hx, hy; 552*74d99a5eSPaul Mundt 553*74d99a5eSPaul Mundt n = (finsn >> 8) & 0xf; 554*74d99a5eSPaul Mundt m = (finsn >> 4) & 0xf; 555*74d99a5eSPaul Mundt hx = tsk->thread.fpu.hard.fp_regs[n]; 556*74d99a5eSPaul Mundt hy = tsk->thread.fpu.hard.fp_regs[m]; 557*74d99a5eSPaul Mundt fpscr = tsk->thread.fpu.hard.fpscr; 558*74d99a5eSPaul Mundt prec = fpscr & 
(1 << 19); 559*74d99a5eSPaul Mundt 560*74d99a5eSPaul Mundt if ((fpscr & FPSCR_FPU_ERROR) 561*74d99a5eSPaul Mundt && (prec && ((hx & 0x7fffffff) < 0x00100000 562*74d99a5eSPaul Mundt || (hy & 0x7fffffff) < 0x00100000))) { 563*74d99a5eSPaul Mundt long long llx, lly; 564*74d99a5eSPaul Mundt 565*74d99a5eSPaul Mundt /* FPU error because of denormal */ 566*74d99a5eSPaul Mundt llx = ((long long) hx << 32) 567*74d99a5eSPaul Mundt | tsk->thread.fpu.hard.fp_regs[n+1]; 568*74d99a5eSPaul Mundt lly = ((long long) hy << 32) 569*74d99a5eSPaul Mundt | tsk->thread.fpu.hard.fp_regs[m+1]; 570*74d99a5eSPaul Mundt if ((finsn & 0xf00f) == 0xf000) 571*74d99a5eSPaul Mundt llx = denormal_addd(llx, lly); 572*74d99a5eSPaul Mundt else 573*74d99a5eSPaul Mundt llx = denormal_addd(llx, lly ^ (1LL << 63)); 574*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 575*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 576*74d99a5eSPaul Mundt } else if ((fpscr & FPSCR_FPU_ERROR) 577*74d99a5eSPaul Mundt && (!prec && ((hx & 0x7fffffff) < 0x00800000 578*74d99a5eSPaul Mundt || (hy & 0x7fffffff) < 0x00800000))) { 579*74d99a5eSPaul Mundt /* FPU error because of denormal */ 580*74d99a5eSPaul Mundt if ((finsn & 0xf00f) == 0xf000) 581*74d99a5eSPaul Mundt hx = denormal_addf(hx, hy); 582*74d99a5eSPaul Mundt else 583*74d99a5eSPaul Mundt hx = denormal_addf(hx, hy ^ 0x80000000); 584*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fp_regs[n] = hx; 585*74d99a5eSPaul Mundt } else 586*74d99a5eSPaul Mundt return 0; 587*74d99a5eSPaul Mundt 588*74d99a5eSPaul Mundt regs->pc = nextpc; 589*74d99a5eSPaul Mundt return 1; 590*74d99a5eSPaul Mundt } 591*74d99a5eSPaul Mundt 592*74d99a5eSPaul Mundt return 0; 593*74d99a5eSPaul Mundt } 594*74d99a5eSPaul Mundt 595*74d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_error) 596*74d99a5eSPaul Mundt { 597*74d99a5eSPaul Mundt struct task_struct *tsk = current; 598*74d99a5eSPaul Mundt TRAP_HANDLER_DECL; 599*74d99a5eSPaul Mundt 600*74d99a5eSPaul Mundt save_fpu(tsk, regs); 
601*74d99a5eSPaul Mundt if (ieee_fpe_handler(regs)) { 602*74d99a5eSPaul Mundt tsk->thread.fpu.hard.fpscr &= 603*74d99a5eSPaul Mundt ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 604*74d99a5eSPaul Mundt grab_fpu(regs); 605*74d99a5eSPaul Mundt restore_fpu(tsk); 606*74d99a5eSPaul Mundt set_tsk_thread_flag(tsk, TIF_USEDFPU); 607*74d99a5eSPaul Mundt return; 608*74d99a5eSPaul Mundt } 609*74d99a5eSPaul Mundt 610*74d99a5eSPaul Mundt force_sig(SIGFPE, tsk); 611*74d99a5eSPaul Mundt } 612*74d99a5eSPaul Mundt 613*74d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_state_restore) 614*74d99a5eSPaul Mundt { 615*74d99a5eSPaul Mundt struct task_struct *tsk = current; 616*74d99a5eSPaul Mundt TRAP_HANDLER_DECL; 617*74d99a5eSPaul Mundt 618*74d99a5eSPaul Mundt grab_fpu(regs); 619*74d99a5eSPaul Mundt if (!user_mode(regs)) { 620*74d99a5eSPaul Mundt printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); 621*74d99a5eSPaul Mundt return; 622*74d99a5eSPaul Mundt } 623*74d99a5eSPaul Mundt 624*74d99a5eSPaul Mundt if (used_math()) { 625*74d99a5eSPaul Mundt /* Using the FPU again. */ 626*74d99a5eSPaul Mundt restore_fpu(tsk); 627*74d99a5eSPaul Mundt } else { 628*74d99a5eSPaul Mundt /* First time FPU user. */ 629*74d99a5eSPaul Mundt fpu_init(); 630*74d99a5eSPaul Mundt set_used_math(); 631*74d99a5eSPaul Mundt } 632*74d99a5eSPaul Mundt set_tsk_thread_flag(tsk, TIF_USEDFPU); 633*74d99a5eSPaul Mundt } 634