/* xref: /openbmc/linux/arch/sh/kernel/cpu/sh2a/fpu.c (revision 9731e287e08b804592191d8bffaad023154af2aa) */
/*
 * Save/restore floating point context for signal handlers.
 *
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * FIXME! These routines can be optimized in big endian case.
 */
1274d99a5eSPaul Mundt #include <linux/sched.h>
1374d99a5eSPaul Mundt #include <linux/signal.h>
1474d99a5eSPaul Mundt #include <asm/processor.h>
1574d99a5eSPaul Mundt #include <asm/io.h>
169bbafce2SPaul Mundt #include <asm/fpu.h>
1774d99a5eSPaul Mundt 
1874d99a5eSPaul Mundt /* The PR (precision) bit in the FP Status Register must be clear when
1974d99a5eSPaul Mundt  * an frchg instruction is executed, otherwise the instruction is undefined.
2074d99a5eSPaul Mundt  * Executing frchg with PR set causes a trap on some SH4 implementations.
2174d99a5eSPaul Mundt  */
2274d99a5eSPaul Mundt 
2374d99a5eSPaul Mundt #define FPSCR_RCHG 0x00000000
2474d99a5eSPaul Mundt 
2574d99a5eSPaul Mundt 
2674d99a5eSPaul Mundt /*
2774d99a5eSPaul Mundt  * Save FPU registers onto task structure.
2874d99a5eSPaul Mundt  * Assume called with FPU enabled (SR.FD=0).
2974d99a5eSPaul Mundt  */
3074d99a5eSPaul Mundt void
3174d99a5eSPaul Mundt save_fpu(struct task_struct *tsk, struct pt_regs *regs)
3274d99a5eSPaul Mundt {
3374d99a5eSPaul Mundt 	unsigned long dummy;
3474d99a5eSPaul Mundt 
3574d99a5eSPaul Mundt 	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
3674d99a5eSPaul Mundt 	enable_fpu();
3774d99a5eSPaul Mundt 	asm volatile("sts.l	fpul, @-%0\n\t"
3874d99a5eSPaul Mundt 		     "sts.l	fpscr, @-%0\n\t"
3974d99a5eSPaul Mundt 		     "fmov.s	fr15, @-%0\n\t"
4074d99a5eSPaul Mundt 		     "fmov.s	fr14, @-%0\n\t"
4174d99a5eSPaul Mundt 		     "fmov.s	fr13, @-%0\n\t"
4274d99a5eSPaul Mundt 		     "fmov.s	fr12, @-%0\n\t"
4374d99a5eSPaul Mundt 		     "fmov.s	fr11, @-%0\n\t"
4474d99a5eSPaul Mundt 		     "fmov.s	fr10, @-%0\n\t"
4574d99a5eSPaul Mundt 		     "fmov.s	fr9, @-%0\n\t"
4674d99a5eSPaul Mundt 		     "fmov.s	fr8, @-%0\n\t"
4774d99a5eSPaul Mundt 		     "fmov.s	fr7, @-%0\n\t"
4874d99a5eSPaul Mundt 		     "fmov.s	fr6, @-%0\n\t"
4974d99a5eSPaul Mundt 		     "fmov.s	fr5, @-%0\n\t"
5074d99a5eSPaul Mundt 		     "fmov.s	fr4, @-%0\n\t"
5174d99a5eSPaul Mundt 		     "fmov.s	fr3, @-%0\n\t"
5274d99a5eSPaul Mundt 		     "fmov.s	fr2, @-%0\n\t"
5374d99a5eSPaul Mundt 		     "fmov.s	fr1, @-%0\n\t"
5474d99a5eSPaul Mundt 		     "fmov.s	fr0, @-%0\n\t"
5574d99a5eSPaul Mundt 		     "lds	%3, fpscr\n\t"
5674d99a5eSPaul Mundt 		     : "=r" (dummy)
5774d99a5eSPaul Mundt 		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
5874d99a5eSPaul Mundt 		       "r" (FPSCR_RCHG),
5974d99a5eSPaul Mundt 		       "r" (FPSCR_INIT)
6074d99a5eSPaul Mundt 		     : "memory");
6174d99a5eSPaul Mundt 
6274d99a5eSPaul Mundt 	disable_fpu();
6374d99a5eSPaul Mundt 	release_fpu(regs);
6474d99a5eSPaul Mundt }
6574d99a5eSPaul Mundt 
6674d99a5eSPaul Mundt static void
6774d99a5eSPaul Mundt restore_fpu(struct task_struct *tsk)
6874d99a5eSPaul Mundt {
6974d99a5eSPaul Mundt 	unsigned long dummy;
7074d99a5eSPaul Mundt 
7174d99a5eSPaul Mundt 	enable_fpu();
7274d99a5eSPaul Mundt 	asm volatile("fmov.s	@%0+, fr0\n\t"
7374d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr1\n\t"
7474d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr2\n\t"
7574d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr3\n\t"
7674d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr4\n\t"
7774d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr5\n\t"
7874d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr6\n\t"
7974d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr7\n\t"
8074d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr8\n\t"
8174d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr9\n\t"
8274d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr10\n\t"
8374d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr11\n\t"
8474d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr12\n\t"
8574d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr13\n\t"
8674d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr14\n\t"
8774d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr15\n\t"
8874d99a5eSPaul Mundt 		     "lds.l	@%0+, fpscr\n\t"
8974d99a5eSPaul Mundt 		     "lds.l	@%0+, fpul\n\t"
9074d99a5eSPaul Mundt 		     : "=r" (dummy)
9174d99a5eSPaul Mundt 		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
9274d99a5eSPaul Mundt 		     : "memory");
9374d99a5eSPaul Mundt 	disable_fpu();
9474d99a5eSPaul Mundt }
9574d99a5eSPaul Mundt 
9674d99a5eSPaul Mundt /*
9774d99a5eSPaul Mundt  * Load the FPU with signalling NANS.  This bit pattern we're using
9874d99a5eSPaul Mundt  * has the property that no matter wether considered as single or as
9974d99a5eSPaul Mundt  * double precission represents signaling NANS.
10074d99a5eSPaul Mundt  */
10174d99a5eSPaul Mundt 
10274d99a5eSPaul Mundt static void
10374d99a5eSPaul Mundt fpu_init(void)
10474d99a5eSPaul Mundt {
10574d99a5eSPaul Mundt 	enable_fpu();
10674d99a5eSPaul Mundt 	asm volatile("lds	%0, fpul\n\t"
10774d99a5eSPaul Mundt 		     "fsts	fpul, fr0\n\t"
10874d99a5eSPaul Mundt 		     "fsts	fpul, fr1\n\t"
10974d99a5eSPaul Mundt 		     "fsts	fpul, fr2\n\t"
11074d99a5eSPaul Mundt 		     "fsts	fpul, fr3\n\t"
11174d99a5eSPaul Mundt 		     "fsts	fpul, fr4\n\t"
11274d99a5eSPaul Mundt 		     "fsts	fpul, fr5\n\t"
11374d99a5eSPaul Mundt 		     "fsts	fpul, fr6\n\t"
11474d99a5eSPaul Mundt 		     "fsts	fpul, fr7\n\t"
11574d99a5eSPaul Mundt 		     "fsts	fpul, fr8\n\t"
11674d99a5eSPaul Mundt 		     "fsts	fpul, fr9\n\t"
11774d99a5eSPaul Mundt 		     "fsts	fpul, fr10\n\t"
11874d99a5eSPaul Mundt 		     "fsts	fpul, fr11\n\t"
11974d99a5eSPaul Mundt 		     "fsts	fpul, fr12\n\t"
12074d99a5eSPaul Mundt 		     "fsts	fpul, fr13\n\t"
12174d99a5eSPaul Mundt 		     "fsts	fpul, fr14\n\t"
12274d99a5eSPaul Mundt 		     "fsts	fpul, fr15\n\t"
12374d99a5eSPaul Mundt 		     "lds	%2, fpscr\n\t"
12474d99a5eSPaul Mundt 		     : /* no output */
12574d99a5eSPaul Mundt 		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
12674d99a5eSPaul Mundt 	disable_fpu();
12774d99a5eSPaul Mundt }
12874d99a5eSPaul Mundt 
/*
 *	Emulate arithmetic ops on denormalized number for some FPU insns.
 */

/*
 * denormal_mulf - single precision multiply with a denormal first operand.
 * @hx: bit pattern of the denormal (or zero) factor
 * @hy: bit pattern of the other factor
 *
 * Returns the bit pattern of hx * hy, truncated (no rounding, see FIXME).
 *
 * A signed zero is returned when @hx is zero or when @hy is itself zero
 * or denormal.  When @hy is an infinity the correspondingly signed
 * infinity is returned, and when @hy is a NaN it is returned unchanged;
 * without this check the all-ones exponent would be treated as an
 * ordinary exponent and a finite value would come back.
 *
 * Derivation of the exponent: hx = ix * 2^-149 and
 * hy = iy * 2^(e - 150) with the implicit bit included in iy, so the
 * product is m * 2^(e - 299).  With w the index of the top set bit of m,
 * the biased result exponent is e + w - 299 + 127 = e + w - 126 - 46.
 */
static int denormal_mulf(int hx, int hy)
{
	unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int ix, iy;
	unsigned long long m, n;
	int exp, w;

	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;
	if (iy < 0x00800000 || ix == 0)
		return sign;

	/* Inf keeps its magnitude with the product sign, NaN propagates. */
	if (iy >= 0x7f800000)
		return iy == 0x7f800000 ? (int) (sign | iy) : hy;

	exp = (iy & 0x7f800000) >> 23;
	ix &= 0x007fffff;
	iy = (iy & 0x007fffff) | 0x00800000;
	m = (unsigned long long)ix * iy;

	/* w = index of the most significant set bit of m (m != 0 here) */
	w = -1;
	for (n = m; n; n >>= 1)
		w++;

	/* FIXME: use guard bits */
	exp += w - 126 - 46;
	if (exp > 0)
		/* normal result: drop the leading 1, keep the next 23 bits */
		ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23);
	else if (exp + 22 >= 0)
		/* denormal result: express m in units of 2^-149 */
		ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
	else
		/* too small even for a denormal */
		ix = 0;

	ix |= sign;
	return ix;
}

/* denormalized double * double */

/*
 * mult64 - unsigned 64 x 64 -> 128 bit multiplication.
 * @x:     first factor
 * @y:     second factor
 * @highp: receives bits 127..64 of the product
 * @lowp:  receives bits 63..0 of the product
 *
 * Both factors are split into 32-bit halves; the four partial products
 * are summed with explicit carry propagation from the low into the high
 * word.
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	unsigned long long xh = x >> 32;
	unsigned long long xl = x & 0xffffffffLL;
	unsigned long long yh = y >> 32;
	unsigned long long yl = y & 0xffffffffLL;
	unsigned long long hh = xh * yh;	/* weight 2^64 */
	unsigned long long lh = xl * yh;	/* weight 2^32 */
	unsigned long long hl = xh * yl;	/* weight 2^32 */
	unsigned long long low = xl * yl;	/* weight 2^0  */
	unsigned long long high = 0LL;
	unsigned long long sum;

	/* add the low halves of the cross terms, counting wrap-arounds */
	sum = low + (lh << 32);
	if (low > sum)
		high++;
	low = sum;
	sum = low + (hl << 32);
	if (low > sum)
		high++;
	low = sum;

	/* the upper halves of the cross terms belong to the high word */
	high += (lh >> 32) + (hl >> 32);
	high += hh;

	*lowp = low;
	*highp = high;
}

/*
 * rshift64 - low 64 bits of the 128-bit value (mh:ml) shifted right by n.
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count
 *
 * Shifting a 64-bit object by 64 or more bits is undefined in C, so the
 * boundary counts are handled explicitly: a count of zero (or less)
 * returns @ml unchanged instead of evaluating mh << 64, and a count of
 * 128 or more returns 0 instead of evaluating mh >> 64.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}

/*
 * denormal_muld - double precision multiply with a denormal first operand.
 * @hx: bit pattern of the denormal (or zero) factor
 * @hy: bit pattern of the other factor
 *
 * Returns the bit pattern of hx * hy, truncated (no rounding, see FIXME).
 *
 * A signed zero is returned when @hx is zero or when @hy is itself zero
 * or denormal.  When @hy is an infinity the correspondingly signed
 * infinity is returned, and when @hy is a NaN it is returned unchanged;
 * without this check the all-ones exponent would be treated as an
 * ordinary exponent and a finite value would come back.
 *
 * Derivation of the exponent: hx = ix * 2^-1074 and
 * hy = iy * 2^(e - 1075) with the implicit bit included in iy, so the
 * product is m * 2^(e - 2149).  With w the index of the top set bit of
 * the 128-bit product m, the biased result exponent is
 * e + w - 2149 + 1023 = e + w - 1022 - 52 * 2.
 */
static long long denormal_muld(long long hx, long long hy)
{
	unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long ix, iy;
	unsigned long long mh, ml, nh, nl;
	int exp, w;

	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;
	if (iy < 0x0010000000000000LL || ix == 0)
		return sign;

	/* Inf keeps its magnitude with the product sign, NaN propagates. */
	if (iy >= 0x7ff0000000000000LL)
		return iy == 0x7ff0000000000000LL ? (long long) (sign | iy) : hy;

	exp = (iy & 0x7ff0000000000000LL) >> 52;
	ix &= 0x000fffffffffffffLL;
	iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(ix, iy, &mh, &ml);

	/* w = index of the most significant set bit of (mh:ml) */
	nh = mh;
	nl = ml;
	w = -1;
	if (nh) {
		while (nh) { nh >>= 1; w++;}
		w += 64;
	} else
		while (nl) { nl >>= 1; w++;}

	/* FIXME: use guard bits */
	exp += w - 1022 - 52 * 2;
	if (exp > 0)
		/* normal result: drop the leading 1, keep the next 52 bits */
		ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
			| ((long long)exp << 52);
	else if (exp + 51 >= 0)
		/* denormal result: express the product in units of 2^-1074 */
		ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
	else
		/* too small even for a denormal */
		ix = 0;

	ix |= sign;
	return ix;
}

/*
 * denormal_subf1 - ix - iy where iy: denormal and ix, iy >= 0
 * @ix: magnitude bits of the minuend (sign bit clear)
 * @iy: magnitude bits of the denormal subtrahend
 *
 * When @ix is denormal too, both are plain integers in units of 2^-149
 * and the (possibly negative) integer difference is returned.
 *
 * Otherwise @iy is scaled to the unit of @ix's last mantissa bit by
 * shifting it right by (exponent - 1).  If the exponent is above 32 or
 * nothing is left after the shift, @ix is returned unchanged.  The
 * difference is then renormalised; if the exponent reaches zero on the
 * way, the remaining fraction is returned as a denormal.  Bits shifted
 * out of @iy are discarded (no rounding).
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	/* mantissa with the implicit leading one made explicit */
	frac = (ix & 0x007fffff) | 0x00800000;
	frac -= iy;
	while (frac < 0x00800000) {
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return (exp << 23) | (frac & 0x007fffff);
}

/*
 * denormal_addf1 - ix + iy where iy: denormal and ix, iy >= 0
 * @ix: magnitude bits of the first addend (sign bit clear)
 * @iy: magnitude bits of the denormal addend
 *
 * When @ix is denormal too, the integer sum of the two bit patterns is
 * returned (a carry into bit 23 yields the smallest normal exponent).
 *
 * Otherwise @iy is scaled to the unit of @ix's last mantissa bit by
 * shifting it right by (exponent - 1).  If the exponent is above 32 or
 * nothing is left after the shift, @ix is returned unchanged.  A carry
 * out of the 24-bit mantissa is absorbed by halving the fraction and
 * bumping the exponent.  Bits shifted out are discarded (no rounding).
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix + iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	/* mantissa with the implicit leading one made explicit */
	frac = (ix & 0x007fffff) | 0x00800000;
	frac += iy;
	if (frac >= 0x01000000) {
		frac >>= 1;
		++exp;
	}

	return (exp << 23) | (frac & 0x007fffff);
}

/*
 * denormal_addf - single precision hx + hy where at least one is denormal.
 * @hx: bit pattern of the first operand
 * @hy: bit pattern of the second operand
 *
 * Splits both operands into sign and magnitude and hands the magnitudes
 * to denormal_addf1() (equal signs) or denormal_subf1() (opposite signs),
 * always passing the denormal one as the second argument.  Returns the
 * bit pattern of the result.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix = hx & 0x7fffffff;
	unsigned int iy = hy & 0x7fffffff;
	int sign = hx & 0x80000000;

	if ((hx ^ hy) & 0x80000000) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x00800000) {
			ix = denormal_subf1(ix, iy);
			/* |hx| < |hy|: result takes the sign of hy */
			if ((int) ix < 0) {
				ix = -ix;
				sign ^= 0x80000000;
			}
		} else {
			/* hy is the larger, normal one: its sign wins */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* equal signs: add the magnitudes, sign is unchanged */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}

/*
 * denormal_subd1 - ix - iy where iy: denormal and ix, iy >= 0
 * @ix: magnitude bits of the minuend (sign bit clear)
 * @iy: magnitude bits of the denormal subtrahend
 *
 * When @ix is denormal too, the (possibly negative) integer difference of
 * the two bit patterns is returned.  Otherwise @iy is shifted right by
 * (exponent - 1) to line it up with @ix's mantissa; if the exponent is
 * above 64 or nothing is left after the shift, @ix is returned unchanged.
 * The difference is renormalised, falling back to a denormal when the
 * exponent reaches zero.  Bits shifted out are discarded (no rounding).
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	int exp;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	/* mantissa with the implicit leading one made explicit */
	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac -= iy;
	while (frac < 0x0010000000000000LL) {
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * denormal_addd1 - ix + iy where iy: denormal and ix, iy >= 0
 * @ix: magnitude bits of the first addend (sign bit clear)
 * @iy: magnitude bits of the denormal addend
 *
 * When @ix is denormal too, the integer sum of the two bit patterns is
 * returned.  Otherwise @iy is shifted right by (exponent - 1); if the
 * exponent is above 64 or nothing is left after the shift, @ix is
 * returned unchanged.  A carry out of the 53-bit mantissa is absorbed by
 * halving the fraction and bumping the exponent.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	long long exp;

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	/* mantissa with the implicit leading one made explicit */
	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac += iy;
	if (frac >= 0x0020000000000000LL) {
		frac >>= 1;
		++exp;
	}

	return (exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * denormal_addd - double precision hx + hy where at least one is denormal.
 * @hx: bit pattern of the first operand
 * @hy: bit pattern of the second operand
 *
 * Splits both operands into sign and magnitude and hands the magnitudes
 * to denormal_addd1() (equal signs) or denormal_subd1() (opposite signs),
 * always passing the denormal one as the second argument.  Returns the
 * bit pattern of the result.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * The difference is a 64-bit quantity; its sign must
			 * be tested at full width.  Testing it as an int
			 * looks at bit 31 of the low word only.
			 */
			if ((long long) ix < 0) {
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* hy is the larger, normal one: its sign wins */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* equal signs: add the magnitudes, sign is unchanged */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}

40974d99a5eSPaul Mundt /**
41074d99a5eSPaul Mundt  *	denormal_to_double - Given denormalized float number,
41174d99a5eSPaul Mundt  *	                     store double float
41274d99a5eSPaul Mundt  *
41374d99a5eSPaul Mundt  *	@fpu: Pointer to sh_fpu_hard structure
41474d99a5eSPaul Mundt  *	@n: Index to FP register
41574d99a5eSPaul Mundt  */
41674d99a5eSPaul Mundt static void
41774d99a5eSPaul Mundt denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
41874d99a5eSPaul Mundt {
41974d99a5eSPaul Mundt 	unsigned long du, dl;
42074d99a5eSPaul Mundt 	unsigned long x = fpu->fpul;
42174d99a5eSPaul Mundt 	int exp = 1023 - 126;
42274d99a5eSPaul Mundt 
42374d99a5eSPaul Mundt 	if (x != 0 && (x & 0x7f800000) == 0) {
42474d99a5eSPaul Mundt 		du = (x & 0x80000000);
42574d99a5eSPaul Mundt 		while ((x & 0x00800000) == 0) {
42674d99a5eSPaul Mundt 			x <<= 1;
42774d99a5eSPaul Mundt 			exp--;
42874d99a5eSPaul Mundt 		}
42974d99a5eSPaul Mundt 		x &= 0x007fffff;
43074d99a5eSPaul Mundt 		du |= (exp << 20) | (x >> 3);
43174d99a5eSPaul Mundt 		dl = x << 29;
43274d99a5eSPaul Mundt 
43374d99a5eSPaul Mundt 		fpu->fp_regs[n] = du;
43474d99a5eSPaul Mundt 		fpu->fp_regs[n+1] = dl;
43574d99a5eSPaul Mundt 	}
43674d99a5eSPaul Mundt }
43774d99a5eSPaul Mundt 
43874d99a5eSPaul Mundt /**
43974d99a5eSPaul Mundt  *	ieee_fpe_handler - Handle denormalized number exception
44074d99a5eSPaul Mundt  *
44174d99a5eSPaul Mundt  *	@regs: Pointer to register structure
44274d99a5eSPaul Mundt  *
44374d99a5eSPaul Mundt  *	Returns 1 when it's handled (should not cause exception).
44474d99a5eSPaul Mundt  */
44574d99a5eSPaul Mundt static int
44674d99a5eSPaul Mundt ieee_fpe_handler (struct pt_regs *regs)
44774d99a5eSPaul Mundt {
44874d99a5eSPaul Mundt 	unsigned short insn = *(unsigned short *) regs->pc;
44974d99a5eSPaul Mundt 	unsigned short finsn;
45074d99a5eSPaul Mundt 	unsigned long nextpc;
45174d99a5eSPaul Mundt 	int nib[4] = {
45274d99a5eSPaul Mundt 		(insn >> 12) & 0xf,
45374d99a5eSPaul Mundt 		(insn >> 8) & 0xf,
45474d99a5eSPaul Mundt 		(insn >> 4) & 0xf,
45574d99a5eSPaul Mundt 		insn & 0xf};
45674d99a5eSPaul Mundt 
45774d99a5eSPaul Mundt 	if (nib[0] == 0xb ||
45874d99a5eSPaul Mundt 	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
45974d99a5eSPaul Mundt 		regs->pr = regs->pc + 4;
46074d99a5eSPaul Mundt 	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
46174d99a5eSPaul Mundt 		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
46274d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
46374d99a5eSPaul Mundt 	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
46474d99a5eSPaul Mundt 		if (regs->sr & 1)
46574d99a5eSPaul Mundt 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
46674d99a5eSPaul Mundt 		else
46774d99a5eSPaul Mundt 			nextpc = regs->pc + 4;
46874d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
46974d99a5eSPaul Mundt 	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
47074d99a5eSPaul Mundt 		if (regs->sr & 1)
47174d99a5eSPaul Mundt 			nextpc = regs->pc + 4;
47274d99a5eSPaul Mundt 		else
47374d99a5eSPaul Mundt 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
47474d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
47574d99a5eSPaul Mundt 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
47674d99a5eSPaul Mundt 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
47774d99a5eSPaul Mundt 		nextpc = regs->regs[nib[1]];
47874d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
47974d99a5eSPaul Mundt 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
48074d99a5eSPaul Mundt 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
48174d99a5eSPaul Mundt 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
48274d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
48374d99a5eSPaul Mundt 	} else if (insn == 0x000b) { /* rts */
48474d99a5eSPaul Mundt 		nextpc = regs->pr;
48574d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
48674d99a5eSPaul Mundt 	} else {
48774d99a5eSPaul Mundt 		nextpc = regs->pc + 2;
48874d99a5eSPaul Mundt 		finsn = insn;
48974d99a5eSPaul Mundt 	}
49074d99a5eSPaul Mundt 
49174d99a5eSPaul Mundt #define FPSCR_FPU_ERROR (1 << 17)
49274d99a5eSPaul Mundt 
49374d99a5eSPaul Mundt 	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
49474d99a5eSPaul Mundt 		struct task_struct *tsk = current;
49574d99a5eSPaul Mundt 
49674d99a5eSPaul Mundt 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
49774d99a5eSPaul Mundt 			/* FPU error */
49874d99a5eSPaul Mundt 			denormal_to_double (&tsk->thread.fpu.hard,
49974d99a5eSPaul Mundt 					    (finsn >> 8) & 0xf);
50074d99a5eSPaul Mundt 		} else
50174d99a5eSPaul Mundt 			return 0;
50274d99a5eSPaul Mundt 
50374d99a5eSPaul Mundt 		regs->pc = nextpc;
50474d99a5eSPaul Mundt 		return 1;
50574d99a5eSPaul Mundt 	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
50674d99a5eSPaul Mundt 		struct task_struct *tsk = current;
50774d99a5eSPaul Mundt 		int fpscr;
50874d99a5eSPaul Mundt 		int n, m, prec;
50974d99a5eSPaul Mundt 		unsigned int hx, hy;
51074d99a5eSPaul Mundt 
51174d99a5eSPaul Mundt 		n = (finsn >> 8) & 0xf;
51274d99a5eSPaul Mundt 		m = (finsn >> 4) & 0xf;
51374d99a5eSPaul Mundt 		hx = tsk->thread.fpu.hard.fp_regs[n];
51474d99a5eSPaul Mundt 		hy = tsk->thread.fpu.hard.fp_regs[m];
51574d99a5eSPaul Mundt 		fpscr = tsk->thread.fpu.hard.fpscr;
51674d99a5eSPaul Mundt 		prec = fpscr & (1 << 19);
51774d99a5eSPaul Mundt 
51874d99a5eSPaul Mundt 		if ((fpscr & FPSCR_FPU_ERROR)
51974d99a5eSPaul Mundt 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
52074d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00100000))) {
52174d99a5eSPaul Mundt 			long long llx, lly;
52274d99a5eSPaul Mundt 
52374d99a5eSPaul Mundt 			/* FPU error because of denormal */
52474d99a5eSPaul Mundt 			llx = ((long long) hx << 32)
52574d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[n+1];
52674d99a5eSPaul Mundt 			lly = ((long long) hy << 32)
52774d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[m+1];
52874d99a5eSPaul Mundt 			if ((hx & 0x7fffffff) >= 0x00100000)
52974d99a5eSPaul Mundt 				llx = denormal_muld(lly, llx);
53074d99a5eSPaul Mundt 			else
53174d99a5eSPaul Mundt 				llx = denormal_muld(llx, lly);
53274d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
53374d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
53474d99a5eSPaul Mundt 		} else if ((fpscr & FPSCR_FPU_ERROR)
53574d99a5eSPaul Mundt 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
53674d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00800000))) {
53774d99a5eSPaul Mundt 			/* FPU error because of denormal */
53874d99a5eSPaul Mundt 			if ((hx & 0x7fffffff) >= 0x00800000)
53974d99a5eSPaul Mundt 				hx = denormal_mulf(hy, hx);
54074d99a5eSPaul Mundt 			else
54174d99a5eSPaul Mundt 				hx = denormal_mulf(hx, hy);
54274d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = hx;
54374d99a5eSPaul Mundt 		} else
54474d99a5eSPaul Mundt 			return 0;
54574d99a5eSPaul Mundt 
54674d99a5eSPaul Mundt 		regs->pc = nextpc;
54774d99a5eSPaul Mundt 		return 1;
54874d99a5eSPaul Mundt 	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
54974d99a5eSPaul Mundt 		struct task_struct *tsk = current;
55074d99a5eSPaul Mundt 		int fpscr;
55174d99a5eSPaul Mundt 		int n, m, prec;
55274d99a5eSPaul Mundt 		unsigned int hx, hy;
55374d99a5eSPaul Mundt 
55474d99a5eSPaul Mundt 		n = (finsn >> 8) & 0xf;
55574d99a5eSPaul Mundt 		m = (finsn >> 4) & 0xf;
55674d99a5eSPaul Mundt 		hx = tsk->thread.fpu.hard.fp_regs[n];
55774d99a5eSPaul Mundt 		hy = tsk->thread.fpu.hard.fp_regs[m];
55874d99a5eSPaul Mundt 		fpscr = tsk->thread.fpu.hard.fpscr;
55974d99a5eSPaul Mundt 		prec = fpscr & (1 << 19);
56074d99a5eSPaul Mundt 
56174d99a5eSPaul Mundt 		if ((fpscr & FPSCR_FPU_ERROR)
56274d99a5eSPaul Mundt 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
56374d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00100000))) {
56474d99a5eSPaul Mundt 			long long llx, lly;
56574d99a5eSPaul Mundt 
56674d99a5eSPaul Mundt 			/* FPU error because of denormal */
56774d99a5eSPaul Mundt 			llx = ((long long) hx << 32)
56874d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[n+1];
56974d99a5eSPaul Mundt 			lly = ((long long) hy << 32)
57074d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[m+1];
57174d99a5eSPaul Mundt 			if ((finsn & 0xf00f) == 0xf000)
57274d99a5eSPaul Mundt 				llx = denormal_addd(llx, lly);
57374d99a5eSPaul Mundt 			else
57474d99a5eSPaul Mundt 				llx = denormal_addd(llx, lly ^ (1LL << 63));
57574d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
57674d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
57774d99a5eSPaul Mundt 		} else if ((fpscr & FPSCR_FPU_ERROR)
57874d99a5eSPaul Mundt 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
57974d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00800000))) {
58074d99a5eSPaul Mundt 			/* FPU error because of denormal */
58174d99a5eSPaul Mundt 			if ((finsn & 0xf00f) == 0xf000)
58274d99a5eSPaul Mundt 				hx = denormal_addf(hx, hy);
58374d99a5eSPaul Mundt 			else
58474d99a5eSPaul Mundt 				hx = denormal_addf(hx, hy ^ 0x80000000);
58574d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = hx;
58674d99a5eSPaul Mundt 		} else
58774d99a5eSPaul Mundt 			return 0;
58874d99a5eSPaul Mundt 
58974d99a5eSPaul Mundt 		regs->pc = nextpc;
59074d99a5eSPaul Mundt 		return 1;
59174d99a5eSPaul Mundt 	}
59274d99a5eSPaul Mundt 
59374d99a5eSPaul Mundt 	return 0;
59474d99a5eSPaul Mundt }
59574d99a5eSPaul Mundt 
59674d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_error)
59774d99a5eSPaul Mundt {
59874d99a5eSPaul Mundt 	struct task_struct *tsk = current;
59974d99a5eSPaul Mundt 	TRAP_HANDLER_DECL;
60074d99a5eSPaul Mundt 
60174d99a5eSPaul Mundt 	save_fpu(tsk, regs);
60274d99a5eSPaul Mundt 	if (ieee_fpe_handler(regs)) {
60374d99a5eSPaul Mundt 		tsk->thread.fpu.hard.fpscr &=
60474d99a5eSPaul Mundt 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
60574d99a5eSPaul Mundt 		grab_fpu(regs);
60674d99a5eSPaul Mundt 		restore_fpu(tsk);
60774d99a5eSPaul Mundt 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
60874d99a5eSPaul Mundt 		return;
60974d99a5eSPaul Mundt 	}
61074d99a5eSPaul Mundt 
61174d99a5eSPaul Mundt 	force_sig(SIGFPE, tsk);
61274d99a5eSPaul Mundt }
61374d99a5eSPaul Mundt 
61474d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_state_restore)
61574d99a5eSPaul Mundt {
61674d99a5eSPaul Mundt 	struct task_struct *tsk = current;
61774d99a5eSPaul Mundt 	TRAP_HANDLER_DECL;
61874d99a5eSPaul Mundt 
61974d99a5eSPaul Mundt 	grab_fpu(regs);
62074d99a5eSPaul Mundt 	if (!user_mode(regs)) {
62174d99a5eSPaul Mundt 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
62274d99a5eSPaul Mundt 		return;
62374d99a5eSPaul Mundt 	}
62474d99a5eSPaul Mundt 
62574d99a5eSPaul Mundt 	if (used_math()) {
62674d99a5eSPaul Mundt 		/* Using the FPU again.  */
62774d99a5eSPaul Mundt 		restore_fpu(tsk);
62874d99a5eSPaul Mundt 	} else	{
62974d99a5eSPaul Mundt 		/* First time FPU user.  */
63074d99a5eSPaul Mundt 		fpu_init();
63174d99a5eSPaul Mundt 		set_used_math();
63274d99a5eSPaul Mundt 	}
63374d99a5eSPaul Mundt 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
63474d99a5eSPaul Mundt }
635