xref: /openbmc/linux/arch/sh/kernel/cpu/sh2a/fpu.c (revision f42b3800)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
5  *
6  * This file is subject to the terms and conditions of the GNU General Public
7  * License.  See the file "COPYING" in the main directory of this archive
8  * for more details.
9  *
10  * FIXME! These routines can be optimized in big endian case.
11  */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 #include <asm/fpu.h>
17 
/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * In this file FPSCR_RCHG is defined as 0.  It is handed to the inline
 * assembly in save_fpu(), restore_fpu() and fpu_init() as an input
 * operand, but none of those assembly templates actually reference it.
 */

#define FPSCR_RCHG 0x00000000
24 
25 
/*
 * Save FPU registers onto task structure.
 *
 * @tsk:  task whose thread.fpu.hard area receives the register dump
 * @regs: passed through unchanged to release_fpu()
 *
 * Clears TIF_USEDFPU on @tsk first.  An earlier version of this comment
 * said "Assume called with FPU enabled (SR.FD=0)"; the function however
 * calls enable_fpu() itself before touching any FPU register and
 * disable_fpu() once it is done.
 */
void
save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	unsigned long dummy;

	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	enable_fpu();
	/*
	 * %0 starts at the address of hard.status and is pre-decremented by
	 * every store, so 18 values are written below that address in this
	 * order: fpul, fpscr, fr15 ... fr0 (fr0 ends up lowest in memory).
	 * The last instruction loads FPSCR from operand %3 (FPSCR_INIT).
	 * Operand %2 (FPSCR_RCHG) is not referenced by the template.
	 *
	 * NOTE(review): restore_fpu() reads upwards starting at &thread.fpu.
	 * The two only round-trip if fr0 is stored exactly at that address,
	 * i.e. if precisely these 18 slots lie between the start of the area
	 * and hard.status - confirm against the structure definition, which
	 * is not visible in this file.
	 */
	asm volatile("sts.l	fpul, @-%0\n\t"
		     "sts.l	fpscr, @-%0\n\t"
		     "fmov.s	fr15, @-%0\n\t"
		     "fmov.s	fr14, @-%0\n\t"
		     "fmov.s	fr13, @-%0\n\t"
		     "fmov.s	fr12, @-%0\n\t"
		     "fmov.s	fr11, @-%0\n\t"
		     "fmov.s	fr10, @-%0\n\t"
		     "fmov.s	fr9, @-%0\n\t"
		     "fmov.s	fr8, @-%0\n\t"
		     "fmov.s	fr7, @-%0\n\t"
		     "fmov.s	fr6, @-%0\n\t"
		     "fmov.s	fr5, @-%0\n\t"
		     "fmov.s	fr4, @-%0\n\t"
		     "fmov.s	fr3, @-%0\n\t"
		     "fmov.s	fr2, @-%0\n\t"
		     "fmov.s	fr1, @-%0\n\t"
		     "fmov.s	fr0, @-%0\n\t"
		     "lds	%3, fpscr\n\t"
		     : "=r" (dummy)
		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
		       "r" (FPSCR_RCHG),
		       "r" (FPSCR_INIT)
		     : "memory");

	disable_fpu();
	release_fpu(regs);
}
65 
/*
 * Reload the FPU registers from the save area of a task.
 *
 * @tsk: task whose thread.fpu area is read
 *
 * The FPU is enabled for the duration of the reload and disabled again
 * before returning.
 */
static void
restore_fpu(struct task_struct *tsk)
{
	unsigned long dummy;

	enable_fpu();
	/*
	 * %0 starts at &tsk->thread.fpu and is post-incremented by every
	 * load, so the values are consumed in ascending address order:
	 * fr0 ... fr15, then fpscr, then fpul.  This is the reverse of the
	 * store order used by save_fpu().  The FPSCR_RCHG input operand is
	 * not referenced by the template.
	 */
	asm volatile("fmov.s	@%0+, fr0\n\t"
		     "fmov.s	@%0+, fr1\n\t"
		     "fmov.s	@%0+, fr2\n\t"
		     "fmov.s	@%0+, fr3\n\t"
		     "fmov.s	@%0+, fr4\n\t"
		     "fmov.s	@%0+, fr5\n\t"
		     "fmov.s	@%0+, fr6\n\t"
		     "fmov.s	@%0+, fr7\n\t"
		     "fmov.s	@%0+, fr8\n\t"
		     "fmov.s	@%0+, fr9\n\t"
		     "fmov.s	@%0+, fr10\n\t"
		     "fmov.s	@%0+, fr11\n\t"
		     "fmov.s	@%0+, fr12\n\t"
		     "fmov.s	@%0+, fr13\n\t"
		     "fmov.s	@%0+, fr14\n\t"
		     "fmov.s	@%0+, fr15\n\t"
		     "lds.l	@%0+, fpscr\n\t"
		     "lds.l	@%0+, fpul\n\t"
		     : "=r" (dummy)
		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
		     : "memory");
	disable_fpu();
}
95 
/*
 * Put the FPU into its initial state for a first-time user.
 *
 * Historical comment: "Load the FPU with signalling NaNs.  The bit
 * pattern we're using has the property that no matter whether considered
 * as single or as double precision it represents signalling NaNs."
 *
 * NOTE(review): the code below does not match that description - the
 * value moved into FPUL and copied to fr0..fr15 is the constant 0.
 * Confirm which of the two is intended.
 */

static void
fpu_init(void)
{
	enable_fpu();
	/*
	 * %0 (the constant 0) is loaded into FPUL and then copied from FPUL
	 * into each of fr0..fr15.  Finally FPSCR is loaded from %2
	 * (FPSCR_INIT).  %1 (FPSCR_RCHG) is not referenced by the template.
	 * No clobbers are declared for this statement.
	 */
	asm volatile("lds	%0, fpul\n\t"
		     "fsts	fpul, fr0\n\t"
		     "fsts	fpul, fr1\n\t"
		     "fsts	fpul, fr2\n\t"
		     "fsts	fpul, fr3\n\t"
		     "fsts	fpul, fr4\n\t"
		     "fsts	fpul, fr5\n\t"
		     "fsts	fpul, fr6\n\t"
		     "fsts	fpul, fr7\n\t"
		     "fsts	fpul, fr8\n\t"
		     "fsts	fpul, fr9\n\t"
		     "fsts	fpul, fr10\n\t"
		     "fsts	fpul, fr11\n\t"
		     "fsts	fpul, fr12\n\t"
		     "fsts	fpul, fr13\n\t"
		     "fsts	fpul, fr14\n\t"
		     "fsts	fpul, fr15\n\t"
		     "lds	%2, fpscr\n\t"
		     : /* no output */
		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
	disable_fpu();
}
128 
129 /*
130  *	Emulate arithmetic ops on denormalized number for some FPU insns.
131  */
132 
/*
 * denormalized float * float
 *
 * Both arguments are raw single-precision bit patterns.  hx supplies the
 * denormal operand (only its 23 fraction bits are used), hy the other
 * operand.  The result is the raw bit pattern of the product, truncated
 * rather than rounded.  A signed zero is returned when hx is zero or when
 * hy is itself below the smallest normal number.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int frac_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long prod, probe;
	int exponent, top;

	if (frac_x == 0 || mag_y < 0x00800000)
		return sign;

	/* Split hy into biased exponent and 24-bit significand. */
	exponent = (mag_y & 0x7f800000) >> 23;
	mag_y = (mag_y & 0x007fffff) | 0x00800000;
	frac_x &= 0x007fffff;

	prod = (unsigned long long)frac_x * mag_y;

	/* top = index of the most significant set bit of the product */
	top = -1;
	for (probe = prod; probe != 0; probe >>= 1)
		top++;

	/* FIXME: use guard bits */
	exponent += top - 126 - 46;
	if (exponent > 0) {
		/* normal result: drop the leading one, insert the exponent */
		frac_x = ((int) (prod >> (top - 23)) & 0x007fffff)
			| (exponent << 23);
	} else if (exponent >= -22) {
		/* result is still denormal */
		frac_x = (int) (prod >> (top - 22 - exponent)) & 0x007fffff;
	} else {
		/* underflow to zero */
		frac_x = 0;
	}

	return frac_x | sign;
}
165 
/* denormalized double * double */

/*
 * Full 64 x 64 -> 128 bit unsigned multiply built from four 32 x 32
 * partial products.  The upper half of the result is stored through
 * highp, the lower half through lowp.
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	unsigned long long hi_hi, lo_hi, hi_lo, lo_lo;
	unsigned long long acc, sum, carries;

	hi_hi = (x >> 32) * (unsigned long) (y >> 32);
	lo_hi = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
	hi_lo = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
	lo_lo = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);

	/* Fold both cross products into the low word, counting wrap-arounds. */
	carries = 0LL;
	acc = lo_lo;

	sum = acc + (lo_hi << 32);
	if (sum < acc)
		carries++;
	acc = sum;

	sum = acc + (hi_lo << 32);
	if (sum < acc)
		carries++;
	acc = sum;

	*lowp = acc;
	*highp = carries + (lo_hi >> 32) + (hi_lo >> 32) + hi_hi;
}
192 
/*
 * Shift the 128-bit quantity mh:ml right by n bits and return the low
 * 64 bits of the result.
 *
 * A shift count of zero (or less) returns ml unchanged and a count of
 * 128 or more returns 0.  These cases are handled explicitly because
 * shifting a 64-bit value by 64 or more bits is undefined in C, and the
 * generic expression below would evaluate "mh << 64" for n == 0.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
200 
/*
 * Double-precision counterpart of denormal_mulf(): multiply the denormal
 * operand hx (only its 52 fraction bits are used) by hy.  Both arguments
 * and the return value are raw double-precision bit patterns.  A signed
 * zero is returned when hx is zero or when hy is below the smallest
 * normal number.  The result is truncated, not rounded.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long frac_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, probe;
	int exponent, top;

	if (frac_x == 0 || mag_y < 0x0010000000000000LL)
		return sign;

	/* Split hy into biased exponent and 53-bit significand. */
	exponent = (mag_y & 0x7ff0000000000000LL) >> 52;
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac_x &= 0x000fffffffffffffLL;

	mult64(frac_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product */
	top = -1;
	if (prod_hi) {
		for (probe = prod_hi; probe; probe >>= 1)
			top++;
		top += 64;
	} else {
		for (probe = prod_lo; probe; probe >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	exponent += top - 1022 - 52 * 2;
	if (exponent > 0) {
		/* normal result: drop the leading one, insert the exponent */
		frac_x = (rshift64(prod_hi, prod_lo, top - 52)
			  & 0x000fffffffffffffLL)
			| ((long long)exponent << 52);
	} else if (exponent >= -51) {
		/* result is still denormal */
		frac_x = rshift64(prod_hi, prod_lo, top - 51 - exponent)
			& 0x000fffffffffffffLL;
	} else {
		/* underflow to zero */
		frac_x = 0;
	}

	return frac_x | sign;
}
238 
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-stripped single-precision bit patterns.  When
 * ix is denormal too, the plain integer difference is returned (it may
 * be negative).  Otherwise iy is aligned to the exponent of ix and
 * subtracted from its significand; ix is returned unchanged when iy
 * shifts out completely.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	unsigned int aligned;
	int mant;
	int e;
	int shift;

	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	shift = e - 1;
	if (shift > 31)
		return ix;

	aligned = iy >> shift;
	if (aligned == 0)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - aligned;

	/* Renormalise; stop early if the result drops into denormal range. */
	for (; mant < 0x00800000; mant <<= 1) {
		e--;
		if (e == 0)
			return mant;
	}

	return (e << 23) | (mant & 0x007fffff);
}
265 
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-stripped single-precision bit patterns.  When
 * ix is denormal too, the plain integer sum is returned.  Otherwise iy
 * is aligned to the exponent of ix and added to its significand; ix is
 * returned unchanged when iy shifts out completely.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	unsigned int aligned;
	int mant;
	int e;
	int shift;

	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	shift = e - 1;
	if (shift > 31)
		return ix;

	aligned = iy >> shift;
	if (aligned == 0)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + aligned;

	/* A carry out of the 24-bit significand bumps the exponent. */
	if (mant >= 0x01000000) {
		mant >>= 1;
		e++;
	}

	return (e << 23) | (mant & 0x007fffff);
}
291 
/*
 * Add two single-precision values given as raw bit patterns, at least
 * one of which is expected to be denormal, and return the raw bit
 * pattern of the sum.
 *
 * Operands of opposite sign are handled as a magnitude subtraction via
 * denormal_subf1(), operands of equal sign as a magnitude addition via
 * denormal_addf1().  In both cases the denormal operand is passed as
 * the second argument of the helper.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix = hx & 0x7fffffff;
	unsigned int iy = hy & 0x7fffffff;
	int sign = hx & 0x80000000;

	if ((hx ^ hy) & 0x80000000) {
		if (iy < 0x00800000) {
			/*
			 * The difference has to be examined as a signed
			 * value: with both operands denormal, |hx| - |hy|
			 * can be negative.  Testing the unsigned ix for
			 * "< 0" could never be true, so the magnitude was
			 * not negated and the sign was not flipped.
			 */
			int diff = denormal_subf1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x80000000;
			}
			ix = diff;
		} else {
			/* |hy| is the larger magnitude: result takes hy's sign */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
323 
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Double-precision variant.  Both arguments are sign-stripped raw bit
 * patterns.  When ix is denormal too, the plain integer difference is
 * returned (it may be negative).  Otherwise iy is aligned to the
 * exponent of ix and subtracted from its significand; ix is returned
 * unchanged when iy shifts out completely.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	unsigned long long aligned;
	long long mant;
	int e;
	int shift;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	shift = e - 1;
	if (shift > 63)
		return ix;

	aligned = iy >> shift;
	if (aligned == 0)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - aligned;

	/* Renormalise; stop early if the result drops into denormal range. */
	for (; mant < 0x0010000000000000LL; mant <<= 1) {
		e--;
		if (e == 0)
			return mant;
	}

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
350 
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Double-precision variant.  Both arguments are sign-stripped raw bit
 * patterns.  When ix is denormal too, the plain integer sum is returned.
 * Otherwise iy is aligned to the exponent of ix and added to its
 * significand; ix is returned unchanged when iy shifts out completely.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	unsigned long long aligned;
	long long mant;
	long long e;
	long long shift;

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	shift = e - 1;
	if (shift > 63)
		return ix;

	aligned = iy >> shift;
	if (aligned == 0)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + aligned;

	/* A carry out of the 53-bit significand bumps the exponent. */
	if (mant >= 0x0020000000000000LL) {
		mant >>= 1;
		e++;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}
376 
/*
 * Add two double-precision values given as raw bit patterns, at least
 * one of which is expected to be denormal, and return the raw bit
 * pattern of the sum.
 *
 * Operands of opposite sign are handled as a magnitude subtraction via
 * denormal_subd1(), operands of equal sign as a magnitude addition via
 * denormal_addd1().  In both cases the denormal operand is passed as
 * the second argument of the helper.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix = hx & 0x7fffffffffffffffLL;
	unsigned long long iy = hy & 0x7fffffffffffffffLL;
	long long sign = hx & 0x8000000000000000LL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		if (iy < 0x0010000000000000LL) {
			/*
			 * The difference has to be examined as a signed
			 * value: with both operands denormal, |hx| - |hy|
			 * can be negative.  Testing the unsigned ix for
			 * "< 0" could never be true, so the magnitude was
			 * not negated and the sign was not flipped.
			 */
			long long diff = denormal_subd1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x8000000000000000LL;
			}
			ix = diff;
		} else {
			/* |hy| is the larger magnitude: result takes hy's sign */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
408 
409 /**
410  *	denormal_to_double - Given denormalized float number,
411  *	                     store double float
412  *
413  *	@fpu: Pointer to sh_fpu_hard structure
414  *	@n: Index to FP register
415  */
416 static void
417 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
418 {
419 	unsigned long du, dl;
420 	unsigned long x = fpu->fpul;
421 	int exp = 1023 - 126;
422 
423 	if (x != 0 && (x & 0x7f800000) == 0) {
424 		du = (x & 0x80000000);
425 		while ((x & 0x00800000) == 0) {
426 			x <<= 1;
427 			exp--;
428 		}
429 		x &= 0x007fffff;
430 		du |= (exp << 20) | (x >> 3);
431 		dl = x << 29;
432 
433 		fpu->fp_regs[n] = du;
434 		fpu->fp_regs[n+1] = dl;
435 	}
436 }
437 
/**
 *	ieee_fpe_handler - Handle denormalized number exception
 *
 *	@regs: Pointer to register structure
 *
 *	Decodes the instruction at @regs->pc.  If that instruction is a
 *	delayed branch, the instruction in its delay slot is examined
 *	instead and the branch target becomes the resume address.  When the
 *	examined instruction is fcnvsd, fmul, fadd or fsub and the saved
 *	FPSCR has the FPU error bit set, the operation is emulated on the
 *	register image in current->thread.fpu.hard and @regs->pc is moved
 *	past it.
 *
 *	Returns 1 when it's handled (should not cause exception),
 *	0 otherwise.
 */
static int
ieee_fpe_handler (struct pt_regs *regs)
{
	unsigned short insn = *(unsigned short *) regs->pc;
	unsigned short finsn;
	unsigned long nextpc;
	/* The four 4-bit fields of the opcode, most significant first. */
	int nib[4] = {
		(insn >> 12) & 0xf,
		(insn >> 8) & 0xf,
		(insn >> 4) & 0xf,
		insn & 0xf};

	/*
	 * Subroutine calls record the return address in PR.
	 * NOTE(review): bsrf, which is decoded together with braf further
	 * down, does not match this test, so PR is not updated for it -
	 * confirm whether that is intended.
	 */
	if (nib[0] == 0xb ||
	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
		regs->pr = regs->pc + 4;
	/*
	 * For every delayed branch: compute the branch target into nextpc
	 * and fetch the delay-slot instruction (at pc + 2) into finsn.
	 */
	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
		/* sign-extend the 12-bit displacement and scale it by 2 */
		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
		/* taken when bit 0 of SR is set */
		if (regs->sr & 1)
			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
		else
			nextpc = regs->pc + 4;
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
		/* taken when bit 0 of SR is clear */
		if (regs->sr & 1)
			nextpc = regs->pc + 4;
		else
			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
		nextpc = regs->regs[nib[1]];
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
		nextpc = regs->pc + 4 + regs->regs[nib[1]];
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (insn == 0x000b) { /* rts */
		nextpc = regs->pr;
		finsn = *(unsigned short *) (regs->pc + 2);
	} else {
		/* Not a delayed branch: the instruction at pc is the one. */
		nextpc = regs->pc + 2;
		finsn = insn;
	}

#define FPSCR_FPU_ERROR (1 << 17)

	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
		struct task_struct *tsk = current;

		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
			/* FPU error */
			denormal_to_double (&tsk->thread.fpu.hard,
					    (finsn >> 8) & 0xf);
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		/* n: destination/first operand register, m: second operand */
		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		/* bit 19 of FPSCR selects the double-precision path below */
		prec = fpscr & (1 << 19);

		if ((fpscr & FPSCR_FPU_ERROR)
		     && (prec && ((hx & 0x7fffffff) < 0x00100000
				   || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal */
			/* build the 64-bit operands from register pairs */
			llx = ((long long) hx << 32)
			       | tsk->thread.fpu.hard.fp_regs[n+1];
			lly = ((long long) hy << 32)
			       | tsk->thread.fpu.hard.fp_regs[m+1];
			/* denormal_muld() wants the denormal operand first */
			if ((hx & 0x7fffffff) >= 0x00100000)
				llx = denormal_muld(lly, llx);
			else
				llx = denormal_muld(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_FPU_ERROR)
		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
				   || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal */
			/* denormal_mulf() wants the denormal operand first */
			if ((hx & 0x7fffffff) >= 0x00800000)
				hx = denormal_mulf(hy, hx);
			else
				hx = denormal_mulf(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & (1 << 19);

		if ((fpscr & FPSCR_FPU_ERROR)
		     && (prec && ((hx & 0x7fffffff) < 0x00100000
				   || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal */
			llx = ((long long) hx << 32)
			       | tsk->thread.fpu.hard.fp_regs[n+1];
			lly = ((long long) hy << 32)
			       | tsk->thread.fpu.hard.fp_regs[m+1];
			/* low opcode nibble 0 is fadd; fsub adds the negation */
			if ((finsn & 0xf00f) == 0xf000)
				llx = denormal_addd(llx, lly);
			else
				llx = denormal_addd(llx, lly ^ (1LL << 63));
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_FPU_ERROR)
		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
				   || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal */
			/* low opcode nibble 0 is fadd; fsub adds the negation */
			if ((finsn & 0xf00f) == 0xf000)
				hx = denormal_addf(hx, hy);
			else
				hx = denormal_addf(hx, hy ^ 0x80000000);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	}

	return 0;
}
595 
596 BUILD_TRAP_HANDLER(fpu_error)
597 {
598 	struct task_struct *tsk = current;
599 	TRAP_HANDLER_DECL;
600 
601 	save_fpu(tsk, regs);
602 	if (ieee_fpe_handler(regs)) {
603 		tsk->thread.fpu.hard.fpscr &=
604 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
605 		grab_fpu(regs);
606 		restore_fpu(tsk);
607 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
608 		return;
609 	}
610 
611 	force_sig(SIGFPE, tsk);
612 }
613 
614 BUILD_TRAP_HANDLER(fpu_state_restore)
615 {
616 	struct task_struct *tsk = current;
617 	TRAP_HANDLER_DECL;
618 
619 	grab_fpu(regs);
620 	if (!user_mode(regs)) {
621 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
622 		return;
623 	}
624 
625 	if (used_math()) {
626 		/* Using the FPU again.  */
627 		restore_fpu(tsk);
628 	} else	{
629 		/* First time FPU user.  */
630 		fpu_init();
631 		set_used_math();
632 	}
633 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
634 }
635