xref: /openbmc/linux/arch/sh/kernel/cpu/sh2a/fpu.c (revision 643d1f7f)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
5  *
6  * This file is subject to the terms and conditions of the GNU General Public
7  * License.  See the file "COPYING" in the main directory of this archive
8  * for more details.
9  *
10  * FIXME! These routines can be optimized in big endian case.
11  */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 
/*
 * FPSCR_RCHG is passed as an input operand to the inline assembly in the
 * save/restore/init routines below.  Its value is zero and none of those
 * asm templates reference the operand or issue an frchg instruction.
 *
 * Background carried over from the code this was derived from: the PR
 * (precision) bit in the FP Status Register must be clear when an frchg
 * instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 */

#define FPSCR_RCHG	0x00000000
23 
24 
25 /*
26  * Save FPU registers onto task structure.
27  * Assume called with FPU enabled (SR.FD=0).
28  */
29 void
30 save_fpu(struct task_struct *tsk, struct pt_regs *regs)
31 {
32 	unsigned long dummy;
33 
34 	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
35 	enable_fpu();
36 	asm volatile("sts.l	fpul, @-%0\n\t"
37 		     "sts.l	fpscr, @-%0\n\t"
38 		     "fmov.s	fr15, @-%0\n\t"
39 		     "fmov.s	fr14, @-%0\n\t"
40 		     "fmov.s	fr13, @-%0\n\t"
41 		     "fmov.s	fr12, @-%0\n\t"
42 		     "fmov.s	fr11, @-%0\n\t"
43 		     "fmov.s	fr10, @-%0\n\t"
44 		     "fmov.s	fr9, @-%0\n\t"
45 		     "fmov.s	fr8, @-%0\n\t"
46 		     "fmov.s	fr7, @-%0\n\t"
47 		     "fmov.s	fr6, @-%0\n\t"
48 		     "fmov.s	fr5, @-%0\n\t"
49 		     "fmov.s	fr4, @-%0\n\t"
50 		     "fmov.s	fr3, @-%0\n\t"
51 		     "fmov.s	fr2, @-%0\n\t"
52 		     "fmov.s	fr1, @-%0\n\t"
53 		     "fmov.s	fr0, @-%0\n\t"
54 		     "lds	%3, fpscr\n\t"
55 		     : "=r" (dummy)
56 		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
57 		       "r" (FPSCR_RCHG),
58 		       "r" (FPSCR_INIT)
59 		     : "memory");
60 
61 	disable_fpu();
62 	release_fpu(regs);
63 }
64 
65 static void
66 restore_fpu(struct task_struct *tsk)
67 {
68 	unsigned long dummy;
69 
70 	enable_fpu();
71 	asm volatile("fmov.s	@%0+, fr0\n\t"
72 		     "fmov.s	@%0+, fr1\n\t"
73 		     "fmov.s	@%0+, fr2\n\t"
74 		     "fmov.s	@%0+, fr3\n\t"
75 		     "fmov.s	@%0+, fr4\n\t"
76 		     "fmov.s	@%0+, fr5\n\t"
77 		     "fmov.s	@%0+, fr6\n\t"
78 		     "fmov.s	@%0+, fr7\n\t"
79 		     "fmov.s	@%0+, fr8\n\t"
80 		     "fmov.s	@%0+, fr9\n\t"
81 		     "fmov.s	@%0+, fr10\n\t"
82 		     "fmov.s	@%0+, fr11\n\t"
83 		     "fmov.s	@%0+, fr12\n\t"
84 		     "fmov.s	@%0+, fr13\n\t"
85 		     "fmov.s	@%0+, fr14\n\t"
86 		     "fmov.s	@%0+, fr15\n\t"
87 		     "lds.l	@%0+, fpscr\n\t"
88 		     "lds.l	@%0+, fpul\n\t"
89 		     : "=r" (dummy)
90 		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
91 		     : "memory");
92 	disable_fpu();
93 }
94 
95 /*
96  * Load the FPU with signalling NANS.  This bit pattern we're using
97  * has the property that no matter wether considered as single or as
98  * double precission represents signaling NANS.
99  */
100 
101 static void
102 fpu_init(void)
103 {
104 	enable_fpu();
105 	asm volatile("lds	%0, fpul\n\t"
106 		     "fsts	fpul, fr0\n\t"
107 		     "fsts	fpul, fr1\n\t"
108 		     "fsts	fpul, fr2\n\t"
109 		     "fsts	fpul, fr3\n\t"
110 		     "fsts	fpul, fr4\n\t"
111 		     "fsts	fpul, fr5\n\t"
112 		     "fsts	fpul, fr6\n\t"
113 		     "fsts	fpul, fr7\n\t"
114 		     "fsts	fpul, fr8\n\t"
115 		     "fsts	fpul, fr9\n\t"
116 		     "fsts	fpul, fr10\n\t"
117 		     "fsts	fpul, fr11\n\t"
118 		     "fsts	fpul, fr12\n\t"
119 		     "fsts	fpul, fr13\n\t"
120 		     "fsts	fpul, fr14\n\t"
121 		     "fsts	fpul, fr15\n\t"
122 		     "lds	%2, fpscr\n\t"
123 		     : /* no output */
124 		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
125 	disable_fpu();
126 }
127 
/*
 *	Emulate arithmetic operations on denormalized numbers for some FPU
 *	instructions.
 */
131 
/*
 * denormalized float * float
 *
 * @hx: bit pattern of the denormal operand
 * @hy: bit pattern of the other operand
 *
 * Returns the bit pattern of the product, truncated (no rounding).  When
 * @hx is zero or @hy is itself below the smallest normal number, a zero
 * carrying the product's sign is returned.
 *
 * The caller is expected to pass the denormal operand first; a normal @hx
 * with an all-zero fraction is not handled here.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long prod, probe;
	unsigned int result;
	int e, top;

	if (mag_x == 0 || mag_y < 0x00800000)
		return sign;

	/* Biased exponent of y; x contributes only its fraction bits. */
	e = (mag_y & 0x7f800000) >> 23;
	mag_x &= 0x007fffff;
	mag_y = (mag_y & 0x007fffff) | 0x00800000;	/* restore hidden bit */
	prod = (unsigned long long)mag_x * mag_y;

	/* top = index of the most significant set bit of the product. */
	top = -1;
	for (probe = prod; probe; probe >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)		/* normal result */
		result = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e + 22 >= 0)	/* denormal result */
		result = (int) (prod >> (top - 22 - e)) & 0x007fffff;
	else			/* too small to represent */
		result = 0;

	return result | sign;
}
164 
/* denormalized double * double */

/*
 * mult64 - full 64 x 64 -> 128 bit unsigned multiplication.
 *
 * @x, @y:  factors
 * @highp:  receives the upper 64 bits of the product
 * @lowp:   receives the lower 64 bits of the product
 *
 * Built from four 32 x 32 partial products; carries out of the low word
 * are detected by unsigned wrap-around.
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long y_hi = (unsigned long) (y >> 32);
	const unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);

	const unsigned long long hh = x_hi * y_hi;
	const unsigned long long lh = x_lo * y_hi;
	const unsigned long long hl = x_hi * y_lo;
	const unsigned long long ll = x_lo * y_lo;

	unsigned long long carry = 0LL;
	unsigned long long acc, lo;

	/* Fold the low halves of both cross terms into the low word. */
	acc = ll + (lh << 32);
	if (acc < ll)
		carry++;
	lo = acc + (hl << 32);
	if (lo < acc)
		carry++;

	*lowp = lo;
	*highp = carry + (lh >> 32) + (hl >> 32) + hh;
}
191 
/*
 * rshift64 - low 64 bits of the 128-bit value (mh:ml) shifted right by n.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count
 *
 * A count of zero returns @ml unchanged and a count of 128 or more returns
 * zero; both cases would otherwise shift a 64-bit value by 64 or more
 * bits, which is undefined in C.  denormal_muld() can request a zero shift
 * when the product's top bit is exactly bit 52.  Negative counts are
 * treated like zero.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
199 
/*
 * denormal_muld - multiply a denormal double by another double.
 *
 * @hx: bit pattern of the denormal operand
 * @hy: bit pattern of the other operand
 *
 * Returns the bit pattern of the product, truncated (no rounding).  When
 * @hx is zero or @hy is itself below the smallest normal number, a zero
 * carrying the product's sign is returned.
 *
 * The caller is expected to pass the denormal operand first.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long mag_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, probe, result;
	int e, top;

	if (mag_x == 0 || mag_y < 0x0010000000000000LL)
		return sign;

	/* Biased exponent of y; x contributes only its fraction bits. */
	e = (mag_y & 0x7ff0000000000000LL) >> 52;
	mag_x &= 0x000fffffffffffffLL;
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(mag_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product. */
	top = -1;
	if (prod_hi) {
		for (probe = prod_hi; probe; probe >>= 1)
			top++;
		top += 64;
	} else {
		for (probe = prod_lo; probe; probe >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)		/* normal result */
		result = (rshift64(prod_hi, prod_lo, top - 52)
			  & 0x000fffffffffffffLL)
			| ((long long)e << 52);
	else if (e + 51 >= 0)	/* denormal result */
		result = rshift64(prod_hi, prod_lo, top - 51 - e)
			& 0x000fffffffffffffLL;
	else			/* too small to represent */
		result = 0;

	return result | sign;
}
237 
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns.  The return
 * value is the bit pattern of the difference; it is negative only when ix
 * is denormal as well and smaller than iy.  Bits of iy that fall below
 * ix's precision are dropped.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int e, mant;

	/* Two denormals share a scale: plain integer subtraction. */
	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	if (e - 1 > 31)		/* iy is far below ix's last bit */
		return ix;

	/* Align iy with ix's fraction. */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - iy;

	/* Renormalise; fall out as a denormal if the exponent hits zero. */
	for (; mant < 0x00800000; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return (e << 23) | (mant & 0x007fffff);
}
264 
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns; the return
 * value is the bit pattern of the sum.  Bits of iy that fall below ix's
 * precision are dropped.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int e, mant;

	/*
	 * Two denormals share a scale: plain integer addition.  A carry
	 * into bit 23 yields the smallest normal exponent.
	 */
	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	if (e - 1 > 31)		/* iy is far below ix's last bit */
		return ix;

	/* Align iy with ix's fraction. */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;
	if (mant >= 0x01000000) {	/* carried past the hidden bit */
		mant >>= 1;
		e++;
	}

	return (e << 23) | (mant & 0x007fffff);
}
290 
/*
 * denormal_addf - add two single-precision bit patterns, at least one of
 * which is denormal.
 *
 * @hx, @hy: operand bit patterns (sign included)
 *
 * Returns the bit pattern of hx + hy.  Operands of opposite sign are
 * handled by subtracting magnitudes via denormal_subf1(); operands of the
 * same sign by adding magnitudes via denormal_addf1().  In both cases the
 * denormal operand is passed as the second argument of the helper.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix, iy;
	int sign;

	sign = hx & 0x80000000;
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	if ((hx ^ hy) & 0x80000000) {
		if (iy < 0x00800000) {
			/*
			 * The difference is negative when |hx| < |hy|.  It
			 * has to be held in a signed variable for that test
			 * to work; an unsigned one is never below zero.
			 */
			int diff = denormal_subf1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x80000000;
			}
			ix = diff;
		} else {
			/* hy is the normal operand: result takes its sign. */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
322 
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less double-precision bit patterns.  The return
 * value is the bit pattern of the difference; it is negative only when ix
 * is denormal as well and smaller than iy.  Bits of iy that fall below
 * ix's precision are dropped.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int e;

	/* Two denormals share a scale: plain integer subtraction. */
	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e - 1 > 63)		/* iy is far below ix's last bit */
		return ix;

	/* Align iy with ix's fraction. */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

	/* Renormalise; fall out as a denormal if the exponent hits zero. */
	for (; mant < 0x0010000000000000LL; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
349 
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less double-precision bit patterns; the return
 * value is the bit pattern of the sum.  Bits of iy that fall below ix's
 * precision are dropped.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long e;

	/*
	 * Two denormals share a scale: plain integer addition.  A carry
	 * into bit 52 yields the smallest normal exponent.
	 */
	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e - 1 > 63)		/* iy is far below ix's last bit */
		return ix;

	/* Align iy with ix's fraction. */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
	if (mant >= 0x0020000000000000LL) {	/* carried past the hidden bit */
		mant >>= 1;
		e++;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}
375 
/*
 * denormal_addd - add two double-precision bit patterns, at least one of
 * which is denormal.
 *
 * @hx, @hy: operand bit patterns (sign included)
 *
 * Returns the bit pattern of hx + hy.  Operands of opposite sign are
 * handled by subtracting magnitudes via denormal_subd1(); operands of the
 * same sign by adding magnitudes via denormal_addd1().  In both cases the
 * denormal operand is passed as the second argument of the helper.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		if (iy < 0x0010000000000000LL) {
			/*
			 * The difference is negative when |hx| < |hy|.  It
			 * has to be held in a signed variable for that test
			 * to work; an unsigned one is never below zero.
			 */
			long long diff = denormal_subd1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x8000000000000000LL;
			}
			ix = diff;
		} else {
			/* hy is the normal operand: result takes its sign. */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
407 
408 /**
409  *	denormal_to_double - Given denormalized float number,
410  *	                     store double float
411  *
412  *	@fpu: Pointer to sh_fpu_hard structure
413  *	@n: Index to FP register
414  */
415 static void
416 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
417 {
418 	unsigned long du, dl;
419 	unsigned long x = fpu->fpul;
420 	int exp = 1023 - 126;
421 
422 	if (x != 0 && (x & 0x7f800000) == 0) {
423 		du = (x & 0x80000000);
424 		while ((x & 0x00800000) == 0) {
425 			x <<= 1;
426 			exp--;
427 		}
428 		x &= 0x007fffff;
429 		du |= (exp << 20) | (x >> 3);
430 		dl = x << 29;
431 
432 		fpu->fp_regs[n] = du;
433 		fpu->fp_regs[n+1] = dl;
434 	}
435 }
436 
437 /**
438  *	ieee_fpe_handler - Handle denormalized number exception
439  *
440  *	@regs: Pointer to register structure
441  *
442  *	Returns 1 when it's handled (should not cause exception).
443  */
444 static int
445 ieee_fpe_handler (struct pt_regs *regs)
446 {
447 	unsigned short insn = *(unsigned short *) regs->pc;
448 	unsigned short finsn;
449 	unsigned long nextpc;
450 	int nib[4] = {
451 		(insn >> 12) & 0xf,
452 		(insn >> 8) & 0xf,
453 		(insn >> 4) & 0xf,
454 		insn & 0xf};
455 
456 	if (nib[0] == 0xb ||
457 	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
458 		regs->pr = regs->pc + 4;
459 	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
460 		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
461 		finsn = *(unsigned short *) (regs->pc + 2);
462 	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
463 		if (regs->sr & 1)
464 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
465 		else
466 			nextpc = regs->pc + 4;
467 		finsn = *(unsigned short *) (regs->pc + 2);
468 	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
469 		if (regs->sr & 1)
470 			nextpc = regs->pc + 4;
471 		else
472 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
473 		finsn = *(unsigned short *) (regs->pc + 2);
474 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
475 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
476 		nextpc = regs->regs[nib[1]];
477 		finsn = *(unsigned short *) (regs->pc + 2);
478 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
479 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
480 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
481 		finsn = *(unsigned short *) (regs->pc + 2);
482 	} else if (insn == 0x000b) { /* rts */
483 		nextpc = regs->pr;
484 		finsn = *(unsigned short *) (regs->pc + 2);
485 	} else {
486 		nextpc = regs->pc + 2;
487 		finsn = insn;
488 	}
489 
490 #define FPSCR_FPU_ERROR (1 << 17)
491 
492 	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
493 		struct task_struct *tsk = current;
494 
495 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
496 			/* FPU error */
497 			denormal_to_double (&tsk->thread.fpu.hard,
498 					    (finsn >> 8) & 0xf);
499 		} else
500 			return 0;
501 
502 		regs->pc = nextpc;
503 		return 1;
504 	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
505 		struct task_struct *tsk = current;
506 		int fpscr;
507 		int n, m, prec;
508 		unsigned int hx, hy;
509 
510 		n = (finsn >> 8) & 0xf;
511 		m = (finsn >> 4) & 0xf;
512 		hx = tsk->thread.fpu.hard.fp_regs[n];
513 		hy = tsk->thread.fpu.hard.fp_regs[m];
514 		fpscr = tsk->thread.fpu.hard.fpscr;
515 		prec = fpscr & (1 << 19);
516 
517 		if ((fpscr & FPSCR_FPU_ERROR)
518 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
519 				   || (hy & 0x7fffffff) < 0x00100000))) {
520 			long long llx, lly;
521 
522 			/* FPU error because of denormal */
523 			llx = ((long long) hx << 32)
524 			       | tsk->thread.fpu.hard.fp_regs[n+1];
525 			lly = ((long long) hy << 32)
526 			       | tsk->thread.fpu.hard.fp_regs[m+1];
527 			if ((hx & 0x7fffffff) >= 0x00100000)
528 				llx = denormal_muld(lly, llx);
529 			else
530 				llx = denormal_muld(llx, lly);
531 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
532 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
533 		} else if ((fpscr & FPSCR_FPU_ERROR)
534 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
535 				   || (hy & 0x7fffffff) < 0x00800000))) {
536 			/* FPU error because of denormal */
537 			if ((hx & 0x7fffffff) >= 0x00800000)
538 				hx = denormal_mulf(hy, hx);
539 			else
540 				hx = denormal_mulf(hx, hy);
541 			tsk->thread.fpu.hard.fp_regs[n] = hx;
542 		} else
543 			return 0;
544 
545 		regs->pc = nextpc;
546 		return 1;
547 	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
548 		struct task_struct *tsk = current;
549 		int fpscr;
550 		int n, m, prec;
551 		unsigned int hx, hy;
552 
553 		n = (finsn >> 8) & 0xf;
554 		m = (finsn >> 4) & 0xf;
555 		hx = tsk->thread.fpu.hard.fp_regs[n];
556 		hy = tsk->thread.fpu.hard.fp_regs[m];
557 		fpscr = tsk->thread.fpu.hard.fpscr;
558 		prec = fpscr & (1 << 19);
559 
560 		if ((fpscr & FPSCR_FPU_ERROR)
561 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
562 				   || (hy & 0x7fffffff) < 0x00100000))) {
563 			long long llx, lly;
564 
565 			/* FPU error because of denormal */
566 			llx = ((long long) hx << 32)
567 			       | tsk->thread.fpu.hard.fp_regs[n+1];
568 			lly = ((long long) hy << 32)
569 			       | tsk->thread.fpu.hard.fp_regs[m+1];
570 			if ((finsn & 0xf00f) == 0xf000)
571 				llx = denormal_addd(llx, lly);
572 			else
573 				llx = denormal_addd(llx, lly ^ (1LL << 63));
574 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
575 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
576 		} else if ((fpscr & FPSCR_FPU_ERROR)
577 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
578 				   || (hy & 0x7fffffff) < 0x00800000))) {
579 			/* FPU error because of denormal */
580 			if ((finsn & 0xf00f) == 0xf000)
581 				hx = denormal_addf(hx, hy);
582 			else
583 				hx = denormal_addf(hx, hy ^ 0x80000000);
584 			tsk->thread.fpu.hard.fp_regs[n] = hx;
585 		} else
586 			return 0;
587 
588 		regs->pc = nextpc;
589 		return 1;
590 	}
591 
592 	return 0;
593 }
594 
595 BUILD_TRAP_HANDLER(fpu_error)
596 {
597 	struct task_struct *tsk = current;
598 	TRAP_HANDLER_DECL;
599 
600 	save_fpu(tsk, regs);
601 	if (ieee_fpe_handler(regs)) {
602 		tsk->thread.fpu.hard.fpscr &=
603 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
604 		grab_fpu(regs);
605 		restore_fpu(tsk);
606 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
607 		return;
608 	}
609 
610 	force_sig(SIGFPE, tsk);
611 }
612 
613 BUILD_TRAP_HANDLER(fpu_state_restore)
614 {
615 	struct task_struct *tsk = current;
616 	TRAP_HANDLER_DECL;
617 
618 	grab_fpu(regs);
619 	if (!user_mode(regs)) {
620 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
621 		return;
622 	}
623 
624 	if (used_math()) {
625 		/* Using the FPU again.  */
626 		restore_fpu(tsk);
627 	} else	{
628 		/* First time FPU user.  */
629 		fpu_init();
630 		set_used_math();
631 	}
632 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
633 }
634