xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/fpu.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 #include <asm/fpu.h>
20 
21 /* The PR (precision) bit in the FP Status Register must be clear when
22  * an frchg instruction is executed, otherwise the instruction is undefined.
23  * Executing frchg with PR set causes a trap on some SH4 implementations.
24  */
25 
/*
 * FPSCR image loaded immediately before the frchg sequences in this file.
 * Every bit is zero, so the PR (precision) bit is clear as frchg requires.
 */
26 #define FPSCR_RCHG 0x00000000
/*
 * Software floating-point routines used by ieee_fpe_handler() to emulate
 * operations on denormalized operands.  They take and return raw register
 * bit patterns held in integers (32-bit for single precision, 64-bit for
 * double precision).  Their definitions are not part of the visible source.
 */
27 extern unsigned long long float64_div(unsigned long long a,
28 				      unsigned long long b);
29 extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
30 extern unsigned long long float64_mul(unsigned long long a,
31 				      unsigned long long b);
32 extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
33 extern unsigned long long float64_add(unsigned long long a,
34 				      unsigned long long b);
35 extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
36 extern unsigned long long float64_sub(unsigned long long a,
37 				      unsigned long long b);
38 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
39 extern unsigned long int float64_to_float32(unsigned long long a);
/*
 * Exception flags accumulated through float_raise() while an instruction is
 * being emulated.  fpu_error clears it before calling ieee_fpe_handler() and
 * afterwards ORs it into the task's saved FPSCR image.
 * NOTE(review): this is a single file-scope variable with no locking in the
 * visible code - confirm that concurrent use is impossible.
 */
40 static unsigned int fpu_exception_flags;
41 
42 /*
43  * Save FPU registers onto task structure.
44  * Assume called with FPU enabled (SR.FD=0).
 *
 * NOTE(review): the body calls enable_fpu() itself before touching any FPU
 * register, so the precondition above looks stale - confirm and drop it.
 *
 * @tsk:  task whose thread.fpu.hard area receives the register image;
 *        its TIF_USEDFPU flag is cleared.
 * @regs: register frame handed to release_fpu() once the save is done.
 *
 * Side effect: the hardware FPSCR holds FPSCR_INIT when this returns, not
 * the value that was saved.  Saving a second time without an intervening
 * restore therefore records FPSCR_INIT in place of the task's real FPSCR.
45  */
46 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
47 {
48 	unsigned long dummy;
49 
50 	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
51 	enable_fpu();
	/*
	 * %0 starts at the address of thread.fpu.hard.status and every store
	 * pre-decrements it, so the image is written from high to low
	 * addresses in this order:
	 *   1. FPUL, then the live FPSCR;
	 *   2. FPSCR is replaced by FPSCR_RCHG (%2) so that frchg is legal;
	 *   3. frchg, then fr15..fr0 of the bank that frchg made visible;
	 *   4. frchg again, then fr15..fr0 of the originally visible bank;
	 *   5. FPSCR is loaded with FPSCR_INIT (%3).
	 * restore_fpu() reads the same area back in ascending order.
	 * NOTE(review): the mapping of these stores onto the individual
	 * struct fields depends on the struct layout, which is declared
	 * outside this file - verify against the structure definition.
	 */
52 	asm volatile ("sts.l	fpul, @-%0\n\t"
53 		      "sts.l	fpscr, @-%0\n\t"
54 		      "lds	%2, fpscr\n\t"
55 		      "frchg\n\t"
56 		      "fmov.s	fr15, @-%0\n\t"
57 		      "fmov.s	fr14, @-%0\n\t"
58 		      "fmov.s	fr13, @-%0\n\t"
59 		      "fmov.s	fr12, @-%0\n\t"
60 		      "fmov.s	fr11, @-%0\n\t"
61 		      "fmov.s	fr10, @-%0\n\t"
62 		      "fmov.s	fr9, @-%0\n\t"
63 		      "fmov.s	fr8, @-%0\n\t"
64 		      "fmov.s	fr7, @-%0\n\t"
65 		      "fmov.s	fr6, @-%0\n\t"
66 		      "fmov.s	fr5, @-%0\n\t"
67 		      "fmov.s	fr4, @-%0\n\t"
68 		      "fmov.s	fr3, @-%0\n\t"
69 		      "fmov.s	fr2, @-%0\n\t"
70 		      "fmov.s	fr1, @-%0\n\t"
71 		      "fmov.s	fr0, @-%0\n\t"
72 		      "frchg\n\t"
73 		      "fmov.s	fr15, @-%0\n\t"
74 		      "fmov.s	fr14, @-%0\n\t"
75 		      "fmov.s	fr13, @-%0\n\t"
76 		      "fmov.s	fr12, @-%0\n\t"
77 		      "fmov.s	fr11, @-%0\n\t"
78 		      "fmov.s	fr10, @-%0\n\t"
79 		      "fmov.s	fr9, @-%0\n\t"
80 		      "fmov.s	fr8, @-%0\n\t"
81 		      "fmov.s	fr7, @-%0\n\t"
82 		      "fmov.s	fr6, @-%0\n\t"
83 		      "fmov.s	fr5, @-%0\n\t"
84 		      "fmov.s	fr4, @-%0\n\t"
85 		      "fmov.s	fr3, @-%0\n\t"
86 		      "fmov.s	fr2, @-%0\n\t"
87 		      "fmov.s	fr1, @-%0\n\t"
88 		      "fmov.s	fr0, @-%0\n\t"
89 		      "lds	%3, fpscr\n\t":"=r" (dummy)
90 		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
91 		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
92 		      :"memory");
93 
	/* The image is in memory; give the FPU up again. */
94 	disable_fpu();
95 	release_fpu(regs);
96 }
97 
/*
 * Reload the FPU registers from the image kept in @tsk's thread.fpu area.
 *
 * This is the counterpart of save_fpu(): the image is read in ascending
 * address order starting at &tsk->thread.fpu.  TIF_USEDFPU is not touched
 * here; the callers in this file set it themselves after the restore.
 *
 * @tsk: task whose saved FPU image is loaded into the hardware registers.
 */
98 static void restore_fpu(struct task_struct *tsk)
99 {
100 	unsigned long dummy;
101 
102 	enable_fpu();
	/*
	 * Sequence (each load post-increments %0):
	 *   1. FPSCR is loaded with FPSCR_RCHG (%2) so that frchg is legal;
	 *   2. fr0..fr15 of the currently visible bank are loaded;
	 *   3. frchg, then fr0..fr15 of the other bank are loaded;
	 *   4. frchg back, then the saved FPSCR and FPUL are loaded, which
	 *      puts the task's own FPSCR (not FPSCR_RCHG) back in effect.
	 */
103 	asm volatile ("lds	%2, fpscr\n\t"
104 		      "fmov.s	@%0+, fr0\n\t"
105 		      "fmov.s	@%0+, fr1\n\t"
106 		      "fmov.s	@%0+, fr2\n\t"
107 		      "fmov.s	@%0+, fr3\n\t"
108 		      "fmov.s	@%0+, fr4\n\t"
109 		      "fmov.s	@%0+, fr5\n\t"
110 		      "fmov.s	@%0+, fr6\n\t"
111 		      "fmov.s	@%0+, fr7\n\t"
112 		      "fmov.s	@%0+, fr8\n\t"
113 		      "fmov.s	@%0+, fr9\n\t"
114 		      "fmov.s	@%0+, fr10\n\t"
115 		      "fmov.s	@%0+, fr11\n\t"
116 		      "fmov.s	@%0+, fr12\n\t"
117 		      "fmov.s	@%0+, fr13\n\t"
118 		      "fmov.s	@%0+, fr14\n\t"
119 		      "fmov.s	@%0+, fr15\n\t"
120 		      "frchg\n\t"
121 		      "fmov.s	@%0+, fr0\n\t"
122 		      "fmov.s	@%0+, fr1\n\t"
123 		      "fmov.s	@%0+, fr2\n\t"
124 		      "fmov.s	@%0+, fr3\n\t"
125 		      "fmov.s	@%0+, fr4\n\t"
126 		      "fmov.s	@%0+, fr5\n\t"
127 		      "fmov.s	@%0+, fr6\n\t"
128 		      "fmov.s	@%0+, fr7\n\t"
129 		      "fmov.s	@%0+, fr8\n\t"
130 		      "fmov.s	@%0+, fr9\n\t"
131 		      "fmov.s	@%0+, fr10\n\t"
132 		      "fmov.s	@%0+, fr11\n\t"
133 		      "fmov.s	@%0+, fr12\n\t"
134 		      "fmov.s	@%0+, fr13\n\t"
135 		      "fmov.s	@%0+, fr14\n\t"
136 		      "fmov.s	@%0+, fr15\n\t"
137 		      "frchg\n\t"
138 		      "lds.l	@%0+, fpscr\n\t"
139 		      "lds.l	@%0+, fpul\n\t"
140 		      :"=r" (dummy)
141 		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
142 		      :"memory");
143 	disable_fpu();
144 }
145 
146 /*
147  * Load the FPU with signalling NANS.  This bit pattern we're using
148  * has the property that no matter whether considered as single or as
149  * double precision represents signaling NANS.
 *
 * NOTE(review): the code below passes the constant 0 as the pattern, so
 * FPUL and every fr register in both banks end up holding all-zero bits
 * (+0.0), not a NaN.  Either the comment or the constant is stale - confirm
 * which one is intended.
 *
 * On return the hardware FPSCR holds FPSCR_INIT.
150  */
151 
152 static void fpu_init(void)
153 {
154 	enable_fpu();
	/*
	 * %0 = 0 goes into FPUL, %1 = FPSCR_RCHG goes into FPSCR so that
	 * frchg is legal.  FPUL is then copied into fr0..fr15, the bank is
	 * switched with frchg, FPUL is copied into fr0..fr15 of the other
	 * bank, the bank is switched back, and finally %2 = FPSCR_INIT is
	 * loaded into FPSCR.
	 */
155 	asm volatile (	"lds	%0, fpul\n\t"
156 			"lds	%1, fpscr\n\t"
157 			"fsts	fpul, fr0\n\t"
158 			"fsts	fpul, fr1\n\t"
159 			"fsts	fpul, fr2\n\t"
160 			"fsts	fpul, fr3\n\t"
161 			"fsts	fpul, fr4\n\t"
162 			"fsts	fpul, fr5\n\t"
163 			"fsts	fpul, fr6\n\t"
164 			"fsts	fpul, fr7\n\t"
165 			"fsts	fpul, fr8\n\t"
166 			"fsts	fpul, fr9\n\t"
167 			"fsts	fpul, fr10\n\t"
168 			"fsts	fpul, fr11\n\t"
169 			"fsts	fpul, fr12\n\t"
170 			"fsts	fpul, fr13\n\t"
171 			"fsts	fpul, fr14\n\t"
172 			"fsts	fpul, fr15\n\t"
173 			"frchg\n\t"
174 			"fsts	fpul, fr0\n\t"
175 			"fsts	fpul, fr1\n\t"
176 			"fsts	fpul, fr2\n\t"
177 			"fsts	fpul, fr3\n\t"
178 			"fsts	fpul, fr4\n\t"
179 			"fsts	fpul, fr5\n\t"
180 			"fsts	fpul, fr6\n\t"
181 			"fsts	fpul, fr7\n\t"
182 			"fsts	fpul, fr8\n\t"
183 			"fsts	fpul, fr9\n\t"
184 			"fsts	fpul, fr10\n\t"
185 			"fsts	fpul, fr11\n\t"
186 			"fsts	fpul, fr12\n\t"
187 			"fsts	fpul, fr13\n\t"
188 			"fsts	fpul, fr14\n\t"
189 			"fsts	fpul, fr15\n\t"
190 			"frchg\n\t"
191 			"lds	%2, fpscr\n\t"
192 			:	/* no output */
193 			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
194 	disable_fpu();
195 }
196 
197 /**
198  *      denormal_to_double - Given denormalized float number,
199  *                           store double float
200  *
201  *      @fpu: Pointer to sh_fpu_hard structure
202  *      @n: Index to FP register
203  */
204 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
205 {
206 	unsigned long du, dl;
207 	unsigned long x = fpu->fpul;
208 	int exp = 1023 - 126;
209 
210 	if (x != 0 && (x & 0x7f800000) == 0) {
211 		du = (x & 0x80000000);
212 		while ((x & 0x00800000) == 0) {
213 			x <<= 1;
214 			exp--;
215 		}
216 		x &= 0x007fffff;
217 		du |= (exp << 20) | (x >> 3);
218 		dl = x << 29;
219 
220 		fpu->fp_regs[n] = du;
221 		fpu->fp_regs[n + 1] = dl;
222 	}
223 }
224 
225 /**
226  *	ieee_fpe_handler - Handle denormalized number exception
227  *
228  *	@regs: Pointer to register structure
229  *
230  *	Returns 1 when it's handled (should not cause exception).
231  */
232 static int ieee_fpe_handler(struct pt_regs *regs)
233 {
234 	unsigned short insn = *(unsigned short *)regs->pc;
235 	unsigned short finsn;
236 	unsigned long nextpc;
237 	int nib[4] = {
238 		(insn >> 12) & 0xf,
239 		(insn >> 8) & 0xf,
240 		(insn >> 4) & 0xf,
241 		insn & 0xf
242 	};
243 
244 	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
245 		regs->pr = regs->pc + 4;  /* bsr & jsr */
246 
247 	if (nib[0] == 0xa || nib[0] == 0xb) {
248 		/* bra & bsr */
249 		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
250 		finsn = *(unsigned short *)(regs->pc + 2);
251 	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
252 		/* bt/s */
253 		if (regs->sr & 1)
254 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
255 		else
256 			nextpc = regs->pc + 4;
257 		finsn = *(unsigned short *)(regs->pc + 2);
258 	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
259 		/* bf/s */
260 		if (regs->sr & 1)
261 			nextpc = regs->pc + 4;
262 		else
263 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
264 		finsn = *(unsigned short *)(regs->pc + 2);
265 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
266 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
267 		/* jmp & jsr */
268 		nextpc = regs->regs[nib[1]];
269 		finsn = *(unsigned short *)(regs->pc + 2);
270 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
271 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
272 		/* braf & bsrf */
273 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
274 		finsn = *(unsigned short *)(regs->pc + 2);
275 	} else if (insn == 0x000b) {
276 		/* rts */
277 		nextpc = regs->pr;
278 		finsn = *(unsigned short *)(regs->pc + 2);
279 	} else {
280 		nextpc = regs->pc + instruction_size(insn);
281 		finsn = insn;
282 	}
283 
284 	if ((finsn & 0xf1ff) == 0xf0ad) {
285 		/* fcnvsd */
286 		struct task_struct *tsk = current;
287 
288 		save_fpu(tsk, regs);
289 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
290 			/* FPU error */
291 			denormal_to_double(&tsk->thread.fpu.hard,
292 					   (finsn >> 8) & 0xf);
293 		else
294 			return 0;
295 
296 		regs->pc = nextpc;
297 		return 1;
298 	} else if ((finsn & 0xf00f) == 0xf002) {
299 		/* fmul */
300 		struct task_struct *tsk = current;
301 		int fpscr;
302 		int n, m, prec;
303 		unsigned int hx, hy;
304 
305 		n = (finsn >> 8) & 0xf;
306 		m = (finsn >> 4) & 0xf;
307 		hx = tsk->thread.fpu.hard.fp_regs[n];
308 		hy = tsk->thread.fpu.hard.fp_regs[m];
309 		fpscr = tsk->thread.fpu.hard.fpscr;
310 		prec = fpscr & FPSCR_DBL_PRECISION;
311 
312 		if ((fpscr & FPSCR_CAUSE_ERROR)
313 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
314 				 || (hy & 0x7fffffff) < 0x00100000))) {
315 			long long llx, lly;
316 
317 			/* FPU error because of denormal (doubles) */
318 			llx = ((long long)hx << 32)
319 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
320 			lly = ((long long)hy << 32)
321 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
322 			llx = float64_mul(llx, lly);
323 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
324 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
325 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
326 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
327 					 || (hy & 0x7fffffff) < 0x00800000))) {
328 			/* FPU error because of denormal (floats) */
329 			hx = float32_mul(hx, hy);
330 			tsk->thread.fpu.hard.fp_regs[n] = hx;
331 		} else
332 			return 0;
333 
334 		regs->pc = nextpc;
335 		return 1;
336 	} else if ((finsn & 0xf00e) == 0xf000) {
337 		/* fadd, fsub */
338 		struct task_struct *tsk = current;
339 		int fpscr;
340 		int n, m, prec;
341 		unsigned int hx, hy;
342 
343 		n = (finsn >> 8) & 0xf;
344 		m = (finsn >> 4) & 0xf;
345 		hx = tsk->thread.fpu.hard.fp_regs[n];
346 		hy = tsk->thread.fpu.hard.fp_regs[m];
347 		fpscr = tsk->thread.fpu.hard.fpscr;
348 		prec = fpscr & FPSCR_DBL_PRECISION;
349 
350 		if ((fpscr & FPSCR_CAUSE_ERROR)
351 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
352 				 || (hy & 0x7fffffff) < 0x00100000))) {
353 			long long llx, lly;
354 
355 			/* FPU error because of denormal (doubles) */
356 			llx = ((long long)hx << 32)
357 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
358 			lly = ((long long)hy << 32)
359 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
360 			if ((finsn & 0xf00f) == 0xf000)
361 				llx = float64_add(llx, lly);
362 			else
363 				llx = float64_sub(llx, lly);
364 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
365 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
366 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
367 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
368 					 || (hy & 0x7fffffff) < 0x00800000))) {
369 			/* FPU error because of denormal (floats) */
370 			if ((finsn & 0xf00f) == 0xf000)
371 				hx = float32_add(hx, hy);
372 			else
373 				hx = float32_sub(hx, hy);
374 			tsk->thread.fpu.hard.fp_regs[n] = hx;
375 		} else
376 			return 0;
377 
378 		regs->pc = nextpc;
379 		return 1;
380 	} else if ((finsn & 0xf003) == 0xf003) {
381 		/* fdiv */
382 		struct task_struct *tsk = current;
383 		int fpscr;
384 		int n, m, prec;
385 		unsigned int hx, hy;
386 
387 		n = (finsn >> 8) & 0xf;
388 		m = (finsn >> 4) & 0xf;
389 		hx = tsk->thread.fpu.hard.fp_regs[n];
390 		hy = tsk->thread.fpu.hard.fp_regs[m];
391 		fpscr = tsk->thread.fpu.hard.fpscr;
392 		prec = fpscr & FPSCR_DBL_PRECISION;
393 
394 		if ((fpscr & FPSCR_CAUSE_ERROR)
395 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
396 				 || (hy & 0x7fffffff) < 0x00100000))) {
397 			long long llx, lly;
398 
399 			/* FPU error because of denormal (doubles) */
400 			llx = ((long long)hx << 32)
401 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
402 			lly = ((long long)hy << 32)
403 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
404 
405 			llx = float64_div(llx, lly);
406 
407 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
408 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
409 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
410 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
411 					 || (hy & 0x7fffffff) < 0x00800000))) {
412 			/* FPU error because of denormal (floats) */
413 			hx = float32_div(hx, hy);
414 			tsk->thread.fpu.hard.fp_regs[n] = hx;
415 		} else
416 			return 0;
417 
418 		regs->pc = nextpc;
419 		return 1;
420 	} else if ((finsn & 0xf0bd) == 0xf0bd) {
421 		/* fcnvds - double to single precision convert */
422 		struct task_struct *tsk = current;
423 		int m;
424 		unsigned int hx;
425 
426 		m = (finsn >> 8) & 0x7;
427 		hx = tsk->thread.fpu.hard.fp_regs[m];
428 
429 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
430 			&& ((hx & 0x7fffffff) < 0x00100000)) {
431 			/* subnormal double to float conversion */
432 			long long llx;
433 
434 			llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
435 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
436 
437 			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
438 		} else
439 			return 0;
440 
441 		regs->pc = nextpc;
442 		return 1;
443 	}
444 
445 	return 0;
446 }
447 
448 void float_raise(unsigned int flags)
449 {
450 	fpu_exception_flags |= flags;
451 }
452 
453 int float_rounding_mode(void)
454 {
455 	struct task_struct *tsk = current;
456 	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
457 	return roundingMode;
458 }
459 
460 BUILD_TRAP_HANDLER(fpu_error)
461 {
462 	struct task_struct *tsk = current;
463 	TRAP_HANDLER_DECL;
464 
465 	save_fpu(tsk, regs);
466 	fpu_exception_flags = 0;
467 	if (ieee_fpe_handler(regs)) {
468 		tsk->thread.fpu.hard.fpscr &=
469 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
470 		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
471 		/* Set the FPSCR flag as well as cause bits - simply
472 		 * replicate the cause */
473 		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
474 		grab_fpu(regs);
475 		restore_fpu(tsk);
476 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
477 		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
478 		     (fpu_exception_flags >> 2)) == 0) {
479 			return;
480 		}
481 	}
482 
483 	force_sig(SIGFPE, tsk);
484 }
485 
486 BUILD_TRAP_HANDLER(fpu_state_restore)
487 {
488 	struct task_struct *tsk = current;
489 	TRAP_HANDLER_DECL;
490 
491 	grab_fpu(regs);
492 	if (!user_mode(regs)) {
493 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
494 		return;
495 	}
496 
497 	if (used_math()) {
498 		/* Using the FPU again.  */
499 		restore_fpu(tsk);
500 	} else {
501 		/* First time FPU user.  */
502 		fpu_init();
503 		set_used_math();
504 	}
505 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
506 }
507