xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/fpu.c (revision b6dcefde)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 #include <asm/fpu.h>
20 
21 /* The PR (precision) bit in the FP Status Register must be clear when
22  * an frchg instruction is executed, otherwise the instruction is undefined.
23  * Executing frchg with PR set causes a trap on some SH4 implementations.
24  */
25 
/* FPSCR value written before each frchg in this file: every bit clear. */
#define FPSCR_RCHG 0x00000000

/*
 * Software floating-point routines, defined elsewhere.  They are called
 * from ieee_fpe_handler() with the raw register words of the operands.
 */

/* Single-precision arithmetic. */
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);

/* Double-precision arithmetic. */
extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);

/* Double to single conversion. */
extern unsigned long int float64_to_float32(unsigned long long a);

/*
 * Exception bits collected through float_raise() while an instruction is
 * being emulated; reset and consumed by the fpu_error trap handler.
 */
static unsigned int fpu_exception_flags;
41 
42 /*
43  * Save FPU registers onto task structure.
44  */
45 void save_fpu(struct task_struct *tsk)
46 {
47 	unsigned long dummy;
48 
49 	enable_fpu();
50 	asm volatile ("sts.l	fpul, @-%0\n\t"
51 		      "sts.l	fpscr, @-%0\n\t"
52 		      "lds	%2, fpscr\n\t"
53 		      "frchg\n\t"
54 		      "fmov.s	fr15, @-%0\n\t"
55 		      "fmov.s	fr14, @-%0\n\t"
56 		      "fmov.s	fr13, @-%0\n\t"
57 		      "fmov.s	fr12, @-%0\n\t"
58 		      "fmov.s	fr11, @-%0\n\t"
59 		      "fmov.s	fr10, @-%0\n\t"
60 		      "fmov.s	fr9, @-%0\n\t"
61 		      "fmov.s	fr8, @-%0\n\t"
62 		      "fmov.s	fr7, @-%0\n\t"
63 		      "fmov.s	fr6, @-%0\n\t"
64 		      "fmov.s	fr5, @-%0\n\t"
65 		      "fmov.s	fr4, @-%0\n\t"
66 		      "fmov.s	fr3, @-%0\n\t"
67 		      "fmov.s	fr2, @-%0\n\t"
68 		      "fmov.s	fr1, @-%0\n\t"
69 		      "fmov.s	fr0, @-%0\n\t"
70 		      "frchg\n\t"
71 		      "fmov.s	fr15, @-%0\n\t"
72 		      "fmov.s	fr14, @-%0\n\t"
73 		      "fmov.s	fr13, @-%0\n\t"
74 		      "fmov.s	fr12, @-%0\n\t"
75 		      "fmov.s	fr11, @-%0\n\t"
76 		      "fmov.s	fr10, @-%0\n\t"
77 		      "fmov.s	fr9, @-%0\n\t"
78 		      "fmov.s	fr8, @-%0\n\t"
79 		      "fmov.s	fr7, @-%0\n\t"
80 		      "fmov.s	fr6, @-%0\n\t"
81 		      "fmov.s	fr5, @-%0\n\t"
82 		      "fmov.s	fr4, @-%0\n\t"
83 		      "fmov.s	fr3, @-%0\n\t"
84 		      "fmov.s	fr2, @-%0\n\t"
85 		      "fmov.s	fr1, @-%0\n\t"
86 		      "fmov.s	fr0, @-%0\n\t"
87 		      "lds	%3, fpscr\n\t":"=r" (dummy)
88 		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
89 		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
90 		      :"memory");
91 
92 	disable_fpu();
93 }
94 
95 static void restore_fpu(struct task_struct *tsk)
96 {
97 	unsigned long dummy;
98 
99 	enable_fpu();
100 	asm volatile ("lds	%2, fpscr\n\t"
101 		      "fmov.s	@%0+, fr0\n\t"
102 		      "fmov.s	@%0+, fr1\n\t"
103 		      "fmov.s	@%0+, fr2\n\t"
104 		      "fmov.s	@%0+, fr3\n\t"
105 		      "fmov.s	@%0+, fr4\n\t"
106 		      "fmov.s	@%0+, fr5\n\t"
107 		      "fmov.s	@%0+, fr6\n\t"
108 		      "fmov.s	@%0+, fr7\n\t"
109 		      "fmov.s	@%0+, fr8\n\t"
110 		      "fmov.s	@%0+, fr9\n\t"
111 		      "fmov.s	@%0+, fr10\n\t"
112 		      "fmov.s	@%0+, fr11\n\t"
113 		      "fmov.s	@%0+, fr12\n\t"
114 		      "fmov.s	@%0+, fr13\n\t"
115 		      "fmov.s	@%0+, fr14\n\t"
116 		      "fmov.s	@%0+, fr15\n\t"
117 		      "frchg\n\t"
118 		      "fmov.s	@%0+, fr0\n\t"
119 		      "fmov.s	@%0+, fr1\n\t"
120 		      "fmov.s	@%0+, fr2\n\t"
121 		      "fmov.s	@%0+, fr3\n\t"
122 		      "fmov.s	@%0+, fr4\n\t"
123 		      "fmov.s	@%0+, fr5\n\t"
124 		      "fmov.s	@%0+, fr6\n\t"
125 		      "fmov.s	@%0+, fr7\n\t"
126 		      "fmov.s	@%0+, fr8\n\t"
127 		      "fmov.s	@%0+, fr9\n\t"
128 		      "fmov.s	@%0+, fr10\n\t"
129 		      "fmov.s	@%0+, fr11\n\t"
130 		      "fmov.s	@%0+, fr12\n\t"
131 		      "fmov.s	@%0+, fr13\n\t"
132 		      "fmov.s	@%0+, fr14\n\t"
133 		      "fmov.s	@%0+, fr15\n\t"
134 		      "frchg\n\t"
135 		      "lds.l	@%0+, fpscr\n\t"
136 		      "lds.l	@%0+, fpul\n\t"
137 		      :"=r" (dummy)
138 		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
139 		      :"memory");
140 	disable_fpu();
141 }
142 
143 /*
144  * Load the FPU with signalling NANS.  This bit pattern we're using
145  * has the property that no matter wether considered as single or as
146  * double precision represents signaling NANS.
147  */
148 
149 static void fpu_init(void)
150 {
151 	enable_fpu();
152 	asm volatile (	"lds	%0, fpul\n\t"
153 			"lds	%1, fpscr\n\t"
154 			"fsts	fpul, fr0\n\t"
155 			"fsts	fpul, fr1\n\t"
156 			"fsts	fpul, fr2\n\t"
157 			"fsts	fpul, fr3\n\t"
158 			"fsts	fpul, fr4\n\t"
159 			"fsts	fpul, fr5\n\t"
160 			"fsts	fpul, fr6\n\t"
161 			"fsts	fpul, fr7\n\t"
162 			"fsts	fpul, fr8\n\t"
163 			"fsts	fpul, fr9\n\t"
164 			"fsts	fpul, fr10\n\t"
165 			"fsts	fpul, fr11\n\t"
166 			"fsts	fpul, fr12\n\t"
167 			"fsts	fpul, fr13\n\t"
168 			"fsts	fpul, fr14\n\t"
169 			"fsts	fpul, fr15\n\t"
170 			"frchg\n\t"
171 			"fsts	fpul, fr0\n\t"
172 			"fsts	fpul, fr1\n\t"
173 			"fsts	fpul, fr2\n\t"
174 			"fsts	fpul, fr3\n\t"
175 			"fsts	fpul, fr4\n\t"
176 			"fsts	fpul, fr5\n\t"
177 			"fsts	fpul, fr6\n\t"
178 			"fsts	fpul, fr7\n\t"
179 			"fsts	fpul, fr8\n\t"
180 			"fsts	fpul, fr9\n\t"
181 			"fsts	fpul, fr10\n\t"
182 			"fsts	fpul, fr11\n\t"
183 			"fsts	fpul, fr12\n\t"
184 			"fsts	fpul, fr13\n\t"
185 			"fsts	fpul, fr14\n\t"
186 			"fsts	fpul, fr15\n\t"
187 			"frchg\n\t"
188 			"lds	%2, fpscr\n\t"
189 			:	/* no output */
190 			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
191 	disable_fpu();
192 }
193 
194 /**
195  *      denormal_to_double - Given denormalized float number,
196  *                           store double float
197  *
198  *      @fpu: Pointer to sh_fpu_hard structure
199  *      @n: Index to FP register
200  */
201 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
202 {
203 	unsigned long du, dl;
204 	unsigned long x = fpu->fpul;
205 	int exp = 1023 - 126;
206 
207 	if (x != 0 && (x & 0x7f800000) == 0) {
208 		du = (x & 0x80000000);
209 		while ((x & 0x00800000) == 0) {
210 			x <<= 1;
211 			exp--;
212 		}
213 		x &= 0x007fffff;
214 		du |= (exp << 20) | (x >> 3);
215 		dl = x << 29;
216 
217 		fpu->fp_regs[n] = du;
218 		fpu->fp_regs[n + 1] = dl;
219 	}
220 }
221 
222 /**
223  *	ieee_fpe_handler - Handle denormalized number exception
224  *
225  *	@regs: Pointer to register structure
226  *
227  *	Returns 1 when it's handled (should not cause exception).
228  */
229 static int ieee_fpe_handler(struct pt_regs *regs)
230 {
231 	unsigned short insn = *(unsigned short *)regs->pc;
232 	unsigned short finsn;
233 	unsigned long nextpc;
234 	int nib[4] = {
235 		(insn >> 12) & 0xf,
236 		(insn >> 8) & 0xf,
237 		(insn >> 4) & 0xf,
238 		insn & 0xf
239 	};
240 
241 	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
242 		regs->pr = regs->pc + 4;  /* bsr & jsr */
243 
244 	if (nib[0] == 0xa || nib[0] == 0xb) {
245 		/* bra & bsr */
246 		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
247 		finsn = *(unsigned short *)(regs->pc + 2);
248 	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
249 		/* bt/s */
250 		if (regs->sr & 1)
251 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
252 		else
253 			nextpc = regs->pc + 4;
254 		finsn = *(unsigned short *)(regs->pc + 2);
255 	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
256 		/* bf/s */
257 		if (regs->sr & 1)
258 			nextpc = regs->pc + 4;
259 		else
260 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
261 		finsn = *(unsigned short *)(regs->pc + 2);
262 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
263 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
264 		/* jmp & jsr */
265 		nextpc = regs->regs[nib[1]];
266 		finsn = *(unsigned short *)(regs->pc + 2);
267 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
268 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
269 		/* braf & bsrf */
270 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
271 		finsn = *(unsigned short *)(regs->pc + 2);
272 	} else if (insn == 0x000b) {
273 		/* rts */
274 		nextpc = regs->pr;
275 		finsn = *(unsigned short *)(regs->pc + 2);
276 	} else {
277 		nextpc = regs->pc + instruction_size(insn);
278 		finsn = insn;
279 	}
280 
281 	if ((finsn & 0xf1ff) == 0xf0ad) {
282 		/* fcnvsd */
283 		struct task_struct *tsk = current;
284 
285 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
286 			/* FPU error */
287 			denormal_to_double(&tsk->thread.fpu.hard,
288 					   (finsn >> 8) & 0xf);
289 		else
290 			return 0;
291 
292 		regs->pc = nextpc;
293 		return 1;
294 	} else if ((finsn & 0xf00f) == 0xf002) {
295 		/* fmul */
296 		struct task_struct *tsk = current;
297 		int fpscr;
298 		int n, m, prec;
299 		unsigned int hx, hy;
300 
301 		n = (finsn >> 8) & 0xf;
302 		m = (finsn >> 4) & 0xf;
303 		hx = tsk->thread.fpu.hard.fp_regs[n];
304 		hy = tsk->thread.fpu.hard.fp_regs[m];
305 		fpscr = tsk->thread.fpu.hard.fpscr;
306 		prec = fpscr & FPSCR_DBL_PRECISION;
307 
308 		if ((fpscr & FPSCR_CAUSE_ERROR)
309 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
310 				 || (hy & 0x7fffffff) < 0x00100000))) {
311 			long long llx, lly;
312 
313 			/* FPU error because of denormal (doubles) */
314 			llx = ((long long)hx << 32)
315 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
316 			lly = ((long long)hy << 32)
317 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
318 			llx = float64_mul(llx, lly);
319 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
320 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
321 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
322 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
323 					 || (hy & 0x7fffffff) < 0x00800000))) {
324 			/* FPU error because of denormal (floats) */
325 			hx = float32_mul(hx, hy);
326 			tsk->thread.fpu.hard.fp_regs[n] = hx;
327 		} else
328 			return 0;
329 
330 		regs->pc = nextpc;
331 		return 1;
332 	} else if ((finsn & 0xf00e) == 0xf000) {
333 		/* fadd, fsub */
334 		struct task_struct *tsk = current;
335 		int fpscr;
336 		int n, m, prec;
337 		unsigned int hx, hy;
338 
339 		n = (finsn >> 8) & 0xf;
340 		m = (finsn >> 4) & 0xf;
341 		hx = tsk->thread.fpu.hard.fp_regs[n];
342 		hy = tsk->thread.fpu.hard.fp_regs[m];
343 		fpscr = tsk->thread.fpu.hard.fpscr;
344 		prec = fpscr & FPSCR_DBL_PRECISION;
345 
346 		if ((fpscr & FPSCR_CAUSE_ERROR)
347 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
348 				 || (hy & 0x7fffffff) < 0x00100000))) {
349 			long long llx, lly;
350 
351 			/* FPU error because of denormal (doubles) */
352 			llx = ((long long)hx << 32)
353 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
354 			lly = ((long long)hy << 32)
355 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
356 			if ((finsn & 0xf00f) == 0xf000)
357 				llx = float64_add(llx, lly);
358 			else
359 				llx = float64_sub(llx, lly);
360 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
361 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
362 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
363 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
364 					 || (hy & 0x7fffffff) < 0x00800000))) {
365 			/* FPU error because of denormal (floats) */
366 			if ((finsn & 0xf00f) == 0xf000)
367 				hx = float32_add(hx, hy);
368 			else
369 				hx = float32_sub(hx, hy);
370 			tsk->thread.fpu.hard.fp_regs[n] = hx;
371 		} else
372 			return 0;
373 
374 		regs->pc = nextpc;
375 		return 1;
376 	} else if ((finsn & 0xf003) == 0xf003) {
377 		/* fdiv */
378 		struct task_struct *tsk = current;
379 		int fpscr;
380 		int n, m, prec;
381 		unsigned int hx, hy;
382 
383 		n = (finsn >> 8) & 0xf;
384 		m = (finsn >> 4) & 0xf;
385 		hx = tsk->thread.fpu.hard.fp_regs[n];
386 		hy = tsk->thread.fpu.hard.fp_regs[m];
387 		fpscr = tsk->thread.fpu.hard.fpscr;
388 		prec = fpscr & FPSCR_DBL_PRECISION;
389 
390 		if ((fpscr & FPSCR_CAUSE_ERROR)
391 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
392 				 || (hy & 0x7fffffff) < 0x00100000))) {
393 			long long llx, lly;
394 
395 			/* FPU error because of denormal (doubles) */
396 			llx = ((long long)hx << 32)
397 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
398 			lly = ((long long)hy << 32)
399 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
400 
401 			llx = float64_div(llx, lly);
402 
403 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
404 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
405 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
406 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
407 					 || (hy & 0x7fffffff) < 0x00800000))) {
408 			/* FPU error because of denormal (floats) */
409 			hx = float32_div(hx, hy);
410 			tsk->thread.fpu.hard.fp_regs[n] = hx;
411 		} else
412 			return 0;
413 
414 		regs->pc = nextpc;
415 		return 1;
416 	} else if ((finsn & 0xf0bd) == 0xf0bd) {
417 		/* fcnvds - double to single precision convert */
418 		struct task_struct *tsk = current;
419 		int m;
420 		unsigned int hx;
421 
422 		m = (finsn >> 8) & 0x7;
423 		hx = tsk->thread.fpu.hard.fp_regs[m];
424 
425 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
426 			&& ((hx & 0x7fffffff) < 0x00100000)) {
427 			/* subnormal double to float conversion */
428 			long long llx;
429 
430 			llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
431 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
432 
433 			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
434 		} else
435 			return 0;
436 
437 		regs->pc = nextpc;
438 		return 1;
439 	}
440 
441 	return 0;
442 }
443 
444 void float_raise(unsigned int flags)
445 {
446 	fpu_exception_flags |= flags;
447 }
448 
449 int float_rounding_mode(void)
450 {
451 	struct task_struct *tsk = current;
452 	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
453 	return roundingMode;
454 }
455 
456 BUILD_TRAP_HANDLER(fpu_error)
457 {
458 	struct task_struct *tsk = current;
459 	TRAP_HANDLER_DECL;
460 
461 	__unlazy_fpu(tsk, regs);
462 	fpu_exception_flags = 0;
463 	if (ieee_fpe_handler(regs)) {
464 		tsk->thread.fpu.hard.fpscr &=
465 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
466 		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
467 		/* Set the FPSCR flag as well as cause bits - simply
468 		 * replicate the cause */
469 		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
470 		grab_fpu(regs);
471 		restore_fpu(tsk);
472 		task_thread_info(tsk)->status |= TS_USEDFPU;
473 		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
474 		     (fpu_exception_flags >> 2)) == 0) {
475 			return;
476 		}
477 	}
478 
479 	force_sig(SIGFPE, tsk);
480 }
481 
482 void fpu_state_restore(struct pt_regs *regs)
483 {
484 	struct task_struct *tsk = current;
485 
486 	grab_fpu(regs);
487 	if (unlikely(!user_mode(regs))) {
488 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
489 		BUG();
490 		return;
491 	}
492 
493 	if (likely(used_math())) {
494 		/* Using the FPU again.  */
495 		restore_fpu(tsk);
496 	} else {
497 		/* First time FPU user.  */
498 		fpu_init();
499 		set_used_math();
500 	}
501 	task_thread_info(tsk)->status |= TS_USEDFPU;
502 	tsk->fpu_counter++;
503 }
504 
505 BUILD_TRAP_HANDLER(fpu_state_restore)
506 {
507 	TRAP_HANDLER_DECL;
508 
509 	fpu_state_restore(regs);
510 }
511