xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/fpu.c (revision 643d1f7f)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <asm/cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 
/*
 * FPSCR value loaded immediately before the frchg sequences in this file.
 *
 * The PR (precision) bit in the FP Status Register must be clear when an
 * frchg instruction is executed, otherwise the instruction is undefined;
 * executing frchg with PR set causes a trap on some SH4 implementations.
 * An all-zero FPSCR has PR clear.
 */
#define FPSCR_RCHG 0x00000000

/*
 * Software floating-point primitives, defined outside this file.  They are
 * used by ieee_fpe_handler() to redo an operation on raw IEEE bit patterns.
 */

/* Single precision: operands and result are 32-bit patterns. */
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);

/* Double precision: operands and result are 64-bit patterns. */
extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);

/*
 * Exception flags accumulated by float_raise().  The fpu_error trap handler
 * zeroes this before emulating an instruction and folds it into the saved
 * FPSCR afterwards.
 */
static unsigned int fpu_exception_flags;
40 
41 /*
42  * Save FPU registers onto task structure.
43  * Assume called with FPU enabled (SR.FD=0).
44  */
45 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
46 {
47 	unsigned long dummy;
48 
49 	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
50 	enable_fpu();
51 	asm volatile ("sts.l	fpul, @-%0\n\t"
52 		      "sts.l	fpscr, @-%0\n\t"
53 		      "lds	%2, fpscr\n\t"
54 		      "frchg\n\t"
55 		      "fmov.s	fr15, @-%0\n\t"
56 		      "fmov.s	fr14, @-%0\n\t"
57 		      "fmov.s	fr13, @-%0\n\t"
58 		      "fmov.s	fr12, @-%0\n\t"
59 		      "fmov.s	fr11, @-%0\n\t"
60 		      "fmov.s	fr10, @-%0\n\t"
61 		      "fmov.s	fr9, @-%0\n\t"
62 		      "fmov.s	fr8, @-%0\n\t"
63 		      "fmov.s	fr7, @-%0\n\t"
64 		      "fmov.s	fr6, @-%0\n\t"
65 		      "fmov.s	fr5, @-%0\n\t"
66 		      "fmov.s	fr4, @-%0\n\t"
67 		      "fmov.s	fr3, @-%0\n\t"
68 		      "fmov.s	fr2, @-%0\n\t"
69 		      "fmov.s	fr1, @-%0\n\t"
70 		      "fmov.s	fr0, @-%0\n\t"
71 		      "frchg\n\t"
72 		      "fmov.s	fr15, @-%0\n\t"
73 		      "fmov.s	fr14, @-%0\n\t"
74 		      "fmov.s	fr13, @-%0\n\t"
75 		      "fmov.s	fr12, @-%0\n\t"
76 		      "fmov.s	fr11, @-%0\n\t"
77 		      "fmov.s	fr10, @-%0\n\t"
78 		      "fmov.s	fr9, @-%0\n\t"
79 		      "fmov.s	fr8, @-%0\n\t"
80 		      "fmov.s	fr7, @-%0\n\t"
81 		      "fmov.s	fr6, @-%0\n\t"
82 		      "fmov.s	fr5, @-%0\n\t"
83 		      "fmov.s	fr4, @-%0\n\t"
84 		      "fmov.s	fr3, @-%0\n\t"
85 		      "fmov.s	fr2, @-%0\n\t"
86 		      "fmov.s	fr1, @-%0\n\t"
87 		      "fmov.s	fr0, @-%0\n\t"
88 		      "lds	%3, fpscr\n\t":"=r" (dummy)
89 		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
90 		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
91 		      :"memory");
92 
93 	disable_fpu();
94 	release_fpu(regs);
95 }
96 
97 static void restore_fpu(struct task_struct *tsk)
98 {
99 	unsigned long dummy;
100 
101 	enable_fpu();
102 	asm volatile ("lds	%2, fpscr\n\t"
103 		      "fmov.s	@%0+, fr0\n\t"
104 		      "fmov.s	@%0+, fr1\n\t"
105 		      "fmov.s	@%0+, fr2\n\t"
106 		      "fmov.s	@%0+, fr3\n\t"
107 		      "fmov.s	@%0+, fr4\n\t"
108 		      "fmov.s	@%0+, fr5\n\t"
109 		      "fmov.s	@%0+, fr6\n\t"
110 		      "fmov.s	@%0+, fr7\n\t"
111 		      "fmov.s	@%0+, fr8\n\t"
112 		      "fmov.s	@%0+, fr9\n\t"
113 		      "fmov.s	@%0+, fr10\n\t"
114 		      "fmov.s	@%0+, fr11\n\t"
115 		      "fmov.s	@%0+, fr12\n\t"
116 		      "fmov.s	@%0+, fr13\n\t"
117 		      "fmov.s	@%0+, fr14\n\t"
118 		      "fmov.s	@%0+, fr15\n\t"
119 		      "frchg\n\t"
120 		      "fmov.s	@%0+, fr0\n\t"
121 		      "fmov.s	@%0+, fr1\n\t"
122 		      "fmov.s	@%0+, fr2\n\t"
123 		      "fmov.s	@%0+, fr3\n\t"
124 		      "fmov.s	@%0+, fr4\n\t"
125 		      "fmov.s	@%0+, fr5\n\t"
126 		      "fmov.s	@%0+, fr6\n\t"
127 		      "fmov.s	@%0+, fr7\n\t"
128 		      "fmov.s	@%0+, fr8\n\t"
129 		      "fmov.s	@%0+, fr9\n\t"
130 		      "fmov.s	@%0+, fr10\n\t"
131 		      "fmov.s	@%0+, fr11\n\t"
132 		      "fmov.s	@%0+, fr12\n\t"
133 		      "fmov.s	@%0+, fr13\n\t"
134 		      "fmov.s	@%0+, fr14\n\t"
135 		      "fmov.s	@%0+, fr15\n\t"
136 		      "frchg\n\t"
137 		      "lds.l	@%0+, fpscr\n\t"
138 		      "lds.l	@%0+, fpul\n\t"
139 		      :"=r" (dummy)
140 		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
141 		      :"memory");
142 	disable_fpu();
143 }
144 
145 /*
146  * Load the FPU with signalling NANS.  This bit pattern we're using
147  * has the property that no matter wether considered as single or as
148  * double precision represents signaling NANS.
149  */
150 
151 static void fpu_init(void)
152 {
153 	enable_fpu();
154 	asm volatile (	"lds	%0, fpul\n\t"
155 			"lds	%1, fpscr\n\t"
156 			"fsts	fpul, fr0\n\t"
157 			"fsts	fpul, fr1\n\t"
158 			"fsts	fpul, fr2\n\t"
159 			"fsts	fpul, fr3\n\t"
160 			"fsts	fpul, fr4\n\t"
161 			"fsts	fpul, fr5\n\t"
162 			"fsts	fpul, fr6\n\t"
163 			"fsts	fpul, fr7\n\t"
164 			"fsts	fpul, fr8\n\t"
165 			"fsts	fpul, fr9\n\t"
166 			"fsts	fpul, fr10\n\t"
167 			"fsts	fpul, fr11\n\t"
168 			"fsts	fpul, fr12\n\t"
169 			"fsts	fpul, fr13\n\t"
170 			"fsts	fpul, fr14\n\t"
171 			"fsts	fpul, fr15\n\t"
172 			"frchg\n\t"
173 			"fsts	fpul, fr0\n\t"
174 			"fsts	fpul, fr1\n\t"
175 			"fsts	fpul, fr2\n\t"
176 			"fsts	fpul, fr3\n\t"
177 			"fsts	fpul, fr4\n\t"
178 			"fsts	fpul, fr5\n\t"
179 			"fsts	fpul, fr6\n\t"
180 			"fsts	fpul, fr7\n\t"
181 			"fsts	fpul, fr8\n\t"
182 			"fsts	fpul, fr9\n\t"
183 			"fsts	fpul, fr10\n\t"
184 			"fsts	fpul, fr11\n\t"
185 			"fsts	fpul, fr12\n\t"
186 			"fsts	fpul, fr13\n\t"
187 			"fsts	fpul, fr14\n\t"
188 			"fsts	fpul, fr15\n\t"
189 			"frchg\n\t"
190 			"lds	%2, fpscr\n\t"
191 			:	/* no output */
192 			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
193 	disable_fpu();
194 }
195 
196 /**
197  *      denormal_to_double - Given denormalized float number,
198  *                           store double float
199  *
200  *      @fpu: Pointer to sh_fpu_hard structure
201  *      @n: Index to FP register
202  */
203 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
204 {
205 	unsigned long du, dl;
206 	unsigned long x = fpu->fpul;
207 	int exp = 1023 - 126;
208 
209 	if (x != 0 && (x & 0x7f800000) == 0) {
210 		du = (x & 0x80000000);
211 		while ((x & 0x00800000) == 0) {
212 			x <<= 1;
213 			exp--;
214 		}
215 		x &= 0x007fffff;
216 		du |= (exp << 20) | (x >> 3);
217 		dl = x << 29;
218 
219 		fpu->fp_regs[n] = du;
220 		fpu->fp_regs[n + 1] = dl;
221 	}
222 }
223 
224 /**
225  *	ieee_fpe_handler - Handle denormalized number exception
226  *
227  *	@regs: Pointer to register structure
228  *
229  *	Returns 1 when it's handled (should not cause exception).
230  */
231 static int ieee_fpe_handler(struct pt_regs *regs)
232 {
233 	unsigned short insn = *(unsigned short *)regs->pc;
234 	unsigned short finsn;
235 	unsigned long nextpc;
236 	int nib[4] = {
237 		(insn >> 12) & 0xf,
238 		(insn >> 8) & 0xf,
239 		(insn >> 4) & 0xf,
240 		insn & 0xf
241 	};
242 
243 	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
244 		regs->pr = regs->pc + 4;  /* bsr & jsr */
245 
246 	if (nib[0] == 0xa || nib[0] == 0xb) {
247 		/* bra & bsr */
248 		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
249 		finsn = *(unsigned short *)(regs->pc + 2);
250 	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
251 		/* bt/s */
252 		if (regs->sr & 1)
253 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
254 		else
255 			nextpc = regs->pc + 4;
256 		finsn = *(unsigned short *)(regs->pc + 2);
257 	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
258 		/* bf/s */
259 		if (regs->sr & 1)
260 			nextpc = regs->pc + 4;
261 		else
262 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
263 		finsn = *(unsigned short *)(regs->pc + 2);
264 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
265 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
266 		/* jmp & jsr */
267 		nextpc = regs->regs[nib[1]];
268 		finsn = *(unsigned short *)(regs->pc + 2);
269 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
270 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
271 		/* braf & bsrf */
272 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
273 		finsn = *(unsigned short *)(regs->pc + 2);
274 	} else if (insn == 0x000b) {
275 		/* rts */
276 		nextpc = regs->pr;
277 		finsn = *(unsigned short *)(regs->pc + 2);
278 	} else {
279 		nextpc = regs->pc + instruction_size(insn);
280 		finsn = insn;
281 	}
282 
283 	if ((finsn & 0xf1ff) == 0xf0ad) {
284 		/* fcnvsd */
285 		struct task_struct *tsk = current;
286 
287 		save_fpu(tsk, regs);
288 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
289 			/* FPU error */
290 			denormal_to_double(&tsk->thread.fpu.hard,
291 					   (finsn >> 8) & 0xf);
292 		else
293 			return 0;
294 
295 		regs->pc = nextpc;
296 		return 1;
297 	} else if ((finsn & 0xf00f) == 0xf002) {
298 		/* fmul */
299 		struct task_struct *tsk = current;
300 		int fpscr;
301 		int n, m, prec;
302 		unsigned int hx, hy;
303 
304 		n = (finsn >> 8) & 0xf;
305 		m = (finsn >> 4) & 0xf;
306 		hx = tsk->thread.fpu.hard.fp_regs[n];
307 		hy = tsk->thread.fpu.hard.fp_regs[m];
308 		fpscr = tsk->thread.fpu.hard.fpscr;
309 		prec = fpscr & FPSCR_DBL_PRECISION;
310 
311 		if ((fpscr & FPSCR_CAUSE_ERROR)
312 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
313 				 || (hy & 0x7fffffff) < 0x00100000))) {
314 			long long llx, lly;
315 
316 			/* FPU error because of denormal (doubles) */
317 			llx = ((long long)hx << 32)
318 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
319 			lly = ((long long)hy << 32)
320 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
321 			llx = float64_mul(llx, lly);
322 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
323 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
324 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
325 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
326 					 || (hy & 0x7fffffff) < 0x00800000))) {
327 			/* FPU error because of denormal (floats) */
328 			hx = float32_mul(hx, hy);
329 			tsk->thread.fpu.hard.fp_regs[n] = hx;
330 		} else
331 			return 0;
332 
333 		regs->pc = nextpc;
334 		return 1;
335 	} else if ((finsn & 0xf00e) == 0xf000) {
336 		/* fadd, fsub */
337 		struct task_struct *tsk = current;
338 		int fpscr;
339 		int n, m, prec;
340 		unsigned int hx, hy;
341 
342 		n = (finsn >> 8) & 0xf;
343 		m = (finsn >> 4) & 0xf;
344 		hx = tsk->thread.fpu.hard.fp_regs[n];
345 		hy = tsk->thread.fpu.hard.fp_regs[m];
346 		fpscr = tsk->thread.fpu.hard.fpscr;
347 		prec = fpscr & FPSCR_DBL_PRECISION;
348 
349 		if ((fpscr & FPSCR_CAUSE_ERROR)
350 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
351 				 || (hy & 0x7fffffff) < 0x00100000))) {
352 			long long llx, lly;
353 
354 			/* FPU error because of denormal (doubles) */
355 			llx = ((long long)hx << 32)
356 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
357 			lly = ((long long)hy << 32)
358 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
359 			if ((finsn & 0xf00f) == 0xf000)
360 				llx = float64_add(llx, lly);
361 			else
362 				llx = float64_sub(llx, lly);
363 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
364 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
365 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
366 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
367 					 || (hy & 0x7fffffff) < 0x00800000))) {
368 			/* FPU error because of denormal (floats) */
369 			if ((finsn & 0xf00f) == 0xf000)
370 				hx = float32_add(hx, hy);
371 			else
372 				hx = float32_sub(hx, hy);
373 			tsk->thread.fpu.hard.fp_regs[n] = hx;
374 		} else
375 			return 0;
376 
377 		regs->pc = nextpc;
378 		return 1;
379 	} else if ((finsn & 0xf003) == 0xf003) {
380 		/* fdiv */
381 		struct task_struct *tsk = current;
382 		int fpscr;
383 		int n, m, prec;
384 		unsigned int hx, hy;
385 
386 		n = (finsn >> 8) & 0xf;
387 		m = (finsn >> 4) & 0xf;
388 		hx = tsk->thread.fpu.hard.fp_regs[n];
389 		hy = tsk->thread.fpu.hard.fp_regs[m];
390 		fpscr = tsk->thread.fpu.hard.fpscr;
391 		prec = fpscr & FPSCR_DBL_PRECISION;
392 
393 		if ((fpscr & FPSCR_CAUSE_ERROR)
394 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
395 				 || (hy & 0x7fffffff) < 0x00100000))) {
396 			long long llx, lly;
397 
398 			/* FPU error because of denormal (doubles) */
399 			llx = ((long long)hx << 32)
400 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
401 			lly = ((long long)hy << 32)
402 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
403 
404 			llx = float64_div(llx, lly);
405 
406 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
407 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
408 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
409 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
410 					 || (hy & 0x7fffffff) < 0x00800000))) {
411 			/* FPU error because of denormal (floats) */
412 			hx = float32_div(hx, hy);
413 			tsk->thread.fpu.hard.fp_regs[n] = hx;
414 		} else
415 			return 0;
416 
417 		regs->pc = nextpc;
418 		return 1;
419 	}
420 
421 	return 0;
422 }
423 
424 void float_raise(unsigned int flags)
425 {
426 	fpu_exception_flags |= flags;
427 }
428 
429 int float_rounding_mode(void)
430 {
431 	struct task_struct *tsk = current;
432 	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
433 	return roundingMode;
434 }
435 
436 BUILD_TRAP_HANDLER(fpu_error)
437 {
438 	struct task_struct *tsk = current;
439 	TRAP_HANDLER_DECL;
440 
441 	save_fpu(tsk, regs);
442 	fpu_exception_flags = 0;
443 	if (ieee_fpe_handler(regs)) {
444 		tsk->thread.fpu.hard.fpscr &=
445 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
446 		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
447 		/* Set the FPSCR flag as well as cause bits - simply
448 		 * replicate the cause */
449 		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
450 		grab_fpu(regs);
451 		restore_fpu(tsk);
452 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
453 		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
454 		     (fpu_exception_flags >> 2)) == 0) {
455 			return;
456 		}
457 	}
458 
459 	force_sig(SIGFPE, tsk);
460 }
461 
462 BUILD_TRAP_HANDLER(fpu_state_restore)
463 {
464 	struct task_struct *tsk = current;
465 	TRAP_HANDLER_DECL;
466 
467 	grab_fpu(regs);
468 	if (!user_mode(regs)) {
469 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
470 		return;
471 	}
472 
473 	if (used_math()) {
474 		/* Using the FPU again.  */
475 		restore_fpu(tsk);
476 	} else {
477 		/* First time FPU user.  */
478 		fpu_init();
479 		set_used_math();
480 	}
481 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
482 }
483