xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/fpu.c (revision 22246614)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <asm/cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 #include <asm/fpu.h>
20 
/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * FPSCR_RCHG is the FPSCR value (all bits clear, hence PR clear) that the
 * save/restore/init routines in this file load before they execute frchg.
 */

#define FPSCR_RCHG 0x00000000
/*
 * Software floating-point helpers, defined outside this file.  They take
 * and return raw bit patterns (32-bit for float32_*, 64-bit for float64_*)
 * and are called by ieee_fpe_handler() to redo an operation in software.
 */
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);

/*
 * Exception bits accumulated through float_raise().  The fpu_error trap
 * handler zeroes this before calling ieee_fpe_handler() and afterwards
 * folds it into the task's saved FPSCR.  It is a single file-scope
 * variable, not per-task state.
 */
static unsigned int fpu_exception_flags;
41 
/*
 * Save FPU registers onto task structure.
 *
 * @tsk:  task whose thread.fpu.hard area receives the FPU state
 * @regs: register frame handed to release_fpu() once the save is done
 *
 * This was documented as "assume called with FPU enabled (SR.FD=0)", but
 * the body calls enable_fpu() itself before touching any FPU register.
 *
 * The asm walks a pointer (%0) downwards from &tsk->thread.fpu.hard.status
 * using pre-decrement stores, writing in this order: FPUL, FPSCR, fr15..fr0
 * of the bank selected after the first frchg, then fr15..fr0 of the other
 * bank.  restore_fpu() reads the same area back in ascending order.
 * NOTE(review): this relies on the field order of the fpu.hard structure,
 * which is declared outside this file - confirm against its definition.
 *
 * On exit TIF_USEDFPU is cleared for @tsk, the hardware FPSCR holds
 * FPSCR_INIT (not the task's value), and the FPU has been disabled and
 * released again.
 */
void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	unsigned long dummy;	/* receives the final value of the walking pointer */

	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	enable_fpu();
	asm volatile ("sts.l	fpul, @-%0\n\t"
		      "sts.l	fpscr, @-%0\n\t"
		      /* FPSCR := FPSCR_RCHG so that PR is clear before frchg */
		      "lds	%2, fpscr\n\t"
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      /* switch back to the first bank and store it as well */
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      /* leave the hardware FPSCR at FPSCR_INIT (%3) */
		      "lds	%3, fpscr\n\t":"=r" (dummy)
		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
		      :"memory");

	disable_fpu();
	release_fpu(regs);
}
97 
/*
 * Reload the hardware FPU state from a task structure.
 *
 * @tsk: task whose thread.fpu area holds the state to load
 *
 * Mirror image of save_fpu(): a pointer (%0) starts at &tsk->thread.fpu and
 * is walked upwards with post-increment loads.  FPSCR is first set to
 * FPSCR_RCHG (%2) so that PR is clear for frchg; then fr0..fr15 of one bank
 * are loaded, frchg switches banks, fr0..fr15 of the other bank are loaded,
 * a second frchg switches back, and finally FPSCR and FPUL are loaded from
 * memory.  The FPU is enabled for the duration of the asm and disabled
 * again before returning.
 */
static void restore_fpu(struct task_struct *tsk)
{
	unsigned long dummy;	/* receives the final value of the walking pointer */

	enable_fpu();
	asm volatile ("lds	%2, fpscr\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      /* other register bank */
		      "frchg\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      "frchg\n\t"
		      /* the task's own FPSCR and FPUL come last */
		      "lds.l	@%0+, fpscr\n\t"
		      "lds.l	@%0+, fpul\n\t"
		      :"=r" (dummy)
		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
		      :"memory");
	disable_fpu();
}
145 
/*
 * Load the FPU with signalling NANS.  This bit pattern we're using
 * has the property that no matter whether considered as single or as
 * double precision represents signaling NANS.
 *
 * NOTE(review): the value actually loaded into FPUL below is 0, so every
 * register in both banks is filled with the all-zero bit pattern rather
 * than a NaN pattern - the description above looks stale; confirm intent.
 *
 * Sequence: FPUL := 0, FPSCR := FPSCR_RCHG (PR clear, as frchg requires),
 * copy FPUL into fr0..fr15, frchg, copy FPUL into fr0..fr15 of the other
 * bank, frchg back, then FPSCR := FPSCR_INIT.  The FPU is enabled for the
 * duration and disabled again before returning.
 */

static void fpu_init(void)
{
	enable_fpu();
	asm volatile (	"lds	%0, fpul\n\t"
			"lds	%1, fpscr\n\t"
			"fsts	fpul, fr0\n\t"
			"fsts	fpul, fr1\n\t"
			"fsts	fpul, fr2\n\t"
			"fsts	fpul, fr3\n\t"
			"fsts	fpul, fr4\n\t"
			"fsts	fpul, fr5\n\t"
			"fsts	fpul, fr6\n\t"
			"fsts	fpul, fr7\n\t"
			"fsts	fpul, fr8\n\t"
			"fsts	fpul, fr9\n\t"
			"fsts	fpul, fr10\n\t"
			"fsts	fpul, fr11\n\t"
			"fsts	fpul, fr12\n\t"
			"fsts	fpul, fr13\n\t"
			"fsts	fpul, fr14\n\t"
			"fsts	fpul, fr15\n\t"
			/* same fill for the other register bank */
			"frchg\n\t"
			"fsts	fpul, fr0\n\t"
			"fsts	fpul, fr1\n\t"
			"fsts	fpul, fr2\n\t"
			"fsts	fpul, fr3\n\t"
			"fsts	fpul, fr4\n\t"
			"fsts	fpul, fr5\n\t"
			"fsts	fpul, fr6\n\t"
			"fsts	fpul, fr7\n\t"
			"fsts	fpul, fr8\n\t"
			"fsts	fpul, fr9\n\t"
			"fsts	fpul, fr10\n\t"
			"fsts	fpul, fr11\n\t"
			"fsts	fpul, fr12\n\t"
			"fsts	fpul, fr13\n\t"
			"fsts	fpul, fr14\n\t"
			"fsts	fpul, fr15\n\t"
			"frchg\n\t"
			"lds	%2, fpscr\n\t"
			:	/* no output */
			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
	disable_fpu();
}
196 
197 /**
198  *      denormal_to_double - Given denormalized float number,
199  *                           store double float
200  *
201  *      @fpu: Pointer to sh_fpu_hard structure
202  *      @n: Index to FP register
203  */
204 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
205 {
206 	unsigned long du, dl;
207 	unsigned long x = fpu->fpul;
208 	int exp = 1023 - 126;
209 
210 	if (x != 0 && (x & 0x7f800000) == 0) {
211 		du = (x & 0x80000000);
212 		while ((x & 0x00800000) == 0) {
213 			x <<= 1;
214 			exp--;
215 		}
216 		x &= 0x007fffff;
217 		du |= (exp << 20) | (x >> 3);
218 		dl = x << 29;
219 
220 		fpu->fp_regs[n] = du;
221 		fpu->fp_regs[n + 1] = dl;
222 	}
223 }
224 
225 /**
226  *	ieee_fpe_handler - Handle denormalized number exception
227  *
228  *	@regs: Pointer to register structure
229  *
230  *	Returns 1 when it's handled (should not cause exception).
231  */
232 static int ieee_fpe_handler(struct pt_regs *regs)
233 {
234 	unsigned short insn = *(unsigned short *)regs->pc;
235 	unsigned short finsn;
236 	unsigned long nextpc;
237 	int nib[4] = {
238 		(insn >> 12) & 0xf,
239 		(insn >> 8) & 0xf,
240 		(insn >> 4) & 0xf,
241 		insn & 0xf
242 	};
243 
244 	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
245 		regs->pr = regs->pc + 4;  /* bsr & jsr */
246 
247 	if (nib[0] == 0xa || nib[0] == 0xb) {
248 		/* bra & bsr */
249 		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
250 		finsn = *(unsigned short *)(regs->pc + 2);
251 	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
252 		/* bt/s */
253 		if (regs->sr & 1)
254 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
255 		else
256 			nextpc = regs->pc + 4;
257 		finsn = *(unsigned short *)(regs->pc + 2);
258 	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
259 		/* bf/s */
260 		if (regs->sr & 1)
261 			nextpc = regs->pc + 4;
262 		else
263 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
264 		finsn = *(unsigned short *)(regs->pc + 2);
265 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
266 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
267 		/* jmp & jsr */
268 		nextpc = regs->regs[nib[1]];
269 		finsn = *(unsigned short *)(regs->pc + 2);
270 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
271 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
272 		/* braf & bsrf */
273 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
274 		finsn = *(unsigned short *)(regs->pc + 2);
275 	} else if (insn == 0x000b) {
276 		/* rts */
277 		nextpc = regs->pr;
278 		finsn = *(unsigned short *)(regs->pc + 2);
279 	} else {
280 		nextpc = regs->pc + instruction_size(insn);
281 		finsn = insn;
282 	}
283 
284 	if ((finsn & 0xf1ff) == 0xf0ad) {
285 		/* fcnvsd */
286 		struct task_struct *tsk = current;
287 
288 		save_fpu(tsk, regs);
289 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
290 			/* FPU error */
291 			denormal_to_double(&tsk->thread.fpu.hard,
292 					   (finsn >> 8) & 0xf);
293 		else
294 			return 0;
295 
296 		regs->pc = nextpc;
297 		return 1;
298 	} else if ((finsn & 0xf00f) == 0xf002) {
299 		/* fmul */
300 		struct task_struct *tsk = current;
301 		int fpscr;
302 		int n, m, prec;
303 		unsigned int hx, hy;
304 
305 		n = (finsn >> 8) & 0xf;
306 		m = (finsn >> 4) & 0xf;
307 		hx = tsk->thread.fpu.hard.fp_regs[n];
308 		hy = tsk->thread.fpu.hard.fp_regs[m];
309 		fpscr = tsk->thread.fpu.hard.fpscr;
310 		prec = fpscr & FPSCR_DBL_PRECISION;
311 
312 		if ((fpscr & FPSCR_CAUSE_ERROR)
313 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
314 				 || (hy & 0x7fffffff) < 0x00100000))) {
315 			long long llx, lly;
316 
317 			/* FPU error because of denormal (doubles) */
318 			llx = ((long long)hx << 32)
319 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
320 			lly = ((long long)hy << 32)
321 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
322 			llx = float64_mul(llx, lly);
323 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
324 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
325 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
326 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
327 					 || (hy & 0x7fffffff) < 0x00800000))) {
328 			/* FPU error because of denormal (floats) */
329 			hx = float32_mul(hx, hy);
330 			tsk->thread.fpu.hard.fp_regs[n] = hx;
331 		} else
332 			return 0;
333 
334 		regs->pc = nextpc;
335 		return 1;
336 	} else if ((finsn & 0xf00e) == 0xf000) {
337 		/* fadd, fsub */
338 		struct task_struct *tsk = current;
339 		int fpscr;
340 		int n, m, prec;
341 		unsigned int hx, hy;
342 
343 		n = (finsn >> 8) & 0xf;
344 		m = (finsn >> 4) & 0xf;
345 		hx = tsk->thread.fpu.hard.fp_regs[n];
346 		hy = tsk->thread.fpu.hard.fp_regs[m];
347 		fpscr = tsk->thread.fpu.hard.fpscr;
348 		prec = fpscr & FPSCR_DBL_PRECISION;
349 
350 		if ((fpscr & FPSCR_CAUSE_ERROR)
351 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
352 				 || (hy & 0x7fffffff) < 0x00100000))) {
353 			long long llx, lly;
354 
355 			/* FPU error because of denormal (doubles) */
356 			llx = ((long long)hx << 32)
357 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
358 			lly = ((long long)hy << 32)
359 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
360 			if ((finsn & 0xf00f) == 0xf000)
361 				llx = float64_add(llx, lly);
362 			else
363 				llx = float64_sub(llx, lly);
364 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
365 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
366 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
367 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
368 					 || (hy & 0x7fffffff) < 0x00800000))) {
369 			/* FPU error because of denormal (floats) */
370 			if ((finsn & 0xf00f) == 0xf000)
371 				hx = float32_add(hx, hy);
372 			else
373 				hx = float32_sub(hx, hy);
374 			tsk->thread.fpu.hard.fp_regs[n] = hx;
375 		} else
376 			return 0;
377 
378 		regs->pc = nextpc;
379 		return 1;
380 	} else if ((finsn & 0xf003) == 0xf003) {
381 		/* fdiv */
382 		struct task_struct *tsk = current;
383 		int fpscr;
384 		int n, m, prec;
385 		unsigned int hx, hy;
386 
387 		n = (finsn >> 8) & 0xf;
388 		m = (finsn >> 4) & 0xf;
389 		hx = tsk->thread.fpu.hard.fp_regs[n];
390 		hy = tsk->thread.fpu.hard.fp_regs[m];
391 		fpscr = tsk->thread.fpu.hard.fpscr;
392 		prec = fpscr & FPSCR_DBL_PRECISION;
393 
394 		if ((fpscr & FPSCR_CAUSE_ERROR)
395 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
396 				 || (hy & 0x7fffffff) < 0x00100000))) {
397 			long long llx, lly;
398 
399 			/* FPU error because of denormal (doubles) */
400 			llx = ((long long)hx << 32)
401 			    | tsk->thread.fpu.hard.fp_regs[n + 1];
402 			lly = ((long long)hy << 32)
403 			    | tsk->thread.fpu.hard.fp_regs[m + 1];
404 
405 			llx = float64_div(llx, lly);
406 
407 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
408 			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
409 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
410 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
411 					 || (hy & 0x7fffffff) < 0x00800000))) {
412 			/* FPU error because of denormal (floats) */
413 			hx = float32_div(hx, hy);
414 			tsk->thread.fpu.hard.fp_regs[n] = hx;
415 		} else
416 			return 0;
417 
418 		regs->pc = nextpc;
419 		return 1;
420 	}
421 
422 	return 0;
423 }
424 
425 void float_raise(unsigned int flags)
426 {
427 	fpu_exception_flags |= flags;
428 }
429 
430 int float_rounding_mode(void)
431 {
432 	struct task_struct *tsk = current;
433 	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
434 	return roundingMode;
435 }
436 
437 BUILD_TRAP_HANDLER(fpu_error)
438 {
439 	struct task_struct *tsk = current;
440 	TRAP_HANDLER_DECL;
441 
442 	save_fpu(tsk, regs);
443 	fpu_exception_flags = 0;
444 	if (ieee_fpe_handler(regs)) {
445 		tsk->thread.fpu.hard.fpscr &=
446 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
447 		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
448 		/* Set the FPSCR flag as well as cause bits - simply
449 		 * replicate the cause */
450 		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
451 		grab_fpu(regs);
452 		restore_fpu(tsk);
453 		set_tsk_thread_flag(tsk, TIF_USEDFPU);
454 		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
455 		     (fpu_exception_flags >> 2)) == 0) {
456 			return;
457 		}
458 	}
459 
460 	force_sig(SIGFPE, tsk);
461 }
462 
463 BUILD_TRAP_HANDLER(fpu_state_restore)
464 {
465 	struct task_struct *tsk = current;
466 	TRAP_HANDLER_DECL;
467 
468 	grab_fpu(regs);
469 	if (!user_mode(regs)) {
470 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
471 		return;
472 	}
473 
474 	if (used_math()) {
475 		/* Using the FPU again.  */
476 		restore_fpu(tsk);
477 	} else {
478 		/* First time FPU user.  */
479 		fpu_init();
480 		set_used_math();
481 	}
482 	set_tsk_thread_flag(tsk, TIF_USEDFPU);
483 }
484