xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/fpu.c (revision f3539c12)
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/fpu.h>
19 #include <asm/traps.h>
20 
/*
 * frchg is only defined while the PR (precision) bit of the FP Status
 * Register is clear.  With PR set the instruction is undefined, and it
 * traps on some SH4 implementations.  FPSCR_RCHG is the value written to
 * FPSCR by the save/restore routines in this file before they issue frchg.
 */
#define FPSCR_RCHG 0x00000000

/*
 * Software floating point routines, defined outside this file.  The
 * denormal emulation code below feeds them raw FP register contents.
 */

/* Operations on 64-bit operands. */
unsigned long long float64_add(unsigned long long a, unsigned long long b);
unsigned long long float64_sub(unsigned long long a, unsigned long long b);
unsigned long long float64_mul(unsigned long long a, unsigned long long b);
unsigned long long float64_div(unsigned long long a, unsigned long long b);

/* Operations on single-word operands. */
unsigned long float32_add(unsigned long a, unsigned long b);
unsigned long float32_sub(unsigned long a, unsigned long b);
unsigned long float32_mul(unsigned long a, unsigned long b);
unsigned long float32_div(unsigned long a, unsigned long b);

/* Narrowing conversion from a 64-bit operand to a single word. */
unsigned long float64_to_float32(unsigned long long a);

/*
 * Exception bits accumulated through float_raise(); cleared by the
 * fpu_error trap handler before it attempts an emulation.
 */
static unsigned int fpu_exception_flags;
41 
/*
 * Save FPU registers onto task structure.
 *
 * @tsk: task whose thread.xstate->hardfpu area receives the register dump.
 *
 * The store pointer starts at the address of hardfpu.status and every store
 * pre-decrements it, so the values are laid out downwards from just below
 * 'status' in this order: fpul, fpscr, fr15..fr0 as seen after the first
 * frchg, then fr15..fr0 as seen after the second frchg.
 *
 * FPSCR is overwritten with FPSCR_RCHG after its original value has been
 * stored and before the first frchg (see the PR-bit note next to the
 * FPSCR_RCHG definition), and it is left holding FPSCR_INIT on exit.
 * The work is bracketed by enable_fpu()/disable_fpu().
 */
void save_fpu(struct task_struct *tsk)
{
	/* Receives the final value of the moving store pointer; never read. */
	unsigned long dummy;

	enable_fpu();
	/* %0 = store pointer, %2 = FPSCR_RCHG, %3 = FPSCR_INIT */
	asm volatile ("sts.l	fpul, @-%0\n\t"
		      "sts.l	fpscr, @-%0\n\t"
		      /* original FPSCR is stored; now load FPSCR_RCHG for frchg */
		      "lds	%2, fpscr\n\t"
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      /* second frchg, then dump the sixteen registers again */
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      /* leave FPSCR at FPSCR_INIT */
		      "lds	%3, fpscr\n\t":"=r" (dummy)
		      :"0"((char *)(&tsk->thread.xstate->hardfpu.status)),
		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
		      :"memory");

	disable_fpu();
}
94 
/*
 * Reload the hardware FPU registers from a task's saved state.
 *
 * @tsk: task whose thread.xstate area is read.
 *
 * The load pointer starts at tsk->thread.xstate and every load
 * post-increments it.  FPSCR is first set to FPSCR_RCHG, then the words are
 * consumed in this order: fr0..fr15, frchg, fr0..fr15, frchg, and finally
 * the saved fpscr and fpul values.  This is the reverse of the order in
 * which save_fpu() writes them.  The work is bracketed by
 * enable_fpu()/disable_fpu().
 */
void restore_fpu(struct task_struct *tsk)
{
	/* Receives the final value of the moving load pointer; never read. */
	unsigned long dummy;

	enable_fpu();
	/* %0 = load pointer, %2 = FPSCR_RCHG */
	asm volatile ("lds	%2, fpscr\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      /* frchg, then fill fr0..fr15 from the next sixteen words */
		      "frchg\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      "frchg\n\t"
		      /* the task's own FPSCR is loaded only after the last frchg */
		      "lds.l	@%0+, fpscr\n\t"
		      "lds.l	@%0+, fpul\n\t"
		      :"=r" (dummy)
		      :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
		      :"memory");
	disable_fpu();
}
142 
143 /**
144  *      denormal_to_double - Given denormalized float number,
145  *                           store double float
146  *
147  *      @fpu: Pointer to sh_fpu_hard structure
148  *      @n: Index to FP register
149  */
150 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
151 {
152 	unsigned long du, dl;
153 	unsigned long x = fpu->fpul;
154 	int exp = 1023 - 126;
155 
156 	if (x != 0 && (x & 0x7f800000) == 0) {
157 		du = (x & 0x80000000);
158 		while ((x & 0x00800000) == 0) {
159 			x <<= 1;
160 			exp--;
161 		}
162 		x &= 0x007fffff;
163 		du |= (exp << 20) | (x >> 3);
164 		dl = x << 29;
165 
166 		fpu->fp_regs[n] = du;
167 		fpu->fp_regs[n + 1] = dl;
168 	}
169 }
170 
171 /**
172  *	ieee_fpe_handler - Handle denormalized number exception
173  *
174  *	@regs: Pointer to register structure
175  *
176  *	Returns 1 when it's handled (should not cause exception).
177  */
178 static int ieee_fpe_handler(struct pt_regs *regs)
179 {
180 	unsigned short insn = *(unsigned short *)regs->pc;
181 	unsigned short finsn;
182 	unsigned long nextpc;
183 	int nib[4] = {
184 		(insn >> 12) & 0xf,
185 		(insn >> 8) & 0xf,
186 		(insn >> 4) & 0xf,
187 		insn & 0xf
188 	};
189 
190 	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
191 		regs->pr = regs->pc + 4;  /* bsr & jsr */
192 
193 	if (nib[0] == 0xa || nib[0] == 0xb) {
194 		/* bra & bsr */
195 		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
196 		finsn = *(unsigned short *)(regs->pc + 2);
197 	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
198 		/* bt/s */
199 		if (regs->sr & 1)
200 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
201 		else
202 			nextpc = regs->pc + 4;
203 		finsn = *(unsigned short *)(regs->pc + 2);
204 	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
205 		/* bf/s */
206 		if (regs->sr & 1)
207 			nextpc = regs->pc + 4;
208 		else
209 			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
210 		finsn = *(unsigned short *)(regs->pc + 2);
211 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
212 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
213 		/* jmp & jsr */
214 		nextpc = regs->regs[nib[1]];
215 		finsn = *(unsigned short *)(regs->pc + 2);
216 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
217 		   (nib[2] == 0x0 || nib[2] == 0x2)) {
218 		/* braf & bsrf */
219 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
220 		finsn = *(unsigned short *)(regs->pc + 2);
221 	} else if (insn == 0x000b) {
222 		/* rts */
223 		nextpc = regs->pr;
224 		finsn = *(unsigned short *)(regs->pc + 2);
225 	} else {
226 		nextpc = regs->pc + instruction_size(insn);
227 		finsn = insn;
228 	}
229 
230 	if ((finsn & 0xf1ff) == 0xf0ad) {
231 		/* fcnvsd */
232 		struct task_struct *tsk = current;
233 
234 		if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR))
235 			/* FPU error */
236 			denormal_to_double(&tsk->thread.xstate->hardfpu,
237 					   (finsn >> 8) & 0xf);
238 		else
239 			return 0;
240 
241 		regs->pc = nextpc;
242 		return 1;
243 	} else if ((finsn & 0xf00f) == 0xf002) {
244 		/* fmul */
245 		struct task_struct *tsk = current;
246 		int fpscr;
247 		int n, m, prec;
248 		unsigned int hx, hy;
249 
250 		n = (finsn >> 8) & 0xf;
251 		m = (finsn >> 4) & 0xf;
252 		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
253 		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
254 		fpscr = tsk->thread.xstate->hardfpu.fpscr;
255 		prec = fpscr & FPSCR_DBL_PRECISION;
256 
257 		if ((fpscr & FPSCR_CAUSE_ERROR)
258 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
259 				 || (hy & 0x7fffffff) < 0x00100000))) {
260 			long long llx, lly;
261 
262 			/* FPU error because of denormal (doubles) */
263 			llx = ((long long)hx << 32)
264 			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
265 			lly = ((long long)hy << 32)
266 			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
267 			llx = float64_mul(llx, lly);
268 			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
269 			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
270 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
271 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
272 					 || (hy & 0x7fffffff) < 0x00800000))) {
273 			/* FPU error because of denormal (floats) */
274 			hx = float32_mul(hx, hy);
275 			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
276 		} else
277 			return 0;
278 
279 		regs->pc = nextpc;
280 		return 1;
281 	} else if ((finsn & 0xf00e) == 0xf000) {
282 		/* fadd, fsub */
283 		struct task_struct *tsk = current;
284 		int fpscr;
285 		int n, m, prec;
286 		unsigned int hx, hy;
287 
288 		n = (finsn >> 8) & 0xf;
289 		m = (finsn >> 4) & 0xf;
290 		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
291 		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
292 		fpscr = tsk->thread.xstate->hardfpu.fpscr;
293 		prec = fpscr & FPSCR_DBL_PRECISION;
294 
295 		if ((fpscr & FPSCR_CAUSE_ERROR)
296 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
297 				 || (hy & 0x7fffffff) < 0x00100000))) {
298 			long long llx, lly;
299 
300 			/* FPU error because of denormal (doubles) */
301 			llx = ((long long)hx << 32)
302 			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
303 			lly = ((long long)hy << 32)
304 			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
305 			if ((finsn & 0xf00f) == 0xf000)
306 				llx = float64_add(llx, lly);
307 			else
308 				llx = float64_sub(llx, lly);
309 			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
310 			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
311 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
312 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
313 					 || (hy & 0x7fffffff) < 0x00800000))) {
314 			/* FPU error because of denormal (floats) */
315 			if ((finsn & 0xf00f) == 0xf000)
316 				hx = float32_add(hx, hy);
317 			else
318 				hx = float32_sub(hx, hy);
319 			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
320 		} else
321 			return 0;
322 
323 		regs->pc = nextpc;
324 		return 1;
325 	} else if ((finsn & 0xf003) == 0xf003) {
326 		/* fdiv */
327 		struct task_struct *tsk = current;
328 		int fpscr;
329 		int n, m, prec;
330 		unsigned int hx, hy;
331 
332 		n = (finsn >> 8) & 0xf;
333 		m = (finsn >> 4) & 0xf;
334 		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
335 		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
336 		fpscr = tsk->thread.xstate->hardfpu.fpscr;
337 		prec = fpscr & FPSCR_DBL_PRECISION;
338 
339 		if ((fpscr & FPSCR_CAUSE_ERROR)
340 		    && (prec && ((hx & 0x7fffffff) < 0x00100000
341 				 || (hy & 0x7fffffff) < 0x00100000))) {
342 			long long llx, lly;
343 
344 			/* FPU error because of denormal (doubles) */
345 			llx = ((long long)hx << 32)
346 			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
347 			lly = ((long long)hy << 32)
348 			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
349 
350 			llx = float64_div(llx, lly);
351 
352 			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
353 			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
354 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
355 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
356 					 || (hy & 0x7fffffff) < 0x00800000))) {
357 			/* FPU error because of denormal (floats) */
358 			hx = float32_div(hx, hy);
359 			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
360 		} else
361 			return 0;
362 
363 		regs->pc = nextpc;
364 		return 1;
365 	} else if ((finsn & 0xf0bd) == 0xf0bd) {
366 		/* fcnvds - double to single precision convert */
367 		struct task_struct *tsk = current;
368 		int m;
369 		unsigned int hx;
370 
371 		m = (finsn >> 8) & 0x7;
372 		hx = tsk->thread.xstate->hardfpu.fp_regs[m];
373 
374 		if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)
375 			&& ((hx & 0x7fffffff) < 0x00100000)) {
376 			/* subnormal double to float conversion */
377 			long long llx;
378 
379 			llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32)
380 			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
381 
382 			tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx);
383 		} else
384 			return 0;
385 
386 		regs->pc = nextpc;
387 		return 1;
388 	}
389 
390 	return 0;
391 }
392 
393 void float_raise(unsigned int flags)
394 {
395 	fpu_exception_flags |= flags;
396 }
397 
398 int float_rounding_mode(void)
399 {
400 	struct task_struct *tsk = current;
401 	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr);
402 	return roundingMode;
403 }
404 
405 BUILD_TRAP_HANDLER(fpu_error)
406 {
407 	struct task_struct *tsk = current;
408 	TRAP_HANDLER_DECL;
409 
410 	__unlazy_fpu(tsk, regs);
411 	fpu_exception_flags = 0;
412 	if (ieee_fpe_handler(regs)) {
413 		tsk->thread.xstate->hardfpu.fpscr &=
414 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
415 		tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags;
416 		/* Set the FPSCR flag as well as cause bits - simply
417 		 * replicate the cause */
418 		tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10);
419 		grab_fpu(regs);
420 		restore_fpu(tsk);
421 		task_thread_info(tsk)->status |= TS_USEDFPU;
422 		if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) &
423 		     (fpu_exception_flags >> 2)) == 0) {
424 			return;
425 		}
426 	}
427 
428 	force_sig(SIGFPE, tsk);
429 }
430