xref: /openbmc/linux/arch/powerpc/math-emu/math_efp.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * arch/powerpc/math-emu/math_efp.c
4   *
5   * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
6   *
7   * Author: Ebony Zhu,	<ebony.zhu@freescale.com>
8   *         Yu Liu,	<yu.liu@freescale.com>
9   *
10   * Derived from arch/alpha/math-emu/math.c
11   *              arch/powerpc/math-emu/math.c
12   *
13   * Description:
14   * This file is the exception handler to make E500 SPE instructions
15   * fully comply with IEEE-754 floating point standard.
16   */
17  
18  #include <linux/types.h>
19  #include <linux/prctl.h>
20  #include <linux/module.h>
21  
22  #include <linux/uaccess.h>
23  #include <asm/reg.h>
24  
25  #define FP_EX_BOOKE_E500_SPE
26  #include <asm/sfp-machine.h>
27  
28  #include <math-emu/soft-fp.h>
29  #include <math-emu/single.h>
30  #include <math-emu/double.h>
31  
/*
 * Primary opcode (instruction >> 26) of the instructions handled in this
 * file; both handlers reject anything else with -EINVAL.
 */
#define EFAPU		0x4

/*
 * Operand format, taken from bits 5..7 of the instruction
 * ((insn >> 5) & 0x7): vector single, scalar single and scalar double
 * precision respectively.
 */
#define VCT		0x4
#define SPFP		0x6
#define DPFP		0x7

/* Scalar single-precision opcodes (insn & 0x7ff). */
#define EFSADD		0x2c0
#define EFSSUB		0x2c1
#define EFSABS		0x2c4
#define EFSNABS		0x2c5
#define EFSNEG		0x2c6
#define EFSMUL		0x2c8
#define EFSDIV		0x2c9
#define EFSCMPGT	0x2cc
#define EFSCMPLT	0x2cd
#define EFSCMPEQ	0x2ce
#define EFSCFD		0x2cf
#define EFSCFSI		0x2d1
#define EFSCTUI		0x2d4
#define EFSCTSI		0x2d5
#define EFSCTUF		0x2d6
#define EFSCTSF		0x2d7
#define EFSCTUIZ	0x2d8
#define EFSCTSIZ	0x2da

/* Vector single-precision opcodes (insn & 0x7ff). */
#define EVFSADD		0x280
#define EVFSSUB		0x281
#define EVFSABS		0x284
#define EVFSNABS	0x285
#define EVFSNEG		0x286
#define EVFSMUL		0x288
#define EVFSDIV		0x289
#define EVFSCMPGT	0x28c
#define EVFSCMPLT	0x28d
#define EVFSCMPEQ	0x28e
#define EVFSCTUI	0x294
#define EVFSCTSI	0x295
#define EVFSCTUF	0x296
#define EVFSCTSF	0x297
#define EVFSCTUIZ	0x298
#define EVFSCTSIZ	0x29a

/* Scalar double-precision opcodes (insn & 0x7ff). */
#define EFDADD		0x2e0
#define EFDSUB		0x2e1
#define EFDABS		0x2e4
#define EFDNABS		0x2e5
#define EFDNEG		0x2e6
#define EFDMUL		0x2e8
#define EFDDIV		0x2e9
#define EFDCTUIDZ	0x2ea
#define EFDCTSIDZ	0x2eb
#define EFDCMPGT	0x2ec
#define EFDCMPLT	0x2ed
#define EFDCMPEQ	0x2ee
#define EFDCFS		0x2ef
#define EFDCTUI		0x2f4
#define EFDCTSI		0x2f5
#define EFDCTUF		0x2f6
#define EFDCTSF		0x2f7
#define EFDCTUIZ	0x2f8
#define EFDCTSIZ	0x2fa

/*
 * Operand classes returned by insn_type():
 *   AB     - both source registers A and B are unpacked
 *   XA     - only source register A is unpacked
 *   XB     - only source register B is unpacked
 *   XCR    - compare of A and B; the result is written to a CR field
 *   NOTYPE - opcode not known to insn_type()
 */
#define AB	2
#define XA	3
#define XB	4
#define XCR	5
#define NOTYPE	0

/* Sign bit of a 32-bit (single) and of a 64-bit (double) value. */
#define SIGN_BIT_S	(1UL << 31)
#define SIGN_BIT_D	(1ULL << 63)

/* Exception flags that do_spe_mathemu() merges into SPEFSCR. */
#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
104  
/*
 * Non-zero when the running CPU needs the e500 "A005" erratum workaround.
 * Set by spe_mathemu_init() and consulted on the illegal-instruction
 * path of do_spe_mathemu().
 */
static int have_e500_cpu_a005_erratum;
106  
107  union dw_union {
108  	u64 dp[1];
109  	u32 wp[2];
110  };
111  
insn_type(unsigned long speinsn)112  static unsigned long insn_type(unsigned long speinsn)
113  {
114  	unsigned long ret = NOTYPE;
115  
116  	switch (speinsn & 0x7ff) {
117  	case EFSABS:	ret = XA;	break;
118  	case EFSADD:	ret = AB;	break;
119  	case EFSCFD:	ret = XB;	break;
120  	case EFSCMPEQ:	ret = XCR;	break;
121  	case EFSCMPGT:	ret = XCR;	break;
122  	case EFSCMPLT:	ret = XCR;	break;
123  	case EFSCTSF:	ret = XB;	break;
124  	case EFSCTSI:	ret = XB;	break;
125  	case EFSCTSIZ:	ret = XB;	break;
126  	case EFSCTUF:	ret = XB;	break;
127  	case EFSCTUI:	ret = XB;	break;
128  	case EFSCTUIZ:	ret = XB;	break;
129  	case EFSDIV:	ret = AB;	break;
130  	case EFSMUL:	ret = AB;	break;
131  	case EFSNABS:	ret = XA;	break;
132  	case EFSNEG:	ret = XA;	break;
133  	case EFSSUB:	ret = AB;	break;
134  	case EFSCFSI:	ret = XB;	break;
135  
136  	case EVFSABS:	ret = XA;	break;
137  	case EVFSADD:	ret = AB;	break;
138  	case EVFSCMPEQ:	ret = XCR;	break;
139  	case EVFSCMPGT:	ret = XCR;	break;
140  	case EVFSCMPLT:	ret = XCR;	break;
141  	case EVFSCTSF:	ret = XB;	break;
142  	case EVFSCTSI:	ret = XB;	break;
143  	case EVFSCTSIZ:	ret = XB;	break;
144  	case EVFSCTUF:	ret = XB;	break;
145  	case EVFSCTUI:	ret = XB;	break;
146  	case EVFSCTUIZ:	ret = XB;	break;
147  	case EVFSDIV:	ret = AB;	break;
148  	case EVFSMUL:	ret = AB;	break;
149  	case EVFSNABS:	ret = XA;	break;
150  	case EVFSNEG:	ret = XA;	break;
151  	case EVFSSUB:	ret = AB;	break;
152  
153  	case EFDABS:	ret = XA;	break;
154  	case EFDADD:	ret = AB;	break;
155  	case EFDCFS:	ret = XB;	break;
156  	case EFDCMPEQ:	ret = XCR;	break;
157  	case EFDCMPGT:	ret = XCR;	break;
158  	case EFDCMPLT:	ret = XCR;	break;
159  	case EFDCTSF:	ret = XB;	break;
160  	case EFDCTSI:	ret = XB;	break;
161  	case EFDCTSIDZ:	ret = XB;	break;
162  	case EFDCTSIZ:	ret = XB;	break;
163  	case EFDCTUF:	ret = XB;	break;
164  	case EFDCTUI:	ret = XB;	break;
165  	case EFDCTUIDZ:	ret = XB;	break;
166  	case EFDCTUIZ:	ret = XB;	break;
167  	case EFDDIV:	ret = AB;	break;
168  	case EFDMUL:	ret = AB;	break;
169  	case EFDNABS:	ret = XA;	break;
170  	case EFDNEG:	ret = XA;	break;
171  	case EFDSUB:	ret = AB;	break;
172  	}
173  
174  	return ret;
175  }
176  
do_spe_mathemu(struct pt_regs * regs)177  int do_spe_mathemu(struct pt_regs *regs)
178  {
179  	FP_DECL_EX;
180  	int IR, cmp;
181  
182  	unsigned long type, func, fc, fa, fb, src, speinsn;
183  	union dw_union vc, va, vb;
184  
185  	if (get_user(speinsn, (unsigned int __user *) regs->nip))
186  		return -EFAULT;
187  	if ((speinsn >> 26) != EFAPU)
188  		return -EINVAL;         /* not an spe instruction */
189  
190  	type = insn_type(speinsn);
191  	if (type == NOTYPE)
192  		goto illegal;
193  
194  	func = speinsn & 0x7ff;
195  	fc = (speinsn >> 21) & 0x1f;
196  	fa = (speinsn >> 16) & 0x1f;
197  	fb = (speinsn >> 11) & 0x1f;
198  	src = (speinsn >> 5) & 0x7;
199  
200  	vc.wp[0] = current->thread.evr[fc];
201  	vc.wp[1] = regs->gpr[fc];
202  	va.wp[0] = current->thread.evr[fa];
203  	va.wp[1] = regs->gpr[fa];
204  	vb.wp[0] = current->thread.evr[fb];
205  	vb.wp[1] = regs->gpr[fb];
206  
207  	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
208  
209  	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
210  	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
211  	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
212  	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
213  
214  	switch (src) {
215  	case SPFP: {
216  		FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
217  
218  		switch (type) {
219  		case AB:
220  		case XCR:
221  			FP_UNPACK_SP(SA, va.wp + 1);
222  			fallthrough;
223  		case XB:
224  			FP_UNPACK_SP(SB, vb.wp + 1);
225  			break;
226  		case XA:
227  			FP_UNPACK_SP(SA, va.wp + 1);
228  			break;
229  		}
230  
231  		pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c);
232  		pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c);
233  
234  		switch (func) {
235  		case EFSABS:
236  			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
237  			goto update_regs;
238  
239  		case EFSNABS:
240  			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
241  			goto update_regs;
242  
243  		case EFSNEG:
244  			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
245  			goto update_regs;
246  
247  		case EFSADD:
248  			FP_ADD_S(SR, SA, SB);
249  			goto pack_s;
250  
251  		case EFSSUB:
252  			FP_SUB_S(SR, SA, SB);
253  			goto pack_s;
254  
255  		case EFSMUL:
256  			FP_MUL_S(SR, SA, SB);
257  			goto pack_s;
258  
259  		case EFSDIV:
260  			FP_DIV_S(SR, SA, SB);
261  			goto pack_s;
262  
263  		case EFSCMPEQ:
264  			cmp = 0;
265  			goto cmp_s;
266  
267  		case EFSCMPGT:
268  			cmp = 1;
269  			goto cmp_s;
270  
271  		case EFSCMPLT:
272  			cmp = -1;
273  			goto cmp_s;
274  
275  		case EFSCTSF:
276  		case EFSCTUF:
277  			if (SB_c == FP_CLS_NAN) {
278  				vc.wp[1] = 0;
279  				FP_SET_EXCEPTION(FP_EX_INVALID);
280  			} else {
281  				SB_e += (func == EFSCTSF ? 31 : 32);
282  				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
283  						(func == EFSCTSF) ? 1 : 0);
284  			}
285  			goto update_regs;
286  
287  		case EFSCFD: {
288  			FP_DECL_D(DB);
289  			FP_CLEAR_EXCEPTIONS;
290  			FP_UNPACK_DP(DB, vb.dp);
291  
292  			pr_debug("DB: %d %08x %08x %d (%d)\n",
293  					DB_s, DB_f1, DB_f0, DB_e, DB_c);
294  
295  			FP_CONV(S, D, 1, 2, SR, DB);
296  			goto pack_s;
297  		}
298  
299  		case EFSCTSI:
300  		case EFSCTUI:
301  			if (SB_c == FP_CLS_NAN) {
302  				vc.wp[1] = 0;
303  				FP_SET_EXCEPTION(FP_EX_INVALID);
304  			} else {
305  				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
306  						((func & 0x3) != 0) ? 1 : 0);
307  			}
308  			goto update_regs;
309  
310  		case EFSCTSIZ:
311  		case EFSCTUIZ:
312  			if (SB_c == FP_CLS_NAN) {
313  				vc.wp[1] = 0;
314  				FP_SET_EXCEPTION(FP_EX_INVALID);
315  			} else {
316  				FP_TO_INT_S(vc.wp[1], SB, 32,
317  						((func & 0x3) != 0) ? 1 : 0);
318  			}
319  			goto update_regs;
320  
321  		default:
322  			goto illegal;
323  		}
324  		break;
325  
326  pack_s:
327  		pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c);
328  
329  		FP_PACK_SP(vc.wp + 1, SR);
330  		goto update_regs;
331  
332  cmp_s:
333  		FP_CMP_S(IR, SA, SB, 3);
334  		if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
335  			FP_SET_EXCEPTION(FP_EX_INVALID);
336  		if (IR == cmp) {
337  			IR = 0x4;
338  		} else {
339  			IR = 0;
340  		}
341  		goto update_ccr;
342  	}
343  
344  	case DPFP: {
345  		FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
346  
347  		switch (type) {
348  		case AB:
349  		case XCR:
350  			FP_UNPACK_DP(DA, va.dp);
351  			fallthrough;
352  		case XB:
353  			FP_UNPACK_DP(DB, vb.dp);
354  			break;
355  		case XA:
356  			FP_UNPACK_DP(DA, va.dp);
357  			break;
358  		}
359  
360  		pr_debug("DA: %d %08x %08x %d (%d)\n",
361  				DA_s, DA_f1, DA_f0, DA_e, DA_c);
362  		pr_debug("DB: %d %08x %08x %d (%d)\n",
363  				DB_s, DB_f1, DB_f0, DB_e, DB_c);
364  
365  		switch (func) {
366  		case EFDABS:
367  			vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
368  			goto update_regs;
369  
370  		case EFDNABS:
371  			vc.dp[0] = va.dp[0] | SIGN_BIT_D;
372  			goto update_regs;
373  
374  		case EFDNEG:
375  			vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
376  			goto update_regs;
377  
378  		case EFDADD:
379  			FP_ADD_D(DR, DA, DB);
380  			goto pack_d;
381  
382  		case EFDSUB:
383  			FP_SUB_D(DR, DA, DB);
384  			goto pack_d;
385  
386  		case EFDMUL:
387  			FP_MUL_D(DR, DA, DB);
388  			goto pack_d;
389  
390  		case EFDDIV:
391  			FP_DIV_D(DR, DA, DB);
392  			goto pack_d;
393  
394  		case EFDCMPEQ:
395  			cmp = 0;
396  			goto cmp_d;
397  
398  		case EFDCMPGT:
399  			cmp = 1;
400  			goto cmp_d;
401  
402  		case EFDCMPLT:
403  			cmp = -1;
404  			goto cmp_d;
405  
406  		case EFDCTSF:
407  		case EFDCTUF:
408  			if (DB_c == FP_CLS_NAN) {
409  				vc.wp[1] = 0;
410  				FP_SET_EXCEPTION(FP_EX_INVALID);
411  			} else {
412  				DB_e += (func == EFDCTSF ? 31 : 32);
413  				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
414  						(func == EFDCTSF) ? 1 : 0);
415  			}
416  			goto update_regs;
417  
418  		case EFDCFS: {
419  			FP_DECL_S(SB);
420  			FP_CLEAR_EXCEPTIONS;
421  			FP_UNPACK_SP(SB, vb.wp + 1);
422  
423  			pr_debug("SB: %d %08x %d (%d)\n",
424  					SB_s, SB_f, SB_e, SB_c);
425  
426  			FP_CONV(D, S, 2, 1, DR, SB);
427  			goto pack_d;
428  		}
429  
430  		case EFDCTUIDZ:
431  		case EFDCTSIDZ:
432  			if (DB_c == FP_CLS_NAN) {
433  				vc.dp[0] = 0;
434  				FP_SET_EXCEPTION(FP_EX_INVALID);
435  			} else {
436  				FP_TO_INT_D(vc.dp[0], DB, 64,
437  						((func & 0x1) == 0) ? 1 : 0);
438  			}
439  			goto update_regs;
440  
441  		case EFDCTUI:
442  		case EFDCTSI:
443  			if (DB_c == FP_CLS_NAN) {
444  				vc.wp[1] = 0;
445  				FP_SET_EXCEPTION(FP_EX_INVALID);
446  			} else {
447  				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
448  						((func & 0x3) != 0) ? 1 : 0);
449  			}
450  			goto update_regs;
451  
452  		case EFDCTUIZ:
453  		case EFDCTSIZ:
454  			if (DB_c == FP_CLS_NAN) {
455  				vc.wp[1] = 0;
456  				FP_SET_EXCEPTION(FP_EX_INVALID);
457  			} else {
458  				FP_TO_INT_D(vc.wp[1], DB, 32,
459  						((func & 0x3) != 0) ? 1 : 0);
460  			}
461  			goto update_regs;
462  
463  		default:
464  			goto illegal;
465  		}
466  		break;
467  
468  pack_d:
469  		pr_debug("DR: %d %08x %08x %d (%d)\n",
470  				DR_s, DR_f1, DR_f0, DR_e, DR_c);
471  
472  		FP_PACK_DP(vc.dp, DR);
473  		goto update_regs;
474  
475  cmp_d:
476  		FP_CMP_D(IR, DA, DB, 3);
477  		if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
478  			FP_SET_EXCEPTION(FP_EX_INVALID);
479  		if (IR == cmp) {
480  			IR = 0x4;
481  		} else {
482  			IR = 0;
483  		}
484  		goto update_ccr;
485  
486  	}
487  
488  	case VCT: {
489  		FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
490  		FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
491  		int IR0, IR1;
492  
493  		switch (type) {
494  		case AB:
495  		case XCR:
496  			FP_UNPACK_SP(SA0, va.wp);
497  			FP_UNPACK_SP(SA1, va.wp + 1);
498  			fallthrough;
499  		case XB:
500  			FP_UNPACK_SP(SB0, vb.wp);
501  			FP_UNPACK_SP(SB1, vb.wp + 1);
502  			break;
503  		case XA:
504  			FP_UNPACK_SP(SA0, va.wp);
505  			FP_UNPACK_SP(SA1, va.wp + 1);
506  			break;
507  		}
508  
509  		pr_debug("SA0: %d %08x %d (%d)\n",
510  				SA0_s, SA0_f, SA0_e, SA0_c);
511  		pr_debug("SA1: %d %08x %d (%d)\n",
512  				SA1_s, SA1_f, SA1_e, SA1_c);
513  		pr_debug("SB0: %d %08x %d (%d)\n",
514  				SB0_s, SB0_f, SB0_e, SB0_c);
515  		pr_debug("SB1: %d %08x %d (%d)\n",
516  				SB1_s, SB1_f, SB1_e, SB1_c);
517  
518  		switch (func) {
519  		case EVFSABS:
520  			vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
521  			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
522  			goto update_regs;
523  
524  		case EVFSNABS:
525  			vc.wp[0] = va.wp[0] | SIGN_BIT_S;
526  			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
527  			goto update_regs;
528  
529  		case EVFSNEG:
530  			vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
531  			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
532  			goto update_regs;
533  
534  		case EVFSADD:
535  			FP_ADD_S(SR0, SA0, SB0);
536  			FP_ADD_S(SR1, SA1, SB1);
537  			goto pack_vs;
538  
539  		case EVFSSUB:
540  			FP_SUB_S(SR0, SA0, SB0);
541  			FP_SUB_S(SR1, SA1, SB1);
542  			goto pack_vs;
543  
544  		case EVFSMUL:
545  			FP_MUL_S(SR0, SA0, SB0);
546  			FP_MUL_S(SR1, SA1, SB1);
547  			goto pack_vs;
548  
549  		case EVFSDIV:
550  			FP_DIV_S(SR0, SA0, SB0);
551  			FP_DIV_S(SR1, SA1, SB1);
552  			goto pack_vs;
553  
554  		case EVFSCMPEQ:
555  			cmp = 0;
556  			goto cmp_vs;
557  
558  		case EVFSCMPGT:
559  			cmp = 1;
560  			goto cmp_vs;
561  
562  		case EVFSCMPLT:
563  			cmp = -1;
564  			goto cmp_vs;
565  
566  		case EVFSCTUF:
567  		case EVFSCTSF:
568  			if (SB0_c == FP_CLS_NAN) {
569  				vc.wp[0] = 0;
570  				FP_SET_EXCEPTION(FP_EX_INVALID);
571  			} else {
572  				SB0_e += (func == EVFSCTSF ? 31 : 32);
573  				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
574  						(func == EVFSCTSF) ? 1 : 0);
575  			}
576  			if (SB1_c == FP_CLS_NAN) {
577  				vc.wp[1] = 0;
578  				FP_SET_EXCEPTION(FP_EX_INVALID);
579  			} else {
580  				SB1_e += (func == EVFSCTSF ? 31 : 32);
581  				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
582  						(func == EVFSCTSF) ? 1 : 0);
583  			}
584  			goto update_regs;
585  
586  		case EVFSCTUI:
587  		case EVFSCTSI:
588  			if (SB0_c == FP_CLS_NAN) {
589  				vc.wp[0] = 0;
590  				FP_SET_EXCEPTION(FP_EX_INVALID);
591  			} else {
592  				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
593  						((func & 0x3) != 0) ? 1 : 0);
594  			}
595  			if (SB1_c == FP_CLS_NAN) {
596  				vc.wp[1] = 0;
597  				FP_SET_EXCEPTION(FP_EX_INVALID);
598  			} else {
599  				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
600  						((func & 0x3) != 0) ? 1 : 0);
601  			}
602  			goto update_regs;
603  
604  		case EVFSCTUIZ:
605  		case EVFSCTSIZ:
606  			if (SB0_c == FP_CLS_NAN) {
607  				vc.wp[0] = 0;
608  				FP_SET_EXCEPTION(FP_EX_INVALID);
609  			} else {
610  				FP_TO_INT_S(vc.wp[0], SB0, 32,
611  						((func & 0x3) != 0) ? 1 : 0);
612  			}
613  			if (SB1_c == FP_CLS_NAN) {
614  				vc.wp[1] = 0;
615  				FP_SET_EXCEPTION(FP_EX_INVALID);
616  			} else {
617  				FP_TO_INT_S(vc.wp[1], SB1, 32,
618  						((func & 0x3) != 0) ? 1 : 0);
619  			}
620  			goto update_regs;
621  
622  		default:
623  			goto illegal;
624  		}
625  		break;
626  
627  pack_vs:
628  		pr_debug("SR0: %d %08x %d (%d)\n",
629  				SR0_s, SR0_f, SR0_e, SR0_c);
630  		pr_debug("SR1: %d %08x %d (%d)\n",
631  				SR1_s, SR1_f, SR1_e, SR1_c);
632  
633  		FP_PACK_SP(vc.wp, SR0);
634  		FP_PACK_SP(vc.wp + 1, SR1);
635  		goto update_regs;
636  
637  cmp_vs:
638  		{
639  			int ch, cl;
640  
641  			FP_CMP_S(IR0, SA0, SB0, 3);
642  			FP_CMP_S(IR1, SA1, SB1, 3);
643  			if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
644  				FP_SET_EXCEPTION(FP_EX_INVALID);
645  			if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
646  				FP_SET_EXCEPTION(FP_EX_INVALID);
647  			ch = (IR0 == cmp) ? 1 : 0;
648  			cl = (IR1 == cmp) ? 1 : 0;
649  			IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
650  				((ch & cl) << 0);
651  			goto update_ccr;
652  		}
653  	}
654  	default:
655  		return -EINVAL;
656  	}
657  
658  update_ccr:
659  	regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
660  	regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
661  
662  update_regs:
663  	/*
664  	 * If the "invalid" exception sticky bit was set by the
665  	 * processor for non-finite input, but was not set before the
666  	 * instruction being emulated, clear it.  Likewise for the
667  	 * "underflow" bit, which may have been set by the processor
668  	 * for exact underflow, not just inexact underflow when the
669  	 * flag should be set for IEEE 754 semantics.  Other sticky
670  	 * exceptions will only be set by the processor when they are
671  	 * correct according to IEEE 754 semantics, and we must not
672  	 * clear sticky bits that were already set before the emulated
673  	 * instruction as they represent the user-visible sticky
674  	 * exception status.  "inexact" traps to kernel are not
675  	 * required for IEEE semantics and are not enabled by default,
676  	 * so the "inexact" sticky bit may have been set by a previous
677  	 * instruction without the kernel being aware of it.
678  	 */
679  	__FPU_FPSCR
680  	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
681  	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
682  	mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
683  	current->thread.spefscr_last = __FPU_FPSCR;
684  
685  	current->thread.evr[fc] = vc.wp[0];
686  	regs->gpr[fc] = vc.wp[1];
687  
688  	pr_debug("ccr = %08lx\n", regs->ccr);
689  	pr_debug("cur exceptions = %08x spefscr = %08lx\n",
690  			FP_CUR_EXCEPTIONS, __FPU_FPSCR);
691  	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
692  	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
693  	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
694  
695  	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
696  		if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
697  		    && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
698  			return 1;
699  		if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
700  		    && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
701  			return 1;
702  		if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
703  		    && (current->thread.fpexc_mode & PR_FP_EXC_UND))
704  			return 1;
705  		if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
706  		    && (current->thread.fpexc_mode & PR_FP_EXC_RES))
707  			return 1;
708  		if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
709  		    && (current->thread.fpexc_mode & PR_FP_EXC_INV))
710  			return 1;
711  	}
712  	return 0;
713  
714  illegal:
715  	if (have_e500_cpu_a005_erratum) {
716  		/* according to e500 cpu a005 erratum, reissue efp inst */
717  		regs_add_return_ip(regs, -4);
718  		pr_debug("re-issue efp inst: %08lx\n", speinsn);
719  		return 0;
720  	}
721  
722  	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
723  	return -ENOSYS;
724  }
725  
speround_handler(struct pt_regs * regs)726  int speround_handler(struct pt_regs *regs)
727  {
728  	union dw_union fgpr;
729  	int s_lo, s_hi;
730  	int lo_inexact, hi_inexact;
731  	int fp_result;
732  	unsigned long speinsn, type, fb, fc, fptype, func;
733  
734  	if (get_user(speinsn, (unsigned int __user *) regs->nip))
735  		return -EFAULT;
736  	if ((speinsn >> 26) != 4)
737  		return -EINVAL;         /* not an spe instruction */
738  
739  	func = speinsn & 0x7ff;
740  	type = insn_type(func);
741  	if (type == XCR) return -ENOSYS;
742  
743  	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
744  	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
745  
746  	fptype = (speinsn >> 5) & 0x7;
747  
748  	/* No need to round if the result is exact */
749  	lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
750  	hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
751  	if (!(lo_inexact || (hi_inexact && fptype == VCT)))
752  		return 0;
753  
754  	fc = (speinsn >> 21) & 0x1f;
755  	s_lo = regs->gpr[fc] & SIGN_BIT_S;
756  	s_hi = current->thread.evr[fc] & SIGN_BIT_S;
757  	fgpr.wp[0] = current->thread.evr[fc];
758  	fgpr.wp[1] = regs->gpr[fc];
759  
760  	fb = (speinsn >> 11) & 0x1f;
761  	switch (func) {
762  	case EFSCTUIZ:
763  	case EFSCTSIZ:
764  	case EVFSCTUIZ:
765  	case EVFSCTSIZ:
766  	case EFDCTUIDZ:
767  	case EFDCTSIDZ:
768  	case EFDCTUIZ:
769  	case EFDCTSIZ:
770  		/*
771  		 * These instructions always round to zero,
772  		 * independent of the rounding mode.
773  		 */
774  		return 0;
775  
776  	case EFSCTUI:
777  	case EFSCTUF:
778  	case EVFSCTUI:
779  	case EVFSCTUF:
780  	case EFDCTUI:
781  	case EFDCTUF:
782  		fp_result = 0;
783  		s_lo = 0;
784  		s_hi = 0;
785  		break;
786  
787  	case EFSCTSI:
788  	case EFSCTSF:
789  		fp_result = 0;
790  		/* Recover the sign of a zero result if possible.  */
791  		if (fgpr.wp[1] == 0)
792  			s_lo = regs->gpr[fb] & SIGN_BIT_S;
793  		break;
794  
795  	case EVFSCTSI:
796  	case EVFSCTSF:
797  		fp_result = 0;
798  		/* Recover the sign of a zero result if possible.  */
799  		if (fgpr.wp[1] == 0)
800  			s_lo = regs->gpr[fb] & SIGN_BIT_S;
801  		if (fgpr.wp[0] == 0)
802  			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
803  		break;
804  
805  	case EFDCTSI:
806  	case EFDCTSF:
807  		fp_result = 0;
808  		s_hi = s_lo;
809  		/* Recover the sign of a zero result if possible.  */
810  		if (fgpr.wp[1] == 0)
811  			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
812  		break;
813  
814  	default:
815  		fp_result = 1;
816  		break;
817  	}
818  
819  	pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
820  
821  	switch (fptype) {
822  	/* Since SPE instructions on E500 core can handle round to nearest
823  	 * and round toward zero with IEEE-754 complied, we just need
824  	 * to handle round toward +Inf and round toward -Inf by software.
825  	 */
826  	case SPFP:
827  		if ((FP_ROUNDMODE) == FP_RND_PINF) {
828  			if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
829  		} else { /* round to -Inf */
830  			if (s_lo) {
831  				if (fp_result)
832  					fgpr.wp[1]++; /* Z < 0, choose Z2 */
833  				else
834  					fgpr.wp[1]--; /* Z < 0, choose Z2 */
835  			}
836  		}
837  		break;
838  
839  	case DPFP:
840  		if (FP_ROUNDMODE == FP_RND_PINF) {
841  			if (!s_hi) {
842  				if (fp_result)
843  					fgpr.dp[0]++; /* Z > 0, choose Z1 */
844  				else
845  					fgpr.wp[1]++; /* Z > 0, choose Z1 */
846  			}
847  		} else { /* round to -Inf */
848  			if (s_hi) {
849  				if (fp_result)
850  					fgpr.dp[0]++; /* Z < 0, choose Z2 */
851  				else
852  					fgpr.wp[1]--; /* Z < 0, choose Z2 */
853  			}
854  		}
855  		break;
856  
857  	case VCT:
858  		if (FP_ROUNDMODE == FP_RND_PINF) {
859  			if (lo_inexact && !s_lo)
860  				fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
861  			if (hi_inexact && !s_hi)
862  				fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
863  		} else { /* round to -Inf */
864  			if (lo_inexact && s_lo) {
865  				if (fp_result)
866  					fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
867  				else
868  					fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
869  			}
870  			if (hi_inexact && s_hi) {
871  				if (fp_result)
872  					fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
873  				else
874  					fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
875  			}
876  		}
877  		break;
878  
879  	default:
880  		return -EINVAL;
881  	}
882  
883  	current->thread.evr[fc] = fgpr.wp[0];
884  	regs->gpr[fc] = fgpr.wp[1];
885  
886  	pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
887  
888  	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
889  		return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
890  	return 0;
891  }
892  
spe_mathemu_init(void)893  static int __init spe_mathemu_init(void)
894  {
895  	u32 pvr, maj, min;
896  
897  	pvr = mfspr(SPRN_PVR);
898  
899  	if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
900  	    (PVR_VER(pvr) == PVR_VER_E500V2)) {
901  		maj = PVR_MAJ(pvr);
902  		min = PVR_MIN(pvr);
903  
904  		/*
905  		 * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1
906  		 * need cpu a005 errata workaround
907  		 */
908  		switch (maj) {
909  		case 1:
910  			if (min < 1)
911  				have_e500_cpu_a005_erratum = 1;
912  			break;
913  		case 2:
914  			if (min < 3)
915  				have_e500_cpu_a005_erratum = 1;
916  			break;
917  		case 3:
918  		case 4:
919  		case 5:
920  			if (min < 1)
921  				have_e500_cpu_a005_erratum = 1;
922  			break;
923  		default:
924  			break;
925  		}
926  	}
927  
928  	return 0;
929  }
930  
931  module_init(spe_mathemu_init);
932