xref: /openbmc/linux/arch/sparc/kernel/visemul.c (revision 498495dba268b20e8eadd7fe93c140c68b6cc9d2)
1  // SPDX-License-Identifier: GPL-2.0
2  /* visemul.c: Emulation of VIS instructions.
3   *
4   * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
5   */
6  #include <linux/kernel.h>
7  #include <linux/errno.h>
8  #include <linux/thread_info.h>
9  #include <linux/perf_event.h>
10  
11  #include <asm/ptrace.h>
12  #include <asm/pstate.h>
13  #include <asm/fpumacro.h>
14  #include <linux/uaccess.h>
15  #include <asm/cacheflush.h>
16  
17  /* OPF field of various VIS instructions.  */
18  
19  /* 000111011 - four 16-bit packs  */
20  #define FPACK16_OPF	0x03b
21  
22  /* 000111010 - two 32-bit packs  */
23  #define FPACK32_OPF	0x03a
24  
25  /* 000111101 - two 32-bit to 16-bit fixed packs  */
26  #define FPACKFIX_OPF	0x03d
27  
28  /* 001001101 - four 16-bit expands  */
29  #define FEXPAND_OPF	0x04d
30  
31  /* 001001011 - two 32-bit merges */
32  #define FPMERGE_OPF	0x04b
33  
34  /* 000110001 - 8-by-16-bit partitioned product  */
35  #define FMUL8x16_OPF	0x031
36  
37  /* 000110011 - 8-by-16-bit upper alpha partitioned product  */
38  #define FMUL8x16AU_OPF	0x033
39  
40  /* 000110101 - 8-by-16-bit lower alpha partitioned product  */
41  #define FMUL8x16AL_OPF	0x035
42  
43  /* 000110110 - upper 8-by-16-bit partitioned product  */
44  #define FMUL8SUx16_OPF	0x036
45  
46  /* 000110111 - lower 8-by-16-bit partitioned product  */
47  #define FMUL8ULx16_OPF	0x037
48  
49  /* 000111000 - upper 8-by-16-bit partitioned product, two 32-bit results  */
50  #define FMULD8SUx16_OPF	0x038
51  
52  /* 000111001 - lower unsigned 8-by-16-bit partitioned product, two 32-bit results  */
53  #define FMULD8ULx16_OPF	0x039
54  
55  /* 000101000 - four 16-bit compare; set rd if src1 > src2  */
56  #define FCMPGT16_OPF	0x028
57  
58  /* 000101100 - two 32-bit compare; set rd if src1 > src2  */
59  #define FCMPGT32_OPF	0x02c
60  
61  /* 000100000 - four 16-bit compare; set rd if src1 <= src2  */
62  #define FCMPLE16_OPF	0x020
63  
64  /* 000100100 - two 32-bit compare; set rd if src1 <= src2  */
65  #define FCMPLE32_OPF	0x024
66  
67  /* 000100010 - four 16-bit compare; set rd if src1 != src2  */
68  #define FCMPNE16_OPF	0x022
69  
70  /* 000100110 - two 32-bit compare; set rd if src1 != src2  */
71  #define FCMPNE32_OPF	0x026
72  
73  /* 000101010 - four 16-bit compare; set rd if src1 == src2  */
74  #define FCMPEQ16_OPF	0x02a
75  
76  /* 000101110 - two 32-bit compare; set rd if src1 == src2  */
77  #define FCMPEQ32_OPF	0x02e
78  
79  /* 000000000 - Eight 8-bit edge boundary processing  */
80  #define EDGE8_OPF	0x000
81  
82  /* 000000001 - Eight 8-bit edge boundary processing, no CC */
83  #define EDGE8N_OPF	0x001
84  
85  /* 000000010 - Eight 8-bit edge boundary processing, little-endian  */
86  #define EDGE8L_OPF	0x002
87  
88  /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC  */
89  #define EDGE8LN_OPF	0x003
90  
91  /* 000000100 - Four 16-bit edge boundary processing  */
92  #define EDGE16_OPF	0x004
93  
94  /* 000000101 - Four 16-bit edge boundary processing, no CC  */
95  #define EDGE16N_OPF	0x005
96  
97  /* 000000110 - Four 16-bit edge boundary processing, little-endian  */
98  #define EDGE16L_OPF	0x006
99  
100  /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC  */
101  #define EDGE16LN_OPF	0x007
102  
103  /* 000001000 - Two 32-bit edge boundary processing  */
104  #define EDGE32_OPF	0x008
105  
106  /* 000001001 - Two 32-bit edge boundary processing, no CC  */
107  #define EDGE32N_OPF	0x009
108  
109  /* 000001010 - Two 32-bit edge boundary processing, little-endian  */
110  #define EDGE32L_OPF	0x00a
111  
112  /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC  */
113  #define EDGE32LN_OPF	0x00b
114  
115  /* 000111110 - distance between 8 8-bit components  */
116  #define PDIST_OPF	0x03e
117  
118  /* 000010000 - convert 8-bit 3-D address to blocked byte address  */
119  #define ARRAY8_OPF	0x010
120  
121  /* 000010010 - convert 16-bit 3-D address to blocked byte address  */
122  #define ARRAY16_OPF	0x012
123  
124  /* 000010100 - convert 32-bit 3-D address to blocked byte address  */
125  #define ARRAY32_OPF	0x014
126  
127  /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE  */
128  #define BMASK_OPF	0x019
129  
130  /* 001001100 - Permute bytes as specified by GSR.MASK  */
131  #define BSHUFFLE_OPF	0x04c
132  
/* The OPF field is 9 bits wide and starts at bit 5 of the instruction word.  */
133  #define VIS_OPF_SHIFT	5
134  #define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)
135  
/* 5-bit register number fields: rs1 at bit 14, rs2 at bit 0, rd at bit 25.  */
136  #define RS1(INSN)	(((INSN) >> 14) & 0x1f)
137  #define RS2(INSN)	(((INSN) >>  0) & 0x1f)
138  #define RD(INSN)	(((INSN) >> 25) & 0x1f)
139  
/*
 * Flush the register windows if any operand register number is 16 or
 * above.  Registers below 16 are served from pt_regs by the accessors
 * in this file, while higher ones are accessed through the register
 * window save area addressed by the frame pointer.
 *
 * @from_kernel selects a direct "flushw" instruction when non-zero,
 * flushw_user() otherwise.
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* Nothing to do when all three operands live in pt_regs. */
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
150  
fetch_reg(unsigned int reg,struct pt_regs * regs)151  static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
152  {
153  	unsigned long value, fp;
154  
155  	if (reg < 16)
156  		return (!reg ? 0 : regs->u_regs[reg]);
157  
158  	fp = regs->u_regs[UREG_FP];
159  
160  	if (regs->tstate & TSTATE_PRIV) {
161  		struct reg_window *win;
162  		win = (struct reg_window *)(fp + STACK_BIAS);
163  		value = win->locals[reg - 16];
164  	} else if (!test_thread_64bit_stack(fp)) {
165  		struct reg_window32 __user *win32;
166  		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
167  		get_user(value, &win32->locals[reg - 16]);
168  	} else {
169  		struct reg_window __user *win;
170  		win = (struct reg_window __user *)(fp + STACK_BIAS);
171  		get_user(value, &win->locals[reg - 16]);
172  	}
173  	return value;
174  }
175  
__fetch_reg_addr_user(unsigned int reg,struct pt_regs * regs)176  static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
177  							  struct pt_regs *regs)
178  {
179  	unsigned long fp = regs->u_regs[UREG_FP];
180  
181  	BUG_ON(reg < 16);
182  	BUG_ON(regs->tstate & TSTATE_PRIV);
183  
184  	if (!test_thread_64bit_stack(fp)) {
185  		struct reg_window32 __user *win32;
186  		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
187  		return (unsigned long __user *)&win32->locals[reg - 16];
188  	} else {
189  		struct reg_window __user *win;
190  		win = (struct reg_window __user *)(fp + STACK_BIAS);
191  		return &win->locals[reg - 16];
192  	}
193  }
194  
__fetch_reg_addr_kern(unsigned int reg,struct pt_regs * regs)195  static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
196  						   struct pt_regs *regs)
197  {
198  	BUG_ON(reg >= 16);
199  	BUG_ON(regs->tstate & TSTATE_PRIV);
200  
201  	return &regs->u_regs[reg];
202  }
203  
store_reg(struct pt_regs * regs,unsigned long val,unsigned long rd)204  static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
205  {
206  	if (rd < 16) {
207  		unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
208  
209  		*rd_kern = val;
210  	} else {
211  		unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
212  
213  		if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
214  			__put_user((u32)val, (u32 __user *)rd_user);
215  		else
216  			__put_user(val, rd_user);
217  	}
218  }
219  
fpd_regval(struct fpustate * f,unsigned int insn_regnum)220  static inline unsigned long fpd_regval(struct fpustate *f,
221  				       unsigned int insn_regnum)
222  {
223  	insn_regnum = (((insn_regnum & 1) << 5) |
224  		       (insn_regnum & 0x1e));
225  
226  	return *(unsigned long *) &f->regs[insn_regnum];
227  }
228  
fpd_regaddr(struct fpustate * f,unsigned int insn_regnum)229  static inline unsigned long *fpd_regaddr(struct fpustate *f,
230  					 unsigned int insn_regnum)
231  {
232  	insn_regnum = (((insn_regnum & 1) << 5) |
233  		       (insn_regnum & 0x1e));
234  
235  	return (unsigned long *) &f->regs[insn_regnum];
236  }
237  
fps_regval(struct fpustate * f,unsigned int insn_regnum)238  static inline unsigned int fps_regval(struct fpustate *f,
239  				      unsigned int insn_regnum)
240  {
241  	return f->regs[insn_regnum];
242  }
243  
fps_regaddr(struct fpustate * f,unsigned int insn_regnum)244  static inline unsigned int *fps_regaddr(struct fpustate *f,
245  					unsigned int insn_regnum)
246  {
247  	return &f->regs[insn_regnum];
248  }
249  
250  struct edge_tab {
251  	u16 left, right;
252  };
253  static struct edge_tab edge8_tab[8] = {
254  	{ 0xff, 0x80 },
255  	{ 0x7f, 0xc0 },
256  	{ 0x3f, 0xe0 },
257  	{ 0x1f, 0xf0 },
258  	{ 0x0f, 0xf8 },
259  	{ 0x07, 0xfc },
260  	{ 0x03, 0xfe },
261  	{ 0x01, 0xff },
262  };
263  static struct edge_tab edge8_tab_l[8] = {
264  	{ 0xff, 0x01 },
265  	{ 0xfe, 0x03 },
266  	{ 0xfc, 0x07 },
267  	{ 0xf8, 0x0f },
268  	{ 0xf0, 0x1f },
269  	{ 0xe0, 0x3f },
270  	{ 0xc0, 0x7f },
271  	{ 0x80, 0xff },
272  };
273  static struct edge_tab edge16_tab[4] = {
274  	{ 0xf, 0x8 },
275  	{ 0x7, 0xc },
276  	{ 0x3, 0xe },
277  	{ 0x1, 0xf },
278  };
279  static struct edge_tab edge16_tab_l[4] = {
280  	{ 0xf, 0x1 },
281  	{ 0xe, 0x3 },
282  	{ 0xc, 0x7 },
283  	{ 0x8, 0xf },
284  };
285  static struct edge_tab edge32_tab[2] = {
286  	{ 0x3, 0x2 },
287  	{ 0x1, 0x3 },
288  };
289  static struct edge_tab edge32_tab_l[2] = {
290  	{ 0x3, 0x1 },
291  	{ 0x2, 0x3 },
292  };
293  
edge(struct pt_regs * regs,unsigned int insn,unsigned int opf)294  static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
295  {
296  	unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
297  	u16 left, right;
298  
299  	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
300  	orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
301  	orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
302  
303  	if (test_thread_flag(TIF_32BIT)) {
304  		rs1 = rs1 & 0xffffffff;
305  		rs2 = rs2 & 0xffffffff;
306  	}
307  	switch (opf) {
308  	default:
309  	case EDGE8_OPF:
310  	case EDGE8N_OPF:
311  		left = edge8_tab[rs1 & 0x7].left;
312  		right = edge8_tab[rs2 & 0x7].right;
313  		break;
314  	case EDGE8L_OPF:
315  	case EDGE8LN_OPF:
316  		left = edge8_tab_l[rs1 & 0x7].left;
317  		right = edge8_tab_l[rs2 & 0x7].right;
318  		break;
319  
320  	case EDGE16_OPF:
321  	case EDGE16N_OPF:
322  		left = edge16_tab[(rs1 >> 1) & 0x3].left;
323  		right = edge16_tab[(rs2 >> 1) & 0x3].right;
324  		break;
325  
326  	case EDGE16L_OPF:
327  	case EDGE16LN_OPF:
328  		left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
329  		right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
330  		break;
331  
332  	case EDGE32_OPF:
333  	case EDGE32N_OPF:
334  		left = edge32_tab[(rs1 >> 2) & 0x1].left;
335  		right = edge32_tab[(rs2 >> 2) & 0x1].right;
336  		break;
337  
338  	case EDGE32L_OPF:
339  	case EDGE32LN_OPF:
340  		left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
341  		right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
342  		break;
343  	}
344  
345  	if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
346  		rd_val = right & left;
347  	else
348  		rd_val = left;
349  
350  	store_reg(regs, rd_val, RD(insn));
351  
352  	switch (opf) {
353  	case EDGE8_OPF:
354  	case EDGE8L_OPF:
355  	case EDGE16_OPF:
356  	case EDGE16L_OPF:
357  	case EDGE32_OPF:
358  	case EDGE32L_OPF: {
359  		unsigned long ccr, tstate;
360  
361  		__asm__ __volatile__("subcc	%1, %2, %%g0\n\t"
362  				     "rd	%%ccr, %0"
363  				     : "=r" (ccr)
364  				     : "r" (orig_rs1), "r" (orig_rs2)
365  				     : "cc");
366  		tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
367  		regs->tstate = tstate | (ccr << 32UL);
368  	}
369  	}
370  }
371  
array(struct pt_regs * regs,unsigned int insn,unsigned int opf)372  static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
373  {
374  	unsigned long rs1, rs2, rd_val;
375  	unsigned int bits, bits_mask;
376  
377  	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
378  	rs1 = fetch_reg(RS1(insn), regs);
379  	rs2 = fetch_reg(RS2(insn), regs);
380  
381  	bits = (rs2 > 5 ? 5 : rs2);
382  	bits_mask = (1UL << bits) - 1UL;
383  
384  	rd_val = ((((rs1 >> 11) & 0x3) <<  0) |
385  		  (((rs1 >> 33) & 0x3) <<  2) |
386  		  (((rs1 >> 55) & 0x1) <<  4) |
387  		  (((rs1 >> 13) & 0xf) <<  5) |
388  		  (((rs1 >> 35) & 0xf) <<  9) |
389  		  (((rs1 >> 56) & 0xf) << 13) |
390  		  (((rs1 >> 17) & bits_mask) << 17) |
391  		  (((rs1 >> 39) & bits_mask) << (17 + bits)) |
392  		  (((rs1 >> 60) & 0xf)       << (17 + (2*bits))));
393  
394  	switch (opf) {
395  	case ARRAY16_OPF:
396  		rd_val <<= 1;
397  		break;
398  
399  	case ARRAY32_OPF:
400  		rd_val <<= 2;
401  	}
402  
403  	store_reg(regs, rd_val, RD(insn));
404  }
405  
bmask(struct pt_regs * regs,unsigned int insn)406  static void bmask(struct pt_regs *regs, unsigned int insn)
407  {
408  	unsigned long rs1, rs2, rd_val, gsr;
409  
410  	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
411  	rs1 = fetch_reg(RS1(insn), regs);
412  	rs2 = fetch_reg(RS2(insn), regs);
413  	rd_val = rs1 + rs2;
414  
415  	store_reg(regs, rd_val, RD(insn));
416  
417  	gsr = current_thread_info()->gsr[0] & 0xffffffff;
418  	gsr |= rd_val << 32UL;
419  	current_thread_info()->gsr[0] = gsr;
420  }
421  
bshuffle(struct pt_regs * regs,unsigned int insn)422  static void bshuffle(struct pt_regs *regs, unsigned int insn)
423  {
424  	struct fpustate *f = FPUSTATE;
425  	unsigned long rs1, rs2, rd_val;
426  	unsigned long bmask, i;
427  
428  	bmask = current_thread_info()->gsr[0] >> 32UL;
429  
430  	rs1 = fpd_regval(f, RS1(insn));
431  	rs2 = fpd_regval(f, RS2(insn));
432  
433  	rd_val = 0UL;
434  	for (i = 0; i < 8; i++) {
435  		unsigned long which = (bmask >> (i * 4)) & 0xf;
436  		unsigned long byte;
437  
438  		if (which < 8)
439  			byte = (rs1 >> (which * 8)) & 0xff;
440  		else
441  			byte = (rs2 >> ((which-8)*8)) & 0xff;
442  		rd_val |= (byte << (i * 8));
443  	}
444  
445  	*fpd_regaddr(f, RD(insn)) = rd_val;
446  }
447  
pdist(struct pt_regs * regs,unsigned int insn)448  static void pdist(struct pt_regs *regs, unsigned int insn)
449  {
450  	struct fpustate *f = FPUSTATE;
451  	unsigned long rs1, rs2, *rd, rd_val;
452  	unsigned long i;
453  
454  	rs1 = fpd_regval(f, RS1(insn));
455  	rs2 = fpd_regval(f, RS2(insn));
456  	rd = fpd_regaddr(f, RD(insn));
457  
458  	rd_val = *rd;
459  
460  	for (i = 0; i < 8; i++) {
461  		s16 s1, s2;
462  
463  		s1 = (rs1 >> (56 - (i * 8))) & 0xff;
464  		s2 = (rs2 >> (56 - (i * 8))) & 0xff;
465  
466  		/* Absolute value of difference. */
467  		s1 -= s2;
468  		if (s1 < 0)
469  			s1 = ~s1 + 1;
470  
471  		rd_val += s1;
472  	}
473  
474  	*rd = rd_val;
475  }
476  
pformat(struct pt_regs * regs,unsigned int insn,unsigned int opf)477  static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
478  {
479  	struct fpustate *f = FPUSTATE;
480  	unsigned long rs1, rs2, gsr, scale, rd_val;
481  
482  	gsr = current_thread_info()->gsr[0];
483  	scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
484  	switch (opf) {
485  	case FPACK16_OPF: {
486  		unsigned long byte;
487  
488  		rs2 = fpd_regval(f, RS2(insn));
489  		rd_val = 0;
490  		for (byte = 0; byte < 4; byte++) {
491  			unsigned int val;
492  			s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
493  			int scaled = src << scale;
494  			int from_fixed = scaled >> 7;
495  
496  			val = ((from_fixed < 0) ?
497  			       0 :
498  			       (from_fixed > 255) ?
499  			       255 : from_fixed);
500  
501  			rd_val |= (val << (8 * byte));
502  		}
503  		*fps_regaddr(f, RD(insn)) = rd_val;
504  		break;
505  	}
506  
507  	case FPACK32_OPF: {
508  		unsigned long word;
509  
510  		rs1 = fpd_regval(f, RS1(insn));
511  		rs2 = fpd_regval(f, RS2(insn));
512  		rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
513  		for (word = 0; word < 2; word++) {
514  			unsigned long val;
515  			s32 src = (rs2 >> (word * 32UL));
516  			s64 scaled = src << scale;
517  			s64 from_fixed = scaled >> 23;
518  
519  			val = ((from_fixed < 0) ?
520  			       0 :
521  			       (from_fixed > 255) ?
522  			       255 : from_fixed);
523  
524  			rd_val |= (val << (32 * word));
525  		}
526  		*fpd_regaddr(f, RD(insn)) = rd_val;
527  		break;
528  	}
529  
530  	case FPACKFIX_OPF: {
531  		unsigned long word;
532  
533  		rs2 = fpd_regval(f, RS2(insn));
534  
535  		rd_val = 0;
536  		for (word = 0; word < 2; word++) {
537  			long val;
538  			s32 src = (rs2 >> (word * 32UL));
539  			s64 scaled = src << scale;
540  			s64 from_fixed = scaled >> 16;
541  
542  			val = ((from_fixed < -32768) ?
543  			       -32768 :
544  			       (from_fixed > 32767) ?
545  			       32767 : from_fixed);
546  
547  			rd_val |= ((val & 0xffff) << (word * 16));
548  		}
549  		*fps_regaddr(f, RD(insn)) = rd_val;
550  		break;
551  	}
552  
553  	case FEXPAND_OPF: {
554  		unsigned long byte;
555  
556  		rs2 = fps_regval(f, RS2(insn));
557  
558  		rd_val = 0;
559  		for (byte = 0; byte < 4; byte++) {
560  			unsigned long val;
561  			u8 src = (rs2 >> (byte * 8)) & 0xff;
562  
563  			val = src << 4;
564  
565  			rd_val |= (val << (byte * 16));
566  		}
567  		*fpd_regaddr(f, RD(insn)) = rd_val;
568  		break;
569  	}
570  
571  	case FPMERGE_OPF: {
572  		rs1 = fps_regval(f, RS1(insn));
573  		rs2 = fps_regval(f, RS2(insn));
574  
575  		rd_val = (((rs2 & 0x000000ff) <<  0) |
576  			  ((rs1 & 0x000000ff) <<  8) |
577  			  ((rs2 & 0x0000ff00) <<  8) |
578  			  ((rs1 & 0x0000ff00) << 16) |
579  			  ((rs2 & 0x00ff0000) << 16) |
580  			  ((rs1 & 0x00ff0000) << 24) |
581  			  ((rs2 & 0xff000000) << 24) |
582  			  ((rs1 & 0xff000000) << 32));
583  		*fpd_regaddr(f, RD(insn)) = rd_val;
584  		break;
585  	}
586  	}
587  }
588  
pmul(struct pt_regs * regs,unsigned int insn,unsigned int opf)589  static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
590  {
591  	struct fpustate *f = FPUSTATE;
592  	unsigned long rs1, rs2, rd_val;
593  
594  	switch (opf) {
595  	case FMUL8x16_OPF: {
596  		unsigned long byte;
597  
598  		rs1 = fps_regval(f, RS1(insn));
599  		rs2 = fpd_regval(f, RS2(insn));
600  
601  		rd_val = 0;
602  		for (byte = 0; byte < 4; byte++) {
603  			u16 src1 = (rs1 >> (byte *  8)) & 0x00ff;
604  			s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
605  			u32 prod = src1 * src2;
606  			u16 scaled = ((prod & 0x00ffff00) >> 8);
607  
608  			/* Round up.  */
609  			if (prod & 0x80)
610  				scaled++;
611  			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
612  		}
613  
614  		*fpd_regaddr(f, RD(insn)) = rd_val;
615  		break;
616  	}
617  
618  	case FMUL8x16AU_OPF:
619  	case FMUL8x16AL_OPF: {
620  		unsigned long byte;
621  		s16 src2;
622  
623  		rs1 = fps_regval(f, RS1(insn));
624  		rs2 = fps_regval(f, RS2(insn));
625  
626  		rd_val = 0;
627  		src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
628  		for (byte = 0; byte < 4; byte++) {
629  			u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
630  			u32 prod = src1 * src2;
631  			u16 scaled = ((prod & 0x00ffff00) >> 8);
632  
633  			/* Round up.  */
634  			if (prod & 0x80)
635  				scaled++;
636  			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
637  		}
638  
639  		*fpd_regaddr(f, RD(insn)) = rd_val;
640  		break;
641  	}
642  
643  	case FMUL8SUx16_OPF:
644  	case FMUL8ULx16_OPF: {
645  		unsigned long byte, ushift;
646  
647  		rs1 = fpd_regval(f, RS1(insn));
648  		rs2 = fpd_regval(f, RS2(insn));
649  
650  		rd_val = 0;
651  		ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
652  		for (byte = 0; byte < 4; byte++) {
653  			u16 src1;
654  			s16 src2;
655  			u32 prod;
656  			u16 scaled;
657  
658  			src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
659  			src2 = ((rs2 >> (16 * byte)) & 0xffff);
660  			prod = src1 * src2;
661  			scaled = ((prod & 0x00ffff00) >> 8);
662  
663  			/* Round up.  */
664  			if (prod & 0x80)
665  				scaled++;
666  			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
667  		}
668  
669  		*fpd_regaddr(f, RD(insn)) = rd_val;
670  		break;
671  	}
672  
673  	case FMULD8SUx16_OPF:
674  	case FMULD8ULx16_OPF: {
675  		unsigned long byte, ushift;
676  
677  		rs1 = fps_regval(f, RS1(insn));
678  		rs2 = fps_regval(f, RS2(insn));
679  
680  		rd_val = 0;
681  		ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
682  		for (byte = 0; byte < 2; byte++) {
683  			u16 src1;
684  			s16 src2;
685  			u32 prod;
686  			u16 scaled;
687  
688  			src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
689  			src2 = ((rs2 >> (16 * byte)) & 0xffff);
690  			prod = src1 * src2;
691  			scaled = ((prod & 0x00ffff00) >> 8);
692  
693  			/* Round up.  */
694  			if (prod & 0x80)
695  				scaled++;
696  			rd_val |= ((scaled & 0xffffUL) <<
697  				   ((byte * 32UL) + 7UL));
698  		}
699  		*fpd_regaddr(f, RD(insn)) = rd_val;
700  		break;
701  	}
702  	}
703  }
704  
pcmp(struct pt_regs * regs,unsigned int insn,unsigned int opf)705  static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
706  {
707  	struct fpustate *f = FPUSTATE;
708  	unsigned long rs1, rs2, rd_val, i;
709  
710  	rs1 = fpd_regval(f, RS1(insn));
711  	rs2 = fpd_regval(f, RS2(insn));
712  
713  	rd_val = 0;
714  
715  	switch (opf) {
716  	case FCMPGT16_OPF:
717  		for (i = 0; i < 4; i++) {
718  			s16 a = (rs1 >> (i * 16)) & 0xffff;
719  			s16 b = (rs2 >> (i * 16)) & 0xffff;
720  
721  			if (a > b)
722  				rd_val |= 8 >> i;
723  		}
724  		break;
725  
726  	case FCMPGT32_OPF:
727  		for (i = 0; i < 2; i++) {
728  			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
729  			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
730  
731  			if (a > b)
732  				rd_val |= 2 >> i;
733  		}
734  		break;
735  
736  	case FCMPLE16_OPF:
737  		for (i = 0; i < 4; i++) {
738  			s16 a = (rs1 >> (i * 16)) & 0xffff;
739  			s16 b = (rs2 >> (i * 16)) & 0xffff;
740  
741  			if (a <= b)
742  				rd_val |= 8 >> i;
743  		}
744  		break;
745  
746  	case FCMPLE32_OPF:
747  		for (i = 0; i < 2; i++) {
748  			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
749  			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
750  
751  			if (a <= b)
752  				rd_val |= 2 >> i;
753  		}
754  		break;
755  
756  	case FCMPNE16_OPF:
757  		for (i = 0; i < 4; i++) {
758  			s16 a = (rs1 >> (i * 16)) & 0xffff;
759  			s16 b = (rs2 >> (i * 16)) & 0xffff;
760  
761  			if (a != b)
762  				rd_val |= 8 >> i;
763  		}
764  		break;
765  
766  	case FCMPNE32_OPF:
767  		for (i = 0; i < 2; i++) {
768  			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
769  			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
770  
771  			if (a != b)
772  				rd_val |= 2 >> i;
773  		}
774  		break;
775  
776  	case FCMPEQ16_OPF:
777  		for (i = 0; i < 4; i++) {
778  			s16 a = (rs1 >> (i * 16)) & 0xffff;
779  			s16 b = (rs2 >> (i * 16)) & 0xffff;
780  
781  			if (a == b)
782  				rd_val |= 8 >> i;
783  		}
784  		break;
785  
786  	case FCMPEQ32_OPF:
787  		for (i = 0; i < 2; i++) {
788  			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
789  			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
790  
791  			if (a == b)
792  				rd_val |= 2 >> i;
793  		}
794  		break;
795  	}
796  
797  	maybe_flush_windows(0, 0, RD(insn), 0);
798  	store_reg(regs, rd_val, RD(insn));
799  }
800  
801  /* Emulate the VIS instructions which are not implemented in
802   * hardware on Niagara.
803   */
vis_emul(struct pt_regs * regs,unsigned int insn)804  int vis_emul(struct pt_regs *regs, unsigned int insn)
805  {
806  	unsigned long pc = regs->tpc;
807  	unsigned int opf;
808  
809  	BUG_ON(regs->tstate & TSTATE_PRIV);
810  
811  	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
812  
813  	if (test_thread_flag(TIF_32BIT))
814  		pc = (u32)pc;
815  
816  	if (get_user(insn, (u32 __user *) pc))
817  		return -EFAULT;
818  
819  	save_and_clear_fpu();
820  
821  	opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
822  	switch (opf) {
823  	default:
824  		return -EINVAL;
825  
826  	/* Pixel Formatting Instructions.  */
827  	case FPACK16_OPF:
828  	case FPACK32_OPF:
829  	case FPACKFIX_OPF:
830  	case FEXPAND_OPF:
831  	case FPMERGE_OPF:
832  		pformat(regs, insn, opf);
833  		break;
834  
835  	/* Partitioned Multiply Instructions  */
836  	case FMUL8x16_OPF:
837  	case FMUL8x16AU_OPF:
838  	case FMUL8x16AL_OPF:
839  	case FMUL8SUx16_OPF:
840  	case FMUL8ULx16_OPF:
841  	case FMULD8SUx16_OPF:
842  	case FMULD8ULx16_OPF:
843  		pmul(regs, insn, opf);
844  		break;
845  
846  	/* Pixel Compare Instructions  */
847  	case FCMPGT16_OPF:
848  	case FCMPGT32_OPF:
849  	case FCMPLE16_OPF:
850  	case FCMPLE32_OPF:
851  	case FCMPNE16_OPF:
852  	case FCMPNE32_OPF:
853  	case FCMPEQ16_OPF:
854  	case FCMPEQ32_OPF:
855  		pcmp(regs, insn, opf);
856  		break;
857  
858  	/* Edge Handling Instructions  */
859  	case EDGE8_OPF:
860  	case EDGE8N_OPF:
861  	case EDGE8L_OPF:
862  	case EDGE8LN_OPF:
863  	case EDGE16_OPF:
864  	case EDGE16N_OPF:
865  	case EDGE16L_OPF:
866  	case EDGE16LN_OPF:
867  	case EDGE32_OPF:
868  	case EDGE32N_OPF:
869  	case EDGE32L_OPF:
870  	case EDGE32LN_OPF:
871  		edge(regs, insn, opf);
872  		break;
873  
874  	/* Pixel Component Distance  */
875  	case PDIST_OPF:
876  		pdist(regs, insn);
877  		break;
878  
879  	/* Three-Dimensional Array Addressing Instructions  */
880  	case ARRAY8_OPF:
881  	case ARRAY16_OPF:
882  	case ARRAY32_OPF:
883  		array(regs, insn, opf);
884  		break;
885  
886  	/* Byte Mask and Shuffle Instructions  */
887  	case BMASK_OPF:
888  		bmask(regs, insn);
889  		break;
890  
891  	case BSHUFFLE_OPF:
892  		bshuffle(regs, insn);
893  		break;
894  	}
895  
896  	regs->tpc = regs->tnpc;
897  	regs->tnpc += 4;
898  	return 0;
899  }
900