xref: /openbmc/qemu/tests/tcg/hexagon/usr.c (revision ab1b2ba9)
1 /*
2  *  Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Test instructions that might set bits in user status register (USR)
20  */
21 
22 #include <stdio.h>
23 #include <stdint.h>
24 
/* Running count of failed checks; incremented by the __check* helpers */
int err;

/*
 * Compare a 32-bit value against its expected value and, on mismatch,
 * print both in decimal together with the reporting source line and bump
 * the global error count.
 *
 *     line      source line to report (normally __LINE__ via check())
 *     val       value produced by the test
 *     expect    value the test expects
 *
 * Both values are unsigned, so they are printed with %u (after a cast to
 * unsigned int) instead of handing a uint32_t to the signed %d conversion.
 */
static void __check(int line, uint32_t val, uint32_t expect)
{
    if (val != expect) {
        printf("ERROR at line %d: %u != %u\n",
               line, (unsigned int)val, (unsigned int)expect);
        err++;
    }
}

/* Check RES against EXP, tagging any failure with the caller's line */
#define check(RES, EXP) __check(__LINE__, RES, EXP)
36 
37 static void __check32(int line, uint32_t val, uint32_t expect)
38 {
39     if (val != expect) {
40         printf("ERROR at line %d: 0x%08x != 0x%08x\n", line, val, expect);
41         err++;
42     }
43 }
44 
45 #define check32(RES, EXP) __check32(__LINE__, RES, EXP)
46 
47 static void __check64(int line, uint64_t val, uint64_t expect)
48 {
49     if (val != expect) {
50         printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", line, val, expect);
51         err++;
52     }
53 }
54 
55 #define check64(RES, EXP) __check64(__LINE__, RES, EXP)
56 
/*
 * Some of the instructions tested are only available on certain versions
 * of the Hexagon core
 *
 * CORE_HAS_AUDIO is evaluated once with a real #if and then defined as a
 * plain 0/1.  Letting a macro expand to an expression that contains
 * "defined" and using that macro in #if has undefined behavior in ISO C.
 */
#if __HEXAGON_ARCH__ >= 67 && defined(__HEXAGON_AUDIO__)
#define CORE_HAS_AUDIO    1
#else
#define CORE_HAS_AUDIO    0
#endif
#define CORE_IS_V67       (__HEXAGON_ARCH__ >= 67)
63 
/*
 * Flag bits in the Hexagon USR register.
 * Every flag has a bit position (USR_<flag>_BIT) and the corresponding
 * mask value (USR_<flag>); USR_CLEAR means that no flag is set.
 */
#define USR_CLEAR            0

/* Sticky saturation overflow */
#define USR_OVF_BIT          0
#define USR_OVF              (1 << USR_OVF_BIT)

/* IEEE FP invalid sticky flag */
#define USR_FPINVF_BIT       1
#define USR_FPINVF           (1 << USR_FPINVF_BIT)

/* IEEE FP divide-by-zero sticky flag */
#define USR_FPDBZF_BIT       2
#define USR_FPDBZF           (1 << USR_FPDBZF_BIT)

/* IEEE FP overflow sticky flag */
#define USR_FPOVFF_BIT       3
#define USR_FPOVFF           (1 << USR_FPOVFF_BIT)

/* IEEE FP underflow sticky flag */
#define USR_FPUNFF_BIT       4
#define USR_FPUNFF           (1 << USR_FPUNFF_BIT)

/* IEEE FP inexact sticky flag */
#define USR_FPINPF_BIT       5
#define USR_FPINPF           (1 << USR_FPINPF_BIT)
80 
/*
 * Some useful floating point values, stored as raw bit patterns.
 * SF_* are 32-bit values, DF_* are 64-bit values.
 */

/* 32-bit: zeros and small exact values */
const uint32_t SF_zero = 0x00000000;
const uint32_t SF_zero_neg = 0x80000000;
const uint32_t SF_one = 0x3f800000;
const uint32_t SF_two = 0x40000000;
const uint32_t SF_four = 0x40800000;

/* 32-bit: values just below one */
const uint32_t SF_one_recip = 0x3f7f0001;           /* 0.9960...  */
const uint32_t SF_one_invsqrta = 0x3f7f0000;        /* 0.99609375 */

/* 32-bit: infinity and NaNs */
const uint32_t SF_INF = 0x7f800000;
const uint32_t SF_QNaN = 0x7fc00000;
const uint32_t SF_SNaN = 0x7fb00000;
const uint32_t SF_QNaN_neg = 0xffc00000;
const uint32_t SF_SNaN_neg = 0xffb00000;
const uint32_t SF_HEX_NaN = 0xffffffff;

/* 32-bit: magnitude extremes */
const uint32_t SF_small_neg = 0xab98fba8;
const uint32_t SF_large_pos = 0x5afa572e;

/* 64-bit: zeros and ordinary values */
const uint64_t DF_zero = 0x0000000000000000ULL;
const uint64_t DF_zero_neg = 0x8000000000000000ULL;
const uint64_t DF_any = 0x3f80000000000000ULL;
const uint64_t DF_one = 0x3ff0000000000000ULL;
const uint64_t DF_one_hh = 0x3ff001ff80000000ULL;   /* 1.00048... */

/* 64-bit: NaNs */
const uint64_t DF_QNaN = 0x7ff8000000000000ULL;
const uint64_t DF_SNaN = 0x7ff7000000000000ULL;
const uint64_t DF_QNaN_neg = 0xfff8000000000000ULL;
const uint64_t DF_SNaN_neg = 0xfff7000000000000ULL;
const uint64_t DF_HEX_NaN = 0xffffffffffffffffULL;

/* 64-bit: magnitude extremes */
const uint64_t DF_small_neg = 0xbd731f7500000000ULL;
const uint64_t DF_large_pos = 0x7f80000000000001ULL;
110 
111 /*
112  * Templates for functions to execute an instruction
113  *
114  * The templates vary by the number of arguments and the types of the args
115  * and result.  We use one letter in the macro name for the result and each
116  * argument:
117  *     x             unknown (specified in a subsequent template) or don't care
118  *     R             register (32 bits)
119  *     P             pair (64 bits)
120  *     p             predicate
121  *     I             immediate
122  *     Xx            read/write
123  */
124 
/*
 * Clear bits 0-5 in USR
 *
 * Assembly fragment that the asm templates in this file paste in front of
 * the instruction under test.  It reads USR into r2, masks off the low six
 * bits and writes the value back, so r2 is used as a scratch register:
 * every asm statement that includes this fragment lists "r2" and "usr" as
 * clobbered.
 */
#define CLEAR_USRBITS \
    "r2 = usr\n\t" \
    "r2 = and(r2, #0xffffffc0)\n\t" \
    "usr = r2\n\t"
130 
/*
 * Template for instructions with one register operand
 *
 * Expands to:
 *     static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result)
 *
 * The generated function clears USR bits 0-5, executes INSN and reads USR
 * back.  The low six bits of USR are stored in *usr_result and the value
 * produced by INSN is returned.
 *
 * Operand numbers available to INSN:
 *     %0    result (output)
 *     %1    USR readback (output, written after INSN)
 *     %2    src
 */
#define FUNC_x_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result) \
{ \
    RESTYPE result; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN  "\n\t" \
        "%1 = usr\n\t" \
        : "=r"(result), "=r"(usr) \
        : "r"(src) \
        : "r2", "usr"); \
      *usr_result = usr & 0x3f; \
      return result; \
}

/* 32-bit result, 32-bit source */
#define FUNC_R_OP_R(NAME, INSN) \
FUNC_x_OP_x(uint32_t, uint32_t, NAME, INSN)

/* 32-bit result, 64-bit source */
#define FUNC_R_OP_P(NAME, INSN) \
FUNC_x_OP_x(uint32_t, uint64_t, NAME, INSN)

/* 64-bit result, 64-bit source */
#define FUNC_P_OP_P(NAME, INSN) \
FUNC_x_OP_x(uint64_t, uint64_t, NAME, INSN)

/* 64-bit result, 32-bit source */
#define FUNC_P_OP_R(NAME, INSN) \
FUNC_x_OP_x(uint64_t, uint32_t, NAME, INSN)
158 
/*
 * Template for instructions with a register and predicate result
 * and one register operand
 *
 * Expands to:
 *     static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result,
 *                         uint32_t *usr_result)
 *
 * INSN must leave its predicate result in p2; the template copies p2 and
 * USR into registers after INSN has executed.  The predicate is stored in
 * *pred_result and the low six bits of USR in *usr_result.
 *
 * Operand numbers available to INSN:
 *     %0    result (output)
 *     %1    predicate readback (output, copied from p2)
 *     %2    USR readback (output)
 *     %3    src
 */
#define FUNC_xp_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result, uint32_t *usr_result) \
{ \
    RESTYPE result; \
    uint8_t pred; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN  "\n\t" \
        "%1 = p2\n\t" \
        "%2 = usr\n\t" \
        : "=r"(result), "=r"(pred), "=r"(usr) \
        : "r"(src) \
        : "r2", "p2", "usr"); \
    *pred_result = pred; \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 32-bit result plus predicate, 32-bit source */
#define FUNC_Rp_OP_R(NAME, INSN) \
FUNC_xp_OP_x(uint32_t, uint32_t, NAME, INSN)
183 
/*
 * Template for instructions with two register operands
 *
 * Expands to:
 *     static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result)
 *
 * The generated function clears USR bits 0-5, executes INSN and reads USR
 * back.  The low six bits of USR are stored in *usr_result and the value
 * produced by INSN is returned.
 *
 * Operand numbers available to INSN:
 *     %0    result (output)
 *     %1    USR readback (output, written after INSN)
 *     %2    src1
 *     %3    src2
 */
#define FUNC_x_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
{ \
    RESTYPE result; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN "\n\t" \
        "%1 = usr\n\t" \
        : "=r"(result), "=r"(usr) \
        : "r"(src1), "r"(src2) \
        : "r2", "usr"); \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 64-bit result, two 64-bit sources */
#define FUNC_P_OP_PP(NAME, INSN) \
FUNC_x_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)

/* 32-bit result, two 64-bit sources */
#define FUNC_R_OP_PP(NAME, INSN) \
FUNC_x_OP_xx(uint32_t, uint64_t, uint64_t, NAME, INSN)

/* 64-bit result, two 32-bit sources */
#define FUNC_P_OP_RR(NAME, INSN) \
FUNC_x_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)

/* 32-bit result, two 32-bit sources */
#define FUNC_R_OP_RR(NAME, INSN) \
FUNC_x_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)

/* 32-bit result, 64-bit and 32-bit sources */
#define FUNC_R_OP_PR(NAME, INSN) \
FUNC_x_OP_xx(uint32_t, uint64_t, uint32_t, NAME, INSN)

/* 64-bit result, 64-bit and 32-bit sources */
#define FUNC_P_OP_PR(NAME, INSN) \
FUNC_x_OP_xx(uint64_t, uint64_t, uint32_t, NAME, INSN)
217 
/*
 * Template for instructions with a register and predicate result
 * and two register operands
 *
 * Expands to:
 *     static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2,
 *                         uint8_t *pred_result, uint32_t *usr_result)
 *
 * INSN must leave its predicate result in p2; the template copies p2 and
 * USR into registers after INSN has executed.  The predicate is stored in
 * *pred_result and the low six bits of USR in *usr_result.
 *
 * Operand numbers available to INSN:
 *     %0    result (output)
 *     %1    predicate readback (output, copied from p2)
 *     %2    USR readback (output)
 *     %3    src1
 *     %4    src2
 */
#define FUNC_xp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, \
                    uint8_t *pred_result, uint32_t *usr_result) \
{ \
    RESTYPE result; \
    uint8_t pred; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN  "\n\t" \
        "%1 = p2\n\t" \
        "%2 = usr\n\t" \
        : "=r"(result), "=r"(pred), "=r"(usr) \
        : "r"(src1), "r"(src2) \
        : "r2", "p2", "usr"); \
    *pred_result = pred; \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 32-bit result plus predicate, two 32-bit sources */
#define FUNC_Rp_OP_RR(NAME, INSN) \
FUNC_xp_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
243 
/*
 * Template for instructions with one register and one immediate
 *
 * Expands to:
 *     static RESTYPE NAME(SRC1TYPE src1, int32_t src2, uint32_t *usr_result)
 *
 * The low six bits of USR after INSN are stored in *usr_result and the
 * value produced by INSN is returned.
 *
 * Operand numbers available to INSN:
 *     %0    result (output)
 *     %1    USR readback (output, written after INSN)
 *     %2    src1
 *     %3    src2, bound with the "i" (immediate) constraint
 *
 * NOTE(review): src2 is a function parameter, so the "i" constraint
 * presumably relies on the call being inlined with a constant argument -
 * confirm against the flags this test is built with.
 */
#define FUNC_x_OP_xI(RESTYPE, SRC1TYPE, NAME, INSN) \
static RESTYPE NAME(SRC1TYPE src1, int32_t src2, uint32_t *usr_result) \
{ \
    RESTYPE result; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN "\n\t" \
        "%1 = usr\n\t" \
        : "=r"(result), "=r"(usr) \
        : "r"(src1), "i"(src2) \
        : "r2", "usr"); \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 32-bit result, 32-bit source and immediate */
#define FUNC_R_OP_RI(NAME, INSN) \
FUNC_x_OP_xI(uint32_t, uint32_t, NAME, INSN)

/* 32-bit result, 64-bit source and immediate */
#define FUNC_R_OP_PI(NAME, INSN) \
FUNC_x_OP_xI(uint32_t, uint64_t, NAME, INSN)
265 
/*
 * Template for instructions with a read/write result
 * and two register operands
 *
 * Expands to:
 *     static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2,
 *                         uint32_t *usr_result)
 *
 * The incoming value of result is passed to INSN through a "+r" operand,
 * so INSN both reads and updates it; the updated value is returned.  The
 * low six bits of USR after INSN are stored in *usr_result.
 *
 * Operand numbers available to INSN:
 *     %0    result (read/write)
 *     %1    USR readback (output, written after INSN)
 *     %2    src1
 *     %3    src2
 */
#define FUNC_Xx_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
                    uint32_t *usr_result) \
{ \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN "\n\t" \
        "%1 = usr\n\t" \
        : "+r"(result), "=r"(usr) \
        : "r"(src1), "r"(src2) \
        : "r2", "usr"); \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 32-bit read/write result, two 32-bit sources */
#define FUNC_XR_OP_RR(NAME, INSN) \
FUNC_Xx_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)

/* 64-bit read/write result, two 64-bit sources */
#define FUNC_XP_OP_PP(NAME, INSN) \
FUNC_Xx_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)

/* 64-bit read/write result, two 32-bit sources */
#define FUNC_XP_OP_RR(NAME, INSN) \
FUNC_Xx_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)
293 
/*
 * Template for instructions with a read/write result, a predicate result
 * and two register operands
 *
 * Expands to:
 *     static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2,
 *                         uint8_t *pred_result, uint32_t *usr_result)
 *
 * INSN reads and updates result through a "+r" operand and must leave its
 * predicate result in p2.  After INSN the template copies p2 and USR into
 * registers; the predicate is stored in *pred_result, the low six bits of
 * USR in *usr_result, and the updated result is returned.
 *
 * Operand numbers available to INSN:
 *     %0    result (read/write)
 *     %1    predicate readback (output, copied from p2)
 *     %2    USR readback (output)
 *     %3    src1
 *     %4    src2
 *
 * p2 is written inside the asm statement, so it has to appear in the
 * clobber list; otherwise the compiler is free to keep a live value in p2
 * across the statement.
 */
#define FUNC_Xxp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
                    uint8_t *pred_result, uint32_t *usr_result) \
{ \
    uint32_t usr; \
    uint8_t pred; \
    asm(CLEAR_USRBITS \
        INSN "\n\t" \
        "%1 = p2\n\t" \
        "%2 = usr\n\t" \
        : "+r"(result), "=r"(pred), "=r"(usr) \
        : "r"(src1), "r"(src2) \
        : "r2", "p2", "usr"); \
    *pred_result = pred; \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 64-bit read/write result plus predicate, two 64-bit sources */
#define FUNC_XPp_OP_PP(NAME, INSN) \
FUNC_Xxp_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
318 
/*
 * Template for instructions with a read/write result and
 * two register and one predicate operands
 *
 * Expands to:
 *     static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2,
 *                         uint8_t pred, uint32_t *usr_result)
 *
 * The pred argument is moved into p2 before INSN runs, so INSN refers to
 * the predicate operand as p2 rather than through an operand number.
 * The updated result is returned and the low six bits of USR after INSN
 * are stored in *usr_result.
 *
 * Operand numbers available to INSN:
 *     %0    result (read/write)
 *     %1    USR readback (output, written after INSN)
 *     %2    src1
 *     %3    src2
 *     %4    pred (copied into p2 by the template)
 */
#define FUNC_Xx_OP_xxp(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, uint8_t pred,\
                    uint32_t *usr_result) \
{ \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        "p2 = %4\n\t" \
        INSN "\n\t" \
        "%1 = usr\n\t" \
        : "+r"(result), "=r"(usr) \
        : "r"(src1), "r"(src2), "r"(pred) \
        : "r2", "p2", "usr"); \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* 32-bit read/write result, two 32-bit sources and a predicate */
#define FUNC_XR_OP_RRp(NAME, INSN) \
FUNC_Xx_OP_xxp(uint32_t, uint32_t, uint32_t, NAME, INSN)
341 
/*
 * Template for compare instructions with two register operands
 *
 * Expands to:
 *     static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result)
 *
 * INSN must write its outcome to predicate register p1.  The template then
 * copies p1 into the returned result and stores the low six bits of USR in
 * *usr_result.
 *
 * Operand numbers available to INSN:
 *     %0    result (output, written from p1 after INSN)
 *     %1    USR readback (output)
 *     %2    src1
 *     %3    src2
 */
#define FUNC_CMP_xx(SRC1TYPE, SRC2TYPE, NAME, INSN) \
static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
{ \
    uint32_t result; \
    uint32_t usr; \
    asm(CLEAR_USRBITS \
        INSN "\n\t" \
        "%0 = p1\n\t" \
        "%1 = usr\n\t" \
        : "=r"(result), "=r"(usr) \
        : "r"(src1), "r"(src2) \
        : "p1", "r2", "usr"); \
    *usr_result = usr & 0x3f; \
    return result; \
}

/* Compare two 32-bit sources */
#define FUNC_CMP_RR(NAME, INSN) \
FUNC_CMP_xx(uint32_t, uint32_t, NAME, INSN)

/* Compare two 64-bit sources */
#define FUNC_CMP_PP(NAME, INSN) \
FUNC_CMP_xx(uint64_t, uint64_t, NAME, INSN)
364 
/*
 * Function definitions generated from the templates
 *
 * Each line expands to one static wrapper function named after the first
 * argument; the second argument is the assembly text of the instruction
 * under test, written with the operand numbers of the chosen template.
 */

/* Saturate to unsigned byte, and saturating unsigned vector add/subtract */
FUNC_R_OP_R(satub,              "%0 = satub(%2)")
FUNC_P_OP_PP(vaddubs,           "%0 = vaddub(%2, %3):sat")
FUNC_P_OP_PP(vadduhs,           "%0 = vadduh(%2, %3):sat")
FUNC_P_OP_PP(vsububs,           "%0 = vsubub(%2, %3):sat")
FUNC_P_OP_PP(vsubuhs,           "%0 = vsubuh(%2, %3):sat")

/* Add vector of half integers with saturation and pack to unsigned bytes */
FUNC_R_OP_PP(vaddhubs,          "%0 = vaddhub(%2, %3):sat")

/* Vector saturate half to unsigned byte */
FUNC_R_OP_P(vsathub,            "%0 = vsathub(%2)")

/* Similar to above but takes a 32-bit argument */
FUNC_R_OP_R(svsathub,           "%0 = vsathub(%2)")

/* Vector saturate word to unsigned half */
FUNC_P_OP_P(vsatwuh_nopack,     "%0 = vsatwuh(%2)")

/* Similar to above but returns a 32-bit result */
FUNC_R_OP_P(vsatwuh,            "%0 = vsatwuh(%2)")

/* Vector arithmetic shift halfwords with saturate and pack */
FUNC_R_OP_PI(asrhub_sat,        "%0 = vasrhub(%2, #%3):sat")

/*
 * Vector arithmetic shift halfwords with round, saturate and pack
 *
 * NOTE(review): the assembly text uses the :raw modifier although the
 * wrapper is named rnd_sat - presumably :raw is how the rounding variant
 * is spelled for the assembler; confirm against the Hexagon ISA manual.
 */
FUNC_R_OP_PI(asrhub_rnd_sat,    "%0 = vasrhub(%2, #%3):raw")

/* Saturating 32-bit add */
FUNC_R_OP_RR(addsat,            "%0 = add(%2, %3):sat")
/* Similar to above but with register pairs */
FUNC_P_OP_PP(addpsat,           "%0 = add(%2, %3):sat")

/*
 * Multiply-family instructions (most carry the :sat modifier), followed by
 * vcrotate and vcnegh.  The "+=" and "-=" forms use the read/write result
 * templates, so %0 is both an input and an output.
 */
FUNC_XR_OP_RR(mpy_acc_sat_hh_s0, "%0 += mpy(%2.H, %3.H):sat")
FUNC_R_OP_RR(mpy_sat_hh_s1,     "%0 = mpy(%2.H, %3.H):<<1:sat")
FUNC_R_OP_RR(mpy_sat_rnd_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:rnd:sat")
FUNC_R_OP_RR(mpy_up_s1_sat,     "%0 = mpy(%2, %3):<<1:sat")
FUNC_P_OP_RR(vmpy2s_s1,         "%0 = vmpyh(%2, %3):<<1:sat")
FUNC_P_OP_RR(vmpy2su_s1,        "%0 = vmpyhsu(%2, %3):<<1:sat")
FUNC_R_OP_RR(vmpy2s_s1pack,     "%0 = vmpyh(%2, %3):<<1:rnd:sat")
FUNC_P_OP_PP(vmpy2es_s1,        "%0 = vmpyeh(%2, %3):<<1:sat")
FUNC_R_OP_PP(vdmpyrs_s1,        "%0 = vdmpy(%2, %3):<<1:rnd:sat")
FUNC_XP_OP_PP(vdmacs_s0,        "%0 += vdmpy(%2, %3):sat")
FUNC_R_OP_RR(cmpyrs_s0,         "%0 = cmpy(%2, %3):rnd:sat")
FUNC_XP_OP_RR(cmacs_s0,         "%0 += cmpy(%2, %3):sat")
FUNC_XP_OP_RR(cnacs_s0,         "%0 -= cmpy(%2, %3):sat")
FUNC_P_OP_PP(vrcmpys_s1_h,      "%0 = vrcmpys(%2, %3):<<1:sat:raw:hi")
FUNC_XP_OP_PP(mmacls_s0,        "%0 += vmpyweh(%2, %3):sat")
FUNC_R_OP_RR(hmmpyl_rs1,        "%0 = mpy(%2, %3.L):<<1:rnd:sat")
FUNC_XP_OP_PP(mmaculs_s0,       "%0 += vmpyweuh(%2, %3):sat")
FUNC_R_OP_PR(cmpyi_wh,          "%0 = cmpyiwh(%2, %3):<<1:rnd:sat")
FUNC_P_OP_PP(vcmpy_s0_sat_i,    "%0 = vcmpyi(%2, %3):sat")
FUNC_P_OP_PR(vcrotate,          "%0 = vcrotate(%2, %3)")
FUNC_P_OP_PR(vcnegh,            "%0 = vcnegh(%2, %3)")

/* Only built when CORE_HAS_AUDIO evaluates to true */
#if CORE_HAS_AUDIO
FUNC_R_OP_PP(wcmpyrw,           "%0 = cmpyrw(%2, %3):<<1:sat")
#endif

/* Remaining :sat instructions */
FUNC_R_OP_RR(addh_l16_sat_ll,   "%0 = add(%2.L, %3.L):sat")
FUNC_P_OP_P(vconj,              "%0 = vconj(%2):sat")
FUNC_P_OP_PP(vxaddsubw,         "%0 = vxaddsubw(%2, %3):sat")
FUNC_P_OP_P(vabshsat,           "%0 = vabsh(%2):sat")
FUNC_P_OP_PP(vnavgwr,           "%0 = vnavgw(%2, %3):rnd:sat")
FUNC_R_OP_RI(round_ri_sat,      "%0 = round(%2, #%3):sat")
FUNC_R_OP_RR(asr_r_r_sat,       "%0 = asr(%2, %3):sat")

/*
 * The predicate-result template uses %1 and %2 for the predicate and USR
 * readbacks, so the sources are %3 and %4 and the instruction itself must
 * name p2 as its predicate destination.
 */
FUNC_XPp_OP_PP(ACS,             "%0, p2 = vacsh(%3, %4)")
434 
/* Floating point */

/* 32-bit (sf) min/max and arithmetic */
FUNC_R_OP_RR(sfmin,             "%0 = sfmin(%2, %3)")
FUNC_R_OP_RR(sfmax,             "%0 = sfmax(%2, %3)")
FUNC_R_OP_RR(sfadd,             "%0 = sfadd(%2, %3)")
FUNC_R_OP_RR(sfsub,             "%0 = sfsub(%2, %3)")
FUNC_R_OP_RR(sfmpy,             "%0 = sfmpy(%2, %3)")
/* Accumulating forms: %0 is read and written */
FUNC_XR_OP_RR(sffma,            "%0 += sfmpy(%2, %3)")
FUNC_XR_OP_RR(sffms,            "%0 -= sfmpy(%2, %3)")
/* Compares write p1; the compare template returns p1 as the result */
FUNC_CMP_RR(sfcmpuo,            "p1 = sfcmp.uo(%2, %3)")
FUNC_CMP_RR(sfcmpeq,            "p1 = sfcmp.eq(%2, %3)")
FUNC_CMP_RR(sfcmpgt,            "p1 = sfcmp.gt(%2, %3)")
FUNC_CMP_RR(sfcmpge,            "p1 = sfcmp.ge(%2, %3)")

/* 64-bit (df) arithmetic on register pairs */
FUNC_P_OP_PP(dfadd,             "%0 = dfadd(%2, %3)")
FUNC_P_OP_PP(dfsub,             "%0 = dfsub(%2, %3)")

/* Only built when __HEXAGON_ARCH__ is at least 67 */
#if CORE_IS_V67
FUNC_P_OP_PP(dfmin,             "%0 = dfmin(%2, %3)")
FUNC_P_OP_PP(dfmax,             "%0 = dfmax(%2, %3)")
FUNC_XP_OP_PP(dfmpyhh,          "%0 += dfmpyhh(%2, %3)")
#endif

/* 64-bit compares, same p1 convention as the sf compares */
FUNC_CMP_PP(dfcmpuo,            "p1 = dfcmp.uo(%2, %3)")
FUNC_CMP_PP(dfcmpeq,            "p1 = dfcmp.eq(%2, %3)")
FUNC_CMP_PP(dfcmpgt,            "p1 = dfcmp.gt(%2, %3)")
FUNC_CMP_PP(dfcmpge,            "p1 = dfcmp.ge(%2, %3)")
461 
/*
 * Conversions from sf
 *
 * The source is a 32-bit register; the template chosen gives the result
 * width (R for uw/w results, P for df/ud/d results).
 */
FUNC_P_OP_R(conv_sf2df,         "%0 = convert_sf2df(%2)")
FUNC_R_OP_R(conv_sf2uw,         "%0 = convert_sf2uw(%2)")
FUNC_R_OP_R(conv_sf2w,          "%0 = convert_sf2w(%2)")
FUNC_P_OP_R(conv_sf2ud,         "%0 = convert_sf2ud(%2)")
FUNC_P_OP_R(conv_sf2d,          "%0 = convert_sf2d(%2)")
FUNC_R_OP_R(conv_sf2uw_chop,    "%0 = convert_sf2uw(%2):chop")
FUNC_R_OP_R(conv_sf2w_chop,     "%0 = convert_sf2w(%2):chop")
FUNC_P_OP_R(conv_sf2ud_chop,    "%0 = convert_sf2ud(%2):chop")
FUNC_P_OP_R(conv_sf2d_chop,     "%0 = convert_sf2d(%2):chop")

/*
 * Conversions from df
 *
 * The source is a 64-bit register pair; results are 32-bit for sf/uw/w
 * and 64-bit for ud/d.
 */
FUNC_R_OP_P(conv_df2sf,         "%0 = convert_df2sf(%2)")
FUNC_R_OP_P(conv_df2uw,         "%0 = convert_df2uw(%2)")
FUNC_R_OP_P(conv_df2w,          "%0 = convert_df2w(%2)")
FUNC_P_OP_P(conv_df2ud,         "%0 = convert_df2ud(%2)")
FUNC_P_OP_P(conv_df2d,          "%0 = convert_df2d(%2)")
FUNC_R_OP_P(conv_df2uw_chop,    "%0 = convert_df2uw(%2):chop")
FUNC_R_OP_P(conv_df2w_chop,     "%0 = convert_df2w(%2):chop")
FUNC_P_OP_P(conv_df2ud_chop,    "%0 = convert_df2ud(%2):chop")
FUNC_P_OP_P(conv_df2d_chop,     "%0 = convert_df2d(%2):chop")

/*
 * Integer to float conversions
 *
 * All of these produce a 32-bit sf result, from a 32-bit (uw/w) or
 * 64-bit (ud/d) source.
 */
FUNC_R_OP_R(conv_uw2sf,         "%0 = convert_uw2sf(%2)")
FUNC_R_OP_R(conv_w2sf,          "%0 = convert_w2sf(%2)")
FUNC_R_OP_P(conv_ud2sf,         "%0 = convert_ud2sf(%2)")
FUNC_R_OP_P(conv_d2sf,          "%0 = convert_d2sf(%2)")
489 
/*
 * Special purpose floating point instructions
 *
 * sffma_sc takes its predicate operand in p2, which the template loads
 * from the pred argument before the instruction runs.  sfrecipa and
 * sfinvsqrta produce a predicate in p2; their templates use %1 and %2 for
 * the predicate and USR readbacks, so the sources start at %3.
 */
FUNC_XR_OP_RRp(sffma_sc,        "%0 += sfmpy(%2, %3, p2):scale")
FUNC_Rp_OP_RR(sfrecipa,         "%0, p2 = sfrecipa(%3, %4)")
FUNC_R_OP_RR(sffixupn,          "%0 = sffixupn(%2, %3)")
FUNC_R_OP_RR(sffixupd,          "%0 = sffixupd(%2, %3)")
FUNC_R_OP_R(sffixupr,           "%0 = sffixupr(%2)")
FUNC_Rp_OP_R(sfinvsqrta,        "%0, p2 = sfinvsqrta(%3)")
497 
498 /*
499  * Templates for test cases
500  *
501  * Same naming convention as the function templates
502  */
/*
 * One-operand test case: call FUNC on SRC, verify the returned value
 * against RES with CHECKFN and the reported USR bits against USR_RES.
 */
#define TEST_x_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, RES, USR_RES) \
    do { \
        SRCTYPE operand = SRC; \
        uint32_t usr_bits; \
        RESTYPE actual = FUNC(operand, &usr_bits); \
        CHECKFN(actual, RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 32-bit result from a 32-bit source */
#define TEST_R_OP_R(FUNC, SRC, RES, USR_RES) \
    TEST_x_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, USR_RES)

/* 32-bit result from a 64-bit source */
#define TEST_R_OP_P(FUNC, SRC, RES, USR_RES) \
    TEST_x_OP_x(uint32_t, check32, uint64_t, FUNC, SRC, RES, USR_RES)

/* 64-bit result from a 64-bit source */
#define TEST_P_OP_P(FUNC, SRC, RES, USR_RES) \
    TEST_x_OP_x(uint64_t, check64, uint64_t, FUNC, SRC, RES, USR_RES)

/* 64-bit result from a 32-bit source */
#define TEST_P_OP_R(FUNC, SRC, RES, USR_RES) \
    TEST_x_OP_x(uint64_t, check64, uint32_t, FUNC, SRC, RES, USR_RES)
524 
/*
 * One-operand test case for instructions that also produce a predicate:
 * verifies the returned value (CHECKFN), the predicate (PRED_RES) and the
 * reported USR bits (USR_RES).
 */
#define TEST_xp_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, \
                     RES, PRED_RES, USR_RES) \
    do { \
        SRCTYPE operand = SRC; \
        uint8_t pred_bits; \
        uint32_t usr_bits; \
        RESTYPE actual = FUNC(operand, &pred_bits, &usr_bits); \
        CHECKFN(actual, RES); \
        check(pred_bits, PRED_RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 32-bit result plus predicate from a 32-bit source */
#define TEST_Rp_OP_R(FUNC, SRC, RES, PRED_RES, USR_RES) \
    TEST_xp_OP_x(uint32_t, check32, uint32_t, \
                 FUNC, SRC, RES, PRED_RES, USR_RES)
540 
/*
 * Two-operand test case: call FUNC on SRC1 and SRC2, verify the returned
 * value against RES with CHECKFN and the reported USR bits against USR_RES.
 */
#define TEST_x_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
                     FUNC, SRC1, SRC2, RES, USR_RES) \
    do { \
        SRC1TYPE lhs = SRC1; \
        SRC2TYPE rhs = SRC2; \
        uint32_t usr_bits; \
        RESTYPE actual = FUNC(lhs, rhs, &usr_bits); \
        CHECKFN(actual, RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 64-bit result, two 64-bit sources */
#define TEST_P_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)

/* 32-bit result, two 64-bit sources */
#define TEST_R_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint32_t, check32, uint64_t, uint64_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)

/* 64-bit result, two 32-bit sources */
#define TEST_P_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)

/* 32-bit result, two 32-bit sources */
#define TEST_R_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)

/* 32-bit result, 64-bit and 32-bit sources */
#define TEST_R_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint32_t, check32, uint64_t, uint32_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)

/* 64-bit result, 64-bit and 32-bit sources */
#define TEST_P_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_x_OP_xx(uint64_t, check64, uint64_t, uint32_t, \
                 FUNC, SRC1, SRC2, RES, USR_RES)
576 
/*
 * Two-operand test case for instructions that also produce a predicate:
 * verifies the returned value (CHECKFN), the predicate (PRED_RES) and the
 * reported USR bits (USR_RES).
 */
#define TEST_xp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, FUNC, SRC1, SRC2, \
                      RES, PRED_RES, USR_RES) \
    do { \
        SRC1TYPE lhs = SRC1; \
        SRC2TYPE rhs = SRC2; \
        uint8_t pred_bits; \
        uint32_t usr_bits; \
        RESTYPE actual = FUNC(lhs, rhs, &pred_bits, &usr_bits); \
        CHECKFN(actual, RES); \
        check(pred_bits, PRED_RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 32-bit result plus predicate, two 32-bit sources */
#define TEST_Rp_OP_RR(FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES) \
    TEST_xp_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
                  FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES)
594 
/*
 * Test case for instructions with one register operand and one immediate:
 * call FUNC on SRC1 and SRC2, verify the returned value against RES with
 * CHECKFN and the reported USR bits against USR_RES.
 *
 * src2 is an int32_t to match the immediate parameter of the functions
 * generated by FUNC_x_OP_xI.
 */
#define TEST_x_OP_xI(RESTYPE, CHECKFN, SRC1TYPE, \
                     FUNC, SRC1, SRC2, RES, USR_RES) \
    do { \
        RESTYPE result; \
        SRC1TYPE src1 = SRC1; \
        int32_t src2 = SRC2; \
        uint32_t usr_result; \
        result = FUNC(src1, src2, &usr_result); \
        CHECKFN(result, RES); \
        check(usr_result, USR_RES); \
    } while (0)

/* 32-bit result, 32-bit source and immediate */
#define TEST_R_OP_RI(FUNC, SRC1, SRC2, RES, USR_RES) \
TEST_x_OP_xI(uint32_t, check32, uint32_t, \
             FUNC, SRC1, SRC2, RES, USR_RES)

/*
 * 32-bit result, 64-bit source and immediate.  The result is 32 bits wide,
 * so it is verified with check32 like every other 32-bit result.
 */
#define TEST_R_OP_PI(FUNC, SRC1, SRC2, RES, USR_RES) \
TEST_x_OP_xI(uint32_t, check32, uint64_t, \
             FUNC, SRC1, SRC2, RES, USR_RES)
614 
/*
 * Test case for instructions with a read/write result and two operands:
 * the accumulator starts as RESIN, is passed to FUNC together with SRC1
 * and SRC2, and the value returned is verified against RES with CHECKFN.
 * The reported USR bits are verified against USR_RES.
 */
#define TEST_Xx_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
                      FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
    do { \
        RESTYPE accum = RESIN; \
        SRC1TYPE lhs = SRC1; \
        SRC2TYPE rhs = SRC2; \
        uint32_t usr_bits; \
        accum = FUNC(accum, lhs, rhs, &usr_bits); \
        CHECKFN(accum, RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 32-bit read/write result, two 32-bit sources */
#define TEST_XR_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
    TEST_Xx_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
                  FUNC, RESIN, SRC1, SRC2, RES, USR_RES)

/* 64-bit read/write result, two 64-bit sources */
#define TEST_XP_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
    TEST_Xx_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
                  FUNC, RESIN, SRC1, SRC2, RES, USR_RES)

/* 64-bit read/write result, two 32-bit sources */
#define TEST_XP_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
    TEST_Xx_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
                  FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
638 
/*
 * Test case for instructions with a read/write result, a predicate result
 * and two operands: the accumulator starts as RESIN and is passed to FUNC
 * together with SRC1 and SRC2.  The returned value is verified against RES
 * with CHECKFN, the predicate against PRED_RES and the reported USR bits
 * against USR_RES.
 *
 * The predicate returned by FUNC is compared with PRED_RES, in the same
 * way as in the other predicate-result test templates; without that
 * comparison the PRED_RES argument would be silently ignored.
 */
#define TEST_Xxp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
                       FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
    do { \
        RESTYPE result = RESIN; \
        SRC1TYPE src1 = SRC1; \
        SRC2TYPE src2 = SRC2; \
        uint8_t pred_res; \
        uint32_t usr_result; \
        result = FUNC(result, src1, src2, &pred_res, &usr_result); \
        CHECKFN(result, RES); \
        check(pred_res, PRED_RES); \
        check(usr_result, USR_RES); \
    } while (0)

/* 64-bit read/write result plus predicate, two 64-bit sources */
#define TEST_XPp_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
TEST_Xxp_OP_xx(uint64_t, check64, uint64_t, uint64_t, FUNC, RESIN, SRC1, SRC2, \
               RES, PRED_RES, USR_RES)
655 
/*
 * Test case for instructions with a read/write result, two operands and a
 * predicate input: the accumulator starts as RESIN and is passed to FUNC
 * together with SRC1, SRC2 and PRED.  The returned value is verified
 * against RES with CHECKFN and the reported USR bits against USR_RES.
 */
#define TEST_Xx_OP_xxp(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
                      FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
    do { \
        RESTYPE accum = RESIN; \
        SRC1TYPE lhs = SRC1; \
        SRC2TYPE rhs = SRC2; \
        uint8_t pred_in = PRED; \
        uint32_t usr_bits; \
        accum = FUNC(accum, lhs, rhs, pred_in, &usr_bits); \
        CHECKFN(accum, RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* 32-bit read/write result, two 32-bit sources and a predicate */
#define TEST_XR_OP_RRp(FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
    TEST_Xx_OP_xxp(uint32_t, check32, uint32_t, uint32_t, \
                   FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES)
672 
/*
 * Test case for compare instructions: call FUNC on SRC1 and SRC2, then
 * verify the returned 32-bit value against RES and the reported USR bits
 * against USR_RES, both with check().
 */
#define TEST_CMP_xx(SRC1TYPE, SRC2TYPE, \
                    FUNC, SRC1, SRC2, RES, USR_RES) \
    do { \
        SRC1TYPE lhs = SRC1; \
        SRC2TYPE rhs = SRC2; \
        uint32_t usr_bits; \
        uint32_t outcome = FUNC(lhs, rhs, &usr_bits); \
        check(outcome, RES); \
        check(usr_bits, USR_RES); \
    } while (0)

/* Compare two 32-bit sources */
#define TEST_CMP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_CMP_xx(uint32_t, uint32_t, FUNC, SRC1, SRC2, RES, USR_RES)

/* Compare two 64-bit sources */
#define TEST_CMP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
    TEST_CMP_xx(uint64_t, uint64_t, FUNC, SRC1, SRC2, RES, USR_RES)
690 
691 int main()
692 {
693     TEST_R_OP_R(satub,       0,         0,         USR_CLEAR);
694     TEST_R_OP_R(satub,       0xff,      0xff,      USR_CLEAR);
695     TEST_R_OP_R(satub,       0xfff,     0xff,      USR_OVF);
696     TEST_R_OP_R(satub,       -1,        0,         USR_OVF);
697 
698     TEST_P_OP_PP(vaddubs,    0xfeLL,    0x01LL,    0xffLL,    USR_CLEAR);
699     TEST_P_OP_PP(vaddubs,    0xffLL,    0xffLL,    0xffLL,    USR_OVF);
700 
701     TEST_P_OP_PP(vadduhs,    0xfffeLL,  0x1LL,     0xffffLL,  USR_CLEAR);
702     TEST_P_OP_PP(vadduhs,    0xffffLL,  0x1LL,     0xffffLL,  USR_OVF);
703 
704     TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0101010101010101LL,
705                  0x0706050403020100LL, USR_CLEAR);
706     TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0202020202020202LL,
707                  0x0605040302010000LL, USR_OVF);
708 
709     TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0001000100010001LL,
710                  0x0003000200010000LL, USR_CLEAR);
711     TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0002000200020002LL,
712                  0x0002000100000000LL, USR_OVF);
713 
714     TEST_R_OP_PP(vaddhubs, 0x0004000300020001LL, 0x0001000100010001LL,
715                  0x05040302, USR_CLEAR);
716     TEST_R_OP_PP(vaddhubs, 0x7fff000300020001LL, 0x0002000200020002LL,
717                  0xff050403, USR_OVF);
718 
719     TEST_R_OP_P(vsathub,         0x0001000300020001LL, 0x01030201, USR_CLEAR);
720     TEST_R_OP_P(vsathub,         0x010000700080ffffLL, 0xff708000, USR_OVF);
721 
722     TEST_R_OP_P(vsatwuh,         0x0000ffff00000001LL, 0xffff0001, USR_CLEAR);
723     TEST_R_OP_P(vsatwuh,         0x800000000000ffffLL, 0x0000ffff, USR_OVF);
724 
725     TEST_P_OP_P(vsatwuh_nopack,  0x0000ffff00000001LL, 0x0000ffff00000001LL,
726                 USR_CLEAR);
727     TEST_P_OP_P(vsatwuh_nopack,  0x800000000000ffffLL, 0x000000000000ffffLL,
728                 USR_OVF);
729 
730     TEST_R_OP_R(svsathub,        0x00020001,           0x0201,     USR_CLEAR);
731     TEST_R_OP_R(svsathub,        0x0080ffff,           0x8000,     USR_OVF);
732 
733     TEST_R_OP_PI(asrhub_sat,     0x004f003f002f001fLL, 3,    0x09070503,
734                  USR_CLEAR);
735     TEST_R_OP_PI(asrhub_sat,     0x004fffff8fff001fLL, 3,    0x09000003,
736                  USR_OVF);
737 
738     TEST_R_OP_PI(asrhub_rnd_sat, 0x004f003f002f001fLL, 2,    0x0a080604,
739                  USR_CLEAR);
740     TEST_R_OP_PI(asrhub_rnd_sat, 0x004fffff8fff001fLL, 2,    0x0a000004,
741                  USR_OVF);
742 
743     TEST_R_OP_RR(addsat,        1,              2,              3,
744                  USR_CLEAR);
745     TEST_R_OP_RR(addsat,        0x7fffffff,     0x00000010,     0x7fffffff,
746                  USR_OVF);
747     TEST_R_OP_RR(addsat,        0x80000000,     0x80000006,     0x80000000,
748                  USR_OVF);
749 
750     TEST_P_OP_PP(addpsat, 1LL, 2LL, 3LL, USR_CLEAR);
751     /* overflow to max positive */
752     TEST_P_OP_PP(addpsat, 0x7ffffffffffffff0LL, 0x0000000000000010LL,
753                  0x7fffffffffffffffLL, USR_OVF);
754     /* overflow to min negative */
755     TEST_P_OP_PP(addpsat, 0x8000000000000003LL, 0x8000000000000006LL,
756                  0x8000000000000000LL, USR_OVF);
757 
758     TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0xffff0000, 0x11110000,
759                   0x7fffeeee, USR_CLEAR);
760     TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0x7fff0000, 0x7fff0000,
761                   0x7fffffff, USR_OVF);
762 
763     TEST_R_OP_RR(mpy_sat_hh_s1,        0xffff0000, 0x11110000, 0xffffddde,
764                  USR_CLEAR);
765     TEST_R_OP_RR(mpy_sat_hh_s1,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
766                  USR_CLEAR);
767     TEST_R_OP_RR(mpy_sat_hh_s1,        0x80000000, 0x80000000, 0x7fffffff,
768                  USR_OVF);
769 
770     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0xffff0000, 0x11110000, 0x00005dde,
771                  USR_CLEAR);
772     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x7fff0000, 0x7fff0000, 0x7ffe8002,
773                  USR_CLEAR);
774     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x80000000, 0x80000000, 0x7fffffff,
775                  USR_OVF);
776 
777     TEST_R_OP_RR(mpy_up_s1_sat,        0xffff0000, 0x11110000, 0xffffddde,
778                  USR_CLEAR);
779     TEST_R_OP_RR(mpy_up_s1_sat,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
780                  USR_CLEAR);
781     TEST_R_OP_RR(mpy_up_s1_sat,        0x80000000, 0x80000000, 0x7fffffff,
782                  USR_OVF);
783 
784     TEST_P_OP_RR(vmpy2s_s1,  0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
785                  USR_CLEAR);
786     TEST_P_OP_RR(vmpy2s_s1,  0x80000000, 0x80000000, 0x7fffffff00000000LL,
787                  USR_OVF);
788 
789     TEST_P_OP_RR(vmpy2su_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
790                  USR_CLEAR);
791     TEST_P_OP_RR(vmpy2su_s1, 0xffffbd97, 0xffffffff, 0xfffe000280000000LL,
792                  USR_OVF);
793 
794     TEST_R_OP_RR(vmpy2s_s1pack,        0x7fff0000, 0x7fff0000, 0x7ffe0000,
795                  USR_CLEAR);
796     TEST_R_OP_RR(vmpy2s_s1pack,        0x80008000, 0x80008000, 0x7fff7fff,
797                  USR_OVF);
798 
799     TEST_P_OP_PP(vmpy2es_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
800                  0x1ffec0021ffec002LL, USR_CLEAR);
801     TEST_P_OP_PP(vmpy2es_s1, 0x8000800080008000LL, 0x8000800080008000LL,
802                  0x7fffffff7fffffffLL, USR_OVF);
803 
804     TEST_R_OP_PP(vdmpyrs_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
805                  0x3ffe3ffe, USR_CLEAR);
806     TEST_R_OP_PP(vdmpyrs_s1, 0x8000800080008000LL, 0x8000800080008000LL,
807                  0x7fff7fffLL, USR_OVF);
808 
809     TEST_XP_OP_PP(vdmacs_s0, 0x0fffffffULL, 0x00ff00ff00ff00ffLL,
810                   0x00ff00ff00ff00ffLL, 0x0001fc021001fc01LL, USR_CLEAR);
811     TEST_XP_OP_PP(vdmacs_s0, 0x01111111ULL, 0x8000800080001000LL,
812                   0x8000800080008000LL, 0x7fffffff39111111LL, USR_OVF);
813 
814     TEST_R_OP_RR(cmpyrs_s0,            0x7fff0000, 0x7fff0000, 0x0000c001,
815                  USR_CLEAR);
816     TEST_R_OP_RR(cmpyrs_s0,            0x80008000, 0x80008000, 0x7fff0000,
817                  USR_OVF);
818 
819     TEST_XP_OP_RR(cmacs_s0, 0x0fffffff, 0x7fff0000, 0x7fff0000,
820                   0x00000000d000fffeLL, USR_CLEAR);
821     TEST_XP_OP_RR(cmacs_s0, 0x0fff1111, 0x80008000, 0x80008000,
822                   0x7fffffff0fff1111LL, USR_OVF);
823 
824     TEST_XP_OP_RR(cnacs_s0, 0x000000108fffffffULL, 0x7fff0000, 0x7fff0000,
825                   0x00000010cfff0000ULL, USR_CLEAR);
826     TEST_XP_OP_RR(cnacs_s0, 0x000000108ff1111fULL, 0x00002001, 0x00007ffd,
827                   0x0000001080000000ULL, USR_OVF);
828 
829     TEST_P_OP_PP(vrcmpys_s1_h, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
830                  0x0003f8040003f804LL, USR_CLEAR);
831     TEST_P_OP_PP(vrcmpys_s1_h, 0x8000800080008000LL, 0x8000800080008000LL,
832                  0x7fffffff7fffffffLL, USR_OVF);
833 
834     TEST_XP_OP_PP(mmacls_s0, 0x6fffffff, 0x00ff00ff00ff00ffLL,
835                   0x00ff00ff00ff00ffLL, 0x0000fe017000fe00LL, USR_CLEAR);
836     TEST_XP_OP_PP(mmacls_s0, 0x6f1111ff, 0x8000800080008000LL,
837                   0x1000100080008000LL, 0xf80008007fffffffLL, USR_OVF);
838 
839     TEST_R_OP_RR(hmmpyl_rs1,           0x7fff0000, 0x7fff0001, 0x0000fffe,
840                  USR_CLEAR);
841     TEST_R_OP_RR(hmmpyl_rs1,           0x80000000, 0x80008000, 0x7fffffff,
842                  USR_OVF);
843 
844     TEST_XP_OP_PP(mmaculs_s0, 0x000000007fffffffULL, 0xffff800080008000LL,
845                   0xffff800080008000LL, 0xffffc00040003fffLL, USR_CLEAR);
846     TEST_XP_OP_PP(mmaculs_s0, 0x000011107fffffffULL, 0x00ff00ff00ff00ffLL,
847                   0x00ff00ff001100ffLL, 0x00010f117fffffffLL, USR_OVF);
848 
849     TEST_R_OP_PR(cmpyi_wh, 0x7fff000000000000LL, 0x7fff0001, 0x0000fffe,
850                  USR_CLEAR);
851     TEST_R_OP_PR(cmpyi_wh, 0x8000000000000000LL, 0x80008000, 0x7fffffff,
852                  USR_OVF);
853 
854     TEST_P_OP_PP(vcmpy_s0_sat_i, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
855                  0x0001fc020001fc02LL, USR_CLEAR);
856     TEST_P_OP_PP(vcmpy_s0_sat_i, 0x8000800080008000LL, 0x8000800080008000LL,
857                  0x7fffffff7fffffffLL, USR_OVF);
858 
859     TEST_P_OP_PR(vcrotate, 0x8000000000000000LL, 0x00000002,
860                  0x8000000000000000LL, USR_CLEAR);
861     TEST_P_OP_PR(vcrotate, 0x7fff80007fff8000LL, 0x00000001,
862                  0x7fff80007fff7fffLL, USR_OVF);
863 
864     TEST_P_OP_PR(vcnegh, 0x8000000000000000LL, 0x00000002,
865                  0x8000000000000000LL, USR_CLEAR);
866     TEST_P_OP_PR(vcnegh, 0x7fff80007fff8000LL, 0x00000001,
867                  0x7fff80007fff7fffLL, USR_OVF);
868 
869 #if CORE_HAS_AUDIO
870     TEST_R_OP_PP(wcmpyrw, 0x8765432101234567LL, 0x00000002ffffffffLL,
871                  0x00000001, USR_CLEAR);
872     TEST_R_OP_PP(wcmpyrw, 0x800000007fffffffLL, 0x000000ff7fffffffLL,
873                  0x7fffffff, USR_OVF);
874     TEST_R_OP_PP(wcmpyrw, 0x7fffffff80000000LL, 0x7fffffff000000ffLL,
875                  0x80000000, USR_OVF);
876 #else
877     printf("Audio instructions skipped\n");
878 #endif
879 
880     TEST_R_OP_RR(addh_l16_sat_ll,      0x0000ffff, 0x00000002, 0x00000001,
881                  USR_CLEAR);
882     TEST_R_OP_RR(addh_l16_sat_ll,      0x00007fff, 0x00000005, 0x00007fff,
883                  USR_OVF);
884     TEST_R_OP_RR(addh_l16_sat_ll,      0x00008000, 0x00008000, 0xffff8000,
885                  USR_OVF);
886 
887     TEST_P_OP_P(vconj, 0x0000ffff00000001LL, 0x0000ffff00000001LL, USR_CLEAR);
888     TEST_P_OP_P(vconj, 0x800000000000ffffLL, 0x7fff00000000ffffLL, USR_OVF);
889 
890     TEST_P_OP_PP(vxaddsubw, 0x8765432101234567LL, 0x00000002ffffffffLL,
891                  0x8765432201234569LL, USR_CLEAR);
892     TEST_P_OP_PP(vxaddsubw, 0x7fffffff7fffffffLL, 0xffffffffffffffffLL,
893                  0x7fffffff7ffffffeLL, USR_OVF);
894     TEST_P_OP_PP(vxaddsubw, 0x800000000fffffffLL, 0x0000000a00000008LL,
895                  0x8000000010000009LL, USR_OVF);
896 
897     TEST_P_OP_P(vabshsat, 0x0001000afffff800LL, 0x0001000a00010800LL,
898                 USR_CLEAR);
899     TEST_P_OP_P(vabshsat, 0x8000000b000c000aLL, 0x7fff000b000c000aLL,
900              USR_OVF);
901 
902     TEST_P_OP_PP(vnavgwr, 0x8765432101234567LL, 0x00000002ffffffffLL,
903                  0xc3b2a1900091a2b4LL, USR_CLEAR);
904     TEST_P_OP_PP(vnavgwr, 0x7fffffff8000000aLL, 0x80000000ffffffffLL,
905                  0x7fffffffc0000006LL, USR_OVF);
906 
907     TEST_R_OP_RI(round_ri_sat,         0x0000ffff, 2, 0x00004000, USR_CLEAR);
908     TEST_R_OP_RI(round_ri_sat,         0x7fffffff, 2, 0x1fffffff, USR_OVF);
909 
910     TEST_R_OP_RR(asr_r_r_sat,          0x0000ffff, 0x00000002, 0x00003fff,
911                  USR_CLEAR);
912     TEST_R_OP_RR(asr_r_r_sat,          0x00ffffff, 0xfffffff5, 0x7fffffff,
913                  USR_OVF);
914     TEST_R_OP_RR(asr_r_r_sat,          0x80000000, 0xfffffff5, 0x80000000,
915                  USR_OVF);
916 
917     TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
918                    0x0000000000000000ULL, 0x0004000300030004ULL, 0xf0,
919                    USR_CLEAR);
920     TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
921                    0x000affff000d0000ULL, 0x000e0003000f0004ULL, 0xcc,
922                    USR_CLEAR);
923     TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
924                   0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xfc,
925                   USR_OVF);
926     TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
927                    0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xf0,
928                    USR_OVF);
929 
930     /* Floating point */
931     TEST_R_OP_RR(sfmin,  SF_one,      SF_small_neg,   SF_small_neg, USR_CLEAR);
932     TEST_R_OP_RR(sfmin,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
933     TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
934     TEST_R_OP_RR(sfmin,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
935     TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
936     TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
937     TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
938     TEST_R_OP_RR(sfmin,  SF_zero,     SF_zero_neg,    SF_zero_neg,  USR_CLEAR);
939     TEST_R_OP_RR(sfmin,  SF_zero_neg, SF_zero,        SF_zero_neg,  USR_CLEAR);
940 
941     TEST_R_OP_RR(sfmax,  SF_one,      SF_small_neg,   SF_one,       USR_CLEAR);
942     TEST_R_OP_RR(sfmax,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
943     TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
944     TEST_R_OP_RR(sfmax,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
945     TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
946     TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
947     TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
948     TEST_R_OP_RR(sfmax,  SF_zero,     SF_zero_neg,    SF_zero,      USR_CLEAR);
949     TEST_R_OP_RR(sfmax,  SF_zero_neg, SF_zero,        SF_zero,      USR_CLEAR);
950 
951     TEST_R_OP_RR(sfadd,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
952     TEST_R_OP_RR(sfadd,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
953     TEST_R_OP_RR(sfadd,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
954     TEST_R_OP_RR(sfadd,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
955 
956     TEST_R_OP_RR(sfsub,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
957     TEST_R_OP_RR(sfsub,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
958     TEST_R_OP_RR(sfsub,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
959     TEST_R_OP_RR(sfsub,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
960 
961     TEST_R_OP_RR(sfmpy,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
962     TEST_R_OP_RR(sfmpy,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
963     TEST_R_OP_RR(sfmpy,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
964     TEST_R_OP_RR(sfmpy,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
965 
966     TEST_XR_OP_RR(sffma, SF_one,   SF_one,    SF_one,   SF_two,     USR_CLEAR);
967     TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
968     TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
969     TEST_XR_OP_RR(sffma, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
970     TEST_XR_OP_RR(sffma, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
971 
972     TEST_XR_OP_RR(sffms, SF_one,   SF_one,    SF_one,   SF_zero,    USR_CLEAR);
973     TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
974     TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
975     TEST_XR_OP_RR(sffms, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
976     TEST_XR_OP_RR(sffms, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
977 
978     TEST_CMP_RR(sfcmpuo, SF_one,      SF_large_pos,    0x00,    USR_CLEAR);
979     TEST_CMP_RR(sfcmpuo, SF_INF,      SF_large_pos,    0x00,    USR_CLEAR);
980     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_large_pos,    0xff,    USR_CLEAR);
981     TEST_CMP_RR(sfcmpuo, SF_QNaN_neg, SF_large_pos,    0xff,    USR_CLEAR);
982     TEST_CMP_RR(sfcmpuo, SF_SNaN,     SF_large_pos,    0xff,    USR_FPINVF);
983     TEST_CMP_RR(sfcmpuo, SF_SNaN_neg, SF_large_pos,    0xff,    USR_FPINVF);
984     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_QNaN,         0xff,    USR_CLEAR);
985     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_SNaN,         0xff,    USR_FPINVF);
986 
987     TEST_CMP_RR(sfcmpeq, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
988     TEST_CMP_RR(sfcmpeq, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
989     TEST_CMP_RR(sfcmpgt, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
990     TEST_CMP_RR(sfcmpgt, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
991     TEST_CMP_RR(sfcmpge, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
992     TEST_CMP_RR(sfcmpge, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
993 
994     TEST_P_OP_PP(dfadd,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
995     TEST_P_OP_PP(dfadd,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
996     TEST_P_OP_PP(dfadd,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
997     TEST_P_OP_PP(dfadd,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
998 
999     TEST_P_OP_PP(dfsub,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
1000     TEST_P_OP_PP(dfsub,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1001     TEST_P_OP_PP(dfsub,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1002     TEST_P_OP_PP(dfsub,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1003 
1004 #if CORE_IS_V67
1005     TEST_P_OP_PP(dfmin,  DF_any,    DF_small_neg,    DF_small_neg,  USR_CLEAR);
1006     TEST_P_OP_PP(dfmin,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1007     TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1008     TEST_P_OP_PP(dfmin,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1009     TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1010     TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1011     TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1012     TEST_P_OP_PP(dfmin,  DF_zero,   DF_zero_neg,     DF_zero_neg,   USR_CLEAR);
1013     TEST_P_OP_PP(dfmin,  DF_zero_neg, DF_zero,       DF_zero_neg,   USR_CLEAR);
1014 
1015     TEST_P_OP_PP(dfmax,  DF_any,    DF_small_neg,    DF_any,        USR_CLEAR);
1016     TEST_P_OP_PP(dfmax,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1017     TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1018     TEST_P_OP_PP(dfmax,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1019     TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1020     TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1021     TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1022     TEST_P_OP_PP(dfmax,  DF_zero,   DF_zero_neg,     DF_zero,       USR_CLEAR);
1023     TEST_P_OP_PP(dfmax,  DF_zero_neg, DF_zero,       DF_zero,       USR_CLEAR);
1024 
1025     TEST_XP_OP_PP(dfmpyhh, DF_one,   DF_one,  DF_one,   DF_one_hh,  USR_CLEAR);
1026     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_QNaN,  DF_HEX_NaN, USR_CLEAR);
1027     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1028     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_QNaN, DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1029     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_SNaN, DF_QNaN,  DF_HEX_NaN, USR_FPINVF);
1030 #else
1031     printf("v67 instructions skipped\n");
1032 #endif
1033 
1034     TEST_CMP_PP(dfcmpuo, DF_small_neg, DF_any,          0x00,    USR_CLEAR);
1035     TEST_CMP_PP(dfcmpuo, DF_large_pos, DF_any,          0x00,    USR_CLEAR);
1036     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_any,          0xff,    USR_CLEAR);
1037     TEST_CMP_PP(dfcmpuo, DF_QNaN_neg,  DF_any,          0xff,    USR_CLEAR);
1038     TEST_CMP_PP(dfcmpuo, DF_SNaN,      DF_any,          0xff,    USR_FPINVF);
1039     TEST_CMP_PP(dfcmpuo, DF_SNaN_neg,  DF_any,          0xff,    USR_FPINVF);
1040     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_QNaN,         0xff,    USR_CLEAR);
1041     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_SNaN,         0xff,    USR_FPINVF);
1042 
1043     TEST_CMP_PP(dfcmpeq, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1044     TEST_CMP_PP(dfcmpeq, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1045     TEST_CMP_PP(dfcmpgt, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1046     TEST_CMP_PP(dfcmpgt, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1047     TEST_CMP_PP(dfcmpge, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1048     TEST_CMP_PP(dfcmpge, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1049 
1050     TEST_P_OP_R(conv_sf2df,       SF_QNaN,  DF_HEX_NaN,             USR_CLEAR);
1051     TEST_P_OP_R(conv_sf2df,       SF_SNaN,  DF_HEX_NaN,             USR_FPINVF);
1052     TEST_R_OP_R(conv_sf2uw,       SF_QNaN,  0xffffffff,             USR_FPINVF);
1053     TEST_R_OP_R(conv_sf2uw,       SF_SNaN,  0xffffffff,             USR_FPINVF);
1054     TEST_R_OP_R(conv_sf2w,        SF_QNaN,  0xffffffff,             USR_FPINVF);
1055     TEST_R_OP_R(conv_sf2w,        SF_SNaN,  0xffffffff,             USR_FPINVF);
1056     TEST_P_OP_R(conv_sf2ud,       SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1057     TEST_P_OP_R(conv_sf2ud,       SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1058     TEST_P_OP_R(conv_sf2d,        SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1059     TEST_P_OP_R(conv_sf2d,        SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1060     TEST_R_OP_R(conv_sf2uw_chop,  SF_QNaN,  0xffffffff,             USR_FPINVF);
1061     TEST_R_OP_R(conv_sf2uw_chop,  SF_SNaN,  0xffffffff,             USR_FPINVF);
1062     TEST_R_OP_R(conv_sf2w_chop,   SF_QNaN,  0xffffffff,             USR_FPINVF);
1063     TEST_R_OP_R(conv_sf2w_chop,   SF_SNaN,  0xffffffff,             USR_FPINVF);
1064     TEST_P_OP_R(conv_sf2ud_chop,  SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1065     TEST_P_OP_R(conv_sf2ud_chop,  SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1066     TEST_P_OP_R(conv_sf2d_chop,   SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1067     TEST_P_OP_R(conv_sf2d_chop,   SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1068 
1069     TEST_R_OP_P(conv_df2sf,       DF_QNaN,  SF_HEX_NaN,             USR_CLEAR);
1070     TEST_R_OP_P(conv_df2sf,       DF_SNaN,  SF_HEX_NaN,             USR_FPINVF);
1071     TEST_R_OP_P(conv_df2uw,       DF_QNaN,  0xffffffff,             USR_FPINVF);
1072     TEST_R_OP_P(conv_df2uw,       DF_SNaN,  0xffffffff,             USR_FPINVF);
1073     TEST_R_OP_P(conv_df2w,        DF_QNaN,  0xffffffff,             USR_FPINVF);
1074     TEST_R_OP_P(conv_df2w,        DF_SNaN,  0xffffffff,             USR_FPINVF);
1075     TEST_P_OP_P(conv_df2ud,       DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1076     TEST_P_OP_P(conv_df2ud,       DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1077     TEST_P_OP_P(conv_df2d,        DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1078     TEST_P_OP_P(conv_df2d,        DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1079     TEST_R_OP_P(conv_df2uw_chop,  DF_QNaN,  0xffffffff,             USR_FPINVF);
1080     TEST_R_OP_P(conv_df2uw_chop,  DF_SNaN,  0xffffffff,             USR_FPINVF);
1081 
1082     /* Test for typo in HELPER(conv_df2uw_chop) */
1083     TEST_R_OP_P(conv_df2uw_chop, 0xffffff7f00000001ULL, 0xffffffff, USR_FPINVF);
1084 
1085     TEST_R_OP_P(conv_df2w_chop,   DF_QNaN,  0xffffffff,             USR_FPINVF);
1086     TEST_R_OP_P(conv_df2w_chop,   DF_SNaN,  0xffffffff,             USR_FPINVF);
1087     TEST_P_OP_P(conv_df2ud_chop,  DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1088     TEST_P_OP_P(conv_df2ud_chop,  DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1089     TEST_P_OP_P(conv_df2d_chop,   DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1090     TEST_P_OP_P(conv_df2d_chop,   DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1091 
1092     TEST_R_OP_R(conv_uw2sf,    0x00000001,             SF_one,      USR_CLEAR);
1093     TEST_R_OP_R(conv_uw2sf,    0x010020a5,             0x4b801052,  USR_FPINPF);
1094     TEST_R_OP_R(conv_w2sf,     0x00000001,             SF_one,      USR_CLEAR);
1095     TEST_R_OP_R(conv_w2sf,     0x010020a5,             0x4b801052,  USR_FPINPF);
1096     TEST_R_OP_P(conv_ud2sf,    0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1097     TEST_R_OP_P(conv_ud2sf,    0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1098     TEST_R_OP_P(conv_d2sf,     0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1099     TEST_R_OP_P(conv_d2sf,     0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1100 
1101     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_one,   1, SF_four,
1102                    USR_CLEAR);
1103     TEST_XR_OP_RRp(sffma_sc, SF_QNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1104                    USR_CLEAR);
1105     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_QNaN,   SF_one,   1, SF_HEX_NaN,
1106                    USR_CLEAR);
1107     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_QNaN,  1, SF_HEX_NaN,
1108                    USR_CLEAR);
1109     TEST_XR_OP_RRp(sffma_sc, SF_SNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1110                    USR_FPINVF);
1111     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_SNaN,   SF_one,   1, SF_HEX_NaN,
1112                    USR_FPINVF);
1113     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_SNaN,  1, SF_HEX_NaN,
1114                    USR_FPINVF);
1115 
1116     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_one,    SF_one_recip,   0x00,
1117                   USR_CLEAR);
1118     TEST_Rp_OP_RR(sfrecipa, SF_QNaN,   SF_one,    SF_HEX_NaN,     0x00,
1119                   USR_CLEAR);
1120     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_QNaN,   SF_HEX_NaN,     0x00,
1121                   USR_CLEAR);
1122     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_SNaN,   SF_HEX_NaN,     0x00,
1123                   USR_FPINVF);
1124     TEST_Rp_OP_RR(sfrecipa, SF_SNaN,   SF_one,    SF_HEX_NaN,     0x00,
1125                   USR_FPINVF);
1126 
1127     TEST_R_OP_RR(sffixupn, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1128     TEST_R_OP_RR(sffixupn, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1129     TEST_R_OP_RR(sffixupn, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1130     TEST_R_OP_RR(sffixupn, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1131     TEST_R_OP_RR(sffixupn, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1132 
1133     TEST_R_OP_RR(sffixupd, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1134     TEST_R_OP_RR(sffixupd, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1135     TEST_R_OP_RR(sffixupd, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1136     TEST_R_OP_RR(sffixupd, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1137     TEST_R_OP_RR(sffixupd, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1138 
1139     TEST_R_OP_R(sffixupr, SF_one,             SF_one,           USR_CLEAR);
1140     TEST_R_OP_R(sffixupr, SF_QNaN,            SF_HEX_NaN,       USR_CLEAR);
1141     TEST_R_OP_R(sffixupr, SF_SNaN,            SF_HEX_NaN,       USR_FPINVF);
1142 
1143     TEST_Rp_OP_R(sfinvsqrta, SF_one,        SF_one_invsqrta,  0x00, USR_CLEAR);
1144     TEST_Rp_OP_R(sfinvsqrta, SF_zero,       SF_one,           0x00, USR_CLEAR);
1145     TEST_Rp_OP_R(sfinvsqrta, SF_QNaN,       SF_HEX_NaN,       0x00, USR_CLEAR);
1146     TEST_Rp_OP_R(sfinvsqrta, SF_small_neg,  SF_HEX_NaN,       0x00, USR_FPINVF);
1147     TEST_Rp_OP_R(sfinvsqrta, SF_SNaN,       SF_HEX_NaN,       0x00, USR_FPINVF);
1148 
1149     puts(err ? "FAIL" : "PASS");
1150     return err;
1151 }
1152