xref: /openbmc/qemu/tests/tcg/hexagon/usr.c (revision 2a8af382)
1 /*
2  *  Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Test instructions that might set bits in user status register (USR)
20  */
21 
22 #include <stdio.h>
23 #include <stdint.h>
24 
25 int err;
26 
27 static void __check(int line, uint32_t val, uint32_t expect)
28 {
29     if (val != expect) {
30         printf("ERROR at line %d: %d != %d\n", line, val, expect);
31         err++;
32     }
33 }
34 
35 #define check(RES, EXP) __check(__LINE__, RES, EXP)
36 
37 static void __check32(int line, uint32_t val, uint32_t expect)
38 {
39     if (val != expect) {
40         printf("ERROR at line %d: 0x%08x != 0x%08x\n", line, val, expect);
41         err++;
42     }
43 }
44 
45 #define check32(RES, EXP) __check32(__LINE__, RES, EXP)
46 
47 static void __check64(int line, uint64_t val, uint64_t expect)
48 {
49     if (val != expect) {
50         printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", line, val, expect);
51         err++;
52     }
53 }
54 
55 #define check64(RES, EXP) __check64(__LINE__, RES, EXP)
56 
57 /*
58  * Some of the instructions tested are only available on certain versions
59  * of the Hexagon core
60  */
61 #define CORE_HAS_AUDIO    (__HEXAGON_ARCH__ >= 67 && defined(__HEXAGON_AUDIO__))
62 #define CORE_IS_V67       (__HEXAGON_ARCH__ >= 67)
63 
64 /* Define the bits in Hexagon USR register */
65 #define USR_OVF_BIT          0        /* Sticky saturation overflow */
66 #define USR_FPINVF_BIT       1        /* IEEE FP invalid sticky flag */
67 #define USR_FPDBZF_BIT       2        /* IEEE FP divide-by-zero sticky flag */
68 #define USR_FPOVFF_BIT       3        /* IEEE FP overflow sticky flag */
69 #define USR_FPUNFF_BIT       4        /* IEEE FP underflow sticky flag */
70 #define USR_FPINPF_BIT       5        /* IEEE FP inexact sticky flag */
71 
72 /* Corresponding values in USR */
73 #define USR_CLEAR            0
74 #define USR_OVF              (1 << USR_OVF_BIT)
75 #define USR_FPINVF           (1 << USR_FPINVF_BIT)
76 #define USR_FPDBZF           (1 << USR_FPDBZF_BIT)
77 #define USR_FPOVFF           (1 << USR_FPOVFF_BIT)
78 #define USR_FPUNFF           (1 << USR_FPUNFF_BIT)
79 #define USR_FPINPF           (1 << USR_FPINPF_BIT)
80 
81 /* Some useful floating point values */
82 const uint32_t SF_INF =              0x7f800000;
83 const uint32_t SF_QNaN =             0x7fc00000;
84 const uint32_t SF_SNaN =             0x7fb00000;
85 const uint32_t SF_QNaN_neg =         0xffc00000;
86 const uint32_t SF_SNaN_neg =         0xffb00000;
87 const uint32_t SF_HEX_NaN =          0xffffffff;
88 const uint32_t SF_zero =             0x00000000;
89 const uint32_t SF_zero_neg =         0x80000000;
90 const uint32_t SF_one =              0x3f800000;
91 const uint32_t SF_one_recip =        0x3f7f0001;         /* 0.9960...  */
92 const uint32_t SF_one_invsqrta =     0x3f7f0000;         /* 0.99609375 */
93 const uint32_t SF_two =              0x40000000;
94 const uint32_t SF_four =             0x40800000;
95 const uint32_t SF_small_neg =        0xab98fba8;
96 const uint32_t SF_large_pos =        0x5afa572e;
97 
98 const uint64_t DF_QNaN =             0x7ff8000000000000ULL;
99 const uint64_t DF_SNaN =             0x7ff7000000000000ULL;
100 const uint64_t DF_QNaN_neg =         0xfff8000000000000ULL;
101 const uint64_t DF_SNaN_neg =         0xfff7000000000000ULL;
102 const uint64_t DF_HEX_NaN =          0xffffffffffffffffULL;
103 const uint64_t DF_zero =             0x0000000000000000ULL;
104 const uint64_t DF_zero_neg =         0x8000000000000000ULL;
105 const uint64_t DF_any =              0x3f80000000000000ULL;
106 const uint64_t DF_one =              0x3ff0000000000000ULL;
107 const uint64_t DF_one_hh =           0x3ff001ff80000000ULL;     /* 1.00048... */
108 const uint64_t DF_small_neg =        0xbd731f7500000000ULL;
109 const uint64_t DF_large_pos =        0x7f80000000000001ULL;
110 
111 /*
112  * Templates for functions to execute an instruction
113  *
114  * The templates vary by the number of arguments and the types of the args
115  * and result.  We use one letter in the macro name for the result and each
116  * argument:
117  *     x             unknown (specified in a subsequent template) or don't care
118  *     R             register (32 bits)
119  *     P             pair (64 bits)
120  *     p             predicate
121  *     I             immediate
122  *     Xx            read/write
123  */
124 
125 /* Clear bits 0-5 in USR */
126 #define CLEAR_USRBITS \
127     "r2 = usr\n\t" \
128     "r2 = and(r2, #0xffffffc0)\n\t" \
129     "usr = r2\n\t"
130 
131 /* Template for instructions with one register operand */
132 #define FUNC_x_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
133 static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result) \
134 { \
135     RESTYPE result; \
136     uint32_t usr; \
137     asm(CLEAR_USRBITS \
138         INSN  "\n\t" \
139         "%1 = usr\n\t" \
140         : "=r"(result), "=r"(usr) \
141         : "r"(src) \
142         : "r2", "usr"); \
143       *usr_result = usr & 0x3f; \
144       return result; \
145 }
146 
147 #define FUNC_R_OP_R(NAME, INSN) \
148 FUNC_x_OP_x(uint32_t, uint32_t, NAME, INSN)
149 
150 #define FUNC_R_OP_P(NAME, INSN) \
151 FUNC_x_OP_x(uint32_t, uint64_t, NAME, INSN)
152 
153 #define FUNC_P_OP_P(NAME, INSN) \
154 FUNC_x_OP_x(uint64_t, uint64_t, NAME, INSN)
155 
156 #define FUNC_P_OP_R(NAME, INSN) \
157 FUNC_x_OP_x(uint64_t, uint32_t, NAME, INSN)
158 
159 /*
160  * Template for instructions with a register and predicate result
161  * and one register operand
162  */
163 #define FUNC_xp_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
164 static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result, uint32_t *usr_result) \
165 { \
166     RESTYPE result; \
167     uint8_t pred; \
168     uint32_t usr; \
169     asm(CLEAR_USRBITS \
170         INSN  "\n\t" \
171         "%1 = p2\n\t" \
172         "%2 = usr\n\t" \
173         : "=r"(result), "=r"(pred), "=r"(usr) \
174         : "r"(src) \
175         : "r2", "p2", "usr"); \
176     *pred_result = pred; \
177     *usr_result = usr & 0x3f; \
178     return result; \
179 }
180 
181 #define FUNC_Rp_OP_R(NAME, INSN) \
182 FUNC_xp_OP_x(uint32_t, uint32_t, NAME, INSN)
183 
184 /* Template for instructions with two register operands */
185 #define FUNC_x_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
186 static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
187 { \
188     RESTYPE result; \
189     uint32_t usr; \
190     asm(CLEAR_USRBITS \
191         INSN "\n\t" \
192         "%1 = usr\n\t" \
193         : "=r"(result), "=r"(usr) \
194         : "r"(src1), "r"(src2) \
195         : "r2", "usr"); \
196     *usr_result = usr & 0x3f; \
197     return result; \
198 }
199 
200 #define FUNC_P_OP_PP(NAME, INSN) \
201 FUNC_x_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
202 
203 #define FUNC_R_OP_PP(NAME, INSN) \
204 FUNC_x_OP_xx(uint32_t, uint64_t, uint64_t, NAME, INSN)
205 
206 #define FUNC_P_OP_RR(NAME, INSN) \
207 FUNC_x_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)
208 
209 #define FUNC_R_OP_RR(NAME, INSN) \
210 FUNC_x_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
211 
212 #define FUNC_R_OP_PR(NAME, INSN) \
213 FUNC_x_OP_xx(uint32_t, uint64_t, uint32_t, NAME, INSN)
214 
215 #define FUNC_P_OP_PR(NAME, INSN) \
216 FUNC_x_OP_xx(uint64_t, uint64_t, uint32_t, NAME, INSN)
217 
218 /*
219  * Template for instructions with a register and predicate result
220  * and two register operands
221  */
222 #define FUNC_xp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
223 static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, \
224                     uint8_t *pred_result, uint32_t *usr_result) \
225 { \
226     RESTYPE result; \
227     uint8_t pred; \
228     uint32_t usr; \
229     asm(CLEAR_USRBITS \
230         INSN  "\n\t" \
231         "%1 = p2\n\t" \
232         "%2 = usr\n\t" \
233         : "=r"(result), "=r"(pred), "=r"(usr) \
234         : "r"(src1), "r"(src2) \
235         : "r2", "p2", "usr"); \
236     *pred_result = pred; \
237     *usr_result = usr & 0x3f; \
238     return result; \
239 }
240 
241 #define FUNC_Rp_OP_RR(NAME, INSN) \
242 FUNC_xp_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
243 
244 /* Template for instructions with one register and one immediate */
245 #define FUNC_x_OP_xI(RESTYPE, SRC1TYPE, NAME, INSN) \
246 static RESTYPE NAME(SRC1TYPE src1, int32_t src2, uint32_t *usr_result) \
247 { \
248     RESTYPE result; \
249     uint32_t usr; \
250     asm(CLEAR_USRBITS \
251         INSN "\n\t" \
252         "%1 = usr\n\t" \
253         : "=r"(result), "=r"(usr) \
254         : "r"(src1), "i"(src2) \
255         : "r2", "usr"); \
256     *usr_result = usr & 0x3f; \
257     return result; \
258 }
259 
260 #define FUNC_R_OP_RI(NAME, INSN) \
261 FUNC_x_OP_xI(uint32_t, uint32_t, NAME, INSN)
262 
263 #define FUNC_R_OP_PI(NAME, INSN) \
264 FUNC_x_OP_xI(uint32_t, uint64_t, NAME, INSN)
265 
266 /*
267  * Template for instructions with a read/write result
268  * and two register operands
269  */
270 #define FUNC_Xx_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
271 static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
272                     uint32_t *usr_result) \
273 { \
274     uint32_t usr; \
275     asm(CLEAR_USRBITS \
276         INSN "\n\t" \
277         "%1 = usr\n\t" \
278         : "+r"(result), "=r"(usr) \
279         : "r"(src1), "r"(src2) \
280         : "r2", "usr"); \
281     *usr_result = usr & 0x3f; \
282     return result; \
283 }
284 
285 #define FUNC_XR_OP_RR(NAME, INSN) \
286 FUNC_Xx_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
287 
288 #define FUNC_XP_OP_PP(NAME, INSN) \
289 FUNC_Xx_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
290 
291 #define FUNC_XP_OP_RR(NAME, INSN) \
292 FUNC_Xx_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)
293 
294 /*
295  * Template for instructions with a read/write result
296  * and two register operands
297  */
298 #define FUNC_Xxp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
299 static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
300                     uint8_t *pred_result, uint32_t *usr_result) \
301 { \
302     uint32_t usr; \
303     uint8_t pred; \
304     asm(CLEAR_USRBITS \
305         INSN "\n\t" \
306         "%1 = p2\n\t" \
307         "%2 = usr\n\t" \
308         : "+r"(result), "=r"(pred), "=r"(usr) \
309         : "r"(src1), "r"(src2) \
310         : "r2", "usr"); \
311     *pred_result = pred; \
312     *usr_result = usr & 0x3f; \
313     return result; \
314 }
315 
316 #define FUNC_XPp_OP_PP(NAME, INSN) \
317 FUNC_Xxp_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
318 
319 /*
320  * Template for instructions with a read/write result and
321  * two register and one predicate operands
322  */
323 #define FUNC_Xx_OP_xxp(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
324 static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, uint8_t pred,\
325                     uint32_t *usr_result) \
326 { \
327     uint32_t usr; \
328     asm(CLEAR_USRBITS \
329         "p2 = %4\n\t" \
330         INSN "\n\t" \
331         "%1 = usr\n\t" \
332         : "+r"(result), "=r"(usr) \
333         : "r"(src1), "r"(src2), "r"(pred) \
334         : "r2", "p2", "usr"); \
335     *usr_result = usr & 0x3f; \
336     return result; \
337 }
338 
339 #define FUNC_XR_OP_RRp(NAME, INSN) \
340 FUNC_Xx_OP_xxp(uint32_t, uint32_t, uint32_t, NAME, INSN)
341 
342 /* Template for compare instructions with two register operands */
343 #define FUNC_CMP_xx(SRC1TYPE, SRC2TYPE, NAME, INSN) \
344 static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
345 { \
346     uint32_t result; \
347     uint32_t usr; \
348     asm(CLEAR_USRBITS \
349         INSN "\n\t" \
350         "%0 = p1\n\t" \
351         "%1 = usr\n\t" \
352         : "=r"(result), "=r"(usr) \
353         : "r"(src1), "r"(src2) \
354         : "p1", "r2", "usr"); \
355     *usr_result = usr & 0x3f; \
356     return result; \
357 }
358 
359 #define FUNC_CMP_RR(NAME, INSN) \
360 FUNC_CMP_xx(uint32_t, uint32_t, NAME, INSN)
361 
362 #define FUNC_CMP_PP(NAME, INSN) \
363 FUNC_CMP_xx(uint64_t, uint64_t, NAME, INSN)
364 
365 /*
366  * Function declarations using the templates
367  */
368 FUNC_R_OP_R(satub,              "%0 = satub(%2)")
369 FUNC_P_OP_PP(vaddubs,           "%0 = vaddub(%2, %3):sat")
370 FUNC_P_OP_PP(vadduhs,           "%0 = vadduh(%2, %3):sat")
371 FUNC_P_OP_PP(vsububs,           "%0 = vsubub(%2, %3):sat")
372 FUNC_P_OP_PP(vsubuhs,           "%0 = vsubuh(%2, %3):sat")
373 
374 /* Add vector of half integers with saturation and pack to unsigned bytes */
375 FUNC_R_OP_PP(vaddhubs,          "%0 = vaddhub(%2, %3):sat")
376 
377 /* Vector saturate half to unsigned byte */
378 FUNC_R_OP_P(vsathub,            "%0 = vsathub(%2)")
379 
380 /* Similar to above but takes a 32-bit argument */
381 FUNC_R_OP_R(svsathub,           "%0 = vsathub(%2)")
382 
383 /* Vector saturate word to unsigned half */
384 FUNC_P_OP_P(vsatwuh_nopack,     "%0 = vsatwuh(%2)")
385 
386 /* Similar to above but returns a 32-bit result */
387 FUNC_R_OP_P(vsatwuh,            "%0 = vsatwuh(%2)")
388 
389 /* Vector arithmetic shift halfwords with saturate and pack */
390 FUNC_R_OP_PI(asrhub_sat,        "%0 = vasrhub(%2, #%3):sat")
391 
392 /* Vector arithmetic shift halfwords with round, saturate and pack */
393 FUNC_R_OP_PI(asrhub_rnd_sat,    "%0 = vasrhub(%2, #%3):raw")
394 
395 FUNC_R_OP_RR(addsat,            "%0 = add(%2, %3):sat")
396 /* Similar to above but with register pairs */
397 FUNC_P_OP_PP(addpsat,           "%0 = add(%2, %3):sat")
398 
399 FUNC_XR_OP_RR(mpy_acc_sat_hh_s0, "%0 += mpy(%2.H, %3.H):sat")
400 FUNC_R_OP_RR(mpy_sat_hh_s1,     "%0 = mpy(%2.H, %3.H):<<1:sat")
401 FUNC_R_OP_RR(mpy_sat_rnd_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:rnd:sat")
402 FUNC_R_OP_RR(mpy_up_s1_sat,     "%0 = mpy(%2, %3):<<1:sat")
403 FUNC_P_OP_RR(vmpy2s_s1,         "%0 = vmpyh(%2, %3):<<1:sat")
404 FUNC_P_OP_RR(vmpy2su_s1,        "%0 = vmpyhsu(%2, %3):<<1:sat")
405 FUNC_R_OP_RR(vmpy2s_s1pack,     "%0 = vmpyh(%2, %3):<<1:rnd:sat")
406 FUNC_P_OP_PP(vmpy2es_s1,        "%0 = vmpyeh(%2, %3):<<1:sat")
407 FUNC_R_OP_PP(vdmpyrs_s1,        "%0 = vdmpy(%2, %3):<<1:rnd:sat")
408 FUNC_XP_OP_PP(vdmacs_s0,        "%0 += vdmpy(%2, %3):sat")
409 FUNC_R_OP_RR(cmpyrs_s0,         "%0 = cmpy(%2, %3):rnd:sat")
410 FUNC_XP_OP_RR(cmacs_s0,         "%0 += cmpy(%2, %3):sat")
411 FUNC_XP_OP_RR(cnacs_s0,         "%0 -= cmpy(%2, %3):sat")
412 FUNC_P_OP_PP(vrcmpys_s1_h,      "%0 = vrcmpys(%2, %3):<<1:sat:raw:hi")
413 FUNC_XP_OP_PP(mmacls_s0,        "%0 += vmpyweh(%2, %3):sat")
414 FUNC_R_OP_RR(hmmpyl_rs1,        "%0 = mpy(%2, %3.L):<<1:rnd:sat")
415 FUNC_XP_OP_PP(mmaculs_s0,       "%0 += vmpyweuh(%2, %3):sat")
416 FUNC_R_OP_PR(cmpyi_wh,          "%0 = cmpyiwh(%2, %3):<<1:rnd:sat")
417 FUNC_P_OP_PP(vcmpy_s0_sat_i,    "%0 = vcmpyi(%2, %3):sat")
418 FUNC_P_OP_PR(vcrotate,          "%0 = vcrotate(%2, %3)")
419 FUNC_P_OP_PR(vcnegh,            "%0 = vcnegh(%2, %3)")
420 
421 #if CORE_HAS_AUDIO
422 FUNC_R_OP_PP(wcmpyrw,           "%0 = cmpyrw(%2, %3):<<1:sat")
423 #endif
424 
425 FUNC_R_OP_RR(addh_l16_sat_ll,   "%0 = add(%2.L, %3.L):sat")
426 FUNC_P_OP_P(vconj,              "%0 = vconj(%2):sat")
427 FUNC_P_OP_PP(vxaddsubw,         "%0 = vxaddsubw(%2, %3):sat")
428 FUNC_P_OP_P(vabshsat,           "%0 = vabsh(%2):sat")
429 FUNC_P_OP_PP(vnavgwr,           "%0 = vnavgw(%2, %3):rnd:sat")
430 FUNC_R_OP_RI(round_ri_sat,      "%0 = round(%2, #%3):sat")
431 FUNC_R_OP_RR(asr_r_r_sat,       "%0 = asr(%2, %3):sat")
432 FUNC_R_OP_RR(asl_r_r_sat,       "%0 = asl(%2, %3):sat")
433 
434 FUNC_XPp_OP_PP(ACS,             "%0, p2 = vacsh(%3, %4)")
435 
436 /* Floating point */
437 FUNC_R_OP_RR(sfmin,             "%0 = sfmin(%2, %3)")
438 FUNC_R_OP_RR(sfmax,             "%0 = sfmax(%2, %3)")
439 FUNC_R_OP_RR(sfadd,             "%0 = sfadd(%2, %3)")
440 FUNC_R_OP_RR(sfsub,             "%0 = sfsub(%2, %3)")
441 FUNC_R_OP_RR(sfmpy,             "%0 = sfmpy(%2, %3)")
442 FUNC_XR_OP_RR(sffma,            "%0 += sfmpy(%2, %3)")
443 FUNC_XR_OP_RR(sffms,            "%0 -= sfmpy(%2, %3)")
444 FUNC_CMP_RR(sfcmpuo,            "p1 = sfcmp.uo(%2, %3)")
445 FUNC_CMP_RR(sfcmpeq,            "p1 = sfcmp.eq(%2, %3)")
446 FUNC_CMP_RR(sfcmpgt,            "p1 = sfcmp.gt(%2, %3)")
447 FUNC_CMP_RR(sfcmpge,            "p1 = sfcmp.ge(%2, %3)")
448 
449 FUNC_P_OP_PP(dfadd,             "%0 = dfadd(%2, %3)")
450 FUNC_P_OP_PP(dfsub,             "%0 = dfsub(%2, %3)")
451 
452 #if CORE_IS_V67
453 FUNC_P_OP_PP(dfmin,             "%0 = dfmin(%2, %3)")
454 FUNC_P_OP_PP(dfmax,             "%0 = dfmax(%2, %3)")
455 FUNC_XP_OP_PP(dfmpyhh,          "%0 += dfmpyhh(%2, %3)")
456 #endif
457 
458 FUNC_CMP_PP(dfcmpuo,            "p1 = dfcmp.uo(%2, %3)")
459 FUNC_CMP_PP(dfcmpeq,            "p1 = dfcmp.eq(%2, %3)")
460 FUNC_CMP_PP(dfcmpgt,            "p1 = dfcmp.gt(%2, %3)")
461 FUNC_CMP_PP(dfcmpge,            "p1 = dfcmp.ge(%2, %3)")
462 
463 /* Conversions from sf */
464 FUNC_P_OP_R(conv_sf2df,         "%0 = convert_sf2df(%2)")
465 FUNC_R_OP_R(conv_sf2uw,         "%0 = convert_sf2uw(%2)")
466 FUNC_R_OP_R(conv_sf2w,          "%0 = convert_sf2w(%2)")
467 FUNC_P_OP_R(conv_sf2ud,         "%0 = convert_sf2ud(%2)")
468 FUNC_P_OP_R(conv_sf2d,          "%0 = convert_sf2d(%2)")
469 FUNC_R_OP_R(conv_sf2uw_chop,    "%0 = convert_sf2uw(%2):chop")
470 FUNC_R_OP_R(conv_sf2w_chop,     "%0 = convert_sf2w(%2):chop")
471 FUNC_P_OP_R(conv_sf2ud_chop,    "%0 = convert_sf2ud(%2):chop")
472 FUNC_P_OP_R(conv_sf2d_chop,     "%0 = convert_sf2d(%2):chop")
473 
474 /* Conversions from df */
475 FUNC_R_OP_P(conv_df2sf,         "%0 = convert_df2sf(%2)")
476 FUNC_R_OP_P(conv_df2uw,         "%0 = convert_df2uw(%2)")
477 FUNC_R_OP_P(conv_df2w,          "%0 = convert_df2w(%2)")
478 FUNC_P_OP_P(conv_df2ud,         "%0 = convert_df2ud(%2)")
479 FUNC_P_OP_P(conv_df2d,          "%0 = convert_df2d(%2)")
480 FUNC_R_OP_P(conv_df2uw_chop,    "%0 = convert_df2uw(%2):chop")
481 FUNC_R_OP_P(conv_df2w_chop,     "%0 = convert_df2w(%2):chop")
482 FUNC_P_OP_P(conv_df2ud_chop,    "%0 = convert_df2ud(%2):chop")
483 FUNC_P_OP_P(conv_df2d_chop,     "%0 = convert_df2d(%2):chop")
484 
485 /* Integer to float conversions */
486 FUNC_R_OP_R(conv_uw2sf,         "%0 = convert_uw2sf(%2)")
487 FUNC_R_OP_R(conv_w2sf,          "%0 = convert_w2sf(%2)")
488 FUNC_R_OP_P(conv_ud2sf,         "%0 = convert_ud2sf(%2)")
489 FUNC_R_OP_P(conv_d2sf,          "%0 = convert_d2sf(%2)")
490 
491 /* Special purpose floating point instructions */
492 FUNC_XR_OP_RRp(sffma_sc,        "%0 += sfmpy(%2, %3, p2):scale")
493 FUNC_Rp_OP_RR(sfrecipa,         "%0, p2 = sfrecipa(%3, %4)")
494 FUNC_R_OP_RR(sffixupn,          "%0 = sffixupn(%2, %3)")
495 FUNC_R_OP_RR(sffixupd,          "%0 = sffixupd(%2, %3)")
496 FUNC_R_OP_R(sffixupr,           "%0 = sffixupr(%2)")
497 FUNC_Rp_OP_R(sfinvsqrta,        "%0, p2 = sfinvsqrta(%3)")
498 
499 /*
500  * Templates for test cases
501  *
502  * Same naming convention as the function templates
503  */
504 #define TEST_x_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, RES, USR_RES) \
505     do { \
506         RESTYPE result; \
507         SRCTYPE src = SRC; \
508         uint32_t usr_result; \
509         result = FUNC(src, &usr_result); \
510         CHECKFN(result, RES); \
511         check(usr_result, USR_RES); \
512     } while (0)
513 
514 #define TEST_R_OP_R(FUNC, SRC, RES, USR_RES) \
515 TEST_x_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, USR_RES)
516 
517 #define TEST_R_OP_P(FUNC, SRC, RES, USR_RES) \
518 TEST_x_OP_x(uint32_t, check32, uint64_t, FUNC, SRC, RES, USR_RES)
519 
520 #define TEST_P_OP_P(FUNC, SRC, RES, USR_RES) \
521 TEST_x_OP_x(uint64_t, check64, uint64_t, FUNC, SRC, RES, USR_RES)
522 
523 #define TEST_P_OP_R(FUNC, SRC, RES, USR_RES) \
524 TEST_x_OP_x(uint64_t, check64, uint32_t, FUNC, SRC, RES, USR_RES)
525 
526 #define TEST_xp_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, \
527                      RES, PRED_RES, USR_RES) \
528     do { \
529         RESTYPE result; \
530         SRCTYPE src = SRC; \
531         uint8_t pred_result; \
532         uint32_t usr_result; \
533         result = FUNC(src, &pred_result, &usr_result); \
534         CHECKFN(result, RES); \
535         check(pred_result, PRED_RES); \
536         check(usr_result, USR_RES); \
537     } while (0)
538 
539 #define TEST_Rp_OP_R(FUNC, SRC, RES, PRED_RES, USR_RES) \
540 TEST_xp_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, PRED_RES, USR_RES)
541 
542 #define TEST_x_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
543                      FUNC, SRC1, SRC2, RES, USR_RES) \
544     do { \
545         RESTYPE result; \
546         SRC1TYPE src1 = SRC1; \
547         SRC2TYPE src2 = SRC2; \
548         uint32_t usr_result; \
549         result = FUNC(src1, src2, &usr_result); \
550         CHECKFN(result, RES); \
551         check(usr_result, USR_RES); \
552     } while (0)
553 
554 #define TEST_P_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
555 TEST_x_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
556              FUNC, SRC1, SRC2, RES, USR_RES)
557 
558 #define TEST_R_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
559 TEST_x_OP_xx(uint32_t, check32, uint64_t, uint64_t, \
560              FUNC, SRC1, SRC2, RES, USR_RES)
561 
562 #define TEST_P_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
563 TEST_x_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
564              FUNC, SRC1, SRC2, RES, USR_RES)
565 
566 #define TEST_R_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
567 TEST_x_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
568              FUNC, SRC1, SRC2, RES, USR_RES)
569 
570 #define TEST_R_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
571 TEST_x_OP_xx(uint32_t, check32, uint64_t, uint32_t, \
572              FUNC, SRC1, SRC2, RES, USR_RES)
573 
574 #define TEST_P_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
575 TEST_x_OP_xx(uint64_t, check64, uint64_t, uint32_t, \
576              FUNC, SRC1, SRC2, RES, USR_RES)
577 
578 #define TEST_xp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, FUNC, SRC1, SRC2, \
579                       RES, PRED_RES, USR_RES) \
580     do { \
581         RESTYPE result; \
582         SRC1TYPE src1 = SRC1; \
583         SRC2TYPE src2 = SRC2; \
584         uint8_t pred_result; \
585         uint32_t usr_result; \
586         result = FUNC(src1, src2, &pred_result, &usr_result); \
587         CHECKFN(result, RES); \
588         check(pred_result, PRED_RES); \
589         check(usr_result, USR_RES); \
590     } while (0)
591 
592 #define TEST_Rp_OP_RR(FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES) \
593 TEST_xp_OP_xx(uint32_t, check32, uint32_t, uint32_t, FUNC, SRC1, SRC2, \
594               RES, PRED_RES, USR_RES)
595 
596 #define TEST_x_OP_xI(RESTYPE, CHECKFN, SRC1TYPE, \
597                      FUNC, SRC1, SRC2, RES, USR_RES) \
598     do { \
599         RESTYPE result; \
600         SRC1TYPE src1 = SRC1; \
601         uint32_t src2 = SRC2; \
602         uint32_t usr_result; \
603         result = FUNC(src1, src2, &usr_result); \
604         CHECKFN(result, RES); \
605         check(usr_result, USR_RES); \
606     } while (0)
607 
608 #define TEST_R_OP_RI(FUNC, SRC1, SRC2, RES, USR_RES) \
609 TEST_x_OP_xI(uint32_t, check32, uint32_t, \
610              FUNC, SRC1, SRC2, RES, USR_RES)
611 
612 #define TEST_R_OP_PI(FUNC, SRC1, SRC2, RES, USR_RES) \
613 TEST_x_OP_xI(uint32_t, check64, uint64_t, \
614              FUNC, SRC1, SRC2, RES, USR_RES)
615 
616 #define TEST_Xx_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
617                       FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
618     do { \
619         RESTYPE result = RESIN; \
620         SRC1TYPE src1 = SRC1; \
621         SRC2TYPE src2 = SRC2; \
622         uint32_t usr_result; \
623         result = FUNC(result, src1, src2, &usr_result); \
624         CHECKFN(result, RES); \
625         check(usr_result, USR_RES); \
626     } while (0)
627 
628 #define TEST_XR_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
629 TEST_Xx_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
630               FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
631 
632 #define TEST_XP_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
633 TEST_Xx_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
634               FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
635 
636 #define TEST_XP_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
637 TEST_Xx_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
638               FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
639 
640 #define TEST_Xxp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
641                        FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
642     do { \
643         RESTYPE result = RESIN; \
644         SRC1TYPE src1 = SRC1; \
645         SRC2TYPE src2 = SRC2; \
646         uint8_t pred_res; \
647         uint32_t usr_result; \
648         result = FUNC(result, src1, src2, &pred_res, &usr_result); \
649         CHECKFN(result, RES); \
650         check(usr_result, USR_RES); \
651     } while (0)
652 
653 #define TEST_XPp_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
654 TEST_Xxp_OP_xx(uint64_t, check64, uint64_t, uint64_t, FUNC, RESIN, SRC1, SRC2, \
655                RES, PRED_RES, USR_RES)
656 
657 #define TEST_Xx_OP_xxp(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
658                       FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
659     do { \
660         RESTYPE result = RESIN; \
661         SRC1TYPE src1 = SRC1; \
662         SRC2TYPE src2 = SRC2; \
663         uint8_t pred = PRED; \
664         uint32_t usr_result; \
665         result = FUNC(result, src1, src2, pred, &usr_result); \
666         CHECKFN(result, RES); \
667         check(usr_result, USR_RES); \
668     } while (0)
669 
670 #define TEST_XR_OP_RRp(FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
671 TEST_Xx_OP_xxp(uint32_t, check32, uint32_t, uint32_t, \
672               FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES)
673 
674 #define TEST_CMP_xx(SRC1TYPE, SRC2TYPE, \
675                     FUNC, SRC1, SRC2, RES, USR_RES) \
676     do { \
677         uint32_t result; \
678         SRC1TYPE src1 = SRC1; \
679         SRC2TYPE src2 = SRC2; \
680         uint32_t usr_result; \
681         result = FUNC(src1, src2, &usr_result); \
682         check(result, RES); \
683         check(usr_result, USR_RES); \
684     } while (0)
685 
686 #define TEST_CMP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
687 TEST_CMP_xx(uint32_t, uint32_t, FUNC, SRC1, SRC2, RES, USR_RES)
688 
689 #define TEST_CMP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
690 TEST_CMP_xx(uint64_t, uint64_t, FUNC, SRC1, SRC2, RES, USR_RES)
691 
692 int main()
693 {
694     TEST_R_OP_R(satub,       0,         0,         USR_CLEAR);
695     TEST_R_OP_R(satub,       0xff,      0xff,      USR_CLEAR);
696     TEST_R_OP_R(satub,       0xfff,     0xff,      USR_OVF);
697     TEST_R_OP_R(satub,       -1,        0,         USR_OVF);
698 
699     TEST_P_OP_PP(vaddubs,    0xfeLL,    0x01LL,    0xffLL,    USR_CLEAR);
700     TEST_P_OP_PP(vaddubs,    0xffLL,    0xffLL,    0xffLL,    USR_OVF);
701 
702     TEST_P_OP_PP(vadduhs,    0xfffeLL,  0x1LL,     0xffffLL,  USR_CLEAR);
703     TEST_P_OP_PP(vadduhs,    0xffffLL,  0x1LL,     0xffffLL,  USR_OVF);
704 
705     TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0101010101010101LL,
706                  0x0706050403020100LL, USR_CLEAR);
707     TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0202020202020202LL,
708                  0x0605040302010000LL, USR_OVF);
709 
710     TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0001000100010001LL,
711                  0x0003000200010000LL, USR_CLEAR);
712     TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0002000200020002LL,
713                  0x0002000100000000LL, USR_OVF);
714 
715     TEST_R_OP_PP(vaddhubs, 0x0004000300020001LL, 0x0001000100010001LL,
716                  0x05040302, USR_CLEAR);
717     TEST_R_OP_PP(vaddhubs, 0x7fff000300020001LL, 0x0002000200020002LL,
718                  0xff050403, USR_OVF);
719 
720     TEST_R_OP_P(vsathub,         0x0001000300020001LL, 0x01030201, USR_CLEAR);
721     TEST_R_OP_P(vsathub,         0x010000700080ffffLL, 0xff708000, USR_OVF);
722 
723     TEST_R_OP_P(vsatwuh,         0x0000ffff00000001LL, 0xffff0001, USR_CLEAR);
724     TEST_R_OP_P(vsatwuh,         0x800000000000ffffLL, 0x0000ffff, USR_OVF);
725 
726     TEST_P_OP_P(vsatwuh_nopack,  0x0000ffff00000001LL, 0x0000ffff00000001LL,
727                 USR_CLEAR);
728     TEST_P_OP_P(vsatwuh_nopack,  0x800000000000ffffLL, 0x000000000000ffffLL,
729                 USR_OVF);
730 
731     TEST_R_OP_R(svsathub,        0x00020001,           0x0201,     USR_CLEAR);
732     TEST_R_OP_R(svsathub,        0x0080ffff,           0x8000,     USR_OVF);
733 
734     TEST_R_OP_PI(asrhub_sat,     0x004f003f002f001fLL, 3,    0x09070503,
735                  USR_CLEAR);
736     TEST_R_OP_PI(asrhub_sat,     0x004fffff8fff001fLL, 3,    0x09000003,
737                  USR_OVF);
738 
739     TEST_R_OP_PI(asrhub_rnd_sat, 0x004f003f002f001fLL, 2,    0x0a080604,
740                  USR_CLEAR);
741     TEST_R_OP_PI(asrhub_rnd_sat, 0x004fffff8fff001fLL, 2,    0x0a000004,
742                  USR_OVF);
743 
744     TEST_R_OP_RR(addsat,        1,              2,              3,
745                  USR_CLEAR);
746     TEST_R_OP_RR(addsat,        0x7fffffff,     0x00000010,     0x7fffffff,
747                  USR_OVF);
748     TEST_R_OP_RR(addsat,        0x80000000,     0x80000006,     0x80000000,
749                  USR_OVF);
750 
751     TEST_P_OP_PP(addpsat, 1LL, 2LL, 3LL, USR_CLEAR);
752     /* overflow to max positive */
753     TEST_P_OP_PP(addpsat, 0x7ffffffffffffff0LL, 0x0000000000000010LL,
754                  0x7fffffffffffffffLL, USR_OVF);
755     /* overflow to min negative */
756     TEST_P_OP_PP(addpsat, 0x8000000000000003LL, 0x8000000000000006LL,
757                  0x8000000000000000LL, USR_OVF);
758 
759     TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0xffff0000, 0x11110000,
760                   0x7fffeeee, USR_CLEAR);
761     TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0x7fff0000, 0x7fff0000,
762                   0x7fffffff, USR_OVF);
763 
764     TEST_R_OP_RR(mpy_sat_hh_s1,        0xffff0000, 0x11110000, 0xffffddde,
765                  USR_CLEAR);
766     TEST_R_OP_RR(mpy_sat_hh_s1,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
767                  USR_CLEAR);
768     TEST_R_OP_RR(mpy_sat_hh_s1,        0x80000000, 0x80000000, 0x7fffffff,
769                  USR_OVF);
770 
771     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0xffff0000, 0x11110000, 0x00005dde,
772                  USR_CLEAR);
773     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x7fff0000, 0x7fff0000, 0x7ffe8002,
774                  USR_CLEAR);
775     TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x80000000, 0x80000000, 0x7fffffff,
776                  USR_OVF);
777 
778     TEST_R_OP_RR(mpy_up_s1_sat,        0xffff0000, 0x11110000, 0xffffddde,
779                  USR_CLEAR);
780     TEST_R_OP_RR(mpy_up_s1_sat,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
781                  USR_CLEAR);
782     TEST_R_OP_RR(mpy_up_s1_sat,        0x80000000, 0x80000000, 0x7fffffff,
783                  USR_OVF);
784 
785     TEST_P_OP_RR(vmpy2s_s1,  0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
786                  USR_CLEAR);
787     TEST_P_OP_RR(vmpy2s_s1,  0x80000000, 0x80000000, 0x7fffffff00000000LL,
788                  USR_OVF);
789 
790     TEST_P_OP_RR(vmpy2su_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
791                  USR_CLEAR);
792     TEST_P_OP_RR(vmpy2su_s1, 0xffffbd97, 0xffffffff, 0xfffe000280000000LL,
793                  USR_OVF);
794 
795     TEST_R_OP_RR(vmpy2s_s1pack,        0x7fff0000, 0x7fff0000, 0x7ffe0000,
796                  USR_CLEAR);
797     TEST_R_OP_RR(vmpy2s_s1pack,        0x80008000, 0x80008000, 0x7fff7fff,
798                  USR_OVF);
799 
800     TEST_P_OP_PP(vmpy2es_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
801                  0x1ffec0021ffec002LL, USR_CLEAR);
802     TEST_P_OP_PP(vmpy2es_s1, 0x8000800080008000LL, 0x8000800080008000LL,
803                  0x7fffffff7fffffffLL, USR_OVF);
804 
805     TEST_R_OP_PP(vdmpyrs_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
806                  0x3ffe3ffe, USR_CLEAR);
807     TEST_R_OP_PP(vdmpyrs_s1, 0x8000800080008000LL, 0x8000800080008000LL,
808                  0x7fff7fffLL, USR_OVF);
809 
810     TEST_XP_OP_PP(vdmacs_s0, 0x0fffffffULL, 0x00ff00ff00ff00ffLL,
811                   0x00ff00ff00ff00ffLL, 0x0001fc021001fc01LL, USR_CLEAR);
812     TEST_XP_OP_PP(vdmacs_s0, 0x01111111ULL, 0x8000800080001000LL,
813                   0x8000800080008000LL, 0x7fffffff39111111LL, USR_OVF);
814 
815     TEST_R_OP_RR(cmpyrs_s0,            0x7fff0000, 0x7fff0000, 0x0000c001,
816                  USR_CLEAR);
817     TEST_R_OP_RR(cmpyrs_s0,            0x80008000, 0x80008000, 0x7fff0000,
818                  USR_OVF);
819 
820     TEST_XP_OP_RR(cmacs_s0, 0x0fffffff, 0x7fff0000, 0x7fff0000,
821                   0x00000000d000fffeLL, USR_CLEAR);
822     TEST_XP_OP_RR(cmacs_s0, 0x0fff1111, 0x80008000, 0x80008000,
823                   0x7fffffff0fff1111LL, USR_OVF);
824 
825     TEST_XP_OP_RR(cnacs_s0, 0x000000108fffffffULL, 0x7fff0000, 0x7fff0000,
826                   0x00000010cfff0000ULL, USR_CLEAR);
827     TEST_XP_OP_RR(cnacs_s0, 0x000000108ff1111fULL, 0x00002001, 0x00007ffd,
828                   0x0000001080000000ULL, USR_OVF);
829 
830     TEST_P_OP_PP(vrcmpys_s1_h, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
831                  0x0003f8040003f804LL, USR_CLEAR);
832     TEST_P_OP_PP(vrcmpys_s1_h, 0x8000800080008000LL, 0x8000800080008000LL,
833                  0x7fffffff7fffffffLL, USR_OVF);
834 
835     TEST_XP_OP_PP(mmacls_s0, 0x6fffffff, 0x00ff00ff00ff00ffLL,
836                   0x00ff00ff00ff00ffLL, 0x0000fe017000fe00LL, USR_CLEAR);
837     TEST_XP_OP_PP(mmacls_s0, 0x6f1111ff, 0x8000800080008000LL,
838                   0x1000100080008000LL, 0xf80008007fffffffLL, USR_OVF);
839 
840     TEST_R_OP_RR(hmmpyl_rs1,           0x7fff0000, 0x7fff0001, 0x0000fffe,
841                  USR_CLEAR);
842     TEST_R_OP_RR(hmmpyl_rs1,           0x80000000, 0x80008000, 0x7fffffff,
843                  USR_OVF);
844 
845     TEST_XP_OP_PP(mmaculs_s0, 0x000000007fffffffULL, 0xffff800080008000LL,
846                   0xffff800080008000LL, 0xffffc00040003fffLL, USR_CLEAR);
847     TEST_XP_OP_PP(mmaculs_s0, 0x000011107fffffffULL, 0x00ff00ff00ff00ffLL,
848                   0x00ff00ff001100ffLL, 0x00010f117fffffffLL, USR_OVF);
849 
850     TEST_R_OP_PR(cmpyi_wh, 0x7fff000000000000LL, 0x7fff0001, 0x0000fffe,
851                  USR_CLEAR);
852     TEST_R_OP_PR(cmpyi_wh, 0x8000000000000000LL, 0x80008000, 0x7fffffff,
853                  USR_OVF);
854 
855     TEST_P_OP_PP(vcmpy_s0_sat_i, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
856                  0x0001fc020001fc02LL, USR_CLEAR);
857     TEST_P_OP_PP(vcmpy_s0_sat_i, 0x8000800080008000LL, 0x8000800080008000LL,
858                  0x7fffffff7fffffffLL, USR_OVF);
859 
860     TEST_P_OP_PR(vcrotate, 0x8000000000000000LL, 0x00000002,
861                  0x8000000000000000LL, USR_CLEAR);
862     TEST_P_OP_PR(vcrotate, 0x7fff80007fff8000LL, 0x00000001,
863                  0x7fff80007fff7fffLL, USR_OVF);
864 
865     TEST_P_OP_PR(vcnegh, 0x8000000000000000LL, 0x00000002,
866                  0x8000000000000000LL, USR_CLEAR);
867     TEST_P_OP_PR(vcnegh, 0x7fff80007fff8000LL, 0x00000001,
868                  0x7fff80007fff7fffLL, USR_OVF);
869 
870 #if CORE_HAS_AUDIO
871     TEST_R_OP_PP(wcmpyrw, 0x8765432101234567LL, 0x00000002ffffffffLL,
872                  0x00000001, USR_CLEAR);
873     TEST_R_OP_PP(wcmpyrw, 0x800000007fffffffLL, 0x000000ff7fffffffLL,
874                  0x7fffffff, USR_OVF);
875     TEST_R_OP_PP(wcmpyrw, 0x7fffffff80000000LL, 0x7fffffff000000ffLL,
876                  0x80000000, USR_OVF);
877 #else
878     printf("Audio instructions skipped\n");
879 #endif
880 
881     TEST_R_OP_RR(addh_l16_sat_ll,      0x0000ffff, 0x00000002, 0x00000001,
882                  USR_CLEAR);
883     TEST_R_OP_RR(addh_l16_sat_ll,      0x00007fff, 0x00000005, 0x00007fff,
884                  USR_OVF);
885     TEST_R_OP_RR(addh_l16_sat_ll,      0x00008000, 0x00008000, 0xffff8000,
886                  USR_OVF);
887 
888     TEST_P_OP_P(vconj, 0x0000ffff00000001LL, 0x0000ffff00000001LL, USR_CLEAR);
889     TEST_P_OP_P(vconj, 0x800000000000ffffLL, 0x7fff00000000ffffLL, USR_OVF);
890 
891     TEST_P_OP_PP(vxaddsubw, 0x8765432101234567LL, 0x00000002ffffffffLL,
892                  0x8765432201234569LL, USR_CLEAR);
893     TEST_P_OP_PP(vxaddsubw, 0x7fffffff7fffffffLL, 0xffffffffffffffffLL,
894                  0x7fffffff7ffffffeLL, USR_OVF);
895     TEST_P_OP_PP(vxaddsubw, 0x800000000fffffffLL, 0x0000000a00000008LL,
896                  0x8000000010000009LL, USR_OVF);
897 
898     TEST_P_OP_P(vabshsat, 0x0001000afffff800LL, 0x0001000a00010800LL,
899                 USR_CLEAR);
900     TEST_P_OP_P(vabshsat, 0x8000000b000c000aLL, 0x7fff000b000c000aLL,
901              USR_OVF);
902 
903     TEST_P_OP_PP(vnavgwr, 0x8765432101234567LL, 0x00000002ffffffffLL,
904                  0xc3b2a1900091a2b4LL, USR_CLEAR);
905     TEST_P_OP_PP(vnavgwr, 0x7fffffff8000000aLL, 0x80000000ffffffffLL,
906                  0x7fffffffc0000006LL, USR_OVF);
907 
908     TEST_R_OP_RI(round_ri_sat,         0x0000ffff, 2, 0x00004000, USR_CLEAR);
909     TEST_R_OP_RI(round_ri_sat,         0x7fffffff, 2, 0x1fffffff, USR_OVF);
910 
911     TEST_R_OP_RR(asr_r_r_sat,  0x0000ffff, 0x02, 0x00003fff, USR_CLEAR);
912     TEST_R_OP_RR(asr_r_r_sat,  0x80000000, 0x01, 0xc0000000, USR_CLEAR);
913     TEST_R_OP_RR(asr_r_r_sat,  0xffffffff, 0x01, 0xffffffff, USR_CLEAR);
914     TEST_R_OP_RR(asr_r_r_sat,  0x00ffffff, 0xf5, 0x7fffffff, USR_OVF);
915     TEST_R_OP_RR(asr_r_r_sat,  0x80000000, 0xf5, 0x80000000, USR_OVF);
916     TEST_R_OP_RR(asr_r_r_sat,  0x7fff0000, 0x42, 0x7fffffff, USR_OVF);
917     TEST_R_OP_RR(asr_r_r_sat,  0xff000000, 0x42, 0x80000000, USR_OVF);
918     TEST_R_OP_RR(asr_r_r_sat,        4096,   32, 0x00000000, USR_CLEAR);
919     TEST_R_OP_RR(asr_r_r_sat,        4096,  -32, 0x7fffffff, USR_OVF);
920     TEST_R_OP_RR(asr_r_r_sat,       -4096,   32, 0xffffffff, USR_CLEAR);
921     TEST_R_OP_RR(asr_r_r_sat,       -4096,  -32, 0x80000000, USR_OVF);
922     TEST_R_OP_RR(asr_r_r_sat,           0,  -32, 0x00000000, USR_CLEAR);
923     TEST_R_OP_RR(asr_r_r_sat,           1,  -32, 0x7fffffff, USR_OVF);
924 
925     TEST_R_OP_RR(asl_r_r_sat,  0x00000000, 0x40, 0x00000000, USR_CLEAR);
926     TEST_R_OP_RR(asl_r_r_sat,  0x80000000, 0xff, 0xc0000000, USR_CLEAR);
927     TEST_R_OP_RR(asl_r_r_sat,  0xffffffff, 0xff, 0xffffffff, USR_CLEAR);
928     TEST_R_OP_RR(asl_r_r_sat,  0x00ffffff, 0x0b, 0x7fffffff, USR_OVF);
929     TEST_R_OP_RR(asl_r_r_sat,  0x80000000, 0x0b, 0x80000000, USR_OVF);
930     TEST_R_OP_RR(asl_r_r_sat,  0x7fff0000, 0xbe, 0x7fffffff, USR_OVF);
931     TEST_R_OP_RR(asl_r_r_sat,  0xff000000, 0xbe, 0x80000000, USR_OVF);
932     TEST_R_OP_RR(asl_r_r_sat,        4096,   32, 0x7fffffff, USR_OVF);
933     TEST_R_OP_RR(asl_r_r_sat,        4096,  -32, 0x00000000, USR_CLEAR);
934     TEST_R_OP_RR(asl_r_r_sat,       -4096,   32, 0x80000000, USR_OVF);
935     TEST_R_OP_RR(asl_r_r_sat,       -4096,  -32, 0xffffffff, USR_CLEAR);
936     TEST_R_OP_RR(asl_r_r_sat,           0,   32, 0x00000000, USR_CLEAR);
937     TEST_R_OP_RR(asl_r_r_sat,           1,   32, 0x7fffffff, USR_OVF);
938 
939     TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
940                    0x0000000000000000ULL, 0x0004000300030004ULL, 0xf0,
941                    USR_CLEAR);
942     TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
943                    0x000affff000d0000ULL, 0x000e0003000f0004ULL, 0xcc,
944                    USR_CLEAR);
945     TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
946                   0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xfc,
947                   USR_OVF);
948     TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
949                    0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xf0,
950                    USR_OVF);
951 
952     /* Floating point */
953     TEST_R_OP_RR(sfmin,  SF_one,      SF_small_neg,   SF_small_neg, USR_CLEAR);
954     TEST_R_OP_RR(sfmin,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
955     TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
956     TEST_R_OP_RR(sfmin,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
957     TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
958     TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
959     TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
960     TEST_R_OP_RR(sfmin,  SF_zero,     SF_zero_neg,    SF_zero_neg,  USR_CLEAR);
961     TEST_R_OP_RR(sfmin,  SF_zero_neg, SF_zero,        SF_zero_neg,  USR_CLEAR);
962 
963     TEST_R_OP_RR(sfmax,  SF_one,      SF_small_neg,   SF_one,       USR_CLEAR);
964     TEST_R_OP_RR(sfmax,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
965     TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
966     TEST_R_OP_RR(sfmax,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
967     TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
968     TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
969     TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
970     TEST_R_OP_RR(sfmax,  SF_zero,     SF_zero_neg,    SF_zero,      USR_CLEAR);
971     TEST_R_OP_RR(sfmax,  SF_zero_neg, SF_zero,        SF_zero,      USR_CLEAR);
972 
973     TEST_R_OP_RR(sfadd,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
974     TEST_R_OP_RR(sfadd,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
975     TEST_R_OP_RR(sfadd,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
976     TEST_R_OP_RR(sfadd,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
977 
978     TEST_R_OP_RR(sfsub,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
979     TEST_R_OP_RR(sfsub,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
980     TEST_R_OP_RR(sfsub,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
981     TEST_R_OP_RR(sfsub,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
982 
983     TEST_R_OP_RR(sfmpy,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
984     TEST_R_OP_RR(sfmpy,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
985     TEST_R_OP_RR(sfmpy,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
986     TEST_R_OP_RR(sfmpy,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
987 
988     TEST_XR_OP_RR(sffma, SF_one,   SF_one,    SF_one,   SF_two,     USR_CLEAR);
989     TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
990     TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
991     TEST_XR_OP_RR(sffma, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
992     TEST_XR_OP_RR(sffma, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
993 
994     TEST_XR_OP_RR(sffms, SF_one,   SF_one,    SF_one,   SF_zero,    USR_CLEAR);
995     TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
996     TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
997     TEST_XR_OP_RR(sffms, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
998     TEST_XR_OP_RR(sffms, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
999 
1000     TEST_CMP_RR(sfcmpuo, SF_one,      SF_large_pos,    0x00,    USR_CLEAR);
1001     TEST_CMP_RR(sfcmpuo, SF_INF,      SF_large_pos,    0x00,    USR_CLEAR);
1002     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_large_pos,    0xff,    USR_CLEAR);
1003     TEST_CMP_RR(sfcmpuo, SF_QNaN_neg, SF_large_pos,    0xff,    USR_CLEAR);
1004     TEST_CMP_RR(sfcmpuo, SF_SNaN,     SF_large_pos,    0xff,    USR_FPINVF);
1005     TEST_CMP_RR(sfcmpuo, SF_SNaN_neg, SF_large_pos,    0xff,    USR_FPINVF);
1006     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_QNaN,         0xff,    USR_CLEAR);
1007     TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_SNaN,         0xff,    USR_FPINVF);
1008 
1009     TEST_CMP_RR(sfcmpeq, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1010     TEST_CMP_RR(sfcmpeq, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1011     TEST_CMP_RR(sfcmpgt, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1012     TEST_CMP_RR(sfcmpgt, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1013     TEST_CMP_RR(sfcmpge, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1014     TEST_CMP_RR(sfcmpge, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1015 
1016     TEST_P_OP_PP(dfadd,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
1017     TEST_P_OP_PP(dfadd,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1018     TEST_P_OP_PP(dfadd,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1019     TEST_P_OP_PP(dfadd,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1020 
1021     TEST_P_OP_PP(dfsub,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
1022     TEST_P_OP_PP(dfsub,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1023     TEST_P_OP_PP(dfsub,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1024     TEST_P_OP_PP(dfsub,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1025 
1026 #if CORE_IS_V67
1027     TEST_P_OP_PP(dfmin,  DF_any,    DF_small_neg,    DF_small_neg,  USR_CLEAR);
1028     TEST_P_OP_PP(dfmin,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1029     TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1030     TEST_P_OP_PP(dfmin,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1031     TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1032     TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1033     TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1034     TEST_P_OP_PP(dfmin,  DF_zero,   DF_zero_neg,     DF_zero_neg,   USR_CLEAR);
1035     TEST_P_OP_PP(dfmin,  DF_zero_neg, DF_zero,       DF_zero_neg,   USR_CLEAR);
1036 
1037     TEST_P_OP_PP(dfmax,  DF_any,    DF_small_neg,    DF_any,        USR_CLEAR);
1038     TEST_P_OP_PP(dfmax,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1039     TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1040     TEST_P_OP_PP(dfmax,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1041     TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1042     TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1043     TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1044     TEST_P_OP_PP(dfmax,  DF_zero,   DF_zero_neg,     DF_zero,       USR_CLEAR);
1045     TEST_P_OP_PP(dfmax,  DF_zero_neg, DF_zero,       DF_zero,       USR_CLEAR);
1046 
1047     TEST_XP_OP_PP(dfmpyhh, DF_one,   DF_one,  DF_one,   DF_one_hh,  USR_CLEAR);
1048     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_QNaN,  DF_HEX_NaN, USR_CLEAR);
1049     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1050     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_QNaN, DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1051     TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_SNaN, DF_QNaN,  DF_HEX_NaN, USR_FPINVF);
1052 #else
1053     printf("v67 instructions skipped\n");
1054 #endif
1055 
1056     TEST_CMP_PP(dfcmpuo, DF_small_neg, DF_any,          0x00,    USR_CLEAR);
1057     TEST_CMP_PP(dfcmpuo, DF_large_pos, DF_any,          0x00,    USR_CLEAR);
1058     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_any,          0xff,    USR_CLEAR);
1059     TEST_CMP_PP(dfcmpuo, DF_QNaN_neg,  DF_any,          0xff,    USR_CLEAR);
1060     TEST_CMP_PP(dfcmpuo, DF_SNaN,      DF_any,          0xff,    USR_FPINVF);
1061     TEST_CMP_PP(dfcmpuo, DF_SNaN_neg,  DF_any,          0xff,    USR_FPINVF);
1062     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_QNaN,         0xff,    USR_CLEAR);
1063     TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_SNaN,         0xff,    USR_FPINVF);
1064 
1065     TEST_CMP_PP(dfcmpeq, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1066     TEST_CMP_PP(dfcmpeq, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1067     TEST_CMP_PP(dfcmpgt, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1068     TEST_CMP_PP(dfcmpgt, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1069     TEST_CMP_PP(dfcmpge, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1070     TEST_CMP_PP(dfcmpge, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1071 
1072     TEST_P_OP_R(conv_sf2df,       SF_QNaN,  DF_HEX_NaN,             USR_CLEAR);
1073     TEST_P_OP_R(conv_sf2df,       SF_SNaN,  DF_HEX_NaN,             USR_FPINVF);
1074     TEST_R_OP_R(conv_sf2uw,       SF_QNaN,  0xffffffff,             USR_FPINVF);
1075     TEST_R_OP_R(conv_sf2uw,       SF_SNaN,  0xffffffff,             USR_FPINVF);
1076     TEST_R_OP_R(conv_sf2w,        SF_QNaN,  0xffffffff,             USR_FPINVF);
1077     TEST_R_OP_R(conv_sf2w,        SF_SNaN,  0xffffffff,             USR_FPINVF);
1078     TEST_P_OP_R(conv_sf2ud,       SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1079     TEST_P_OP_R(conv_sf2ud,       SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1080     TEST_P_OP_R(conv_sf2d,        SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1081     TEST_P_OP_R(conv_sf2d,        SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1082     TEST_R_OP_R(conv_sf2uw_chop,  SF_QNaN,  0xffffffff,             USR_FPINVF);
1083     TEST_R_OP_R(conv_sf2uw_chop,  SF_SNaN,  0xffffffff,             USR_FPINVF);
1084     TEST_R_OP_R(conv_sf2w_chop,   SF_QNaN,  0xffffffff,             USR_FPINVF);
1085     TEST_R_OP_R(conv_sf2w_chop,   SF_SNaN,  0xffffffff,             USR_FPINVF);
1086     TEST_P_OP_R(conv_sf2ud_chop,  SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1087     TEST_P_OP_R(conv_sf2ud_chop,  SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1088     TEST_P_OP_R(conv_sf2d_chop,   SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1089     TEST_P_OP_R(conv_sf2d_chop,   SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1090 
1091     TEST_R_OP_P(conv_df2sf,       DF_QNaN,  SF_HEX_NaN,             USR_CLEAR);
1092     TEST_R_OP_P(conv_df2sf,       DF_SNaN,  SF_HEX_NaN,             USR_FPINVF);
1093     TEST_R_OP_P(conv_df2uw,       DF_QNaN,  0xffffffff,             USR_FPINVF);
1094     TEST_R_OP_P(conv_df2uw,       DF_SNaN,  0xffffffff,             USR_FPINVF);
1095     TEST_R_OP_P(conv_df2w,        DF_QNaN,  0xffffffff,             USR_FPINVF);
1096     TEST_R_OP_P(conv_df2w,        DF_SNaN,  0xffffffff,             USR_FPINVF);
1097     TEST_P_OP_P(conv_df2ud,       DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1098     TEST_P_OP_P(conv_df2ud,       DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1099     TEST_P_OP_P(conv_df2d,        DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1100     TEST_P_OP_P(conv_df2d,        DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1101     TEST_R_OP_P(conv_df2uw_chop,  DF_QNaN,  0xffffffff,             USR_FPINVF);
1102     TEST_R_OP_P(conv_df2uw_chop,  DF_SNaN,  0xffffffff,             USR_FPINVF);
1103 
1104     /* Test for typo in HELPER(conv_df2uw_chop) */
1105     TEST_R_OP_P(conv_df2uw_chop, 0xffffff7f00000001ULL, 0xffffffff, USR_FPINVF);
1106 
1107     TEST_R_OP_P(conv_df2w_chop,   DF_QNaN,  0xffffffff,             USR_FPINVF);
1108     TEST_R_OP_P(conv_df2w_chop,   DF_SNaN,  0xffffffff,             USR_FPINVF);
1109     TEST_P_OP_P(conv_df2ud_chop,  DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1110     TEST_P_OP_P(conv_df2ud_chop,  DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1111     TEST_P_OP_P(conv_df2d_chop,   DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1112     TEST_P_OP_P(conv_df2d_chop,   DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1113 
1114     TEST_R_OP_R(conv_uw2sf,    0x00000001,             SF_one,      USR_CLEAR);
1115     TEST_R_OP_R(conv_uw2sf,    0x010020a5,             0x4b801052,  USR_FPINPF);
1116     TEST_R_OP_R(conv_w2sf,     0x00000001,             SF_one,      USR_CLEAR);
1117     TEST_R_OP_R(conv_w2sf,     0x010020a5,             0x4b801052,  USR_FPINPF);
1118     TEST_R_OP_P(conv_ud2sf,    0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1119     TEST_R_OP_P(conv_ud2sf,    0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1120     TEST_R_OP_P(conv_d2sf,     0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1121     TEST_R_OP_P(conv_d2sf,     0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1122 
1123     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_one,   1, SF_four,
1124                    USR_CLEAR);
1125     TEST_XR_OP_RRp(sffma_sc, SF_QNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1126                    USR_CLEAR);
1127     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_QNaN,   SF_one,   1, SF_HEX_NaN,
1128                    USR_CLEAR);
1129     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_QNaN,  1, SF_HEX_NaN,
1130                    USR_CLEAR);
1131     TEST_XR_OP_RRp(sffma_sc, SF_SNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1132                    USR_FPINVF);
1133     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_SNaN,   SF_one,   1, SF_HEX_NaN,
1134                    USR_FPINVF);
1135     TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_SNaN,  1, SF_HEX_NaN,
1136                    USR_FPINVF);
1137 
1138     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_one,    SF_one_recip,   0x00,
1139                   USR_CLEAR);
1140     TEST_Rp_OP_RR(sfrecipa, SF_QNaN,   SF_one,    SF_HEX_NaN,     0x00,
1141                   USR_CLEAR);
1142     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_QNaN,   SF_HEX_NaN,     0x00,
1143                   USR_CLEAR);
1144     TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_SNaN,   SF_HEX_NaN,     0x00,
1145                   USR_FPINVF);
1146     TEST_Rp_OP_RR(sfrecipa, SF_SNaN,   SF_one,    SF_HEX_NaN,     0x00,
1147                   USR_FPINVF);
1148 
1149     TEST_R_OP_RR(sffixupn, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1150     TEST_R_OP_RR(sffixupn, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1151     TEST_R_OP_RR(sffixupn, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1152     TEST_R_OP_RR(sffixupn, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1153     TEST_R_OP_RR(sffixupn, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1154 
1155     TEST_R_OP_RR(sffixupd, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1156     TEST_R_OP_RR(sffixupd, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1157     TEST_R_OP_RR(sffixupd, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1158     TEST_R_OP_RR(sffixupd, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1159     TEST_R_OP_RR(sffixupd, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1160 
1161     TEST_R_OP_R(sffixupr, SF_one,             SF_one,           USR_CLEAR);
1162     TEST_R_OP_R(sffixupr, SF_QNaN,            SF_HEX_NaN,       USR_CLEAR);
1163     TEST_R_OP_R(sffixupr, SF_SNaN,            SF_HEX_NaN,       USR_FPINVF);
1164 
1165     TEST_Rp_OP_R(sfinvsqrta, SF_one,        SF_one_invsqrta,  0x00, USR_CLEAR);
1166     TEST_Rp_OP_R(sfinvsqrta, SF_zero,       SF_one,           0x00, USR_CLEAR);
1167     TEST_Rp_OP_R(sfinvsqrta, SF_QNaN,       SF_HEX_NaN,       0x00, USR_CLEAR);
1168     TEST_Rp_OP_R(sfinvsqrta, SF_small_neg,  SF_HEX_NaN,       0x00, USR_FPINVF);
1169     TEST_Rp_OP_R(sfinvsqrta, SF_SNaN,       SF_HEX_NaN,       0x00, USR_FPINVF);
1170 
1171     puts(err ? "FAIL" : "PASS");
1172     return err;
1173 }
1174