xref: /openbmc/qemu/tests/tcg/hexagon/v69_hvx.c (revision 449d6d9eb44772e69f11d002e3c1e2be8a91c350)
/*
 *  Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
17*6c61d4e1STaylor Simpson 
18*6c61d4e1STaylor Simpson #include <stdio.h>
19*6c61d4e1STaylor Simpson #include <stdint.h>
20*6c61d4e1STaylor Simpson #include <stdbool.h>
21*6c61d4e1STaylor Simpson #include <string.h>
22*6c61d4e1STaylor Simpson #include <limits.h>
23*6c61d4e1STaylor Simpson 
/*
 * Failure flag for the whole program: main() prints "FAIL" and returns 1
 * when it is non-zero, "PASS" and 0 otherwise.  Nothing in this file writes
 * it directly; it is defined ahead of the hvx_misc.h include, presumably
 * because the check_output_*() helpers from that header set it on a
 * mismatch -- TODO confirm against hvx_misc.h.
 */
int err;
25*6c61d4e1STaylor Simpson 
26*6c61d4e1STaylor Simpson #include "hvx_misc.h"
27*6c61d4e1STaylor Simpson 
/*
 * Add the round-to-nearest bias for a right shift by SHAMT bits:
 * VAL + 2^(SHAMT - 1) when SHAMT > 0, plain VAL otherwise.
 *
 * The bias is a long long, so the sum is evaluated in at least 64 bits.
 * SHAMT is expanded twice; pass an expression without side effects.
 */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
30*6c61d4e1STaylor Simpson 
/*
 * Saturate VAL to the unsigned 8-bit range.
 *
 * Values already in [0, 0xff] pass through unchanged.  Anything else
 * becomes 0 when VAL converted to int32_t is negative, and 0xff otherwise.
 * VAL is expanded several times, so it must not have side effects.
 */
#define fVSATUB(VAL) \
    ((((VAL) & 0xffLL) == (VAL)) ? \
        (VAL) : \
        ((((int32_t)(VAL)) < 0) ? 0 : 0xff))
35*6c61d4e1STaylor Simpson 
/*
 * Saturate VAL to the unsigned 16-bit range.
 *
 * Values already in [0, 0xffff] pass through unchanged.  Anything else
 * becomes 0 when VAL converted to int32_t is negative, and 0xffff
 * otherwise.  VAL is expanded several times, so it must not have side
 * effects.
 */
#define fVSATUH(VAL) \
    ((((VAL) & 0xffffLL) == (VAL)) ? \
        (VAL) : \
        ((((int32_t)(VAL)) < 0) ? 0 : 0xffff))
40*6c61d4e1STaylor Simpson 
test_vasrvuhubrndsat(void)41*6c61d4e1STaylor Simpson static void test_vasrvuhubrndsat(void)
42*6c61d4e1STaylor Simpson {
43*6c61d4e1STaylor Simpson     void *p0 = buffer0;
44*6c61d4e1STaylor Simpson     void *p1 = buffer1;
45*6c61d4e1STaylor Simpson     void *pout = output;
46*6c61d4e1STaylor Simpson 
47*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
48*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
49*6c61d4e1STaylor Simpson 
50*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE / 2; i++) {
51*6c61d4e1STaylor Simpson         asm("v4 = vmem(%0 + #0)\n\t"
52*6c61d4e1STaylor Simpson             "v5 = vmem(%0 + #1)\n\t"
53*6c61d4e1STaylor Simpson             "v6 = vmem(%1 + #0)\n\t"
54*6c61d4e1STaylor Simpson             "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
55*6c61d4e1STaylor Simpson             "vmem(%2) = v5\n\t"
56*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
57*6c61d4e1STaylor Simpson             : "v4", "v5", "v6", "memory");
58*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector) * 2;
59*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
60*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
61*6c61d4e1STaylor Simpson 
62*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
63*6c61d4e1STaylor Simpson             int shamt;
64*6c61d4e1STaylor Simpson             uint8_t byte0;
65*6c61d4e1STaylor Simpson             uint8_t byte1;
66*6c61d4e1STaylor Simpson 
67*6c61d4e1STaylor Simpson             shamt = buffer1[i].ub[2 * j + 0] & 0x7;
68*6c61d4e1STaylor Simpson             byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt);
69*6c61d4e1STaylor Simpson             shamt = buffer1[i].ub[2 * j + 1] & 0x7;
70*6c61d4e1STaylor Simpson             byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt);
71*6c61d4e1STaylor Simpson             expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
72*6c61d4e1STaylor Simpson         }
73*6c61d4e1STaylor Simpson     }
74*6c61d4e1STaylor Simpson 
75*6c61d4e1STaylor Simpson     check_output_h(__LINE__, BUFSIZE / 2);
76*6c61d4e1STaylor Simpson }
77*6c61d4e1STaylor Simpson 
test_vasrvuhubsat(void)78*6c61d4e1STaylor Simpson static void test_vasrvuhubsat(void)
79*6c61d4e1STaylor Simpson {
80*6c61d4e1STaylor Simpson     void *p0 = buffer0;
81*6c61d4e1STaylor Simpson     void *p1 = buffer1;
82*6c61d4e1STaylor Simpson     void *pout = output;
83*6c61d4e1STaylor Simpson 
84*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
85*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
86*6c61d4e1STaylor Simpson 
87*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE / 2; i++) {
88*6c61d4e1STaylor Simpson         asm("v4 = vmem(%0 + #0)\n\t"
89*6c61d4e1STaylor Simpson             "v5 = vmem(%0 + #1)\n\t"
90*6c61d4e1STaylor Simpson             "v6 = vmem(%1 + #0)\n\t"
91*6c61d4e1STaylor Simpson             "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
92*6c61d4e1STaylor Simpson             "vmem(%2) = v5\n\t"
93*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
94*6c61d4e1STaylor Simpson             : "v4", "v5", "v6", "memory");
95*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector) * 2;
96*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
97*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
98*6c61d4e1STaylor Simpson 
99*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
100*6c61d4e1STaylor Simpson             int shamt;
101*6c61d4e1STaylor Simpson             uint8_t byte0;
102*6c61d4e1STaylor Simpson             uint8_t byte1;
103*6c61d4e1STaylor Simpson 
104*6c61d4e1STaylor Simpson             shamt = buffer1[i].ub[2 * j + 0] & 0x7;
105*6c61d4e1STaylor Simpson             byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt);
106*6c61d4e1STaylor Simpson             shamt = buffer1[i].ub[2 * j + 1] & 0x7;
107*6c61d4e1STaylor Simpson             byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt);
108*6c61d4e1STaylor Simpson             expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
109*6c61d4e1STaylor Simpson         }
110*6c61d4e1STaylor Simpson     }
111*6c61d4e1STaylor Simpson 
112*6c61d4e1STaylor Simpson     check_output_h(__LINE__, BUFSIZE / 2);
113*6c61d4e1STaylor Simpson }
114*6c61d4e1STaylor Simpson 
test_vasrvwuhrndsat(void)115*6c61d4e1STaylor Simpson static void test_vasrvwuhrndsat(void)
116*6c61d4e1STaylor Simpson {
117*6c61d4e1STaylor Simpson     void *p0 = buffer0;
118*6c61d4e1STaylor Simpson     void *p1 = buffer1;
119*6c61d4e1STaylor Simpson     void *pout = output;
120*6c61d4e1STaylor Simpson 
121*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
122*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
123*6c61d4e1STaylor Simpson 
124*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE / 2; i++) {
125*6c61d4e1STaylor Simpson         asm("v4 = vmem(%0 + #0)\n\t"
126*6c61d4e1STaylor Simpson             "v5 = vmem(%0 + #1)\n\t"
127*6c61d4e1STaylor Simpson             "v6 = vmem(%1 + #0)\n\t"
128*6c61d4e1STaylor Simpson             "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
129*6c61d4e1STaylor Simpson             "vmem(%2) = v5\n\t"
130*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
131*6c61d4e1STaylor Simpson             : "v4", "v5", "v6", "memory");
132*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector) * 2;
133*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
134*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
135*6c61d4e1STaylor Simpson 
136*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
137*6c61d4e1STaylor Simpson             int shamt;
138*6c61d4e1STaylor Simpson             uint16_t half0;
139*6c61d4e1STaylor Simpson             uint16_t half1;
140*6c61d4e1STaylor Simpson 
141*6c61d4e1STaylor Simpson             shamt = buffer1[i].uh[2 * j + 0] & 0xf;
142*6c61d4e1STaylor Simpson             half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt);
143*6c61d4e1STaylor Simpson             shamt = buffer1[i].uh[2 * j + 1] & 0xf;
144*6c61d4e1STaylor Simpson             half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt);
145*6c61d4e1STaylor Simpson             expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
146*6c61d4e1STaylor Simpson         }
147*6c61d4e1STaylor Simpson     }
148*6c61d4e1STaylor Simpson 
149*6c61d4e1STaylor Simpson     check_output_w(__LINE__, BUFSIZE / 2);
150*6c61d4e1STaylor Simpson }
151*6c61d4e1STaylor Simpson 
test_vasrvwuhsat(void)152*6c61d4e1STaylor Simpson static void test_vasrvwuhsat(void)
153*6c61d4e1STaylor Simpson {
154*6c61d4e1STaylor Simpson     void *p0 = buffer0;
155*6c61d4e1STaylor Simpson     void *p1 = buffer1;
156*6c61d4e1STaylor Simpson     void *pout = output;
157*6c61d4e1STaylor Simpson 
158*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
159*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
160*6c61d4e1STaylor Simpson 
161*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE / 2; i++) {
162*6c61d4e1STaylor Simpson         asm("v4 = vmem(%0 + #0)\n\t"
163*6c61d4e1STaylor Simpson             "v5 = vmem(%0 + #1)\n\t"
164*6c61d4e1STaylor Simpson             "v6 = vmem(%1 + #0)\n\t"
165*6c61d4e1STaylor Simpson             "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
166*6c61d4e1STaylor Simpson             "vmem(%2) = v5\n\t"
167*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
168*6c61d4e1STaylor Simpson             : "v4", "v5", "v6", "memory");
169*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector) * 2;
170*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
171*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
172*6c61d4e1STaylor Simpson 
173*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
174*6c61d4e1STaylor Simpson             int shamt;
175*6c61d4e1STaylor Simpson             uint16_t half0;
176*6c61d4e1STaylor Simpson             uint16_t half1;
177*6c61d4e1STaylor Simpson 
178*6c61d4e1STaylor Simpson             shamt = buffer1[i].uh[2 * j + 0] & 0xf;
179*6c61d4e1STaylor Simpson             half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt);
180*6c61d4e1STaylor Simpson             shamt = buffer1[i].uh[2 * j + 1] & 0xf;
181*6c61d4e1STaylor Simpson             half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt);
182*6c61d4e1STaylor Simpson             expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
183*6c61d4e1STaylor Simpson         }
184*6c61d4e1STaylor Simpson     }
185*6c61d4e1STaylor Simpson 
186*6c61d4e1STaylor Simpson     check_output_w(__LINE__, BUFSIZE / 2);
187*6c61d4e1STaylor Simpson }
188*6c61d4e1STaylor Simpson 
test_vassign_tmp(void)189*6c61d4e1STaylor Simpson static void test_vassign_tmp(void)
190*6c61d4e1STaylor Simpson {
191*6c61d4e1STaylor Simpson     void *p0 = buffer0;
192*6c61d4e1STaylor Simpson     void *pout = output;
193*6c61d4e1STaylor Simpson 
194*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
195*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
196*6c61d4e1STaylor Simpson 
197*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) {
198*6c61d4e1STaylor Simpson         /*
199*6c61d4e1STaylor Simpson          * Assign into v12 as .tmp, then use it in the next packet
200*6c61d4e1STaylor Simpson          * Should get the new value within the same packet and
201*6c61d4e1STaylor Simpson          * the old value in the next packet
202*6c61d4e1STaylor Simpson          */
203*6c61d4e1STaylor Simpson         asm("v3 = vmem(%0 + #0)\n\t"
204*6c61d4e1STaylor Simpson             "r1 = #1\n\t"
205*6c61d4e1STaylor Simpson             "v12 = vsplat(r1)\n\t"
206*6c61d4e1STaylor Simpson             "r1 = #2\n\t"
207*6c61d4e1STaylor Simpson             "v13 = vsplat(r1)\n\t"
208*6c61d4e1STaylor Simpson             "{\n\t"
209*6c61d4e1STaylor Simpson             "    v12.tmp = v13\n\t"
210*6c61d4e1STaylor Simpson             "    v4.w = vadd(v12.w, v3.w)\n\t"
211*6c61d4e1STaylor Simpson             "}\n\t"
212*6c61d4e1STaylor Simpson             "v4.w = vadd(v4.w, v12.w)\n\t"
213*6c61d4e1STaylor Simpson             "vmem(%1 + #0) = v4\n\t"
214*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(pout)
215*6c61d4e1STaylor Simpson             : "r1", "v3", "v4", "v12", "v13", "memory");
216*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector);
217*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
218*6c61d4e1STaylor Simpson 
219*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
220*6c61d4e1STaylor Simpson             expect[i].w[j] = buffer0[i].w[j] + 3;
221*6c61d4e1STaylor Simpson         }
222*6c61d4e1STaylor Simpson     }
223*6c61d4e1STaylor Simpson 
224*6c61d4e1STaylor Simpson     check_output_w(__LINE__, BUFSIZE);
225*6c61d4e1STaylor Simpson }
226*6c61d4e1STaylor Simpson 
test_vcombine_tmp(void)227*6c61d4e1STaylor Simpson static void test_vcombine_tmp(void)
228*6c61d4e1STaylor Simpson {
229*6c61d4e1STaylor Simpson     void *p0 = buffer0;
230*6c61d4e1STaylor Simpson     void *p1 = buffer1;
231*6c61d4e1STaylor Simpson     void *pout = output;
232*6c61d4e1STaylor Simpson 
233*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
234*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
235*6c61d4e1STaylor Simpson 
236*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) {
237*6c61d4e1STaylor Simpson         /*
238*6c61d4e1STaylor Simpson          * Combine into v13:12 as .tmp, then use it in the next packet
239*6c61d4e1STaylor Simpson          * Should get the new value within the same packet and
240*6c61d4e1STaylor Simpson          * the old value in the next packet
241*6c61d4e1STaylor Simpson          */
242*6c61d4e1STaylor Simpson         asm("v3 = vmem(%0 + #0)\n\t"
243*6c61d4e1STaylor Simpson             "r1 = #1\n\t"
244*6c61d4e1STaylor Simpson             "v12 = vsplat(r1)\n\t"
245*6c61d4e1STaylor Simpson             "r1 = #2\n\t"
246*6c61d4e1STaylor Simpson             "v13 = vsplat(r1)\n\t"
247*6c61d4e1STaylor Simpson             "r1 = #3\n\t"
248*6c61d4e1STaylor Simpson             "v14 = vsplat(r1)\n\t"
249*6c61d4e1STaylor Simpson             "r1 = #4\n\t"
250*6c61d4e1STaylor Simpson             "v15 = vsplat(r1)\n\t"
251*6c61d4e1STaylor Simpson             "{\n\t"
252*6c61d4e1STaylor Simpson             "    v13:12.tmp = vcombine(v15, v14)\n\t"
253*6c61d4e1STaylor Simpson             "    v4.w = vadd(v12.w, v3.w)\n\t"
254*6c61d4e1STaylor Simpson             "    v16 = v13\n\t"
255*6c61d4e1STaylor Simpson             "}\n\t"
256*6c61d4e1STaylor Simpson             "v4.w = vadd(v4.w, v12.w)\n\t"
257*6c61d4e1STaylor Simpson             "v4.w = vadd(v4.w, v13.w)\n\t"
258*6c61d4e1STaylor Simpson             "v4.w = vadd(v4.w, v16.w)\n\t"
259*6c61d4e1STaylor Simpson             "vmem(%2 + #0) = v4\n\t"
260*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
261*6c61d4e1STaylor Simpson             : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
262*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector);
263*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
264*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
265*6c61d4e1STaylor Simpson 
266*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
267*6c61d4e1STaylor Simpson             expect[i].w[j] = buffer0[i].w[j] + 10;
268*6c61d4e1STaylor Simpson         }
269*6c61d4e1STaylor Simpson     }
270*6c61d4e1STaylor Simpson 
271*6c61d4e1STaylor Simpson     check_output_w(__LINE__, BUFSIZE);
272*6c61d4e1STaylor Simpson }
273*6c61d4e1STaylor Simpson 
test_vmpyuhvs(void)274*6c61d4e1STaylor Simpson static void test_vmpyuhvs(void)
275*6c61d4e1STaylor Simpson {
276*6c61d4e1STaylor Simpson     void *p0 = buffer0;
277*6c61d4e1STaylor Simpson     void *p1 = buffer1;
278*6c61d4e1STaylor Simpson     void *pout = output;
279*6c61d4e1STaylor Simpson 
280*6c61d4e1STaylor Simpson     memset(expect, 0xaa, sizeof(expect));
281*6c61d4e1STaylor Simpson     memset(output, 0xbb, sizeof(output));
282*6c61d4e1STaylor Simpson 
283*6c61d4e1STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) {
284*6c61d4e1STaylor Simpson         asm("v4 = vmem(%0 + #0)\n\t"
285*6c61d4e1STaylor Simpson             "v5 = vmem(%1 + #0)\n\t"
286*6c61d4e1STaylor Simpson             "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t"
287*6c61d4e1STaylor Simpson             "vmem(%2) = v4\n\t"
288*6c61d4e1STaylor Simpson             : : "r"(p0), "r"(p1), "r"(pout)
289*6c61d4e1STaylor Simpson             : "v4", "v5", "memory");
290*6c61d4e1STaylor Simpson         p0 += sizeof(MMVector);
291*6c61d4e1STaylor Simpson         p1 += sizeof(MMVector);
292*6c61d4e1STaylor Simpson         pout += sizeof(MMVector);
293*6c61d4e1STaylor Simpson 
294*6c61d4e1STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
295*6c61d4e1STaylor Simpson             expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16;
296*6c61d4e1STaylor Simpson         }
297*6c61d4e1STaylor Simpson     }
298*6c61d4e1STaylor Simpson 
299*6c61d4e1STaylor Simpson     check_output_h(__LINE__, BUFSIZE);
300*6c61d4e1STaylor Simpson }
301*6c61d4e1STaylor Simpson 
main()302*6c61d4e1STaylor Simpson int main()
303*6c61d4e1STaylor Simpson {
304*6c61d4e1STaylor Simpson     init_buffers();
305*6c61d4e1STaylor Simpson 
306*6c61d4e1STaylor Simpson     test_vasrvuhubrndsat();
307*6c61d4e1STaylor Simpson     test_vasrvuhubsat();
308*6c61d4e1STaylor Simpson     test_vasrvwuhrndsat();
309*6c61d4e1STaylor Simpson     test_vasrvwuhsat();
310*6c61d4e1STaylor Simpson 
311*6c61d4e1STaylor Simpson     test_vassign_tmp();
312*6c61d4e1STaylor Simpson     test_vcombine_tmp();
313*6c61d4e1STaylor Simpson 
314*6c61d4e1STaylor Simpson     test_vmpyuhvs();
315*6c61d4e1STaylor Simpson 
316*6c61d4e1STaylor Simpson     puts(err ? "FAIL" : "PASS");
317*6c61d4e1STaylor Simpson     return err ? 1 : 0;
318*6c61d4e1STaylor Simpson }
319