xref: /openbmc/qemu/tests/tcg/hexagon/v69_hvx.c (revision d2dfe0b5)
1 /*
2  *  Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <stdint.h>
20 #include <stdbool.h>
21 #include <string.h>
22 #include <limits.h>
23 
/*
 * Global failure indicator: main() prints "FAIL" and returns 1 when this is
 * non-zero, "PASS" and 0 otherwise.  It is defined ahead of the hvx_misc.h
 * include; presumably the check_output_* helpers in that header update it
 * on a mismatch -- confirm against hvx_misc.h.
 */
int err;
25 
26 #include "hvx_misc.h"
27 
/*
 * Add the rounding bias for a following right shift by SHAMT:
 * VAL + 2^(SHAMT - 1) when SHAMT is positive, VAL unchanged otherwise.
 * The sum is formed in (at least) long long.  SHAMT is evaluated twice,
 * so do not pass expressions with side effects.
 */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) <= 0) ? 0 : (1LL << ((SHAMT) - 1))))
30 
/*
 * Saturate VAL to the unsigned byte range [0, 0xff].
 * Values already representable in 8 bits pass through; anything else
 * becomes 0 when its int32_t view is negative and 0xff otherwise.
 * VAL is evaluated more than once, so it must be free of side effects.
 */
#define fVSATUB(VAL) \
    ((((VAL) & 0xffLL) != (VAL)) ? \
        ((((int32_t)(VAL)) >= 0) ? 0xff : 0) : \
        (VAL))
35 
/*
 * Saturate VAL to the unsigned halfword range [0, 0xffff].
 * Values already representable in 16 bits pass through; anything else
 * becomes 0 when its int32_t view is negative and 0xffff otherwise.
 * VAL is evaluated more than once, so it must be free of side effects.
 */
#define fVSATUH(VAL) \
    ((((VAL) & 0xffffLL) != (VAL)) ? \
        ((((int32_t)(VAL)) >= 0) ? 0xffff : 0) : \
        (VAL))
40 
41 static void test_vasrvuhubrndsat(void)
42 {
43     void *p0 = buffer0;
44     void *p1 = buffer1;
45     void *pout = output;
46 
47     memset(expect, 0xaa, sizeof(expect));
48     memset(output, 0xbb, sizeof(output));
49 
50     for (int i = 0; i < BUFSIZE / 2; i++) {
51         asm("v4 = vmem(%0 + #0)\n\t"
52             "v5 = vmem(%0 + #1)\n\t"
53             "v6 = vmem(%1 + #0)\n\t"
54             "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
55             "vmem(%2) = v5\n\t"
56             : : "r"(p0), "r"(p1), "r"(pout)
57             : "v4", "v5", "v6", "memory");
58         p0 += sizeof(MMVector) * 2;
59         p1 += sizeof(MMVector);
60         pout += sizeof(MMVector);
61 
62         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
63             int shamt;
64             uint8_t byte0;
65             uint8_t byte1;
66 
67             shamt = buffer1[i].ub[2 * j + 0] & 0x7;
68             byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt);
69             shamt = buffer1[i].ub[2 * j + 1] & 0x7;
70             byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt);
71             expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
72         }
73     }
74 
75     check_output_h(__LINE__, BUFSIZE / 2);
76 }
77 
78 static void test_vasrvuhubsat(void)
79 {
80     void *p0 = buffer0;
81     void *p1 = buffer1;
82     void *pout = output;
83 
84     memset(expect, 0xaa, sizeof(expect));
85     memset(output, 0xbb, sizeof(output));
86 
87     for (int i = 0; i < BUFSIZE / 2; i++) {
88         asm("v4 = vmem(%0 + #0)\n\t"
89             "v5 = vmem(%0 + #1)\n\t"
90             "v6 = vmem(%1 + #0)\n\t"
91             "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
92             "vmem(%2) = v5\n\t"
93             : : "r"(p0), "r"(p1), "r"(pout)
94             : "v4", "v5", "v6", "memory");
95         p0 += sizeof(MMVector) * 2;
96         p1 += sizeof(MMVector);
97         pout += sizeof(MMVector);
98 
99         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
100             int shamt;
101             uint8_t byte0;
102             uint8_t byte1;
103 
104             shamt = buffer1[i].ub[2 * j + 0] & 0x7;
105             byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt);
106             shamt = buffer1[i].ub[2 * j + 1] & 0x7;
107             byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt);
108             expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
109         }
110     }
111 
112     check_output_h(__LINE__, BUFSIZE / 2);
113 }
114 
115 static void test_vasrvwuhrndsat(void)
116 {
117     void *p0 = buffer0;
118     void *p1 = buffer1;
119     void *pout = output;
120 
121     memset(expect, 0xaa, sizeof(expect));
122     memset(output, 0xbb, sizeof(output));
123 
124     for (int i = 0; i < BUFSIZE / 2; i++) {
125         asm("v4 = vmem(%0 + #0)\n\t"
126             "v5 = vmem(%0 + #1)\n\t"
127             "v6 = vmem(%1 + #0)\n\t"
128             "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
129             "vmem(%2) = v5\n\t"
130             : : "r"(p0), "r"(p1), "r"(pout)
131             : "v4", "v5", "v6", "memory");
132         p0 += sizeof(MMVector) * 2;
133         p1 += sizeof(MMVector);
134         pout += sizeof(MMVector);
135 
136         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
137             int shamt;
138             uint16_t half0;
139             uint16_t half1;
140 
141             shamt = buffer1[i].uh[2 * j + 0] & 0xf;
142             half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt);
143             shamt = buffer1[i].uh[2 * j + 1] & 0xf;
144             half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt);
145             expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
146         }
147     }
148 
149     check_output_w(__LINE__, BUFSIZE / 2);
150 }
151 
152 static void test_vasrvwuhsat(void)
153 {
154     void *p0 = buffer0;
155     void *p1 = buffer1;
156     void *pout = output;
157 
158     memset(expect, 0xaa, sizeof(expect));
159     memset(output, 0xbb, sizeof(output));
160 
161     for (int i = 0; i < BUFSIZE / 2; i++) {
162         asm("v4 = vmem(%0 + #0)\n\t"
163             "v5 = vmem(%0 + #1)\n\t"
164             "v6 = vmem(%1 + #0)\n\t"
165             "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
166             "vmem(%2) = v5\n\t"
167             : : "r"(p0), "r"(p1), "r"(pout)
168             : "v4", "v5", "v6", "memory");
169         p0 += sizeof(MMVector) * 2;
170         p1 += sizeof(MMVector);
171         pout += sizeof(MMVector);
172 
173         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
174             int shamt;
175             uint16_t half0;
176             uint16_t half1;
177 
178             shamt = buffer1[i].uh[2 * j + 0] & 0xf;
179             half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt);
180             shamt = buffer1[i].uh[2 * j + 1] & 0xf;
181             half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt);
182             expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
183         }
184     }
185 
186     check_output_w(__LINE__, BUFSIZE / 2);
187 }
188 
189 static void test_vassign_tmp(void)
190 {
191     void *p0 = buffer0;
192     void *pout = output;
193 
194     memset(expect, 0xaa, sizeof(expect));
195     memset(output, 0xbb, sizeof(output));
196 
197     for (int i = 0; i < BUFSIZE; i++) {
198         /*
199          * Assign into v12 as .tmp, then use it in the next packet
200          * Should get the new value within the same packet and
201          * the old value in the next packet
202          */
203         asm("v3 = vmem(%0 + #0)\n\t"
204             "r1 = #1\n\t"
205             "v12 = vsplat(r1)\n\t"
206             "r1 = #2\n\t"
207             "v13 = vsplat(r1)\n\t"
208             "{\n\t"
209             "    v12.tmp = v13\n\t"
210             "    v4.w = vadd(v12.w, v3.w)\n\t"
211             "}\n\t"
212             "v4.w = vadd(v4.w, v12.w)\n\t"
213             "vmem(%1 + #0) = v4\n\t"
214             : : "r"(p0), "r"(pout)
215             : "r1", "v3", "v4", "v12", "v13", "memory");
216         p0 += sizeof(MMVector);
217         pout += sizeof(MMVector);
218 
219         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
220             expect[i].w[j] = buffer0[i].w[j] + 3;
221         }
222     }
223 
224     check_output_w(__LINE__, BUFSIZE);
225 }
226 
227 static void test_vcombine_tmp(void)
228 {
229     void *p0 = buffer0;
230     void *p1 = buffer1;
231     void *pout = output;
232 
233     memset(expect, 0xaa, sizeof(expect));
234     memset(output, 0xbb, sizeof(output));
235 
236     for (int i = 0; i < BUFSIZE; i++) {
237         /*
238          * Combine into v13:12 as .tmp, then use it in the next packet
239          * Should get the new value within the same packet and
240          * the old value in the next packet
241          */
242         asm("v3 = vmem(%0 + #0)\n\t"
243             "r1 = #1\n\t"
244             "v12 = vsplat(r1)\n\t"
245             "r1 = #2\n\t"
246             "v13 = vsplat(r1)\n\t"
247             "r1 = #3\n\t"
248             "v14 = vsplat(r1)\n\t"
249             "r1 = #4\n\t"
250             "v15 = vsplat(r1)\n\t"
251             "{\n\t"
252             "    v13:12.tmp = vcombine(v15, v14)\n\t"
253             "    v4.w = vadd(v12.w, v3.w)\n\t"
254             "    v16 = v13\n\t"
255             "}\n\t"
256             "v4.w = vadd(v4.w, v12.w)\n\t"
257             "v4.w = vadd(v4.w, v13.w)\n\t"
258             "v4.w = vadd(v4.w, v16.w)\n\t"
259             "vmem(%2 + #0) = v4\n\t"
260             : : "r"(p0), "r"(p1), "r"(pout)
261             : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
262         p0 += sizeof(MMVector);
263         p1 += sizeof(MMVector);
264         pout += sizeof(MMVector);
265 
266         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
267             expect[i].w[j] = buffer0[i].w[j] + 10;
268         }
269     }
270 
271     check_output_w(__LINE__, BUFSIZE);
272 }
273 
274 static void test_vmpyuhvs(void)
275 {
276     void *p0 = buffer0;
277     void *p1 = buffer1;
278     void *pout = output;
279 
280     memset(expect, 0xaa, sizeof(expect));
281     memset(output, 0xbb, sizeof(output));
282 
283     for (int i = 0; i < BUFSIZE; i++) {
284         asm("v4 = vmem(%0 + #0)\n\t"
285             "v5 = vmem(%1 + #0)\n\t"
286             "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t"
287             "vmem(%2) = v4\n\t"
288             : : "r"(p0), "r"(p1), "r"(pout)
289             : "v4", "v5", "memory");
290         p0 += sizeof(MMVector);
291         p1 += sizeof(MMVector);
292         pout += sizeof(MMVector);
293 
294         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
295             expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16;
296         }
297     }
298 
299     check_output_h(__LINE__, BUFSIZE);
300 }
301 
302 int main()
303 {
304     init_buffers();
305 
306     test_vasrvuhubrndsat();
307     test_vasrvuhubsat();
308     test_vasrvwuhrndsat();
309     test_vasrvwuhsat();
310 
311     test_vassign_tmp();
312     test_vcombine_tmp();
313 
314     test_vmpyuhvs();
315 
316     puts(err ? "FAIL" : "PASS");
317     return err ? 1 : 0;
318 }
319