1 /*
2 * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <stdio.h>
19 #include <stdint.h>
20 #include <stdbool.h>
21 #include <string.h>
22 #include <limits.h>
23
/*
 * Test failure indicator: main() prints FAIL and exits with status 1 when
 * this is non-zero.  It is defined ahead of the hvx_misc.h include below;
 * presumably the checking helpers in that header update it - TODO confirm.
 */
int err;
25
26 #include "hvx_misc.h"
27
/*
 * Add the rounding term for a right shift by SHAMT bits to VAL: half of the
 * shifted-out range, 1 << (SHAMT - 1), or nothing when SHAMT is not
 * positive.  The shift itself is left to the caller.  The rounding term has
 * type long long, so an integer VAL is widened accordingly.
 * SHAMT may be evaluated twice; pass arguments without side effects.
 */
#define fVROUND(VAL, SHAMT) \
    (((SHAMT) <= 0) ? ((VAL) + 0LL) : ((VAL) + (1LL << ((SHAMT) - 1))))
30
/*
 * Saturate VAL to the unsigned byte range.  A value that survives masking
 * with 0xff unchanged is passed through; anything else becomes 0 when VAL,
 * truncated to int32_t, is negative and 0xff when it is not.
 * VAL is evaluated more than once; pass arguments without side effects.
 */
#define fVSATUB(VAL) \
    ((((VAL) & 0xffLL) != (VAL)) ? \
        ((((int32_t)(VAL)) >= 0) ? 0xff : 0) : \
        (VAL))
35
/*
 * Saturate VAL to the unsigned halfword range.  A value that survives
 * masking with 0xffff unchanged is passed through; anything else becomes 0
 * when VAL, truncated to int32_t, is negative and 0xffff when it is not.
 * VAL is evaluated more than once; pass arguments without side effects.
 */
#define fVSATUH(VAL) \
    ((((VAL) & 0xffffLL) != (VAL)) ? \
        ((((int32_t)(VAL)) >= 0) ? 0xffff : 0) : \
        (VAL))
40
test_vasrvuhubrndsat(void)41 static void test_vasrvuhubrndsat(void)
42 {
43 void *p0 = buffer0;
44 void *p1 = buffer1;
45 void *pout = output;
46
47 memset(expect, 0xaa, sizeof(expect));
48 memset(output, 0xbb, sizeof(output));
49
50 for (int i = 0; i < BUFSIZE / 2; i++) {
51 asm("v4 = vmem(%0 + #0)\n\t"
52 "v5 = vmem(%0 + #1)\n\t"
53 "v6 = vmem(%1 + #0)\n\t"
54 "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
55 "vmem(%2) = v5\n\t"
56 : : "r"(p0), "r"(p1), "r"(pout)
57 : "v4", "v5", "v6", "memory");
58 p0 += sizeof(MMVector) * 2;
59 p1 += sizeof(MMVector);
60 pout += sizeof(MMVector);
61
62 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
63 int shamt;
64 uint8_t byte0;
65 uint8_t byte1;
66
67 shamt = buffer1[i].ub[2 * j + 0] & 0x7;
68 byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt);
69 shamt = buffer1[i].ub[2 * j + 1] & 0x7;
70 byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt);
71 expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
72 }
73 }
74
75 check_output_h(__LINE__, BUFSIZE / 2);
76 }
77
test_vasrvuhubsat(void)78 static void test_vasrvuhubsat(void)
79 {
80 void *p0 = buffer0;
81 void *p1 = buffer1;
82 void *pout = output;
83
84 memset(expect, 0xaa, sizeof(expect));
85 memset(output, 0xbb, sizeof(output));
86
87 for (int i = 0; i < BUFSIZE / 2; i++) {
88 asm("v4 = vmem(%0 + #0)\n\t"
89 "v5 = vmem(%0 + #1)\n\t"
90 "v6 = vmem(%1 + #0)\n\t"
91 "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
92 "vmem(%2) = v5\n\t"
93 : : "r"(p0), "r"(p1), "r"(pout)
94 : "v4", "v5", "v6", "memory");
95 p0 += sizeof(MMVector) * 2;
96 p1 += sizeof(MMVector);
97 pout += sizeof(MMVector);
98
99 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
100 int shamt;
101 uint8_t byte0;
102 uint8_t byte1;
103
104 shamt = buffer1[i].ub[2 * j + 0] & 0x7;
105 byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt);
106 shamt = buffer1[i].ub[2 * j + 1] & 0x7;
107 byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt);
108 expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
109 }
110 }
111
112 check_output_h(__LINE__, BUFSIZE / 2);
113 }
114
test_vasrvwuhrndsat(void)115 static void test_vasrvwuhrndsat(void)
116 {
117 void *p0 = buffer0;
118 void *p1 = buffer1;
119 void *pout = output;
120
121 memset(expect, 0xaa, sizeof(expect));
122 memset(output, 0xbb, sizeof(output));
123
124 for (int i = 0; i < BUFSIZE / 2; i++) {
125 asm("v4 = vmem(%0 + #0)\n\t"
126 "v5 = vmem(%0 + #1)\n\t"
127 "v6 = vmem(%1 + #0)\n\t"
128 "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
129 "vmem(%2) = v5\n\t"
130 : : "r"(p0), "r"(p1), "r"(pout)
131 : "v4", "v5", "v6", "memory");
132 p0 += sizeof(MMVector) * 2;
133 p1 += sizeof(MMVector);
134 pout += sizeof(MMVector);
135
136 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
137 int shamt;
138 uint16_t half0;
139 uint16_t half1;
140
141 shamt = buffer1[i].uh[2 * j + 0] & 0xf;
142 half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt);
143 shamt = buffer1[i].uh[2 * j + 1] & 0xf;
144 half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt);
145 expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
146 }
147 }
148
149 check_output_w(__LINE__, BUFSIZE / 2);
150 }
151
test_vasrvwuhsat(void)152 static void test_vasrvwuhsat(void)
153 {
154 void *p0 = buffer0;
155 void *p1 = buffer1;
156 void *pout = output;
157
158 memset(expect, 0xaa, sizeof(expect));
159 memset(output, 0xbb, sizeof(output));
160
161 for (int i = 0; i < BUFSIZE / 2; i++) {
162 asm("v4 = vmem(%0 + #0)\n\t"
163 "v5 = vmem(%0 + #1)\n\t"
164 "v6 = vmem(%1 + #0)\n\t"
165 "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
166 "vmem(%2) = v5\n\t"
167 : : "r"(p0), "r"(p1), "r"(pout)
168 : "v4", "v5", "v6", "memory");
169 p0 += sizeof(MMVector) * 2;
170 p1 += sizeof(MMVector);
171 pout += sizeof(MMVector);
172
173 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
174 int shamt;
175 uint16_t half0;
176 uint16_t half1;
177
178 shamt = buffer1[i].uh[2 * j + 0] & 0xf;
179 half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt);
180 shamt = buffer1[i].uh[2 * j + 1] & 0xf;
181 half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt);
182 expect[i].w[j] = (half1 << 16) | (half0 & 0xffff);
183 }
184 }
185
186 check_output_w(__LINE__, BUFSIZE / 2);
187 }
188
test_vassign_tmp(void)189 static void test_vassign_tmp(void)
190 {
191 void *p0 = buffer0;
192 void *pout = output;
193
194 memset(expect, 0xaa, sizeof(expect));
195 memset(output, 0xbb, sizeof(output));
196
197 for (int i = 0; i < BUFSIZE; i++) {
198 /*
199 * Assign into v12 as .tmp, then use it in the next packet
200 * Should get the new value within the same packet and
201 * the old value in the next packet
202 */
203 asm("v3 = vmem(%0 + #0)\n\t"
204 "r1 = #1\n\t"
205 "v12 = vsplat(r1)\n\t"
206 "r1 = #2\n\t"
207 "v13 = vsplat(r1)\n\t"
208 "{\n\t"
209 " v12.tmp = v13\n\t"
210 " v4.w = vadd(v12.w, v3.w)\n\t"
211 "}\n\t"
212 "v4.w = vadd(v4.w, v12.w)\n\t"
213 "vmem(%1 + #0) = v4\n\t"
214 : : "r"(p0), "r"(pout)
215 : "r1", "v3", "v4", "v12", "v13", "memory");
216 p0 += sizeof(MMVector);
217 pout += sizeof(MMVector);
218
219 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
220 expect[i].w[j] = buffer0[i].w[j] + 3;
221 }
222 }
223
224 check_output_w(__LINE__, BUFSIZE);
225 }
226
test_vcombine_tmp(void)227 static void test_vcombine_tmp(void)
228 {
229 void *p0 = buffer0;
230 void *p1 = buffer1;
231 void *pout = output;
232
233 memset(expect, 0xaa, sizeof(expect));
234 memset(output, 0xbb, sizeof(output));
235
236 for (int i = 0; i < BUFSIZE; i++) {
237 /*
238 * Combine into v13:12 as .tmp, then use it in the next packet
239 * Should get the new value within the same packet and
240 * the old value in the next packet
241 */
242 asm("v3 = vmem(%0 + #0)\n\t"
243 "r1 = #1\n\t"
244 "v12 = vsplat(r1)\n\t"
245 "r1 = #2\n\t"
246 "v13 = vsplat(r1)\n\t"
247 "r1 = #3\n\t"
248 "v14 = vsplat(r1)\n\t"
249 "r1 = #4\n\t"
250 "v15 = vsplat(r1)\n\t"
251 "{\n\t"
252 " v13:12.tmp = vcombine(v15, v14)\n\t"
253 " v4.w = vadd(v12.w, v3.w)\n\t"
254 " v16 = v13\n\t"
255 "}\n\t"
256 "v4.w = vadd(v4.w, v12.w)\n\t"
257 "v4.w = vadd(v4.w, v13.w)\n\t"
258 "v4.w = vadd(v4.w, v16.w)\n\t"
259 "vmem(%2 + #0) = v4\n\t"
260 : : "r"(p0), "r"(p1), "r"(pout)
261 : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
262 p0 += sizeof(MMVector);
263 p1 += sizeof(MMVector);
264 pout += sizeof(MMVector);
265
266 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
267 expect[i].w[j] = buffer0[i].w[j] + 10;
268 }
269 }
270
271 check_output_w(__LINE__, BUFSIZE);
272 }
273
test_vmpyuhvs(void)274 static void test_vmpyuhvs(void)
275 {
276 void *p0 = buffer0;
277 void *p1 = buffer1;
278 void *pout = output;
279
280 memset(expect, 0xaa, sizeof(expect));
281 memset(output, 0xbb, sizeof(output));
282
283 for (int i = 0; i < BUFSIZE; i++) {
284 asm("v4 = vmem(%0 + #0)\n\t"
285 "v5 = vmem(%1 + #0)\n\t"
286 "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t"
287 "vmem(%2) = v4\n\t"
288 : : "r"(p0), "r"(p1), "r"(pout)
289 : "v4", "v5", "memory");
290 p0 += sizeof(MMVector);
291 p1 += sizeof(MMVector);
292 pout += sizeof(MMVector);
293
294 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
295 expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16;
296 }
297 }
298
299 check_output_h(__LINE__, BUFSIZE);
300 }
301
main()302 int main()
303 {
304 init_buffers();
305
306 test_vasrvuhubrndsat();
307 test_vasrvuhubsat();
308 test_vasrvwuhrndsat();
309 test_vasrvwuhsat();
310
311 test_vassign_tmp();
312 test_vcombine_tmp();
313
314 test_vmpyuhvs();
315
316 puts(err ? "FAIL" : "PASS");
317 return err ? 1 : 0;
318 }
319