1 /*
2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <stdio.h>
19 #include <stdint.h>
20 #include <stdbool.h>
21
/*
 * Global failure indicator: main() prints "FAIL" and returns this value when
 * it is non-zero.
 * NOTE(review): presumably updated by the check helpers in hex_test.h, which
 * would explain why it is defined ahead of that include - confirm against
 * the header.
 */
int err;
23
24 #include "hex_test.h"
25
26 /*
27 * Make sure that the :mem_noshuf packet attribute is honored.
28 * This is important when the addresses overlap.
29 * The store instruction in slot 1 effectively executes first,
30 * followed by the load instruction in slot 0.
31 */
32
33 #define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
34 static inline uint32_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
35 { \
36 uint32_t ret; \
37 asm volatile("{\n\t" \
38 " " #ST_OP "(%1) = %3\n\t" \
39 " %0 = " #LD_OP "(%2)\n\t" \
40 "}:mem_noshuf\n" \
41 : "=r"(ret) \
42 : "r"(p), "r"(q), "r"(x) \
43 : "memory"); \
44 return ret; \
45 }
46
47 #define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
48 static inline uint64_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
49 { \
50 uint64_t ret; \
51 asm volatile("{\n\t" \
52 " " #ST_OP "(%1) = %3\n\t" \
53 " %0 = " #LD_OP "(%2)\n\t" \
54 "}:mem_noshuf\n" \
55 : "=r"(ret) \
56 : "r"(p), "r"(q), "r"(x) \
57 : "memory"); \
58 return ret; \
59 }
60
61 /* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb,int8_t,int8_t,memb,memb)62 MEM_NOSHUF32(mem_noshuf_sb_lb, int8_t, int8_t, memb, memb)
63 MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t, uint8_t, memb, memub)
64 MEM_NOSHUF32(mem_noshuf_sb_lh, int8_t, int16_t, memb, memh)
65 MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t, uint16_t, memb, memuh)
66 MEM_NOSHUF32(mem_noshuf_sb_lw, int8_t, int32_t, memb, memw)
67 MEM_NOSHUF64(mem_noshuf_sb_ld, int8_t, int64_t, memb, memd)
68
69 /* Store half combinations */
70 MEM_NOSHUF32(mem_noshuf_sh_lb, int16_t, int8_t, memh, memb)
71 MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t, uint8_t, memh, memub)
72 MEM_NOSHUF32(mem_noshuf_sh_lh, int16_t, int16_t, memh, memh)
73 MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t, uint16_t, memh, memuh)
74 MEM_NOSHUF32(mem_noshuf_sh_lw, int16_t, int32_t, memh, memw)
75 MEM_NOSHUF64(mem_noshuf_sh_ld, int16_t, int64_t, memh, memd)
76
77 /* Store word combinations */
78 MEM_NOSHUF32(mem_noshuf_sw_lb, int32_t, int8_t, memw, memb)
79 MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t, uint8_t, memw, memub)
80 MEM_NOSHUF32(mem_noshuf_sw_lh, int32_t, int16_t, memw, memh)
81 MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t, uint16_t, memw, memuh)
82 MEM_NOSHUF32(mem_noshuf_sw_lw, int32_t, int32_t, memw, memw)
83 MEM_NOSHUF64(mem_noshuf_sw_ld, int32_t, int64_t, memw, memd)
84
85 /* Store double combinations */
86 MEM_NOSHUF32(mem_noshuf_sd_lb, int64_t, int8_t, memd, memb)
87 MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t, uint8_t, memd, memub)
88 MEM_NOSHUF32(mem_noshuf_sd_lh, int64_t, int16_t, memd, memh)
89 MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t, uint16_t, memd, memuh)
90 MEM_NOSHUF32(mem_noshuf_sd_lw, int64_t, int32_t, memd, memw)
91 MEM_NOSHUF64(mem_noshuf_sd_ld, int64_t, int64_t, memd, memd)
92
93 static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q,
94 int32_t x, int32_t y)
95 {
96 int ret;
97 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
98 "%0 = %3\n\t"
99 "{\n\t"
100 " memw(%1) = %4\n\t"
101 " if (!p0) %0 = memw(%2)\n\t"
102 "}:mem_noshuf\n"
103 : "=&r"(ret)
104 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
105 : "p0", "memory");
106 return ret;
107 }
108
pred_lw_sw_pi(bool pred,int32_t * p,int32_t * q,int32_t x,int32_t y)109 static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q,
110 int32_t x, int32_t y)
111 {
112 int ret;
113 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
114 "%0 = %3\n\t"
115 "r7 = %2\n\t"
116 "{\n\t"
117 " memw(%1) = %4\n\t"
118 " if (!p0) %0 = memw(r7++#4)\n\t"
119 "}:mem_noshuf\n"
120 : "=&r"(ret)
121 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
122 : "r7", "p0", "memory");
123 return ret;
124 }
125
pred_ld_sd(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)126 static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q,
127 int64_t x, int64_t y)
128 {
129 int64_t ret;
130 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
131 "%0 = %3\n\t"
132 "{\n\t"
133 " memd(%1) = %4\n\t"
134 " if (!p0) %0 = memd(%2)\n\t"
135 "}:mem_noshuf\n"
136 : "=&r"(ret)
137 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
138 : "p0", "memory");
139 return ret;
140 }
141
pred_ld_sd_pi(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)142 static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q,
143 int64_t x, int64_t y)
144 {
145 int64_t ret;
146 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
147 "%0 = %3\n\t"
148 "r7 = %2\n\t"
149 "{\n\t"
150 " memd(%1) = %4\n\t"
151 " if (!p0) %0 = memd(r7++#8)\n\t"
152 "}:mem_noshuf\n"
153 : "=&r"(ret)
154 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
155 : "r7", "p0", "memory");
156 return ret;
157 }
158
cancel_sw_lb(bool pred,int32_t * p,int8_t * q,int32_t x)159 static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x)
160 {
161 int32_t ret;
162 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
163 "{\n\t"
164 " if (!p0) memw(%1) = %3\n\t"
165 " %0 = memb(%2)\n\t"
166 "}:mem_noshuf\n"
167 : "=r"(ret)
168 : "r"(p), "r"(q), "r"(x), "r"(pred)
169 : "p0", "memory");
170 return ret;
171 }
172
cancel_sw_ld(bool pred,int32_t * p,int64_t * q,int32_t x)173 static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x)
174 {
175 int64_t ret;
176 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
177 "{\n\t"
178 " if (!p0) memw(%1) = %3\n\t"
179 " %0 = memd(%2)\n\t"
180 "}:mem_noshuf\n"
181 : "=r"(ret)
182 : "r"(p), "r"(q), "r"(x), "r"(pred)
183 : "p0", "memory");
184 return ret;
185 }
186
/*
 * Scratch buffer shared by the tests: 16 bytes that can be viewed as signed
 * or unsigned doublewords, words, halfwords or bytes.  All members alias the
 * same storage, so data stored through one view can be loaded through
 * another.
 */
enum { MEMORY_NBYTES = 16 };

typedef union {
    /* doubleword views */
    int64_t  d[MEMORY_NBYTES / sizeof(int64_t)];
    uint64_t ud[MEMORY_NBYTES / sizeof(uint64_t)];
    /* word views */
    int32_t  w[MEMORY_NBYTES / sizeof(int32_t)];
    uint32_t uw[MEMORY_NBYTES / sizeof(uint32_t)];
    /* halfword views */
    int16_t  h[MEMORY_NBYTES / sizeof(int16_t)];
    uint16_t uh[MEMORY_NBYTES / sizeof(uint16_t)];
    /* byte views */
    int8_t   b[MEMORY_NBYTES / sizeof(int8_t)];
    uint8_t  ub[MEMORY_NBYTES / sizeof(uint8_t)];
} Memory;
197
main()198 int main()
199 {
200 Memory n;
201 uint32_t res32;
202 uint64_t res64;
203
204 /*
205 * Store byte combinations
206 */
207 n.w[0] = ~0;
208 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
209 check32(res32, 0xffffff87);
210
211 n.w[0] = ~0;
212 res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
213 check32(res32, 0x00000087);
214
215 n.w[0] = ~0;
216 res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
217 check32(res32, 0xffffff87);
218
219 n.w[0] = ~0;
220 res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
221 check32(res32, 0x0000ff87);
222
223 n.w[0] = ~0;
224 res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
225 check32(res32, 0xffffff87);
226
227 n.d[0] = ~0LL;
228 res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
229 check64(res64, 0xffffffffffffff87LL);
230
231 /*
232 * Store half combinations
233 */
234 n.w[0] = ~0;
235 res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
236 check32(res32, 0xffffff87);
237
238 n.w[0] = ~0;
239 res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
240 check32(res32, 0x0000008f);
241
242 n.w[0] = ~0;
243 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
244 check32(res32, 0xffff8a87);
245
246 n.w[0] = ~0;
247 res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
248 check32(res32, 0x8a87);
249
250 n.w[0] = ~0;
251 res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
252 check32(res32, 0x8a87ffff);
253
254 n.w[0] = ~0;
255 res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
256 check64(res64, 0xffffffff8a87ffffLL);
257
258 /*
259 * Store word combinations
260 */
261 n.w[0] = ~0;
262 res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
263 check32(res32, 0xffffff87);
264
265 n.w[0] = ~0;
266 res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
267 check32(res32, 0x00000087);
268
269 n.w[0] = ~0;
270 res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
271 check32(res32, 0xfffff678);
272
273 n.w[0] = ~0;
274 res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
275 check32(res32, 0x00005678);
276
277 n.w[0] = ~0;
278 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
279 check32(res32, 0x12345678);
280
281 n.d[0] = ~0LL;
282 res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
283 check64(res64, 0xffffffff12345678LL);
284
285 /*
286 * Store double combinations
287 */
288 n.d[0] = ~0LL;
289 res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
290 check32(res32, 0xffffffde);
291
292 n.d[0] = ~0LL;
293 res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
294 check32(res32, 0x000000de);
295
296 n.d[0] = ~0LL;
297 res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
298 check32(res32, 0xffff9abc);
299
300 n.d[0] = ~0LL;
301 res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
302 check32(res32, 0x00009abc);
303
304 n.d[0] = ~0LL;
305 res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
306 check32(res32, 0x12345678);
307
308 n.d[0] = ~0LL;
309 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
310 check64(res64, 0x123456789abcdef0LL);
311
312 /*
313 * Predicated word stores
314 */
315 n.w[0] = ~0;
316 res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678);
317 check32(res32, 0xffffffff);
318
319 n.w[0] = ~0;
320 res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687);
321 check32(res32, 0xffffff87);
322
323 /*
324 * Predicated double stores
325 */
326 n.d[0] = ~0LL;
327 res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678);
328 check64(res64, 0xffffffffffffffffLL);
329
330 n.d[0] = ~0LL;
331 res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678);
332 check64(res64, 0xffffffff12345678LL);
333
334 n.d[0] = ~0LL;
335 res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678);
336 check64(res64, 0xffffffffffffffffLL);
337
338 n.d[0] = ~0LL;
339 res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678);
340 check64(res64, 0x12345678ffffffffLL);
341
342 /*
343 * No overlap tests
344 */
345 n.w[0] = ~0;
346 res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
347 check32(res32, 0xffffffff);
348
349 n.w[0] = ~0;
350 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
351 check32(res32, 0xffffffff);
352
353 n.w[0] = ~0;
354 res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
355 check32(res32, 0xffffffff);
356
357 n.w[0] = ~0;
358 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
359 check32(res32, 0xffffffff);
360
361 n.d[0] = ~0LL;
362 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
363 check32(res32, 0xffffffff);
364
365 n.d[0] = ~0LL;
366 res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
367 check32(res32, 0xffffffff);
368
369 n.d[0] = ~0LL;
370 n.d[1] = ~0LL;
371 res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
372 check64(res64, 0xffffffffffffffffLL);
373
374 n.d[0] = ~0LL;
375 n.d[1] = ~0LL;
376 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
377 check64(res64, 0xffffffffffffffffLL);
378
379 n.w[0] = ~0;
380 res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
381 check32(res32, 0x12345678);
382 check32(n.w[0], 0xc0ffeeda);
383
384 n.w[0] = ~0;
385 res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
386 check32(res32, 0xc0ffeeda);
387 check32(n.w[0], 0xc0ffeeda);
388
389 n.w[0] = ~0;
390 res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
391 check32(res32, 0x12345678);
392 check32(n.w[0], 0xc0ffeeda);
393
394 n.w[0] = ~0;
395 res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396 check32(res32, 0xc0ffeeda);
397 check32(n.w[0], 0xc0ffeeda);
398
399 n.d[0] = ~0LL;
400 res64 = pred_ld_sd(false, &n.d[0], &n.d[0],
401 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
402 check64(res64, 0x1234567812345678LL);
403 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
404
405 n.d[0] = ~0LL;
406 res64 = pred_ld_sd(true, &n.d[0], &n.d[0],
407 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
408 check64(res64, 0xc0ffeedac0ffeedaLL);
409 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
410
411 n.d[0] = ~0LL;
412 res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0],
413 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
414 check64(res64, 0x1234567812345678LL);
415 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
416
417 n.d[0] = ~0LL;
418 res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0],
419 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
420 check64(res64, 0xc0ffeedac0ffeedaLL);
421 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
422
423 puts(err ? "FAIL" : "PASS");
424 return err;
425 }
426