xref: /openbmc/qemu/tests/tcg/hexagon/mem_noshuf.c (revision f0984d40)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 
20 /*
21  *  Make sure that the :mem_noshuf packet attribute is honored.
22  *  This is important when the addresses overlap.
23  *  The store instruction in slot 1 effectively executes first,
24  *  followed by the load instruction in slot 0.
25  */
26 
/*
 * MEM_NOSHUF32 - generate a store-plus-load helper returning 32 bits.
 *
 *   NAME     name of the generated function
 *   ST_TYPE  C type of the value being stored (and pointee type of p)
 *   LD_TYPE  pointee type of the load address q
 *   ST_OP    store mnemonic pasted into the packet
 *   LD_OP    load mnemonic pasted into the packet
 *
 * The generated function emits a single packet tagged :mem_noshuf that
 * contains the store of x through p and the load through q, and returns
 * the register written by the load.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned int NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
    unsigned int loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
40 
/*
 * MEM_NOSHUF64 - generate a store-plus-load helper returning 64 bits.
 *
 * Takes the same arguments as the 32-bit variant (function name, store
 * type, load pointee type, store mnemonic, load mnemonic); the only
 * difference is that the load result is an unsigned long long.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned long long NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
    unsigned long long loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
54 
55 /* Store byte combinations */
56 MEM_NOSHUF32(mem_noshuf_sb_lb,  signed char,  signed char,      memb, memb)
57 MEM_NOSHUF32(mem_noshuf_sb_lub, signed char,  unsigned char,    memb, memub)
58 MEM_NOSHUF32(mem_noshuf_sb_lh,  signed char,  signed short,     memb, memh)
59 MEM_NOSHUF32(mem_noshuf_sb_luh, signed char,  unsigned short,   memb, memuh)
60 MEM_NOSHUF32(mem_noshuf_sb_lw,  signed char,  signed int,       memb, memw)
61 MEM_NOSHUF64(mem_noshuf_sb_ld,  signed char,  signed long long, memb, memd)
62 
63 /* Store half combinations */
64 MEM_NOSHUF32(mem_noshuf_sh_lb,  signed short, signed char,      memh, memb)
65 MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char,    memh, memub)
66 MEM_NOSHUF32(mem_noshuf_sh_lh,  signed short, signed short,     memh, memh)
67 MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short,   memh, memuh)
68 MEM_NOSHUF32(mem_noshuf_sh_lw,  signed short, signed int,       memh, memw)
69 MEM_NOSHUF64(mem_noshuf_sh_ld,  signed short, signed long long, memh, memd)
70 
71 /* Store word combinations */
72 MEM_NOSHUF32(mem_noshuf_sw_lb,  signed int,   signed char,      memw, memb)
73 MEM_NOSHUF32(mem_noshuf_sw_lub, signed int,   unsigned char,    memw, memub)
74 MEM_NOSHUF32(mem_noshuf_sw_lh,  signed int,   signed short,     memw, memh)
75 MEM_NOSHUF32(mem_noshuf_sw_luh, signed int,   unsigned short,   memw, memuh)
76 MEM_NOSHUF32(mem_noshuf_sw_lw,  signed int,   signed int,       memw, memw)
77 MEM_NOSHUF64(mem_noshuf_sw_ld,  signed int,   signed long long, memw, memd)
78 
79 /* Store double combinations */
80 MEM_NOSHUF32(mem_noshuf_sd_lb,  long long,    signed char,      memd, memb)
81 MEM_NOSHUF32(mem_noshuf_sd_lub, long long,    unsigned char,    memd, memub)
82 MEM_NOSHUF32(mem_noshuf_sd_lh,  long long,    signed short,     memd, memh)
83 MEM_NOSHUF32(mem_noshuf_sd_luh, long long,    unsigned short,   memd, memuh)
84 MEM_NOSHUF32(mem_noshuf_sd_lw,  long long,    signed int,       memd, memw)
85 MEM_NOSHUF64(mem_noshuf_sd_ld,  long long,    signed long long, memd, memd)
86 
87 static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y)
88 {
89     int ret;
90     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
91                  "%0 = %3\n\t"
92                  "{\n\t"
93                  "    memw(%1) = %4\n\t"
94                  "    if (!p0) %0 = memw(%2)\n\t"
95                  "}:mem_noshuf\n"
96                  : "=&r"(ret)
97                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
98                  : "p0", "memory");
99     return ret;
100 }
101 
102 static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y)
103 {
104     int ret;
105     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
106                  "%0 = %3\n\t"
107                  "r7 = %2\n\t"
108                  "{\n\t"
109                  "    memw(%1) = %4\n\t"
110                  "    if (!p0) %0 = memw(r7++#4)\n\t"
111                  "}:mem_noshuf\n"
112                  : "=&r"(ret)
113                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
114                  : "r7", "p0", "memory");
115     return ret;
116 }
117 
118 static inline long long pred_ld_sd(int pred, long long *p, long long *q,
119                                    long long x, long long y)
120 {
121     unsigned long long ret;
122     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
123                  "%0 = %3\n\t"
124                  "{\n\t"
125                  "    memd(%1) = %4\n\t"
126                  "    if (!p0) %0 = memd(%2)\n\t"
127                  "}:mem_noshuf\n"
128                  : "=&r"(ret)
129                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
130                  : "p0", "memory");
131     return ret;
132 }
133 
134 static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q,
135                                       long long x, long long y)
136 {
137     long long ret;
138     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
139                  "%0 = %3\n\t"
140                  "r7 = %2\n\t"
141                  "{\n\t"
142                  "    memd(%1) = %4\n\t"
143                  "    if (!p0) %0 = memd(r7++#8)\n\t"
144                  "}:mem_noshuf\n"
145                  : "=&r"(ret)
146                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
147                  : "r7", "p0", "memory");
148     return ret;
149 }
150 
151 static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
152 {
153     unsigned int ret;
154     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
155                  "{\n\t"
156                  "    if (!p0) memw(%1) = %3\n\t"
157                  "    %0 = memb(%2)\n\t"
158                  "}:mem_noshuf\n"
159                  : "=r"(ret)
160                  : "r"(p), "r"(q), "r"(x), "r"(pred)
161                  : "p0", "memory");
162     return ret;
163 }
164 
165 static inline
166 unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
167 {
168     long long ret;
169     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
170                  "{\n\t"
171                  "    if (!p0) memw(%1) = %3\n\t"
172                  "    %0 = memd(%2)\n\t"
173                  "}:mem_noshuf\n"
174                  : "=r"(ret)
175                  : "r"(p), "r"(q), "r"(x), "r"(pred)
176                  : "p0", "memory");
177     return ret;
178 }
179 
/*
 * Scratch buffer for the tests.  All members overlay the same storage, so
 * a store through one view can be read back through any other view.
 */
typedef union {
    /* double-word views */
    signed long long d[2];
    unsigned long long ud[2];

    /* word views */
    signed int w[4];
    unsigned int uw[4];

    /* half-word views */
    signed short h[8];
    unsigned short uh[8];

    /* byte views */
    signed char b[16];
    unsigned char ub[16];
} Memory;
190 
/* Number of failed checks so far. */
int err;

/* Compare two 32-bit values, reporting the caller's source line on failure. */
#define check32(n, expect) check32_(n, expect, __LINE__)

/*
 * Compare n against expect.  On mismatch, print both values as 8-digit hex
 * together with the given source line and increment err.
 */
static void check32_(int n, int expect, int line)
{
    if (n != expect) {
        /* %x expects unsigned int, so convert explicitly */
        printf("ERROR: 0x%08x != 0x%08x, line %d\n",
               (unsigned int)n, (unsigned int)expect, line);
        err++;
    }
}

/* Compare two 64-bit values, reporting the caller's source line on failure. */
#define check64(n, expect) check64_(n, expect, __LINE__)

/*
 * Compare n against expect.  On mismatch, print both values as 16-digit hex
 * (the full width of a 64-bit value) together with the given source line
 * and increment err.
 */
static void check64_(long long n, long long expect, int line)
{
    if (n != expect) {
        /* %llx expects unsigned long long, so convert explicitly */
        printf("ERROR: 0x%016llx != 0x%016llx, line %d\n",
               (unsigned long long)n, (unsigned long long)expect, line);
        err++;
    }
}
212 
213 int main()
214 {
215     Memory n;
216     unsigned int res32;
217     unsigned long long res64;
218 
219     /*
220      * Store byte combinations
221      */
222     n.w[0] = ~0;
223     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
224     check32(res32, 0xffffff87);
225 
226     n.w[0] = ~0;
227     res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
228     check32(res32, 0x00000087);
229 
230     n.w[0] = ~0;
231     res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
232     check32(res32, 0xffffff87);
233 
234     n.w[0] = ~0;
235     res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
236     check32(res32, 0x0000ff87);
237 
238     n.w[0] = ~0;
239     res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
240     check32(res32, 0xffffff87);
241 
242     n.d[0] = ~0LL;
243     res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
244     check64(res64, 0xffffffffffffff87LL);
245 
246     /*
247      * Store half combinations
248      */
249     n.w[0] = ~0;
250     res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
251     check32(res32, 0xffffff87);
252 
253     n.w[0] = ~0;
254     res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
255     check32(res32, 0x0000008f);
256 
257     n.w[0] = ~0;
258     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
259     check32(res32, 0xffff8a87);
260 
261     n.w[0] = ~0;
262     res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
263     check32(res32, 0x8a87);
264 
265     n.w[0] = ~0;
266     res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
267     check32(res32, 0x8a87ffff);
268 
269     n.w[0] = ~0;
270     res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
271     check64(res64, 0xffffffff8a87ffffLL);
272 
273     /*
274      * Store word combinations
275      */
276     n.w[0] = ~0;
277     res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
278     check32(res32, 0xffffff87);
279 
280     n.w[0] = ~0;
281     res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
282     check32(res32, 0x00000087);
283 
284     n.w[0] = ~0;
285     res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
286     check32(res32, 0xfffff678);
287 
288     n.w[0] = ~0;
289     res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
290     check32(res32, 0x00005678);
291 
292     n.w[0] = ~0;
293     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
294     check32(res32, 0x12345678);
295 
296     n.d[0] = ~0LL;
297     res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
298     check64(res64, 0xffffffff12345678LL);
299 
300     /*
301      * Store double combinations
302      */
303     n.d[0] = ~0LL;
304     res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
305     check32(res32, 0xffffffde);
306 
307     n.d[0] = ~0LL;
308     res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
309     check32(res32, 0x000000de);
310 
311     n.d[0] = ~0LL;
312     res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
313     check32(res32, 0xffff9abc);
314 
315     n.d[0] = ~0LL;
316     res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
317     check32(res32, 0x00009abc);
318 
319     n.d[0] = ~0LL;
320     res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
321     check32(res32, 0x12345678);
322 
323     n.d[0] = ~0LL;
324     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
325     check64(res64, 0x123456789abcdef0LL);
326 
327     /*
328      * Predicated word stores
329      */
330     n.w[0] = ~0;
331     res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
332     check32(res32, 0xffffffff);
333 
334     n.w[0] = ~0;
335     res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
336     check32(res32, 0xffffff87);
337 
338     /*
339      * Predicated double stores
340      */
341     n.d[0] = ~0LL;
342     res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
343     check64(res64, 0xffffffffffffffffLL);
344 
345     n.d[0] = ~0LL;
346     res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
347     check64(res64, 0xffffffff12345678LL);
348 
349     n.d[0] = ~0LL;
350     res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
351     check64(res64, 0xffffffffffffffffLL);
352 
353     n.d[0] = ~0LL;
354     res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
355     check64(res64, 0x12345678ffffffffLL);
356 
357     /*
358      * No overlap tests
359      */
360     n.w[0] = ~0;
361     res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
362     check32(res32, 0xffffffff);
363 
364     n.w[0] = ~0;
365     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
366     check32(res32, 0xffffffff);
367 
368     n.w[0] = ~0;
369     res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
370     check32(res32, 0xffffffff);
371 
372     n.w[0] = ~0;
373     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
374     check32(res32, 0xffffffff);
375 
376     n.d[0] = ~0LL;
377     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
378     check32(res32, 0xffffffff);
379 
380     n.d[0] = ~0LL;
381     res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
382     check32(res32, 0xffffffff);
383 
384     n.d[0] = ~0LL;
385     n.d[1] = ~0LL;
386     res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
387     check64(res64, 0xffffffffffffffffLL);
388 
389     n.d[0] = ~0LL;
390     n.d[1] = ~0LL;
391     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
392     check64(res64, 0xffffffffffffffffLL);
393 
394     n.w[0] = ~0;
395     res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396     check32(res32, 0x12345678);
397     check32(n.w[0], 0xc0ffeeda);
398 
399     n.w[0] = ~0;
400     res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
401     check32(res32, 0xc0ffeeda);
402     check32(n.w[0], 0xc0ffeeda);
403 
404     n.w[0] = ~0;
405     res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
406     check32(res32, 0x12345678);
407     check32(n.w[0], 0xc0ffeeda);
408 
409     n.w[0] = ~0;
410     res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
411     check32(res32, 0xc0ffeeda);
412     check32(n.w[0], 0xc0ffeeda);
413 
414     n.d[0] = ~0LL;
415     res64 = pred_ld_sd(0, &n.d[0], &n.d[0],
416                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
417     check64(res64, 0x1234567812345678LL);
418     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
419 
420     n.d[0] = ~0LL;
421     res64 = pred_ld_sd(1, &n.d[0], &n.d[0],
422                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
423     check64(res64, 0xc0ffeedac0ffeedaLL);
424     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
425 
426     n.d[0] = ~0LL;
427     res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0],
428                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
429     check64(res64, 0x1234567812345678LL);
430     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
431 
432     n.d[0] = ~0LL;
433     res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0],
434                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
435     check64(res64, 0xc0ffeedac0ffeedaLL);
436     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
437 
438     puts(err ? "FAIL" : "PASS");
439     return err;
440 }
441