xref: /openbmc/qemu/tests/tcg/hexagon/mem_noshuf.c (revision 9c9fff18c45b54fd9adf2282323aab1b6f0ec866)
/*
 *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
17825d6ebaSTaylor Simpson 
18825d6ebaSTaylor Simpson #include <stdio.h>
19*0d57cd61STaylor Simpson #include <stdint.h>
20*0d57cd61STaylor Simpson #include <stdbool.h>
21*0d57cd61STaylor Simpson 
/*
 * Failure counter reported by main(): non-zero prints "FAIL" and becomes the
 * exit status.  It is defined before hex_test.h is included; presumably the
 * check32()/check64() helpers in that header update it - confirm there.
 */
int err;
23*0d57cd61STaylor Simpson 
24*0d57cd61STaylor Simpson #include "hex_test.h"
25825d6ebaSTaylor Simpson 
/*
 *  Make sure that the :mem_noshuf packet attribute is honored.
 *  This is important when the addresses overlap.
 *  The store instruction in slot 1 effectively executes first,
 *  followed by the load instruction in slot 0.
 */
32825d6ebaSTaylor Simpson 
/*
 * Define NAME(p, q, x): a helper that issues one :mem_noshuf packet which
 * stores x through p using ST_OP and loads through q using LD_OP, returning
 * the loaded value as a uint32_t.
 *
 *   NAME     name of the generated static inline function
 *   ST_TYPE  C type of the stored value x and of the pointee of p
 *   LD_TYPE  C type of the pointee of q
 *   ST_OP    store mnemonic (memb, memh, memw, memd)
 *   LD_OP    load mnemonic (memb, memub, memh, memuh, memw)
 *
 * The "memory" clobber tells the compiler that the packet reads and writes
 * memory it cannot see through the register operands.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline uint32_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
    uint32_t ret; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[p]) = %[x]\n\t" \
                 "    %[ret] = " #LD_OP "(%[q])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ret] "=r"(ret) \
                 : [p] "r"(p), [q] "r"(q), [x] "r"(x) \
                 : "memory"); \
    return ret; \
}
46825d6ebaSTaylor Simpson 
/*
 * Define NAME(p, q, x): a helper that issues one :mem_noshuf packet which
 * stores x through p using ST_OP and loads through q using LD_OP, returning
 * the loaded value as a uint64_t (used with the memd load below).
 *
 *   NAME     name of the generated static inline function
 *   ST_TYPE  C type of the stored value x and of the pointee of p
 *   LD_TYPE  C type of the pointee of q
 *   ST_OP    store mnemonic (memb, memh, memw, memd)
 *   LD_OP    load mnemonic
 *
 * The "memory" clobber tells the compiler that the packet reads and writes
 * memory it cannot see through the register operands.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline uint64_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
    uint64_t ret; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[p]) = %[x]\n\t" \
                 "    %[ret] = " #LD_OP "(%[q])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ret] "=r"(ret) \
                 : [p] "r"(p), [q] "r"(q), [x] "r"(x) \
                 : "memory"); \
    return ret; \
}
60825d6ebaSTaylor Simpson 
61825d6ebaSTaylor Simpson /* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb,int8_t,int8_t,memb,memb)62*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lb,  int8_t,       int8_t,           memb, memb)
63*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t,       uint8_t,          memb, memub)
64*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lh,  int8_t,       int16_t,          memb, memh)
65*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t,       uint16_t,         memb, memuh)
66*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lw,  int8_t,       int32_t,          memb, memw)
67*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sb_ld,  int8_t,       int64_t,          memb, memd)
68825d6ebaSTaylor Simpson 
69825d6ebaSTaylor Simpson /* Store half combinations */
70*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lb,  int16_t,      int8_t,           memh, memb)
71*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t,      uint8_t,          memh, memub)
72*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lh,  int16_t,      int16_t,          memh, memh)
73*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t,      uint16_t,         memh, memuh)
74*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lw,  int16_t,      int32_t,          memh, memw)
75*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sh_ld,  int16_t,      int64_t,          memh, memd)
76825d6ebaSTaylor Simpson 
77825d6ebaSTaylor Simpson /* Store word combinations */
78*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lb,  int32_t,      int8_t,           memw, memb)
79*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t,      uint8_t,          memw, memub)
80*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lh,  int32_t,      int16_t,          memw, memh)
81*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t,      uint16_t,         memw, memuh)
82*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lw,  int32_t,      int32_t,          memw, memw)
83*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sw_ld,  int32_t,      int64_t,          memw, memd)
84825d6ebaSTaylor Simpson 
85825d6ebaSTaylor Simpson /* Store double combinations */
86*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lb,  int64_t,      int8_t,           memd, memb)
87*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t,      uint8_t,          memd, memub)
88*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lh,  int64_t,      int16_t,          memd, memh)
89*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t,      uint16_t,         memd, memuh)
90*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lw,  int64_t,      int32_t,          memd, memw)
91*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sd_ld,  int64_t,      int64_t,          memd, memd)
92825d6ebaSTaylor Simpson 
93*0d57cd61STaylor Simpson static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q,
94*0d57cd61STaylor Simpson                              int32_t x, int32_t y)
95cab86deaSTaylor Simpson {
96cab86deaSTaylor Simpson     int ret;
97cab86deaSTaylor Simpson     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
98cab86deaSTaylor Simpson                  "%0 = %3\n\t"
99cab86deaSTaylor Simpson                  "{\n\t"
100cab86deaSTaylor Simpson                  "    memw(%1) = %4\n\t"
101cab86deaSTaylor Simpson                  "    if (!p0) %0 = memw(%2)\n\t"
102cab86deaSTaylor Simpson                  "}:mem_noshuf\n"
103cab86deaSTaylor Simpson                  : "=&r"(ret)
104cab86deaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
105cab86deaSTaylor Simpson                  : "p0", "memory");
106cab86deaSTaylor Simpson     return ret;
107cab86deaSTaylor Simpson }
108cab86deaSTaylor Simpson 
pred_lw_sw_pi(bool pred,int32_t * p,int32_t * q,int32_t x,int32_t y)109*0d57cd61STaylor Simpson static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q,
110*0d57cd61STaylor Simpson                                 int32_t x, int32_t y)
111cab86deaSTaylor Simpson {
112cab86deaSTaylor Simpson     int ret;
113cab86deaSTaylor Simpson     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
114cab86deaSTaylor Simpson                  "%0 = %3\n\t"
115cab86deaSTaylor Simpson                  "r7 = %2\n\t"
116cab86deaSTaylor Simpson                  "{\n\t"
117cab86deaSTaylor Simpson                  "    memw(%1) = %4\n\t"
118cab86deaSTaylor Simpson                  "    if (!p0) %0 = memw(r7++#4)\n\t"
119cab86deaSTaylor Simpson                  "}:mem_noshuf\n"
120cab86deaSTaylor Simpson                  : "=&r"(ret)
121cab86deaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
122cab86deaSTaylor Simpson                  : "r7", "p0", "memory");
123cab86deaSTaylor Simpson     return ret;
124cab86deaSTaylor Simpson }
125cab86deaSTaylor Simpson 
pred_ld_sd(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)126*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q,
127*0d57cd61STaylor Simpson                                  int64_t x, int64_t y)
128cab86deaSTaylor Simpson {
129*0d57cd61STaylor Simpson     int64_t ret;
130cab86deaSTaylor Simpson     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
131cab86deaSTaylor Simpson                  "%0 = %3\n\t"
132cab86deaSTaylor Simpson                  "{\n\t"
133cab86deaSTaylor Simpson                  "    memd(%1) = %4\n\t"
134cab86deaSTaylor Simpson                  "    if (!p0) %0 = memd(%2)\n\t"
135cab86deaSTaylor Simpson                  "}:mem_noshuf\n"
136cab86deaSTaylor Simpson                  : "=&r"(ret)
137cab86deaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
138cab86deaSTaylor Simpson                  : "p0", "memory");
139cab86deaSTaylor Simpson     return ret;
140cab86deaSTaylor Simpson }
141cab86deaSTaylor Simpson 
pred_ld_sd_pi(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)142*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q,
143*0d57cd61STaylor Simpson                                     int64_t x, int64_t y)
144cab86deaSTaylor Simpson {
145*0d57cd61STaylor Simpson     int64_t ret;
146cab86deaSTaylor Simpson     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
147cab86deaSTaylor Simpson                  "%0 = %3\n\t"
148cab86deaSTaylor Simpson                  "r7 = %2\n\t"
149cab86deaSTaylor Simpson                  "{\n\t"
150cab86deaSTaylor Simpson                  "    memd(%1) = %4\n\t"
151cab86deaSTaylor Simpson                  "    if (!p0) %0 = memd(r7++#8)\n\t"
152cab86deaSTaylor Simpson                  "}:mem_noshuf\n"
153cab86deaSTaylor Simpson                  : "=&r"(ret)
154cab86deaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
155eaee3b6fSMukilan Thiyagarajan                  : "r7", "p0", "memory");
156cab86deaSTaylor Simpson     return ret;
157cab86deaSTaylor Simpson }
158cab86deaSTaylor Simpson 
cancel_sw_lb(bool pred,int32_t * p,int8_t * q,int32_t x)159*0d57cd61STaylor Simpson static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x)
160825d6ebaSTaylor Simpson {
161*0d57cd61STaylor Simpson     int32_t ret;
162825d6ebaSTaylor Simpson     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
163825d6ebaSTaylor Simpson                  "{\n\t"
164825d6ebaSTaylor Simpson                  "    if (!p0) memw(%1) = %3\n\t"
165825d6ebaSTaylor Simpson                  "    %0 = memb(%2)\n\t"
166825d6ebaSTaylor Simpson                  "}:mem_noshuf\n"
167825d6ebaSTaylor Simpson                  : "=r"(ret)
168825d6ebaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(pred)
169825d6ebaSTaylor Simpson                  : "p0", "memory");
170825d6ebaSTaylor Simpson     return ret;
171825d6ebaSTaylor Simpson }
172825d6ebaSTaylor Simpson 
cancel_sw_ld(bool pred,int32_t * p,int64_t * q,int32_t x)173*0d57cd61STaylor Simpson static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x)
174825d6ebaSTaylor Simpson {
175*0d57cd61STaylor Simpson     int64_t ret;
176825d6ebaSTaylor Simpson     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
177825d6ebaSTaylor Simpson                  "{\n\t"
178825d6ebaSTaylor Simpson                  "    if (!p0) memw(%1) = %3\n\t"
179825d6ebaSTaylor Simpson                  "    %0 = memd(%2)\n\t"
180825d6ebaSTaylor Simpson                  "}:mem_noshuf\n"
181825d6ebaSTaylor Simpson                  : "=r"(ret)
182825d6ebaSTaylor Simpson                  : "r"(p), "r"(q), "r"(x), "r"(pred)
183825d6ebaSTaylor Simpson                  : "p0", "memory");
184825d6ebaSTaylor Simpson     return ret;
185825d6ebaSTaylor Simpson }
186825d6ebaSTaylor Simpson 
/*
 * 16 bytes of scratch memory viewed at every access width, so a store of one
 * width and a load of another can be aimed at overlapping or adjacent
 * locations of the same buffer.
 */
typedef union {
    int64_t d[2];       /* signed doublewords */
    uint64_t ud[2];     /* unsigned doublewords */
    int32_t w[4];       /* signed words */
    uint32_t uw[4];     /* unsigned words */
    int16_t h[8];       /* signed halfwords */
    uint16_t uh[8];     /* unsigned halfwords */
    int8_t b[16];       /* signed bytes */
    uint8_t ub[16];     /* unsigned bytes */
} Memory;
197825d6ebaSTaylor Simpson 
main()198825d6ebaSTaylor Simpson int main()
199825d6ebaSTaylor Simpson {
200825d6ebaSTaylor Simpson     Memory n;
201*0d57cd61STaylor Simpson     uint32_t res32;
202*0d57cd61STaylor Simpson     uint64_t res64;
203825d6ebaSTaylor Simpson 
204825d6ebaSTaylor Simpson     /*
205825d6ebaSTaylor Simpson      * Store byte combinations
206825d6ebaSTaylor Simpson      */
207825d6ebaSTaylor Simpson     n.w[0] = ~0;
208825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
209825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
210825d6ebaSTaylor Simpson 
211825d6ebaSTaylor Simpson     n.w[0] = ~0;
212825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
213825d6ebaSTaylor Simpson     check32(res32, 0x00000087);
214825d6ebaSTaylor Simpson 
215825d6ebaSTaylor Simpson     n.w[0] = ~0;
216825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
217825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
218825d6ebaSTaylor Simpson 
219825d6ebaSTaylor Simpson     n.w[0] = ~0;
220825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
221825d6ebaSTaylor Simpson     check32(res32, 0x0000ff87);
222825d6ebaSTaylor Simpson 
223825d6ebaSTaylor Simpson     n.w[0] = ~0;
224825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
225825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
226825d6ebaSTaylor Simpson 
227825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
228825d6ebaSTaylor Simpson     res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
229825d6ebaSTaylor Simpson     check64(res64, 0xffffffffffffff87LL);
230825d6ebaSTaylor Simpson 
231825d6ebaSTaylor Simpson     /*
232825d6ebaSTaylor Simpson      * Store half combinations
233825d6ebaSTaylor Simpson      */
234825d6ebaSTaylor Simpson     n.w[0] = ~0;
235825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
236825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
237825d6ebaSTaylor Simpson 
238825d6ebaSTaylor Simpson     n.w[0] = ~0;
239825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
240825d6ebaSTaylor Simpson     check32(res32, 0x0000008f);
241825d6ebaSTaylor Simpson 
242825d6ebaSTaylor Simpson     n.w[0] = ~0;
243825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
244825d6ebaSTaylor Simpson     check32(res32, 0xffff8a87);
245825d6ebaSTaylor Simpson 
246825d6ebaSTaylor Simpson     n.w[0] = ~0;
247825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
248825d6ebaSTaylor Simpson     check32(res32, 0x8a87);
249825d6ebaSTaylor Simpson 
250825d6ebaSTaylor Simpson     n.w[0] = ~0;
251825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
252825d6ebaSTaylor Simpson     check32(res32, 0x8a87ffff);
253825d6ebaSTaylor Simpson 
254825d6ebaSTaylor Simpson     n.w[0] = ~0;
255825d6ebaSTaylor Simpson     res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
256825d6ebaSTaylor Simpson     check64(res64, 0xffffffff8a87ffffLL);
257825d6ebaSTaylor Simpson 
258825d6ebaSTaylor Simpson     /*
259825d6ebaSTaylor Simpson      * Store word combinations
260825d6ebaSTaylor Simpson      */
261825d6ebaSTaylor Simpson     n.w[0] = ~0;
262825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
263825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
264825d6ebaSTaylor Simpson 
265825d6ebaSTaylor Simpson     n.w[0] = ~0;
266825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
267825d6ebaSTaylor Simpson     check32(res32, 0x00000087);
268825d6ebaSTaylor Simpson 
269825d6ebaSTaylor Simpson     n.w[0] = ~0;
270825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
271825d6ebaSTaylor Simpson     check32(res32, 0xfffff678);
272825d6ebaSTaylor Simpson 
273825d6ebaSTaylor Simpson     n.w[0] = ~0;
274825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
275825d6ebaSTaylor Simpson     check32(res32, 0x00005678);
276825d6ebaSTaylor Simpson 
277825d6ebaSTaylor Simpson     n.w[0] = ~0;
278825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
279825d6ebaSTaylor Simpson     check32(res32, 0x12345678);
280825d6ebaSTaylor Simpson 
281825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
282825d6ebaSTaylor Simpson     res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
283825d6ebaSTaylor Simpson     check64(res64, 0xffffffff12345678LL);
284825d6ebaSTaylor Simpson 
285825d6ebaSTaylor Simpson     /*
286825d6ebaSTaylor Simpson      * Store double combinations
287825d6ebaSTaylor Simpson      */
288825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
289825d6ebaSTaylor Simpson     res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
290825d6ebaSTaylor Simpson     check32(res32, 0xffffffde);
291825d6ebaSTaylor Simpson 
292825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
293825d6ebaSTaylor Simpson     res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
294825d6ebaSTaylor Simpson     check32(res32, 0x000000de);
295825d6ebaSTaylor Simpson 
296825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
297825d6ebaSTaylor Simpson     res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
298825d6ebaSTaylor Simpson     check32(res32, 0xffff9abc);
299825d6ebaSTaylor Simpson 
300825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
301825d6ebaSTaylor Simpson     res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
302825d6ebaSTaylor Simpson     check32(res32, 0x00009abc);
303825d6ebaSTaylor Simpson 
304825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
305825d6ebaSTaylor Simpson     res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
306825d6ebaSTaylor Simpson     check32(res32, 0x12345678);
307825d6ebaSTaylor Simpson 
308825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
309825d6ebaSTaylor Simpson     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
310825d6ebaSTaylor Simpson     check64(res64, 0x123456789abcdef0LL);
311825d6ebaSTaylor Simpson 
312825d6ebaSTaylor Simpson     /*
313825d6ebaSTaylor Simpson      * Predicated word stores
314825d6ebaSTaylor Simpson      */
315825d6ebaSTaylor Simpson     n.w[0] = ~0;
316*0d57cd61STaylor Simpson     res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678);
317825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
318825d6ebaSTaylor Simpson 
319825d6ebaSTaylor Simpson     n.w[0] = ~0;
320*0d57cd61STaylor Simpson     res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687);
321825d6ebaSTaylor Simpson     check32(res32, 0xffffff87);
322825d6ebaSTaylor Simpson 
323825d6ebaSTaylor Simpson     /*
324825d6ebaSTaylor Simpson      * Predicated double stores
325825d6ebaSTaylor Simpson      */
326825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
327*0d57cd61STaylor Simpson     res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678);
328825d6ebaSTaylor Simpson     check64(res64, 0xffffffffffffffffLL);
329825d6ebaSTaylor Simpson 
330825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
331*0d57cd61STaylor Simpson     res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678);
332825d6ebaSTaylor Simpson     check64(res64, 0xffffffff12345678LL);
333825d6ebaSTaylor Simpson 
334825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
335*0d57cd61STaylor Simpson     res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678);
336825d6ebaSTaylor Simpson     check64(res64, 0xffffffffffffffffLL);
337825d6ebaSTaylor Simpson 
338825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
339*0d57cd61STaylor Simpson     res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678);
340825d6ebaSTaylor Simpson     check64(res64, 0x12345678ffffffffLL);
341825d6ebaSTaylor Simpson 
342825d6ebaSTaylor Simpson     /*
343825d6ebaSTaylor Simpson      * No overlap tests
344825d6ebaSTaylor Simpson      */
345825d6ebaSTaylor Simpson     n.w[0] = ~0;
346825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
347825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
348825d6ebaSTaylor Simpson 
349825d6ebaSTaylor Simpson     n.w[0] = ~0;
350825d6ebaSTaylor Simpson     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
351825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
352825d6ebaSTaylor Simpson 
353825d6ebaSTaylor Simpson     n.w[0] = ~0;
354825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
355825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
356825d6ebaSTaylor Simpson 
357825d6ebaSTaylor Simpson     n.w[0] = ~0;
358825d6ebaSTaylor Simpson     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
359825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
360825d6ebaSTaylor Simpson 
361825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
362825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
363825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
364825d6ebaSTaylor Simpson 
365825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
366825d6ebaSTaylor Simpson     res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
367825d6ebaSTaylor Simpson     check32(res32, 0xffffffff);
368825d6ebaSTaylor Simpson 
369825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
370825d6ebaSTaylor Simpson     n.d[1] = ~0LL;
371825d6ebaSTaylor Simpson     res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
372825d6ebaSTaylor Simpson     check64(res64, 0xffffffffffffffffLL);
373825d6ebaSTaylor Simpson 
374825d6ebaSTaylor Simpson     n.d[0] = ~0LL;
375825d6ebaSTaylor Simpson     n.d[1] = ~0LL;
376825d6ebaSTaylor Simpson     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
377825d6ebaSTaylor Simpson     check64(res64, 0xffffffffffffffffLL);
378825d6ebaSTaylor Simpson 
379cab86deaSTaylor Simpson     n.w[0] = ~0;
380*0d57cd61STaylor Simpson     res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
381cab86deaSTaylor Simpson     check32(res32, 0x12345678);
382cab86deaSTaylor Simpson     check32(n.w[0], 0xc0ffeeda);
383cab86deaSTaylor Simpson 
384cab86deaSTaylor Simpson     n.w[0] = ~0;
385*0d57cd61STaylor Simpson     res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
386cab86deaSTaylor Simpson     check32(res32, 0xc0ffeeda);
387cab86deaSTaylor Simpson     check32(n.w[0], 0xc0ffeeda);
388cab86deaSTaylor Simpson 
389cab86deaSTaylor Simpson     n.w[0] = ~0;
390*0d57cd61STaylor Simpson     res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
391cab86deaSTaylor Simpson     check32(res32, 0x12345678);
392cab86deaSTaylor Simpson     check32(n.w[0], 0xc0ffeeda);
393cab86deaSTaylor Simpson 
394cab86deaSTaylor Simpson     n.w[0] = ~0;
395*0d57cd61STaylor Simpson     res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396cab86deaSTaylor Simpson     check32(res32, 0xc0ffeeda);
397cab86deaSTaylor Simpson     check32(n.w[0], 0xc0ffeeda);
398cab86deaSTaylor Simpson 
399cab86deaSTaylor Simpson     n.d[0] = ~0LL;
400*0d57cd61STaylor Simpson     res64 = pred_ld_sd(false, &n.d[0], &n.d[0],
401cab86deaSTaylor Simpson                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
402cab86deaSTaylor Simpson     check64(res64, 0x1234567812345678LL);
403cab86deaSTaylor Simpson     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
404cab86deaSTaylor Simpson 
405cab86deaSTaylor Simpson     n.d[0] = ~0LL;
406*0d57cd61STaylor Simpson     res64 = pred_ld_sd(true, &n.d[0], &n.d[0],
407cab86deaSTaylor Simpson                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
408cab86deaSTaylor Simpson     check64(res64, 0xc0ffeedac0ffeedaLL);
409cab86deaSTaylor Simpson     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
410cab86deaSTaylor Simpson 
411cab86deaSTaylor Simpson     n.d[0] = ~0LL;
412*0d57cd61STaylor Simpson     res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0],
413cab86deaSTaylor Simpson                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
414cab86deaSTaylor Simpson     check64(res64, 0x1234567812345678LL);
415cab86deaSTaylor Simpson     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
416cab86deaSTaylor Simpson 
417cab86deaSTaylor Simpson     n.d[0] = ~0LL;
418*0d57cd61STaylor Simpson     res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0],
419cab86deaSTaylor Simpson                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
420cab86deaSTaylor Simpson     check64(res64, 0xc0ffeedac0ffeedaLL);
421cab86deaSTaylor Simpson     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
422cab86deaSTaylor Simpson 
423825d6ebaSTaylor Simpson     puts(err ? "FAIL" : "PASS");
424825d6ebaSTaylor Simpson     return err;
425825d6ebaSTaylor Simpson }
426