/*
 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

18825d6ebaSTaylor Simpson #include <stdio.h>
19*0d57cd61STaylor Simpson #include <stdint.h>
20*0d57cd61STaylor Simpson #include <stdbool.h>
21*0d57cd61STaylor Simpson
22*0d57cd61STaylor Simpson int err;
23*0d57cd61STaylor Simpson
24*0d57cd61STaylor Simpson #include "hex_test.h"
25825d6ebaSTaylor Simpson
/*
 * Make sure that the :mem_noshuf packet attribute is honored.
 * This is important when the addresses overlap.
 * The store instruction in slot 1 effectively executes first,
 * followed by the load instruction in slot 0.
 */

/*
 * Define a helper NAME(p, q, x) that issues one :mem_noshuf packet which
 * stores x to *p using ST_OP and loads from *q using LD_OP, and returns the
 * loaded value as a uint32_t.
 *
 *   NAME     name of the generated static inline function
 *   ST_TYPE  C type of the stored value (and of the object p points to)
 *   LD_TYPE  C type of the object q points to
 *   ST_OP    store mnemonic pasted into the assembly text (e.g. memb)
 *   LD_OP    load mnemonic pasted into the assembly text (e.g. memub)
 *
 * The "memory" clobber tells the compiler that the packet reads and writes
 * memory it cannot see through the operand list.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline uint32_t NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    uint32_t ret; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%1) = %3\n\t" \
                 "    %0 = " #LD_OP "(%2)\n\t" \
                 "}:mem_noshuf\n" \
                 : "=r"(ret) \
                 : "r"(p), "r"(q), "r"(x) \
                 : "memory"); \
    return ret; \
}

/*
 * Define a helper NAME(p, q, x) that issues one :mem_noshuf packet which
 * stores x to *p using ST_OP and loads from *q using LD_OP, and returns the
 * loaded value as a uint64_t (used for the doubleword loads).
 *
 *   NAME     name of the generated static inline function
 *   ST_TYPE  C type of the stored value (and of the object p points to)
 *   LD_TYPE  C type of the object q points to
 *   ST_OP    store mnemonic pasted into the assembly text (e.g. memw)
 *   LD_OP    load mnemonic pasted into the assembly text (e.g. memd)
 *
 * The "memory" clobber tells the compiler that the packet reads and writes
 * memory it cannot see through the operand list.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline uint64_t NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    uint64_t ret; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%1) = %3\n\t" \
                 "    %0 = " #LD_OP "(%2)\n\t" \
                 "}:mem_noshuf\n" \
                 : "=r"(ret) \
                 : "r"(p), "r"(q), "r"(x) \
                 : "memory"); \
    return ret; \
}

61825d6ebaSTaylor Simpson /* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb,int8_t,int8_t,memb,memb)62*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lb, int8_t, int8_t, memb, memb)
63*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t, uint8_t, memb, memub)
64*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lh, int8_t, int16_t, memb, memh)
65*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t, uint16_t, memb, memuh)
66*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lw, int8_t, int32_t, memb, memw)
67*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sb_ld, int8_t, int64_t, memb, memd)
68825d6ebaSTaylor Simpson
69825d6ebaSTaylor Simpson /* Store half combinations */
70*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lb, int16_t, int8_t, memh, memb)
71*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t, uint8_t, memh, memub)
72*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lh, int16_t, int16_t, memh, memh)
73*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t, uint16_t, memh, memuh)
74*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lw, int16_t, int32_t, memh, memw)
75*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sh_ld, int16_t, int64_t, memh, memd)
76825d6ebaSTaylor Simpson
77825d6ebaSTaylor Simpson /* Store word combinations */
78*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lb, int32_t, int8_t, memw, memb)
79*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t, uint8_t, memw, memub)
80*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lh, int32_t, int16_t, memw, memh)
81*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t, uint16_t, memw, memuh)
82*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lw, int32_t, int32_t, memw, memw)
83*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sw_ld, int32_t, int64_t, memw, memd)
84825d6ebaSTaylor Simpson
85825d6ebaSTaylor Simpson /* Store double combinations */
86*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lb, int64_t, int8_t, memd, memb)
87*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t, uint8_t, memd, memub)
88*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lh, int64_t, int16_t, memd, memh)
89*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t, uint16_t, memd, memuh)
90*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lw, int64_t, int32_t, memd, memw)
91*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sd_ld, int64_t, int64_t, memd, memd)
92825d6ebaSTaylor Simpson
93*0d57cd61STaylor Simpson static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q,
94*0d57cd61STaylor Simpson int32_t x, int32_t y)
95cab86deaSTaylor Simpson {
96cab86deaSTaylor Simpson int ret;
97cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t"
98cab86deaSTaylor Simpson "%0 = %3\n\t"
99cab86deaSTaylor Simpson "{\n\t"
100cab86deaSTaylor Simpson " memw(%1) = %4\n\t"
101cab86deaSTaylor Simpson " if (!p0) %0 = memw(%2)\n\t"
102cab86deaSTaylor Simpson "}:mem_noshuf\n"
103cab86deaSTaylor Simpson : "=&r"(ret)
104cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
105cab86deaSTaylor Simpson : "p0", "memory");
106cab86deaSTaylor Simpson return ret;
107cab86deaSTaylor Simpson }
108cab86deaSTaylor Simpson
pred_lw_sw_pi(bool pred,int32_t * p,int32_t * q,int32_t x,int32_t y)109*0d57cd61STaylor Simpson static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q,
110*0d57cd61STaylor Simpson int32_t x, int32_t y)
111cab86deaSTaylor Simpson {
112cab86deaSTaylor Simpson int ret;
113cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t"
114cab86deaSTaylor Simpson "%0 = %3\n\t"
115cab86deaSTaylor Simpson "r7 = %2\n\t"
116cab86deaSTaylor Simpson "{\n\t"
117cab86deaSTaylor Simpson " memw(%1) = %4\n\t"
118cab86deaSTaylor Simpson " if (!p0) %0 = memw(r7++#4)\n\t"
119cab86deaSTaylor Simpson "}:mem_noshuf\n"
120cab86deaSTaylor Simpson : "=&r"(ret)
121cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
122cab86deaSTaylor Simpson : "r7", "p0", "memory");
123cab86deaSTaylor Simpson return ret;
124cab86deaSTaylor Simpson }
125cab86deaSTaylor Simpson
pred_ld_sd(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)126*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q,
127*0d57cd61STaylor Simpson int64_t x, int64_t y)
128cab86deaSTaylor Simpson {
129*0d57cd61STaylor Simpson int64_t ret;
130cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t"
131cab86deaSTaylor Simpson "%0 = %3\n\t"
132cab86deaSTaylor Simpson "{\n\t"
133cab86deaSTaylor Simpson " memd(%1) = %4\n\t"
134cab86deaSTaylor Simpson " if (!p0) %0 = memd(%2)\n\t"
135cab86deaSTaylor Simpson "}:mem_noshuf\n"
136cab86deaSTaylor Simpson : "=&r"(ret)
137cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
138cab86deaSTaylor Simpson : "p0", "memory");
139cab86deaSTaylor Simpson return ret;
140cab86deaSTaylor Simpson }
141cab86deaSTaylor Simpson
pred_ld_sd_pi(bool pred,int64_t * p,int64_t * q,int64_t x,int64_t y)142*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q,
143*0d57cd61STaylor Simpson int64_t x, int64_t y)
144cab86deaSTaylor Simpson {
145*0d57cd61STaylor Simpson int64_t ret;
146cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t"
147cab86deaSTaylor Simpson "%0 = %3\n\t"
148cab86deaSTaylor Simpson "r7 = %2\n\t"
149cab86deaSTaylor Simpson "{\n\t"
150cab86deaSTaylor Simpson " memd(%1) = %4\n\t"
151cab86deaSTaylor Simpson " if (!p0) %0 = memd(r7++#8)\n\t"
152cab86deaSTaylor Simpson "}:mem_noshuf\n"
153cab86deaSTaylor Simpson : "=&r"(ret)
154cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
155eaee3b6fSMukilan Thiyagarajan : "r7", "p0", "memory");
156cab86deaSTaylor Simpson return ret;
157cab86deaSTaylor Simpson }
158cab86deaSTaylor Simpson
cancel_sw_lb(bool pred,int32_t * p,int8_t * q,int32_t x)159*0d57cd61STaylor Simpson static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x)
160825d6ebaSTaylor Simpson {
161*0d57cd61STaylor Simpson int32_t ret;
162825d6ebaSTaylor Simpson asm volatile("p0 = cmp.eq(%4, #0)\n\t"
163825d6ebaSTaylor Simpson "{\n\t"
164825d6ebaSTaylor Simpson " if (!p0) memw(%1) = %3\n\t"
165825d6ebaSTaylor Simpson " %0 = memb(%2)\n\t"
166825d6ebaSTaylor Simpson "}:mem_noshuf\n"
167825d6ebaSTaylor Simpson : "=r"(ret)
168825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(pred)
169825d6ebaSTaylor Simpson : "p0", "memory");
170825d6ebaSTaylor Simpson return ret;
171825d6ebaSTaylor Simpson }
172825d6ebaSTaylor Simpson
cancel_sw_ld(bool pred,int32_t * p,int64_t * q,int32_t x)173*0d57cd61STaylor Simpson static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x)
174825d6ebaSTaylor Simpson {
175*0d57cd61STaylor Simpson int64_t ret;
176825d6ebaSTaylor Simpson asm volatile("p0 = cmp.eq(%4, #0)\n\t"
177825d6ebaSTaylor Simpson "{\n\t"
178825d6ebaSTaylor Simpson " if (!p0) memw(%1) = %3\n\t"
179825d6ebaSTaylor Simpson " %0 = memd(%2)\n\t"
180825d6ebaSTaylor Simpson "}:mem_noshuf\n"
181825d6ebaSTaylor Simpson : "=r"(ret)
182825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(pred)
183825d6ebaSTaylor Simpson : "p0", "memory");
184825d6ebaSTaylor Simpson return ret;
185825d6ebaSTaylor Simpson }
186825d6ebaSTaylor Simpson
/*
 * Sixteen bytes of scratch memory that can be viewed as doublewords, words,
 * halfwords or bytes, each in a signed and an unsigned flavor.  The tests
 * store through one view and load the overlapping location through another.
 */
typedef union {
    int64_t d[2];
    uint64_t ud[2];
    int32_t w[4];
    uint32_t uw[4];
    int16_t h[8];
    uint16_t uh[8];
    int8_t b[16];
    uint8_t ub[16];
} Memory;

main()198825d6ebaSTaylor Simpson int main()
199825d6ebaSTaylor Simpson {
200825d6ebaSTaylor Simpson Memory n;
201*0d57cd61STaylor Simpson uint32_t res32;
202*0d57cd61STaylor Simpson uint64_t res64;
203825d6ebaSTaylor Simpson
204825d6ebaSTaylor Simpson /*
205825d6ebaSTaylor Simpson * Store byte combinations
206825d6ebaSTaylor Simpson */
207825d6ebaSTaylor Simpson n.w[0] = ~0;
208825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
209825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
210825d6ebaSTaylor Simpson
211825d6ebaSTaylor Simpson n.w[0] = ~0;
212825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
213825d6ebaSTaylor Simpson check32(res32, 0x00000087);
214825d6ebaSTaylor Simpson
215825d6ebaSTaylor Simpson n.w[0] = ~0;
216825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
217825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
218825d6ebaSTaylor Simpson
219825d6ebaSTaylor Simpson n.w[0] = ~0;
220825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
221825d6ebaSTaylor Simpson check32(res32, 0x0000ff87);
222825d6ebaSTaylor Simpson
223825d6ebaSTaylor Simpson n.w[0] = ~0;
224825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
225825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
226825d6ebaSTaylor Simpson
227825d6ebaSTaylor Simpson n.d[0] = ~0LL;
228825d6ebaSTaylor Simpson res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
229825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffff87LL);
230825d6ebaSTaylor Simpson
231825d6ebaSTaylor Simpson /*
232825d6ebaSTaylor Simpson * Store half combinations
233825d6ebaSTaylor Simpson */
234825d6ebaSTaylor Simpson n.w[0] = ~0;
235825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
236825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
237825d6ebaSTaylor Simpson
238825d6ebaSTaylor Simpson n.w[0] = ~0;
239825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
240825d6ebaSTaylor Simpson check32(res32, 0x0000008f);
241825d6ebaSTaylor Simpson
242825d6ebaSTaylor Simpson n.w[0] = ~0;
243825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
244825d6ebaSTaylor Simpson check32(res32, 0xffff8a87);
245825d6ebaSTaylor Simpson
246825d6ebaSTaylor Simpson n.w[0] = ~0;
247825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
248825d6ebaSTaylor Simpson check32(res32, 0x8a87);
249825d6ebaSTaylor Simpson
250825d6ebaSTaylor Simpson n.w[0] = ~0;
251825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
252825d6ebaSTaylor Simpson check32(res32, 0x8a87ffff);
253825d6ebaSTaylor Simpson
254825d6ebaSTaylor Simpson n.w[0] = ~0;
255825d6ebaSTaylor Simpson res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
256825d6ebaSTaylor Simpson check64(res64, 0xffffffff8a87ffffLL);
257825d6ebaSTaylor Simpson
258825d6ebaSTaylor Simpson /*
259825d6ebaSTaylor Simpson * Store word combinations
260825d6ebaSTaylor Simpson */
261825d6ebaSTaylor Simpson n.w[0] = ~0;
262825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
263825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
264825d6ebaSTaylor Simpson
265825d6ebaSTaylor Simpson n.w[0] = ~0;
266825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
267825d6ebaSTaylor Simpson check32(res32, 0x00000087);
268825d6ebaSTaylor Simpson
269825d6ebaSTaylor Simpson n.w[0] = ~0;
270825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
271825d6ebaSTaylor Simpson check32(res32, 0xfffff678);
272825d6ebaSTaylor Simpson
273825d6ebaSTaylor Simpson n.w[0] = ~0;
274825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
275825d6ebaSTaylor Simpson check32(res32, 0x00005678);
276825d6ebaSTaylor Simpson
277825d6ebaSTaylor Simpson n.w[0] = ~0;
278825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
279825d6ebaSTaylor Simpson check32(res32, 0x12345678);
280825d6ebaSTaylor Simpson
281825d6ebaSTaylor Simpson n.d[0] = ~0LL;
282825d6ebaSTaylor Simpson res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
283825d6ebaSTaylor Simpson check64(res64, 0xffffffff12345678LL);
284825d6ebaSTaylor Simpson
285825d6ebaSTaylor Simpson /*
286825d6ebaSTaylor Simpson * Store double combinations
287825d6ebaSTaylor Simpson */
288825d6ebaSTaylor Simpson n.d[0] = ~0LL;
289825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
290825d6ebaSTaylor Simpson check32(res32, 0xffffffde);
291825d6ebaSTaylor Simpson
292825d6ebaSTaylor Simpson n.d[0] = ~0LL;
293825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
294825d6ebaSTaylor Simpson check32(res32, 0x000000de);
295825d6ebaSTaylor Simpson
296825d6ebaSTaylor Simpson n.d[0] = ~0LL;
297825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
298825d6ebaSTaylor Simpson check32(res32, 0xffff9abc);
299825d6ebaSTaylor Simpson
300825d6ebaSTaylor Simpson n.d[0] = ~0LL;
301825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
302825d6ebaSTaylor Simpson check32(res32, 0x00009abc);
303825d6ebaSTaylor Simpson
304825d6ebaSTaylor Simpson n.d[0] = ~0LL;
305825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
306825d6ebaSTaylor Simpson check32(res32, 0x12345678);
307825d6ebaSTaylor Simpson
308825d6ebaSTaylor Simpson n.d[0] = ~0LL;
309825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
310825d6ebaSTaylor Simpson check64(res64, 0x123456789abcdef0LL);
311825d6ebaSTaylor Simpson
312825d6ebaSTaylor Simpson /*
313825d6ebaSTaylor Simpson * Predicated word stores
314825d6ebaSTaylor Simpson */
315825d6ebaSTaylor Simpson n.w[0] = ~0;
316*0d57cd61STaylor Simpson res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678);
317825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
318825d6ebaSTaylor Simpson
319825d6ebaSTaylor Simpson n.w[0] = ~0;
320*0d57cd61STaylor Simpson res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687);
321825d6ebaSTaylor Simpson check32(res32, 0xffffff87);
322825d6ebaSTaylor Simpson
323825d6ebaSTaylor Simpson /*
324825d6ebaSTaylor Simpson * Predicated double stores
325825d6ebaSTaylor Simpson */
326825d6ebaSTaylor Simpson n.d[0] = ~0LL;
327*0d57cd61STaylor Simpson res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678);
328825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL);
329825d6ebaSTaylor Simpson
330825d6ebaSTaylor Simpson n.d[0] = ~0LL;
331*0d57cd61STaylor Simpson res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678);
332825d6ebaSTaylor Simpson check64(res64, 0xffffffff12345678LL);
333825d6ebaSTaylor Simpson
334825d6ebaSTaylor Simpson n.d[0] = ~0LL;
335*0d57cd61STaylor Simpson res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678);
336825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL);
337825d6ebaSTaylor Simpson
338825d6ebaSTaylor Simpson n.d[0] = ~0LL;
339*0d57cd61STaylor Simpson res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678);
340825d6ebaSTaylor Simpson check64(res64, 0x12345678ffffffffLL);
341825d6ebaSTaylor Simpson
342825d6ebaSTaylor Simpson /*
343825d6ebaSTaylor Simpson * No overlap tests
344825d6ebaSTaylor Simpson */
345825d6ebaSTaylor Simpson n.w[0] = ~0;
346825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
347825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
348825d6ebaSTaylor Simpson
349825d6ebaSTaylor Simpson n.w[0] = ~0;
350825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
351825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
352825d6ebaSTaylor Simpson
353825d6ebaSTaylor Simpson n.w[0] = ~0;
354825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
355825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
356825d6ebaSTaylor Simpson
357825d6ebaSTaylor Simpson n.w[0] = ~0;
358825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
359825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
360825d6ebaSTaylor Simpson
361825d6ebaSTaylor Simpson n.d[0] = ~0LL;
362825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
363825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
364825d6ebaSTaylor Simpson
365825d6ebaSTaylor Simpson n.d[0] = ~0LL;
366825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
367825d6ebaSTaylor Simpson check32(res32, 0xffffffff);
368825d6ebaSTaylor Simpson
369825d6ebaSTaylor Simpson n.d[0] = ~0LL;
370825d6ebaSTaylor Simpson n.d[1] = ~0LL;
371825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
372825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL);
373825d6ebaSTaylor Simpson
374825d6ebaSTaylor Simpson n.d[0] = ~0LL;
375825d6ebaSTaylor Simpson n.d[1] = ~0LL;
376825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
377825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL);
378825d6ebaSTaylor Simpson
379cab86deaSTaylor Simpson n.w[0] = ~0;
380*0d57cd61STaylor Simpson res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
381cab86deaSTaylor Simpson check32(res32, 0x12345678);
382cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda);
383cab86deaSTaylor Simpson
384cab86deaSTaylor Simpson n.w[0] = ~0;
385*0d57cd61STaylor Simpson res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
386cab86deaSTaylor Simpson check32(res32, 0xc0ffeeda);
387cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda);
388cab86deaSTaylor Simpson
389cab86deaSTaylor Simpson n.w[0] = ~0;
390*0d57cd61STaylor Simpson res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
391cab86deaSTaylor Simpson check32(res32, 0x12345678);
392cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda);
393cab86deaSTaylor Simpson
394cab86deaSTaylor Simpson n.w[0] = ~0;
395*0d57cd61STaylor Simpson res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396cab86deaSTaylor Simpson check32(res32, 0xc0ffeeda);
397cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda);
398cab86deaSTaylor Simpson
399cab86deaSTaylor Simpson n.d[0] = ~0LL;
400*0d57cd61STaylor Simpson res64 = pred_ld_sd(false, &n.d[0], &n.d[0],
401cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
402cab86deaSTaylor Simpson check64(res64, 0x1234567812345678LL);
403cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL);
404cab86deaSTaylor Simpson
405cab86deaSTaylor Simpson n.d[0] = ~0LL;
406*0d57cd61STaylor Simpson res64 = pred_ld_sd(true, &n.d[0], &n.d[0],
407cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
408cab86deaSTaylor Simpson check64(res64, 0xc0ffeedac0ffeedaLL);
409cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL);
410cab86deaSTaylor Simpson
411cab86deaSTaylor Simpson n.d[0] = ~0LL;
412*0d57cd61STaylor Simpson res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0],
413cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
414cab86deaSTaylor Simpson check64(res64, 0x1234567812345678LL);
415cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL);
416cab86deaSTaylor Simpson
417cab86deaSTaylor Simpson n.d[0] = ~0LL;
418*0d57cd61STaylor Simpson res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0],
419cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
420cab86deaSTaylor Simpson check64(res64, 0xc0ffeedac0ffeedaLL);
421cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL);
422cab86deaSTaylor Simpson
423825d6ebaSTaylor Simpson puts(err ? "FAIL" : "PASS");
424825d6ebaSTaylor Simpson return err;
425825d6ebaSTaylor Simpson }
426