1825d6ebaSTaylor Simpson /* 2*0d57cd61STaylor Simpson * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3825d6ebaSTaylor Simpson * 4825d6ebaSTaylor Simpson * This program is free software; you can redistribute it and/or modify 5825d6ebaSTaylor Simpson * it under the terms of the GNU General Public License as published by 6825d6ebaSTaylor Simpson * the Free Software Foundation; either version 2 of the License, or 7825d6ebaSTaylor Simpson * (at your option) any later version. 8825d6ebaSTaylor Simpson * 9825d6ebaSTaylor Simpson * This program is distributed in the hope that it will be useful, 10825d6ebaSTaylor Simpson * but WITHOUT ANY WARRANTY; without even the implied warranty of 11825d6ebaSTaylor Simpson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12825d6ebaSTaylor Simpson * GNU General Public License for more details. 13825d6ebaSTaylor Simpson * 14825d6ebaSTaylor Simpson * You should have received a copy of the GNU General Public License 15825d6ebaSTaylor Simpson * along with this program; if not, see <http://www.gnu.org/licenses/>. 16825d6ebaSTaylor Simpson */ 17825d6ebaSTaylor Simpson 18825d6ebaSTaylor Simpson #include <stdio.h> 19*0d57cd61STaylor Simpson #include <stdint.h> 20*0d57cd61STaylor Simpson #include <stdbool.h> 21*0d57cd61STaylor Simpson 22*0d57cd61STaylor Simpson int err; 23*0d57cd61STaylor Simpson 24*0d57cd61STaylor Simpson #include "hex_test.h" 25825d6ebaSTaylor Simpson 26825d6ebaSTaylor Simpson /* 27825d6ebaSTaylor Simpson * Make sure that the :mem_noshuf packet attribute is honored. 28825d6ebaSTaylor Simpson * This is important when the addresses overlap. 29825d6ebaSTaylor Simpson * The store instruction in slot 1 effectively executes first, 30825d6ebaSTaylor Simpson * followed by the load instruction in slot 0. 31825d6ebaSTaylor Simpson */ 32825d6ebaSTaylor Simpson 33825d6ebaSTaylor Simpson #define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 34*0d57cd61STaylor Simpson static inline uint32_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 35825d6ebaSTaylor Simpson { \ 36*0d57cd61STaylor Simpson uint32_t ret; \ 37825d6ebaSTaylor Simpson asm volatile("{\n\t" \ 38825d6ebaSTaylor Simpson " " #ST_OP "(%1) = %3\n\t" \ 39825d6ebaSTaylor Simpson " %0 = " #LD_OP "(%2)\n\t" \ 40825d6ebaSTaylor Simpson "}:mem_noshuf\n" \ 41825d6ebaSTaylor Simpson : "=r"(ret) \ 42825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x) \ 43825d6ebaSTaylor Simpson : "memory"); \ 44825d6ebaSTaylor Simpson return ret; \ 45825d6ebaSTaylor Simpson } 46825d6ebaSTaylor Simpson 47825d6ebaSTaylor Simpson #define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 48*0d57cd61STaylor Simpson static inline uint64_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 49825d6ebaSTaylor Simpson { \ 50*0d57cd61STaylor Simpson uint64_t ret; \ 51825d6ebaSTaylor Simpson asm volatile("{\n\t" \ 52825d6ebaSTaylor Simpson " " #ST_OP "(%1) = %3\n\t" \ 53825d6ebaSTaylor Simpson " %0 = " #LD_OP "(%2)\n\t" \ 54825d6ebaSTaylor Simpson "}:mem_noshuf\n" \ 55825d6ebaSTaylor Simpson : "=r"(ret) \ 56825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x) \ 57825d6ebaSTaylor Simpson : "memory"); \ 58825d6ebaSTaylor Simpson return ret; \ 59825d6ebaSTaylor Simpson } 60825d6ebaSTaylor Simpson 61825d6ebaSTaylor Simpson /* Store byte combinations */ 62*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lb, int8_t, int8_t, memb, memb) 63*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t, uint8_t, memb, memub) 64*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lh, int8_t, int16_t, memb, memh) 65*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t, uint16_t, memb, memuh) 66*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sb_lw, int8_t, int32_t, memb, memw) 67*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sb_ld, int8_t, int64_t, memb, memd) 68825d6ebaSTaylor Simpson 69825d6ebaSTaylor Simpson /* Store half combinations */ 70*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lb, int16_t, int8_t, memh, memb) 71*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t, uint8_t, memh, memub) 72*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lh, int16_t, int16_t, memh, memh) 73*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t, uint16_t, memh, memuh) 74*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sh_lw, int16_t, int32_t, memh, memw) 75*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sh_ld, int16_t, int64_t, memh, memd) 76825d6ebaSTaylor Simpson 77825d6ebaSTaylor Simpson /* Store word combinations */ 78*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lb, int32_t, int8_t, memw, memb) 79*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t, uint8_t, memw, memub) 80*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lh, int32_t, int16_t, memw, memh) 81*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t, uint16_t, memw, memuh) 82*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sw_lw, int32_t, int32_t, memw, memw) 83*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sw_ld, int32_t, int64_t, memw, memd) 84825d6ebaSTaylor Simpson 85825d6ebaSTaylor Simpson /* Store double combinations */ 86*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lb, int64_t, int8_t, memd, memb) 87*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t, uint8_t, memd, memub) 88*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lh, int64_t, int16_t, memd, memh) 89*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t, uint16_t, memd, memuh) 90*0d57cd61STaylor Simpson MEM_NOSHUF32(mem_noshuf_sd_lw, int64_t, int32_t, memd, memw) 91*0d57cd61STaylor Simpson MEM_NOSHUF64(mem_noshuf_sd_ld, int64_t, int64_t, memd, memd) 92825d6ebaSTaylor Simpson 93*0d57cd61STaylor Simpson static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q, 94*0d57cd61STaylor Simpson int32_t x, int32_t y) 95cab86deaSTaylor Simpson { 96cab86deaSTaylor Simpson int ret; 97cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t" 98cab86deaSTaylor Simpson "%0 = %3\n\t" 99cab86deaSTaylor Simpson "{\n\t" 100cab86deaSTaylor Simpson " memw(%1) = %4\n\t" 101cab86deaSTaylor Simpson " if (!p0) %0 = memw(%2)\n\t" 102cab86deaSTaylor Simpson "}:mem_noshuf\n" 103cab86deaSTaylor Simpson : "=&r"(ret) 104cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 105cab86deaSTaylor Simpson : "p0", "memory"); 106cab86deaSTaylor Simpson return ret; 107cab86deaSTaylor Simpson } 108cab86deaSTaylor Simpson 109*0d57cd61STaylor Simpson static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q, 110*0d57cd61STaylor Simpson int32_t x, int32_t y) 111cab86deaSTaylor Simpson { 112cab86deaSTaylor Simpson int ret; 113cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t" 114cab86deaSTaylor Simpson "%0 = %3\n\t" 115cab86deaSTaylor Simpson "r7 = %2\n\t" 116cab86deaSTaylor Simpson "{\n\t" 117cab86deaSTaylor Simpson " memw(%1) = %4\n\t" 118cab86deaSTaylor Simpson " if (!p0) %0 = memw(r7++#4)\n\t" 119cab86deaSTaylor Simpson "}:mem_noshuf\n" 120cab86deaSTaylor Simpson : "=&r"(ret) 121cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 122cab86deaSTaylor Simpson : "r7", "p0", "memory"); 123cab86deaSTaylor Simpson return ret; 124cab86deaSTaylor Simpson } 125cab86deaSTaylor Simpson 126*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q, 127*0d57cd61STaylor Simpson int64_t x, int64_t y) 128cab86deaSTaylor Simpson { 129*0d57cd61STaylor Simpson int64_t ret; 130cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t" 131cab86deaSTaylor Simpson "%0 = %3\n\t" 132cab86deaSTaylor Simpson "{\n\t" 133cab86deaSTaylor Simpson " memd(%1) = %4\n\t" 134cab86deaSTaylor Simpson " if (!p0) %0 = memd(%2)\n\t" 135cab86deaSTaylor Simpson "}:mem_noshuf\n" 136cab86deaSTaylor Simpson : "=&r"(ret) 137cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 138cab86deaSTaylor Simpson : "p0", "memory"); 139cab86deaSTaylor Simpson return ret; 140cab86deaSTaylor Simpson } 141cab86deaSTaylor Simpson 142*0d57cd61STaylor Simpson static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q, 143*0d57cd61STaylor Simpson int64_t x, int64_t y) 144cab86deaSTaylor Simpson { 145*0d57cd61STaylor Simpson int64_t ret; 146cab86deaSTaylor Simpson asm volatile("p0 = cmp.eq(%5, #0)\n\t" 147cab86deaSTaylor Simpson "%0 = %3\n\t" 148cab86deaSTaylor Simpson "r7 = %2\n\t" 149cab86deaSTaylor Simpson "{\n\t" 150cab86deaSTaylor Simpson " memd(%1) = %4\n\t" 151cab86deaSTaylor Simpson " if (!p0) %0 = memd(r7++#8)\n\t" 152cab86deaSTaylor Simpson "}:mem_noshuf\n" 153cab86deaSTaylor Simpson : "=&r"(ret) 154cab86deaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 155eaee3b6fSMukilan Thiyagarajan : "r7", "p0", "memory"); 156cab86deaSTaylor Simpson return ret; 157cab86deaSTaylor Simpson } 158cab86deaSTaylor Simpson 159*0d57cd61STaylor Simpson static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x) 160825d6ebaSTaylor Simpson { 161*0d57cd61STaylor Simpson int32_t ret; 162825d6ebaSTaylor Simpson asm volatile("p0 = cmp.eq(%4, #0)\n\t" 163825d6ebaSTaylor Simpson "{\n\t" 164825d6ebaSTaylor Simpson " if (!p0) memw(%1) = %3\n\t" 165825d6ebaSTaylor Simpson " %0 = memb(%2)\n\t" 166825d6ebaSTaylor Simpson "}:mem_noshuf\n" 167825d6ebaSTaylor Simpson : "=r"(ret) 168825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(pred) 169825d6ebaSTaylor Simpson : "p0", "memory"); 170825d6ebaSTaylor Simpson return ret; 171825d6ebaSTaylor Simpson } 172825d6ebaSTaylor Simpson 173*0d57cd61STaylor Simpson static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x) 174825d6ebaSTaylor Simpson { 175*0d57cd61STaylor Simpson int64_t ret; 176825d6ebaSTaylor Simpson asm volatile("p0 = cmp.eq(%4, #0)\n\t" 177825d6ebaSTaylor Simpson "{\n\t" 178825d6ebaSTaylor Simpson " if (!p0) memw(%1) = %3\n\t" 179825d6ebaSTaylor Simpson " %0 = memd(%2)\n\t" 180825d6ebaSTaylor Simpson "}:mem_noshuf\n" 181825d6ebaSTaylor Simpson : "=r"(ret) 182825d6ebaSTaylor Simpson : "r"(p), "r"(q), "r"(x), "r"(pred) 183825d6ebaSTaylor Simpson : "p0", "memory"); 184825d6ebaSTaylor Simpson return ret; 185825d6ebaSTaylor Simpson } 186825d6ebaSTaylor Simpson 187825d6ebaSTaylor Simpson typedef union { 188*0d57cd61STaylor Simpson int64_t d[2]; 189*0d57cd61STaylor Simpson uint64_t ud[2]; 190*0d57cd61STaylor Simpson int32_t w[4]; 191*0d57cd61STaylor Simpson uint32_t uw[4]; 192*0d57cd61STaylor Simpson int16_t h[8]; 193*0d57cd61STaylor Simpson uint16_t uh[8]; 194*0d57cd61STaylor Simpson int8_t b[16]; 195*0d57cd61STaylor Simpson uint8_t ub[16]; 196825d6ebaSTaylor Simpson } Memory; 197825d6ebaSTaylor Simpson 198825d6ebaSTaylor Simpson int main() 199825d6ebaSTaylor Simpson { 200825d6ebaSTaylor Simpson Memory n; 201*0d57cd61STaylor Simpson uint32_t res32; 202*0d57cd61STaylor Simpson uint64_t res64; 203825d6ebaSTaylor Simpson 204825d6ebaSTaylor Simpson /* 205825d6ebaSTaylor Simpson * Store byte combinations 206825d6ebaSTaylor Simpson */ 207825d6ebaSTaylor Simpson n.w[0] = ~0; 208825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87); 209825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 210825d6ebaSTaylor Simpson 211825d6ebaSTaylor Simpson n.w[0] = ~0; 212825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87); 213825d6ebaSTaylor Simpson check32(res32, 0x00000087); 214825d6ebaSTaylor Simpson 215825d6ebaSTaylor Simpson n.w[0] = ~0; 216825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87); 217825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 218825d6ebaSTaylor Simpson 219825d6ebaSTaylor Simpson n.w[0] = ~0; 220825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87); 221825d6ebaSTaylor Simpson check32(res32, 0x0000ff87); 222825d6ebaSTaylor Simpson 223825d6ebaSTaylor Simpson n.w[0] = ~0; 224825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87); 225825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 226825d6ebaSTaylor Simpson 227825d6ebaSTaylor Simpson n.d[0] = ~0LL; 228825d6ebaSTaylor Simpson res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87); 229825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffff87LL); 230825d6ebaSTaylor Simpson 231825d6ebaSTaylor Simpson /* 232825d6ebaSTaylor Simpson * Store half combinations 233825d6ebaSTaylor Simpson */ 234825d6ebaSTaylor Simpson n.w[0] = ~0; 235825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787); 236825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 237825d6ebaSTaylor Simpson 238825d6ebaSTaylor Simpson n.w[0] = ~0; 239825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87); 240825d6ebaSTaylor Simpson check32(res32, 0x0000008f); 241825d6ebaSTaylor Simpson 242825d6ebaSTaylor Simpson n.w[0] = ~0; 243825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87); 244825d6ebaSTaylor Simpson check32(res32, 0xffff8a87); 245825d6ebaSTaylor Simpson 246825d6ebaSTaylor Simpson n.w[0] = ~0; 247825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87); 248825d6ebaSTaylor Simpson check32(res32, 0x8a87); 249825d6ebaSTaylor Simpson 250825d6ebaSTaylor Simpson n.w[0] = ~0; 251825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87); 252825d6ebaSTaylor Simpson check32(res32, 0x8a87ffff); 253825d6ebaSTaylor Simpson 254825d6ebaSTaylor Simpson n.w[0] = ~0; 255825d6ebaSTaylor Simpson res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87); 256825d6ebaSTaylor Simpson check64(res64, 0xffffffff8a87ffffLL); 257825d6ebaSTaylor Simpson 258825d6ebaSTaylor Simpson /* 259825d6ebaSTaylor Simpson * Store word combinations 260825d6ebaSTaylor Simpson */ 261825d6ebaSTaylor Simpson n.w[0] = ~0; 262825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687); 263825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 264825d6ebaSTaylor Simpson 265825d6ebaSTaylor Simpson n.w[0] = ~0; 266825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687); 267825d6ebaSTaylor Simpson check32(res32, 0x00000087); 268825d6ebaSTaylor Simpson 269825d6ebaSTaylor Simpson n.w[0] = ~0; 270825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678); 271825d6ebaSTaylor Simpson check32(res32, 0xfffff678); 272825d6ebaSTaylor Simpson 273825d6ebaSTaylor Simpson n.w[0] = ~0; 274825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678); 275825d6ebaSTaylor Simpson check32(res32, 0x00005678); 276825d6ebaSTaylor Simpson 277825d6ebaSTaylor Simpson n.w[0] = ~0; 278825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678); 279825d6ebaSTaylor Simpson check32(res32, 0x12345678); 280825d6ebaSTaylor Simpson 281825d6ebaSTaylor Simpson n.d[0] = ~0LL; 282825d6ebaSTaylor Simpson res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678); 283825d6ebaSTaylor Simpson check64(res64, 0xffffffff12345678LL); 284825d6ebaSTaylor Simpson 285825d6ebaSTaylor Simpson /* 286825d6ebaSTaylor Simpson * Store double combinations 287825d6ebaSTaylor Simpson */ 288825d6ebaSTaylor Simpson n.d[0] = ~0LL; 289825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0); 290825d6ebaSTaylor Simpson check32(res32, 0xffffffde); 291825d6ebaSTaylor Simpson 292825d6ebaSTaylor Simpson n.d[0] = ~0LL; 293825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0); 294825d6ebaSTaylor Simpson check32(res32, 0x000000de); 295825d6ebaSTaylor Simpson 296825d6ebaSTaylor Simpson n.d[0] = ~0LL; 297825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0); 298825d6ebaSTaylor Simpson check32(res32, 0xffff9abc); 299825d6ebaSTaylor Simpson 300825d6ebaSTaylor Simpson n.d[0] = ~0LL; 301825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0); 302825d6ebaSTaylor Simpson check32(res32, 0x00009abc); 303825d6ebaSTaylor Simpson 304825d6ebaSTaylor Simpson n.d[0] = ~0LL; 305825d6ebaSTaylor Simpson res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0); 306825d6ebaSTaylor Simpson check32(res32, 0x12345678); 307825d6ebaSTaylor Simpson 308825d6ebaSTaylor Simpson n.d[0] = ~0LL; 309825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0); 310825d6ebaSTaylor Simpson check64(res64, 0x123456789abcdef0LL); 311825d6ebaSTaylor Simpson 312825d6ebaSTaylor Simpson /* 313825d6ebaSTaylor Simpson * Predicated word stores 314825d6ebaSTaylor Simpson */ 315825d6ebaSTaylor Simpson n.w[0] = ~0; 316*0d57cd61STaylor Simpson res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678); 317825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 318825d6ebaSTaylor Simpson 319825d6ebaSTaylor Simpson n.w[0] = ~0; 320*0d57cd61STaylor Simpson res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687); 321825d6ebaSTaylor Simpson check32(res32, 0xffffff87); 322825d6ebaSTaylor Simpson 323825d6ebaSTaylor Simpson /* 324825d6ebaSTaylor Simpson * Predicated double stores 325825d6ebaSTaylor Simpson */ 326825d6ebaSTaylor Simpson n.d[0] = ~0LL; 327*0d57cd61STaylor Simpson res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678); 328825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL); 329825d6ebaSTaylor Simpson 330825d6ebaSTaylor Simpson n.d[0] = ~0LL; 331*0d57cd61STaylor Simpson res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678); 332825d6ebaSTaylor Simpson check64(res64, 0xffffffff12345678LL); 333825d6ebaSTaylor Simpson 334825d6ebaSTaylor Simpson n.d[0] = ~0LL; 335*0d57cd61STaylor Simpson res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678); 336825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL); 337825d6ebaSTaylor Simpson 338825d6ebaSTaylor Simpson n.d[0] = ~0LL; 339*0d57cd61STaylor Simpson res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678); 340825d6ebaSTaylor Simpson check64(res64, 0x12345678ffffffffLL); 341825d6ebaSTaylor Simpson 342825d6ebaSTaylor Simpson /* 343825d6ebaSTaylor Simpson * No overlap tests 344825d6ebaSTaylor Simpson */ 345825d6ebaSTaylor Simpson n.w[0] = ~0; 346825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87); 347825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 348825d6ebaSTaylor Simpson 349825d6ebaSTaylor Simpson n.w[0] = ~0; 350825d6ebaSTaylor Simpson res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87); 351825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 352825d6ebaSTaylor Simpson 353825d6ebaSTaylor Simpson n.w[0] = ~0; 354825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787); 355825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 356825d6ebaSTaylor Simpson 357825d6ebaSTaylor Simpson n.w[0] = ~0; 358825d6ebaSTaylor Simpson res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787); 359825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 360825d6ebaSTaylor Simpson 361825d6ebaSTaylor Simpson n.d[0] = ~0LL; 362825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678); 363825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 364825d6ebaSTaylor Simpson 365825d6ebaSTaylor Simpson n.d[0] = ~0LL; 366825d6ebaSTaylor Simpson res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678); 367825d6ebaSTaylor Simpson check32(res32, 0xffffffff); 368825d6ebaSTaylor Simpson 369825d6ebaSTaylor Simpson n.d[0] = ~0LL; 370825d6ebaSTaylor Simpson n.d[1] = ~0LL; 371825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL); 372825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL); 373825d6ebaSTaylor Simpson 374825d6ebaSTaylor Simpson n.d[0] = ~0LL; 375825d6ebaSTaylor Simpson n.d[1] = ~0LL; 376825d6ebaSTaylor Simpson res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL); 377825d6ebaSTaylor Simpson check64(res64, 0xffffffffffffffffLL); 378825d6ebaSTaylor Simpson 379cab86deaSTaylor Simpson n.w[0] = ~0; 380*0d57cd61STaylor Simpson res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 381cab86deaSTaylor Simpson check32(res32, 0x12345678); 382cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda); 383cab86deaSTaylor Simpson 384cab86deaSTaylor Simpson n.w[0] = ~0; 385*0d57cd61STaylor Simpson res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 386cab86deaSTaylor Simpson check32(res32, 0xc0ffeeda); 387cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda); 388cab86deaSTaylor Simpson 389cab86deaSTaylor Simpson n.w[0] = ~0; 390*0d57cd61STaylor Simpson res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 391cab86deaSTaylor Simpson check32(res32, 0x12345678); 392cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda); 393cab86deaSTaylor Simpson 394cab86deaSTaylor Simpson n.w[0] = ~0; 395*0d57cd61STaylor Simpson res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 396cab86deaSTaylor Simpson check32(res32, 0xc0ffeeda); 397cab86deaSTaylor Simpson check32(n.w[0], 0xc0ffeeda); 398cab86deaSTaylor Simpson 399cab86deaSTaylor Simpson n.d[0] = ~0LL; 400*0d57cd61STaylor Simpson res64 = pred_ld_sd(false, &n.d[0], &n.d[0], 401cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 402cab86deaSTaylor Simpson check64(res64, 0x1234567812345678LL); 403cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL); 404cab86deaSTaylor Simpson 405cab86deaSTaylor Simpson n.d[0] = ~0LL; 406*0d57cd61STaylor Simpson res64 = pred_ld_sd(true, &n.d[0], &n.d[0], 407cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 408cab86deaSTaylor Simpson check64(res64, 0xc0ffeedac0ffeedaLL); 409cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL); 410cab86deaSTaylor Simpson 411cab86deaSTaylor Simpson n.d[0] = ~0LL; 412*0d57cd61STaylor Simpson res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0], 413cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 414cab86deaSTaylor Simpson check64(res64, 0x1234567812345678LL); 415cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL); 416cab86deaSTaylor Simpson 417cab86deaSTaylor Simpson n.d[0] = ~0LL; 418*0d57cd61STaylor Simpson res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0], 419cab86deaSTaylor Simpson 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 420cab86deaSTaylor Simpson check64(res64, 0xc0ffeedac0ffeedaLL); 421cab86deaSTaylor Simpson check64(n.d[0], 0xc0ffeedac0ffeedaLL); 422cab86deaSTaylor Simpson 423825d6ebaSTaylor Simpson puts(err ? "FAIL" : "PASS"); 424825d6ebaSTaylor Simpson return err; 425825d6ebaSTaylor Simpson } 426