1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <stdint.h> 20 #include <stdbool.h> 21 22 int err; 23 24 #include "hex_test.h" 25 26 /* 27 * Make sure that the :mem_noshuf packet attribute is honored. 28 * This is important when the addresses overlap. 29 * The store instruction in slot 1 effectively executes first, 30 * followed by the load instruction in slot 0. 31 */ 32 33 #define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 34 static inline uint32_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 35 { \ 36 uint32_t ret; \ 37 asm volatile("{\n\t" \ 38 " " #ST_OP "(%1) = %3\n\t" \ 39 " %0 = " #LD_OP "(%2)\n\t" \ 40 "}:mem_noshuf\n" \ 41 : "=r"(ret) \ 42 : "r"(p), "r"(q), "r"(x) \ 43 : "memory"); \ 44 return ret; \ 45 } 46 47 #define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 48 static inline uint64_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 49 { \ 50 uint64_t ret; \ 51 asm volatile("{\n\t" \ 52 " " #ST_OP "(%1) = %3\n\t" \ 53 " %0 = " #LD_OP "(%2)\n\t" \ 54 "}:mem_noshuf\n" \ 55 : "=r"(ret) \ 56 : "r"(p), "r"(q), "r"(x) \ 57 : "memory"); \ 58 return ret; \ 59 } 60 61 /* Store byte combinations */ 62 MEM_NOSHUF32(mem_noshuf_sb_lb, int8_t, int8_t, memb, memb) 63 MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t, uint8_t, memb, memub) 64 MEM_NOSHUF32(mem_noshuf_sb_lh, int8_t, int16_t, memb, memh) 65 MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t, uint16_t, memb, memuh) 66 MEM_NOSHUF32(mem_noshuf_sb_lw, int8_t, int32_t, memb, memw) 67 MEM_NOSHUF64(mem_noshuf_sb_ld, int8_t, int64_t, memb, memd) 68 69 /* Store half combinations */ 70 MEM_NOSHUF32(mem_noshuf_sh_lb, int16_t, int8_t, memh, memb) 71 MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t, uint8_t, memh, memub) 72 MEM_NOSHUF32(mem_noshuf_sh_lh, int16_t, int16_t, memh, memh) 73 MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t, uint16_t, memh, memuh) 74 MEM_NOSHUF32(mem_noshuf_sh_lw, int16_t, int32_t, memh, memw) 75 MEM_NOSHUF64(mem_noshuf_sh_ld, int16_t, int64_t, memh, memd) 76 77 /* Store word combinations */ 78 MEM_NOSHUF32(mem_noshuf_sw_lb, int32_t, int8_t, memw, memb) 79 MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t, uint8_t, memw, memub) 80 MEM_NOSHUF32(mem_noshuf_sw_lh, int32_t, int16_t, memw, memh) 81 MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t, uint16_t, memw, memuh) 82 MEM_NOSHUF32(mem_noshuf_sw_lw, int32_t, int32_t, memw, memw) 83 MEM_NOSHUF64(mem_noshuf_sw_ld, int32_t, int64_t, memw, memd) 84 85 /* Store double combinations */ 86 MEM_NOSHUF32(mem_noshuf_sd_lb, int64_t, int8_t, memd, memb) 87 MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t, uint8_t, memd, memub) 88 MEM_NOSHUF32(mem_noshuf_sd_lh, int64_t, int16_t, memd, memh) 89 MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t, uint16_t, memd, memuh) 90 MEM_NOSHUF32(mem_noshuf_sd_lw, int64_t, int32_t, memd, memw) 91 MEM_NOSHUF64(mem_noshuf_sd_ld, int64_t, int64_t, memd, memd) 92 93 static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q, 94 int32_t x, int32_t y) 95 { 96 int ret; 97 asm volatile("p0 = cmp.eq(%5, #0)\n\t" 98 "%0 = %3\n\t" 99 "{\n\t" 100 " memw(%1) = %4\n\t" 101 " if (!p0) %0 = memw(%2)\n\t" 102 "}:mem_noshuf\n" 103 : "=&r"(ret) 104 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 105 : "p0", "memory"); 106 return ret; 107 } 108 109 static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q, 110 int32_t x, int32_t y) 111 { 112 int ret; 113 asm volatile("p0 = cmp.eq(%5, #0)\n\t" 114 "%0 = %3\n\t" 115 "r7 = %2\n\t" 116 "{\n\t" 117 " memw(%1) = %4\n\t" 118 " if (!p0) %0 = memw(r7++#4)\n\t" 119 "}:mem_noshuf\n" 120 : "=&r"(ret) 121 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 122 : "r7", "p0", "memory"); 123 return ret; 124 } 125 126 static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q, 127 int64_t x, int64_t y) 128 { 129 int64_t ret; 130 asm volatile("p0 = cmp.eq(%5, #0)\n\t" 131 "%0 = %3\n\t" 132 "{\n\t" 133 " memd(%1) = %4\n\t" 134 " if (!p0) %0 = memd(%2)\n\t" 135 "}:mem_noshuf\n" 136 : "=&r"(ret) 137 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 138 : "p0", "memory"); 139 return ret; 140 } 141 142 static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q, 143 int64_t x, int64_t y) 144 { 145 int64_t ret; 146 asm volatile("p0 = cmp.eq(%5, #0)\n\t" 147 "%0 = %3\n\t" 148 "r7 = %2\n\t" 149 "{\n\t" 150 " memd(%1) = %4\n\t" 151 " if (!p0) %0 = memd(r7++#8)\n\t" 152 "}:mem_noshuf\n" 153 : "=&r"(ret) 154 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) 155 : "r7", "p0", "memory"); 156 return ret; 157 } 158 159 static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x) 160 { 161 int32_t ret; 162 asm volatile("p0 = cmp.eq(%4, #0)\n\t" 163 "{\n\t" 164 " if (!p0) memw(%1) = %3\n\t" 165 " %0 = memb(%2)\n\t" 166 "}:mem_noshuf\n" 167 : "=r"(ret) 168 : "r"(p), "r"(q), "r"(x), "r"(pred) 169 : "p0", "memory"); 170 return ret; 171 } 172 173 static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x) 174 { 175 int64_t ret; 176 asm volatile("p0 = cmp.eq(%4, #0)\n\t" 177 "{\n\t" 178 " if (!p0) memw(%1) = %3\n\t" 179 " %0 = memd(%2)\n\t" 180 "}:mem_noshuf\n" 181 : "=r"(ret) 182 : "r"(p), "r"(q), "r"(x), "r"(pred) 183 : "p0", "memory"); 184 return ret; 185 } 186 187 typedef union { 188 int64_t d[2]; 189 uint64_t ud[2]; 190 int32_t w[4]; 191 uint32_t uw[4]; 192 int16_t h[8]; 193 uint16_t uh[8]; 194 int8_t b[16]; 195 uint8_t ub[16]; 196 } Memory; 197 198 int main() 199 { 200 Memory n; 201 uint32_t res32; 202 uint64_t res64; 203 204 /* 205 * Store byte combinations 206 */ 207 n.w[0] = ~0; 208 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87); 209 check32(res32, 0xffffff87); 210 211 n.w[0] = ~0; 212 res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87); 213 check32(res32, 0x00000087); 214 215 n.w[0] = ~0; 216 res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87); 217 check32(res32, 0xffffff87); 218 219 n.w[0] = ~0; 220 res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87); 221 check32(res32, 0x0000ff87); 222 223 n.w[0] = ~0; 224 res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87); 225 check32(res32, 0xffffff87); 226 227 n.d[0] = ~0LL; 228 res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87); 229 check64(res64, 0xffffffffffffff87LL); 230 231 /* 232 * Store half combinations 233 */ 234 n.w[0] = ~0; 235 res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787); 236 check32(res32, 0xffffff87); 237 238 n.w[0] = ~0; 239 res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87); 240 check32(res32, 0x0000008f); 241 242 n.w[0] = ~0; 243 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87); 244 check32(res32, 0xffff8a87); 245 246 n.w[0] = ~0; 247 res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87); 248 check32(res32, 0x8a87); 249 250 n.w[0] = ~0; 251 res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87); 252 check32(res32, 0x8a87ffff); 253 254 n.w[0] = ~0; 255 res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87); 256 check64(res64, 0xffffffff8a87ffffLL); 257 258 /* 259 * Store word combinations 260 */ 261 n.w[0] = ~0; 262 res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687); 263 check32(res32, 0xffffff87); 264 265 n.w[0] = ~0; 266 res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687); 267 check32(res32, 0x00000087); 268 269 n.w[0] = ~0; 270 res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678); 271 check32(res32, 0xfffff678); 272 273 n.w[0] = ~0; 274 res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678); 275 check32(res32, 0x00005678); 276 277 n.w[0] = ~0; 278 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678); 279 check32(res32, 0x12345678); 280 281 n.d[0] = ~0LL; 282 res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678); 283 check64(res64, 0xffffffff12345678LL); 284 285 /* 286 * Store double combinations 287 */ 288 n.d[0] = ~0LL; 289 res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0); 290 check32(res32, 0xffffffde); 291 292 n.d[0] = ~0LL; 293 res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0); 294 check32(res32, 0x000000de); 295 296 n.d[0] = ~0LL; 297 res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0); 298 check32(res32, 0xffff9abc); 299 300 n.d[0] = ~0LL; 301 res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0); 302 check32(res32, 0x00009abc); 303 304 n.d[0] = ~0LL; 305 res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0); 306 check32(res32, 0x12345678); 307 308 n.d[0] = ~0LL; 309 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0); 310 check64(res64, 0x123456789abcdef0LL); 311 312 /* 313 * Predicated word stores 314 */ 315 n.w[0] = ~0; 316 res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678); 317 check32(res32, 0xffffffff); 318 319 n.w[0] = ~0; 320 res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687); 321 check32(res32, 0xffffff87); 322 323 /* 324 * Predicated double stores 325 */ 326 n.d[0] = ~0LL; 327 res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678); 328 check64(res64, 0xffffffffffffffffLL); 329 330 n.d[0] = ~0LL; 331 res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678); 332 check64(res64, 0xffffffff12345678LL); 333 334 n.d[0] = ~0LL; 335 res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678); 336 check64(res64, 0xffffffffffffffffLL); 337 338 n.d[0] = ~0LL; 339 res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678); 340 check64(res64, 0x12345678ffffffffLL); 341 342 /* 343 * No overlap tests 344 */ 345 n.w[0] = ~0; 346 res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87); 347 check32(res32, 0xffffffff); 348 349 n.w[0] = ~0; 350 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87); 351 check32(res32, 0xffffffff); 352 353 n.w[0] = ~0; 354 res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787); 355 check32(res32, 0xffffffff); 356 357 n.w[0] = ~0; 358 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787); 359 check32(res32, 0xffffffff); 360 361 n.d[0] = ~0LL; 362 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678); 363 check32(res32, 0xffffffff); 364 365 n.d[0] = ~0LL; 366 res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678); 367 check32(res32, 0xffffffff); 368 369 n.d[0] = ~0LL; 370 n.d[1] = ~0LL; 371 res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL); 372 check64(res64, 0xffffffffffffffffLL); 373 374 n.d[0] = ~0LL; 375 n.d[1] = ~0LL; 376 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL); 377 check64(res64, 0xffffffffffffffffLL); 378 379 n.w[0] = ~0; 380 res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 381 check32(res32, 0x12345678); 382 check32(n.w[0], 0xc0ffeeda); 383 384 n.w[0] = ~0; 385 res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 386 check32(res32, 0xc0ffeeda); 387 check32(n.w[0], 0xc0ffeeda); 388 389 n.w[0] = ~0; 390 res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 391 check32(res32, 0x12345678); 392 check32(n.w[0], 0xc0ffeeda); 393 394 n.w[0] = ~0; 395 res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); 396 check32(res32, 0xc0ffeeda); 397 check32(n.w[0], 0xc0ffeeda); 398 399 n.d[0] = ~0LL; 400 res64 = pred_ld_sd(false, &n.d[0], &n.d[0], 401 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 402 check64(res64, 0x1234567812345678LL); 403 check64(n.d[0], 0xc0ffeedac0ffeedaLL); 404 405 n.d[0] = ~0LL; 406 res64 = pred_ld_sd(true, &n.d[0], &n.d[0], 407 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 408 check64(res64, 0xc0ffeedac0ffeedaLL); 409 check64(n.d[0], 0xc0ffeedac0ffeedaLL); 410 411 n.d[0] = ~0LL; 412 res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0], 413 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 414 check64(res64, 0x1234567812345678LL); 415 check64(n.d[0], 0xc0ffeedac0ffeedaLL); 416 417 n.d[0] = ~0LL; 418 res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0], 419 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); 420 check64(res64, 0xc0ffeedac0ffeedaLL); 421 check64(n.d[0], 0xc0ffeedac0ffeedaLL); 422 423 puts(err ? "FAIL" : "PASS"); 424 return err; 425 } 426