/*
 *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>

/*
 *  Make sure that the :mem_noshuf packet attribute is honored.
 *  This is important when the addresses overlap.
 *  The store instruction in slot 1 effectively executes first,
 *  followed by the load instruction in slot 0.
 */

/*
 * Common body of the store/load helpers.  Expands to
 *
 *     static inline RET_TYPE NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x)
 *
 * which issues one :mem_noshuf packet that stores x through p using ST_OP
 * and loads through q using LD_OP.  The loaded value is returned.
 * ST_OP and LD_OP are pasted into the assembly text as-is.
 */
#define MEM_NOSHUF_HELPER(RET_TYPE, NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline RET_TYPE NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    RET_TYPE loaded; \
    __asm__ __volatile__( \
        "{\n\t" \
        " " #ST_OP "(%1) = %3\n\t" \
        " %0 = " #LD_OP "(%2)\n\t" \
        "}:mem_noshuf\n" \
        : "=r"(loaded) \
        : "r"(p), "r"(q), "r"(x) \
        : "memory"); \
    return loaded; \
}

/* Helper whose load result is returned as an unsigned int */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
    MEM_NOSHUF_HELPER(unsigned int, NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP)

/* Helper whose load result is returned as an unsigned long long */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
    MEM_NOSHUF_HELPER(unsigned long long, NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP)

/* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb,  signed char, signed char,      memb, memb)
MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char,    memb, memub)
MEM_NOSHUF32(mem_noshuf_sb_lh,  signed char, signed short,     memb, memh)
MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short,   memb, memuh)
MEM_NOSHUF32(mem_noshuf_sb_lw,  signed char, signed int,       memb, memw)
MEM_NOSHUF64(mem_noshuf_sb_ld,  signed char, signed long long, memb, memd)

/* Store half combinations */
MEM_NOSHUF32(mem_noshuf_sh_lb,  signed short, signed char,      memh, memb)
MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char,    memh, memub)
MEM_NOSHUF32(mem_noshuf_sh_lh,  signed short, signed short,     memh, memh)
MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short,   memh, memuh)
MEM_NOSHUF32(mem_noshuf_sh_lw,  signed short, signed int,       memh, memw)
MEM_NOSHUF64(mem_noshuf_sh_ld,  signed short, signed long long, memh, memd)

/* Store word combinations */
MEM_NOSHUF32(mem_noshuf_sw_lb,  signed int, signed char,   memw, memb)
MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub)
MEM_NOSHUF32(mem_noshuf_sw_lh,  signed int, signed short,  memw, memh)
MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh)
MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw)
MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd)

/* Store double combinations */
MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb)
MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub)
MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh)
MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh)
MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw)
MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd)

/*
 * Word store paired with a predicated word load in one :mem_noshuf packet.
 *
 * The result starts out as x.  y is always stored through p; the load
 * through q replaces the result only when pred is non-zero.
 * The result operand is early-clobber because it is written before the
 * packet reads the other operands.
 */
static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y)
{
    int ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%5, #0)\n\t"
        "%0 = %3\n\t"
        "{\n\t"
        " memw(%1) = %4\n\t"
        " if (!p0) %0 = memw(%2)\n\t"
        "}:mem_noshuf\n"
        : "=&r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
        : "p0", "memory");
    return ret;
}

/*
 * Same as pred_lw_sw, but the load uses post-increment addressing.
 * q is first copied into r7 (listed as clobbered) and the load is
 * written as memw(r7++#4).
 */
static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y)
{
    int ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%5, #0)\n\t"
        "%0 = %3\n\t"
        "r7 = %2\n\t"
        "{\n\t"
        " memw(%1) = %4\n\t"
        " if (!p0) %0 = memw(r7++#4)\n\t"
        "}:mem_noshuf\n"
        : "=&r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
        : "r7", "p0", "memory");
    return ret;
}

/*
 * Doubleword store paired with a predicated doubleword load.
 *
 * The result starts out as x.  y is always stored through p; the load
 * through q replaces the result only when pred is non-zero.
 */
static inline long long pred_ld_sd(int pred, long long *p, long long *q,
                                   long long x, long long y)
{
    long long ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%5, #0)\n\t"
        "%0 = %3\n\t"
        "{\n\t"
        " memd(%1) = %4\n\t"
        " if (!p0) %0 = memd(%2)\n\t"
        "}:mem_noshuf\n"
        : "=&r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
        : "p0", "memory");
    return ret;
}

/*
 * Same as pred_ld_sd, but the load uses post-increment addressing.
 * q is first copied into r7 (listed as clobbered) and the load is
 * written as memd(r7++#8).
 */
static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q,
                                      long long x, long long y)
{
    long long ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%5, #0)\n\t"
        "%0 = %3\n\t"
        "r7 = %2\n\t"
        "{\n\t"
        " memd(%1) = %4\n\t"
        " if (!p0) %0 = memd(r7++#8)\n\t"
        "}:mem_noshuf\n"
        : "=&r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
        : "r7", "p0", "memory");
    return ret;
}

/*
 * Predicated word store paired with a byte load.
 *
 * x is stored through p only when pred is non-zero; the memb load
 * through q is always performed and its result returned.
 */
static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
{
    unsigned int ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%4, #0)\n\t"
        "{\n\t"
        " if (!p0) memw(%1) = %3\n\t"
        " %0 = memb(%2)\n\t"
        "}:mem_noshuf\n"
        : "=r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(pred)
        : "p0", "memory");
    return ret;
}

/*
 * Predicated word store paired with a doubleword load.
 *
 * x is stored through p only when pred is non-zero; the memd load
 * through q is always performed and its result returned.
 */
static inline
unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
{
    unsigned long long ret;
    __asm__ __volatile__(
        "p0 = cmp.eq(%4, #0)\n\t"
        "{\n\t"
        " if (!p0) memw(%1) = %3\n\t"
        " %0 = memd(%2)\n\t"
        "}:mem_noshuf\n"
        : "=r"(ret)
        : "r"(p), "r"(q), "r"(x), "r"(pred)
        : "p0", "memory");
    return ret;
}

/* 16 bytes of scratch memory viewable at every access width */
typedef union {
    signed long long d[2];
    unsigned long long ud[2];
    signed int w[4];
    unsigned int uw[4];
    signed short h[8];
    unsigned short uh[8];
    signed char b[16];
    unsigned char ub[16];
} Memory;

/* Number of failed checks; also used as the exit status of main() */
int err;

#define check32(n, expect) check32_(n, expect, __LINE__)

/* Report a mismatch between n and expect, tagged with the caller's line */
static void check32_(int n, int expect, int line)
{
    if (n != expect) {
        /* %x takes an unsigned int, so convert explicitly */
        printf("ERROR: 0x%08x != 0x%08x, line %d\n",
               (unsigned int)n, (unsigned int)expect, line);
        err++;
    }
}

#define check64(n, expect) check64_(n, expect, __LINE__)

/* 64-bit counterpart of check32_ */
static void check64_(long long n, long long expect, int line)
{
    if (n != expect) {
        /* %llx takes an unsigned long long, so convert explicitly */
        printf("ERROR: 0x%08llx != 0x%08llx, line %d\n",
               (unsigned long long)n, (unsigned long long)expect, line);
        err++;
    }
}

int main(void)
{
    Memory n;
    unsigned int res32;
    unsigned long long res64;

    /*
     * Store byte combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
    check32(res32, 0x00000087);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
    check32(res32, 0x0000ff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
    check32(res32, 0xffffff87);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
    check64(res64, 0xffffffffffffff87LL);

    /*
     * Store half combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
    check32(res32, 0x0000008f);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
    check32(res32, 0xffff8a87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
    check32(res32, 0x8a87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
    check32(res32, 0x8a87ffff);

    /* The load reads all 8 bytes, so initialize the whole doubleword */
    n.d[0] = ~0LL;
    res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
    check64(res64, 0xffffffff8a87ffffLL);

    /*
     * Store word combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
    check32(res32, 0x00000087);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
    check32(res32, 0xfffff678);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
    check32(res32, 0x00005678);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
    check32(res32, 0x12345678);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffff12345678LL);

    /*
     * Store double combinations
     */
    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
    check32(res32, 0xffffffde);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
    check32(res32, 0x000000de);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
    check32(res32, 0xffff9abc);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
    check32(res32, 0x00009abc);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
    check32(res32, 0x12345678);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
    check64(res64, 0x123456789abcdef0LL);

    /*
     * Predicated word stores, byte loads
     */
    n.w[0] = ~0;
    res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
    check32(res32, 0xffffff87);

    /*
     * Predicated word stores, doubleword loads
     */
    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffff12345678LL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
    check64(res64, 0x12345678ffffffffLL);

    /*
     * No overlap tests
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    n.d[1] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    n.d[1] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
    check64(res64, 0xffffffffffffffffLL);

    /*
     * Predicated loads paired with an unconditional store
     */
    n.w[0] = ~0;
    res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0x12345678);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0xc0ffeeda);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0x12345678);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0xc0ffeeda);
    check32(n.w[0], 0xc0ffeeda);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd(0, &n.d[0], &n.d[0],
                       0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0x1234567812345678LL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd(1, &n.d[0], &n.d[0],
                       0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0xc0ffeedac0ffeedaLL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0],
                          0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0x1234567812345678LL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0],
                          0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0xc0ffeedac0ffeedaLL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    puts(err ? "FAIL" : "PASS");
    return err;
}