1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <string.h> 20 21 typedef unsigned char uint8_t; 22 typedef unsigned short uint16_t; 23 typedef unsigned int uint32_t; 24 typedef unsigned long long uint64_t; 25 26 27 static inline void S4_storerhnew_rr(void *p, int index, uint16_t v) 28 { 29 asm volatile("{\n\t" 30 " r0 = %0\n\n" 31 " memh(%1+%2<<#2) = r0.new\n\t" 32 "}\n" 33 :: "r"(v), "r"(p), "r"(index) 34 : "r0", "memory"); 35 } 36 37 static uint32_t data; 38 static inline void *S4_storerbnew_ap(uint8_t v) 39 { 40 void *ret; 41 asm volatile("{\n\t" 42 " r0 = %1\n\n" 43 " memb(%0 = ##data) = r0.new\n\t" 44 "}\n" 45 : "=r"(ret) 46 : "r"(v) 47 : "r0", "memory"); 48 return ret; 49 } 50 51 static inline void *S4_storerhnew_ap(uint16_t v) 52 { 53 void *ret; 54 asm volatile("{\n\t" 55 " r0 = %1\n\n" 56 " memh(%0 = ##data) = r0.new\n\t" 57 "}\n" 58 : "=r"(ret) 59 : "r"(v) 60 : "r0", "memory"); 61 return ret; 62 } 63 64 static inline void *S4_storerinew_ap(uint32_t v) 65 { 66 void *ret; 67 asm volatile("{\n\t" 68 " r0 = %1\n\n" 69 " memw(%0 = ##data) = r0.new\n\t" 70 "}\n" 71 : "=r"(ret) 72 : "r"(v) 73 : "r0", "memory"); 74 return ret; 75 } 76 77 static inline void S4_storeirbt_io(void *p, int pred) 78 { 79 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 80 "if (p0) memb(%1+#4)=#27\n\t" 81 :: "r"(pred), "r"(p) 82 : "p0", "memory"); 83 } 84 85 static inline void S4_storeirbf_io(void *p, int pred) 86 { 87 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 88 "if (!p0) memb(%1+#4)=#27\n\t" 89 :: "r"(pred), "r"(p) 90 : "p0", "memory"); 91 } 92 93 static inline void S4_storeirbtnew_io(void *p, int pred) 94 { 95 asm volatile("{\n\t" 96 " p0 = cmp.eq(%0, #1)\n\t" 97 " if (p0.new) memb(%1+#4)=#27\n\t" 98 "}\n\t" 99 :: "r"(pred), "r"(p) 100 : "p0", "memory"); 101 } 102 103 static inline void S4_storeirbfnew_io(void *p, int pred) 104 { 105 asm volatile("{\n\t" 106 " p0 = cmp.eq(%0, #1)\n\t" 107 " if (!p0.new) memb(%1+#4)=#27\n\t" 108 "}\n\t" 109 :: "r"(pred), "r"(p) 110 : "p0", "memory"); 111 } 112 113 static inline void S4_storeirht_io(void *p, int pred) 114 { 115 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 116 "if (p0) memh(%1+#4)=#27\n\t" 117 :: "r"(pred), "r"(p) 118 : "p0", "memory"); 119 } 120 121 static inline void S4_storeirhf_io(void *p, int pred) 122 { 123 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 124 "if (!p0) memh(%1+#4)=#27\n\t" 125 :: "r"(pred), "r"(p) 126 : "p0", "memory"); 127 } 128 129 static inline void S4_storeirhtnew_io(void *p, int pred) 130 { 131 asm volatile("{\n\t" 132 " p0 = cmp.eq(%0, #1)\n\t" 133 " if (p0.new) memh(%1+#4)=#27\n\t" 134 "}\n\t" 135 :: "r"(pred), "r"(p) 136 : "p0", "memory"); 137 } 138 139 static inline void S4_storeirhfnew_io(void *p, int pred) 140 { 141 asm volatile("{\n\t" 142 " p0 = cmp.eq(%0, #1)\n\t" 143 " if (!p0.new) memh(%1+#4)=#27\n\t" 144 "}\n\t" 145 :: "r"(pred), "r"(p) 146 : "p0", "memory"); 147 } 148 149 static inline void S4_storeirit_io(void *p, int pred) 150 { 151 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 152 "if (p0) memw(%1+#4)=#27\n\t" 153 :: "r"(pred), "r"(p) 154 : "p0", "memory"); 155 } 156 157 static inline void S4_storeirif_io(void *p, int pred) 158 { 159 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 160 "if (!p0) memw(%1+#4)=#27\n\t" 161 :: "r"(pred), "r"(p) 162 : "p0", "memory"); 163 } 164 165 static inline void S4_storeiritnew_io(void *p, int pred) 166 { 167 asm volatile("{\n\t" 168 " p0 = cmp.eq(%0, #1)\n\t" 169 " if (p0.new) memw(%1+#4)=#27\n\t" 170 "}\n\t" 171 :: "r"(pred), "r"(p) 172 : "p0", "memory"); 173 } 174 175 static inline void S4_storeirifnew_io(void *p, int pred) 176 { 177 asm volatile("{\n\t" 178 " p0 = cmp.eq(%0, #1)\n\t" 179 " if (!p0.new) memw(%1+#4)=#27\n\t" 180 "}\n\t" 181 :: "r"(pred), "r"(p) 182 : "p0", "memory"); 183 } 184 185 static int L2_ploadrifnew_pi(void *p, int pred) 186 { 187 int result; 188 asm volatile("%0 = #31\n\t" 189 "{\n\t" 190 " p0 = cmp.eq(%2, #1)\n\t" 191 " if (!p0.new) %0 = memw(%1++#4)\n\t" 192 "}\n\t" 193 : "=&r"(result), "+r"(p) : "r"(pred) 194 : "p0"); 195 return result; 196 } 197 198 /* 199 * Test that compound-compare-jump is executed in 2 parts 200 * First we have to do all the compares in the packet and 201 * account for auto-anding. Then, we can do the predicated 202 * jump. 203 */ 204 static inline int cmpnd_cmp_jump(void) 205 { 206 int retval; 207 asm ("r5 = #7\n\t" 208 "r6 = #9\n\t" 209 "{\n\t" 210 " p0 = cmp.eq(r5, #7)\n\t" 211 " if (p0.new) jump:nt 1f\n\t" 212 " p0 = cmp.eq(r6, #7)\n\t" 213 "}\n\t" 214 "%0 = #12\n\t" 215 "jump 2f\n\t" 216 "1:\n\t" 217 "%0 = #13\n\t" 218 "2:\n\t" 219 : "=r"(retval) :: "r5", "r6", "p0"); 220 return retval; 221 } 222 223 static inline int test_clrtnew(int arg1, int old_val) 224 { 225 int ret; 226 asm volatile("r5 = %2\n\t" 227 "{\n\t" 228 "p0 = cmp.eq(%1, #1)\n\t" 229 "if (p0.new) r5=#0\n\t" 230 "}\n\t" 231 "%0 = r5\n\t" 232 : "=r"(ret) 233 : "r"(arg1), "r"(old_val) 234 : "p0", "r5"); 235 return ret; 236 } 237 238 int err; 239 240 static void check(int val, int expect) 241 { 242 if (val != expect) { 243 printf("ERROR: 0x%04x != 0x%04x\n", val, expect); 244 err++; 245 } 246 } 247 248 static void check64(long long val, long long expect) 249 { 250 if (val != expect) { 251 printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); 252 err++; 253 } 254 } 255 256 uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 257 uint32_t array[10]; 258 259 uint32_t early_exit; 260 261 /* 262 * Write this as a function because we can't guarantee the compiler will 263 * allocate a frame with just the SL2_return_tnew packet. 264 */ 265 static void SL2_return_tnew(int x); 266 asm ("SL2_return_tnew:\n\t" 267 " allocframe(#0)\n\t" 268 " r1 = #1\n\t" 269 " memw(##early_exit) = r1\n\t" 270 " {\n\t" 271 " p0 = cmp.eq(r0, #1)\n\t" 272 " if (p0.new) dealloc_return:nt\n\t" /* SL2_return_tnew */ 273 " }\n\t" 274 " r1 = #0\n\t" 275 " memw(##early_exit) = r1\n\t" 276 " dealloc_return\n\t" 277 ); 278 279 static long long creg_pair(int x, int y) 280 { 281 long long retval; 282 asm ("m0 = %1\n\t" 283 "m1 = %2\n\t" 284 "%0 = c7:6\n\t" 285 : "=r"(retval) : "r"(x), "r"(y) : "m0", "m1"); 286 return retval; 287 } 288 289 static long long decbin(long long x, long long y, int *pred) 290 { 291 long long retval; 292 asm ("%0 = decbin(%2, %3)\n\t" 293 "%1 = p0\n\t" 294 : "=r"(retval), "=r"(*pred) 295 : "r"(x), "r"(y)); 296 return retval; 297 } 298 299 /* Check that predicates are auto-and'ed in a packet */ 300 static int auto_and(void) 301 { 302 int retval; 303 asm ("r5 = #1\n\t" 304 "{\n\t" 305 " p0 = cmp.eq(r1, #1)\n\t" 306 " p0 = cmp.eq(r1, #2)\n\t" 307 "}\n\t" 308 "%0 = p0\n\t" 309 : "=r"(retval) 310 : 311 : "r5", "p0"); 312 return retval; 313 } 314 315 void test_lsbnew(void) 316 { 317 int result; 318 319 asm("r0 = #2\n\t" 320 "r1 = #5\n\t" 321 "{\n\t" 322 " p0 = r0\n\t" 323 " if (p0.new) r1 = #3\n\t" 324 "}\n\t" 325 "%0 = r1\n\t" 326 : "=r"(result) :: "r0", "r1", "p0"); 327 check(result, 5); 328 } 329 330 void test_l2fetch(void) 331 { 332 /* These don't do anything in qemu, just make sure they don't assert */ 333 asm volatile ("l2fetch(r0, r1)\n\t" 334 "l2fetch(r0, r3:2)\n\t"); 335 } 336 337 static inline int ct0(uint32_t x) 338 { 339 int res; 340 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 341 return res; 342 } 343 344 static inline int ct1(uint32_t x) 345 { 346 int res; 347 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 348 return res; 349 } 350 351 static inline int ct0p(uint64_t x) 352 { 353 int res; 354 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 355 return res; 356 } 357 358 static inline int ct1p(uint64_t x) 359 { 360 int res; 361 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 362 return res; 363 } 364 365 void test_count_trailing_zeros_ones(void) 366 { 367 check(ct0(0x0000000f), 0); 368 check(ct0(0x00000000), 32); 369 check(ct0(0x000000f0), 4); 370 371 check(ct1(0x000000f0), 0); 372 check(ct1(0x0000000f), 4); 373 check(ct1(0x00000000), 0); 374 check(ct1(0xffffffff), 32); 375 376 check(ct0p(0x000000000000000fULL), 0); 377 check(ct0p(0x0000000000000000ULL), 64); 378 check(ct0p(0x00000000000000f0ULL), 4); 379 380 check(ct1p(0x00000000000000f0ULL), 0); 381 check(ct1p(0x000000000000000fULL), 4); 382 check(ct1p(0x0000000000000000ULL), 0); 383 check(ct1p(0xffffffffffffffffULL), 64); 384 check(ct1p(0xffffffffff0fffffULL), 20); 385 check(ct1p(0xffffff0fffffffffULL), 36); 386 } 387 388 int main() 389 { 390 int res; 391 long long res64; 392 int pred; 393 394 memcpy(array, init, sizeof(array)); 395 S4_storerhnew_rr(array, 4, 0xffff); 396 check(array[4], 0xffff); 397 398 data = ~0; 399 check((uint32_t)S4_storerbnew_ap(0x12), (uint32_t)&data); 400 check(data, 0xffffff12); 401 402 data = ~0; 403 check((uint32_t)S4_storerhnew_ap(0x1234), (uint32_t)&data); 404 check(data, 0xffff1234); 405 406 data = ~0; 407 check((uint32_t)S4_storerinew_ap(0x12345678), (uint32_t)&data); 408 check(data, 0x12345678); 409 410 /* Byte */ 411 memcpy(array, init, sizeof(array)); 412 S4_storeirbt_io(&array[1], 1); 413 check(array[2], 27); 414 S4_storeirbt_io(&array[2], 0); 415 check(array[3], 3); 416 417 memcpy(array, init, sizeof(array)); 418 S4_storeirbf_io(&array[3], 0); 419 check(array[4], 27); 420 S4_storeirbf_io(&array[4], 1); 421 check(array[5], 5); 422 423 memcpy(array, init, sizeof(array)); 424 S4_storeirbtnew_io(&array[5], 1); 425 check(array[6], 27); 426 S4_storeirbtnew_io(&array[6], 0); 427 check(array[7], 7); 428 429 memcpy(array, init, sizeof(array)); 430 S4_storeirbfnew_io(&array[7], 0); 431 check(array[8], 27); 432 S4_storeirbfnew_io(&array[8], 1); 433 check(array[9], 9); 434 435 /* Half word */ 436 memcpy(array, init, sizeof(array)); 437 S4_storeirht_io(&array[1], 1); 438 check(array[2], 27); 439 S4_storeirht_io(&array[2], 0); 440 check(array[3], 3); 441 442 memcpy(array, init, sizeof(array)); 443 S4_storeirhf_io(&array[3], 0); 444 check(array[4], 27); 445 S4_storeirhf_io(&array[4], 1); 446 check(array[5], 5); 447 448 memcpy(array, init, sizeof(array)); 449 S4_storeirhtnew_io(&array[5], 1); 450 check(array[6], 27); 451 S4_storeirhtnew_io(&array[6], 0); 452 check(array[7], 7); 453 454 memcpy(array, init, sizeof(array)); 455 S4_storeirhfnew_io(&array[7], 0); 456 check(array[8], 27); 457 S4_storeirhfnew_io(&array[8], 1); 458 check(array[9], 9); 459 460 /* Word */ 461 memcpy(array, init, sizeof(array)); 462 S4_storeirit_io(&array[1], 1); 463 check(array[2], 27); 464 S4_storeirit_io(&array[2], 0); 465 check(array[3], 3); 466 467 memcpy(array, init, sizeof(array)); 468 S4_storeirif_io(&array[3], 0); 469 check(array[4], 27); 470 S4_storeirif_io(&array[4], 1); 471 check(array[5], 5); 472 473 memcpy(array, init, sizeof(array)); 474 S4_storeiritnew_io(&array[5], 1); 475 check(array[6], 27); 476 S4_storeiritnew_io(&array[6], 0); 477 check(array[7], 7); 478 479 memcpy(array, init, sizeof(array)); 480 S4_storeirifnew_io(&array[7], 0); 481 check(array[8], 27); 482 S4_storeirifnew_io(&array[8], 1); 483 check(array[9], 9); 484 485 memcpy(array, init, sizeof(array)); 486 res = L2_ploadrifnew_pi(&array[6], 0); 487 check(res, 6); 488 res = L2_ploadrifnew_pi(&array[7], 1); 489 check(res, 31); 490 491 int x = cmpnd_cmp_jump(); 492 check(x, 12); 493 494 SL2_return_tnew(0); 495 check(early_exit, 0); 496 SL2_return_tnew(1); 497 check(early_exit, 1); 498 499 long long pair = creg_pair(5, 7); 500 check((int)pair, 5); 501 check((int)(pair >> 32), 7); 502 503 res = test_clrtnew(1, 7); 504 check(res, 0); 505 res = test_clrtnew(2, 7); 506 check(res, 7); 507 508 res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); 509 check64(res64, 0x357980003700010cLL); 510 check(pred, 0); 511 512 res64 = decbin(0xfLL, 0x1bLL, &pred); 513 check64(res64, 0x78000100LL); 514 check(pred, 1); 515 516 res = auto_and(); 517 check(res, 0); 518 519 test_lsbnew(); 520 521 test_l2fetch(); 522 523 test_count_trailing_zeros_ones(); 524 525 puts(err ? "FAIL" : "PASS"); 526 return err; 527 } 528