1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <string.h> 20 21 #define CORE_HAS_CABAC (__HEXAGON_ARCH__ <= 71) 22 23 typedef unsigned char uint8_t; 24 typedef unsigned short uint16_t; 25 typedef unsigned int uint32_t; 26 typedef unsigned long long uint64_t; 27 28 29 static inline void S4_storerhnew_rr(void *p, int index, uint16_t v) 30 { 31 asm volatile("{\n\t" 32 " r0 = %0\n\n" 33 " memh(%1+%2<<#2) = r0.new\n\t" 34 "}\n" 35 :: "r"(v), "r"(p), "r"(index) 36 : "r0", "memory"); 37 } 38 39 static uint32_t data; 40 static inline void *S4_storerbnew_ap(uint8_t v) 41 { 42 void *ret; 43 asm volatile("{\n\t" 44 " r0 = %1\n\n" 45 " memb(%0 = ##data) = r0.new\n\t" 46 "}\n" 47 : "=r"(ret) 48 : "r"(v) 49 : "r0", "memory"); 50 return ret; 51 } 52 53 static inline void *S4_storerhnew_ap(uint16_t v) 54 { 55 void *ret; 56 asm volatile("{\n\t" 57 " r0 = %1\n\n" 58 " memh(%0 = ##data) = r0.new\n\t" 59 "}\n" 60 : "=r"(ret) 61 : "r"(v) 62 : "r0", "memory"); 63 return ret; 64 } 65 66 static inline void *S4_storerinew_ap(uint32_t v) 67 { 68 void *ret; 69 asm volatile("{\n\t" 70 " r0 = %1\n\n" 71 " memw(%0 = ##data) = r0.new\n\t" 72 "}\n" 73 : "=r"(ret) 74 : "r"(v) 75 : "r0", "memory"); 76 return ret; 77 } 78 79 static inline void S4_storeirbt_io(void *p, int pred) 80 { 81 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 82 "if (p0) memb(%1+#4)=#27\n\t" 83 :: "r"(pred), "r"(p) 84 : "p0", "memory"); 85 } 86 87 static inline void S4_storeirbf_io(void *p, int pred) 88 { 89 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 90 "if (!p0) memb(%1+#4)=#27\n\t" 91 :: "r"(pred), "r"(p) 92 : "p0", "memory"); 93 } 94 95 static inline void S4_storeirbtnew_io(void *p, int pred) 96 { 97 asm volatile("{\n\t" 98 " p0 = cmp.eq(%0, #1)\n\t" 99 " if (p0.new) memb(%1+#4)=#27\n\t" 100 "}\n\t" 101 :: "r"(pred), "r"(p) 102 : "p0", "memory"); 103 } 104 105 static inline void S4_storeirbfnew_io(void *p, int pred) 106 { 107 asm volatile("{\n\t" 108 " p0 = cmp.eq(%0, #1)\n\t" 109 " if (!p0.new) memb(%1+#4)=#27\n\t" 110 "}\n\t" 111 :: "r"(pred), "r"(p) 112 : "p0", "memory"); 113 } 114 115 static inline void S4_storeirht_io(void *p, int pred) 116 { 117 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 118 "if (p0) memh(%1+#4)=#27\n\t" 119 :: "r"(pred), "r"(p) 120 : "p0", "memory"); 121 } 122 123 static inline void S4_storeirhf_io(void *p, int pred) 124 { 125 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 126 "if (!p0) memh(%1+#4)=#27\n\t" 127 :: "r"(pred), "r"(p) 128 : "p0", "memory"); 129 } 130 131 static inline void S4_storeirhtnew_io(void *p, int pred) 132 { 133 asm volatile("{\n\t" 134 " p0 = cmp.eq(%0, #1)\n\t" 135 " if (p0.new) memh(%1+#4)=#27\n\t" 136 "}\n\t" 137 :: "r"(pred), "r"(p) 138 : "p0", "memory"); 139 } 140 141 static inline void S4_storeirhfnew_io(void *p, int pred) 142 { 143 asm volatile("{\n\t" 144 " p0 = cmp.eq(%0, #1)\n\t" 145 " if (!p0.new) memh(%1+#4)=#27\n\t" 146 "}\n\t" 147 :: "r"(pred), "r"(p) 148 : "p0", "memory"); 149 } 150 151 static inline void S4_storeirit_io(void *p, int pred) 152 { 153 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 154 "if (p0) memw(%1+#4)=#27\n\t" 155 :: "r"(pred), "r"(p) 156 : "p0", "memory"); 157 } 158 159 static inline void S4_storeirif_io(void *p, int pred) 160 { 161 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 162 "if (!p0) memw(%1+#4)=#27\n\t" 163 :: "r"(pred), "r"(p) 164 : "p0", "memory"); 165 } 166 167 static inline void S4_storeiritnew_io(void *p, int pred) 168 { 169 asm volatile("{\n\t" 170 " p0 = cmp.eq(%0, #1)\n\t" 171 " if (p0.new) memw(%1+#4)=#27\n\t" 172 "}\n\t" 173 :: "r"(pred), "r"(p) 174 : "p0", "memory"); 175 } 176 177 static inline void S4_storeirifnew_io(void *p, int pred) 178 { 179 asm volatile("{\n\t" 180 " p0 = cmp.eq(%0, #1)\n\t" 181 " if (!p0.new) memw(%1+#4)=#27\n\t" 182 "}\n\t" 183 :: "r"(pred), "r"(p) 184 : "p0", "memory"); 185 } 186 187 static int L2_ploadrifnew_pi(void *p, int pred) 188 { 189 int result; 190 asm volatile("%0 = #31\n\t" 191 "{\n\t" 192 " p0 = cmp.eq(%2, #1)\n\t" 193 " if (!p0.new) %0 = memw(%1++#4)\n\t" 194 "}\n\t" 195 : "=&r"(result), "+r"(p) : "r"(pred) 196 : "p0"); 197 return result; 198 } 199 200 /* 201 * Test that compound-compare-jump is executed in 2 parts 202 * First we have to do all the compares in the packet and 203 * account for auto-anding. Then, we can do the predicated 204 * jump. 205 */ 206 static inline int cmpnd_cmp_jump(void) 207 { 208 int retval; 209 asm ("r5 = #7\n\t" 210 "r6 = #9\n\t" 211 "{\n\t" 212 " p0 = cmp.eq(r5, #7)\n\t" 213 " if (p0.new) jump:nt 1f\n\t" 214 " p0 = cmp.eq(r6, #7)\n\t" 215 "}\n\t" 216 "%0 = #12\n\t" 217 "jump 2f\n\t" 218 "1:\n\t" 219 "%0 = #13\n\t" 220 "2:\n\t" 221 : "=r"(retval) :: "r5", "r6", "p0"); 222 return retval; 223 } 224 225 static inline int test_clrtnew(int arg1, int old_val) 226 { 227 int ret; 228 asm volatile("r5 = %2\n\t" 229 "{\n\t" 230 "p0 = cmp.eq(%1, #1)\n\t" 231 "if (p0.new) r5=#0\n\t" 232 "}\n\t" 233 "%0 = r5\n\t" 234 : "=r"(ret) 235 : "r"(arg1), "r"(old_val) 236 : "p0", "r5"); 237 return ret; 238 } 239 240 int err; 241 242 static void check(int val, int expect) 243 { 244 if (val != expect) { 245 printf("ERROR: 0x%04x != 0x%04x\n", val, expect); 246 err++; 247 } 248 } 249 250 #if CORE_HAS_CABAC 251 static void check64(long long val, long long expect) 252 { 253 if (val != expect) { 254 printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); 255 err++; 256 } 257 } 258 #endif 259 260 uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 261 uint32_t array[10]; 262 263 uint32_t early_exit; 264 265 /* 266 * Write this as a function because we can't guarantee the compiler will 267 * allocate a frame with just the SL2_return_tnew packet. 268 */ 269 static void SL2_return_tnew(int x); 270 asm ("SL2_return_tnew:\n\t" 271 " allocframe(#0)\n\t" 272 " r1 = #1\n\t" 273 " memw(##early_exit) = r1\n\t" 274 " {\n\t" 275 " p0 = cmp.eq(r0, #1)\n\t" 276 " if (p0.new) dealloc_return:nt\n\t" /* SL2_return_tnew */ 277 " }\n\t" 278 " r1 = #0\n\t" 279 " memw(##early_exit) = r1\n\t" 280 " dealloc_return\n\t" 281 ); 282 283 static long long creg_pair(int x, int y) 284 { 285 long long retval; 286 asm ("m0 = %1\n\t" 287 "m1 = %2\n\t" 288 "%0 = c7:6\n\t" 289 : "=r"(retval) : "r"(x), "r"(y) : "m0", "m1"); 290 return retval; 291 } 292 293 #if CORE_HAS_CABAC 294 static long long decbin(long long x, long long y, int *pred) 295 { 296 long long retval; 297 asm ("%0 = decbin(%2, %3)\n\t" 298 "%1 = p0\n\t" 299 : "=r"(retval), "=r"(*pred) 300 : "r"(x), "r"(y)); 301 return retval; 302 } 303 #endif 304 305 /* Check that predicates are auto-and'ed in a packet */ 306 static int auto_and(void) 307 { 308 int retval; 309 asm ("r5 = #1\n\t" 310 "{\n\t" 311 " p0 = cmp.eq(r1, #1)\n\t" 312 " p0 = cmp.eq(r1, #2)\n\t" 313 "}\n\t" 314 "%0 = p0\n\t" 315 : "=r"(retval) 316 : 317 : "r5", "p0"); 318 return retval; 319 } 320 321 void test_lsbnew(void) 322 { 323 int result; 324 325 asm("r0 = #2\n\t" 326 "r1 = #5\n\t" 327 "{\n\t" 328 " p0 = r0\n\t" 329 " if (p0.new) r1 = #3\n\t" 330 "}\n\t" 331 "%0 = r1\n\t" 332 : "=r"(result) :: "r0", "r1", "p0"); 333 check(result, 5); 334 } 335 336 void test_l2fetch(void) 337 { 338 /* These don't do anything in qemu, just make sure they don't assert */ 339 asm volatile ("l2fetch(r0, r1)\n\t" 340 "l2fetch(r0, r3:2)\n\t"); 341 } 342 343 static inline int ct0(uint32_t x) 344 { 345 int res; 346 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 347 return res; 348 } 349 350 static inline int ct1(uint32_t x) 351 { 352 int res; 353 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 354 return res; 355 } 356 357 static inline int ct0p(uint64_t x) 358 { 359 int res; 360 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 361 return res; 362 } 363 364 static inline int ct1p(uint64_t x) 365 { 366 int res; 367 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 368 return res; 369 } 370 371 void test_count_trailing_zeros_ones(void) 372 { 373 check(ct0(0x0000000f), 0); 374 check(ct0(0x00000000), 32); 375 check(ct0(0x000000f0), 4); 376 377 check(ct1(0x000000f0), 0); 378 check(ct1(0x0000000f), 4); 379 check(ct1(0x00000000), 0); 380 check(ct1(0xffffffff), 32); 381 382 check(ct0p(0x000000000000000fULL), 0); 383 check(ct0p(0x0000000000000000ULL), 64); 384 check(ct0p(0x00000000000000f0ULL), 4); 385 386 check(ct1p(0x00000000000000f0ULL), 0); 387 check(ct1p(0x000000000000000fULL), 4); 388 check(ct1p(0x0000000000000000ULL), 0); 389 check(ct1p(0xffffffffffffffffULL), 64); 390 check(ct1p(0xffffffffff0fffffULL), 20); 391 check(ct1p(0xffffff0fffffffffULL), 36); 392 } 393 394 static inline int dpmpyss_rnd_s0(int x, int y) 395 { 396 int res; 397 asm("%0 = mpy(%1, %2):rnd\n\t" : "=r"(res) : "r"(x), "r"(y)); 398 return res; 399 } 400 401 void test_dpmpyss_rnd_s0(void) 402 { 403 check(dpmpyss_rnd_s0(-1, 0x80000000), 1); 404 check(dpmpyss_rnd_s0(0, 0x80000000), 0); 405 check(dpmpyss_rnd_s0(1, 0x80000000), 0); 406 check(dpmpyss_rnd_s0(0x7fffffff, 0x80000000), 0xc0000001); 407 check(dpmpyss_rnd_s0(0x80000000, -1), 1); 408 check(dpmpyss_rnd_s0(-1, -1), 0); 409 check(dpmpyss_rnd_s0(0, -1), 0); 410 check(dpmpyss_rnd_s0(1, -1), 0); 411 check(dpmpyss_rnd_s0(0x7fffffff, -1), 0); 412 check(dpmpyss_rnd_s0(0x80000000, 0), 0); 413 check(dpmpyss_rnd_s0(-1, 0), 0); 414 check(dpmpyss_rnd_s0(0, 0), 0); 415 check(dpmpyss_rnd_s0(1, 0), 0); 416 check(dpmpyss_rnd_s0(-1, -1), 0); 417 check(dpmpyss_rnd_s0(0, -1), 0); 418 check(dpmpyss_rnd_s0(1, -1), 0); 419 check(dpmpyss_rnd_s0(0x7fffffff, 1), 0); 420 check(dpmpyss_rnd_s0(0x80000000, 0x7fffffff), 0xc0000001); 421 check(dpmpyss_rnd_s0(-1, 0x7fffffff), 0); 422 check(dpmpyss_rnd_s0(0, 0x7fffffff), 0); 423 check(dpmpyss_rnd_s0(1, 0x7fffffff), 0); 424 check(dpmpyss_rnd_s0(0x7fffffff, 0x7fffffff), 0x3fffffff); 425 } 426 427 int main() 428 { 429 int res; 430 #if CORE_HAS_CABAC 431 long long res64; 432 int pred; 433 #endif 434 435 memcpy(array, init, sizeof(array)); 436 S4_storerhnew_rr(array, 4, 0xffff); 437 check(array[4], 0xffff); 438 439 data = ~0; 440 check((uint32_t)S4_storerbnew_ap(0x12), (uint32_t)&data); 441 check(data, 0xffffff12); 442 443 data = ~0; 444 check((uint32_t)S4_storerhnew_ap(0x1234), (uint32_t)&data); 445 check(data, 0xffff1234); 446 447 data = ~0; 448 check((uint32_t)S4_storerinew_ap(0x12345678), (uint32_t)&data); 449 check(data, 0x12345678); 450 451 /* Byte */ 452 memcpy(array, init, sizeof(array)); 453 S4_storeirbt_io(&array[1], 1); 454 check(array[2], 27); 455 S4_storeirbt_io(&array[2], 0); 456 check(array[3], 3); 457 458 memcpy(array, init, sizeof(array)); 459 S4_storeirbf_io(&array[3], 0); 460 check(array[4], 27); 461 S4_storeirbf_io(&array[4], 1); 462 check(array[5], 5); 463 464 memcpy(array, init, sizeof(array)); 465 S4_storeirbtnew_io(&array[5], 1); 466 check(array[6], 27); 467 S4_storeirbtnew_io(&array[6], 0); 468 check(array[7], 7); 469 470 memcpy(array, init, sizeof(array)); 471 S4_storeirbfnew_io(&array[7], 0); 472 check(array[8], 27); 473 S4_storeirbfnew_io(&array[8], 1); 474 check(array[9], 9); 475 476 /* Half word */ 477 memcpy(array, init, sizeof(array)); 478 S4_storeirht_io(&array[1], 1); 479 check(array[2], 27); 480 S4_storeirht_io(&array[2], 0); 481 check(array[3], 3); 482 483 memcpy(array, init, sizeof(array)); 484 S4_storeirhf_io(&array[3], 0); 485 check(array[4], 27); 486 S4_storeirhf_io(&array[4], 1); 487 check(array[5], 5); 488 489 memcpy(array, init, sizeof(array)); 490 S4_storeirhtnew_io(&array[5], 1); 491 check(array[6], 27); 492 S4_storeirhtnew_io(&array[6], 0); 493 check(array[7], 7); 494 495 memcpy(array, init, sizeof(array)); 496 S4_storeirhfnew_io(&array[7], 0); 497 check(array[8], 27); 498 S4_storeirhfnew_io(&array[8], 1); 499 check(array[9], 9); 500 501 /* Word */ 502 memcpy(array, init, sizeof(array)); 503 S4_storeirit_io(&array[1], 1); 504 check(array[2], 27); 505 S4_storeirit_io(&array[2], 0); 506 check(array[3], 3); 507 508 memcpy(array, init, sizeof(array)); 509 S4_storeirif_io(&array[3], 0); 510 check(array[4], 27); 511 S4_storeirif_io(&array[4], 1); 512 check(array[5], 5); 513 514 memcpy(array, init, sizeof(array)); 515 S4_storeiritnew_io(&array[5], 1); 516 check(array[6], 27); 517 S4_storeiritnew_io(&array[6], 0); 518 check(array[7], 7); 519 520 memcpy(array, init, sizeof(array)); 521 S4_storeirifnew_io(&array[7], 0); 522 check(array[8], 27); 523 S4_storeirifnew_io(&array[8], 1); 524 check(array[9], 9); 525 526 memcpy(array, init, sizeof(array)); 527 res = L2_ploadrifnew_pi(&array[6], 0); 528 check(res, 6); 529 res = L2_ploadrifnew_pi(&array[7], 1); 530 check(res, 31); 531 532 int x = cmpnd_cmp_jump(); 533 check(x, 12); 534 535 SL2_return_tnew(0); 536 check(early_exit, 0); 537 SL2_return_tnew(1); 538 check(early_exit, 1); 539 540 long long pair = creg_pair(5, 7); 541 check((int)pair, 5); 542 check((int)(pair >> 32), 7); 543 544 res = test_clrtnew(1, 7); 545 check(res, 0); 546 res = test_clrtnew(2, 7); 547 check(res, 7); 548 549 #if CORE_HAS_CABAC 550 res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); 551 check64(res64, 0x357980003700010cLL); 552 check(pred, 0); 553 554 res64 = decbin(0xfLL, 0x1bLL, &pred); 555 check64(res64, 0x78000100LL); 556 check(pred, 1); 557 #else 558 puts("Skipping cabac tests"); 559 #endif 560 561 res = auto_and(); 562 check(res, 0); 563 564 test_lsbnew(); 565 566 test_l2fetch(); 567 568 test_count_trailing_zeros_ones(); 569 570 test_dpmpyss_rnd_s0(); 571 572 puts(err ? "FAIL" : "PASS"); 573 return err; 574 } 575