1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <stdint.h> 20 #include <stdbool.h> 21 #include <string.h> 22 23 int err; 24 25 #include "hex_test.h" 26 27 #define CORE_HAS_CABAC (__HEXAGON_ARCH__ <= 71) 28 29 static inline void S4_storerhnew_rr(void *p, int index, uint16_t v) 30 { 31 asm volatile("{\n\t" 32 " r0 = %0\n\n" 33 " memh(%1+%2<<#2) = r0.new\n\t" 34 "}\n" 35 :: "r"(v), "r"(p), "r"(index) 36 : "r0", "memory"); 37 } 38 39 static uint32_t data; 40 static inline void *S4_storerbnew_ap(uint8_t v) 41 { 42 void *ret; 43 asm volatile("{\n\t" 44 " r0 = %1\n\n" 45 " memb(%0 = ##data) = r0.new\n\t" 46 "}\n" 47 : "=r"(ret) 48 : "r"(v) 49 : "r0", "memory"); 50 return ret; 51 } 52 53 static inline void *S4_storerhnew_ap(uint16_t v) 54 { 55 void *ret; 56 asm volatile("{\n\t" 57 " r0 = %1\n\n" 58 " memh(%0 = ##data) = r0.new\n\t" 59 "}\n" 60 : "=r"(ret) 61 : "r"(v) 62 : "r0", "memory"); 63 return ret; 64 } 65 66 static inline void *S4_storerinew_ap(uint32_t v) 67 { 68 void *ret; 69 asm volatile("{\n\t" 70 " r0 = %1\n\n" 71 " memw(%0 = ##data) = r0.new\n\t" 72 "}\n" 73 : "=r"(ret) 74 : "r"(v) 75 : "r0", "memory"); 76 return ret; 77 } 78 79 static inline void S4_storeirbt_io(void *p, bool pred) 80 { 81 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 82 "if (p0) memb(%1+#4)=#27\n\t" 83 :: "r"(pred), "r"(p) 84 : "p0", "memory"); 85 } 86 87 static inline void S4_storeirbf_io(void *p, bool pred) 88 { 89 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 90 "if (!p0) memb(%1+#4)=#27\n\t" 91 :: "r"(pred), "r"(p) 92 : "p0", "memory"); 93 } 94 95 static inline void S4_storeirbtnew_io(void *p, bool pred) 96 { 97 asm volatile("{\n\t" 98 " p0 = cmp.eq(%0, #1)\n\t" 99 " if (p0.new) memb(%1+#4)=#27\n\t" 100 "}\n\t" 101 :: "r"(pred), "r"(p) 102 : "p0", "memory"); 103 } 104 105 static inline void S4_storeirbfnew_io(void *p, bool pred) 106 { 107 asm volatile("{\n\t" 108 " p0 = cmp.eq(%0, #1)\n\t" 109 " if (!p0.new) memb(%1+#4)=#27\n\t" 110 "}\n\t" 111 :: "r"(pred), "r"(p) 112 : "p0", "memory"); 113 } 114 115 static inline void S4_storeirht_io(void *p, bool pred) 116 { 117 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 118 "if (p0) memh(%1+#4)=#27\n\t" 119 :: "r"(pred), "r"(p) 120 : "p0", "memory"); 121 } 122 123 static inline void S4_storeirhf_io(void *p, bool pred) 124 { 125 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 126 "if (!p0) memh(%1+#4)=#27\n\t" 127 :: "r"(pred), "r"(p) 128 : "p0", "memory"); 129 } 130 131 static inline void S4_storeirhtnew_io(void *p, bool pred) 132 { 133 asm volatile("{\n\t" 134 " p0 = cmp.eq(%0, #1)\n\t" 135 " if (p0.new) memh(%1+#4)=#27\n\t" 136 "}\n\t" 137 :: "r"(pred), "r"(p) 138 : "p0", "memory"); 139 } 140 141 static inline void S4_storeirhfnew_io(void *p, bool pred) 142 { 143 asm volatile("{\n\t" 144 " p0 = cmp.eq(%0, #1)\n\t" 145 " if (!p0.new) memh(%1+#4)=#27\n\t" 146 "}\n\t" 147 :: "r"(pred), "r"(p) 148 : "p0", "memory"); 149 } 150 151 static inline void S4_storeirit_io(void *p, bool pred) 152 { 153 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 154 "if (p0) memw(%1+#4)=#27\n\t" 155 :: "r"(pred), "r"(p) 156 : "p0", "memory"); 157 } 158 159 static inline void S4_storeirif_io(void *p, bool pred) 160 { 161 asm volatile("p0 = cmp.eq(%0, #1)\n\t" 162 "if (!p0) memw(%1+#4)=#27\n\t" 163 :: "r"(pred), "r"(p) 164 : "p0", "memory"); 165 } 166 167 static inline void S4_storeiritnew_io(void *p, bool pred) 168 { 169 asm volatile("{\n\t" 170 " p0 = cmp.eq(%0, #1)\n\t" 171 " if (p0.new) memw(%1+#4)=#27\n\t" 172 "}\n\t" 173 :: "r"(pred), "r"(p) 174 : "p0", "memory"); 175 } 176 177 static inline void S4_storeirifnew_io(void *p, bool pred) 178 { 179 asm volatile("{\n\t" 180 " p0 = cmp.eq(%0, #1)\n\t" 181 " if (!p0.new) memw(%1+#4)=#27\n\t" 182 "}\n\t" 183 :: "r"(pred), "r"(p) 184 : "p0", "memory"); 185 } 186 187 static int32_t L2_ploadrifnew_pi(void *p, bool pred) 188 { 189 int32_t result; 190 asm volatile("%0 = #31\n\t" 191 "{\n\t" 192 " p0 = cmp.eq(%2, #1)\n\t" 193 " if (!p0.new) %0 = memw(%1++#4)\n\t" 194 "}\n\t" 195 : "=&r"(result), "+r"(p) : "r"(pred) 196 : "p0"); 197 return result; 198 } 199 200 /* 201 * Test that compound-compare-jump is executed in 2 parts 202 * First we have to do all the compares in the packet and 203 * account for auto-anding. Then, we can do the predicated 204 * jump. 205 */ 206 static inline int32_t cmpnd_cmp_jump(void) 207 { 208 int32_t retval; 209 asm ("r5 = #7\n\t" 210 "r6 = #9\n\t" 211 "{\n\t" 212 " p0 = cmp.eq(r5, #7)\n\t" 213 " if (p0.new) jump:nt 1f\n\t" 214 " p0 = cmp.eq(r6, #7)\n\t" 215 "}\n\t" 216 "%0 = #12\n\t" 217 "jump 2f\n\t" 218 "1:\n\t" 219 "%0 = #13\n\t" 220 "2:\n\t" 221 : "=r"(retval) :: "r5", "r6", "p0"); 222 return retval; 223 } 224 225 static inline int32_t test_clrtnew(int32_t arg1, int32_t old_val) 226 { 227 int32_t ret; 228 asm volatile("r5 = %2\n\t" 229 "{\n\t" 230 "p0 = cmp.eq(%1, #1)\n\t" 231 "if (p0.new) r5=#0\n\t" 232 "}\n\t" 233 "%0 = r5\n\t" 234 : "=r"(ret) 235 : "r"(arg1), "r"(old_val) 236 : "p0", "r5"); 237 return ret; 238 } 239 240 uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 241 uint32_t array[10]; 242 243 bool early_exit; 244 245 /* 246 * Write this as a function because we can't guarantee the compiler will 247 * allocate a frame with just the SL2_return_tnew packet. 248 */ 249 static void SL2_return_tnew(bool pred); 250 asm ("SL2_return_tnew:\n\t" 251 " allocframe(#0)\n\t" 252 " r1 = #1\n\t" 253 " memw(##early_exit) = r1\n\t" 254 " {\n\t" 255 " p0 = cmp.eq(r0, #1)\n\t" 256 " if (p0.new) dealloc_return:nt\n\t" /* SL2_return_tnew */ 257 " }\n\t" 258 " r1 = #0\n\t" 259 " memw(##early_exit) = r1\n\t" 260 " dealloc_return\n\t" 261 ); 262 263 static int64_t creg_pair(int32_t x, int32_t y) 264 { 265 int64_t retval; 266 asm ("m0 = %1\n\t" 267 "m1 = %2\n\t" 268 "%0 = c7:6\n\t" 269 : "=r"(retval) : "r"(x), "r"(y) : "m0", "m1"); 270 return retval; 271 } 272 273 #if CORE_HAS_CABAC 274 static int64_t decbin(int64_t x, int64_t y, bool *pred) 275 { 276 int64_t retval; 277 asm ("%0 = decbin(%2, %3)\n\t" 278 "%1 = p0\n\t" 279 : "=r"(retval), "=r"(*pred) 280 : "r"(x), "r"(y)); 281 return retval; 282 } 283 #endif 284 285 /* Check that predicates are auto-and'ed in a packet */ 286 static bool auto_and(void) 287 { 288 bool retval; 289 asm ("r5 = #1\n\t" 290 "{\n\t" 291 " p0 = cmp.eq(r1, #1)\n\t" 292 " p0 = cmp.eq(r1, #2)\n\t" 293 "}\n\t" 294 "%0 = p0\n\t" 295 : "=r"(retval) 296 : 297 : "r5", "p0"); 298 return retval; 299 } 300 301 void test_lsbnew(void) 302 { 303 int32_t result; 304 305 asm("r0 = #2\n\t" 306 "r1 = #5\n\t" 307 "{\n\t" 308 " p0 = r0\n\t" 309 " if (p0.new) r1 = #3\n\t" 310 "}\n\t" 311 "%0 = r1\n\t" 312 : "=r"(result) :: "r0", "r1", "p0"); 313 check32(result, 5); 314 } 315 316 void test_l2fetch(void) 317 { 318 /* These don't do anything in qemu, just make sure they don't assert */ 319 asm volatile ("l2fetch(r0, r1)\n\t" 320 "l2fetch(r0, r3:2)\n\t"); 321 } 322 323 static inline int32_t ct0(uint32_t x) 324 { 325 int32_t res; 326 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 327 return res; 328 } 329 330 static inline int32_t ct1(uint32_t x) 331 { 332 int32_t res; 333 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 334 return res; 335 } 336 337 static inline int32_t ct0p(uint64_t x) 338 { 339 int32_t res; 340 asm("%0 = ct0(%1)\n\t" : "=r"(res) : "r"(x)); 341 return res; 342 } 343 344 static inline int32_t ct1p(uint64_t x) 345 { 346 int32_t res; 347 asm("%0 = ct1(%1)\n\t" : "=r"(res) : "r"(x)); 348 return res; 349 } 350 351 void test_count_trailing_zeros_ones(void) 352 { 353 check32(ct0(0x0000000f), 0); 354 check32(ct0(0x00000000), 32); 355 check32(ct0(0x000000f0), 4); 356 357 check32(ct1(0x000000f0), 0); 358 check32(ct1(0x0000000f), 4); 359 check32(ct1(0x00000000), 0); 360 check32(ct1(0xffffffff), 32); 361 362 check32(ct0p(0x000000000000000fULL), 0); 363 check32(ct0p(0x0000000000000000ULL), 64); 364 check32(ct0p(0x00000000000000f0ULL), 4); 365 366 check32(ct1p(0x00000000000000f0ULL), 0); 367 check32(ct1p(0x000000000000000fULL), 4); 368 check32(ct1p(0x0000000000000000ULL), 0); 369 check32(ct1p(0xffffffffffffffffULL), 64); 370 check32(ct1p(0xffffffffff0fffffULL), 20); 371 check32(ct1p(0xffffff0fffffffffULL), 36); 372 } 373 374 static inline int32_t dpmpyss_rnd_s0(int32_t x, int32_t y) 375 { 376 int32_t res; 377 asm("%0 = mpy(%1, %2):rnd\n\t" : "=r"(res) : "r"(x), "r"(y)); 378 return res; 379 } 380 381 void test_dpmpyss_rnd_s0(void) 382 { 383 check32(dpmpyss_rnd_s0(-1, 0x80000000), 1); 384 check32(dpmpyss_rnd_s0(0, 0x80000000), 0); 385 check32(dpmpyss_rnd_s0(1, 0x80000000), 0); 386 check32(dpmpyss_rnd_s0(0x7fffffff, 0x80000000), 0xc0000001); 387 check32(dpmpyss_rnd_s0(0x80000000, -1), 1); 388 check32(dpmpyss_rnd_s0(-1, -1), 0); 389 check32(dpmpyss_rnd_s0(0, -1), 0); 390 check32(dpmpyss_rnd_s0(1, -1), 0); 391 check32(dpmpyss_rnd_s0(0x7fffffff, -1), 0); 392 check32(dpmpyss_rnd_s0(0x80000000, 0), 0); 393 check32(dpmpyss_rnd_s0(-1, 0), 0); 394 check32(dpmpyss_rnd_s0(0, 0), 0); 395 check32(dpmpyss_rnd_s0(1, 0), 0); 396 check32(dpmpyss_rnd_s0(-1, -1), 0); 397 check32(dpmpyss_rnd_s0(0, -1), 0); 398 check32(dpmpyss_rnd_s0(1, -1), 0); 399 check32(dpmpyss_rnd_s0(0x7fffffff, 1), 0); 400 check32(dpmpyss_rnd_s0(0x80000000, 0x7fffffff), 0xc0000001); 401 check32(dpmpyss_rnd_s0(-1, 0x7fffffff), 0); 402 check32(dpmpyss_rnd_s0(0, 0x7fffffff), 0); 403 check32(dpmpyss_rnd_s0(1, 0x7fffffff), 0); 404 check32(dpmpyss_rnd_s0(0x7fffffff, 0x7fffffff), 0x3fffffff); 405 } 406 407 int main() 408 { 409 int32_t res; 410 int64_t res64; 411 bool pred; 412 413 memcpy(array, init, sizeof(array)); 414 S4_storerhnew_rr(array, 4, 0xffff); 415 check32(array[4], 0xffff); 416 417 data = ~0; 418 checkp(S4_storerbnew_ap(0x12), &data); 419 check32(data, 0xffffff12); 420 421 data = ~0; 422 checkp(S4_storerhnew_ap(0x1234), &data); 423 check32(data, 0xffff1234); 424 425 data = ~0; 426 checkp(S4_storerinew_ap(0x12345678), &data); 427 check32(data, 0x12345678); 428 429 /* Byte */ 430 memcpy(array, init, sizeof(array)); 431 S4_storeirbt_io(&array[1], true); 432 check32(array[2], 27); 433 S4_storeirbt_io(&array[2], false); 434 check32(array[3], 3); 435 436 memcpy(array, init, sizeof(array)); 437 S4_storeirbf_io(&array[3], false); 438 check32(array[4], 27); 439 S4_storeirbf_io(&array[4], true); 440 check32(array[5], 5); 441 442 memcpy(array, init, sizeof(array)); 443 S4_storeirbtnew_io(&array[5], true); 444 check32(array[6], 27); 445 S4_storeirbtnew_io(&array[6], false); 446 check32(array[7], 7); 447 448 memcpy(array, init, sizeof(array)); 449 S4_storeirbfnew_io(&array[7], false); 450 check32(array[8], 27); 451 S4_storeirbfnew_io(&array[8], true); 452 check32(array[9], 9); 453 454 /* Half word */ 455 memcpy(array, init, sizeof(array)); 456 S4_storeirht_io(&array[1], true); 457 check32(array[2], 27); 458 S4_storeirht_io(&array[2], false); 459 check32(array[3], 3); 460 461 memcpy(array, init, sizeof(array)); 462 S4_storeirhf_io(&array[3], false); 463 check32(array[4], 27); 464 S4_storeirhf_io(&array[4], true); 465 check32(array[5], 5); 466 467 memcpy(array, init, sizeof(array)); 468 S4_storeirhtnew_io(&array[5], true); 469 check32(array[6], 27); 470 S4_storeirhtnew_io(&array[6], false); 471 check32(array[7], 7); 472 473 memcpy(array, init, sizeof(array)); 474 S4_storeirhfnew_io(&array[7], false); 475 check32(array[8], 27); 476 S4_storeirhfnew_io(&array[8], true); 477 check32(array[9], 9); 478 479 /* Word */ 480 memcpy(array, init, sizeof(array)); 481 S4_storeirit_io(&array[1], true); 482 check32(array[2], 27); 483 S4_storeirit_io(&array[2], false); 484 check32(array[3], 3); 485 486 memcpy(array, init, sizeof(array)); 487 S4_storeirif_io(&array[3], false); 488 check32(array[4], 27); 489 S4_storeirif_io(&array[4], true); 490 check32(array[5], 5); 491 492 memcpy(array, init, sizeof(array)); 493 S4_storeiritnew_io(&array[5], true); 494 check32(array[6], 27); 495 S4_storeiritnew_io(&array[6], false); 496 check32(array[7], 7); 497 498 memcpy(array, init, sizeof(array)); 499 S4_storeirifnew_io(&array[7], false); 500 check32(array[8], 27); 501 S4_storeirifnew_io(&array[8], true); 502 check32(array[9], 9); 503 504 memcpy(array, init, sizeof(array)); 505 res = L2_ploadrifnew_pi(&array[6], false); 506 check32(res, 6); 507 res = L2_ploadrifnew_pi(&array[7], true); 508 check32(res, 31); 509 510 res = cmpnd_cmp_jump(); 511 check32(res, 12); 512 513 SL2_return_tnew(false); 514 check32(early_exit, false); 515 SL2_return_tnew(true); 516 check32(early_exit, true); 517 518 res64 = creg_pair(5, 7); 519 check32((int32_t)res64, 5); 520 check32((int32_t)(res64 >> 32), 7); 521 522 res = test_clrtnew(1, 7); 523 check32(res, 0); 524 res = test_clrtnew(2, 7); 525 check32(res, 7); 526 527 #if CORE_HAS_CABAC 528 res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); 529 check64(res64, 0x357980003700010cLL); 530 check32(pred, false); 531 532 res64 = decbin(0xfLL, 0x1bLL, &pred); 533 check64(res64, 0x78000100LL); 534 check32(pred, true); 535 #else 536 puts("Skipping cabac tests"); 537 #endif 538 539 pred = auto_and(); 540 check32(pred, false); 541 542 test_lsbnew(); 543 544 test_l2fetch(); 545 546 test_count_trailing_zeros_ones(); 547 548 test_dpmpyss_rnd_s0(); 549 550 puts(err ? "FAIL" : "PASS"); 551 return err; 552 } 553