1 /* 2 * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <stdint.h> 20 21 int err; 22 23 #include "hex_test.h" 24 25 #define DEBUG 0 26 #define DEBUG_PRINTF(...) \ 27 do { \ 28 if (DEBUG) { \ 29 printf(__VA_ARGS__); \ 30 } \ 31 } while (0) 32 33 34 #define NBYTES (1 << 8) 35 #define NHALFS (NBYTES / sizeof(short)) 36 #define NWORDS (NBYTES / sizeof(int)) 37 #define NDOBLS (NBYTES / sizeof(long long)) 38 39 int64_t dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0}; 40 int32_t wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0}; 41 int16_t hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0}; 42 uint8_t bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0}; 43 44 /* 45 * We use the C preporcessor to deal with the combinations of types 46 */ 47 48 #define INIT(BUF, N) \ 49 void init_##BUF(void) \ 50 { \ 51 for (int i = 0; i < N; i++) { \ 52 BUF[i] = i; \ 53 } \ 54 } \ 55 56 INIT(bbuf, NBYTES) 57 INIT(hbuf, NHALFS) 58 INIT(wbuf, NWORDS) 59 INIT(dbuf, NDOBLS) 60 61 /* 62 * Macros for performing circular load 63 * RES result 64 * ADDR address 65 * START start address of buffer 66 * LEN length of buffer (in bytes) 67 * INC address increment (in bytes for IMM, elements for REG) 68 */ 69 #define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \ 70 __asm__( \ 71 "r4 = %3\n\t" \ 72 "m0 = r4\n\t" \ 73 "cs0 = %2\n\t" \ 74 "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \ 75 : "=r"(RES), "+r"(ADDR) \ 76 : "r"(START), "r"(LEN) \ 77 : "r4", "m0", "cs0") 78 #define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \ 79 CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC) 80 #define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ 81 CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) 82 #define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ 83 CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) 84 #define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ 85 CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) 86 #define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ 87 CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) 88 #define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ 89 CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) 90 91 /* 92 * The mreg has the following pieces 93 * mreg[31:28] increment[10:7] 94 * mreg[27:24] K value (used Hexagon v3 and earlier) 95 * mreg[23:17] increment[6:0] 96 * mreg[16:0] circular buffer length 97 */ 98 static int32_t build_mreg(int32_t inc, int32_t K, int32_t len) 99 { 100 return ((inc & 0x780) << 21) | 101 ((K & 0xf) << 24) | 102 ((inc & 0x7f) << 17) | 103 (len & 0x1ffff); 104 } 105 106 #define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ 107 __asm__( \ 108 "r4 = %2\n\t" \ 109 "m1 = r4\n\t" \ 110 "cs1 = %3\n\t" \ 111 "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ 112 : "=r"(RES), "+r"(ADDR) \ 113 : "r"(build_mreg((INC), 0, (LEN))), \ 114 "r"(START) \ 115 : "r4", "m1", "cs1") 116 #define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ 117 CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) 118 #define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ 119 CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) 120 #define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ 121 CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) 122 #define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ 123 CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) 124 #define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ 125 CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) 126 #define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ 127 CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) 128 129 /* 130 * Macros for performing circular store 131 * VAL value to store 132 * ADDR address 133 * START start address of buffer 134 * LEN length of buffer (in bytes) 135 * INC address increment (in bytes for IMM, elements for REG) 136 */ 137 #define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ 138 __asm__( \ 139 "r4 = %3\n\t" \ 140 "m0 = r4\n\t" \ 141 "cs0 = %1\n\t" \ 142 "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ 143 : "+r"(ADDR) \ 144 : "r"(START), "r"(VAL), "r"(LEN) \ 145 : "r4", "m0", "cs0", "memory") 146 #define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ 147 CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) 148 #define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ 149 CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) 150 #define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ 151 CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) 152 #define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \ 153 CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) 154 #define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ 155 CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) 156 157 #define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ 158 __asm__( \ 159 "r4 = %3\n\t" \ 160 "m0 = r4\n\t" \ 161 "cs0 = %1\n\t" \ 162 "{\n\t" \ 163 " r5 = %2\n\t" \ 164 " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ 165 "}\n\t" \ 166 : "+r"(ADDR) \ 167 : "r"(START), "r"(VAL), "r"(LEN) \ 168 : "r4", "r5", "m0", "cs0", "memory") 169 #define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ 170 CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) 171 #define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ 172 CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) 173 #define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ 174 CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) 175 176 #define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ 177 __asm__( \ 178 "r4 = %1\n\t" \ 179 "m1 = r4\n\t" \ 180 "cs1 = %2\n\t" \ 181 "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ 182 : "+r"(ADDR) \ 183 : "r"(build_mreg((INC), 0, (LEN))), \ 184 "r"(START), \ 185 "r"(VAL) \ 186 : "r4", "m1", "cs1", "memory") 187 #define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ 188 CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) 189 #define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ 190 CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) 191 #define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ 192 CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) 193 #define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ 194 CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC) 195 #define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ 196 CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) 197 198 #define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ 199 __asm__( \ 200 "r4 = %1\n\t" \ 201 "m1 = r4\n\t" \ 202 "cs1 = %2\n\t" \ 203 "{\n\t" \ 204 " r5 = %3\n\t" \ 205 " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ 206 "}\n\t" \ 207 : "+r"(ADDR) \ 208 : "r"(build_mreg((INC), 0, (LEN))), \ 209 "r"(START), \ 210 "r"(VAL) \ 211 : "r4", "r5", "m1", "cs1", "memory") 212 #define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ 213 CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) 214 #define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ 215 CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) 216 #define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ 217 CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) 218 219 220 /* We'll test increments +1 and -1 */ 221 void __check_load(int line, int32_t i, int64_t res, int32_t inc, int32_t size) 222 { 223 int32_t expect = (i * inc); 224 while (expect >= size) { 225 expect -= size; 226 } 227 while (expect < 0) { 228 expect += size; 229 } 230 __check32(line, res, expect); 231 } 232 233 #define check_load(I, RES, INC, SZ) __check_load(__LINE__, I, RES, INC, SZ) 234 235 #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ 236 void circ_test_load_imm_##SZ(void) \ 237 { \ 238 TYPE *p = (TYPE *)BUF; \ 239 int32_t size = 10; \ 240 for (int i = 0; i < BUFSIZE; i++) { \ 241 TYPE element; \ 242 CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ 243 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 244 i, p, element); \ 245 check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ 246 } \ 247 p = (TYPE *)BUF; \ 248 for (int i = 0; i < BUFSIZE; i++) { \ 249 TYPE element; \ 250 CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ 251 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 252 i, p, element); \ 253 check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ 254 } \ 255 } 256 257 TEST_LOAD_IMM(b, int8_t, bbuf, NBYTES, 1, d) 258 TEST_LOAD_IMM(ub, uint8_t, bbuf, NBYTES, 1, d) 259 TEST_LOAD_IMM(h, int16_t, hbuf, NHALFS, 2, d) 260 TEST_LOAD_IMM(uh, uint16_t, hbuf, NHALFS, 2, d) 261 TEST_LOAD_IMM(w, int32_t, wbuf, NWORDS, 4, d) 262 TEST_LOAD_IMM(d, int64_t, dbuf, NDOBLS, 8, lld) 263 264 #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ 265 void circ_test_load_reg_##SZ(void) \ 266 { \ 267 TYPE *p = (TYPE *)BUF; \ 268 int32_t size = 13; \ 269 for (int i = 0; i < BUFSIZE; i++) { \ 270 TYPE element; \ 271 CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \ 272 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 273 i, p, element); \ 274 check_load(i, element, 1, size); \ 275 } \ 276 p = (TYPE *)BUF; \ 277 for (int i = 0; i < BUFSIZE; i++) { \ 278 TYPE element; \ 279 CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ 280 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 281 i, p, element); \ 282 check_load(i, element, -1, size); \ 283 } \ 284 } 285 286 TEST_LOAD_REG(b, int8_t, bbuf, NBYTES, d) 287 TEST_LOAD_REG(ub, uint8_t, bbuf, NBYTES, d) 288 TEST_LOAD_REG(h, int16_t, hbuf, NHALFS, d) 289 TEST_LOAD_REG(uh, uint16_t, hbuf, NHALFS, d) 290 TEST_LOAD_REG(w, int32_t, wbuf, NWORDS, d) 291 TEST_LOAD_REG(d, int64_t, dbuf, NDOBLS, lld) 292 293 /* The circular stores will wrap around somewhere inside the buffer */ 294 #define CIRC_VAL(SZ, TYPE, BUFSIZE) \ 295 TYPE circ_val_##SZ(int i, int32_t inc, int32_t size) \ 296 { \ 297 int mod = BUFSIZE % size; \ 298 int elem = i * inc; \ 299 if (elem < 0) { \ 300 if (-elem <= size - mod) { \ 301 return (elem + BUFSIZE - mod); \ 302 } else { \ 303 return (elem + BUFSIZE + size - mod); \ 304 } \ 305 } else if (elem < mod) {\ 306 return (elem + BUFSIZE - mod); \ 307 } else { \ 308 return (elem + BUFSIZE - size - mod); \ 309 } \ 310 } 311 312 CIRC_VAL(b, uint8_t, NBYTES) 313 CIRC_VAL(h, int16_t, NHALFS) 314 CIRC_VAL(w, int32_t, NWORDS) 315 CIRC_VAL(d, int64_t, NDOBLS) 316 317 /* 318 * Circular stores should only write to the first "size" elements of the buffer 319 * the remainder of the elements should have BUF[i] == i 320 */ 321 #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ 322 void check_store_##SZ(int32_t inc, int32_t size) \ 323 { \ 324 for (int i = 0; i < size; i++) { \ 325 DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ 326 i, BUF[i], circ_val_##SZ(i, inc, size)); \ 327 check64(BUF[i], circ_val_##SZ(i, inc, size)); \ 328 } \ 329 for (int i = size; i < BUFSIZE; i++) { \ 330 check64(BUF[i], i); \ 331 } \ 332 } 333 334 CHECK_STORE(b, bbuf, NBYTES, x) 335 CHECK_STORE(h, hbuf, NHALFS, x) 336 CHECK_STORE(w, wbuf, NWORDS, x) 337 CHECK_STORE(d, dbuf, NDOBLS, llx) 338 339 #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ 340 void circ_test_store_imm_##SZ(void) \ 341 { \ 342 uint32_t size = 27; \ 343 TYPE *p = BUF; \ 344 TYPE val = 0; \ 345 init_##BUF(); \ 346 for (int i = 0; i < BUFSIZE; i++) { \ 347 CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ 348 val++; \ 349 } \ 350 check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ 351 p = BUF; \ 352 val = 0; \ 353 init_##BUF(); \ 354 for (int i = 0; i < BUFSIZE; i++) { \ 355 CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ 356 -(INC)); \ 357 val++; \ 358 } \ 359 check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ 360 } 361 362 CIRC_TEST_STORE_IMM(b, b, uint8_t, bbuf, NBYTES, 0, 1) 363 CIRC_TEST_STORE_IMM(h, h, int16_t, hbuf, NHALFS, 0, 2) 364 CIRC_TEST_STORE_IMM(f, h, int16_t, hbuf, NHALFS, 16, 2) 365 CIRC_TEST_STORE_IMM(w, w, int32_t, wbuf, NWORDS, 0, 4) 366 CIRC_TEST_STORE_IMM(d, d, int64_t, dbuf, NDOBLS, 0, 8) 367 CIRC_TEST_STORE_IMM(bnew, b, uint8_t, bbuf, NBYTES, 0, 1) 368 CIRC_TEST_STORE_IMM(hnew, h, int16_t, hbuf, NHALFS, 0, 2) 369 CIRC_TEST_STORE_IMM(wnew, w, int32_t, wbuf, NWORDS, 0, 4) 370 371 #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ 372 void circ_test_store_reg_##SZ(void) \ 373 { \ 374 TYPE *p = BUF; \ 375 uint32_t size = 19; \ 376 TYPE val = 0; \ 377 init_##BUF(); \ 378 for (int i = 0; i < BUFSIZE; i++) { \ 379 CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ 380 val++; \ 381 } \ 382 check_store_##CHK(1, size); \ 383 p = BUF; \ 384 val = 0; \ 385 init_##BUF(); \ 386 for (int i = 0; i < BUFSIZE; i++) { \ 387 CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ 388 val++; \ 389 } \ 390 check_store_##CHK(-1, size); \ 391 } 392 393 CIRC_TEST_STORE_REG(b, b, uint8_t, bbuf, NBYTES, 0) 394 CIRC_TEST_STORE_REG(h, h, int16_t, hbuf, NHALFS, 0) 395 CIRC_TEST_STORE_REG(f, h, int16_t, hbuf, NHALFS, 16) 396 CIRC_TEST_STORE_REG(w, w, int32_t, wbuf, NWORDS, 0) 397 CIRC_TEST_STORE_REG(d, d, int64_t, dbuf, NDOBLS, 0) 398 CIRC_TEST_STORE_REG(bnew, b, uint8_t, bbuf, NBYTES, 0) 399 CIRC_TEST_STORE_REG(hnew, h, int16_t, hbuf, NHALFS, 0) 400 CIRC_TEST_STORE_REG(wnew, w, int32_t, wbuf, NWORDS, 0) 401 402 /* Test the old scheme used in Hexagon V3 */ 403 static void circ_test_v3(void) 404 { 405 int *p = wbuf; 406 int32_t size = 15; 407 /* set high bit in K to test unsigned extract in fcirc */ 408 int32_t K = 8; /* 1024 bytes */ 409 int32_t element; 410 411 init_wbuf(); 412 413 for (int i = 0; i < NWORDS; i++) { 414 __asm__( 415 "r4 = %2\n\t" 416 "m1 = r4\n\t" 417 "%0 = memw(%1++I:circ(M1))\n\t" 418 : "=r"(element), "+r"(p) 419 : "r"(build_mreg(1, K, size * sizeof(int))) 420 : "r4", "m1"); 421 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); 422 check_load(i, element, 1, size); 423 } 424 } 425 426 int main() 427 { 428 init_bbuf(); 429 init_hbuf(); 430 init_wbuf(); 431 init_dbuf(); 432 433 DEBUG_PRINTF("NBYTES = %d\n", NBYTES); 434 DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); 435 DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); 436 DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); 437 DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); 438 439 circ_test_load_imm_b(); 440 circ_test_load_imm_ub(); 441 circ_test_load_imm_h(); 442 circ_test_load_imm_uh(); 443 circ_test_load_imm_w(); 444 circ_test_load_imm_d(); 445 446 circ_test_load_reg_b(); 447 circ_test_load_reg_ub(); 448 circ_test_load_reg_h(); 449 circ_test_load_reg_uh(); 450 circ_test_load_reg_w(); 451 circ_test_load_reg_d(); 452 453 circ_test_store_imm_b(); 454 circ_test_store_imm_h(); 455 circ_test_store_imm_f(); 456 circ_test_store_imm_w(); 457 circ_test_store_imm_d(); 458 circ_test_store_imm_bnew(); 459 circ_test_store_imm_hnew(); 460 circ_test_store_imm_wnew(); 461 462 circ_test_store_reg_b(); 463 circ_test_store_reg_h(); 464 circ_test_store_reg_f(); 465 circ_test_store_reg_w(); 466 circ_test_store_reg_d(); 467 circ_test_store_reg_bnew(); 468 circ_test_store_reg_hnew(); 469 circ_test_store_reg_wnew(); 470 471 circ_test_v3(); 472 473 puts(err ? "FAIL" : "PASS"); 474 return err ? 1 : 0; 475 } 476