1*46ef47e2STaylor Simpson /* 2*46ef47e2STaylor Simpson * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 3*46ef47e2STaylor Simpson * 4*46ef47e2STaylor Simpson * This program is free software; you can redistribute it and/or modify 5*46ef47e2STaylor Simpson * it under the terms of the GNU General Public License as published by 6*46ef47e2STaylor Simpson * the Free Software Foundation; either version 2 of the License, or 7*46ef47e2STaylor Simpson * (at your option) any later version. 8*46ef47e2STaylor Simpson * 9*46ef47e2STaylor Simpson * This program is distributed in the hope that it will be useful, 10*46ef47e2STaylor Simpson * but WITHOUT ANY WARRANTY; without even the implied warranty of 11*46ef47e2STaylor Simpson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12*46ef47e2STaylor Simpson * GNU General Public License for more details. 13*46ef47e2STaylor Simpson * 14*46ef47e2STaylor Simpson * You should have received a copy of the GNU General Public License 15*46ef47e2STaylor Simpson * along with this program; if not, see <http://www.gnu.org/licenses/>. 16*46ef47e2STaylor Simpson */ 17*46ef47e2STaylor Simpson 18*46ef47e2STaylor Simpson #include <stdio.h> 19*46ef47e2STaylor Simpson 20*46ef47e2STaylor Simpson #define DEBUG 0 21*46ef47e2STaylor Simpson #define DEBUG_PRINTF(...) \ 22*46ef47e2STaylor Simpson do { \ 23*46ef47e2STaylor Simpson if (DEBUG) { \ 24*46ef47e2STaylor Simpson printf(__VA_ARGS__); \ 25*46ef47e2STaylor Simpson } \ 26*46ef47e2STaylor Simpson } while (0) 27*46ef47e2STaylor Simpson 28*46ef47e2STaylor Simpson 29*46ef47e2STaylor Simpson #define NBYTES (1 << 8) 30*46ef47e2STaylor Simpson #define NHALFS (NBYTES / sizeof(short)) 31*46ef47e2STaylor Simpson #define NWORDS (NBYTES / sizeof(int)) 32*46ef47e2STaylor Simpson #define NDOBLS (NBYTES / sizeof(long long)) 33*46ef47e2STaylor Simpson 34*46ef47e2STaylor Simpson long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0}; 35*46ef47e2STaylor Simpson int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0}; 36*46ef47e2STaylor Simpson short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0}; 37*46ef47e2STaylor Simpson unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0}; 38*46ef47e2STaylor Simpson 39*46ef47e2STaylor Simpson /* 40*46ef47e2STaylor Simpson * We use the C preporcessor to deal with the combinations of types 41*46ef47e2STaylor Simpson */ 42*46ef47e2STaylor Simpson 43*46ef47e2STaylor Simpson #define INIT(BUF, N) \ 44*46ef47e2STaylor Simpson void init_##BUF(void) \ 45*46ef47e2STaylor Simpson { \ 46*46ef47e2STaylor Simpson int i; \ 47*46ef47e2STaylor Simpson for (i = 0; i < N; i++) { \ 48*46ef47e2STaylor Simpson BUF[i] = i; \ 49*46ef47e2STaylor Simpson } \ 50*46ef47e2STaylor Simpson } \ 51*46ef47e2STaylor Simpson 52*46ef47e2STaylor Simpson INIT(bbuf, NBYTES) 53*46ef47e2STaylor Simpson INIT(hbuf, NHALFS) 54*46ef47e2STaylor Simpson INIT(wbuf, NWORDS) 55*46ef47e2STaylor Simpson INIT(dbuf, NDOBLS) 56*46ef47e2STaylor Simpson 57*46ef47e2STaylor Simpson /* 58*46ef47e2STaylor Simpson * Macros for performing circular load 59*46ef47e2STaylor Simpson * RES result 60*46ef47e2STaylor Simpson * ADDR address 61*46ef47e2STaylor Simpson * START start address of buffer 62*46ef47e2STaylor Simpson * LEN length of buffer (in bytes) 63*46ef47e2STaylor Simpson * INC address increment (in bytes for IMM, elements for REG) 64*46ef47e2STaylor Simpson */ 65*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \ 66*46ef47e2STaylor Simpson __asm__( \ 67*46ef47e2STaylor Simpson "r4 = %3\n\t" \ 68*46ef47e2STaylor Simpson "m0 = r4\n\t" \ 69*46ef47e2STaylor Simpson "cs0 = %2\n\t" \ 70*46ef47e2STaylor Simpson "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \ 71*46ef47e2STaylor Simpson : "=r"(RES), "+r"(ADDR) \ 72*46ef47e2STaylor Simpson : "r"(START), "r"(LEN) \ 73*46ef47e2STaylor Simpson : "r4", "m0", "cs0") 74*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \ 75*46ef47e2STaylor Simpson CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC) 76*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ 77*46ef47e2STaylor Simpson CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) 78*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ 79*46ef47e2STaylor Simpson CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) 80*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ 81*46ef47e2STaylor Simpson CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) 82*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ 83*46ef47e2STaylor Simpson CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) 84*46ef47e2STaylor Simpson #define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ 85*46ef47e2STaylor Simpson CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) 86*46ef47e2STaylor Simpson 87*46ef47e2STaylor Simpson /* 88*46ef47e2STaylor Simpson * The mreg has the following pieces 89*46ef47e2STaylor Simpson * mreg[31:28] increment[10:7] 90*46ef47e2STaylor Simpson * mreg[27:24] K value (used Hexagon v3 and earlier) 91*46ef47e2STaylor Simpson * mreg[23:17] increment[6:0] 92*46ef47e2STaylor Simpson * mreg[16:0] circular buffer length 93*46ef47e2STaylor Simpson */ 94*46ef47e2STaylor Simpson static int build_mreg(int inc, int K, int len) 95*46ef47e2STaylor Simpson { 96*46ef47e2STaylor Simpson return ((inc & 0x780) << 21) | 97*46ef47e2STaylor Simpson ((K & 0xf) << 24) | 98*46ef47e2STaylor Simpson ((inc & 0x7f) << 17) | 99*46ef47e2STaylor Simpson (len & 0x1ffff); 100*46ef47e2STaylor Simpson } 101*46ef47e2STaylor Simpson 102*46ef47e2STaylor Simpson #define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ 103*46ef47e2STaylor Simpson __asm__( \ 104*46ef47e2STaylor Simpson "r4 = %2\n\t" \ 105*46ef47e2STaylor Simpson "m1 = r4\n\t" \ 106*46ef47e2STaylor Simpson "cs1 = %3\n\t" \ 107*46ef47e2STaylor Simpson "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ 108*46ef47e2STaylor Simpson : "=r"(RES), "+r"(ADDR) \ 109*46ef47e2STaylor Simpson : "r"(build_mreg((INC), 0, (LEN))), \ 110*46ef47e2STaylor Simpson "r"(START) \ 111*46ef47e2STaylor Simpson : "r4", "m1", "cs1") 112*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ 113*46ef47e2STaylor Simpson CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) 114*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ 115*46ef47e2STaylor Simpson CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) 116*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ 117*46ef47e2STaylor Simpson CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) 118*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ 119*46ef47e2STaylor Simpson CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) 120*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ 121*46ef47e2STaylor Simpson CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) 122*46ef47e2STaylor Simpson #define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ 123*46ef47e2STaylor Simpson CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) 124*46ef47e2STaylor Simpson 125*46ef47e2STaylor Simpson /* 126*46ef47e2STaylor Simpson * Macros for performing circular store 127*46ef47e2STaylor Simpson * VAL value to store 128*46ef47e2STaylor Simpson * ADDR address 129*46ef47e2STaylor Simpson * START start address of buffer 130*46ef47e2STaylor Simpson * LEN length of buffer (in bytes) 131*46ef47e2STaylor Simpson * INC address increment (in bytes for IMM, elements for REG) 132*46ef47e2STaylor Simpson */ 133*46ef47e2STaylor Simpson #define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ 134*46ef47e2STaylor Simpson __asm__( \ 135*46ef47e2STaylor Simpson "r4 = %3\n\t" \ 136*46ef47e2STaylor Simpson "m0 = r4\n\t" \ 137*46ef47e2STaylor Simpson "cs0 = %1\n\t" \ 138*46ef47e2STaylor Simpson "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ 139*46ef47e2STaylor Simpson : "+r"(ADDR) \ 140*46ef47e2STaylor Simpson : "r"(START), "r"(VAL), "r"(LEN) \ 141*46ef47e2STaylor Simpson : "r4", "m0", "cs0", "memory") 142*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ 143*46ef47e2STaylor Simpson CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) 144*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ 145*46ef47e2STaylor Simpson CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) 146*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ 147*46ef47e2STaylor Simpson CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) 148*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \ 149*46ef47e2STaylor Simpson CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) 150*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ 151*46ef47e2STaylor Simpson CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) 152*46ef47e2STaylor Simpson 153*46ef47e2STaylor Simpson #define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ 154*46ef47e2STaylor Simpson __asm__( \ 155*46ef47e2STaylor Simpson "r4 = %3\n\t" \ 156*46ef47e2STaylor Simpson "m0 = r4\n\t" \ 157*46ef47e2STaylor Simpson "cs0 = %1\n\t" \ 158*46ef47e2STaylor Simpson "{\n\t" \ 159*46ef47e2STaylor Simpson " r5 = %2\n\t" \ 160*46ef47e2STaylor Simpson " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ 161*46ef47e2STaylor Simpson "}\n\t" \ 162*46ef47e2STaylor Simpson : "+r"(ADDR) \ 163*46ef47e2STaylor Simpson : "r"(START), "r"(VAL), "r"(LEN) \ 164*46ef47e2STaylor Simpson : "r4", "r5", "m0", "cs0", "memory") 165*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ 166*46ef47e2STaylor Simpson CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) 167*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ 168*46ef47e2STaylor Simpson CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) 169*46ef47e2STaylor Simpson #define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ 170*46ef47e2STaylor Simpson CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) 171*46ef47e2STaylor Simpson 172*46ef47e2STaylor Simpson #define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ 173*46ef47e2STaylor Simpson __asm__( \ 174*46ef47e2STaylor Simpson "r4 = %1\n\t" \ 175*46ef47e2STaylor Simpson "m1 = r4\n\t" \ 176*46ef47e2STaylor Simpson "cs1 = %2\n\t" \ 177*46ef47e2STaylor Simpson "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ 178*46ef47e2STaylor Simpson : "+r"(ADDR) \ 179*46ef47e2STaylor Simpson : "r"(build_mreg((INC), 0, (LEN))), \ 180*46ef47e2STaylor Simpson "r"(START), \ 181*46ef47e2STaylor Simpson "r"(VAL) \ 182*46ef47e2STaylor Simpson : "r4", "m1", "cs1", "memory") 183*46ef47e2STaylor Simpson #define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ 184*46ef47e2STaylor Simpson CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) 185*46ef47e2STaylor Simpson #define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ 186*46ef47e2STaylor Simpson CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) 187*46ef47e2STaylor Simpson #define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ 188*46ef47e2STaylor Simpson CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) 189*46ef47e2STaylor Simpson #define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ 190*46ef47e2STaylor Simpson CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC) 191*46ef47e2STaylor Simpson #define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ 192*46ef47e2STaylor Simpson CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) 193*46ef47e2STaylor Simpson 194*46ef47e2STaylor Simpson #define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ 195*46ef47e2STaylor Simpson __asm__( \ 196*46ef47e2STaylor Simpson "r4 = %1\n\t" \ 197*46ef47e2STaylor Simpson "m1 = r4\n\t" \ 198*46ef47e2STaylor Simpson "cs1 = %2\n\t" \ 199*46ef47e2STaylor Simpson "{\n\t" \ 200*46ef47e2STaylor Simpson " r5 = %3\n\t" \ 201*46ef47e2STaylor Simpson " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ 202*46ef47e2STaylor Simpson "}\n\t" \ 203*46ef47e2STaylor Simpson : "+r"(ADDR) \ 204*46ef47e2STaylor Simpson : "r"(build_mreg((INC), 0, (LEN))), \ 205*46ef47e2STaylor Simpson "r"(START), \ 206*46ef47e2STaylor Simpson "r"(VAL) \ 207*46ef47e2STaylor Simpson : "r4", "r5", "m1", "cs1", "memory") 208*46ef47e2STaylor Simpson #define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ 209*46ef47e2STaylor Simpson CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) 210*46ef47e2STaylor Simpson #define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ 211*46ef47e2STaylor Simpson CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) 212*46ef47e2STaylor Simpson #define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ 213*46ef47e2STaylor Simpson CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) 214*46ef47e2STaylor Simpson 215*46ef47e2STaylor Simpson 216*46ef47e2STaylor Simpson int err; 217*46ef47e2STaylor Simpson 218*46ef47e2STaylor Simpson /* We'll test increments +1 and -1 */ 219*46ef47e2STaylor Simpson void check_load(int i, long long result, int inc, int size) 220*46ef47e2STaylor Simpson { 221*46ef47e2STaylor Simpson int expect = (i * inc); 222*46ef47e2STaylor Simpson while (expect >= size) { 223*46ef47e2STaylor Simpson expect -= size; 224*46ef47e2STaylor Simpson } 225*46ef47e2STaylor Simpson while (expect < 0) { 226*46ef47e2STaylor Simpson expect += size; 227*46ef47e2STaylor Simpson } 228*46ef47e2STaylor Simpson if (result != expect) { 229*46ef47e2STaylor Simpson printf("ERROR(%d): %lld != %d\n", i, result, expect); 230*46ef47e2STaylor Simpson err++; 231*46ef47e2STaylor Simpson } 232*46ef47e2STaylor Simpson } 233*46ef47e2STaylor Simpson 234*46ef47e2STaylor Simpson #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ 235*46ef47e2STaylor Simpson void circ_test_load_imm_##SZ(void) \ 236*46ef47e2STaylor Simpson { \ 237*46ef47e2STaylor Simpson TYPE *p = (TYPE *)BUF; \ 238*46ef47e2STaylor Simpson int size = 10; \ 239*46ef47e2STaylor Simpson int i; \ 240*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 241*46ef47e2STaylor Simpson TYPE element; \ 242*46ef47e2STaylor Simpson CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ 243*46ef47e2STaylor Simpson DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 244*46ef47e2STaylor Simpson i, p, element); \ 245*46ef47e2STaylor Simpson check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ 246*46ef47e2STaylor Simpson } \ 247*46ef47e2STaylor Simpson p = (TYPE *)BUF; \ 248*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 249*46ef47e2STaylor Simpson TYPE element; \ 250*46ef47e2STaylor Simpson CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ 251*46ef47e2STaylor Simpson DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 252*46ef47e2STaylor Simpson i, p, element); \ 253*46ef47e2STaylor Simpson check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ 254*46ef47e2STaylor Simpson } \ 255*46ef47e2STaylor Simpson } 256*46ef47e2STaylor Simpson 257*46ef47e2STaylor Simpson TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d) 258*46ef47e2STaylor Simpson TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d) 259*46ef47e2STaylor Simpson TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d) 260*46ef47e2STaylor Simpson TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d) 261*46ef47e2STaylor Simpson TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d) 262*46ef47e2STaylor Simpson TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld) 263*46ef47e2STaylor Simpson 264*46ef47e2STaylor Simpson #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ 265*46ef47e2STaylor Simpson void circ_test_load_reg_##SZ(void) \ 266*46ef47e2STaylor Simpson { \ 267*46ef47e2STaylor Simpson TYPE *p = (TYPE *)BUF; \ 268*46ef47e2STaylor Simpson int size = 13; \ 269*46ef47e2STaylor Simpson int i; \ 270*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 271*46ef47e2STaylor Simpson TYPE element; \ 272*46ef47e2STaylor Simpson CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \ 273*46ef47e2STaylor Simpson DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 274*46ef47e2STaylor Simpson i, p, element); \ 275*46ef47e2STaylor Simpson check_load(i, element, 1, size); \ 276*46ef47e2STaylor Simpson } \ 277*46ef47e2STaylor Simpson p = (TYPE *)BUF; \ 278*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 279*46ef47e2STaylor Simpson TYPE element; \ 280*46ef47e2STaylor Simpson CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ 281*46ef47e2STaylor Simpson DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ 282*46ef47e2STaylor Simpson i, p, element); \ 283*46ef47e2STaylor Simpson check_load(i, element, -1, size); \ 284*46ef47e2STaylor Simpson } \ 285*46ef47e2STaylor Simpson } 286*46ef47e2STaylor Simpson 287*46ef47e2STaylor Simpson TEST_LOAD_REG(b, char, bbuf, NBYTES, d) 288*46ef47e2STaylor Simpson TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d) 289*46ef47e2STaylor Simpson TEST_LOAD_REG(h, short, hbuf, NHALFS, d) 290*46ef47e2STaylor Simpson TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d) 291*46ef47e2STaylor Simpson TEST_LOAD_REG(w, int, wbuf, NWORDS, d) 292*46ef47e2STaylor Simpson TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld) 293*46ef47e2STaylor Simpson 294*46ef47e2STaylor Simpson /* The circular stores will wrap around somewhere inside the buffer */ 295*46ef47e2STaylor Simpson #define CIRC_VAL(SZ, TYPE, BUFSIZE) \ 296*46ef47e2STaylor Simpson TYPE circ_val_##SZ(int i, int inc, int size) \ 297*46ef47e2STaylor Simpson { \ 298*46ef47e2STaylor Simpson int mod = BUFSIZE % size; \ 299*46ef47e2STaylor Simpson int elem = i * inc; \ 300*46ef47e2STaylor Simpson if (elem < 0) { \ 301*46ef47e2STaylor Simpson if (-elem <= size - mod) { \ 302*46ef47e2STaylor Simpson return (elem + BUFSIZE - mod); \ 303*46ef47e2STaylor Simpson } else { \ 304*46ef47e2STaylor Simpson return (elem + BUFSIZE + size - mod); \ 305*46ef47e2STaylor Simpson } \ 306*46ef47e2STaylor Simpson } else if (elem < mod) {\ 307*46ef47e2STaylor Simpson return (elem + BUFSIZE - mod); \ 308*46ef47e2STaylor Simpson } else { \ 309*46ef47e2STaylor Simpson return (elem + BUFSIZE - size - mod); \ 310*46ef47e2STaylor Simpson } \ 311*46ef47e2STaylor Simpson } 312*46ef47e2STaylor Simpson 313*46ef47e2STaylor Simpson CIRC_VAL(b, unsigned char, NBYTES) 314*46ef47e2STaylor Simpson CIRC_VAL(h, short, NHALFS) 315*46ef47e2STaylor Simpson CIRC_VAL(w, int, NWORDS) 316*46ef47e2STaylor Simpson CIRC_VAL(d, long long, NDOBLS) 317*46ef47e2STaylor Simpson 318*46ef47e2STaylor Simpson /* 319*46ef47e2STaylor Simpson * Circular stores should only write to the first "size" elements of the buffer 320*46ef47e2STaylor Simpson * the remainder of the elements should have BUF[i] == i 321*46ef47e2STaylor Simpson */ 322*46ef47e2STaylor Simpson #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ 323*46ef47e2STaylor Simpson void check_store_##SZ(int inc, int size) \ 324*46ef47e2STaylor Simpson { \ 325*46ef47e2STaylor Simpson int i; \ 326*46ef47e2STaylor Simpson for (i = 0; i < size; i++) { \ 327*46ef47e2STaylor Simpson DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ 328*46ef47e2STaylor Simpson i, BUF[i], circ_val_##SZ(i, inc, size)); \ 329*46ef47e2STaylor Simpson if (BUF[i] != circ_val_##SZ(i, inc, size)) { \ 330*46ef47e2STaylor Simpson printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \ 331*46ef47e2STaylor Simpson i, BUF[i], circ_val_##SZ(i, inc, size)); \ 332*46ef47e2STaylor Simpson err++; \ 333*46ef47e2STaylor Simpson } \ 334*46ef47e2STaylor Simpson } \ 335*46ef47e2STaylor Simpson for (i = size; i < BUFSIZE; i++) { \ 336*46ef47e2STaylor Simpson if (BUF[i] != i) { \ 337*46ef47e2STaylor Simpson printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \ 338*46ef47e2STaylor Simpson err++; \ 339*46ef47e2STaylor Simpson } \ 340*46ef47e2STaylor Simpson } \ 341*46ef47e2STaylor Simpson } 342*46ef47e2STaylor Simpson 343*46ef47e2STaylor Simpson CHECK_STORE(b, bbuf, NBYTES, x) 344*46ef47e2STaylor Simpson CHECK_STORE(h, hbuf, NHALFS, x) 345*46ef47e2STaylor Simpson CHECK_STORE(w, wbuf, NWORDS, x) 346*46ef47e2STaylor Simpson CHECK_STORE(d, dbuf, NDOBLS, llx) 347*46ef47e2STaylor Simpson 348*46ef47e2STaylor Simpson #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ 349*46ef47e2STaylor Simpson void circ_test_store_imm_##SZ(void) \ 350*46ef47e2STaylor Simpson { \ 351*46ef47e2STaylor Simpson unsigned int size = 27; \ 352*46ef47e2STaylor Simpson TYPE *p = BUF; \ 353*46ef47e2STaylor Simpson TYPE val = 0; \ 354*46ef47e2STaylor Simpson int i; \ 355*46ef47e2STaylor Simpson init_##BUF(); \ 356*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 357*46ef47e2STaylor Simpson CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ 358*46ef47e2STaylor Simpson val++; \ 359*46ef47e2STaylor Simpson } \ 360*46ef47e2STaylor Simpson check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ 361*46ef47e2STaylor Simpson p = BUF; \ 362*46ef47e2STaylor Simpson val = 0; \ 363*46ef47e2STaylor Simpson init_##BUF(); \ 364*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 365*46ef47e2STaylor Simpson CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ 366*46ef47e2STaylor Simpson -(INC)); \ 367*46ef47e2STaylor Simpson val++; \ 368*46ef47e2STaylor Simpson } \ 369*46ef47e2STaylor Simpson check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ 370*46ef47e2STaylor Simpson } 371*46ef47e2STaylor Simpson 372*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1) 373*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2) 374*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2) 375*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4) 376*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8) 377*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1) 378*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2) 379*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4) 380*46ef47e2STaylor Simpson 381*46ef47e2STaylor Simpson #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ 382*46ef47e2STaylor Simpson void circ_test_store_reg_##SZ(void) \ 383*46ef47e2STaylor Simpson { \ 384*46ef47e2STaylor Simpson TYPE *p = BUF; \ 385*46ef47e2STaylor Simpson unsigned int size = 19; \ 386*46ef47e2STaylor Simpson TYPE val = 0; \ 387*46ef47e2STaylor Simpson int i; \ 388*46ef47e2STaylor Simpson init_##BUF(); \ 389*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 390*46ef47e2STaylor Simpson CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ 391*46ef47e2STaylor Simpson val++; \ 392*46ef47e2STaylor Simpson } \ 393*46ef47e2STaylor Simpson check_store_##CHK(1, size); \ 394*46ef47e2STaylor Simpson p = BUF; \ 395*46ef47e2STaylor Simpson val = 0; \ 396*46ef47e2STaylor Simpson init_##BUF(); \ 397*46ef47e2STaylor Simpson for (i = 0; i < BUFSIZE; i++) { \ 398*46ef47e2STaylor Simpson CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ 399*46ef47e2STaylor Simpson val++; \ 400*46ef47e2STaylor Simpson } \ 401*46ef47e2STaylor Simpson check_store_##CHK(-1, size); \ 402*46ef47e2STaylor Simpson } 403*46ef47e2STaylor Simpson 404*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0) 405*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0) 406*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16) 407*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0) 408*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0) 409*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0) 410*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0) 411*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0) 412*46ef47e2STaylor Simpson 413*46ef47e2STaylor Simpson /* Test the old scheme used in Hexagon V3 */ 414*46ef47e2STaylor Simpson static void circ_test_v3(void) 415*46ef47e2STaylor Simpson { 416*46ef47e2STaylor Simpson int *p = wbuf; 417*46ef47e2STaylor Simpson int size = 15; 418*46ef47e2STaylor Simpson int K = 4; /* 64 bytes */ 419*46ef47e2STaylor Simpson int element; 420*46ef47e2STaylor Simpson int i; 421*46ef47e2STaylor Simpson 422*46ef47e2STaylor Simpson init_wbuf(); 423*46ef47e2STaylor Simpson 424*46ef47e2STaylor Simpson for (i = 0; i < NWORDS; i++) { 425*46ef47e2STaylor Simpson __asm__( 426*46ef47e2STaylor Simpson "r4 = %2\n\t" 427*46ef47e2STaylor Simpson "m1 = r4\n\t" 428*46ef47e2STaylor Simpson "%0 = memw(%1++I:circ(M1))\n\t" 429*46ef47e2STaylor Simpson : "=r"(element), "+r"(p) 430*46ef47e2STaylor Simpson : "r"(build_mreg(1, K, size * sizeof(int))) 431*46ef47e2STaylor Simpson : "r4", "m1"); 432*46ef47e2STaylor Simpson DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); 433*46ef47e2STaylor Simpson check_load(i, element, 1, size); 434*46ef47e2STaylor Simpson } 435*46ef47e2STaylor Simpson } 436*46ef47e2STaylor Simpson 437*46ef47e2STaylor Simpson int main() 438*46ef47e2STaylor Simpson { 439*46ef47e2STaylor Simpson init_bbuf(); 440*46ef47e2STaylor Simpson init_hbuf(); 441*46ef47e2STaylor Simpson init_wbuf(); 442*46ef47e2STaylor Simpson init_dbuf(); 443*46ef47e2STaylor Simpson 444*46ef47e2STaylor Simpson DEBUG_PRINTF("NBYTES = %d\n", NBYTES); 445*46ef47e2STaylor Simpson DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); 446*46ef47e2STaylor Simpson DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); 447*46ef47e2STaylor Simpson DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); 448*46ef47e2STaylor Simpson DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); 449*46ef47e2STaylor Simpson 450*46ef47e2STaylor Simpson circ_test_load_imm_b(); 451*46ef47e2STaylor Simpson circ_test_load_imm_ub(); 452*46ef47e2STaylor Simpson circ_test_load_imm_h(); 453*46ef47e2STaylor Simpson circ_test_load_imm_uh(); 454*46ef47e2STaylor Simpson circ_test_load_imm_w(); 455*46ef47e2STaylor Simpson circ_test_load_imm_d(); 456*46ef47e2STaylor Simpson 457*46ef47e2STaylor Simpson circ_test_load_reg_b(); 458*46ef47e2STaylor Simpson circ_test_load_reg_ub(); 459*46ef47e2STaylor Simpson circ_test_load_reg_h(); 460*46ef47e2STaylor Simpson circ_test_load_reg_uh(); 461*46ef47e2STaylor Simpson circ_test_load_reg_w(); 462*46ef47e2STaylor Simpson circ_test_load_reg_d(); 463*46ef47e2STaylor Simpson 464*46ef47e2STaylor Simpson circ_test_store_imm_b(); 465*46ef47e2STaylor Simpson circ_test_store_imm_h(); 466*46ef47e2STaylor Simpson circ_test_store_imm_f(); 467*46ef47e2STaylor Simpson circ_test_store_imm_w(); 468*46ef47e2STaylor Simpson circ_test_store_imm_d(); 469*46ef47e2STaylor Simpson circ_test_store_imm_bnew(); 470*46ef47e2STaylor Simpson circ_test_store_imm_hnew(); 471*46ef47e2STaylor Simpson circ_test_store_imm_wnew(); 472*46ef47e2STaylor Simpson 473*46ef47e2STaylor Simpson circ_test_store_reg_b(); 474*46ef47e2STaylor Simpson circ_test_store_reg_h(); 475*46ef47e2STaylor Simpson circ_test_store_reg_f(); 476*46ef47e2STaylor Simpson circ_test_store_reg_w(); 477*46ef47e2STaylor Simpson circ_test_store_reg_d(); 478*46ef47e2STaylor Simpson circ_test_store_reg_bnew(); 479*46ef47e2STaylor Simpson circ_test_store_reg_hnew(); 480*46ef47e2STaylor Simpson circ_test_store_reg_wnew(); 481*46ef47e2STaylor Simpson 482*46ef47e2STaylor Simpson circ_test_v3(); 483*46ef47e2STaylor Simpson 484*46ef47e2STaylor Simpson puts(err ? "FAIL" : "PASS"); 485*46ef47e2STaylor Simpson return err ? 1 : 0; 486*46ef47e2STaylor Simpson } 487