/* xref: /openbmc/qemu/tests/tcg/hexagon/circ.c (revision 9c9fff18c45b54fd9adf2282323aab1b6f0ec866) */
/*
 *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

1846ef47e2STaylor Simpson #include <stdio.h>
19*0d57cd61STaylor Simpson #include <stdint.h>
20*0d57cd61STaylor Simpson 
21*0d57cd61STaylor Simpson int err;
22*0d57cd61STaylor Simpson 
23*0d57cd61STaylor Simpson #include "hex_test.h"
2446ef47e2STaylor Simpson 
/*
 * Set DEBUG to 1 to trace every access.  With DEBUG at 0 the printf call is
 * still compiled (so its arguments stay type-checked) but never executed.
 */
#define DEBUG          0
#define DEBUG_PRINTF(...) \
    do { \
        if (DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)


/*
 * Every test buffer occupies NBYTES bytes; NHALFS/NWORDS/NDOBLS are the
 * matching element counts.  The counts are derived from the fixed-width
 * element types of the buffers below so that each array is exactly NBYTES
 * bytes regardless of the width of short/int/long long.
 */
#define NBYTES         (1 << 8)
#define NHALFS         (NBYTES / sizeof(int16_t))
#define NWORDS         (NBYTES / sizeof(int32_t))
#define NDOBLS         (NBYTES / sizeof(int64_t))

/*
 * One buffer per access size, each aligned to 4 KiB.
 * NOTE(review): the alignment is presumably required by the circular
 * addressing modes under test -- confirm against the Hexagon manual.
 */
int64_t       dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
int32_t       wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
int16_t       hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
uint8_t       bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};

4446ef47e2STaylor Simpson /*
4546ef47e2STaylor Simpson  * We use the C preporcessor to deal with the combinations of types
4646ef47e2STaylor Simpson  */
4746ef47e2STaylor Simpson 
4846ef47e2STaylor Simpson #define INIT(BUF, N) \
4946ef47e2STaylor Simpson     void init_##BUF(void) \
5046ef47e2STaylor Simpson     { \
51*0d57cd61STaylor Simpson         for (int i = 0; i < N; i++) { \
5246ef47e2STaylor Simpson             BUF[i] = i; \
5346ef47e2STaylor Simpson         } \
5446ef47e2STaylor Simpson     } \
5546ef47e2STaylor Simpson 
INIT(bbuf,NBYTES)5646ef47e2STaylor Simpson INIT(bbuf, NBYTES)
5746ef47e2STaylor Simpson INIT(hbuf, NHALFS)
5846ef47e2STaylor Simpson INIT(wbuf, NWORDS)
5946ef47e2STaylor Simpson INIT(dbuf, NDOBLS)
6046ef47e2STaylor Simpson 
/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * CIRC_LOAD_IMM copies LEN into m0 (through r4) and START into cs0, then
 * issues a mem<SIZE> load from ADDR with an immediate circular
 * post-increment of INC.  ADDR is updated in place.  INC is stringified into
 * the instruction, so it must be a literal the assembler accepts.
 *
 * The "memory" clobber tells the compiler that the asm reads memory that is
 * not named in its operands, so earlier stores to the buffer cannot be moved
 * past it.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)

/*
 * The mreg has the following pieces
 *     mreg[31:28]              increment[10:7]
 *     mreg[27:24]              K value (used Hexagon v3 and earlier)
 *     mreg[23:17]              increment[6:0]
 *     mreg[16:0]               circular buffer length
 *
 * Packs the three fields into a 32-bit modifier-register image.  Only the
 * low 11 bits of inc, the low 4 bits of K and the low 17 bits of len are
 * used.  The packing is done in unsigned arithmetic: a negative increment
 * sets increment[10], which lands in bit 31, and shifting a signed int into
 * the sign bit would be undefined behaviour.
 */
static int32_t build_mreg(int32_t inc, int32_t K, int32_t len)
{
    uint32_t inc_bits = (uint32_t)inc;
    uint32_t mreg = ((inc_bits & 0x780u) << 21) |
                    (((uint32_t)K & 0xfu) << 24) |
                    ((inc_bits & 0x7fu) << 17) |
                    ((uint32_t)len & 0x1ffffu);

    return (int32_t)mreg;
}

/*
 * Circular load with the increment taken from the modifier register.
 * m1 is loaded (through r4) with build_mreg(INC, 0, LEN), cs1 with START,
 * and the mem<SIZE> load from ADDR uses the "++I:circ(M1)" form.  ADDR is
 * updated in place; INC counts elements rather than bytes.
 *
 * The "memory" clobber tells the compiler that the asm reads memory that is
 * not named in its operands.
 */
#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %2\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %3\n\t" \
        "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START) \
        : "r4", "m1", "cs1", "memory")
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)

/*
 * Macros for performing circular store
 *     VAL         value to store
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * CIRC_STORE_IMM copies LEN into m0 (through r4) and START into cs0, then
 * stores VAL with mem<SIZE> at ADDR using an immediate circular
 * post-increment of INC.  PART is appended to the source register name
 * (the _f variant passes ".H"; all other variants pass "").
 */
#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)

/*
 * Immediate-increment circular store whose source is a ".new" value:
 * inside one packet VAL is copied into r5 and r5.new is stored.  m0 is
 * loaded with LEN (through r4) and cs0 with START beforehand; ADDR is
 * updated in place.
 */
#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "{\n\t" \
        "    r5 = %2\n\t" \
        "    mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "r5", "m0", "cs0", "memory")
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)

/*
 * Circular store with the increment taken from the modifier register.
 * m1 is loaded (through r4) with build_mreg(INC, 0, LEN), cs1 with START,
 * and VAL is stored with mem<SIZE> at ADDR using the "++I:circ(M1)" form.
 * PART is appended to the source register name (the _f variant passes ".H";
 * all other variants pass "").  ADDR is updated in place.
 */
#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "m1", "cs1", "memory")
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)

/*
 * Register-increment circular store whose source is a ".new" value:
 * inside one packet VAL is copied into r5 and r5.new is stored.  m1 is
 * loaded with build_mreg(INC, 0, LEN) (through r4) and cs1 with START
 * beforehand; ADDR is updated in place.
 */
#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "{\n\t" \
        "    r5 = %3\n\t" \
        "    mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "r5", "m1", "cs1", "memory")
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)


/* We'll test increments +1 and -1 */

/*
 * Verify the i-th value read by a circular load.
 *     line        source line reported on mismatch
 *     i           iteration number (0-based)
 *     res         value that was loaded
 *     inc         increment per iteration, in elements
 *     size        circular window length, in elements (must be positive)
 *
 * The expected value is i * inc wrapped into [0, size).  The comparison is
 * delegated to __check32() from hex_test.h.
 */
void __check_load(int line, int32_t i, int64_t res, int32_t inc, int32_t size)
{
    int32_t expect = (i * inc) % size;

    /* C's % keeps the sign of the dividend; fold negatives into the window */
    if (expect < 0) {
        expect += size;
    }
    __check32(line, res, expect);
}

#define check_load(I, RES, INC, SZ) __check_load(__LINE__, I, RES, INC, SZ)

23546ef47e2STaylor Simpson #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
23646ef47e2STaylor Simpson void circ_test_load_imm_##SZ(void) \
23746ef47e2STaylor Simpson { \
23846ef47e2STaylor Simpson     TYPE *p = (TYPE *)BUF; \
239*0d57cd61STaylor Simpson     int32_t size = 10; \
240*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
24146ef47e2STaylor Simpson         TYPE element; \
24246ef47e2STaylor Simpson         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
24346ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
24446ef47e2STaylor Simpson                      i, p, element); \
24546ef47e2STaylor Simpson         check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
24646ef47e2STaylor Simpson     } \
24746ef47e2STaylor Simpson     p = (TYPE *)BUF; \
248*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
24946ef47e2STaylor Simpson         TYPE element; \
25046ef47e2STaylor Simpson         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
25146ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
25246ef47e2STaylor Simpson                      i, p, element); \
25346ef47e2STaylor Simpson         check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
25446ef47e2STaylor Simpson     } \
25546ef47e2STaylor Simpson }
25646ef47e2STaylor Simpson 
257*0d57cd61STaylor Simpson TEST_LOAD_IMM(b,  int8_t,         bbuf, NBYTES, 1, d)
258*0d57cd61STaylor Simpson TEST_LOAD_IMM(ub, uint8_t,        bbuf, NBYTES, 1, d)
259*0d57cd61STaylor Simpson TEST_LOAD_IMM(h,  int16_t,        hbuf, NHALFS, 2, d)
260*0d57cd61STaylor Simpson TEST_LOAD_IMM(uh, uint16_t,       hbuf, NHALFS, 2, d)
261*0d57cd61STaylor Simpson TEST_LOAD_IMM(w,  int32_t,        wbuf, NWORDS, 4, d)
262*0d57cd61STaylor Simpson TEST_LOAD_IMM(d,  int64_t,        dbuf, NDOBLS, 8, lld)
26346ef47e2STaylor Simpson 
26446ef47e2STaylor Simpson #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
26546ef47e2STaylor Simpson void circ_test_load_reg_##SZ(void) \
26646ef47e2STaylor Simpson { \
26746ef47e2STaylor Simpson     TYPE *p = (TYPE *)BUF; \
268*0d57cd61STaylor Simpson     int32_t size = 13; \
269*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
27046ef47e2STaylor Simpson         TYPE element; \
27146ef47e2STaylor Simpson         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
27246ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
27346ef47e2STaylor Simpson                      i, p, element); \
27446ef47e2STaylor Simpson         check_load(i, element, 1, size); \
27546ef47e2STaylor Simpson     } \
27646ef47e2STaylor Simpson     p = (TYPE *)BUF; \
277*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
27846ef47e2STaylor Simpson         TYPE element; \
27946ef47e2STaylor Simpson         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
28046ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
28146ef47e2STaylor Simpson                      i, p, element); \
28246ef47e2STaylor Simpson         check_load(i, element, -1, size); \
28346ef47e2STaylor Simpson     } \
28446ef47e2STaylor Simpson }
28546ef47e2STaylor Simpson 
TEST_LOAD_REG(b,int8_t,bbuf,NBYTES,d)286*0d57cd61STaylor Simpson TEST_LOAD_REG(b,  int8_t,         bbuf, NBYTES, d)
287*0d57cd61STaylor Simpson TEST_LOAD_REG(ub, uint8_t,        bbuf, NBYTES, d)
288*0d57cd61STaylor Simpson TEST_LOAD_REG(h,  int16_t,        hbuf, NHALFS, d)
289*0d57cd61STaylor Simpson TEST_LOAD_REG(uh, uint16_t,       hbuf, NHALFS, d)
290*0d57cd61STaylor Simpson TEST_LOAD_REG(w,  int32_t,        wbuf, NWORDS, d)
291*0d57cd61STaylor Simpson TEST_LOAD_REG(d,  int64_t,        dbuf, NDOBLS, lld)
29246ef47e2STaylor Simpson 
29346ef47e2STaylor Simpson /* The circular stores will wrap around somewhere inside the buffer */
29446ef47e2STaylor Simpson #define CIRC_VAL(SZ, TYPE, BUFSIZE) \
295*0d57cd61STaylor Simpson TYPE circ_val_##SZ(int i, int32_t inc, int32_t size) \
29646ef47e2STaylor Simpson { \
29746ef47e2STaylor Simpson     int mod = BUFSIZE % size; \
29846ef47e2STaylor Simpson     int elem = i * inc; \
29946ef47e2STaylor Simpson     if (elem < 0) { \
30046ef47e2STaylor Simpson         if (-elem <= size - mod) { \
30146ef47e2STaylor Simpson             return (elem + BUFSIZE - mod); \
30246ef47e2STaylor Simpson         } else { \
30346ef47e2STaylor Simpson             return (elem + BUFSIZE + size - mod); \
30446ef47e2STaylor Simpson         } \
30546ef47e2STaylor Simpson     } else if (elem < mod) {\
30646ef47e2STaylor Simpson         return (elem + BUFSIZE - mod); \
30746ef47e2STaylor Simpson     } else { \
30846ef47e2STaylor Simpson         return (elem + BUFSIZE - size - mod); \
30946ef47e2STaylor Simpson     } \
31046ef47e2STaylor Simpson }
31146ef47e2STaylor Simpson 
312*0d57cd61STaylor Simpson CIRC_VAL(b, uint8_t,       NBYTES)
313*0d57cd61STaylor Simpson CIRC_VAL(h, int16_t,       NHALFS)
314*0d57cd61STaylor Simpson CIRC_VAL(w, int32_t,       NWORDS)
315*0d57cd61STaylor Simpson CIRC_VAL(d, int64_t,       NDOBLS)
31646ef47e2STaylor Simpson 
31746ef47e2STaylor Simpson /*
31846ef47e2STaylor Simpson  * Circular stores should only write to the first "size" elements of the buffer
31946ef47e2STaylor Simpson  * the remainder of the elements should have BUF[i] == i
32046ef47e2STaylor Simpson  */
32146ef47e2STaylor Simpson #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
322*0d57cd61STaylor Simpson void check_store_##SZ(int32_t inc, int32_t size) \
32346ef47e2STaylor Simpson { \
324*0d57cd61STaylor Simpson     for (int i = 0; i < size; i++) { \
32546ef47e2STaylor Simpson         DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
32646ef47e2STaylor Simpson                      i, BUF[i], circ_val_##SZ(i, inc, size)); \
327*0d57cd61STaylor Simpson         check64(BUF[i], circ_val_##SZ(i, inc, size)); \
32846ef47e2STaylor Simpson     } \
329*0d57cd61STaylor Simpson     for (int i = size; i < BUFSIZE; i++) { \
330*0d57cd61STaylor Simpson         check64(BUF[i], i); \
33146ef47e2STaylor Simpson     } \
33246ef47e2STaylor Simpson }
33346ef47e2STaylor Simpson 
33446ef47e2STaylor Simpson CHECK_STORE(b, bbuf, NBYTES, x)
33546ef47e2STaylor Simpson CHECK_STORE(h, hbuf, NHALFS, x)
33646ef47e2STaylor Simpson CHECK_STORE(w, wbuf, NWORDS, x)
33746ef47e2STaylor Simpson CHECK_STORE(d, dbuf, NDOBLS, llx)
33846ef47e2STaylor Simpson 
33946ef47e2STaylor Simpson #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
34046ef47e2STaylor Simpson void circ_test_store_imm_##SZ(void) \
34146ef47e2STaylor Simpson { \
342*0d57cd61STaylor Simpson     uint32_t size = 27; \
34346ef47e2STaylor Simpson     TYPE *p = BUF; \
34446ef47e2STaylor Simpson     TYPE val = 0; \
34546ef47e2STaylor Simpson     init_##BUF(); \
346*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
34746ef47e2STaylor Simpson         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
34846ef47e2STaylor Simpson         val++; \
34946ef47e2STaylor Simpson     } \
35046ef47e2STaylor Simpson     check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
35146ef47e2STaylor Simpson     p = BUF; \
35246ef47e2STaylor Simpson     val = 0; \
35346ef47e2STaylor Simpson     init_##BUF(); \
354*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
35546ef47e2STaylor Simpson         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
35646ef47e2STaylor Simpson                             -(INC)); \
35746ef47e2STaylor Simpson         val++; \
35846ef47e2STaylor Simpson     } \
35946ef47e2STaylor Simpson     check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
36046ef47e2STaylor Simpson }
36146ef47e2STaylor Simpson 
362*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(b,    b, uint8_t,       bbuf, NBYTES, 0,  1)
363*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(h,    h, int16_t,       hbuf, NHALFS, 0,  2)
364*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(f,    h, int16_t,       hbuf, NHALFS, 16, 2)
365*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(w,    w, int32_t,       wbuf, NWORDS, 0,  4)
366*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(d,    d, int64_t,       dbuf, NDOBLS, 0,  8)
367*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(bnew, b, uint8_t,       bbuf, NBYTES, 0,  1)
368*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(hnew, h, int16_t,       hbuf, NHALFS, 0,  2)
369*0d57cd61STaylor Simpson CIRC_TEST_STORE_IMM(wnew, w, int32_t,       wbuf, NWORDS, 0,  4)
37046ef47e2STaylor Simpson 
37146ef47e2STaylor Simpson #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
37246ef47e2STaylor Simpson void circ_test_store_reg_##SZ(void) \
37346ef47e2STaylor Simpson { \
37446ef47e2STaylor Simpson     TYPE *p = BUF; \
375*0d57cd61STaylor Simpson     uint32_t size = 19; \
37646ef47e2STaylor Simpson     TYPE val = 0; \
37746ef47e2STaylor Simpson     init_##BUF(); \
378*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
37946ef47e2STaylor Simpson         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
38046ef47e2STaylor Simpson         val++; \
38146ef47e2STaylor Simpson     } \
38246ef47e2STaylor Simpson     check_store_##CHK(1, size); \
38346ef47e2STaylor Simpson     p = BUF; \
38446ef47e2STaylor Simpson     val = 0; \
38546ef47e2STaylor Simpson     init_##BUF(); \
386*0d57cd61STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) { \
38746ef47e2STaylor Simpson         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
38846ef47e2STaylor Simpson         val++; \
38946ef47e2STaylor Simpson     } \
39046ef47e2STaylor Simpson     check_store_##CHK(-1, size); \
39146ef47e2STaylor Simpson }
39246ef47e2STaylor Simpson 
393*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(b,    b, uint8_t,       bbuf, NBYTES, 0)
394*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(h,    h, int16_t,       hbuf, NHALFS, 0)
395*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(f,    h, int16_t,       hbuf, NHALFS, 16)
396*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(w,    w, int32_t,       wbuf, NWORDS, 0)
397*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(d,    d, int64_t,       dbuf, NDOBLS, 0)
398*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(bnew, b, uint8_t,       bbuf, NBYTES, 0)
399*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(hnew, h, int16_t,       hbuf, NHALFS, 0)
400*0d57cd61STaylor Simpson CIRC_TEST_STORE_REG(wnew, w, int32_t,       wbuf, NWORDS, 0)
40146ef47e2STaylor Simpson 
40246ef47e2STaylor Simpson /* Test the old scheme used in Hexagon V3 */
40346ef47e2STaylor Simpson static void circ_test_v3(void)
40446ef47e2STaylor Simpson {
40546ef47e2STaylor Simpson     int *p = wbuf;
406*0d57cd61STaylor Simpson     int32_t size = 15;
40758ff2981SMichael Lambert     /* set high bit in K to test unsigned extract in fcirc */
408*0d57cd61STaylor Simpson     int32_t K = 8;      /* 1024 bytes */
409*0d57cd61STaylor Simpson     int32_t element;
41046ef47e2STaylor Simpson 
41146ef47e2STaylor Simpson     init_wbuf();
41246ef47e2STaylor Simpson 
413*0d57cd61STaylor Simpson     for (int i = 0; i < NWORDS; i++) {
41446ef47e2STaylor Simpson         __asm__(
41546ef47e2STaylor Simpson             "r4 = %2\n\t"
41646ef47e2STaylor Simpson             "m1 = r4\n\t"
41746ef47e2STaylor Simpson             "%0 = memw(%1++I:circ(M1))\n\t"
41846ef47e2STaylor Simpson             : "=r"(element), "+r"(p)
41946ef47e2STaylor Simpson             : "r"(build_mreg(1, K, size * sizeof(int)))
42046ef47e2STaylor Simpson             : "r4", "m1");
42146ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
42246ef47e2STaylor Simpson         check_load(i, element, 1, size);
42346ef47e2STaylor Simpson     }
42446ef47e2STaylor Simpson }
42546ef47e2STaylor Simpson 
main()42646ef47e2STaylor Simpson int main()
42746ef47e2STaylor Simpson {
42846ef47e2STaylor Simpson     init_bbuf();
42946ef47e2STaylor Simpson     init_hbuf();
43046ef47e2STaylor Simpson     init_wbuf();
43146ef47e2STaylor Simpson     init_dbuf();
43246ef47e2STaylor Simpson 
43346ef47e2STaylor Simpson     DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
43446ef47e2STaylor Simpson     DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
43546ef47e2STaylor Simpson     DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
43646ef47e2STaylor Simpson     DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
43746ef47e2STaylor Simpson     DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
43846ef47e2STaylor Simpson 
43946ef47e2STaylor Simpson     circ_test_load_imm_b();
44046ef47e2STaylor Simpson     circ_test_load_imm_ub();
44146ef47e2STaylor Simpson     circ_test_load_imm_h();
44246ef47e2STaylor Simpson     circ_test_load_imm_uh();
44346ef47e2STaylor Simpson     circ_test_load_imm_w();
44446ef47e2STaylor Simpson     circ_test_load_imm_d();
44546ef47e2STaylor Simpson 
44646ef47e2STaylor Simpson     circ_test_load_reg_b();
44746ef47e2STaylor Simpson     circ_test_load_reg_ub();
44846ef47e2STaylor Simpson     circ_test_load_reg_h();
44946ef47e2STaylor Simpson     circ_test_load_reg_uh();
45046ef47e2STaylor Simpson     circ_test_load_reg_w();
45146ef47e2STaylor Simpson     circ_test_load_reg_d();
45246ef47e2STaylor Simpson 
45346ef47e2STaylor Simpson     circ_test_store_imm_b();
45446ef47e2STaylor Simpson     circ_test_store_imm_h();
45546ef47e2STaylor Simpson     circ_test_store_imm_f();
45646ef47e2STaylor Simpson     circ_test_store_imm_w();
45746ef47e2STaylor Simpson     circ_test_store_imm_d();
45846ef47e2STaylor Simpson     circ_test_store_imm_bnew();
45946ef47e2STaylor Simpson     circ_test_store_imm_hnew();
46046ef47e2STaylor Simpson     circ_test_store_imm_wnew();
46146ef47e2STaylor Simpson 
46246ef47e2STaylor Simpson     circ_test_store_reg_b();
46346ef47e2STaylor Simpson     circ_test_store_reg_h();
46446ef47e2STaylor Simpson     circ_test_store_reg_f();
46546ef47e2STaylor Simpson     circ_test_store_reg_w();
46646ef47e2STaylor Simpson     circ_test_store_reg_d();
46746ef47e2STaylor Simpson     circ_test_store_reg_bnew();
46846ef47e2STaylor Simpson     circ_test_store_reg_hnew();
46946ef47e2STaylor Simpson     circ_test_store_reg_wnew();
47046ef47e2STaylor Simpson 
47146ef47e2STaylor Simpson     circ_test_v3();
47246ef47e2STaylor Simpson 
47346ef47e2STaylor Simpson     puts(err ? "FAIL" : "PASS");
47446ef47e2STaylor Simpson     return err ? 1 : 0;
47546ef47e2STaylor Simpson }
476