xref: /openbmc/qemu/tests/tcg/hexagon/circ.c (revision 46ef47e2a77d1a34996964760b4a0d2b19476f25)
1*46ef47e2STaylor Simpson /*
2*46ef47e2STaylor Simpson  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3*46ef47e2STaylor Simpson  *
4*46ef47e2STaylor Simpson  *  This program is free software; you can redistribute it and/or modify
5*46ef47e2STaylor Simpson  *  it under the terms of the GNU General Public License as published by
6*46ef47e2STaylor Simpson  *  the Free Software Foundation; either version 2 of the License, or
7*46ef47e2STaylor Simpson  *  (at your option) any later version.
8*46ef47e2STaylor Simpson  *
9*46ef47e2STaylor Simpson  *  This program is distributed in the hope that it will be useful,
10*46ef47e2STaylor Simpson  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11*46ef47e2STaylor Simpson  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*46ef47e2STaylor Simpson  *  GNU General Public License for more details.
13*46ef47e2STaylor Simpson  *
14*46ef47e2STaylor Simpson  *  You should have received a copy of the GNU General Public License
15*46ef47e2STaylor Simpson  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16*46ef47e2STaylor Simpson  */
17*46ef47e2STaylor Simpson 
18*46ef47e2STaylor Simpson #include <stdio.h>
19*46ef47e2STaylor Simpson 
/* Set to a non-zero value to enable the diagnostic output below. */
#define DEBUG          0

/*
 * printf-style tracing.  The call is always compiled (so the format string
 * and arguments are still checked) but it is only executed, and its
 * arguments only evaluated, when DEBUG is non-zero.
 */
#define DEBUG_PRINTF(...) \
    do { \
        if (!DEBUG) { \
            break; \
        } \
        printf(__VA_ARGS__); \
    } while (0)
27*46ef47e2STaylor Simpson 
28*46ef47e2STaylor Simpson 
/* Size of every test buffer in bytes, and as a count of each element type. */
#define NBYTES         (1 << 8)
#define NHALFS         (NBYTES / sizeof(short))
#define NWORDS         (NBYTES / sizeof(int))
#define NDOBLS         (NBYTES / sizeof(long long))

/* Every buffer is aligned to a (1 << 12)-byte boundary. */
#define CIRC_BUF_ALIGN __attribute__((aligned(1 << 12)))

/* One NBYTES-sized buffer per access width: 8, 4, 2 and 1 byte elements. */
long long     dbuf[NDOBLS] CIRC_BUF_ALIGN = { 0 };
int           wbuf[NWORDS] CIRC_BUF_ALIGN = { 0 };
short         hbuf[NHALFS] CIRC_BUF_ALIGN = { 0 };
unsigned char bbuf[NBYTES] CIRC_BUF_ALIGN = { 0 };
38*46ef47e2STaylor Simpson 
39*46ef47e2STaylor Simpson /*
40*46ef47e2STaylor Simpson  * We use the C preprocessor to deal with the combinations of types
41*46ef47e2STaylor Simpson  */
42*46ef47e2STaylor Simpson 
43*46ef47e2STaylor Simpson #define INIT(BUF, N) \
44*46ef47e2STaylor Simpson     void init_##BUF(void) \
45*46ef47e2STaylor Simpson     { \
46*46ef47e2STaylor Simpson         int i; \
47*46ef47e2STaylor Simpson         for (i = 0; i < N; i++) { \
48*46ef47e2STaylor Simpson             BUF[i] = i; \
49*46ef47e2STaylor Simpson         } \
50*46ef47e2STaylor Simpson     } \
51*46ef47e2STaylor Simpson 
52*46ef47e2STaylor Simpson INIT(bbuf, NBYTES)
53*46ef47e2STaylor Simpson INIT(hbuf, NHALFS)
54*46ef47e2STaylor Simpson INIT(wbuf, NWORDS)
55*46ef47e2STaylor Simpson INIT(dbuf, NDOBLS)
56*46ef47e2STaylor Simpson 
/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address (updated in place by the post-increment)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * The IMM form copies LEN into M0 (through r4) and START into CS0, then
 * issues "RES = mem<SIZE>(ADDR++#INC:circ(M0))".  INC is stringified into
 * the instruction text, so it must be an assemble-time constant expression.
 *
 * The load reads buffer memory that is not named as an operand, so the
 * statement carries a "memory" clobber: without it the compiler is free to
 * assume the asm does not depend on earlier stores to the buffer.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")

/* One wrapper per load size suffix: b, ub, h, uh, w and d. */
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
86*46ef47e2STaylor Simpson 
/*
 * The mreg has the following pieces
 *     mreg[31:28]              increment[10:7]
 *     mreg[27:24]              K value (used Hexagon v3 and earlier)
 *     mreg[23:17]              increment[6:0]
 *     mreg[16:0]               circular buffer length
 *
 * Packs the three fields into one 32-bit modifier-register image.
 *     inc   increment; only its low 11 bits are kept, so a small negative
 *           value is encoded as its two's complement bit pattern
 *     K     K value; only the low 4 bits are kept
 *     len   buffer length; only the low 17 bits are kept
 * Returns the packed image as an int.
 *
 * The fields are assembled in unsigned arithmetic: for a negative increment
 * increment[10:7] lands in bits 31:28, and shifting a set bit into the sign
 * bit of a signed int is undefined behaviour.  Converting the finished
 * pattern back to int is implementation-defined rather than undefined, and
 * keeps the bit pattern on two's complement targets.
 */
static int build_mreg(int inc, int K, int len)
{
    unsigned int inc_hi = ((unsigned int)inc & 0x780u) << 21;
    unsigned int k_bits = ((unsigned int)K & 0xfu) << 24;
    unsigned int inc_lo = ((unsigned int)inc & 0x7fu) << 17;
    unsigned int length = (unsigned int)len & 0x1ffffu;

    return (int)(inc_hi | k_bits | inc_lo | length);
}
101*46ef47e2STaylor Simpson 
/*
 * Circular load with the increment taken from the modifier register.
 *     RES         result
 *     ADDR        address (updated in place by the post-increment)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * build_mreg(INC, 0, LEN) is copied into M1 (through r4) and START into
 * CS1, then "RES = mem<SIZE>(ADDR++I:circ(M1))" is issued.
 *
 * The load reads buffer memory that is not named as an operand, so the
 * statement carries a "memory" clobber: without it the compiler is free to
 * assume the asm does not depend on earlier stores to the buffer.
 */
#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %2\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %3\n\t" \
        "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START) \
        : "r4", "m1", "cs1", "memory")

/* One wrapper per load size suffix: b, ub, h, uh, w and d. */
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
124*46ef47e2STaylor Simpson 
/*
 * Macros for performing circular store
 *     VAL         value to store
 *     ADDR        address (updated in place by the post-increment)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * The IMM form copies LEN into M0 (through r4) and START into CS0, then
 * issues "mem<SIZE>(ADDR++#INC:circ(M0)) = VAL<PART>".  PART is a string
 * appended to the source register name ("" or ".H").  INC is stringified
 * into the instruction text, so it must be an assemble-time constant
 * expression.
 *
 * The statement is volatile because its purpose is the store itself: its
 * only output is ADDR, and a non-volatile asm may be discarded when the
 * compiler finds its outputs unused (as ADDR is after the last store of a
 * loop).
 */
#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__ __volatile__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")

/* Wrappers per store size; _f is a memh store of the ".H" register part. */
#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
152*46ef47e2STaylor Simpson 
/*
 * Circular store of a ".new" value with an immediate increment.
 *
 * Same register setup as the plain immediate store (LEN into M0 through
 * r4, START into CS0), but VAL is first copied into r5 and the store uses
 * r5.new, with both instructions grouped inside one { } packet.  INC is
 * stringified into the instruction text, so it must be an assemble-time
 * constant expression.
 *
 * The statement is volatile because its purpose is the store itself: its
 * only output is ADDR, and a non-volatile asm may be discarded when the
 * compiler finds its outputs unused.
 */
#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__ __volatile__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "{\n\t" \
        "    r5 = %2\n\t" \
        "    mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "r5", "m0", "cs0", "memory")

/* Wrappers for the b, h and w store sizes. */
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
171*46ef47e2STaylor Simpson 
/*
 * Circular store with the increment taken from the modifier register.
 *     VAL         value to store
 *     ADDR        address (updated in place by the post-increment)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * build_mreg(INC, 0, LEN) is copied into M1 (through r4) and START into
 * CS1, then "mem<SIZE>(ADDR++I:circ(M1)) = VAL<PART>" is issued.  PART is
 * a string appended to the source register name ("" or ".H").
 *
 * The statement is volatile because its purpose is the store itself: its
 * only output is ADDR, and a non-volatile asm may be discarded when the
 * compiler finds its outputs unused.
 */
#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__ __volatile__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "m1", "cs1", "memory")

/* Wrappers per store size; _f is a memh store of the ".H" register part. */
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
193*46ef47e2STaylor Simpson 
/*
 * Circular store of a ".new" value with the increment taken from the
 * modifier register.
 *
 * build_mreg(INC, 0, LEN) is copied into M1 (through r4) and START into
 * CS1.  VAL is then copied into r5 and stored with r5.new, with both
 * instructions grouped inside one { } packet.
 *
 * The statement is volatile because its purpose is the store itself: its
 * only output is ADDR, and a non-volatile asm may be discarded when the
 * compiler finds its outputs unused.
 */
#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__ __volatile__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "{\n\t" \
        "    r5 = %3\n\t" \
        "    mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "r5", "m1", "cs1", "memory")

/* Wrappers for the b, h and w store sizes. */
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
214*46ef47e2STaylor Simpson 
215*46ef47e2STaylor Simpson 
/* Running count of failed checks. */
int err;

/*
 * Check the value returned by the i-th circular load.
 *     i           zero-based index of the load within the test loop
 *     result      value that was loaded
 *     inc         increment, in elements (the tests use +1 and -1)
 *     size        number of elements in the circular window
 *
 * The expected value is i * inc folded into the range [0, size).  On a
 * mismatch an ERROR line is printed and err is incremented.
 */
void check_load(int i, long long result, int inc, int size)
{
    int wanted;

    /* Fold from above, then from below, one window length at a time. */
    for (wanted = i * inc; wanted >= size; wanted -= size) {
    }
    for (; wanted < 0; wanted += size) {
    }

    if (result == wanted) {
        return;
    }
    printf("ERROR(%d): %lld != %d\n", i, result, wanted);
    err++;
}
233*46ef47e2STaylor Simpson 
234*46ef47e2STaylor Simpson #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
235*46ef47e2STaylor Simpson void circ_test_load_imm_##SZ(void) \
236*46ef47e2STaylor Simpson { \
237*46ef47e2STaylor Simpson     TYPE *p = (TYPE *)BUF; \
238*46ef47e2STaylor Simpson     int size = 10; \
239*46ef47e2STaylor Simpson     int i; \
240*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
241*46ef47e2STaylor Simpson         TYPE element; \
242*46ef47e2STaylor Simpson         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
243*46ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
244*46ef47e2STaylor Simpson                      i, p, element); \
245*46ef47e2STaylor Simpson         check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
246*46ef47e2STaylor Simpson     } \
247*46ef47e2STaylor Simpson     p = (TYPE *)BUF; \
248*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
249*46ef47e2STaylor Simpson         TYPE element; \
250*46ef47e2STaylor Simpson         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
251*46ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
252*46ef47e2STaylor Simpson                      i, p, element); \
253*46ef47e2STaylor Simpson         check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
254*46ef47e2STaylor Simpson     } \
255*46ef47e2STaylor Simpson }
256*46ef47e2STaylor Simpson 
257*46ef47e2STaylor Simpson TEST_LOAD_IMM(b,  char,           bbuf, NBYTES, 1, d)
258*46ef47e2STaylor Simpson TEST_LOAD_IMM(ub, unsigned char,  bbuf, NBYTES, 1, d)
259*46ef47e2STaylor Simpson TEST_LOAD_IMM(h,  short,          hbuf, NHALFS, 2, d)
260*46ef47e2STaylor Simpson TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d)
261*46ef47e2STaylor Simpson TEST_LOAD_IMM(w,  int,            wbuf, NWORDS, 4, d)
262*46ef47e2STaylor Simpson TEST_LOAD_IMM(d,  long long,      dbuf, NDOBLS, 8, lld)
263*46ef47e2STaylor Simpson 
264*46ef47e2STaylor Simpson #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
265*46ef47e2STaylor Simpson void circ_test_load_reg_##SZ(void) \
266*46ef47e2STaylor Simpson { \
267*46ef47e2STaylor Simpson     TYPE *p = (TYPE *)BUF; \
268*46ef47e2STaylor Simpson     int size = 13; \
269*46ef47e2STaylor Simpson     int i; \
270*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
271*46ef47e2STaylor Simpson         TYPE element; \
272*46ef47e2STaylor Simpson         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
273*46ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
274*46ef47e2STaylor Simpson                      i, p, element); \
275*46ef47e2STaylor Simpson         check_load(i, element, 1, size); \
276*46ef47e2STaylor Simpson     } \
277*46ef47e2STaylor Simpson     p = (TYPE *)BUF; \
278*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
279*46ef47e2STaylor Simpson         TYPE element; \
280*46ef47e2STaylor Simpson         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
281*46ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
282*46ef47e2STaylor Simpson                      i, p, element); \
283*46ef47e2STaylor Simpson         check_load(i, element, -1, size); \
284*46ef47e2STaylor Simpson     } \
285*46ef47e2STaylor Simpson }
286*46ef47e2STaylor Simpson 
287*46ef47e2STaylor Simpson TEST_LOAD_REG(b,  char,           bbuf, NBYTES, d)
288*46ef47e2STaylor Simpson TEST_LOAD_REG(ub, unsigned char,  bbuf, NBYTES, d)
289*46ef47e2STaylor Simpson TEST_LOAD_REG(h,  short,          hbuf, NHALFS, d)
290*46ef47e2STaylor Simpson TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d)
291*46ef47e2STaylor Simpson TEST_LOAD_REG(w,  int,            wbuf, NWORDS, d)
292*46ef47e2STaylor Simpson TEST_LOAD_REG(d,  long long,      dbuf, NDOBLS, lld)
293*46ef47e2STaylor Simpson 
294*46ef47e2STaylor Simpson /* The circular stores will wrap around somewhere inside the buffer */
295*46ef47e2STaylor Simpson #define CIRC_VAL(SZ, TYPE, BUFSIZE) \
296*46ef47e2STaylor Simpson TYPE circ_val_##SZ(int i, int inc, int size) \
297*46ef47e2STaylor Simpson { \
298*46ef47e2STaylor Simpson     int mod = BUFSIZE % size; \
299*46ef47e2STaylor Simpson     int elem = i * inc; \
300*46ef47e2STaylor Simpson     if (elem < 0) { \
301*46ef47e2STaylor Simpson         if (-elem <= size - mod) { \
302*46ef47e2STaylor Simpson             return (elem + BUFSIZE - mod); \
303*46ef47e2STaylor Simpson         } else { \
304*46ef47e2STaylor Simpson             return (elem + BUFSIZE + size - mod); \
305*46ef47e2STaylor Simpson         } \
306*46ef47e2STaylor Simpson     } else if (elem < mod) {\
307*46ef47e2STaylor Simpson         return (elem + BUFSIZE - mod); \
308*46ef47e2STaylor Simpson     } else { \
309*46ef47e2STaylor Simpson         return (elem + BUFSIZE - size - mod); \
310*46ef47e2STaylor Simpson     } \
311*46ef47e2STaylor Simpson }
312*46ef47e2STaylor Simpson 
313*46ef47e2STaylor Simpson CIRC_VAL(b, unsigned char, NBYTES)
314*46ef47e2STaylor Simpson CIRC_VAL(h, short,         NHALFS)
315*46ef47e2STaylor Simpson CIRC_VAL(w, int,           NWORDS)
316*46ef47e2STaylor Simpson CIRC_VAL(d, long long,     NDOBLS)
317*46ef47e2STaylor Simpson 
318*46ef47e2STaylor Simpson /*
319*46ef47e2STaylor Simpson  * Circular stores should only write to the first "size" elements of the buffer
320*46ef47e2STaylor Simpson  * the remainder of the elements should have BUF[i] == i
321*46ef47e2STaylor Simpson  */
322*46ef47e2STaylor Simpson #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
323*46ef47e2STaylor Simpson void check_store_##SZ(int inc, int size) \
324*46ef47e2STaylor Simpson { \
325*46ef47e2STaylor Simpson     int i; \
326*46ef47e2STaylor Simpson     for (i = 0; i < size; i++) { \
327*46ef47e2STaylor Simpson         DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
328*46ef47e2STaylor Simpson                      i, BUF[i], circ_val_##SZ(i, inc, size)); \
329*46ef47e2STaylor Simpson         if (BUF[i] != circ_val_##SZ(i, inc, size)) { \
330*46ef47e2STaylor Simpson             printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \
331*46ef47e2STaylor Simpson                    i, BUF[i], circ_val_##SZ(i, inc, size)); \
332*46ef47e2STaylor Simpson             err++; \
333*46ef47e2STaylor Simpson         } \
334*46ef47e2STaylor Simpson     } \
335*46ef47e2STaylor Simpson     for (i = size; i < BUFSIZE; i++) { \
336*46ef47e2STaylor Simpson         if (BUF[i] != i) { \
337*46ef47e2STaylor Simpson             printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \
338*46ef47e2STaylor Simpson             err++; \
339*46ef47e2STaylor Simpson         } \
340*46ef47e2STaylor Simpson     } \
341*46ef47e2STaylor Simpson }
342*46ef47e2STaylor Simpson 
343*46ef47e2STaylor Simpson CHECK_STORE(b, bbuf, NBYTES, x)
344*46ef47e2STaylor Simpson CHECK_STORE(h, hbuf, NHALFS, x)
345*46ef47e2STaylor Simpson CHECK_STORE(w, wbuf, NWORDS, x)
346*46ef47e2STaylor Simpson CHECK_STORE(d, dbuf, NDOBLS, llx)
347*46ef47e2STaylor Simpson 
348*46ef47e2STaylor Simpson #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
349*46ef47e2STaylor Simpson void circ_test_store_imm_##SZ(void) \
350*46ef47e2STaylor Simpson { \
351*46ef47e2STaylor Simpson     unsigned int size = 27; \
352*46ef47e2STaylor Simpson     TYPE *p = BUF; \
353*46ef47e2STaylor Simpson     TYPE val = 0; \
354*46ef47e2STaylor Simpson     int i; \
355*46ef47e2STaylor Simpson     init_##BUF(); \
356*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
357*46ef47e2STaylor Simpson         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
358*46ef47e2STaylor Simpson         val++; \
359*46ef47e2STaylor Simpson     } \
360*46ef47e2STaylor Simpson     check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
361*46ef47e2STaylor Simpson     p = BUF; \
362*46ef47e2STaylor Simpson     val = 0; \
363*46ef47e2STaylor Simpson     init_##BUF(); \
364*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
365*46ef47e2STaylor Simpson         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
366*46ef47e2STaylor Simpson                             -(INC)); \
367*46ef47e2STaylor Simpson         val++; \
368*46ef47e2STaylor Simpson     } \
369*46ef47e2STaylor Simpson     check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
370*46ef47e2STaylor Simpson }
371*46ef47e2STaylor Simpson 
372*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(b,    b, unsigned char, bbuf, NBYTES, 0,  1)
373*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(h,    h, short,         hbuf, NHALFS, 0,  2)
374*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(f,    h, short,         hbuf, NHALFS, 16, 2)
375*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(w,    w, int,           wbuf, NWORDS, 0,  4)
376*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(d,    d, long long,     dbuf, NDOBLS, 0,  8)
377*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0,  1)
378*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(hnew, h, short,         hbuf, NHALFS, 0,  2)
379*46ef47e2STaylor Simpson CIRC_TEST_STORE_IMM(wnew, w, int,           wbuf, NWORDS, 0,  4)
380*46ef47e2STaylor Simpson 
381*46ef47e2STaylor Simpson #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
382*46ef47e2STaylor Simpson void circ_test_store_reg_##SZ(void) \
383*46ef47e2STaylor Simpson { \
384*46ef47e2STaylor Simpson     TYPE *p = BUF; \
385*46ef47e2STaylor Simpson     unsigned int size = 19; \
386*46ef47e2STaylor Simpson     TYPE val = 0; \
387*46ef47e2STaylor Simpson     int i; \
388*46ef47e2STaylor Simpson     init_##BUF(); \
389*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
390*46ef47e2STaylor Simpson         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
391*46ef47e2STaylor Simpson         val++; \
392*46ef47e2STaylor Simpson     } \
393*46ef47e2STaylor Simpson     check_store_##CHK(1, size); \
394*46ef47e2STaylor Simpson     p = BUF; \
395*46ef47e2STaylor Simpson     val = 0; \
396*46ef47e2STaylor Simpson     init_##BUF(); \
397*46ef47e2STaylor Simpson     for (i = 0; i < BUFSIZE; i++) { \
398*46ef47e2STaylor Simpson         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
399*46ef47e2STaylor Simpson         val++; \
400*46ef47e2STaylor Simpson     } \
401*46ef47e2STaylor Simpson     check_store_##CHK(-1, size); \
402*46ef47e2STaylor Simpson }
403*46ef47e2STaylor Simpson 
404*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(b,    b, unsigned char, bbuf, NBYTES, 0)
405*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(h,    h, short,         hbuf, NHALFS, 0)
406*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(f,    h, short,         hbuf, NHALFS, 16)
407*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(w,    w, int,           wbuf, NWORDS, 0)
408*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(d,    d, long long,     dbuf, NDOBLS, 0)
409*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0)
410*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(hnew, h, short,         hbuf, NHALFS, 0)
411*46ef47e2STaylor Simpson CIRC_TEST_STORE_REG(wnew, w, int,           wbuf, NWORDS, 0)
412*46ef47e2STaylor Simpson 
413*46ef47e2STaylor Simpson /* Test the old scheme used in Hexagon V3 */
414*46ef47e2STaylor Simpson static void circ_test_v3(void)
415*46ef47e2STaylor Simpson {
416*46ef47e2STaylor Simpson     int *p = wbuf;
417*46ef47e2STaylor Simpson     int size = 15;
418*46ef47e2STaylor Simpson     int K = 4;      /* 64 bytes */
419*46ef47e2STaylor Simpson     int element;
420*46ef47e2STaylor Simpson     int i;
421*46ef47e2STaylor Simpson 
422*46ef47e2STaylor Simpson     init_wbuf();
423*46ef47e2STaylor Simpson 
424*46ef47e2STaylor Simpson     for (i = 0; i < NWORDS; i++) {
425*46ef47e2STaylor Simpson         __asm__(
426*46ef47e2STaylor Simpson             "r4 = %2\n\t"
427*46ef47e2STaylor Simpson             "m1 = r4\n\t"
428*46ef47e2STaylor Simpson             "%0 = memw(%1++I:circ(M1))\n\t"
429*46ef47e2STaylor Simpson             : "=r"(element), "+r"(p)
430*46ef47e2STaylor Simpson             : "r"(build_mreg(1, K, size * sizeof(int)))
431*46ef47e2STaylor Simpson             : "r4", "m1");
432*46ef47e2STaylor Simpson         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
433*46ef47e2STaylor Simpson         check_load(i, element, 1, size);
434*46ef47e2STaylor Simpson     }
435*46ef47e2STaylor Simpson }
436*46ef47e2STaylor Simpson 
437*46ef47e2STaylor Simpson int main()
438*46ef47e2STaylor Simpson {
439*46ef47e2STaylor Simpson     init_bbuf();
440*46ef47e2STaylor Simpson     init_hbuf();
441*46ef47e2STaylor Simpson     init_wbuf();
442*46ef47e2STaylor Simpson     init_dbuf();
443*46ef47e2STaylor Simpson 
444*46ef47e2STaylor Simpson     DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
445*46ef47e2STaylor Simpson     DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
446*46ef47e2STaylor Simpson     DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
447*46ef47e2STaylor Simpson     DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
448*46ef47e2STaylor Simpson     DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
449*46ef47e2STaylor Simpson 
450*46ef47e2STaylor Simpson     circ_test_load_imm_b();
451*46ef47e2STaylor Simpson     circ_test_load_imm_ub();
452*46ef47e2STaylor Simpson     circ_test_load_imm_h();
453*46ef47e2STaylor Simpson     circ_test_load_imm_uh();
454*46ef47e2STaylor Simpson     circ_test_load_imm_w();
455*46ef47e2STaylor Simpson     circ_test_load_imm_d();
456*46ef47e2STaylor Simpson 
457*46ef47e2STaylor Simpson     circ_test_load_reg_b();
458*46ef47e2STaylor Simpson     circ_test_load_reg_ub();
459*46ef47e2STaylor Simpson     circ_test_load_reg_h();
460*46ef47e2STaylor Simpson     circ_test_load_reg_uh();
461*46ef47e2STaylor Simpson     circ_test_load_reg_w();
462*46ef47e2STaylor Simpson     circ_test_load_reg_d();
463*46ef47e2STaylor Simpson 
464*46ef47e2STaylor Simpson     circ_test_store_imm_b();
465*46ef47e2STaylor Simpson     circ_test_store_imm_h();
466*46ef47e2STaylor Simpson     circ_test_store_imm_f();
467*46ef47e2STaylor Simpson     circ_test_store_imm_w();
468*46ef47e2STaylor Simpson     circ_test_store_imm_d();
469*46ef47e2STaylor Simpson     circ_test_store_imm_bnew();
470*46ef47e2STaylor Simpson     circ_test_store_imm_hnew();
471*46ef47e2STaylor Simpson     circ_test_store_imm_wnew();
472*46ef47e2STaylor Simpson 
473*46ef47e2STaylor Simpson     circ_test_store_reg_b();
474*46ef47e2STaylor Simpson     circ_test_store_reg_h();
475*46ef47e2STaylor Simpson     circ_test_store_reg_f();
476*46ef47e2STaylor Simpson     circ_test_store_reg_w();
477*46ef47e2STaylor Simpson     circ_test_store_reg_d();
478*46ef47e2STaylor Simpson     circ_test_store_reg_bnew();
479*46ef47e2STaylor Simpson     circ_test_store_reg_hnew();
480*46ef47e2STaylor Simpson     circ_test_store_reg_wnew();
481*46ef47e2STaylor Simpson 
482*46ef47e2STaylor Simpson     circ_test_v3();
483*46ef47e2STaylor Simpson 
484*46ef47e2STaylor Simpson     puts(err ? "FAIL" : "PASS");
485*46ef47e2STaylor Simpson     return err ? 1 : 0;
486*46ef47e2STaylor Simpson }
487