xref: /openbmc/qemu/tests/tcg/hexagon/hvx_misc.h (revision 6dd06214892d71cbbdd25daed7693e58afcb1093)
/*
 *  Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
17*761e1c67STaylor Simpson 
18*761e1c67STaylor Simpson #ifndef HVX_MISC_H
19*761e1c67STaylor Simpson #define HVX_MISC_H
20*761e1c67STaylor Simpson 
check(int line,int i,int j,uint64_t result,uint64_t expect)21*761e1c67STaylor Simpson static inline void check(int line, int i, int j,
22*761e1c67STaylor Simpson                          uint64_t result, uint64_t expect)
23*761e1c67STaylor Simpson {
24*761e1c67STaylor Simpson     if (result != expect) {
25*761e1c67STaylor Simpson         printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n",
26*761e1c67STaylor Simpson                line, i, j, result, expect);
27*761e1c67STaylor Simpson         err++;
28*761e1c67STaylor Simpson     }
29*761e1c67STaylor Simpson }
30*761e1c67STaylor Simpson 
/* Size in bytes of the vector type used by these tests. */
#define MAX_VEC_SIZE_BYTES         128

/*
 * One vector, viewable as unsigned/signed elements of 8, 4, 2 or 1 bytes.
 * Every member spans the full MAX_VEC_SIZE_BYTES, so all views alias the
 * same storage.
 */
typedef union {
    uint64_t ud[MAX_VEC_SIZE_BYTES / sizeof(uint64_t)];
    int64_t   d[MAX_VEC_SIZE_BYTES / sizeof(int64_t)];
    uint32_t uw[MAX_VEC_SIZE_BYTES / sizeof(uint32_t)];
    int32_t   w[MAX_VEC_SIZE_BYTES / sizeof(int32_t)];
    uint16_t uh[MAX_VEC_SIZE_BYTES / sizeof(uint16_t)];
    int16_t   h[MAX_VEC_SIZE_BYTES / sizeof(int16_t)];
    uint8_t  ub[MAX_VEC_SIZE_BYTES / sizeof(uint8_t)];
    int8_t    b[MAX_VEC_SIZE_BYTES / sizeof(int8_t)];
} MMVector;
43*761e1c67STaylor Simpson 
44*761e1c67STaylor Simpson #define BUFSIZE      16
45*761e1c67STaylor Simpson #define OUTSIZE      16
46*761e1c67STaylor Simpson #define MASKMOD      3
47*761e1c67STaylor Simpson 
48*761e1c67STaylor Simpson MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
49*761e1c67STaylor Simpson MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
50*761e1c67STaylor Simpson MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
51*761e1c67STaylor Simpson MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
52*761e1c67STaylor Simpson MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
53*761e1c67STaylor Simpson 
54*761e1c67STaylor Simpson #define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \
55*761e1c67STaylor Simpson static inline void check_output_##FIELD(int line, size_t num_vectors) \
56*761e1c67STaylor Simpson { \
57*761e1c67STaylor Simpson     for (int i = 0; i < num_vectors; i++) { \
58*761e1c67STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
59*761e1c67STaylor Simpson             check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \
60*761e1c67STaylor Simpson         } \
61*761e1c67STaylor Simpson     } \
62*761e1c67STaylor Simpson }
63*761e1c67STaylor Simpson 
64*761e1c67STaylor Simpson CHECK_OUTPUT_FUNC(d,  8)
65*761e1c67STaylor Simpson CHECK_OUTPUT_FUNC(w,  4)
66*761e1c67STaylor Simpson CHECK_OUTPUT_FUNC(h,  2)
67*761e1c67STaylor Simpson CHECK_OUTPUT_FUNC(b,  1)
68*761e1c67STaylor Simpson 
init_buffers(void)69*761e1c67STaylor Simpson static inline void init_buffers(void)
70*761e1c67STaylor Simpson {
71*761e1c67STaylor Simpson     int counter0 = 0;
72*761e1c67STaylor Simpson     int counter1 = 17;
73*761e1c67STaylor Simpson     for (int i = 0; i < BUFSIZE; i++) {
74*761e1c67STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) {
75*761e1c67STaylor Simpson             buffer0[i].b[j] = counter0++;
76*761e1c67STaylor Simpson             buffer1[i].b[j] = counter1++;
77*761e1c67STaylor Simpson         }
78*761e1c67STaylor Simpson         for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
79*761e1c67STaylor Simpson             mask[i].w[j] = (i + j % MASKMOD == 0) ? 0 : 1;
80*761e1c67STaylor Simpson         }
81*761e1c67STaylor Simpson     }
82*761e1c67STaylor Simpson }
83*761e1c67STaylor Simpson 
/*
 * Emit asm for a one-input vector operation:
 *   v2 is loaded from the address IN, the instruction ASM is applied to the
 *   EL view of v2 (EL is pasted after the register name, ASM is the
 *   mnemonic), and v2 is stored to the address OUT.
 * There are no output operands; the result reaches C through memory, which
 * is why "memory" is listed as clobbered.
 */
#define VEC_OP1(ASM, EL, IN, OUT) \
    asm("v2 = vmem(%[in] + #0)\n\t" \
        "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \
        "vmem(%[out] + #0) = v2\n\t" \
        : /* no outputs */ \
        : [in] "r"(IN), [out] "r"(OUT) \
        : "v2", "memory")
89*761e1c67STaylor Simpson 
/*
 * Emit asm for a two-input vector operation:
 *   v2 and v3 are loaded from the addresses IN0 and IN1, the instruction
 *   ASM is applied to their EL views with the result placed in v2, and v2
 *   is stored to the address OUT.
 * There are no output operands; the result reaches C through memory.
 */
#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \
    asm("v2 = vmem(%[in0] + #0)\n\t" \
        "v3 = vmem(%[in1] + #0)\n\t" \
        "v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \
        "vmem(%[out] + #0) = v2\n\t" \
        : /* no outputs */ \
        : [in0] "r"(IN0), [in1] "r"(IN1), [out] "r"(OUT) \
        : "v2", "v3", "memory")
96*761e1c67STaylor Simpson 
/*
 * Define test_NAME(), a test for a one-input vector instruction.
 *
 * NAME     suffix of the generated function
 * ASM, EL  mnemonic and element suffix forwarded to VEC_OP1
 * FIELD    MMVector member used to compute and check the expectation
 * FIELDSZ  size in bytes of one FIELD element
 * OP       C unary operator that models the instruction
 *
 * The instruction is run on each vector of buffer0 into output[], the
 * expected values are computed in C as `OP buffer0[...]`, and the two are
 * compared with check_output_FIELD().
 */
#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static inline void test_##NAME(void) \
{ \
    const int num_elems = MAX_VEC_SIZE_BYTES / FIELDSZ; \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        VEC_OP1(ASM, EL, &buffer0[vec], &output[vec]); \
    } \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        for (int elem = 0; elem < num_elems; elem++) { \
            expect[vec].FIELD[elem] = OP buffer0[vec].FIELD[elem]; \
        } \
    } \
    check_output_##FIELD(__LINE__, BUFSIZE); \
}
114*761e1c67STaylor Simpson 
/*
 * Define test_NAME(), a test for a two-input vector instruction.
 *
 * NAME     suffix of the generated function
 * ASM, EL  mnemonic and element suffix forwarded to VEC_OP2
 * FIELD    MMVector member used to compute and check the expectation
 * FIELDSZ  size in bytes of one FIELD element
 * OP       C binary operator that models the instruction
 *
 * The instruction is run on matching vectors of buffer0 and buffer1 into
 * output[], the expected values are computed in C as
 * `buffer0[...] OP buffer1[...]`, and the two are compared with
 * check_output_FIELD().
 */
#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static inline void test_##NAME(void) \
{ \
    const int num_elems = MAX_VEC_SIZE_BYTES / FIELDSZ; \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        VEC_OP2(ASM, EL, &buffer0[vec], &buffer1[vec], &output[vec]); \
    } \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        for (int elem = 0; elem < num_elems; elem++) { \
            expect[vec].FIELD[elem] = \
                buffer0[vec].FIELD[elem] OP buffer1[vec].FIELD[elem]; \
        } \
    } \
    check_output_##FIELD(__LINE__, BUFSIZE); \
}
134*761e1c67STaylor Simpson 
/* Byte value that inputs are compared against to form the predicates. */
#define THRESHOLD        31

/*
 * Emit asm for a two-input predicate operation:
 *   - THRESHOLD is splatted into the bytes of v1;
 *   - q0 and q1 are set by byte-wise vcmp.gt of the vectors at IN0 and IN1
 *     against v1;
 *   - q2 = ASM(q0, INV q1), where INV is a string literal pasted in front
 *     of q1 (presumably "" or "!" - confirm against callers);
 *   - 0xff is splatted into v1, which is then stored to OUT under q2.
 * Because the store is conditional, the caller decides what OUT holds where
 * q2 is not set.
 */
#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \
    asm("r4 = #%[thresh]\n\t" \
        "v1.b = vsplat(r4)\n\t" \
        "v2 = vmem(%[in0] + #0)\n\t" \
        "q0 = vcmp.gt(v2.b, v1.b)\n\t" \
        "v3 = vmem(%[in1] + #0)\n\t" \
        "q1 = vcmp.gt(v3.b, v1.b)\n\t" \
        "q2 = " #ASM "(q0, " INV "q1)\n\t" \
        "r4 = #0xff\n\t" \
        "v1.b = vsplat(r4)\n\t" \
        "if (q2) vmem(%[out] + #0) = v1\n\t" \
        : /* no outputs */ \
        : [in0] "r"(IN0), [in1] "r"(IN1), [out] "r"(OUT), \
          [thresh] "i"(THRESHOLD) \
        : "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory")
150*761e1c67STaylor Simpson 
/*
 * Define test_NAME(invert), a test for a two-input predicate instruction.
 *
 * NAME  suffix of the generated function
 * ASM   mnemonic forwarded to PRED_OP2
 * OP    C binary operator that models the instruction
 * INV   string literal forwarded to PRED_OP2 and pasted before the second
 *       predicate operand in the asm
 *
 * invert selects whether the C model negates the second predicate.  It is
 * independent of INV as far as this macro is concerned, so callers must
 * pass a value that agrees with the INV used at instantiation.
 *
 * output[] is cleared first because PRED_OP2 stores conditionally: bytes
 * whose predicate is false keep the cleared value 0x00, matching the
 * expectation computed below.
 */
#define TEST_PRED_OP2(NAME, ASM, OP, INV) \
static inline void test_##NAME(bool invert) \
{ \
    memset(output, 0, sizeof(output)); \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        PRED_OP2(ASM, &buffer0[vec], &buffer1[vec], &output[vec], INV); \
    } \
    for (int vec = 0; vec < BUFSIZE; vec++) { \
        for (int byte = 0; byte < MAX_VEC_SIZE_BYTES; byte++) { \
            bool pred0 = (buffer0[vec].b[byte] > THRESHOLD); \
            bool pred1 = (buffer1[vec].b[byte] > THRESHOLD); \
            if (invert) { \
                expect[vec].b[byte] = (pred0 OP !pred1) ? 0xff : 0x00; \
            } else { \
                expect[vec].b[byte] = (pred0 OP pred1) ? 0xff : 0x00; \
            } \
        } \
    } \
    check_output_b(__LINE__, BUFSIZE); \
}
177*761e1c67STaylor Simpson 
178*761e1c67STaylor Simpson #endif
179