/* xref: /openbmc/qemu/tests/tcg/hexagon/load_unpack.c (revision 9c9fff18c45b54fd9adf2282323aab1b6f0ec866) */
/*
 *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
170d0b91a8STaylor Simpson 
/*
 * Test load unpack instructions
 *
 * Example
 *     r0 = memubh(r1+#0)
 * loads a half word from memory and zero-extends the 2 bytes to form a word
 *
 * For each addressing mode, there are 4 tests
 *     bzw2          unsigned     2 elements
 *     bsw2          signed       2 elements
 *     bzw4          unsigned     4 elements
 *     bsw4          signed       4 elements
 * There are 8 addressing modes, for a total of 32 instructions to test
 */
320d0b91a8STaylor Simpson 
#include <stdio.h>
#include <stdint.h>
#include <string.h>

/*
 * Failure indicator: main() prints FAIL and exits non-zero when this is set.
 * NOTE(review): declared ahead of hex_test.h on purpose -- presumably the
 * check helpers in that header update it; confirm before reordering.
 */
int err;

#include "hex_test.h"
40*0d57cd61STaylor Simpson 
/*
 * Source data for every load under test.
 *
 * NOTE(review): the 64 KiB alignment looks like it exists for the :brev
 * tests, which step the pointer with a modifier of 1 << (16 - 4) -- confirm
 * before relaxing it.
 */
int8_t buf[16] __attribute__((aligned(1 << 16)));

/*
 * Fill buf with the pattern the expected values are built from: byte i holds
 * i + 1, and even-indexed bytes additionally have bit 7 set (0x81, 0x02,
 * 0x83, 0x04, ...) so that sign- and zero-extending loads give different
 * results.
 */
void init_buf(void)
{
    for (int i = 0; i < (int)sizeof(buf); i++) {
        int sign = i % 2 == 0 ? 0x80 : 0;
        /* Explicit cast: narrowing values >= 0x80 to int8_t is intentional. */
        buf[i] = (int8_t)(sign | (i + 1));
    }
}
500d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _io addressing mode (addr + offset)
 */

/*
 * Emit one load-unpack instruction of the form RES = mem<SZ>(ADDR+#OFF).
 *   SZ    instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES   lvalue that receives the unpacked result
 *   ADDR  base address, passed in a register
 *   OFF   constant offset expression, pasted textually into the instruction
 * The "memory" clobber tells the compiler that the instruction reads memory
 * that is not named by any operand, so it is kept ordered after the stores
 * that fill the buffer.
 */
#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \
    __asm__( \
        "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \
        : "=r"(RES) \
        : "r"(ADDR) \
        : "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_io_Z(RES, ADDR, OFF) \
    BxW_LOAD_io(ubh, RES, ADDR, OFF)
/* Sign-extending variant (membh). */
#define BxW_LOAD_io_S(RES, ADDR, OFF) \
    BxW_LOAD_io(bh, RES, ADDR, OFF)
640d0b91a8STaylor Simpson 
650d0b91a8STaylor Simpson #define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \
660d0b91a8STaylor Simpson void test_##NAME(void) \
670d0b91a8STaylor Simpson { \
680d0b91a8STaylor Simpson     TYPE result; \
690d0b91a8STaylor Simpson     init_buf(); \
700d0b91a8STaylor Simpson     BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \
71*0d57cd61STaylor Simpson     check64(result, (EXP1) | (EXT)); \
720d0b91a8STaylor Simpson     BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \
73*0d57cd61STaylor Simpson     check64(result, (EXP2) | (EXT)); \
740d0b91a8STaylor Simpson     BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \
75*0d57cd61STaylor Simpson     check64(result, (EXP3) | (EXT)); \
760d0b91a8STaylor Simpson     BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \
77*0d57cd61STaylor Simpson     check64(result, (EXP4) | (EXT)); \
780d0b91a8STaylor Simpson }
790d0b91a8STaylor Simpson 
800d0b91a8STaylor Simpson 
81*0d57cd61STaylor Simpson TEST_io(loadbzw2_io, int32_t, Z, 2, 0x00000000,
820d0b91a8STaylor Simpson         0x00020081, 0x00040083, 0x00060085, 0x00080087)
83*0d57cd61STaylor Simpson TEST_io(loadbsw2_io, int32_t, S, 2, 0x0000ff00,
840d0b91a8STaylor Simpson         0x00020081, 0x00040083, 0x00060085, 0x00080087)
85*0d57cd61STaylor Simpson TEST_io(loadbzw4_io, int64_t, Z,  4, 0x0000000000000000LL,
860d0b91a8STaylor Simpson         0x0004008300020081LL, 0x0008008700060085LL,
870d0b91a8STaylor Simpson         0x000c008b000a0089LL, 0x0010008f000e008dLL)
88*0d57cd61STaylor Simpson TEST_io(loadbsw4_io, int64_t, S,  4, 0x0000ff000000ff00LL,
890d0b91a8STaylor Simpson         0x0004008300020081LL, 0x0008008700060085LL,
900d0b91a8STaylor Simpson         0x000c008b000a0089LL, 0x0010008f000e008dLL)
910d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _ur addressing mode (index << offset + base)
 */

/*
 * Emit one load-unpack instruction of the form
 *     RES = mem<SZ>(IDX<<#SHIFT + ##buf)
 *   SZ     instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES    lvalue that receives the unpacked result
 *   SHIFT  constant shift amount, pasted textually into the instruction
 *   IDX    index value, passed in a register
 * The base is always the symbol buf; it is hard-coded in the instruction
 * text.  The "memory" clobber tells the compiler that the instruction reads
 * memory that is not named by any operand.
 */
#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \
    __asm__( \
        "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \
        : "=r"(RES) \
        : "r"(IDX) \
        : "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \
    BxW_LOAD_ur(ubh, RES, SHIFT, IDX)
/* Sign-extending variant (membh). */
#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \
    BxW_LOAD_ur(bh, RES, SHIFT, IDX)
1050d0b91a8STaylor Simpson 
/*
 * Define test_<NAME>(): reinitialise buf, then load with index values 0, 1,
 * 2 and 3 shifted left by SHIFT and compare each result with its expected
 * value.
 *   TYPE      C type of the result (int32_t or int64_t in the uses below)
 *   SIGN      Z selects the zero-extending load, S the sign-extending one
 *   SHIFT     shift applied to the index (1 or 2 in the uses below)
 *   EXT       bits OR-ed into every expected value (the sign-extension fill)
 *   RES1..4   expected results before EXT is applied
 */
#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \
void test_##NAME(void) \
{ \
    TYPE result; \
    init_buf(); \
    BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \
    check64(result, (RES1) | (EXT)); \
    BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \
    check64(result, (RES2) | (EXT)); \
    BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \
    check64(result, (RES3) | (EXT)); \
    BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \
    check64(result, (RES4) | (EXT)); \
}

TEST_ur(loadbzw2_ur, int32_t, Z, 1, 0x00000000,
        0x00020081, 0x00040083, 0x00060085, 0x00080087)
TEST_ur(loadbsw2_ur, int32_t, S, 1, 0x0000ff00,
        0x00020081, 0x00040083, 0x00060085, 0x00080087)
TEST_ur(loadbzw4_ur, int64_t, Z, 2, 0x0000000000000000LL,
        0x0004008300020081LL, 0x0008008700060085LL,
        0x000c008b000a0089LL, 0x0010008f000e008dLL)
TEST_ur(loadbsw4_ur, int64_t, S, 2, 0x0000ff000000ff00LL,
        0x0004008300020081LL, 0x0008008700060085LL,
        0x000c008b000a0089LL, 0x0010008f000e008dLL)
1310d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _ap addressing mode (addr = base)
 */

/*
 * Emit one load-unpack instruction of the form RES = mem<SZ>(PTR = ##ADDR).
 *   SZ    instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES   lvalue that receives the unpacked result
 *   PTR   lvalue that receives the address register written by the
 *         instruction
 *   ADDR  constant address expression, pasted textually into the instruction
 * There are no register inputs.  The "memory" clobber tells the compiler
 * that the instruction reads memory that is not named by any operand.
 */
#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \
    __asm__( \
        "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \
        : "=r"(RES), "=r"(PTR) \
        : \
        : "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \
    BxW_LOAD_ap(ubh, RES, PTR, ADDR)
/* Sign-extending variant (membh). */
#define BxW_LOAD_ap_S(RES, PTR, ADDR) \
    BxW_LOAD_ap(bh, RES, PTR, ADDR)
1440d0b91a8STaylor Simpson 
1450d0b91a8STaylor Simpson #define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
1460d0b91a8STaylor Simpson void test_##NAME(void) \
1470d0b91a8STaylor Simpson { \
1480d0b91a8STaylor Simpson     TYPE result; \
1490d0b91a8STaylor Simpson     void *ptr; \
1500d0b91a8STaylor Simpson     init_buf(); \
1510d0b91a8STaylor Simpson     BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \
152*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
1530d0b91a8STaylor Simpson     checkp(ptr, &buf[0 * (SIZE)]); \
1540d0b91a8STaylor Simpson     BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \
155*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
1560d0b91a8STaylor Simpson     checkp(ptr, &buf[1 * (SIZE)]); \
1570d0b91a8STaylor Simpson     BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \
158*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
1590d0b91a8STaylor Simpson     checkp(ptr, &buf[2 * (SIZE)]); \
1600d0b91a8STaylor Simpson     BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \
161*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
1620d0b91a8STaylor Simpson     checkp(ptr, &buf[3 * (SIZE)]); \
1630d0b91a8STaylor Simpson }
1640d0b91a8STaylor Simpson 
165*0d57cd61STaylor Simpson TEST_ap(loadbzw2_ap, int32_t, Z, 2, 0x00000000,
1660d0b91a8STaylor Simpson         0x00020081, 0x00040083, 0x00060085, 0x00080087)
167*0d57cd61STaylor Simpson TEST_ap(loadbsw2_ap, int32_t, S, 2, 0x0000ff00,
1680d0b91a8STaylor Simpson         0x00020081, 0x00040083, 0x00060085, 0x00080087)
169*0d57cd61STaylor Simpson TEST_ap(loadbzw4_ap, int64_t, Z, 4, 0x0000000000000000LL,
1700d0b91a8STaylor Simpson         0x0004008300020081LL, 0x0008008700060085LL,
1710d0b91a8STaylor Simpson         0x000c008b000a0089LL, 0x0010008f000e008dLL)
172*0d57cd61STaylor Simpson TEST_ap(loadbsw4_ap, int64_t, S, 4, 0x0000ff000000ff00LL,
1730d0b91a8STaylor Simpson         0x0004008300020081LL, 0x0008008700060085LL,
1740d0b91a8STaylor Simpson         0x000c008b000a0089LL, 0x0010008f000e008dLL)
1750d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _pr addressing mode (addr ++ modifier-reg)
 */

/*
 * Emit "m0 = INC" followed by one load-unpack instruction of the form
 *     RES = mem<SZ>(PTR++m0)
 *   SZ    instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES   lvalue that receives the unpacked result
 *   PTR   pointer lvalue, read and updated by the instruction
 *   INC   increment value, passed in a register and copied to m0
 * m0 is listed as clobbered.  The "memory" clobber tells the compiler that
 * the instruction reads memory that is not named by any operand.
 */
#define BxW_LOAD_pr(SZ, RES, PTR, INC) \
    __asm__( \
        "m0 = %2\n\t" \
        "%0 = mem" #SZ "(%1++m0)\n\t" \
        : "=r"(RES), "+r"(PTR) \
        : "r"(INC) \
        : "m0", "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_pr_Z(RES, PTR, INC) \
    BxW_LOAD_pr(ubh, RES, PTR, INC)
/* Sign-extending variant (membh). */
#define BxW_LOAD_pr_S(RES, PTR, INC) \
    BxW_LOAD_pr(bh, RES, PTR, INC)
1910d0b91a8STaylor Simpson 
1920d0b91a8STaylor Simpson #define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
1930d0b91a8STaylor Simpson void test_##NAME(void) \
1940d0b91a8STaylor Simpson { \
1950d0b91a8STaylor Simpson     TYPE result; \
1960d0b91a8STaylor Simpson     void *ptr = buf; \
1970d0b91a8STaylor Simpson     init_buf(); \
1980d0b91a8STaylor Simpson     BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
199*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
2000d0b91a8STaylor Simpson     checkp(ptr, &buf[1 * (SIZE)]); \
2010d0b91a8STaylor Simpson     BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
202*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
2030d0b91a8STaylor Simpson     checkp(ptr, &buf[2 * (SIZE)]); \
2040d0b91a8STaylor Simpson     BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
205*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
2060d0b91a8STaylor Simpson     checkp(ptr, &buf[3 * (SIZE)]); \
2070d0b91a8STaylor Simpson     BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
208*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
2090d0b91a8STaylor Simpson     checkp(ptr, &buf[4 * (SIZE)]); \
2100d0b91a8STaylor Simpson }
2110d0b91a8STaylor Simpson 
212*0d57cd61STaylor Simpson TEST_pr(loadbzw2_pr, int32_t, Z, 2, 0x00000000,
2130d0b91a8STaylor Simpson     0x00020081, 0x0040083, 0x00060085, 0x00080087)
214*0d57cd61STaylor Simpson TEST_pr(loadbsw2_pr, int32_t, S, 2, 0x0000ff00,
2150d0b91a8STaylor Simpson     0x00020081, 0x0040083, 0x00060085, 0x00080087)
216*0d57cd61STaylor Simpson TEST_pr(loadbzw4_pr, int64_t, Z, 4, 0x0000000000000000LL,
2170d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
2180d0b91a8STaylor Simpson     0x000c008b000a0089LL, 0x0010008f000e008dLL)
219*0d57cd61STaylor Simpson TEST_pr(loadbsw4_pr, int64_t, S, 4, 0x0000ff000000ff00LL,
2200d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
2210d0b91a8STaylor Simpson     0x000c008b000a0089LL, 0x0010008f000e008dLL)
2220d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _pbr addressing mode (addr ++ modifier-reg:brev)
 */

/*
 * Load m0 with 1 << (16 - 4) (via r4) and emit one load-unpack instruction
 * of the form RES = mem<SZ>(PTR++m0:brev).
 *   SZ    instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES   lvalue that receives the unpacked result
 *   PTR   pointer lvalue, read and updated by the instruction
 * r4 and m0 are listed as clobbered.  The "memory" clobber tells the
 * compiler that the instruction reads memory that is not named by any
 * operand.
 */
#define BxW_LOAD_pbr(SZ, RES, PTR) \
    __asm__( \
        "r4 = #(1 << (16 - 4))\n\t" \
        "m0 = r4\n\t" \
        "%0 = mem" #SZ "(%1++m0:brev)\n\t" \
        : "=r"(RES), "+r"(PTR) \
        : \
        : "r4", "m0", "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_pbr_Z(RES, PTR) \
    BxW_LOAD_pbr(ubh, RES, PTR)
/* Sign-extending variant (membh). */
#define BxW_LOAD_pbr_S(RES, PTR) \
    BxW_LOAD_pbr(bh, RES, PTR)
2390d0b91a8STaylor Simpson 
2400d0b91a8STaylor Simpson #define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \
2410d0b91a8STaylor Simpson void test_##NAME(void) \
2420d0b91a8STaylor Simpson { \
2430d0b91a8STaylor Simpson     TYPE result; \
2440d0b91a8STaylor Simpson     void *ptr = buf; \
2450d0b91a8STaylor Simpson     init_buf(); \
2460d0b91a8STaylor Simpson     BxW_LOAD_pbr_##SIGN(result, ptr); \
247*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
2480d0b91a8STaylor Simpson     BxW_LOAD_pbr_##SIGN(result, ptr); \
249*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
2500d0b91a8STaylor Simpson     BxW_LOAD_pbr_##SIGN(result, ptr); \
251*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
2520d0b91a8STaylor Simpson     BxW_LOAD_pbr_##SIGN(result, ptr); \
253*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
2540d0b91a8STaylor Simpson }
2550d0b91a8STaylor Simpson 
256*0d57cd61STaylor Simpson TEST_pbr(loadbzw2_pbr, int32_t, Z, 0x00000000,
257a1ad040dSTaylor Simpson     0x00020081, 0x000a0089, 0x00060085, 0x000e008d)
258*0d57cd61STaylor Simpson TEST_pbr(loadbsw2_pbr, int32_t, S, 0x0000ff00,
259a1ad040dSTaylor Simpson     0x00020081, 0x000aff89, 0x0006ff85, 0x000eff8d)
260*0d57cd61STaylor Simpson TEST_pbr(loadbzw4_pbr, int64_t, Z, 0x0000000000000000LL,
261a1ad040dSTaylor Simpson     0x0004008300020081LL, 0x000c008b000a0089LL,
262a1ad040dSTaylor Simpson     0x0008008700060085LL, 0x0010008f000e008dLL)
263*0d57cd61STaylor Simpson TEST_pbr(loadbsw4_pbr, int64_t, S, 0x0000ff000000ff00LL,
264a1ad040dSTaylor Simpson     0x0004008300020081LL, 0x000cff8b000aff89LL,
265a1ad040dSTaylor Simpson     0x0008ff870006ff85LL, 0x0010ff8f000eff8dLL)
2660d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _pi addressing mode (addr ++ inc)
 */

/*
 * Emit one load-unpack instruction of the form RES = mem<SZ>(PTR++#INC).
 *   SZ    instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES   lvalue that receives the unpacked result
 *   PTR   pointer lvalue, read and updated by the instruction
 *   INC   constant increment, pasted textually into the instruction
 * The "memory" clobber tells the compiler that the instruction reads memory
 * that is not named by any operand.
 */
#define BxW_LOAD_pi(SZ, RES, PTR, INC) \
    __asm__( \
        "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \
        : "=r"(RES), "+r"(PTR) \
        : \
        : "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_pi_Z(RES, PTR, INC) \
    BxW_LOAD_pi(ubh, RES, PTR, INC)
/* Sign-extending variant (membh). */
#define BxW_LOAD_pi_S(RES, PTR, INC) \
    BxW_LOAD_pi(bh, RES, PTR, INC)
2790d0b91a8STaylor Simpson 
2800d0b91a8STaylor Simpson #define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \
2810d0b91a8STaylor Simpson void test_##NAME(void) \
2820d0b91a8STaylor Simpson { \
2830d0b91a8STaylor Simpson     TYPE result; \
2840d0b91a8STaylor Simpson     void *ptr = buf; \
2850d0b91a8STaylor Simpson     init_buf(); \
2860d0b91a8STaylor Simpson     BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
287*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
2880d0b91a8STaylor Simpson     checkp(ptr, &buf[1 * (INC)]); \
2890d0b91a8STaylor Simpson     BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
290*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
2910d0b91a8STaylor Simpson     checkp(ptr, &buf[2 * (INC)]); \
2920d0b91a8STaylor Simpson     BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
293*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
2940d0b91a8STaylor Simpson     checkp(ptr, &buf[3 * (INC)]); \
2950d0b91a8STaylor Simpson     BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
296*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
2970d0b91a8STaylor Simpson     checkp(ptr, &buf[4 * (INC)]); \
2980d0b91a8STaylor Simpson }
2990d0b91a8STaylor Simpson 
300*0d57cd61STaylor Simpson TEST_pi(loadbzw2_pi, int32_t, Z, 2, 0x00000000,
3010d0b91a8STaylor Simpson     0x00020081, 0x00040083, 0x00060085, 0x00080087)
302*0d57cd61STaylor Simpson TEST_pi(loadbsw2_pi, int32_t, S, 2, 0x0000ff00,
3030d0b91a8STaylor Simpson     0x00020081, 0x00040083, 0x00060085, 0x00080087)
304*0d57cd61STaylor Simpson TEST_pi(loadbzw4_pi, int64_t, Z, 4, 0x0000000000000000LL,
3050d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
3060d0b91a8STaylor Simpson     0x000c008b000a0089LL, 0x0010008f000e008dLL)
307*0d57cd61STaylor Simpson TEST_pi(loadbsw4_pi, int64_t, S, 4, 0x0000ff000000ff00LL,
3080d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
3090d0b91a8STaylor Simpson     0x000c008b000a0089LL, 0x0010008f000e008dLL)
3100d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _pci addressing mode (addr ++ inc:circ)
 */

/*
 * Set up m0 and cs0, then emit one load-unpack instruction of the form
 *     RES = mem<SZ>(PTR++#INC:circ(m0))
 *   SZ     instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES    lvalue that receives the unpacked result
 *   PTR    pointer lvalue, read and updated by the instruction
 *   START  value copied to cs0 (the tests pass the buffer's base address)
 *   LEN    value copied to m0 via r4 (the tests pass the wrap length)
 *   INC    constant increment, pasted textually into the instruction
 * r4, m0 and cs0 are listed as clobbered.  The "memory" clobber tells the
 * compiler that the instruction reads memory that is not named by any
 * operand.
 */
#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \
        : "=r"(RES), "+r"(PTR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \
    BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC)
/* Sign-extending variant (membh). */
#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \
    BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC)
3280d0b91a8STaylor Simpson 
3290d0b91a8STaylor Simpson #define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \
3300d0b91a8STaylor Simpson void test_##NAME(void) \
3310d0b91a8STaylor Simpson { \
3320d0b91a8STaylor Simpson     TYPE result; \
3330d0b91a8STaylor Simpson     void *ptr = buf; \
3340d0b91a8STaylor Simpson     init_buf(); \
3350d0b91a8STaylor Simpson     BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
336*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
3370d0b91a8STaylor Simpson     checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \
3380d0b91a8STaylor Simpson     BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
339*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
3400d0b91a8STaylor Simpson     checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \
3410d0b91a8STaylor Simpson     BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
342*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
3430d0b91a8STaylor Simpson     checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \
3440d0b91a8STaylor Simpson     BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
345*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
3460d0b91a8STaylor Simpson     checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \
3470d0b91a8STaylor Simpson }
3480d0b91a8STaylor Simpson 
349*0d57cd61STaylor Simpson TEST_pci(loadbzw2_pci, int32_t, Z, 6, 2, 0x00000000,
3500d0b91a8STaylor Simpson     0x00020081, 0x00040083, 0x00060085, 0x00020081)
351*0d57cd61STaylor Simpson TEST_pci(loadbsw2_pci, int32_t, S, 6, 2, 0x0000ff00,
3520d0b91a8STaylor Simpson     0x00020081, 0x00040083, 0x00060085, 0x00020081)
353*0d57cd61STaylor Simpson TEST_pci(loadbzw4_pci, int64_t, Z, 8, 4, 0x0000000000000000LL,
3540d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
3550d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL)
356*0d57cd61STaylor Simpson TEST_pci(loadbsw4_pci, int64_t, S, 8, 4, 0x0000ff000000ff00LL,
3570d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
3580d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL)
3590d0b91a8STaylor Simpson 
/*
 ****************************************************************************
 * _pcr addressing mode (addr ++ I:circ(modifier-reg))
 */

/*
 * Set up m1 and cs1, then emit one load-unpack instruction of the form
 *     RES = mem<SZ>(PTR++I:circ(m1))
 *   SZ     instruction suffix: ubh (zero-extending) or bh (sign-extending)
 *   RES    lvalue that receives the unpacked result
 *   PTR    pointer lvalue, read and updated by the instruction
 *   START  value copied to cs1 (the tests pass the buffer's base address)
 *   LEN    wrap length; its low 17 bits go into bits 0..16 of m1
 *   INC    increment; its low 7 bits go into bits 17..23 of m1
 * m1 is written via r4.  r4, m1 and cs1 are listed as clobbered.  The
 * "memory" clobber tells the compiler that the instruction reads memory that
 * is not named by any operand.
 */
#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \
    __asm__( \
        "r4 = %2\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %3\n\t" \
        "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \
        : "=r"(RES), "+r"(PTR) \
        : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \
          "r"(START) \
        : "r4", "m1", "cs1", "memory")
/* Zero-extending variant (memubh). */
#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \
    BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC)
/* Sign-extending variant (membh). */
#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \
    BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC)
3780d0b91a8STaylor Simpson 
3790d0b91a8STaylor Simpson #define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \
3800d0b91a8STaylor Simpson                  EXT, RES1, RES2, RES3, RES4) \
3810d0b91a8STaylor Simpson void test_##NAME(void) \
3820d0b91a8STaylor Simpson { \
3830d0b91a8STaylor Simpson     TYPE result; \
3840d0b91a8STaylor Simpson     void *ptr = buf; \
3850d0b91a8STaylor Simpson     init_buf(); \
3860d0b91a8STaylor Simpson     BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
387*0d57cd61STaylor Simpson     check64(result, (RES1) | (EXT)); \
3880d0b91a8STaylor Simpson     checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \
3890d0b91a8STaylor Simpson     BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
390*0d57cd61STaylor Simpson     check64(result, (RES2) | (EXT)); \
3910d0b91a8STaylor Simpson     checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \
3920d0b91a8STaylor Simpson     BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
393*0d57cd61STaylor Simpson     check64(result, (RES3) | (EXT)); \
3940d0b91a8STaylor Simpson     checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \
3950d0b91a8STaylor Simpson     BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
396*0d57cd61STaylor Simpson     check64(result, (RES4) | (EXT)); \
3970d0b91a8STaylor Simpson     checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \
3980d0b91a8STaylor Simpson }
3990d0b91a8STaylor Simpson 
400*0d57cd61STaylor Simpson TEST_pcr(loadbzw2_pcr, int32_t, Z, 2, 8, 2, 0x00000000,
4010d0b91a8STaylor Simpson     0x00020081, 0x00060085, 0x00020081, 0x00060085)
402*0d57cd61STaylor Simpson TEST_pcr(loadbsw2_pcr, int32_t, S, 2, 8, 2, 0x0000ff00,
4030d0b91a8STaylor Simpson     0x00020081, 0x00060085, 0x00020081, 0x00060085)
404*0d57cd61STaylor Simpson TEST_pcr(loadbzw4_pcr, int64_t, Z, 4, 8, 1, 0x0000000000000000LL,
4050d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
4060d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL)
407*0d57cd61STaylor Simpson TEST_pcr(loadbsw4_pcr, int64_t, S, 4, 8, 1, 0x0000ff000000ff00LL,
4080d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL,
4090d0b91a8STaylor Simpson     0x0004008300020081LL, 0x0008008700060085LL)
4100d0b91a8STaylor Simpson 
main()4110d0b91a8STaylor Simpson int main()
4120d0b91a8STaylor Simpson {
4130d0b91a8STaylor Simpson     test_loadbzw2_io();
4140d0b91a8STaylor Simpson     test_loadbsw2_io();
4150d0b91a8STaylor Simpson     test_loadbzw4_io();
4160d0b91a8STaylor Simpson     test_loadbsw4_io();
4170d0b91a8STaylor Simpson 
4180d0b91a8STaylor Simpson     test_loadbzw2_ur();
4190d0b91a8STaylor Simpson     test_loadbsw2_ur();
4200d0b91a8STaylor Simpson     test_loadbzw4_ur();
4210d0b91a8STaylor Simpson     test_loadbsw4_ur();
4220d0b91a8STaylor Simpson 
4230d0b91a8STaylor Simpson     test_loadbzw2_ap();
4240d0b91a8STaylor Simpson     test_loadbsw2_ap();
4250d0b91a8STaylor Simpson     test_loadbzw4_ap();
4260d0b91a8STaylor Simpson     test_loadbsw4_ap();
4270d0b91a8STaylor Simpson 
4280d0b91a8STaylor Simpson     test_loadbzw2_pr();
4290d0b91a8STaylor Simpson     test_loadbsw2_pr();
4300d0b91a8STaylor Simpson     test_loadbzw4_pr();
4310d0b91a8STaylor Simpson     test_loadbsw4_pr();
4320d0b91a8STaylor Simpson 
4330d0b91a8STaylor Simpson     test_loadbzw2_pbr();
4340d0b91a8STaylor Simpson     test_loadbsw2_pbr();
4350d0b91a8STaylor Simpson     test_loadbzw4_pbr();
4360d0b91a8STaylor Simpson     test_loadbsw4_pbr();
4370d0b91a8STaylor Simpson 
4380d0b91a8STaylor Simpson     test_loadbzw2_pi();
4390d0b91a8STaylor Simpson     test_loadbsw2_pi();
4400d0b91a8STaylor Simpson     test_loadbzw4_pi();
4410d0b91a8STaylor Simpson     test_loadbsw4_pi();
4420d0b91a8STaylor Simpson 
4430d0b91a8STaylor Simpson     test_loadbzw2_pci();
4440d0b91a8STaylor Simpson     test_loadbsw2_pci();
4450d0b91a8STaylor Simpson     test_loadbzw4_pci();
4460d0b91a8STaylor Simpson     test_loadbsw4_pci();
4470d0b91a8STaylor Simpson 
4480d0b91a8STaylor Simpson     test_loadbzw2_pcr();
4490d0b91a8STaylor Simpson     test_loadbsw2_pcr();
4500d0b91a8STaylor Simpson     test_loadbzw4_pcr();
4510d0b91a8STaylor Simpson     test_loadbsw4_pcr();
4520d0b91a8STaylor Simpson 
4530d0b91a8STaylor Simpson     puts(err ? "FAIL" : "PASS");
4540d0b91a8STaylor Simpson     return err ? 1 : 0;
4550d0b91a8STaylor Simpson }
456