/*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
/*
* Make sure that the :mem_noshuf packet attribute is honored.
* This is important when the addresses overlap.
* The store instruction in slot 1 effectively executes first,
* followed by the load instruction in slot 0.
*/
/*
 * MEM_NOSHUF32 - generate a store+load test helper with a 32-bit result.
 *
 * Expands to a static inline function NAME(p, q, x) that emits one packet
 * tagged :mem_noshuf containing
 *   - a store of x through p using the ST_OP mnemonic, and
 *   - a load through q using the LD_OP mnemonic.
 * ST_OP and LD_OP are stringized straight into the assembly text.
 *
 * Asm operands: %0 = ret (output), %1 = p, %2 = q, %3 = x.
 * The "memory" clobber tells the compiler that the asm accesses memory it
 * cannot see through the operand list.
 *
 * Returns the loaded value as unsigned int.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned int NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
unsigned int ret; \
asm volatile("{\n\t" \
" " #ST_OP "(%1) = %3\n\t" \
" %0 = " #LD_OP "(%2)\n\t" \
"}:mem_noshuf\n" \
: "=r"(ret) \
: "r"(p), "r"(q), "r"(x) \
: "memory"); \
return ret; \
}
/*
 * MEM_NOSHUF64 - generate a store+load test helper with a 64-bit result.
 *
 * Same shape as the 32-bit generator: the expansion is a static inline
 * function NAME(p, q, x) that emits one :mem_noshuf packet holding a store
 * of x through p (ST_OP) and a load through q (LD_OP). The only difference
 * is that the result variable and return type are unsigned long long.
 *
 * Asm operands: %0 = ret (output), %1 = p, %2 = q, %3 = x.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned long long NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
{ \
unsigned long long ret; \
asm volatile("{\n\t" \
" " #ST_OP "(%1) = %3\n\t" \
" %0 = " #LD_OP "(%2)\n\t" \
"}:mem_noshuf\n" \
: "=r"(ret) \
: "r"(p), "r"(q), "r"(x) \
: "memory"); \
return ret; \
}
/*
 * Instantiate one helper per store-size/load-size pair.
 *
 * Naming: mem_noshuf_s<store>_l<load>, where the store is b/h/w/d
 * (memb/memh/memw/memd) and the load is b/ub/h/uh/w/d
 * (memb/memub/memh/memuh/memw/memd). Helpers whose load is memd are built
 * with the 64-bit generator; all others return a 32-bit result.
 */
/* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb, signed char, signed char, memb, memb)
MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char, memb, memub)
MEM_NOSHUF32(mem_noshuf_sb_lh, signed char, signed short, memb, memh)
MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short, memb, memuh)
MEM_NOSHUF32(mem_noshuf_sb_lw, signed char, signed int, memb, memw)
MEM_NOSHUF64(mem_noshuf_sb_ld, signed char, signed long long, memb, memd)
/* Store half combinations */
MEM_NOSHUF32(mem_noshuf_sh_lb, signed short, signed char, memh, memb)
MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char, memh, memub)
MEM_NOSHUF32(mem_noshuf_sh_lh, signed short, signed short, memh, memh)
MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short, memh, memuh)
MEM_NOSHUF32(mem_noshuf_sh_lw, signed short, signed int, memh, memw)
MEM_NOSHUF64(mem_noshuf_sh_ld, signed short, signed long long, memh, memd)
/* Store word combinations */
MEM_NOSHUF32(mem_noshuf_sw_lb, signed int, signed char, memw, memb)
MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub)
MEM_NOSHUF32(mem_noshuf_sw_lh, signed int, signed short, memw, memh)
MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh)
MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw)
MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd)
/* Store double combinations */
MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb)
MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub)
MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh)
MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh)
MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw)
MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd)
/*
 * pred_lw_sw - word store plus predicated word load in one :mem_noshuf packet.
 *
 * Sequence emitted:
 *   1. p0 is set when pred == 0.
 *   2. The result register is preloaded with x.
 *   3. In a single packet, y is stored through p (unconditionally) and the
 *      result is overwritten with the word at q only if p0 is false.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = y, %5 = pred.
 *
 * Returns x when pred == 0, otherwise the word loaded from q.
 */
static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y)
{
int ret;
/*
 * "=&r" (early-clobber): %0 is written by "%0 = %3" before the packet
 * reads the remaining inputs, so it must not share a register with them.
 */
asm volatile("p0 = cmp.eq(%5, #0)\n\t"
"%0 = %3\n\t"
"{\n\t"
" memw(%1) = %4\n\t"
" if (!p0) %0 = memw(%2)\n\t"
"}:mem_noshuf\n"
: "=&r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
: "p0", "memory");
return ret;
}
/*
 * pred_lw_sw_pi - variant of the predicated word load test that addresses
 * the load through a scratch register using the "r7++#4" form.
 *
 * Sequence emitted:
 *   1. p0 is set when pred == 0.
 *   2. The result register is preloaded with x and r7 is set to q.
 *   3. In a single packet, y is stored through p (unconditionally) and the
 *      result is overwritten with memw(r7++#4) only if p0 is false.
 *
 * NOTE(review): the "_pi" suffix and the "++#4" syntax suggest
 * post-increment addressing; confirm against the Hexagon ISA manual.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = y, %5 = pred.
 * r7 is hard-coded in the asm text and therefore listed as a clobber.
 *
 * Returns x when pred == 0, otherwise the word loaded via r7.
 */
static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y)
{
int ret;
/* "=&r" (early-clobber): %0 is written before the other inputs are read. */
asm volatile("p0 = cmp.eq(%5, #0)\n\t"
"%0 = %3\n\t"
"r7 = %2\n\t"
"{\n\t"
" memw(%1) = %4\n\t"
" if (!p0) %0 = memw(r7++#4)\n\t"
"}:mem_noshuf\n"
: "=&r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
: "r7", "p0", "memory");
return ret;
}
/*
 * pred_ld_sd - doubleword store plus predicated doubleword load in one
 * :mem_noshuf packet.
 *
 * Sequence emitted:
 *   1. p0 is set when pred == 0.
 *   2. The result is preloaded with x.
 *   3. In a single packet, y is stored through p (unconditionally) and the
 *      result is overwritten with the doubleword at q only if p0 is false.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = y, %5 = pred.
 *
 * Returns x when pred == 0, otherwise the doubleword loaded from q.
 */
static inline long long pred_ld_sd(int pred, long long *p, long long *q,
long long x, long long y)
{
/*
 * NOTE(review): declared unsigned although the function returns (signed)
 * long long; the value is converted on return. The post-increment sibling
 * of this helper uses a signed local.
 */
unsigned long long ret;
/* "=&r" (early-clobber): %0 is written before the other inputs are read. */
asm volatile("p0 = cmp.eq(%5, #0)\n\t"
"%0 = %3\n\t"
"{\n\t"
" memd(%1) = %4\n\t"
" if (!p0) %0 = memd(%2)\n\t"
"}:mem_noshuf\n"
: "=&r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
: "p0", "memory");
return ret;
}
/*
 * pred_ld_sd_pi - variant of the predicated doubleword load test that
 * addresses the load through a scratch register using the "r7++#8" form.
 *
 * Sequence emitted:
 *   1. p0 is set when pred == 0.
 *   2. The result is preloaded with x and r7 is set to q.
 *   3. In a single packet, y is stored through p (unconditionally) and the
 *      result is overwritten with memd(r7++#8) only if p0 is false.
 *
 * NOTE(review): the "_pi" suffix and the "++#8" syntax suggest
 * post-increment addressing; confirm against the Hexagon ISA manual.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = y, %5 = pred.
 * r7 is hard-coded in the asm text and therefore listed as a clobber.
 *
 * Returns x when pred == 0, otherwise the doubleword loaded via r7.
 */
static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q,
long long x, long long y)
{
long long ret;
/* "=&r" (early-clobber): %0 is written before the other inputs are read. */
asm volatile("p0 = cmp.eq(%5, #0)\n\t"
"%0 = %3\n\t"
"r7 = %2\n\t"
"{\n\t"
" memd(%1) = %4\n\t"
" if (!p0) %0 = memd(r7++#8)\n\t"
"}:mem_noshuf\n"
: "=&r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
: "r7", "p0", "memory");
return ret;
}
/*
 * cancel_sw_lb - predicated word store plus byte load in one :mem_noshuf
 * packet.
 *
 * p0 is set when pred == 0 and the store is guarded by !p0, so the word x
 * is written through p only when pred is non-zero; with pred == 0 the store
 * does not happen. The byte load through q (memb) is unconditional.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = pred.
 *
 * Returns the value produced by the memb load, as unsigned int.
 */
static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
{
unsigned int ret;
asm volatile("p0 = cmp.eq(%4, #0)\n\t"
"{\n\t"
" if (!p0) memw(%1) = %3\n\t"
" %0 = memb(%2)\n\t"
"}:mem_noshuf\n"
: "=r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(pred)
: "p0", "memory");
return ret;
}
/*
 * cancel_sw_ld - predicated word store plus doubleword load in one
 * :mem_noshuf packet.
 *
 * p0 is set when pred == 0 and the store is guarded by !p0, so the word x
 * is written through p only when pred is non-zero; with pred == 0 the store
 * does not happen. The doubleword load through q (memd) is unconditional.
 *
 * Asm operands: %0 = ret, %1 = p, %2 = q, %3 = x, %4 = pred.
 *
 * Returns the doubleword loaded from q.
 */
static inline
unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
{
/*
 * NOTE(review): declared signed although the function returns unsigned
 * long long; the value is converted on return.
 */
long long ret;
asm volatile("p0 = cmp.eq(%4, #0)\n\t"
"{\n\t"
" if (!p0) memw(%1) = %3\n\t"
" %0 = memd(%2)\n\t"
"}:mem_noshuf\n"
: "=r"(ret)
: "r"(p), "r"(q), "r"(x), "r"(pred)
: "p0", "memory");
return ret;
}
/*
 * 16-byte scratch buffer that can be viewed at every access width used by
 * the tests. All members alias the same storage, so a store made through
 * one view is observable through any of the others.
 */
typedef union {
    /* doubleword views */
    long long d[2];
    unsigned long long ud[2];
    /* word views */
    int w[4];
    unsigned int uw[4];
    /* halfword views */
    short h[8];
    unsigned short uh[8];
    /* byte views */
    signed char b[16];
    unsigned char ub[16];
} Memory;
/* Running count of failed checks; main() returns it as the exit status. */
int err;

/*
 * check32 - compare a 32-bit result with its expected value.
 *
 * On mismatch, prints both values in hex together with the source line of
 * the check32() invocation and increments err.
 */
#define check32(n, expect) check32_(n, expect, __LINE__)
static void check32_(int n, int expect, int line)
{
    if (n != expect) {
        /* %x consumes an unsigned int, so convert the signed values. */
        printf("ERROR: 0x%08x != 0x%08x, line %d\n",
               (unsigned int)n, (unsigned int)expect, line);
        err++;
    }
}

/*
 * check64 - compare a 64-bit result with its expected value.
 *
 * On mismatch, prints both values as 16 hex digits together with the
 * source line of the check64() invocation and increments err.
 */
#define check64(n, expect) check64_(n, expect, __LINE__)
static void check64_(long long n, long long expect, int line)
{
    if (n != expect) {
        /*
         * %llx consumes an unsigned long long; the width of 16 keeps the
         * two 64-bit values aligned regardless of leading zeros.
         */
        printf("ERROR: 0x%016llx != 0x%016llx, line %d\n",
               (unsigned long long)n, (unsigned long long)expect, line);
        err++;
    }
}
/*
 * Test driver.
 *
 * Each case primes part of the scratch buffer with all-ones, runs one
 * store/load helper, and compares the returned value (and, for the
 * predicated-load cases, the resulting memory contents) with the expected
 * value. Prints "PASS" or "FAIL" and returns the number of failed checks.
 */
int main(void)
{
    Memory n;
    unsigned int res32;
    unsigned long long res64;

    /*
     * Store byte combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
    check32(res32, 0x00000087);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
    check32(res32, 0x0000ff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
    check32(res32, 0xffffff87);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
    check64(res64, 0xffffffffffffff87LL);

    /*
     * Store half combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
    check32(res32, 0x0000008f);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
    check32(res32, 0xffff8a87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
    check32(res32, 0x8a87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
    check32(res32, 0x8a87ffff);

    /*
     * The load below reads the whole doubleword, so prime all 64 bits
     * instead of relying on the upper word left over from earlier cases.
     */
    n.d[0] = ~0LL;
    res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
    check64(res64, 0xffffffff8a87ffffLL);

    /*
     * Store word combinations
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
    check32(res32, 0xffffff87);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
    check32(res32, 0x00000087);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
    check32(res32, 0xfffff678);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
    check32(res32, 0x00005678);

    n.w[0] = ~0;
    res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
    check32(res32, 0x12345678);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffff12345678LL);

    /*
     * Store double combinations
     */
    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0LL);
    check32(res32, 0xffffffde);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0LL);
    check32(res32, 0x000000de);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0LL);
    check32(res32, 0xffff9abc);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0LL);
    check32(res32, 0x00009abc);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0LL);
    check32(res32, 0x12345678);

    n.d[0] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0LL);
    check64(res64, 0x123456789abcdef0LL);

    /*
     * Predicated word stores with a byte load
     * (pred == 0 cancels the store, so the load sees the old contents)
     */
    n.w[0] = ~0;
    res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
    check32(res32, 0xffffff87);

    /*
     * Predicated word stores with a doubleword load
     * (store aimed at the low word, then at the high word)
     */
    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
    check64(res64, 0xffffffff12345678LL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
    check64(res64, 0x12345678ffffffffLL);

    /*
     * No overlap tests
     */
    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
    check32(res32, 0xffffffff);

    n.w[0] = ~0;
    res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
    check32(res32, 0xffffffff);

    n.d[0] = ~0LL;
    n.d[1] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
    check64(res64, 0xffffffffffffffffLL);

    n.d[0] = ~0LL;
    n.d[1] = ~0LL;
    res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
    check64(res64, 0xffffffffffffffffLL);

    /*
     * Predicated loads paired with an unconditional store to the same
     * address: with pred == 0 the load is skipped and x is returned; with
     * pred != 0 the load must observe the value just stored. The store
     * lands in memory in both cases.
     */
    n.w[0] = ~0;
    res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0x12345678);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0xc0ffeeda);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0x12345678);
    check32(n.w[0], 0xc0ffeeda);

    n.w[0] = ~0;
    res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
    check32(res32, 0xc0ffeeda);
    check32(n.w[0], 0xc0ffeeda);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd(0, &n.d[0], &n.d[0],
                       0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0x1234567812345678LL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd(1, &n.d[0], &n.d[0],
                       0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0xc0ffeedac0ffeedaLL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0],
                          0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0x1234567812345678LL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    n.d[0] = ~0LL;
    res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0],
                          0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
    check64(res64, 0xc0ffeedac0ffeedaLL);
    check64(n.d[0], 0xc0ffeedac0ffeedaLL);

    puts(err ? "FAIL" : "PASS");
    return err;
}