/*
 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

18d934c16dSTaylor Simpson #include <stdio.h>
19*0d57cd61STaylor Simpson #include <stdint.h>
20*0d57cd61STaylor Simpson #include <stdbool.h>
21d934c16dSTaylor Simpson
22*0d57cd61STaylor Simpson int err;
23*0d57cd61STaylor Simpson
24*0d57cd61STaylor Simpson #include "hex_test.h"
25*0d57cd61STaylor Simpson
sfrecipa(int32_t Rs,int32_t Rt,bool * pred_result)26*0d57cd61STaylor Simpson static int32_t sfrecipa(int32_t Rs, int32_t Rt, bool *pred_result)
27d934c16dSTaylor Simpson {
28*0d57cd61STaylor Simpson int32_t result;
29*0d57cd61STaylor Simpson bool predval;
30d934c16dSTaylor Simpson
31d934c16dSTaylor Simpson asm volatile("%0,p0 = sfrecipa(%2, %3)\n\t"
32d934c16dSTaylor Simpson "%1 = p0\n\t"
33d934c16dSTaylor Simpson : "+r"(result), "=r"(predval)
34d934c16dSTaylor Simpson : "r"(Rs), "r"(Rt)
35d934c16dSTaylor Simpson : "p0");
36d934c16dSTaylor Simpson *pred_result = predval;
37d934c16dSTaylor Simpson return result;
38d934c16dSTaylor Simpson }
39d934c16dSTaylor Simpson
sfinvsqrta(int32_t Rs,int32_t * pred_result)40*0d57cd61STaylor Simpson static int32_t sfinvsqrta(int32_t Rs, int32_t *pred_result)
41dd8705bdSTaylor Simpson {
42*0d57cd61STaylor Simpson int32_t result;
43*0d57cd61STaylor Simpson int32_t predval;
44dd8705bdSTaylor Simpson
45dd8705bdSTaylor Simpson asm volatile("%0,p0 = sfinvsqrta(%2)\n\t"
46dd8705bdSTaylor Simpson "%1 = p0\n\t"
47dd8705bdSTaylor Simpson : "+r"(result), "=r"(predval)
48dd8705bdSTaylor Simpson : "r"(Rs)
49dd8705bdSTaylor Simpson : "p0");
50dd8705bdSTaylor Simpson *pred_result = predval;
51dd8705bdSTaylor Simpson return result;
52dd8705bdSTaylor Simpson }
53dd8705bdSTaylor Simpson
vacsh(int64_t Rxx,int64_t Rss,int64_t Rtt,int * pred_result,bool * ovf_result)54*0d57cd61STaylor Simpson static int64_t vacsh(int64_t Rxx, int64_t Rss, int64_t Rtt,
55*0d57cd61STaylor Simpson int *pred_result, bool *ovf_result)
56da74cd2dSTaylor Simpson {
57*0d57cd61STaylor Simpson int64_t result = Rxx;
58da74cd2dSTaylor Simpson int predval;
59*0d57cd61STaylor Simpson uint32_t usr;
60da74cd2dSTaylor Simpson
61da74cd2dSTaylor Simpson /*
62da74cd2dSTaylor Simpson * This instruction can set bit 0 (OVF/overflow) in usr
63da74cd2dSTaylor Simpson * Clear the bit first, then return that bit to the caller
64da74cd2dSTaylor Simpson */
65da74cd2dSTaylor Simpson asm volatile("r2 = usr\n\t"
66da74cd2dSTaylor Simpson "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */
67da74cd2dSTaylor Simpson "usr = r2\n\t"
68da74cd2dSTaylor Simpson "%0,p0 = vacsh(%3, %4)\n\t"
69da74cd2dSTaylor Simpson "%1 = p0\n\t"
70da74cd2dSTaylor Simpson "%2 = usr\n\t"
71da74cd2dSTaylor Simpson : "+r"(result), "=r"(predval), "=r"(usr)
72da74cd2dSTaylor Simpson : "r"(Rss), "r"(Rtt)
73da74cd2dSTaylor Simpson : "r2", "p0", "usr");
74da74cd2dSTaylor Simpson *pred_result = predval;
75da74cd2dSTaylor Simpson *ovf_result = (usr & 1);
76da74cd2dSTaylor Simpson return result;
77da74cd2dSTaylor Simpson }
78da74cd2dSTaylor Simpson
vminub(int64_t Rtt,int64_t Rss,int32_t * pred_result)79*0d57cd61STaylor Simpson static int64_t vminub(int64_t Rtt, int64_t Rss, int32_t *pred_result)
800a65d286STaylor Simpson {
81*0d57cd61STaylor Simpson int64_t result;
82*0d57cd61STaylor Simpson int32_t predval;
830a65d286STaylor Simpson
840a65d286STaylor Simpson asm volatile("%0,p0 = vminub(%2, %3)\n\t"
850a65d286STaylor Simpson "%1 = p0\n\t"
860a65d286STaylor Simpson : "=r"(result), "=r"(predval)
870a65d286STaylor Simpson : "r"(Rtt), "r"(Rss)
880a65d286STaylor Simpson : "p0");
890a65d286STaylor Simpson *pred_result = predval;
900a65d286STaylor Simpson return result;
910a65d286STaylor Simpson }
920a65d286STaylor Simpson
add_carry(int64_t Rss,int64_t Rtt,int32_t pred_in,int32_t * pred_result)93*0d57cd61STaylor Simpson static int64_t add_carry(int64_t Rss, int64_t Rtt,
94*0d57cd61STaylor Simpson int32_t pred_in, int32_t *pred_result)
9557d352acSTaylor Simpson {
96*0d57cd61STaylor Simpson int64_t result;
97*0d57cd61STaylor Simpson int32_t predval = pred_in;
9857d352acSTaylor Simpson
9957d352acSTaylor Simpson asm volatile("p0 = %1\n\t"
10057d352acSTaylor Simpson "%0 = add(%2, %3, p0):carry\n\t"
10157d352acSTaylor Simpson "%1 = p0\n\t"
10257d352acSTaylor Simpson : "=r"(result), "+r"(predval)
10357d352acSTaylor Simpson : "r"(Rss), "r"(Rtt)
10457d352acSTaylor Simpson : "p0");
10557d352acSTaylor Simpson *pred_result = predval;
10657d352acSTaylor Simpson return result;
10757d352acSTaylor Simpson }
10857d352acSTaylor Simpson
sub_carry(int64_t Rss,int64_t Rtt,int32_t pred_in,int32_t * pred_result)109*0d57cd61STaylor Simpson static int64_t sub_carry(int64_t Rss, int64_t Rtt,
110*0d57cd61STaylor Simpson int32_t pred_in, int32_t *pred_result)
11157d352acSTaylor Simpson {
112*0d57cd61STaylor Simpson int64_t result;
113*0d57cd61STaylor Simpson int32_t predval = pred_in;
11457d352acSTaylor Simpson
11557d352acSTaylor Simpson asm volatile("p0 = !cmp.eq(%1, #0)\n\t"
11657d352acSTaylor Simpson "%0 = sub(%2, %3, p0):carry\n\t"
11757d352acSTaylor Simpson "%1 = p0\n\t"
11857d352acSTaylor Simpson : "=r"(result), "+r"(predval)
11957d352acSTaylor Simpson : "r"(Rss), "r"(Rtt)
12057d352acSTaylor Simpson : "p0");
12157d352acSTaylor Simpson *pred_result = predval;
12257d352acSTaylor Simpson return result;
12357d352acSTaylor Simpson }
12457d352acSTaylor Simpson
/* Check the result and predicate reported by sfrecipa() for one input pair */
static void test_sfrecipa(void)
{
    int32_t res;
    bool pred_result;

    res = sfrecipa(0x04030201, 0x05060708, &pred_result);
    check32(res, 0x59f38001);
    check32(pred_result, false);
}

/* Check the result and predicate reported by sfinvsqrta() for two inputs */
static void test_sfinvsqrta(void)
{
    int32_t res;
    int32_t pred_result;

    res = sfinvsqrta(0x04030201, &pred_result);
    check32(res, 0x4d330000);
    check32(pred_result, 0xe0);

    /* Zero input */
    res = sfinvsqrta(0x0, &pred_result);
    check32(res, 0x3f800000);
    check32(pred_result, 0x0);
}

/*
 * Check the result, predicate and overflow flag reported by vacsh() for
 * several input combinations.
 */
static void test_vacsh(void)
{
    int64_t res64;
    /* Declared as int to match the int * parameter of vacsh() */
    int pred_result;
    bool ovf_result;

    res64 = vacsh(0x0004000300020001LL,
                  0x0001000200030004LL,
                  0x0000000000000000LL, &pred_result, &ovf_result);
    check64(res64, 0x0004000300030004LL);
    check32(pred_result, 0xf0);
    check32(ovf_result, false);

    res64 = vacsh(0x0004000300020001LL,
                  0x0001000200030004LL,
                  0x000affff000d0000LL, &pred_result, &ovf_result);
    check64(res64, 0x000e0003000f0004LL);
    check32(pred_result, 0xcc);
    check32(ovf_result, false);

    /* This case is expected to report overflow */
    res64 = vacsh(0x00047fff00020001LL,
                  0x00017fff00030004LL,
                  0x000a0fff000d0000LL, &pred_result, &ovf_result);
    check64(res64, 0x000e7fff000f0004LL);
    check32(pred_result, 0xfc);
    check32(ovf_result, true);

    res64 = vacsh(0x0004000300020001LL,
                  0x0001000200030009LL,
                  0x000affff000d0001LL, &pred_result, &ovf_result);
    check64(res64, 0x000e0003000f0008LL);
    check32(pred_result, 0xcc);
    check32(ovf_result, false);
}

/* Check the result and predicate reported by vminub() for two input pairs */
static void test_vminub(void)
{
    int64_t res64;
    int32_t pred_result;

    res64 = vminub(0x0807060504030201LL,
                   0x0102030405060708LL,
                   &pred_result);
    check64(res64, 0x0102030404030201LL);
    check32(pred_result, 0xf0);

    res64 = vminub(0x0802060405030701LL,
                   0x0107030504060208LL,
                   &pred_result);
    check64(res64, 0x0102030404030201LL);
    check32(pred_result, 0xaa);
}

/*
 * Check the sum and output predicate reported by add_carry() with the input
 * predicate both set and clear.
 */
static void test_add_carry(void)
{
    int64_t res64;
    int32_t pred_result;

    /* Input predicate set */
    res64 = add_carry(0x0000000000000000LL,
                      0xffffffffffffffffLL,
                      1, &pred_result);
    check64(res64, 0x0000000000000000LL);
    check32(pred_result, 0xff);

    /* Input predicate clear */
    res64 = add_carry(0x0000000100000000LL,
                      0xffffffffffffffffLL,
                      0, &pred_result);
    check64(res64, 0x00000000ffffffffLL);
    check32(pred_result, 0xff);

    /* Same inputs and expectations as the previous case */
    res64 = add_carry(0x0000000100000000LL,
                      0xffffffffffffffffLL,
                      0, &pred_result);
    check64(res64, 0x00000000ffffffffLL);
    check32(pred_result, 0xff);
}

/*
 * Check the difference and output predicate reported by sub_carry() with the
 * input predicate both set and clear.
 */
static void test_sub_carry(void)
{
    int64_t res64;
    int32_t pred_result;

    /* Input predicate set */
    res64 = sub_carry(0x0000000000000000LL,
                      0x0000000000000000LL,
                      1, &pred_result);
    check64(res64, 0x0000000000000000LL);
    check32(pred_result, 0xff);

    /* Input predicate clear */
    res64 = sub_carry(0x0000000100000000LL,
                      0x0000000000000000LL,
                      0, &pred_result);
    check64(res64, 0x00000000ffffffffLL);
    check32(pred_result, 0xff);

    /* Same inputs and expectations as the previous case */
    res64 = sub_carry(0x0000000100000000LL,
                      0x0000000000000000LL,
                      0, &pred_result);
    check64(res64, 0x00000000ffffffffLL);
    check32(pred_result, 0xff);
}

main()250d934c16dSTaylor Simpson int main()
251d934c16dSTaylor Simpson {
252d934c16dSTaylor Simpson test_sfrecipa();
253dd8705bdSTaylor Simpson test_sfinvsqrta();
254da74cd2dSTaylor Simpson test_vacsh();
2550a65d286STaylor Simpson test_vminub();
25657d352acSTaylor Simpson test_add_carry();
25757d352acSTaylor Simpson test_sub_carry();
258d934c16dSTaylor Simpson
259d934c16dSTaylor Simpson puts(err ? "FAIL" : "PASS");
260d934c16dSTaylor Simpson return err;
261d934c16dSTaylor Simpson }
262