/*
 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Floating-Point Instructions
 */

/*************************************/
/* Scalar FP                         */
/*************************************/
25Q6INSN(F2_sfadd,"Rd32=sfadd(Rs32,Rt32)",ATTRIBS(),
26"Floating-Point Add",
27{ RdV=fUNFLOAT(fFLOAT(RsV)+fFLOAT(RtV));})
28
29Q6INSN(F2_sfsub,"Rd32=sfsub(Rs32,Rt32)",ATTRIBS(),
30"Floating-Point Subtract",
31{ RdV=fUNFLOAT(fFLOAT(RsV)-fFLOAT(RtV));})
32
33Q6INSN(F2_sfmpy,"Rd32=sfmpy(Rs32,Rt32)",ATTRIBS(),
34"Floating-Point Multiply",
35{ RdV=fUNFLOAT(fSFMPY(fFLOAT(RsV),fFLOAT(RtV)));})
36
37Q6INSN(F2_sffma,"Rx32+=sfmpy(Rs32,Rt32)",ATTRIBS(),
38"Floating-Point Fused Multiply Add",
39{ RxV=fUNFLOAT(fFMAF(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV)));})
40
41Q6INSN(F2_sffma_sc,"Rx32+=sfmpy(Rs32,Rt32,Pu4):scale",ATTRIBS(),
42"Floating-Point Fused Multiply Add w/ Additional Scaling (2**Pu)",
43{
44    fHIDE(size4s_t tmp;)
45    fCHECKSFNAN3(RxV,RxV,RsV,RtV);
46    tmp=fUNFLOAT(fFMAFX(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV),PuV));
47    if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp;
48})
49
50Q6INSN(F2_sffms,"Rx32-=sfmpy(Rs32,Rt32)",ATTRIBS(),
51"Floating-Point Fused Multiply Add",
52{ RxV=fUNFLOAT(fFMAF(-fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV))); })
53
54Q6INSN(F2_sffma_lib,"Rx32+=sfmpy(Rs32,Rt32):lib",ATTRIBS(),
55"Floating-Point Fused Multiply Add for Library Routines",
56{ fFPSETROUND_NEAREST(); fHIDE(int infinp; int infminusinf; size4s_t tmp;)
57  infminusinf = ((isinf(fFLOAT(RxV))) &&
58                 (fISINFPROD(fFLOAT(RsV),fFLOAT(RtV))) &&
59                 (fGETBIT(31,RsV ^ RxV ^ RtV) != 0));
60  infinp = (isinf(fFLOAT(RxV))) || (isinf(fFLOAT(RtV))) || (isinf(fFLOAT(RsV)));
61  fCHECKSFNAN3(RxV,RxV,RsV,RtV);
62  tmp=fUNFLOAT(fFMAF(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV)));
63  if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp;
64  fFPCANCELFLAGS();
65  if (isinf(fFLOAT(RxV)) && !infinp) RxV = RxV - 1;
66  if (infminusinf) RxV = 0;
67})
68
69Q6INSN(F2_sffms_lib,"Rx32-=sfmpy(Rs32,Rt32):lib",ATTRIBS(),
70"Floating-Point Fused Multiply Add for Library Routines",
71{ fFPSETROUND_NEAREST(); fHIDE(int infinp; int infminusinf; size4s_t tmp;)
72  infminusinf = ((isinf(fFLOAT(RxV))) &&
73                 (fISINFPROD(fFLOAT(RsV),fFLOAT(RtV))) &&
74                 (fGETBIT(31,RsV ^ RxV ^ RtV) == 0));
75  infinp = (isinf(fFLOAT(RxV))) || (isinf(fFLOAT(RtV))) || (isinf(fFLOAT(RsV)));
76  fCHECKSFNAN3(RxV,RxV,RsV,RtV);
77  tmp=fUNFLOAT(fFMAF(-fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV)));
78  if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp;
79  fFPCANCELFLAGS();
80  if (isinf(fFLOAT(RxV)) && !infinp) RxV = RxV - 1;
81  if (infminusinf) RxV = 0;
82})
83
84
85Q6INSN(F2_sfcmpeq,"Pd4=sfcmp.eq(Rs32,Rt32)",ATTRIBS(),
86"Floating Point Compare for Equal",
87{PdV=f8BITSOF(fFLOAT(RsV)==fFLOAT(RtV));})
88
89Q6INSN(F2_sfcmpgt,"Pd4=sfcmp.gt(Rs32,Rt32)",ATTRIBS(),
90"Floating-Point Compare for Greater Than",
91{PdV=f8BITSOF(fFLOAT(RsV)>fFLOAT(RtV));})
92
93/* cmpge is not the same as !cmpgt(swapops) in IEEE */
94
95Q6INSN(F2_sfcmpge,"Pd4=sfcmp.ge(Rs32,Rt32)",ATTRIBS(),
96"Floating-Point Compare for Greater Than / Equal To",
97{PdV=f8BITSOF(fFLOAT(RsV)>=fFLOAT(RtV));})
98
99/* Everyone seems to have this... */
100
101Q6INSN(F2_sfcmpuo,"Pd4=sfcmp.uo(Rs32,Rt32)",ATTRIBS(),
102"Floating-Point Compare for Unordered",
103{PdV=f8BITSOF(isunordered(fFLOAT(RsV),fFLOAT(RtV)));})
104
105
106Q6INSN(F2_sfmax,"Rd32=sfmax(Rs32,Rt32)",ATTRIBS(),
107"Maximum of Floating-Point values",
108{ RdV = fUNFLOAT(fSF_MAX(fFLOAT(RsV),fFLOAT(RtV))); })
109
110Q6INSN(F2_sfmin,"Rd32=sfmin(Rs32,Rt32)",ATTRIBS(),
111"Minimum of Floating-Point values",
112{ RdV = fUNFLOAT(fSF_MIN(fFLOAT(RsV),fFLOAT(RtV))); })
113
114
115Q6INSN(F2_sfclass,"Pd4=sfclass(Rs32,#u5)",ATTRIBS(),
116"Classify Floating-Point Value",
117{
118    fHIDE(int class;)
119    PdV = 0;
120    class = fpclassify(fFLOAT(RsV));
121    /* Is the value zero? */
122    if (fGETBIT(0,uiV) && (class == FP_ZERO)) PdV = 0xff;
123    if (fGETBIT(1,uiV) && (class == FP_NORMAL)) PdV = 0xff;
124    if (fGETBIT(2,uiV) && (class == FP_SUBNORMAL)) PdV = 0xff;
125    if (fGETBIT(3,uiV) && (class == FP_INFINITE)) PdV = 0xff;
126    if (fGETBIT(4,uiV) && (class == FP_NAN)) PdV = 0xff;
127    fFPCANCELFLAGS();
128})
129
130/* Range: +/- (1.0 .. 1+(63/64)) * 2**(-6 .. +9) */
131/* More immediate bits should probably be used for more precision? */
132
133Q6INSN(F2_sfimm_p,"Rd32=sfmake(#u10):pos",ATTRIBS(),
134"Make Floating Point Value",
135{
136    RdV = (127 - 6) << 23;
137    RdV += uiV << 17;
138})
139
140Q6INSN(F2_sfimm_n,"Rd32=sfmake(#u10):neg",ATTRIBS(),
141"Make Floating Point Value",
142{
143    RdV = (127 - 6) << 23;
144    RdV += (uiV << 17);
145    RdV |= (1 << 31);
146})
147
148
149Q6INSN(F2_sffixupn,"Rd32=sffixupn(Rs32,Rt32)",ATTRIBS(),
150"Fix Up Numerator",
151{
152    fHIDE(int adjust;)
153    fSF_RECIP_COMMON(RsV,RtV,RdV,adjust);
154    RdV = RsV;
155})
156
157Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(),
158"Fix Up Denominator",
159{
160    fHIDE(int adjust;)
161    fSF_RECIP_COMMON(RsV,RtV,RdV,adjust);
162    RdV = RtV;
163})
164
165Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(),
166"Fix Up Radicand",
167{
168    fHIDE(int adjust;)
169    fSF_INVSQRT_COMMON(RsV,RdV,adjust);
170    RdV = RsV;
171})
172
/*************************************/
/* Scalar DP                         */
/*************************************/
176Q6INSN(F2_dfadd,"Rdd32=dfadd(Rss32,Rtt32)",ATTRIBS(),
177"Floating-Point Add",
178{ RddV=fUNDOUBLE(fDOUBLE(RssV)+fDOUBLE(RttV));})
179
180Q6INSN(F2_dfsub,"Rdd32=dfsub(Rss32,Rtt32)",ATTRIBS(),
181"Floating-Point Subtract",
182{ RddV=fUNDOUBLE(fDOUBLE(RssV)-fDOUBLE(RttV));})
183
184Q6INSN(F2_dfmax,"Rdd32=dfmax(Rss32,Rtt32)",ATTRIBS(),
185"Maximum of Floating-Point values",
186{ RddV = fUNDOUBLE(fDF_MAX(fDOUBLE(RssV),fDOUBLE(RttV))); })
187
188Q6INSN(F2_dfmin,"Rdd32=dfmin(Rss32,Rtt32)",ATTRIBS(),
189"Minimum of Floating-Point values",
190{ RddV = fUNDOUBLE(fDF_MIN(fDOUBLE(RssV),fDOUBLE(RttV))); })
191
192Q6INSN(F2_dfmpyfix,"Rdd32=dfmpyfix(Rss32,Rtt32)",ATTRIBS(),
193"Fix Up Multiplicand for Multiplication",
194{
195    if (fDF_ISDENORM(RssV) && fDF_ISBIG(RttV) && fDF_ISNORMAL(RttV)) RddV = fUNDOUBLE(fDOUBLE(RssV) * 0x1.0p52);
196    else if (fDF_ISDENORM(RttV) && fDF_ISBIG(RssV) && fDF_ISNORMAL(RssV)) RddV = fUNDOUBLE(fDOUBLE(RssV) * 0x1.0p-52);
197    else RddV = RssV;
198})
199
200Q6INSN(F2_dfmpyll,"Rdd32=dfmpyll(Rss32,Rtt32)",ATTRIBS(),
201"Multiply low*low and shift off low 32 bits into sticky (in MSB)",
202{
203    fHIDE(size8u_t prod;)
204    prod = fMPY32UU(fGETUWORD(0,RssV),fGETUWORD(0,RttV));
205    RddV = (prod >> 32) << 1;
206    if (fGETUWORD(0,prod) != 0) fSETBIT(0,RddV,1);
207})
208
209Q6INSN(F2_dfmpylh,"Rxx32+=dfmpylh(Rss32,Rtt32)",ATTRIBS(),
210"Multiply low*high and accumulate",
211{
212    RxxV += (fGETUWORD(0,RssV) * (0x00100000 | fZXTN(20,64,fGETUWORD(1,RttV)))) << 1;
213})
214
215Q6INSN(F2_dfmpyhh,"Rxx32+=dfmpyhh(Rss32,Rtt32)",ATTRIBS(),
216"Multiply high*high and accumulate with L*H value",
217{
218    RxxV = fUNDOUBLE(fDF_MPY_HH(fDOUBLE(RssV),fDOUBLE(RttV),RxxV));
219})
220
221
222
223Q6INSN(F2_dfcmpeq,"Pd4=dfcmp.eq(Rss32,Rtt32)",ATTRIBS(),
224"Floating Point Compare for Equal",
225{PdV=f8BITSOF(fDOUBLE(RssV)==fDOUBLE(RttV));})
226
227Q6INSN(F2_dfcmpgt,"Pd4=dfcmp.gt(Rss32,Rtt32)",ATTRIBS(),
228"Floating-Point Compare for Greater Than",
229{PdV=f8BITSOF(fDOUBLE(RssV)>fDOUBLE(RttV));})
230
231
232/* cmpge is not the same as !cmpgt(swapops) in IEEE */
233
234Q6INSN(F2_dfcmpge,"Pd4=dfcmp.ge(Rss32,Rtt32)",ATTRIBS(),
235"Floating-Point Compare for Greater Than / Equal To",
236{PdV=f8BITSOF(fDOUBLE(RssV)>=fDOUBLE(RttV));})
237
238/* Everyone seems to have this... */
239
240Q6INSN(F2_dfcmpuo,"Pd4=dfcmp.uo(Rss32,Rtt32)",ATTRIBS(),
241"Floating-Point Compare for Unordered",
242{PdV=f8BITSOF(isunordered(fDOUBLE(RssV),fDOUBLE(RttV)));})
243
244
245Q6INSN(F2_dfclass,"Pd4=dfclass(Rss32,#u5)",ATTRIBS(),
246"Classify Floating-Point Value",
247{
248    fHIDE(int class;)
249    PdV = 0;
250    class = fpclassify(fDOUBLE(RssV));
251    /* Is the value zero? */
252    if (fGETBIT(0,uiV) && (class == FP_ZERO)) PdV = 0xff;
253    if (fGETBIT(1,uiV) && (class == FP_NORMAL)) PdV = 0xff;
254    if (fGETBIT(2,uiV) && (class == FP_SUBNORMAL)) PdV = 0xff;
255    if (fGETBIT(3,uiV) && (class == FP_INFINITE)) PdV = 0xff;
256    if (fGETBIT(4,uiV) && (class == FP_NAN)) PdV = 0xff;
257    fFPCANCELFLAGS();
258})
259
260
261/* Range: +/- (1.0 .. 1+(63/64)) * 2**(-6 .. +9) */
262/* More immediate bits should probably be used for more precision? */
263
264Q6INSN(F2_dfimm_p,"Rdd32=dfmake(#u10):pos",ATTRIBS(),
265"Make Floating Point Value",
266{
267    RddV = (1023ULL - 6) << 52;
268    RddV += (fHIDE((size8u_t))uiV) << 46;
269})
270
271Q6INSN(F2_dfimm_n,"Rdd32=dfmake(#u10):neg",ATTRIBS(),
272"Make Floating Point Value",
273{
274    RddV = (1023ULL - 6) << 52;
275    RddV += (fHIDE((size8u_t))uiV) << 46;
276    RddV |= ((1ULL) << 63);
277})
278
279
/* CONVERSION */

/*
 * CONVERT: emit one format-conversion instruction.
 *
 *   TAG               conversion name, e.g. sf2df; becomes part of the tag
 *                     F2_conv_<TAG><MODETAG> and of the syntax string
 *   DEST / DESTV      destination operand as spelled in the syntax string /
 *                     as the variable assigned in the semantics
 *   SRC / SRCV        source operand, same two spellings
 *   OUTCAST, OUTTYPE  wrapper applied to the result and the type suffix used
 *                     in the helper name
 *   INCAST, INTYPE    wrapper applied to the source and the type suffix used
 *                     in the helper name
 *   MODETAG, MODESYN  optional tag suffix and syntax suffix (may be empty)
 *   MODEBEH           optional statement(s) run before the conversion
 *
 * Generated syntax:    "<DEST>=convert_<TAG>(<SRC>)<MODESYN>"
 * Generated semantics: MODEBEH DESTV = OUTCAST(conv_<INTYPE>_to_<OUTTYPE>(INCAST(SRCV)));
 */
#define CONVERT(TAG,DEST,DESTV,SRC,SRCV,OUTCAST,OUTTYPE,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
    Q6INSN(F2_conv_##TAG##MODETAG,#DEST"=convert_"#TAG"("#SRC")"#MODESYN,ATTRIBS(), \
    "Floating point format conversion", \
    { MODEBEH DESTV = OUTCAST(conv_##INTYPE##_to_##OUTTYPE(INCAST(SRCV))); })

287CONVERT(sf2df,Rdd32,RddV,Rs32,RsV,fUNDOUBLE,df,fFLOAT,sf,,,)
288CONVERT(df2sf,Rd32,RdV,Rss32,RssV,fUNFLOAT,sf,fDOUBLE,df,,,)
289
/*
 * ALLINTDST: for one source format, emit a CONVERT to every integer
 * destination: uw and w into Rd32 (4u / 4s), ud and d into Rdd32 (8u / 8s).
 * TAGSTART is the "<src>2" prefix that the destination name is pasted onto;
 * the remaining arguments are forwarded to CONVERT unchanged.
 */
#define ALLINTDST(TAGSTART,SRC,SRCV,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##uw,Rd32,RdV,SRC,SRCV,fCAST4u,4u,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##w,Rd32,RdV,SRC,SRCV,fCAST4s,4s,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##ud,Rdd32,RddV,SRC,SRCV,fCAST8u,8u,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##d,Rdd32,RddV,SRC,SRCV,fCAST8s,8s,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH)

/*
 * ALLFPDST: for one source format, emit a CONVERT to both floating-point
 * destinations: sf into Rd32 (via fUNFLOAT) and df into Rdd32 (via
 * fUNDOUBLE).  TAGSTART is the "<src>2" prefix that the destination name is
 * pasted onto; the remaining arguments are forwarded to CONVERT unchanged.
 */
#define ALLFPDST(TAGSTART,SRC,SRCV,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##sf,Rd32,RdV,SRC,SRCV,fUNFLOAT,sf,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \
CONVERT(TAGSTART##df,Rdd32,RddV,SRC,SRCV,fUNDOUBLE,df,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH)

/*
 * ALLINTSRC: apply the generator macro GEN once per integer source format.
 * Each call supplies the pasted tag prefix (uw2, w2, ud2, d2), the source
 * register in both spellings, and the matching cast / type suffix; the mode
 * arguments are forwarded unchanged.
 */
#define ALLINTSRC(GEN,MODETAG,MODESYN,MODEBEH) \
GEN(uw##2,Rs32,RsV,fCAST4u,4u,MODETAG,MODESYN,MODEBEH) \
GEN(w##2,Rs32,RsV,fCAST4s,4s,MODETAG,MODESYN,MODEBEH) \
GEN(ud##2,Rss32,RssV,fCAST8u,8u,MODETAG,MODESYN,MODEBEH) \
GEN(d##2,Rss32,RssV,fCAST8s,8s,MODETAG,MODESYN,MODEBEH)

/*
 * ALLFPSRC: apply the generator macro GEN once per floating-point source
 * format.  Each call supplies the pasted tag prefix (sf2, df2), the source
 * register in both spellings, and the matching cast / type suffix; the mode
 * arguments are forwarded unchanged.
 */
#define ALLFPSRC(GEN,MODETAG,MODESYN,MODEBEH) \
GEN(sf##2,Rs32,RsV,fFLOAT,sf,MODETAG,MODESYN,MODEBEH) \
GEN(df##2,Rss32,RssV,fDOUBLE,df,MODETAG,MODESYN,MODEBEH)

310ALLINTSRC(ALLFPDST,,,)
311ALLFPSRC(ALLINTDST,,,)
312ALLFPSRC(ALLINTDST,_chop,:chop,fFPSETROUND_CHOP();)
313