1/* 2 * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18/* 19 * Floating-Point Instructions 20 */ 21 22/*************************************/ 23/* Scalar FP */ 24/*************************************/ 25Q6INSN(F2_sfadd,"Rd32=sfadd(Rs32,Rt32)",ATTRIBS(), 26"Floating-Point Add", 27{ RdV=fUNFLOAT(fFLOAT(RsV)+fFLOAT(RtV));}) 28 29Q6INSN(F2_sfsub,"Rd32=sfsub(Rs32,Rt32)",ATTRIBS(), 30"Floating-Point Subtract", 31{ RdV=fUNFLOAT(fFLOAT(RsV)-fFLOAT(RtV));}) 32 33Q6INSN(F2_sfmpy,"Rd32=sfmpy(Rs32,Rt32)",ATTRIBS(), 34"Floating-Point Multiply", 35{ RdV=fUNFLOAT(fSFMPY(fFLOAT(RsV),fFLOAT(RtV)));}) 36 37Q6INSN(F2_sffma,"Rx32+=sfmpy(Rs32,Rt32)",ATTRIBS(), 38"Floating-Point Fused Multiply Add", 39{ RxV=fUNFLOAT(fFMAF(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV)));}) 40 41Q6INSN(F2_sffma_sc,"Rx32+=sfmpy(Rs32,Rt32,Pu4):scale",ATTRIBS(), 42"Floating-Point Fused Multiply Add w/ Additional Scaling (2**Pu)", 43{ 44 fHIDE(size4s_t tmp;) 45 fCHECKSFNAN3(RxV,RxV,RsV,RtV); 46 tmp=fUNFLOAT(fFMAFX(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV),PuV)); 47 if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp; 48}) 49 50Q6INSN(F2_sffms,"Rx32-=sfmpy(Rs32,Rt32)",ATTRIBS(), 51"Floating-Point Fused Multiply Add", 52{ RxV=fUNFLOAT(fFMAF(-fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV))); }) 53 54Q6INSN(F2_sffma_lib,"Rx32+=sfmpy(Rs32,Rt32):lib",ATTRIBS(), 55"Floating-Point Fused Multiply Add for Library Routines", 56{ fFPSETROUND_NEAREST(); fHIDE(int infinp; int infminusinf; size4s_t tmp;) 57 infminusinf = ((isinf(fFLOAT(RxV))) && 58 (fISINFPROD(fFLOAT(RsV),fFLOAT(RtV))) && 59 (fGETBIT(31,RsV ^ RxV ^ RtV) != 0)); 60 infinp = (isinf(fFLOAT(RxV))) || (isinf(fFLOAT(RtV))) || (isinf(fFLOAT(RsV))); 61 fCHECKSFNAN3(RxV,RxV,RsV,RtV); 62 tmp=fUNFLOAT(fFMAF(fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV))); 63 if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp; 64 fFPCANCELFLAGS(); 65 if (isinf(fFLOAT(RxV)) && !infinp) RxV = RxV - 1; 66 if (infminusinf) RxV = 0; 67}) 68 69Q6INSN(F2_sffms_lib,"Rx32-=sfmpy(Rs32,Rt32):lib",ATTRIBS(), 70"Floating-Point Fused Multiply Add for Library Routines", 71{ fFPSETROUND_NEAREST(); fHIDE(int infinp; int infminusinf; size4s_t tmp;) 72 infminusinf = ((isinf(fFLOAT(RxV))) && 73 (fISINFPROD(fFLOAT(RsV),fFLOAT(RtV))) && 74 (fGETBIT(31,RsV ^ RxV ^ RtV) == 0)); 75 infinp = (isinf(fFLOAT(RxV))) || (isinf(fFLOAT(RtV))) || (isinf(fFLOAT(RsV))); 76 fCHECKSFNAN3(RxV,RxV,RsV,RtV); 77 tmp=fUNFLOAT(fFMAF(-fFLOAT(RsV),fFLOAT(RtV),fFLOAT(RxV))); 78 if (!((fFLOAT(RxV) == 0.0) && fISZEROPROD(fFLOAT(RsV),fFLOAT(RtV)))) RxV = tmp; 79 fFPCANCELFLAGS(); 80 if (isinf(fFLOAT(RxV)) && !infinp) RxV = RxV - 1; 81 if (infminusinf) RxV = 0; 82}) 83 84 85Q6INSN(F2_sfcmpeq,"Pd4=sfcmp.eq(Rs32,Rt32)",ATTRIBS(), 86"Floating Point Compare for Equal", 87{PdV=f8BITSOF(fFLOAT(RsV)==fFLOAT(RtV));}) 88 89Q6INSN(F2_sfcmpgt,"Pd4=sfcmp.gt(Rs32,Rt32)",ATTRIBS(), 90"Floating-Point Compare for Greater Than", 91{PdV=f8BITSOF(fFLOAT(RsV)>fFLOAT(RtV));}) 92 93/* cmpge is not the same as !cmpgt(swapops) in IEEE */ 94 95Q6INSN(F2_sfcmpge,"Pd4=sfcmp.ge(Rs32,Rt32)",ATTRIBS(), 96"Floating-Point Compare for Greater Than / Equal To", 97{PdV=f8BITSOF(fFLOAT(RsV)>=fFLOAT(RtV));}) 98 99/* Everyone seems to have this... */ 100 101Q6INSN(F2_sfcmpuo,"Pd4=sfcmp.uo(Rs32,Rt32)",ATTRIBS(), 102"Floating-Point Compare for Unordered", 103{PdV=f8BITSOF(isunordered(fFLOAT(RsV),fFLOAT(RtV)));}) 104 105 106Q6INSN(F2_sfmax,"Rd32=sfmax(Rs32,Rt32)",ATTRIBS(), 107"Maximum of Floating-Point values", 108{ RdV = fUNFLOAT(fSF_MAX(fFLOAT(RsV),fFLOAT(RtV))); }) 109 110Q6INSN(F2_sfmin,"Rd32=sfmin(Rs32,Rt32)",ATTRIBS(), 111"Minimum of Floating-Point values", 112{ RdV = fUNFLOAT(fSF_MIN(fFLOAT(RsV),fFLOAT(RtV))); }) 113 114 115Q6INSN(F2_sfclass,"Pd4=sfclass(Rs32,#u5)",ATTRIBS(), 116"Classify Floating-Point Value", 117{ 118 fHIDE(int class;) 119 PdV = 0; 120 class = fpclassify(fFLOAT(RsV)); 121 /* Is the value zero? */ 122 if (fGETBIT(0,uiV) && (class == FP_ZERO)) PdV = 0xff; 123 if (fGETBIT(1,uiV) && (class == FP_NORMAL)) PdV = 0xff; 124 if (fGETBIT(2,uiV) && (class == FP_SUBNORMAL)) PdV = 0xff; 125 if (fGETBIT(3,uiV) && (class == FP_INFINITE)) PdV = 0xff; 126 if (fGETBIT(4,uiV) && (class == FP_NAN)) PdV = 0xff; 127 fFPCANCELFLAGS(); 128}) 129 130/* Range: +/- (1.0 .. 1+(63/64)) * 2**(-6 .. +9) */ 131/* More immediate bits should probably be used for more precision? */ 132 133Q6INSN(F2_sfimm_p,"Rd32=sfmake(#u10):pos",ATTRIBS(), 134"Make Floating Point Value", 135{ 136 RdV = (127 - 6) << 23; 137 RdV += uiV << 17; 138}) 139 140Q6INSN(F2_sfimm_n,"Rd32=sfmake(#u10):neg",ATTRIBS(), 141"Make Floating Point Value", 142{ 143 RdV = (127 - 6) << 23; 144 RdV += (uiV << 17); 145 RdV |= (1 << 31); 146}) 147 148 149Q6INSN(F2_sffixupn,"Rd32=sffixupn(Rs32,Rt32)",ATTRIBS(), 150"Fix Up Numerator", 151{ 152 fHIDE(int adjust;) 153 fSF_RECIP_COMMON(RsV,RtV,RdV,adjust); 154 RdV = RsV; 155}) 156 157Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(), 158"Fix Up Denominator", 159{ 160 fHIDE(int adjust;) 161 fSF_RECIP_COMMON(RsV,RtV,RdV,adjust); 162 RdV = RtV; 163}) 164 165Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(), 166"Fix Up Radicand", 167{ 168 fHIDE(int adjust;) 169 fSF_INVSQRT_COMMON(RsV,RdV,adjust); 170 RdV = RsV; 171}) 172 173/*************************************/ 174/* Scalar DP */ 175/*************************************/ 176Q6INSN(F2_dfadd,"Rdd32=dfadd(Rss32,Rtt32)",ATTRIBS(), 177"Floating-Point Add", 178{ RddV=fUNDOUBLE(fDOUBLE(RssV)+fDOUBLE(RttV));}) 179 180Q6INSN(F2_dfsub,"Rdd32=dfsub(Rss32,Rtt32)",ATTRIBS(), 181"Floating-Point Subtract", 182{ RddV=fUNDOUBLE(fDOUBLE(RssV)-fDOUBLE(RttV));}) 183 184Q6INSN(F2_dfmax,"Rdd32=dfmax(Rss32,Rtt32)",ATTRIBS(), 185"Maximum of Floating-Point values", 186{ RddV = fUNDOUBLE(fDF_MAX(fDOUBLE(RssV),fDOUBLE(RttV))); }) 187 188Q6INSN(F2_dfmin,"Rdd32=dfmin(Rss32,Rtt32)",ATTRIBS(), 189"Minimum of Floating-Point values", 190{ RddV = fUNDOUBLE(fDF_MIN(fDOUBLE(RssV),fDOUBLE(RttV))); }) 191 192Q6INSN(F2_dfmpyfix,"Rdd32=dfmpyfix(Rss32,Rtt32)",ATTRIBS(), 193"Fix Up Multiplicand for Multiplication", 194{ 195 if (fDF_ISDENORM(RssV) && fDF_ISBIG(RttV) && fDF_ISNORMAL(RttV)) RddV = fUNDOUBLE(fDOUBLE(RssV) * 0x1.0p52); 196 else if (fDF_ISDENORM(RttV) && fDF_ISBIG(RssV) && fDF_ISNORMAL(RssV)) RddV = fUNDOUBLE(fDOUBLE(RssV) * 0x1.0p-52); 197 else RddV = RssV; 198}) 199 200Q6INSN(F2_dfmpyll,"Rdd32=dfmpyll(Rss32,Rtt32)",ATTRIBS(), 201"Multiply low*low and shift off low 32 bits into sticky (in MSB)", 202{ 203 fHIDE(size8u_t prod;) 204 prod = fMPY32UU(fGETUWORD(0,RssV),fGETUWORD(0,RttV)); 205 RddV = (prod >> 32) << 1; 206 if (fGETUWORD(0,prod) != 0) fSETBIT(0,RddV,1); 207}) 208 209Q6INSN(F2_dfmpylh,"Rxx32+=dfmpylh(Rss32,Rtt32)",ATTRIBS(), 210"Multiply low*high and accumulate", 211{ 212 RxxV += (fGETUWORD(0,RssV) * (0x00100000 | fZXTN(20,64,fGETUWORD(1,RttV)))) << 1; 213}) 214 215Q6INSN(F2_dfmpyhh,"Rxx32+=dfmpyhh(Rss32,Rtt32)",ATTRIBS(), 216"Multiply high*high and accumulate with L*H value", 217{ 218 RxxV = fUNDOUBLE(fDF_MPY_HH(fDOUBLE(RssV),fDOUBLE(RttV),RxxV)); 219}) 220 221 222 223Q6INSN(F2_dfcmpeq,"Pd4=dfcmp.eq(Rss32,Rtt32)",ATTRIBS(), 224"Floating Point Compare for Equal", 225{PdV=f8BITSOF(fDOUBLE(RssV)==fDOUBLE(RttV));}) 226 227Q6INSN(F2_dfcmpgt,"Pd4=dfcmp.gt(Rss32,Rtt32)",ATTRIBS(), 228"Floating-Point Compare for Greater Than", 229{PdV=f8BITSOF(fDOUBLE(RssV)>fDOUBLE(RttV));}) 230 231 232/* cmpge is not the same as !cmpgt(swapops) in IEEE */ 233 234Q6INSN(F2_dfcmpge,"Pd4=dfcmp.ge(Rss32,Rtt32)",ATTRIBS(), 235"Floating-Point Compare for Greater Than / Equal To", 236{PdV=f8BITSOF(fDOUBLE(RssV)>=fDOUBLE(RttV));}) 237 238/* Everyone seems to have this... */ 239 240Q6INSN(F2_dfcmpuo,"Pd4=dfcmp.uo(Rss32,Rtt32)",ATTRIBS(), 241"Floating-Point Compare for Unordered", 242{PdV=f8BITSOF(isunordered(fDOUBLE(RssV),fDOUBLE(RttV)));}) 243 244 245Q6INSN(F2_dfclass,"Pd4=dfclass(Rss32,#u5)",ATTRIBS(), 246"Classify Floating-Point Value", 247{ 248 fHIDE(int class;) 249 PdV = 0; 250 class = fpclassify(fDOUBLE(RssV)); 251 /* Is the value zero? */ 252 if (fGETBIT(0,uiV) && (class == FP_ZERO)) PdV = 0xff; 253 if (fGETBIT(1,uiV) && (class == FP_NORMAL)) PdV = 0xff; 254 if (fGETBIT(2,uiV) && (class == FP_SUBNORMAL)) PdV = 0xff; 255 if (fGETBIT(3,uiV) && (class == FP_INFINITE)) PdV = 0xff; 256 if (fGETBIT(4,uiV) && (class == FP_NAN)) PdV = 0xff; 257 fFPCANCELFLAGS(); 258}) 259 260 261/* Range: +/- (1.0 .. 1+(63/64)) * 2**(-6 .. +9) */ 262/* More immediate bits should probably be used for more precision? */ 263 264Q6INSN(F2_dfimm_p,"Rdd32=dfmake(#u10):pos",ATTRIBS(), 265"Make Floating Point Value", 266{ 267 RddV = (1023ULL - 6) << 52; 268 RddV += (fHIDE((size8u_t))uiV) << 46; 269}) 270 271Q6INSN(F2_dfimm_n,"Rdd32=dfmake(#u10):neg",ATTRIBS(), 272"Make Floating Point Value", 273{ 274 RddV = (1023ULL - 6) << 52; 275 RddV += (fHIDE((size8u_t))uiV) << 46; 276 RddV |= ((1ULL) << 63); 277}) 278 279 280/* CONVERSION */ 281 282#define CONVERT(TAG,DEST,DESTV,SRC,SRCV,OUTCAST,OUTTYPE,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 283 Q6INSN(F2_conv_##TAG##MODETAG,#DEST"=convert_"#TAG"("#SRC")"#MODESYN,ATTRIBS(), \ 284 "Floating point format conversion", \ 285 { MODEBEH DESTV = OUTCAST(conv_##INTYPE##_to_##OUTTYPE(INCAST(SRCV))); }) 286 287CONVERT(sf2df,Rdd32,RddV,Rs32,RsV,fUNDOUBLE,df,fFLOAT,sf,,,) 288CONVERT(df2sf,Rd32,RdV,Rss32,RssV,fUNFLOAT,sf,fDOUBLE,df,,,) 289 290#define ALLINTDST(TAGSTART,SRC,SRCV,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 291CONVERT(TAGSTART##uw,Rd32,RdV,SRC,SRCV,fCAST4u,4u,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 292CONVERT(TAGSTART##w,Rd32,RdV,SRC,SRCV,fCAST4s,4s,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 293CONVERT(TAGSTART##ud,Rdd32,RddV,SRC,SRCV,fCAST8u,8u,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 294CONVERT(TAGSTART##d,Rdd32,RddV,SRC,SRCV,fCAST8s,8s,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) 295 296#define ALLFPDST(TAGSTART,SRC,SRCV,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 297CONVERT(TAGSTART##sf,Rd32,RdV,SRC,SRCV,fUNFLOAT,sf,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) \ 298CONVERT(TAGSTART##df,Rdd32,RddV,SRC,SRCV,fUNDOUBLE,df,INCAST,INTYPE,MODETAG,MODESYN,MODEBEH) 299 300#define ALLINTSRC(GEN,MODETAG,MODESYN,MODEBEH) \ 301GEN(uw##2,Rs32,RsV,fCAST4u,4u,MODETAG,MODESYN,MODEBEH) \ 302GEN(w##2,Rs32,RsV,fCAST4s,4s,MODETAG,MODESYN,MODEBEH) \ 303GEN(ud##2,Rss32,RssV,fCAST8u,8u,MODETAG,MODESYN,MODEBEH) \ 304GEN(d##2,Rss32,RssV,fCAST8s,8s,MODETAG,MODESYN,MODEBEH) 305 306#define ALLFPSRC(GEN,MODETAG,MODESYN,MODEBEH) \ 307GEN(sf##2,Rs32,RsV,fFLOAT,sf,MODETAG,MODESYN,MODEBEH) \ 308GEN(df##2,Rss32,RssV,fDOUBLE,df,MODETAG,MODESYN,MODEBEH) 309 310ALLINTSRC(ALLFPDST,,,) 311ALLFPSRC(ALLINTDST,,,) 312ALLFPSRC(ALLINTDST,_chop,:chop,fFPSETROUND_CHOP();) 313