1 /* IEEE754 floating point arithmetic 2 * double precision square root 3 */ 4 /* 5 * MIPS floating point support 6 * Copyright (C) 1994-2000 Algorithmics Ltd. 7 * http://www.algor.co.uk 8 * 9 * ######################################################################## 10 * 11 * This program is free software; you can distribute it and/or modify it 12 * under the terms of the GNU General Public License (Version 2) as 13 * published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope it will be useful, but WITHOUT 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18 * for more details. 19 * 20 * You should have received a copy of the GNU General Public License along 21 * with this program; if not, write to the Free Software Foundation, Inc., 22 * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 23 * 24 * ######################################################################## 25 */ 26 27 28 #include "ieee754dp.h" 29 30 static const unsigned table[] = { 31 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 32 29598, 36145, 43202, 50740, 58733, 67158, 75992, 33 85215, 83599, 71378, 60428, 50647, 41945, 34246, 34 27478, 21581, 16499, 12183, 8588, 5674, 3403, 35 1742, 661, 130 36 }; 37 38 ieee754dp ieee754dp_sqrt(ieee754dp x) 39 { 40 struct _ieee754_csr oldcsr; 41 ieee754dp y, z, t; 42 unsigned scalx, yh; 43 COMPXDP; 44 45 EXPLODEXDP; 46 CLEARCX; 47 FLUSHXDP; 48 49 /* x == INF or NAN? */ 50 switch (xc) { 51 case IEEE754_CLASS_QNAN: 52 /* sqrt(Nan) = Nan */ 53 return ieee754dp_nanxcpt(x, "sqrt"); 54 case IEEE754_CLASS_SNAN: 55 SETCX(IEEE754_INVALID_OPERATION); 56 return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); 57 case IEEE754_CLASS_ZERO: 58 /* sqrt(0) = 0 */ 59 return x; 60 case IEEE754_CLASS_INF: 61 if (xs) { 62 /* sqrt(-Inf) = Nan */ 63 SETCX(IEEE754_INVALID_OPERATION); 64 return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); 65 } 66 /* sqrt(+Inf) = Inf */ 67 return x; 68 case IEEE754_CLASS_DNORM: 69 DPDNORMX; 70 /* fall through */ 71 case IEEE754_CLASS_NORM: 72 if (xs) { 73 /* sqrt(-x) = Nan */ 74 SETCX(IEEE754_INVALID_OPERATION); 75 return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); 76 } 77 break; 78 } 79 80 /* save old csr; switch off INX enable & flag; set RN rounding */ 81 oldcsr = ieee754_csr; 82 ieee754_csr.mx &= ~IEEE754_INEXACT; 83 ieee754_csr.sx &= ~IEEE754_INEXACT; 84 ieee754_csr.rm = IEEE754_RN; 85 86 /* adjust exponent to prevent overflow */ 87 scalx = 0; 88 if (xe > 512) { /* x > 2**-512? */ 89 xe -= 512; /* x = x / 2**512 */ 90 scalx += 256; 91 } else if (xe < -512) { /* x < 2**-512? */ 92 xe += 512; /* x = x * 2**512 */ 93 scalx -= 256; 94 } 95 96 y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); 97 98 /* magic initial approximation to almost 8 sig. bits */ 99 yh = y.bits >> 32; 100 yh = (yh >> 1) + 0x1ff80000; 101 yh = yh - table[(yh >> 15) & 31]; 102 y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); 103 104 /* Heron's rule once with correction to improve to ~18 sig. bits */ 105 /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ 106 t = ieee754dp_div(x, y); 107 y = ieee754dp_add(y, t); 108 y.bits -= 0x0010000600000000LL; 109 y.bits &= 0xffffffff00000000LL; 110 111 /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ 112 /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ 113 z = t = ieee754dp_mul(y, y); 114 t.parts.bexp += 0x001; 115 t = ieee754dp_add(t, z); 116 z = ieee754dp_mul(ieee754dp_sub(x, z), y); 117 118 /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ 119 t = ieee754dp_div(z, ieee754dp_add(t, x)); 120 t.parts.bexp += 0x001; 121 y = ieee754dp_add(y, t); 122 123 /* twiddle last bit to force y correctly rounded */ 124 125 /* set RZ, clear INEX flag */ 126 ieee754_csr.rm = IEEE754_RZ; 127 ieee754_csr.sx &= ~IEEE754_INEXACT; 128 129 /* t=x/y; ...chopped quotient, possibly inexact */ 130 t = ieee754dp_div(x, y); 131 132 if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { 133 134 if (!(ieee754_csr.sx & IEEE754_INEXACT)) 135 /* t = t-ulp */ 136 t.bits -= 1; 137 138 /* add inexact to result status */ 139 oldcsr.cx |= IEEE754_INEXACT; 140 oldcsr.sx |= IEEE754_INEXACT; 141 142 switch (oldcsr.rm) { 143 case IEEE754_RP: 144 y.bits += 1; 145 /* drop through */ 146 case IEEE754_RN: 147 t.bits += 1; 148 break; 149 } 150 151 /* y=y+t; ...chopped sum */ 152 y = ieee754dp_add(y, t); 153 154 /* adjust scalx for correctly rounded sqrt(x) */ 155 scalx -= 1; 156 } 157 158 /* py[n0]=py[n0]+scalx; ...scale back y */ 159 y.parts.bexp += scalx; 160 161 /* restore rounding mode, possibly set inexact */ 162 ieee754_csr = oldcsr; 163 164 return y; 165 } 166