1 /*---------------------------------------------------------------------------+ 2 | poly_sin.c | 3 | | 4 | Computation of an approximation of the sin function and the cosine | 5 | function by a polynomial. | 6 | | 7 | Copyright (C) 1992,1993,1994,1997,1999 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | 9 | E-mail billm@melbpc.org.au | 10 | | 11 | | 12 +---------------------------------------------------------------------------*/ 13 14 15 #include "exception.h" 16 #include "reg_constant.h" 17 #include "fpu_emu.h" 18 #include "fpu_system.h" 19 #include "control_w.h" 20 #include "poly.h" 21 22 23 #define N_COEFF_P 4 24 #define N_COEFF_N 4 25 26 static const unsigned long long pos_terms_l[N_COEFF_P] = 27 { 28 0xaaaaaaaaaaaaaaabLL, 29 0x00d00d00d00cf906LL, 30 0x000006b99159a8bbLL, 31 0x000000000d7392e6LL 32 }; 33 34 static const unsigned long long neg_terms_l[N_COEFF_N] = 35 { 36 0x2222222222222167LL, 37 0x0002e3bc74aab624LL, 38 0x0000000b09229062LL, 39 0x00000000000c7973LL 40 }; 41 42 43 44 #define N_COEFF_PH 4 45 #define N_COEFF_NH 4 46 static const unsigned long long pos_terms_h[N_COEFF_PH] = 47 { 48 0x0000000000000000LL, 49 0x05b05b05b05b0406LL, 50 0x000049f93edd91a9LL, 51 0x00000000c9c9ed62LL 52 }; 53 54 static const unsigned long long neg_terms_h[N_COEFF_NH] = 55 { 56 0xaaaaaaaaaaaaaa98LL, 57 0x001a01a01a019064LL, 58 0x0000008f76c68a77LL, 59 0x0000000000d58f5eLL 60 }; 61 62 63 /*--- poly_sine() -----------------------------------------------------------+ 64 | | 65 +---------------------------------------------------------------------------*/ 66 void poly_sine(FPU_REG *st0_ptr) 67 { 68 int exponent, echange; 69 Xsig accumulator, argSqrd, argTo4; 70 unsigned long fix_up, adj; 71 unsigned long long fixed_arg; 72 FPU_REG result; 73 74 exponent = exponent(st0_ptr); 75 76 accumulator.lsw = accumulator.midw = accumulator.msw = 0; 77 78 /* Split into two ranges, for arguments below and above 1.0 */ 79 /* The boundary between upper and lower is approx 0.88309101259 */ 80 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) 81 { 82 /* The argument is <= 0.88309101259 */ 83 84 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; 85 mul64_Xsig(&argSqrd, &significand(st0_ptr)); 86 shr_Xsig(&argSqrd, 2*(-1-exponent)); 87 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; 88 argTo4.lsw = argSqrd.lsw; 89 mul_Xsig_Xsig(&argTo4, &argTo4); 90 91 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, 92 N_COEFF_N-1); 93 mul_Xsig_Xsig(&accumulator, &argSqrd); 94 negate_Xsig(&accumulator); 95 96 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, 97 N_COEFF_P-1); 98 99 shr_Xsig(&accumulator, 2); /* Divide by four */ 100 accumulator.msw |= 0x80000000; /* Add 1.0 */ 101 102 mul64_Xsig(&accumulator, &significand(st0_ptr)); 103 mul64_Xsig(&accumulator, &significand(st0_ptr)); 104 mul64_Xsig(&accumulator, &significand(st0_ptr)); 105 106 /* Divide by four, FPU_REG compatible, etc */ 107 exponent = 3*exponent; 108 109 /* The minimum exponent difference is 3 */ 110 shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); 111 112 negate_Xsig(&accumulator); 113 XSIG_LL(accumulator) += significand(st0_ptr); 114 115 echange = round_Xsig(&accumulator); 116 117 setexponentpos(&result, exponent(st0_ptr) + echange); 118 } 119 else 120 { 121 /* The argument is > 0.88309101259 */ 122 /* We use sin(st(0)) = cos(pi/2-st(0)) */ 123 124 fixed_arg = significand(st0_ptr); 125 126 if ( exponent == 0 ) 127 { 128 /* The argument is >= 1.0 */ 129 130 /* Put the binary point at the left. */ 131 fixed_arg <<= 1; 132 } 133 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ 134 fixed_arg = 0x921fb54442d18469LL - fixed_arg; 135 /* There is a special case which arises due to rounding, to fix here. */ 136 if ( fixed_arg == 0xffffffffffffffffLL ) 137 fixed_arg = 0; 138 139 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; 140 mul64_Xsig(&argSqrd, &fixed_arg); 141 142 XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; 143 mul_Xsig_Xsig(&argTo4, &argTo4); 144 145 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, 146 N_COEFF_NH-1); 147 mul_Xsig_Xsig(&accumulator, &argSqrd); 148 negate_Xsig(&accumulator); 149 150 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, 151 N_COEFF_PH-1); 152 negate_Xsig(&accumulator); 153 154 mul64_Xsig(&accumulator, &fixed_arg); 155 mul64_Xsig(&accumulator, &fixed_arg); 156 157 shr_Xsig(&accumulator, 3); 158 negate_Xsig(&accumulator); 159 160 add_Xsig_Xsig(&accumulator, &argSqrd); 161 162 shr_Xsig(&accumulator, 1); 163 164 accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ 165 negate_Xsig(&accumulator); 166 167 /* The basic computation is complete. Now fix the answer to 168 compensate for the error due to the approximation used for 169 pi/2 170 */ 171 172 /* This has an exponent of -65 */ 173 fix_up = 0x898cc517; 174 /* The fix-up needs to be improved for larger args */ 175 if ( argSqrd.msw & 0xffc00000 ) 176 { 177 /* Get about 32 bit precision in these: */ 178 fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; 179 } 180 fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); 181 182 adj = accumulator.lsw; /* temp save */ 183 accumulator.lsw -= fix_up; 184 if ( accumulator.lsw > adj ) 185 XSIG_LL(accumulator) --; 186 187 echange = round_Xsig(&accumulator); 188 189 setexponentpos(&result, echange - 1); 190 } 191 192 significand(&result) = XSIG_LL(accumulator); 193 setsign(&result, getsign(st0_ptr)); 194 FPU_copy_to_reg0(&result, TAG_Valid); 195 196 #ifdef PARANOID 197 if ( (exponent(&result) >= 0) 198 && (significand(&result) > 0x8000000000000000LL) ) 199 { 200 EXCEPTION(EX_INTERNAL|0x150); 201 } 202 #endif /* PARANOID */ 203 204 } 205 206 207 208 /*--- poly_cos() ------------------------------------------------------------+ 209 | | 210 +---------------------------------------------------------------------------*/ 211 void poly_cos(FPU_REG *st0_ptr) 212 { 213 FPU_REG result; 214 long int exponent, exp2, echange; 215 Xsig accumulator, argSqrd, fix_up, argTo4; 216 unsigned long long fixed_arg; 217 218 #ifdef PARANOID 219 if ( (exponent(st0_ptr) > 0) 220 || ((exponent(st0_ptr) == 0) 221 && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) 222 { 223 EXCEPTION(EX_Invalid); 224 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); 225 return; 226 } 227 #endif /* PARANOID */ 228 229 exponent = exponent(st0_ptr); 230 231 accumulator.lsw = accumulator.midw = accumulator.msw = 0; 232 233 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) 234 { 235 /* arg is < 0.687705 */ 236 237 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; 238 argSqrd.lsw = 0; 239 mul64_Xsig(&argSqrd, &significand(st0_ptr)); 240 241 if ( exponent < -1 ) 242 { 243 /* shift the argument right by the required places */ 244 shr_Xsig(&argSqrd, 2*(-1-exponent)); 245 } 246 247 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; 248 argTo4.lsw = argSqrd.lsw; 249 mul_Xsig_Xsig(&argTo4, &argTo4); 250 251 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, 252 N_COEFF_NH-1); 253 mul_Xsig_Xsig(&accumulator, &argSqrd); 254 negate_Xsig(&accumulator); 255 256 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, 257 N_COEFF_PH-1); 258 negate_Xsig(&accumulator); 259 260 mul64_Xsig(&accumulator, &significand(st0_ptr)); 261 mul64_Xsig(&accumulator, &significand(st0_ptr)); 262 shr_Xsig(&accumulator, -2*(1+exponent)); 263 264 shr_Xsig(&accumulator, 3); 265 negate_Xsig(&accumulator); 266 267 add_Xsig_Xsig(&accumulator, &argSqrd); 268 269 shr_Xsig(&accumulator, 1); 270 271 /* It doesn't matter if accumulator is all zero here, the 272 following code will work ok */ 273 negate_Xsig(&accumulator); 274 275 if ( accumulator.lsw & 0x80000000 ) 276 XSIG_LL(accumulator) ++; 277 if ( accumulator.msw == 0 ) 278 { 279 /* The result is 1.0 */ 280 FPU_copy_to_reg0(&CONST_1, TAG_Valid); 281 return; 282 } 283 else 284 { 285 significand(&result) = XSIG_LL(accumulator); 286 287 /* will be a valid positive nr with expon = -1 */ 288 setexponentpos(&result, -1); 289 } 290 } 291 else 292 { 293 fixed_arg = significand(st0_ptr); 294 295 if ( exponent == 0 ) 296 { 297 /* The argument is >= 1.0 */ 298 299 /* Put the binary point at the left. */ 300 fixed_arg <<= 1; 301 } 302 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ 303 fixed_arg = 0x921fb54442d18469LL - fixed_arg; 304 /* There is a special case which arises due to rounding, to fix here. */ 305 if ( fixed_arg == 0xffffffffffffffffLL ) 306 fixed_arg = 0; 307 308 exponent = -1; 309 exp2 = -1; 310 311 /* A shift is needed here only for a narrow range of arguments, 312 i.e. for fixed_arg approx 2^-32, but we pick up more... */ 313 if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) 314 { 315 fixed_arg <<= 16; 316 exponent -= 16; 317 exp2 -= 16; 318 } 319 320 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; 321 mul64_Xsig(&argSqrd, &fixed_arg); 322 323 if ( exponent < -1 ) 324 { 325 /* shift the argument right by the required places */ 326 shr_Xsig(&argSqrd, 2*(-1-exponent)); 327 } 328 329 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; 330 argTo4.lsw = argSqrd.lsw; 331 mul_Xsig_Xsig(&argTo4, &argTo4); 332 333 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, 334 N_COEFF_N-1); 335 mul_Xsig_Xsig(&accumulator, &argSqrd); 336 negate_Xsig(&accumulator); 337 338 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, 339 N_COEFF_P-1); 340 341 shr_Xsig(&accumulator, 2); /* Divide by four */ 342 accumulator.msw |= 0x80000000; /* Add 1.0 */ 343 344 mul64_Xsig(&accumulator, &fixed_arg); 345 mul64_Xsig(&accumulator, &fixed_arg); 346 mul64_Xsig(&accumulator, &fixed_arg); 347 348 /* Divide by four, FPU_REG compatible, etc */ 349 exponent = 3*exponent; 350 351 /* The minimum exponent difference is 3 */ 352 shr_Xsig(&accumulator, exp2 - exponent); 353 354 negate_Xsig(&accumulator); 355 XSIG_LL(accumulator) += fixed_arg; 356 357 /* The basic computation is complete. Now fix the answer to 358 compensate for the error due to the approximation used for 359 pi/2 360 */ 361 362 /* This has an exponent of -65 */ 363 XSIG_LL(fix_up) = 0x898cc51701b839a2ll; 364 fix_up.lsw = 0; 365 366 /* The fix-up needs to be improved for larger args */ 367 if ( argSqrd.msw & 0xffc00000 ) 368 { 369 /* Get about 32 bit precision in these: */ 370 fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; 371 fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; 372 } 373 374 exp2 += norm_Xsig(&accumulator); 375 shr_Xsig(&accumulator, 1); /* Prevent overflow */ 376 exp2++; 377 shr_Xsig(&fix_up, 65 + exp2); 378 379 add_Xsig_Xsig(&accumulator, &fix_up); 380 381 echange = round_Xsig(&accumulator); 382 383 setexponentpos(&result, exp2 + echange); 384 significand(&result) = XSIG_LL(accumulator); 385 } 386 387 FPU_copy_to_reg0(&result, TAG_Valid); 388 389 #ifdef PARANOID 390 if ( (exponent(&result) >= 0) 391 && (significand(&result) > 0x8000000000000000LL) ) 392 { 393 EXCEPTION(EX_INTERNAL|0x151); 394 } 395 #endif /* PARANOID */ 396 397 } 398