xref: /openbmc/qemu/target/m68k/softfloat.c (revision 5a95f5ce3cd5842cc8f61a91ecd4fb4e8d10104f)
1 /*
2  * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
3  * derived from NetBSD M68040 FPSP functions,
4  * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
5  * Package. Those parts of the code (and some later contributions) are
6  * provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file will be taken to be licensed under
14  * the Softfloat-2a license unless specifically indicated otherwise.
15  */
16 
17 /*
18  * Portions of this work are licensed under the terms of the GNU GPL,
19  * version 2 or later. See the COPYING file in the top-level directory.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "softfloat.h"
24 #include "fpu/softfloat-macros.h"
25 #include "softfloat_fpsp_tables.h"
26 
27 #define pi_exp      0x4000
28 #define piby2_exp   0x3FFF
29 #define pi_sig      UINT64_C(0xc90fdaa22168c235)
30 
propagateFloatx80NaNOneArg(floatx80 a,float_status * status)31 static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
32 {
33     if (floatx80_is_signaling_nan(a, status)) {
34         float_raise(float_flag_invalid, status);
35         a = floatx80_silence_nan(a, status);
36     }
37 
38     if (status->default_nan_mode) {
39         return floatx80_default_nan(status);
40     }
41 
42     return a;
43 }
44 
45 /*
46  * Returns the mantissa of the extended double-precision floating-point
47  * value `a'.
48  */
49 
floatx80_getman(floatx80 a,float_status * status)50 floatx80 floatx80_getman(floatx80 a, float_status *status)
51 {
52     bool aSign;
53     int32_t aExp;
54     uint64_t aSig;
55 
56     aSig = extractFloatx80Frac(a);
57     aExp = extractFloatx80Exp(a);
58     aSign = extractFloatx80Sign(a);
59 
60     if (aExp == 0x7FFF) {
61         if ((uint64_t) (aSig << 1)) {
62             return propagateFloatx80NaNOneArg(a , status);
63         }
64         float_raise(float_flag_invalid , status);
65         return floatx80_default_nan(status);
66     }
67 
68     if (aExp == 0) {
69         if (aSig == 0) {
70             return packFloatx80(aSign, 0, 0);
71         }
72         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
73     }
74 
75     return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
76                                 0x3FFF, aSig, 0, status);
77 }
78 
79 /*
80  * Returns the exponent of the extended double-precision floating-point
81  * value `a' as an extended double-precision value.
82  */
83 
floatx80_getexp(floatx80 a,float_status * status)84 floatx80 floatx80_getexp(floatx80 a, float_status *status)
85 {
86     bool aSign;
87     int32_t aExp;
88     uint64_t aSig;
89 
90     aSig = extractFloatx80Frac(a);
91     aExp = extractFloatx80Exp(a);
92     aSign = extractFloatx80Sign(a);
93 
94     if (aExp == 0x7FFF) {
95         if ((uint64_t) (aSig << 1)) {
96             return propagateFloatx80NaNOneArg(a , status);
97         }
98         float_raise(float_flag_invalid , status);
99         return floatx80_default_nan(status);
100     }
101 
102     if (aExp == 0) {
103         if (aSig == 0) {
104             return packFloatx80(aSign, 0, 0);
105         }
106         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
107     }
108 
109     return int32_to_floatx80(aExp - 0x3FFF, status);
110 }
111 
112 /*
113  * Scales extended double-precision floating-point value in operand `a' by
114  * value `b'. The function truncates the value in the second operand 'b' to
115  * an integral value and adds that value to the exponent of the operand 'a'.
116  * The operation performed according to the IEC/IEEE Standard for Binary
117  * Floating-Point Arithmetic.
118  */
119 
floatx80_scale(floatx80 a,floatx80 b,float_status * status)120 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
121 {
122     bool aSign, bSign;
123     int32_t aExp, bExp, shiftCount;
124     uint64_t aSig, bSig;
125 
126     aSig = extractFloatx80Frac(a);
127     aExp = extractFloatx80Exp(a);
128     aSign = extractFloatx80Sign(a);
129     bSig = extractFloatx80Frac(b);
130     bExp = extractFloatx80Exp(b);
131     bSign = extractFloatx80Sign(b);
132 
133     if (bExp == 0x7FFF) {
134         if ((uint64_t) (bSig << 1) ||
135             ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
136             return propagateFloatx80NaN(a, b, status);
137         }
138         float_raise(float_flag_invalid , status);
139         return floatx80_default_nan(status);
140     }
141     if (aExp == 0x7FFF) {
142         if ((uint64_t) (aSig << 1)) {
143             return propagateFloatx80NaN(a, b, status);
144         }
145         return packFloatx80(aSign, floatx80_infinity.high,
146                             floatx80_infinity.low);
147     }
148     if (aExp == 0) {
149         if (aSig == 0) {
150             return packFloatx80(aSign, 0, 0);
151         }
152         if (bExp < 0x3FFF) {
153             return a;
154         }
155         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
156     }
157 
158     if (bExp < 0x3FFF) {
159         return a;
160     }
161 
162     if (0x400F < bExp) {
163         aExp = bSign ? -0x6001 : 0xE000;
164         return roundAndPackFloatx80(status->floatx80_rounding_precision,
165                                     aSign, aExp, aSig, 0, status);
166     }
167 
168     shiftCount = 0x403E - bExp;
169     bSig >>= shiftCount;
170     aExp = bSign ? (aExp - bSig) : (aExp + bSig);
171 
172     return roundAndPackFloatx80(status->floatx80_rounding_precision,
173                                 aSign, aExp, aSig, 0, status);
174 }
175 
floatx80_move(floatx80 a,float_status * status)176 floatx80 floatx80_move(floatx80 a, float_status *status)
177 {
178     bool aSign;
179     int32_t aExp;
180     uint64_t aSig;
181 
182     aSig = extractFloatx80Frac(a);
183     aExp = extractFloatx80Exp(a);
184     aSign = extractFloatx80Sign(a);
185 
186     if (aExp == 0x7FFF) {
187         if ((uint64_t)(aSig << 1)) {
188             return propagateFloatx80NaNOneArg(a, status);
189         }
190         return a;
191     }
192     if (aExp == 0) {
193         if (aSig == 0) {
194             return a;
195         }
196         normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
197                                       aSign, aExp, aSig, 0, status);
198     }
199     return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
200                                 aExp, aSig, 0, status);
201 }
202 
203 /*
204  * Algorithms for transcendental functions supported by MC68881 and MC68882
205  * mathematical coprocessors. The functions are derived from FPSP library.
206  */
207 
208 #define one_exp     0x3FFF
209 #define one_sig     UINT64_C(0x8000000000000000)
210 
211 /*
212  * Function for compactifying extended double-precision floating point values.
213  */
214 
floatx80_make_compact(int32_t aExp,uint64_t aSig)215 static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
216 {
217     return (aExp << 16) | (aSig >> 48);
218 }
219 
220 /*
221  * Log base e of x plus 1
222  */
223 
floatx80_lognp1(floatx80 a,float_status * status)224 floatx80 floatx80_lognp1(floatx80 a, float_status *status)
225 {
226     bool aSign;
227     int32_t aExp;
228     uint64_t aSig, fSig;
229 
230     FloatRoundMode user_rnd_mode;
231     FloatX80RoundPrec user_rnd_prec;
232 
233     int32_t compact, j, k;
234     floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
235 
236     aSig = extractFloatx80Frac(a);
237     aExp = extractFloatx80Exp(a);
238     aSign = extractFloatx80Sign(a);
239 
240     if (aExp == 0x7FFF) {
241         if ((uint64_t) (aSig << 1)) {
242             propagateFloatx80NaNOneArg(a, status);
243         }
244         if (aSign) {
245             float_raise(float_flag_invalid, status);
246             return floatx80_default_nan(status);
247         }
248         return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
249     }
250 
251     if (aExp == 0 && aSig == 0) {
252         return packFloatx80(aSign, 0, 0);
253     }
254 
255     if (aSign && aExp >= one_exp) {
256         if (aExp == one_exp && aSig == one_sig) {
257             float_raise(float_flag_divbyzero, status);
258             return packFloatx80(aSign, floatx80_infinity.high,
259                                 floatx80_infinity.low);
260         }
261         float_raise(float_flag_invalid, status);
262         return floatx80_default_nan(status);
263     }
264 
265     if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
266         /* <= min threshold */
267         float_raise(float_flag_inexact, status);
268         return floatx80_move(a, status);
269     }
270 
271     user_rnd_mode = status->float_rounding_mode;
272     user_rnd_prec = status->floatx80_rounding_precision;
273     status->float_rounding_mode = float_round_nearest_even;
274     status->floatx80_rounding_precision = floatx80_precision_x;
275 
276     compact = floatx80_make_compact(aExp, aSig);
277 
278     fp0 = a; /* Z */
279     fp1 = a;
280 
281     fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
282                        status), status); /* X = (1+Z) */
283 
284     aExp = extractFloatx80Exp(fp0);
285     aSig = extractFloatx80Frac(fp0);
286 
287     compact = floatx80_make_compact(aExp, aSig);
288 
289     if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
290         /* |X| < 1/2 or |X| > 3/2 */
291         k = aExp - 0x3FFF;
292         fp1 = int32_to_floatx80(k, status);
293 
294         fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
295         j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
296 
297         f = packFloatx80(0, 0x3FFF, fSig); /* F */
298         fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
299 
300         fp0 = floatx80_sub(fp0, f, status); /* Y-F */
301 
302     lp1cont1:
303         /* LP1CONT1 */
304         fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
305         logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
306         klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
307         fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
308 
309         fp3 = fp2;
310         fp1 = fp2;
311 
312         fp1 = floatx80_mul(fp1, float64_to_floatx80(
313                            make_float64(0x3FC2499AB5E4040B), status),
314                            status); /* V*A6 */
315         fp2 = floatx80_mul(fp2, float64_to_floatx80(
316                            make_float64(0xBFC555B5848CB7DB), status),
317                            status); /* V*A5 */
318         fp1 = floatx80_add(fp1, float64_to_floatx80(
319                            make_float64(0x3FC99999987D8730), status),
320                            status); /* A4+V*A6 */
321         fp2 = floatx80_add(fp2, float64_to_floatx80(
322                            make_float64(0xBFCFFFFFFF6F7E97), status),
323                            status); /* A3+V*A5 */
324         fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
325         fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
326         fp1 = floatx80_add(fp1, float64_to_floatx80(
327                            make_float64(0x3FD55555555555A4), status),
328                            status); /* A2+V*(A4+V*A6) */
329         fp2 = floatx80_add(fp2, float64_to_floatx80(
330                            make_float64(0xBFE0000000000008), status),
331                            status); /* A1+V*(A3+V*A5) */
332         fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
333         fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
334         fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
335         fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
336 
337         fp1 = floatx80_add(fp1, log_tbl[j + 1],
338                            status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
339         fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
340 
341         status->float_rounding_mode = user_rnd_mode;
342         status->floatx80_rounding_precision = user_rnd_prec;
343 
344         a = floatx80_add(fp0, klog2, status);
345 
346         float_raise(float_flag_inexact, status);
347 
348         return a;
349     } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
350         /* |X| < 1/16 or |X| > -1/16 */
351         /* LP1CARE */
352         fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
353         f = packFloatx80(0, 0x3FFF, fSig); /* F */
354         j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
355 
356         if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
357             /* KISZERO */
358             fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
359                                status), f, status); /* 1-F */
360             fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
361             fp1 = packFloatx80(0, 0, 0); /* K = 0 */
362         } else {
363             /* KISNEG */
364             fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
365                                status), f, status); /* 2-F */
366             fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
367             fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
368             fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
369         }
370         goto lp1cont1;
371     } else {
372         /* LP1ONE16 */
373         fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
374         fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
375                            status), status); /* FP0 IS 1+X */
376 
377         /* LP1CONT2 */
378         fp1 = floatx80_div(fp1, fp0, status); /* U */
379         saveu = fp1;
380         fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
381         fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
382 
383         fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
384                                   status); /* B5 */
385         fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
386                                   status); /* B4 */
387         fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
388         fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
389         fp3 = floatx80_add(fp3, float64_to_floatx80(
390                            make_float64(0x3F624924928BCCFF), status),
391                            status); /* B3+W*B5 */
392         fp2 = floatx80_add(fp2, float64_to_floatx80(
393                            make_float64(0x3F899999999995EC), status),
394                            status); /* B2+W*B4 */
395         fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
396         fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
397         fp1 = floatx80_add(fp1, float64_to_floatx80(
398                            make_float64(0x3FB5555555555555), status),
399                            status); /* B1+W*(B3+W*B5) */
400 
401         fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
402         fp1 = floatx80_add(fp1, fp2,
403                            status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
404         fp0 = floatx80_mul(fp0, fp1,
405                            status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
406 
407         status->float_rounding_mode = user_rnd_mode;
408         status->floatx80_rounding_precision = user_rnd_prec;
409 
410         a = floatx80_add(fp0, saveu, status);
411 
412         /*if (!floatx80_is_zero(a)) { */
413             float_raise(float_flag_inexact, status);
414         /*} */
415 
416         return a;
417     }
418 }
419 
420 /*
421  * Log base e
422  */
423 
floatx80_logn(floatx80 a,float_status * status)424 floatx80 floatx80_logn(floatx80 a, float_status *status)
425 {
426     bool aSign;
427     int32_t aExp;
428     uint64_t aSig, fSig;
429 
430     FloatRoundMode user_rnd_mode;
431     FloatX80RoundPrec user_rnd_prec;
432 
433     int32_t compact, j, k, adjk;
434     floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
435 
436     aSig = extractFloatx80Frac(a);
437     aExp = extractFloatx80Exp(a);
438     aSign = extractFloatx80Sign(a);
439 
440     if (aExp == 0x7FFF) {
441         if ((uint64_t) (aSig << 1)) {
442             propagateFloatx80NaNOneArg(a, status);
443         }
444         if (aSign == 0) {
445             return packFloatx80(0, floatx80_infinity.high,
446                                 floatx80_infinity.low);
447         }
448     }
449 
450     adjk = 0;
451 
452     if (aExp == 0) {
453         if (aSig == 0) { /* zero */
454             float_raise(float_flag_divbyzero, status);
455             return packFloatx80(1, floatx80_infinity.high,
456                                 floatx80_infinity.low);
457         }
458         if ((aSig & one_sig) == 0) { /* denormal */
459             normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
460             adjk = -100;
461             aExp += 100;
462             a = packFloatx80(aSign, aExp, aSig);
463         }
464     }
465 
466     if (aSign) {
467         float_raise(float_flag_invalid, status);
468         return floatx80_default_nan(status);
469     }
470 
471     user_rnd_mode = status->float_rounding_mode;
472     user_rnd_prec = status->floatx80_rounding_precision;
473     status->float_rounding_mode = float_round_nearest_even;
474     status->floatx80_rounding_precision = floatx80_precision_x;
475 
476     compact = floatx80_make_compact(aExp, aSig);
477 
478     if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
479         /* |X| < 15/16 or |X| > 17/16 */
480         k = aExp - 0x3FFF;
481         k += adjk;
482         fp1 = int32_to_floatx80(k, status);
483 
484         fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
485         j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
486 
487         f = packFloatx80(0, 0x3FFF, fSig); /* F */
488         fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
489 
490         fp0 = floatx80_sub(fp0, f, status); /* Y-F */
491 
492         /* LP1CONT1 */
493         fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
494         logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
495         klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
496         fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
497 
498         fp3 = fp2;
499         fp1 = fp2;
500 
501         fp1 = floatx80_mul(fp1, float64_to_floatx80(
502                            make_float64(0x3FC2499AB5E4040B), status),
503                            status); /* V*A6 */
504         fp2 = floatx80_mul(fp2, float64_to_floatx80(
505                            make_float64(0xBFC555B5848CB7DB), status),
506                            status); /* V*A5 */
507         fp1 = floatx80_add(fp1, float64_to_floatx80(
508                            make_float64(0x3FC99999987D8730), status),
509                            status); /* A4+V*A6 */
510         fp2 = floatx80_add(fp2, float64_to_floatx80(
511                            make_float64(0xBFCFFFFFFF6F7E97), status),
512                            status); /* A3+V*A5 */
513         fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
514         fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
515         fp1 = floatx80_add(fp1, float64_to_floatx80(
516                            make_float64(0x3FD55555555555A4), status),
517                            status); /* A2+V*(A4+V*A6) */
518         fp2 = floatx80_add(fp2, float64_to_floatx80(
519                            make_float64(0xBFE0000000000008), status),
520                            status); /* A1+V*(A3+V*A5) */
521         fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
522         fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
523         fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
524         fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
525 
526         fp1 = floatx80_add(fp1, log_tbl[j + 1],
527                            status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
528         fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
529 
530         status->float_rounding_mode = user_rnd_mode;
531         status->floatx80_rounding_precision = user_rnd_prec;
532 
533         a = floatx80_add(fp0, klog2, status);
534 
535         float_raise(float_flag_inexact, status);
536 
537         return a;
538     } else { /* |X-1| >= 1/16 */
539         fp0 = a;
540         fp1 = a;
541         fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
542                            status), status); /* FP1 IS X-1 */
543         fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
544                            status), status); /* FP0 IS X+1 */
545         fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */
546 
547         /* LP1CONT2 */
548         fp1 = floatx80_div(fp1, fp0, status); /* U */
549         saveu = fp1;
550         fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
551         fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
552 
553         fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
554                                   status); /* B5 */
555         fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
556                                   status); /* B4 */
557         fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
558         fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
559         fp3 = floatx80_add(fp3, float64_to_floatx80(
560                            make_float64(0x3F624924928BCCFF), status),
561                            status); /* B3+W*B5 */
562         fp2 = floatx80_add(fp2, float64_to_floatx80(
563                            make_float64(0x3F899999999995EC), status),
564                            status); /* B2+W*B4 */
565         fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
566         fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
567         fp1 = floatx80_add(fp1, float64_to_floatx80(
568                            make_float64(0x3FB5555555555555), status),
569                            status); /* B1+W*(B3+W*B5) */
570 
571         fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
572         fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
573         fp0 = floatx80_mul(fp0, fp1,
574                            status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
575 
576         status->float_rounding_mode = user_rnd_mode;
577         status->floatx80_rounding_precision = user_rnd_prec;
578 
579         a = floatx80_add(fp0, saveu, status);
580 
581         /*if (!floatx80_is_zero(a)) { */
582             float_raise(float_flag_inexact, status);
583         /*} */
584 
585         return a;
586     }
587 }
588 
589 /*
590  * Log base 10
591  */
592 
floatx80_log10(floatx80 a,float_status * status)593 floatx80 floatx80_log10(floatx80 a, float_status *status)
594 {
595     bool aSign;
596     int32_t aExp;
597     uint64_t aSig;
598 
599     FloatRoundMode user_rnd_mode;
600     FloatX80RoundPrec user_rnd_prec;
601 
602     floatx80 fp0, fp1;
603 
604     aSig = extractFloatx80Frac(a);
605     aExp = extractFloatx80Exp(a);
606     aSign = extractFloatx80Sign(a);
607 
608     if (aExp == 0x7FFF) {
609         if ((uint64_t) (aSig << 1)) {
610             propagateFloatx80NaNOneArg(a, status);
611         }
612         if (aSign == 0) {
613             return packFloatx80(0, floatx80_infinity.high,
614                                 floatx80_infinity.low);
615         }
616     }
617 
618     if (aExp == 0 && aSig == 0) {
619         float_raise(float_flag_divbyzero, status);
620         return packFloatx80(1, floatx80_infinity.high,
621                             floatx80_infinity.low);
622     }
623 
624     if (aSign) {
625         float_raise(float_flag_invalid, status);
626         return floatx80_default_nan(status);
627     }
628 
629     user_rnd_mode = status->float_rounding_mode;
630     user_rnd_prec = status->floatx80_rounding_precision;
631     status->float_rounding_mode = float_round_nearest_even;
632     status->floatx80_rounding_precision = floatx80_precision_x;
633 
634     fp0 = floatx80_logn(a, status);
635     fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */
636 
637     status->float_rounding_mode = user_rnd_mode;
638     status->floatx80_rounding_precision = user_rnd_prec;
639 
640     a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */
641 
642     float_raise(float_flag_inexact, status);
643 
644     return a;
645 }
646 
647 /*
648  * Log base 2
649  */
650 
floatx80_log2(floatx80 a,float_status * status)651 floatx80 floatx80_log2(floatx80 a, float_status *status)
652 {
653     bool aSign;
654     int32_t aExp;
655     uint64_t aSig;
656 
657     FloatRoundMode user_rnd_mode;
658     FloatX80RoundPrec user_rnd_prec;
659 
660     floatx80 fp0, fp1;
661 
662     aSig = extractFloatx80Frac(a);
663     aExp = extractFloatx80Exp(a);
664     aSign = extractFloatx80Sign(a);
665 
666     if (aExp == 0x7FFF) {
667         if ((uint64_t) (aSig << 1)) {
668             propagateFloatx80NaNOneArg(a, status);
669         }
670         if (aSign == 0) {
671             return packFloatx80(0, floatx80_infinity.high,
672                                 floatx80_infinity.low);
673         }
674     }
675 
676     if (aExp == 0) {
677         if (aSig == 0) {
678             float_raise(float_flag_divbyzero, status);
679             return packFloatx80(1, floatx80_infinity.high,
680                                 floatx80_infinity.low);
681         }
682         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
683     }
684 
685     if (aSign) {
686         float_raise(float_flag_invalid, status);
687         return floatx80_default_nan(status);
688     }
689 
690     user_rnd_mode = status->float_rounding_mode;
691     user_rnd_prec = status->floatx80_rounding_precision;
692     status->float_rounding_mode = float_round_nearest_even;
693     status->floatx80_rounding_precision = floatx80_precision_x;
694 
695     if (aSig == one_sig) { /* X is 2^k */
696         status->float_rounding_mode = user_rnd_mode;
697         status->floatx80_rounding_precision = user_rnd_prec;
698 
699         a = int32_to_floatx80(aExp - 0x3FFF, status);
700     } else {
701         fp0 = floatx80_logn(a, status);
702         fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */
703 
704         status->float_rounding_mode = user_rnd_mode;
705         status->floatx80_rounding_precision = user_rnd_prec;
706 
707         a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
708     }
709 
710     float_raise(float_flag_inexact, status);
711 
712     return a;
713 }
714 
715 /*
716  * e to x
717  */
718 
floatx80_etox(floatx80 a,float_status * status)719 floatx80 floatx80_etox(floatx80 a, float_status *status)
720 {
721     bool aSign;
722     int32_t aExp;
723     uint64_t aSig;
724 
725     FloatRoundMode user_rnd_mode;
726     FloatX80RoundPrec user_rnd_prec;
727 
728     int32_t compact, n, j, k, m, m1;
729     floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
730     bool adjflag;
731 
732     aSig = extractFloatx80Frac(a);
733     aExp = extractFloatx80Exp(a);
734     aSign = extractFloatx80Sign(a);
735 
736     if (aExp == 0x7FFF) {
737         if ((uint64_t) (aSig << 1)) {
738             return propagateFloatx80NaNOneArg(a, status);
739         }
740         if (aSign) {
741             return packFloatx80(0, 0, 0);
742         }
743         return packFloatx80(0, floatx80_infinity.high,
744                             floatx80_infinity.low);
745     }
746 
747     if (aExp == 0 && aSig == 0) {
748         return packFloatx80(0, one_exp, one_sig);
749     }
750 
751     user_rnd_mode = status->float_rounding_mode;
752     user_rnd_prec = status->floatx80_rounding_precision;
753     status->float_rounding_mode = float_round_nearest_even;
754     status->floatx80_rounding_precision = floatx80_precision_x;
755 
756     adjflag = 0;
757 
758     if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
759         compact = floatx80_make_compact(aExp, aSig);
760 
761         if (compact < 0x400CB167) { /* |X| < 16380 log2 */
762             fp0 = a;
763             fp1 = a;
764             fp0 = floatx80_mul(fp0, float32_to_floatx80(
765                                make_float32(0x42B8AA3B), status),
766                                status); /* 64/log2 * X */
767             adjflag = 0;
768             n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
769             fp0 = int32_to_floatx80(n, status);
770 
771             j = n & 0x3F; /* J = N mod 64 */
772             m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
773             if (n < 0 && j) {
774                 /*
775                  * arithmetic right shift is division and
776                  * round towards minus infinity
777                  */
778                 m--;
779             }
780             m += 0x3FFF; /* biased exponent of 2^(M) */
781 
782         expcont1:
783             fp2 = fp0; /* N */
784             fp0 = floatx80_mul(fp0, float32_to_floatx80(
785                                make_float32(0xBC317218), status),
786                                status); /* N * L1, L1 = lead(-log2/64) */
787             l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
788             fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
789             fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
790             fp0 = floatx80_add(fp0, fp2, status); /* R */
791 
792             fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
793             fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
794                                       status); /* A5 */
795             fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
796             fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
797                                status), fp1,
798                                status); /* fp3 is S*A4 */
799             fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
800                                0x3FA5555555554431), status),
801                                status); /* fp2 is A3+S*A5 */
802             fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
803                                0x3FC5555555554018), status),
804                                status); /* fp3 is A2+S*A4 */
805             fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
806             fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
807             fp2 = floatx80_add(fp2, float32_to_floatx80(
808                                make_float32(0x3F000000), status),
809                                status); /* fp2 is A1+S*(A3+S*A5) */
810             fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
811             fp2 = floatx80_mul(fp2, fp1,
812                                status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
813             fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
814             fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
815 
816             fp1 = exp_tbl[j];
817             fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
818             fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
819                                status); /* accurate 2^(J/64) */
820             fp0 = floatx80_add(fp0, fp1,
821                                status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */
822 
823             scale = packFloatx80(0, m, one_sig);
824             if (adjflag) {
825                 adjscale = packFloatx80(0, m1, one_sig);
826                 fp0 = floatx80_mul(fp0, adjscale, status);
827             }
828 
829             status->float_rounding_mode = user_rnd_mode;
830             status->floatx80_rounding_precision = user_rnd_prec;
831 
832             a = floatx80_mul(fp0, scale, status);
833 
834             float_raise(float_flag_inexact, status);
835 
836             return a;
837         } else { /* |X| >= 16380 log2 */
838             if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
839                 status->float_rounding_mode = user_rnd_mode;
840                 status->floatx80_rounding_precision = user_rnd_prec;
841                 if (aSign) {
842                     a = roundAndPackFloatx80(
843                                            status->floatx80_rounding_precision,
844                                            0, -0x1000, aSig, 0, status);
845                 } else {
846                     a = roundAndPackFloatx80(
847                                            status->floatx80_rounding_precision,
848                                            0, 0x8000, aSig, 0, status);
849                 }
850                 float_raise(float_flag_inexact, status);
851 
852                 return a;
853             } else {
854                 fp0 = a;
855                 fp1 = a;
856                 fp0 = floatx80_mul(fp0, float32_to_floatx80(
857                                    make_float32(0x42B8AA3B), status),
858                                    status); /* 64/log2 * X */
859                 adjflag = 1;
860                 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
861                 fp0 = int32_to_floatx80(n, status);
862 
863                 j = n & 0x3F; /* J = N mod 64 */
864                 /* NOTE: this is really arithmetic right shift by 6 */
865                 k = n / 64;
866                 if (n < 0 && j) {
867                     /* arithmetic right shift is division and
868                      * round towards minus infinity
869                      */
870                     k--;
871                 }
872                 /* NOTE: this is really arithmetic right shift by 1 */
873                 m1 = k / 2;
874                 if (k < 0 && (k & 1)) {
875                     /* arithmetic right shift is division and
876                      * round towards minus infinity
877                      */
878                     m1--;
879                 }
880                 m = k - m1;
881                 m1 += 0x3FFF; /* biased exponent of 2^(M1) */
882                 m += 0x3FFF; /* biased exponent of 2^(M) */
883 
884                 goto expcont1;
885             }
886         }
887     } else { /* |X| < 2^(-65) */
888         status->float_rounding_mode = user_rnd_mode;
889         status->floatx80_rounding_precision = user_rnd_prec;
890 
891         a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
892                          status), status); /* 1 + X */
893 
894         float_raise(float_flag_inexact, status);
895 
896         return a;
897     }
898 }
899 
900 /*
901  * 2 to x
902  */
903 
floatx80_twotox(floatx80 a,float_status * status)904 floatx80 floatx80_twotox(floatx80 a, float_status *status)
905 {
906     bool aSign;
907     int32_t aExp;
908     uint64_t aSig;
909 
910     FloatRoundMode user_rnd_mode;
911     FloatX80RoundPrec user_rnd_prec;
912 
913     int32_t compact, n, j, l, m, m1;
914     floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
915 
916     aSig = extractFloatx80Frac(a);
917     aExp = extractFloatx80Exp(a);
918     aSign = extractFloatx80Sign(a);
919 
920     if (aExp == 0x7FFF) {
921         if ((uint64_t) (aSig << 1)) {
922             return propagateFloatx80NaNOneArg(a, status);
923         }
924         if (aSign) {
925             return packFloatx80(0, 0, 0);
926         }
927         return packFloatx80(0, floatx80_infinity.high,
928                             floatx80_infinity.low);
929     }
930 
931     if (aExp == 0 && aSig == 0) {
932         return packFloatx80(0, one_exp, one_sig);
933     }
934 
935     user_rnd_mode = status->float_rounding_mode;
936     user_rnd_prec = status->floatx80_rounding_precision;
937     status->float_rounding_mode = float_round_nearest_even;
938     status->floatx80_rounding_precision = floatx80_precision_x;
939 
940     fp0 = a;
941 
942     compact = floatx80_make_compact(aExp, aSig);
943 
944     if (compact < 0x3FB98000 || compact > 0x400D80C0) {
945         /* |X| > 16480 or |X| < 2^(-70) */
946         if (compact > 0x3FFF8000) { /* |X| > 16480 */
947             status->float_rounding_mode = user_rnd_mode;
948             status->floatx80_rounding_precision = user_rnd_prec;
949 
950             if (aSign) {
951                 return roundAndPackFloatx80(status->floatx80_rounding_precision,
952                                             0, -0x1000, aSig, 0, status);
953             } else {
954                 return roundAndPackFloatx80(status->floatx80_rounding_precision,
955                                             0, 0x8000, aSig, 0, status);
956             }
957         } else { /* |X| < 2^(-70) */
958             status->float_rounding_mode = user_rnd_mode;
959             status->floatx80_rounding_precision = user_rnd_prec;
960 
961             a = floatx80_add(fp0, float32_to_floatx80(
962                              make_float32(0x3F800000), status),
963                              status); /* 1 + X */
964 
965             float_raise(float_flag_inexact, status);
966 
967             return a;
968         }
969     } else { /* 2^(-70) <= |X| <= 16480 */
970         fp1 = fp0; /* X */
971         fp1 = floatx80_mul(fp1, float32_to_floatx80(
972                            make_float32(0x42800000), status),
973                            status); /* X * 64 */
974         n = floatx80_to_int32(fp1, status);
975         fp1 = int32_to_floatx80(n, status);
976         j = n & 0x3F;
977         l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
978         if (n < 0 && j) {
979             /*
980              * arithmetic right shift is division and
981              * round towards minus infinity
982              */
983             l--;
984         }
985         m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
986         if (l < 0 && (l & 1)) {
987             /*
988              * arithmetic right shift is division and
989              * round towards minus infinity
990              */
991             m--;
992         }
993         m1 = l - m;
994         m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
995 
996         adjfact = packFloatx80(0, m1, one_sig);
997         fact1 = exp2_tbl[j];
998         fact1.high += m;
999         fact2.high = exp2_tbl2[j] >> 16;
1000         fact2.high += m;
1001         fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1002         fact2.low <<= 48;
1003 
1004         fp1 = floatx80_mul(fp1, float32_to_floatx80(
1005                            make_float32(0x3C800000), status),
1006                            status); /* (1/64)*N */
1007         fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
1008         fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */
1009         fp0 = floatx80_mul(fp0, fp2, status); /* R */
1010 
1011         /* EXPR */
1012         fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1013         fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1014                                   status); /* A5 */
1015         fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1016                                   status); /* A4 */
1017         fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1018         fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1019         fp2 = floatx80_add(fp2, float64_to_floatx80(
1020                            make_float64(0x3FA5555555554CC1), status),
1021                            status); /* A3+S*A5 */
1022         fp3 = floatx80_add(fp3, float64_to_floatx80(
1023                            make_float64(0x3FC5555555554A54), status),
1024                            status); /* A2+S*A4 */
1025         fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1026         fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1027         fp2 = floatx80_add(fp2, float64_to_floatx80(
1028                            make_float64(0x3FE0000000000000), status),
1029                            status); /* A1+S*(A3+S*A5) */
1030         fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1031 
1032         fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1033         fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1034         fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1035 
1036         fp0 = floatx80_mul(fp0, fact1, status);
1037         fp0 = floatx80_add(fp0, fact2, status);
1038         fp0 = floatx80_add(fp0, fact1, status);
1039 
1040         status->float_rounding_mode = user_rnd_mode;
1041         status->floatx80_rounding_precision = user_rnd_prec;
1042 
1043         a = floatx80_mul(fp0, adjfact, status);
1044 
1045         float_raise(float_flag_inexact, status);
1046 
1047         return a;
1048     }
1049 }
1050 
1051 /*
1052  * 10 to x
1053  */
1054 
floatx80_tentox(floatx80 a,float_status * status)1055 floatx80 floatx80_tentox(floatx80 a, float_status *status)
1056 {
1057     bool aSign;
1058     int32_t aExp;
1059     uint64_t aSig;
1060 
1061     FloatRoundMode user_rnd_mode;
1062     FloatX80RoundPrec user_rnd_prec;
1063 
1064     int32_t compact, n, j, l, m, m1;
1065     floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
1066 
1067     aSig = extractFloatx80Frac(a);
1068     aExp = extractFloatx80Exp(a);
1069     aSign = extractFloatx80Sign(a);
1070 
1071     if (aExp == 0x7FFF) {
1072         if ((uint64_t) (aSig << 1)) {
1073             return propagateFloatx80NaNOneArg(a, status);
1074         }
1075         if (aSign) {
1076             return packFloatx80(0, 0, 0);
1077         }
1078         return packFloatx80(0, floatx80_infinity.high,
1079                             floatx80_infinity.low);
1080     }
1081 
1082     if (aExp == 0 && aSig == 0) {
1083         return packFloatx80(0, one_exp, one_sig);
1084     }
1085 
1086     user_rnd_mode = status->float_rounding_mode;
1087     user_rnd_prec = status->floatx80_rounding_precision;
1088     status->float_rounding_mode = float_round_nearest_even;
1089     status->floatx80_rounding_precision = floatx80_precision_x;
1090 
1091     fp0 = a;
1092 
1093     compact = floatx80_make_compact(aExp, aSig);
1094 
1095     if (compact < 0x3FB98000 || compact > 0x400B9B07) {
1096         /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
1097         if (compact > 0x3FFF8000) { /* |X| > 16480 */
1098             status->float_rounding_mode = user_rnd_mode;
1099             status->floatx80_rounding_precision = user_rnd_prec;
1100 
1101             if (aSign) {
1102                 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1103                                             0, -0x1000, aSig, 0, status);
1104             } else {
1105                 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1106                                             0, 0x8000, aSig, 0, status);
1107             }
1108         } else { /* |X| < 2^(-70) */
1109             status->float_rounding_mode = user_rnd_mode;
1110             status->floatx80_rounding_precision = user_rnd_prec;
1111 
1112             a = floatx80_add(fp0, float32_to_floatx80(
1113                              make_float32(0x3F800000), status),
1114                              status); /* 1 + X */
1115 
1116             float_raise(float_flag_inexact, status);
1117 
1118             return a;
1119         }
1120     } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
1121         fp1 = fp0; /* X */
1122         fp1 = floatx80_mul(fp1, float64_to_floatx80(
1123                            make_float64(0x406A934F0979A371),
1124                            status), status); /* X*64*LOG10/LOG2 */
1125         n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
1126         fp1 = int32_to_floatx80(n, status);
1127 
1128         j = n & 0x3F;
1129         l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
1130         if (n < 0 && j) {
1131             /*
1132              * arithmetic right shift is division and
1133              * round towards minus infinity
1134              */
1135             l--;
1136         }
1137         m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
1138         if (l < 0 && (l & 1)) {
1139             /*
1140              * arithmetic right shift is division and
1141              * round towards minus infinity
1142              */
1143             m--;
1144         }
1145         m1 = l - m;
1146         m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
1147 
1148         adjfact = packFloatx80(0, m1, one_sig);
1149         fact1 = exp2_tbl[j];
1150         fact1.high += m;
1151         fact2.high = exp2_tbl2[j] >> 16;
1152         fact2.high += m;
1153         fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1154         fact2.low <<= 48;
1155 
1156         fp2 = fp1; /* N */
1157         fp1 = floatx80_mul(fp1, float64_to_floatx80(
1158                            make_float64(0x3F734413509F8000), status),
1159                            status); /* N*(LOG2/64LOG10)_LEAD */
1160         fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2));
1161         fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
1162         fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
1163         fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
1164         fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */
1165         fp0 = floatx80_mul(fp0, fp2, status); /* R */
1166 
1167         /* EXPR */
1168         fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1169         fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1170                                   status); /* A5 */
1171         fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1172                                   status); /* A4 */
1173         fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1174         fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1175         fp2 = floatx80_add(fp2, float64_to_floatx80(
1176                            make_float64(0x3FA5555555554CC1), status),
1177                            status); /* A3+S*A5 */
1178         fp3 = floatx80_add(fp3, float64_to_floatx80(
1179                            make_float64(0x3FC5555555554A54), status),
1180                            status); /* A2+S*A4 */
1181         fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1182         fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1183         fp2 = floatx80_add(fp2, float64_to_floatx80(
1184                            make_float64(0x3FE0000000000000), status),
1185                            status); /* A1+S*(A3+S*A5) */
1186         fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1187 
1188         fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1189         fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1190         fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1191 
1192         fp0 = floatx80_mul(fp0, fact1, status);
1193         fp0 = floatx80_add(fp0, fact2, status);
1194         fp0 = floatx80_add(fp0, fact1, status);
1195 
1196         status->float_rounding_mode = user_rnd_mode;
1197         status->floatx80_rounding_precision = user_rnd_prec;
1198 
1199         a = floatx80_mul(fp0, adjfact, status);
1200 
1201         float_raise(float_flag_inexact, status);
1202 
1203         return a;
1204     }
1205 }
1206 
1207 /*
1208  * Tangent
1209  */
1210 
floatx80_tan(floatx80 a,float_status * status)1211 floatx80 floatx80_tan(floatx80 a, float_status *status)
1212 {
1213     bool aSign, xSign;
1214     int32_t aExp, xExp;
1215     uint64_t aSig, xSig;
1216 
1217     FloatRoundMode user_rnd_mode;
1218     FloatX80RoundPrec user_rnd_prec;
1219 
1220     int32_t compact, l, n, j;
1221     floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
1222     float32 twoto63;
1223     bool endflag;
1224 
1225     aSig = extractFloatx80Frac(a);
1226     aExp = extractFloatx80Exp(a);
1227     aSign = extractFloatx80Sign(a);
1228 
1229     if (aExp == 0x7FFF) {
1230         if ((uint64_t) (aSig << 1)) {
1231             return propagateFloatx80NaNOneArg(a, status);
1232         }
1233         float_raise(float_flag_invalid, status);
1234         return floatx80_default_nan(status);
1235     }
1236 
1237     if (aExp == 0 && aSig == 0) {
1238         return packFloatx80(aSign, 0, 0);
1239     }
1240 
1241     user_rnd_mode = status->float_rounding_mode;
1242     user_rnd_prec = status->floatx80_rounding_precision;
1243     status->float_rounding_mode = float_round_nearest_even;
1244     status->floatx80_rounding_precision = floatx80_precision_x;
1245 
1246     compact = floatx80_make_compact(aExp, aSig);
1247 
1248     fp0 = a;
1249 
1250     if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1251         /* 2^(-40) > |X| > 15 PI */
1252         if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1253             /* REDUCEX */
1254             fp1 = packFloatx80(0, 0, 0);
1255             if (compact == 0x7FFEFFFF) {
1256                 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1257                                       UINT64_C(0xC90FDAA200000000));
1258                 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1259                                       UINT64_C(0x85A308D300000000));
1260                 fp0 = floatx80_add(fp0, twopi1, status);
1261                 fp1 = fp0;
1262                 fp0 = floatx80_add(fp0, twopi2, status);
1263                 fp1 = floatx80_sub(fp1, fp0, status);
1264                 fp1 = floatx80_add(fp1, twopi2, status);
1265             }
1266         loop:
1267             xSign = extractFloatx80Sign(fp0);
1268             xExp = extractFloatx80Exp(fp0);
1269             xExp -= 0x3FFF;
1270             if (xExp <= 28) {
1271                 l = 0;
1272                 endflag = true;
1273             } else {
1274                 l = xExp - 27;
1275                 endflag = false;
1276             }
1277             invtwopi = packFloatx80(0, 0x3FFE - l,
1278                                     UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1279             twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1280             twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1281 
1282             /* SIGN(INARG)*2^63 IN SGL */
1283             twoto63 = packFloat32(xSign, 0xBE, 0);
1284 
1285             fp2 = floatx80_mul(fp0, invtwopi, status);
1286             fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1287                                status); /* THE FRACT PART OF FP2 IS ROUNDED */
1288             fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1289                                status); /* FP2 is N */
1290             fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1291             fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1292             fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1293             fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1294             fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1295             fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1296             fp3 = fp0; /* FP3 is A */
1297             fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1298             fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1299 
1300             if (endflag) {
1301                 n = floatx80_to_int32(fp2, status);
1302                 goto tancont;
1303             }
1304             fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1305             fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1306             goto loop;
1307         } else {
1308             status->float_rounding_mode = user_rnd_mode;
1309             status->floatx80_rounding_precision = user_rnd_prec;
1310 
1311             a = floatx80_move(a, status);
1312 
1313             float_raise(float_flag_inexact, status);
1314 
1315             return a;
1316         }
1317     } else {
1318         fp1 = floatx80_mul(fp0, float64_to_floatx80(
1319                            make_float64(0x3FE45F306DC9C883), status),
1320                            status); /* X*2/PI */
1321 
1322         n = floatx80_to_int32(fp1, status);
1323         j = 32 + n;
1324 
1325         fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1326         fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1327                            status); /* FP0 IS R = (X-Y1)-Y2 */
1328 
1329     tancont:
1330         if (n & 1) {
1331             /* NODD */
1332             fp1 = fp0; /* R */
1333             fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1334             fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1335                                       status); /* Q4 */
1336             fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1337                                       status); /* P3 */
1338             fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
1339             fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
1340             fp3 = floatx80_add(fp3, float64_to_floatx80(
1341                                make_float64(0xBF346F59B39BA65F), status),
1342                                status); /* Q3+SQ4 */
1343             fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
1344             fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1345             fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
1346             fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
1347             fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
1348             fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1349             fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
1350             fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1351             fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
1352             fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
1353             fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
1354             fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1355             fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
1356             fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1357             fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1358             fp0 = floatx80_add(fp0, float32_to_floatx80(
1359                                make_float32(0x3F800000), status),
1360                                status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1361 
1362             xSign = extractFloatx80Sign(fp1);
1363             xExp = extractFloatx80Exp(fp1);
1364             xSig = extractFloatx80Frac(fp1);
1365             xSign ^= 1;
1366             fp1 = packFloatx80(xSign, xExp, xSig);
1367 
1368             status->float_rounding_mode = user_rnd_mode;
1369             status->floatx80_rounding_precision = user_rnd_prec;
1370 
1371             a = floatx80_div(fp0, fp1, status);
1372 
1373             float_raise(float_flag_inexact, status);
1374 
1375             return a;
1376         } else {
1377             fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1378             fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1379                                       status); /* Q4 */
1380             fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1381                                       status); /* P3 */
1382             fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
1383             fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
1384             fp3 = floatx80_add(fp3, float64_to_floatx80(
1385                                make_float64(0xBF346F59B39BA65F), status),
1386                                status); /* Q3+SQ4 */
1387             fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
1388             fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1389             fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
1390             fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
1391             fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
1392             fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1393             fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
1394             fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1395             fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
1396             fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
1397             fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
1398             fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1399             fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
1400             fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1401             fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1402             fp1 = floatx80_add(fp1, float32_to_floatx80(
1403                                make_float32(0x3F800000), status),
1404                                status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1405 
1406             status->float_rounding_mode = user_rnd_mode;
1407             status->floatx80_rounding_precision = user_rnd_prec;
1408 
1409             a = floatx80_div(fp0, fp1, status);
1410 
1411             float_raise(float_flag_inexact, status);
1412 
1413             return a;
1414         }
1415     }
1416 }
1417 
1418 /*
1419  * Sine
1420  */
1421 
floatx80_sin(floatx80 a,float_status * status)1422 floatx80 floatx80_sin(floatx80 a, float_status *status)
1423 {
1424     bool aSign, xSign;
1425     int32_t aExp, xExp;
1426     uint64_t aSig, xSig;
1427 
1428     FloatRoundMode user_rnd_mode;
1429     FloatX80RoundPrec user_rnd_prec;
1430 
1431     int32_t compact, l, n, j;
1432     floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1433     float32 posneg1, twoto63;
1434     bool endflag;
1435 
1436     aSig = extractFloatx80Frac(a);
1437     aExp = extractFloatx80Exp(a);
1438     aSign = extractFloatx80Sign(a);
1439 
1440     if (aExp == 0x7FFF) {
1441         if ((uint64_t) (aSig << 1)) {
1442             return propagateFloatx80NaNOneArg(a, status);
1443         }
1444         float_raise(float_flag_invalid, status);
1445         return floatx80_default_nan(status);
1446     }
1447 
1448     if (aExp == 0 && aSig == 0) {
1449         return packFloatx80(aSign, 0, 0);
1450     }
1451 
1452     user_rnd_mode = status->float_rounding_mode;
1453     user_rnd_prec = status->floatx80_rounding_precision;
1454     status->float_rounding_mode = float_round_nearest_even;
1455     status->floatx80_rounding_precision = floatx80_precision_x;
1456 
1457     compact = floatx80_make_compact(aExp, aSig);
1458 
1459     fp0 = a;
1460 
1461     if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1462         /* 2^(-40) > |X| > 15 PI */
1463         if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1464             /* REDUCEX */
1465             fp1 = packFloatx80(0, 0, 0);
1466             if (compact == 0x7FFEFFFF) {
1467                 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1468                                       UINT64_C(0xC90FDAA200000000));
1469                 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1470                                       UINT64_C(0x85A308D300000000));
1471                 fp0 = floatx80_add(fp0, twopi1, status);
1472                 fp1 = fp0;
1473                 fp0 = floatx80_add(fp0, twopi2, status);
1474                 fp1 = floatx80_sub(fp1, fp0, status);
1475                 fp1 = floatx80_add(fp1, twopi2, status);
1476             }
1477         loop:
1478             xSign = extractFloatx80Sign(fp0);
1479             xExp = extractFloatx80Exp(fp0);
1480             xExp -= 0x3FFF;
1481             if (xExp <= 28) {
1482                 l = 0;
1483                 endflag = true;
1484             } else {
1485                 l = xExp - 27;
1486                 endflag = false;
1487             }
1488             invtwopi = packFloatx80(0, 0x3FFE - l,
1489                                     UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1490             twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1491             twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1492 
1493             /* SIGN(INARG)*2^63 IN SGL */
1494             twoto63 = packFloat32(xSign, 0xBE, 0);
1495 
1496             fp2 = floatx80_mul(fp0, invtwopi, status);
1497             fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1498                                status); /* THE FRACT PART OF FP2 IS ROUNDED */
1499             fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1500                                status); /* FP2 is N */
1501             fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1502             fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1503             fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1504             fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1505             fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1506             fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1507             fp3 = fp0; /* FP3 is A */
1508             fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1509             fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1510 
1511             if (endflag) {
1512                 n = floatx80_to_int32(fp2, status);
1513                 goto sincont;
1514             }
1515             fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1516             fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1517             goto loop;
1518         } else {
1519             /* SINSM */
1520             fp0 = float32_to_floatx80(make_float32(0x3F800000),
1521                                       status); /* 1 */
1522 
1523             status->float_rounding_mode = user_rnd_mode;
1524             status->floatx80_rounding_precision = user_rnd_prec;
1525 
1526             /* SINTINY */
1527             a = floatx80_move(a, status);
1528             float_raise(float_flag_inexact, status);
1529 
1530             return a;
1531         }
1532     } else {
1533         fp1 = floatx80_mul(fp0, float64_to_floatx80(
1534                            make_float64(0x3FE45F306DC9C883), status),
1535                            status); /* X*2/PI */
1536 
1537         n = floatx80_to_int32(fp1, status);
1538         j = 32 + n;
1539 
1540         fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1541         fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1542                            status); /* FP0 IS R = (X-Y1)-Y2 */
1543 
1544     sincont:
1545         if (n & 1) {
1546             /* COSPOLY */
1547             fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1548             fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1549             fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1550                                       status); /* B8 */
1551             fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1552                                       status); /* B7 */
1553 
1554             xSign = extractFloatx80Sign(fp0); /* X IS S */
1555             xExp = extractFloatx80Exp(fp0);
1556             xSig = extractFloatx80Frac(fp0);
1557 
1558             if ((n >> 1) & 1) {
1559                 xSign ^= 1;
1560                 posneg1 = make_float32(0xBF800000); /* -1 */
1561             } else {
1562                 xSign ^= 0;
1563                 posneg1 = make_float32(0x3F800000); /* 1 */
1564             } /* X IS NOW R'= SGN*R */
1565 
1566             fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1567             fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1568             fp2 = floatx80_add(fp2, float64_to_floatx80(
1569                                make_float64(0x3E21EED90612C972), status),
1570                                status); /* B6+TB8 */
1571             fp3 = floatx80_add(fp3, float64_to_floatx80(
1572                                make_float64(0xBE927E4FB79D9FCF), status),
1573                                status); /* B5+TB7 */
1574             fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1575             fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1576             fp2 = floatx80_add(fp2, float64_to_floatx80(
1577                                make_float64(0x3EFA01A01A01D423), status),
1578                                status); /* B4+T(B6+TB8) */
1579             fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
1580             fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1581             fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1582             fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1583             fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
1584             fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1585             fp1 = floatx80_add(fp1, float32_to_floatx80(
1586                                make_float32(0xBF000000), status),
1587                                status); /* B1+T(B3+T(B5+TB7)) */
1588             fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1589             fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
1590                                                    * [S(B2+T(B4+T(B6+TB8)))]
1591                                                    */
1592 
1593             x = packFloatx80(xSign, xExp, xSig);
1594             fp0 = floatx80_mul(fp0, x, status);
1595 
1596             status->float_rounding_mode = user_rnd_mode;
1597             status->floatx80_rounding_precision = user_rnd_prec;
1598 
1599             a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1600 
1601             float_raise(float_flag_inexact, status);
1602 
1603             return a;
1604         } else {
1605             /* SINPOLY */
1606             xSign = extractFloatx80Sign(fp0); /* X IS R */
1607             xExp = extractFloatx80Exp(fp0);
1608             xSig = extractFloatx80Frac(fp0);
1609 
1610             xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */
1611 
1612             fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1613             fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1614             fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1615                                       status); /* A7 */
1616             fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1617                                       status); /* A6 */
1618             fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1619             fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1620             fp3 = floatx80_add(fp3, float64_to_floatx80(
1621                                make_float64(0xBE5AE6452A118AE4), status),
1622                                status); /* A5+T*A7 */
1623             fp2 = floatx80_add(fp2, float64_to_floatx80(
1624                                make_float64(0x3EC71DE3A5341531), status),
1625                                status); /* A4+T*A6 */
1626             fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1627             fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1628             fp3 = floatx80_add(fp3, float64_to_floatx80(
1629                                make_float64(0xBF2A01A01A018B59), status),
1630                                status); /* A3+T(A5+TA7) */
1631             fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
1632             fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1633             fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1634             fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1635             fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
1636             fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1637             fp1 = floatx80_add(fp1, fp2,
1638                                status); /* [A1+T(A3+T(A5+TA7))]+
1639                                          * [S(A2+T(A4+TA6))]
1640                                          */
1641 
1642             x = packFloatx80(xSign, xExp, xSig);
1643             fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1644             fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1645 
1646             status->float_rounding_mode = user_rnd_mode;
1647             status->floatx80_rounding_precision = user_rnd_prec;
1648 
1649             a = floatx80_add(fp0, x, status);
1650 
1651             float_raise(float_flag_inexact, status);
1652 
1653             return a;
1654         }
1655     }
1656 }
1657 
1658 /*
1659  * Cosine
1660  */
1661 
floatx80_cos(floatx80 a,float_status * status)1662 floatx80 floatx80_cos(floatx80 a, float_status *status)
1663 {
1664     bool aSign, xSign;
1665     int32_t aExp, xExp;
1666     uint64_t aSig, xSig;
1667 
1668     FloatRoundMode user_rnd_mode;
1669     FloatX80RoundPrec user_rnd_prec;
1670 
1671     int32_t compact, l, n, j;
1672     floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1673     float32 posneg1, twoto63;
1674     bool endflag;
1675 
1676     aSig = extractFloatx80Frac(a);
1677     aExp = extractFloatx80Exp(a);
1678     aSign = extractFloatx80Sign(a);
1679 
1680     if (aExp == 0x7FFF) {
1681         if ((uint64_t) (aSig << 1)) {
1682             return propagateFloatx80NaNOneArg(a, status);
1683         }
1684         float_raise(float_flag_invalid, status);
1685         return floatx80_default_nan(status);
1686     }
1687 
1688     if (aExp == 0 && aSig == 0) {
1689         return packFloatx80(0, one_exp, one_sig);
1690     }
1691 
1692     user_rnd_mode = status->float_rounding_mode;
1693     user_rnd_prec = status->floatx80_rounding_precision;
1694     status->float_rounding_mode = float_round_nearest_even;
1695     status->floatx80_rounding_precision = floatx80_precision_x;
1696 
1697     compact = floatx80_make_compact(aExp, aSig);
1698 
1699     fp0 = a;
1700 
1701     if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1702         /* 2^(-40) > |X| > 15 PI */
1703         if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1704             /* REDUCEX */
1705             fp1 = packFloatx80(0, 0, 0);
1706             if (compact == 0x7FFEFFFF) {
1707                 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1708                                       UINT64_C(0xC90FDAA200000000));
1709                 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1710                                       UINT64_C(0x85A308D300000000));
1711                 fp0 = floatx80_add(fp0, twopi1, status);
1712                 fp1 = fp0;
1713                 fp0 = floatx80_add(fp0, twopi2, status);
1714                 fp1 = floatx80_sub(fp1, fp0, status);
1715                 fp1 = floatx80_add(fp1, twopi2, status);
1716             }
1717         loop:
1718             xSign = extractFloatx80Sign(fp0);
1719             xExp = extractFloatx80Exp(fp0);
1720             xExp -= 0x3FFF;
1721             if (xExp <= 28) {
1722                 l = 0;
1723                 endflag = true;
1724             } else {
1725                 l = xExp - 27;
1726                 endflag = false;
1727             }
1728             invtwopi = packFloatx80(0, 0x3FFE - l,
1729                                     UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1730             twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1731             twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1732 
1733             /* SIGN(INARG)*2^63 IN SGL */
1734             twoto63 = packFloat32(xSign, 0xBE, 0);
1735 
1736             fp2 = floatx80_mul(fp0, invtwopi, status);
1737             fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1738                                status); /* THE FRACT PART OF FP2 IS ROUNDED */
1739             fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1740                                status); /* FP2 is N */
1741             fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1742             fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1743             fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1744             fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1745             fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1746             fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1747             fp3 = fp0; /* FP3 is A */
1748             fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1749             fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1750 
1751             if (endflag) {
1752                 n = floatx80_to_int32(fp2, status);
1753                 goto sincont;
1754             }
1755             fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1756             fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1757             goto loop;
1758         } else {
1759             /* SINSM */
1760             fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */
1761 
1762             status->float_rounding_mode = user_rnd_mode;
1763             status->floatx80_rounding_precision = user_rnd_prec;
1764 
1765             /* COSTINY */
1766             a = floatx80_sub(fp0, float32_to_floatx80(
1767                              make_float32(0x00800000), status),
1768                              status);
1769             float_raise(float_flag_inexact, status);
1770 
1771             return a;
1772         }
1773     } else {
1774         fp1 = floatx80_mul(fp0, float64_to_floatx80(
1775                            make_float64(0x3FE45F306DC9C883), status),
1776                            status); /* X*2/PI */
1777 
1778         n = floatx80_to_int32(fp1, status);
1779         j = 32 + n;
1780 
1781         fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1782         fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1783                            status); /* FP0 IS R = (X-Y1)-Y2 */
1784 
1785     sincont:
1786         if ((n + 1) & 1) {
1787             /* COSPOLY */
1788             fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1789             fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1790             fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1791                                       status); /* B8 */
1792             fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1793                                       status); /* B7 */
1794 
1795             xSign = extractFloatx80Sign(fp0); /* X IS S */
1796             xExp = extractFloatx80Exp(fp0);
1797             xSig = extractFloatx80Frac(fp0);
1798 
1799             if (((n + 1) >> 1) & 1) {
1800                 xSign ^= 1;
1801                 posneg1 = make_float32(0xBF800000); /* -1 */
1802             } else {
1803                 xSign ^= 0;
1804                 posneg1 = make_float32(0x3F800000); /* 1 */
1805             } /* X IS NOW R'= SGN*R */
1806 
1807             fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1808             fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1809             fp2 = floatx80_add(fp2, float64_to_floatx80(
1810                                make_float64(0x3E21EED90612C972), status),
1811                                status); /* B6+TB8 */
1812             fp3 = floatx80_add(fp3, float64_to_floatx80(
1813                                make_float64(0xBE927E4FB79D9FCF), status),
1814                                status); /* B5+TB7 */
1815             fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1816             fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1817             fp2 = floatx80_add(fp2, float64_to_floatx80(
1818                                make_float64(0x3EFA01A01A01D423), status),
1819                                status); /* B4+T(B6+TB8) */
1820             fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
1821             fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1822             fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1823             fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1824             fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
1825             fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1826             fp1 = floatx80_add(fp1, float32_to_floatx80(
1827                                make_float32(0xBF000000), status),
1828                                status); /* B1+T(B3+T(B5+TB7)) */
1829             fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1830             fp0 = floatx80_add(fp0, fp1, status);
1831                               /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */
1832 
1833             x = packFloatx80(xSign, xExp, xSig);
1834             fp0 = floatx80_mul(fp0, x, status);
1835 
1836             status->float_rounding_mode = user_rnd_mode;
1837             status->floatx80_rounding_precision = user_rnd_prec;
1838 
1839             a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1840 
1841             float_raise(float_flag_inexact, status);
1842 
1843             return a;
1844         } else {
1845             /* SINPOLY */
1846             xSign = extractFloatx80Sign(fp0); /* X IS R */
1847             xExp = extractFloatx80Exp(fp0);
1848             xSig = extractFloatx80Frac(fp0);
1849 
1850             xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */
1851 
1852             fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1853             fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1854             fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1855                                       status); /* A7 */
1856             fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1857                                       status); /* A6 */
1858             fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1859             fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1860             fp3 = floatx80_add(fp3, float64_to_floatx80(
1861                                make_float64(0xBE5AE6452A118AE4), status),
1862                                status); /* A5+T*A7 */
1863             fp2 = floatx80_add(fp2, float64_to_floatx80(
1864                                make_float64(0x3EC71DE3A5341531), status),
1865                                status); /* A4+T*A6 */
1866             fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1867             fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1868             fp3 = floatx80_add(fp3, float64_to_floatx80(
1869                                make_float64(0xBF2A01A01A018B59), status),
1870                                status); /* A3+T(A5+TA7) */
1871             fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
1872             fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1873             fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1874             fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1875             fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
1876             fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1877             fp1 = floatx80_add(fp1, fp2, status);
1878                                     /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */
1879 
1880             x = packFloatx80(xSign, xExp, xSig);
1881             fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1882             fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1883 
1884             status->float_rounding_mode = user_rnd_mode;
1885             status->floatx80_rounding_precision = user_rnd_prec;
1886 
1887             a = floatx80_add(fp0, x, status);
1888 
1889             float_raise(float_flag_inexact, status);
1890 
1891             return a;
1892         }
1893     }
1894 }
1895 
1896 /*
1897  * Arc tangent
1898  */
1899 
floatx80_atan(floatx80 a,float_status * status)1900 floatx80 floatx80_atan(floatx80 a, float_status *status)
1901 {
1902     bool aSign;
1903     int32_t aExp;
1904     uint64_t aSig;
1905 
1906     FloatRoundMode user_rnd_mode;
1907     FloatX80RoundPrec user_rnd_prec;
1908 
1909     int32_t compact, tbl_index;
1910     floatx80 fp0, fp1, fp2, fp3, xsave;
1911 
1912     aSig = extractFloatx80Frac(a);
1913     aExp = extractFloatx80Exp(a);
1914     aSign = extractFloatx80Sign(a);
1915 
1916     if (aExp == 0x7FFF) {
1917         if ((uint64_t) (aSig << 1)) {
1918             return propagateFloatx80NaNOneArg(a, status);
1919         }
1920         a = packFloatx80(aSign, piby2_exp, pi_sig);
1921         float_raise(float_flag_inexact, status);
1922         return floatx80_move(a, status);
1923     }
1924 
1925     if (aExp == 0 && aSig == 0) {
1926         return packFloatx80(aSign, 0, 0);
1927     }
1928 
1929     compact = floatx80_make_compact(aExp, aSig);
1930 
1931     user_rnd_mode = status->float_rounding_mode;
1932     user_rnd_prec = status->floatx80_rounding_precision;
1933     status->float_rounding_mode = float_round_nearest_even;
1934     status->floatx80_rounding_precision = floatx80_precision_x;
1935 
1936     if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
1937         /* |X| >= 16 or |X| < 1/16 */
1938         if (compact > 0x3FFF8000) { /* |X| >= 16 */
1939             if (compact > 0x40638000) { /* |X| > 2^(100) */
1940                 fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
1941                 fp1 = packFloatx80(aSign, 0x0001, one_sig);
1942 
1943                 status->float_rounding_mode = user_rnd_mode;
1944                 status->floatx80_rounding_precision = user_rnd_prec;
1945 
1946                 a = floatx80_sub(fp0, fp1, status);
1947 
1948                 float_raise(float_flag_inexact, status);
1949 
1950                 return a;
1951             } else {
1952                 fp0 = a;
1953                 fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
1954                 fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
1955                 xsave = fp1;
1956                 fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
1957                 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
1958                 fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
1959                                           status); /* C5 */
1960                 fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
1961                                           status); /* C4 */
1962                 fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
1963                 fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
1964                 fp3 = floatx80_add(fp3, float64_to_floatx80(
1965                                    make_float64(0xBFC24924827107B8), status),
1966                                    status); /* C3+Z*C5 */
1967                 fp2 = floatx80_add(fp2, float64_to_floatx80(
1968                                    make_float64(0x3FC999999996263E), status),
1969                                    status); /* C2+Z*C4 */
1970                 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
1971                 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
1972                 fp1 = floatx80_add(fp1, float64_to_floatx80(
1973                                    make_float64(0xBFD5555555555536), status),
1974                                    status); /* C1+Z*(C3+Z*C5) */
1975                 fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
1976                 /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
1977                 fp1 = floatx80_add(fp1, fp2, status);
1978                 /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */
1979                 fp0 = floatx80_mul(fp0, fp1, status);
1980                 fp0 = floatx80_add(fp0, xsave, status);
1981                 fp1 = packFloatx80(aSign, piby2_exp, pi_sig);
1982 
1983                 status->float_rounding_mode = user_rnd_mode;
1984                 status->floatx80_rounding_precision = user_rnd_prec;
1985 
1986                 a = floatx80_add(fp0, fp1, status);
1987 
1988                 float_raise(float_flag_inexact, status);
1989 
1990                 return a;
1991             }
1992         } else { /* |X| < 1/16 */
1993             if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
1994                 status->float_rounding_mode = user_rnd_mode;
1995                 status->floatx80_rounding_precision = user_rnd_prec;
1996 
1997                 a = floatx80_move(a, status);
1998 
1999                 float_raise(float_flag_inexact, status);
2000 
2001                 return a;
2002             } else {
2003                 fp0 = a;
2004                 xsave = a;
2005                 fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
2006                 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
2007                 fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
2008                                           status); /* B6 */
2009                 fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
2010                                           status); /* B5 */
2011                 fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
2012                 fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
2013                 fp2 = floatx80_add(fp2, float64_to_floatx80(
2014                                    make_float64(0x3FBC71C646940220), status),
2015                                    status); /* B4+Z*B6 */
2016                 fp3 = floatx80_add(fp3, float64_to_floatx80(
2017                                    make_float64(0xBFC24924921872F9),
2018                                    status), status); /* B3+Z*B5 */
2019                 fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
2020                 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
2021                 fp2 = floatx80_add(fp2, float64_to_floatx80(
2022                                    make_float64(0x3FC9999999998FA9), status),
2023                                    status); /* B2+Z*(B4+Z*B6) */
2024                 fp1 = floatx80_add(fp1, float64_to_floatx80(
2025                                    make_float64(0xBFD5555555555555), status),
2026                                    status); /* B1+Z*(B3+Z*B5) */
2027                 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
2028                 fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
2029                 /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
2030                 fp1 = floatx80_add(fp1, fp2, status);
2031                 /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
2032                 fp0 = floatx80_mul(fp0, fp1, status);
2033 
2034                 status->float_rounding_mode = user_rnd_mode;
2035                 status->floatx80_rounding_precision = user_rnd_prec;
2036 
2037                 a = floatx80_add(fp0, xsave, status);
2038 
2039                 float_raise(float_flag_inexact, status);
2040 
2041                 return a;
2042             }
2043         }
2044     } else {
2045         aSig &= UINT64_C(0xF800000000000000);
2046         aSig |= UINT64_C(0x0400000000000000);
2047         xsave = packFloatx80(aSign, aExp, aSig); /* F */
2048         fp0 = a;
2049         fp1 = a; /* X */
2050         fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
2051         fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
2052         fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
2053         fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
2054         fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */
2055 
2056         tbl_index = compact;
2057 
2058         tbl_index &= 0x7FFF0000;
2059         tbl_index -= 0x3FFB0000;
2060         tbl_index >>= 1;
2061         tbl_index += compact & 0x00007800;
2062         tbl_index >>= 11;
2063 
2064         fp3 = atan_tbl[tbl_index];
2065 
2066         fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */
2067 
2068         fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
2069         fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
2070                                   status); /* A3 */
2071         fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
2072         fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
2073         fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
2074         fp2 = floatx80_add(fp2, float64_to_floatx80(
2075                            make_float64(0x4002AC6934A26DB3), status),
2076                            status); /* A2+V*(A3+V) */
2077         fp1 = floatx80_mul(fp1, float64_to_floatx80(
2078                            make_float64(0xBFC2476F4E1DA28E), status),
2079                            status); /* A1+U*V */
2080         fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
2081         fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */
2082 
2083         status->float_rounding_mode = user_rnd_mode;
2084         status->floatx80_rounding_precision = user_rnd_prec;
2085 
2086         a = floatx80_add(fp0, fp3, status); /* ATAN(X) */
2087 
2088         float_raise(float_flag_inexact, status);
2089 
2090         return a;
2091     }
2092 }
2093 
2094 /*
2095  * Arc sine
2096  */
2097 
floatx80_asin(floatx80 a,float_status * status)2098 floatx80 floatx80_asin(floatx80 a, float_status *status)
2099 {
2100     bool aSign;
2101     int32_t aExp;
2102     uint64_t aSig;
2103 
2104     FloatRoundMode user_rnd_mode;
2105     FloatX80RoundPrec user_rnd_prec;
2106 
2107     int32_t compact;
2108     floatx80 fp0, fp1, fp2, one;
2109 
2110     aSig = extractFloatx80Frac(a);
2111     aExp = extractFloatx80Exp(a);
2112     aSign = extractFloatx80Sign(a);
2113 
2114     if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2115         return propagateFloatx80NaNOneArg(a, status);
2116     }
2117 
2118     if (aExp == 0 && aSig == 0) {
2119         return packFloatx80(aSign, 0, 0);
2120     }
2121 
2122     compact = floatx80_make_compact(aExp, aSig);
2123 
2124     if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2125         if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2126             float_raise(float_flag_inexact, status);
2127             a = packFloatx80(aSign, piby2_exp, pi_sig);
2128             return floatx80_move(a, status);
2129         } else { /* |X| > 1 */
2130             float_raise(float_flag_invalid, status);
2131             return floatx80_default_nan(status);
2132         }
2133 
2134     } /* |X| < 1 */
2135 
2136     user_rnd_mode = status->float_rounding_mode;
2137     user_rnd_prec = status->floatx80_rounding_precision;
2138     status->float_rounding_mode = float_round_nearest_even;
2139     status->floatx80_rounding_precision = floatx80_precision_x;
2140 
2141     one = packFloatx80(0, one_exp, one_sig);
2142     fp0 = a;
2143 
2144     fp1 = floatx80_sub(one, fp0, status);   /* 1 - X */
2145     fp2 = floatx80_add(one, fp0, status);   /* 1 + X */
2146     fp1 = floatx80_mul(fp2, fp1, status);   /* (1+X)*(1-X) */
2147     fp1 = floatx80_sqrt(fp1, status);       /* SQRT((1+X)*(1-X)) */
2148     fp0 = floatx80_div(fp0, fp1, status);   /* X/SQRT((1+X)*(1-X)) */
2149 
2150     status->float_rounding_mode = user_rnd_mode;
2151     status->floatx80_rounding_precision = user_rnd_prec;
2152 
2153     a = floatx80_atan(fp0, status);         /* ATAN(X/SQRT((1+X)*(1-X))) */
2154 
2155     float_raise(float_flag_inexact, status);
2156 
2157     return a;
2158 }
2159 
2160 /*
2161  * Arc cosine
2162  */
2163 
floatx80_acos(floatx80 a,float_status * status)2164 floatx80 floatx80_acos(floatx80 a, float_status *status)
2165 {
2166     bool aSign;
2167     int32_t aExp;
2168     uint64_t aSig;
2169 
2170     FloatRoundMode user_rnd_mode;
2171     FloatX80RoundPrec user_rnd_prec;
2172 
2173     int32_t compact;
2174     floatx80 fp0, fp1, one;
2175 
2176     aSig = extractFloatx80Frac(a);
2177     aExp = extractFloatx80Exp(a);
2178     aSign = extractFloatx80Sign(a);
2179 
2180     if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2181         return propagateFloatx80NaNOneArg(a, status);
2182     }
2183     if (aExp == 0 && aSig == 0) {
2184         float_raise(float_flag_inexact, status);
2185         return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2186                                     piby2_exp, pi_sig, 0, status);
2187     }
2188 
2189     compact = floatx80_make_compact(aExp, aSig);
2190 
2191     if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2192         if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2193             if (aSign) { /* X == -1 */
2194                 a = packFloatx80(0, pi_exp, pi_sig);
2195                 float_raise(float_flag_inexact, status);
2196                 return floatx80_move(a, status);
2197             } else { /* X == +1 */
2198                 return packFloatx80(0, 0, 0);
2199             }
2200         } else { /* |X| > 1 */
2201             float_raise(float_flag_invalid, status);
2202             return floatx80_default_nan(status);
2203         }
2204     } /* |X| < 1 */
2205 
2206     user_rnd_mode = status->float_rounding_mode;
2207     user_rnd_prec = status->floatx80_rounding_precision;
2208     status->float_rounding_mode = float_round_nearest_even;
2209     status->floatx80_rounding_precision = floatx80_precision_x;
2210 
2211     one = packFloatx80(0, one_exp, one_sig);
2212     fp0 = a;
2213 
2214     fp1 = floatx80_add(one, fp0, status);   /* 1 + X */
2215     fp0 = floatx80_sub(one, fp0, status);   /* 1 - X */
2216     fp0 = floatx80_div(fp0, fp1, status);   /* (1-X)/(1+X) */
2217     fp0 = floatx80_sqrt(fp0, status);       /* SQRT((1-X)/(1+X)) */
2218     fp0 = floatx80_atan(fp0, status);       /* ATAN(SQRT((1-X)/(1+X))) */
2219 
2220     status->float_rounding_mode = user_rnd_mode;
2221     status->floatx80_rounding_precision = user_rnd_prec;
2222 
2223     a = floatx80_add(fp0, fp0, status);     /* 2 * ATAN(SQRT((1-X)/(1+X))) */
2224 
2225     float_raise(float_flag_inexact, status);
2226 
2227     return a;
2228 }
2229 
2230 /*
2231  * Hyperbolic arc tangent
2232  */
2233 
floatx80_atanh(floatx80 a,float_status * status)2234 floatx80 floatx80_atanh(floatx80 a, float_status *status)
2235 {
2236     bool aSign;
2237     int32_t aExp;
2238     uint64_t aSig;
2239 
2240     FloatRoundMode user_rnd_mode;
2241     FloatX80RoundPrec user_rnd_prec;
2242 
2243     int32_t compact;
2244     floatx80 fp0, fp1, fp2, one;
2245 
2246     aSig = extractFloatx80Frac(a);
2247     aExp = extractFloatx80Exp(a);
2248     aSign = extractFloatx80Sign(a);
2249 
2250     if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2251         return propagateFloatx80NaNOneArg(a, status);
2252     }
2253 
2254     if (aExp == 0 && aSig == 0) {
2255         return packFloatx80(aSign, 0, 0);
2256     }
2257 
2258     compact = floatx80_make_compact(aExp, aSig);
2259 
2260     if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2261         if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2262             float_raise(float_flag_divbyzero, status);
2263             return packFloatx80(aSign, floatx80_infinity.high,
2264                                 floatx80_infinity.low);
2265         } else { /* |X| > 1 */
2266             float_raise(float_flag_invalid, status);
2267             return floatx80_default_nan(status);
2268         }
2269     } /* |X| < 1 */
2270 
2271     user_rnd_mode = status->float_rounding_mode;
2272     user_rnd_prec = status->floatx80_rounding_precision;
2273     status->float_rounding_mode = float_round_nearest_even;
2274     status->floatx80_rounding_precision = floatx80_precision_x;
2275 
2276     one = packFloatx80(0, one_exp, one_sig);
2277     fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
2278     fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
2279     fp1 = packFloatx80(1, aExp, aSig); /* -Y */
2280     fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
2281     fp1 = floatx80_add(fp1, one, status); /* 1-Y */
2282     fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
2283     fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */
2284 
2285     status->float_rounding_mode = user_rnd_mode;
2286     status->floatx80_rounding_precision = user_rnd_prec;
2287 
2288     a = floatx80_mul(fp0, fp2,
2289                      status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */
2290 
2291     float_raise(float_flag_inexact, status);
2292 
2293     return a;
2294 }
2295 
2296 /*
2297  * e to x minus 1
2298  */
2299 
floatx80_etoxm1(floatx80 a,float_status * status)2300 floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
2301 {
2302     bool aSign;
2303     int32_t aExp;
2304     uint64_t aSig;
2305 
2306     FloatRoundMode user_rnd_mode;
2307     FloatX80RoundPrec user_rnd_prec;
2308 
2309     int32_t compact, n, j, m, m1;
2310     floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;
2311 
2312     aSig = extractFloatx80Frac(a);
2313     aExp = extractFloatx80Exp(a);
2314     aSign = extractFloatx80Sign(a);
2315 
2316     if (aExp == 0x7FFF) {
2317         if ((uint64_t) (aSig << 1)) {
2318             return propagateFloatx80NaNOneArg(a, status);
2319         }
2320         if (aSign) {
2321             return packFloatx80(aSign, one_exp, one_sig);
2322         }
2323         return packFloatx80(0, floatx80_infinity.high,
2324                             floatx80_infinity.low);
2325     }
2326 
2327     if (aExp == 0 && aSig == 0) {
2328         return packFloatx80(aSign, 0, 0);
2329     }
2330 
2331     user_rnd_mode = status->float_rounding_mode;
2332     user_rnd_prec = status->floatx80_rounding_precision;
2333     status->float_rounding_mode = float_round_nearest_even;
2334     status->floatx80_rounding_precision = floatx80_precision_x;
2335 
2336     if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
2337         compact = floatx80_make_compact(aExp, aSig);
2338 
2339         if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
2340             fp0 = a;
2341             fp1 = a;
2342             fp0 = floatx80_mul(fp0, float32_to_floatx80(
2343                                make_float32(0x42B8AA3B), status),
2344                                status); /* 64/log2 * X */
2345             n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
2346             fp0 = int32_to_floatx80(n, status);
2347 
2348             j = n & 0x3F; /* J = N mod 64 */
2349             m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
2350             if (n < 0 && j) {
2351                 /*
2352                  * arithmetic right shift is division and
2353                  * round towards minus infinity
2354                  */
2355                 m--;
2356             }
2357             m1 = -m;
2358             /*m += 0x3FFF; // biased exponent of 2^(M) */
2359             /*m1 += 0x3FFF; // biased exponent of -2^(-M) */
2360 
2361             fp2 = fp0; /* N */
2362             fp0 = floatx80_mul(fp0, float32_to_floatx80(
2363                                make_float32(0xBC317218), status),
2364                                status); /* N * L1, L1 = lead(-log2/64) */
2365             l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
2366             fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
2367             fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
2368             fp0 = floatx80_add(fp0, fp2, status); /* R */
2369 
2370             fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
2371             fp2 = float32_to_floatx80(make_float32(0x3950097B),
2372                                       status); /* A6 */
2373             fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
2374             fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
2375                                status), fp1, status); /* fp3 is S*A5 */
2376             fp2 = floatx80_add(fp2, float64_to_floatx80(
2377                                make_float64(0x3F81111111174385), status),
2378                                status); /* fp2 IS A4+S*A6 */
2379             fp3 = floatx80_add(fp3, float64_to_floatx80(
2380                                make_float64(0x3FA5555555554F5A), status),
2381                                status); /* fp3 is A3+S*A5 */
2382             fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
2383             fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
2384             fp2 = floatx80_add(fp2, float64_to_floatx80(
2385                                make_float64(0x3FC5555555555555), status),
2386                                status); /* fp2 IS A2+S*(A4+S*A6) */
2387             fp3 = floatx80_add(fp3, float32_to_floatx80(
2388                                make_float32(0x3F000000), status),
2389                                status); /* fp3 IS A1+S*(A3+S*A5) */
2390             fp2 = floatx80_mul(fp2, fp1,
2391                                status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
2392             fp1 = floatx80_mul(fp1, fp3,
2393                                status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
2394             fp2 = floatx80_mul(fp2, fp0,
2395                                status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
2396             fp0 = floatx80_add(fp0, fp1,
2397                                status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
2398             fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
2399 
2400             fp0 = floatx80_mul(fp0, exp_tbl[j],
2401                                status); /* 2^(J/64)*(Exp(R)-1) */
2402 
2403             if (m >= 64) {
2404                 fp1 = float32_to_floatx80(exp_tbl2[j], status);
2405                 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2406                 fp1 = floatx80_add(fp1, onebysc, status);
2407                 fp0 = floatx80_add(fp0, fp1, status);
2408                 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2409             } else if (m < -3) {
2410                 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2411                                    status), status);
2412                 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2413                 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2414                 fp0 = floatx80_add(fp0, onebysc, status);
2415             } else { /* -3 <= m <= 63 */
2416                 fp1 = exp_tbl[j];
2417                 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2418                                    status), status);
2419                 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2420                 fp1 = floatx80_add(fp1, onebysc, status);
2421                 fp0 = floatx80_add(fp0, fp1, status);
2422             }
2423 
2424             sc = packFloatx80(0, m + 0x3FFF, one_sig);
2425 
2426             status->float_rounding_mode = user_rnd_mode;
2427             status->floatx80_rounding_precision = user_rnd_prec;
2428 
2429             a = floatx80_mul(fp0, sc, status);
2430 
2431             float_raise(float_flag_inexact, status);
2432 
2433             return a;
2434         } else { /* |X| > 70 log2 */
2435             if (aSign) {
2436                 fp0 = float32_to_floatx80(make_float32(0xBF800000),
2437                       status); /* -1 */
2438 
2439                 status->float_rounding_mode = user_rnd_mode;
2440                 status->floatx80_rounding_precision = user_rnd_prec;
2441 
2442                 a = floatx80_add(fp0, float32_to_floatx80(
2443                                  make_float32(0x00800000), status),
2444                                  status); /* -1 + 2^(-126) */
2445 
2446                 float_raise(float_flag_inexact, status);
2447 
2448                 return a;
2449             } else {
2450                 status->float_rounding_mode = user_rnd_mode;
2451                 status->floatx80_rounding_precision = user_rnd_prec;
2452 
2453                 return floatx80_etox(a, status);
2454             }
2455         }
2456     } else { /* |X| < 1/4 */
2457         if (aExp >= 0x3FBE) {
2458             fp0 = a;
2459             fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
2460             fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
2461                                       status); /* B12 */
2462             fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
2463             fp2 = float32_to_floatx80(make_float32(0x310F8290),
2464                                       status); /* B11 */
2465             fp1 = floatx80_add(fp1, float32_to_floatx80(
2466                                make_float32(0x32D73220), status),
2467                                status); /* B10 */
2468             fp2 = floatx80_mul(fp2, fp0, status);
2469             fp1 = floatx80_mul(fp1, fp0, status);
2470             fp2 = floatx80_add(fp2, float32_to_floatx80(
2471                                make_float32(0x3493F281), status),
2472                                status); /* B9 */
2473             fp1 = floatx80_add(fp1, float64_to_floatx80(
2474                                make_float64(0x3EC71DE3A5774682), status),
2475                                status); /* B8 */
2476             fp2 = floatx80_mul(fp2, fp0, status);
2477             fp1 = floatx80_mul(fp1, fp0, status);
2478             fp2 = floatx80_add(fp2, float64_to_floatx80(
2479                                make_float64(0x3EFA01A019D7CB68), status),
2480                                status); /* B7 */
2481             fp1 = floatx80_add(fp1, float64_to_floatx80(
2482                                make_float64(0x3F2A01A01A019DF3), status),
2483                                status); /* B6 */
2484             fp2 = floatx80_mul(fp2, fp0, status);
2485             fp1 = floatx80_mul(fp1, fp0, status);
2486             fp2 = floatx80_add(fp2, float64_to_floatx80(
2487                                make_float64(0x3F56C16C16C170E2), status),
2488                                status); /* B5 */
2489             fp1 = floatx80_add(fp1, float64_to_floatx80(
2490                                make_float64(0x3F81111111111111), status),
2491                                status); /* B4 */
2492             fp2 = floatx80_mul(fp2, fp0, status);
2493             fp1 = floatx80_mul(fp1, fp0, status);
2494             fp2 = floatx80_add(fp2, float64_to_floatx80(
2495                                make_float64(0x3FA5555555555555), status),
2496                                status); /* B3 */
2497             fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB));
2498             fp1 = floatx80_add(fp1, fp3, status); /* B2 */
2499             fp2 = floatx80_mul(fp2, fp0, status);
2500             fp1 = floatx80_mul(fp1, fp0, status);
2501 
2502             fp2 = floatx80_mul(fp2, fp0, status);
2503             fp1 = floatx80_mul(fp1, a, status);
2504 
2505             fp0 = floatx80_mul(fp0, float32_to_floatx80(
2506                                make_float32(0x3F000000), status),
2507                                status); /* S*B1 */
2508             fp1 = floatx80_add(fp1, fp2, status); /* Q */
2509             fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */
2510 
2511             status->float_rounding_mode = user_rnd_mode;
2512             status->floatx80_rounding_precision = user_rnd_prec;
2513 
2514             a = floatx80_add(fp0, a, status);
2515 
2516             float_raise(float_flag_inexact, status);
2517 
2518             return a;
2519         } else { /* |X| < 2^(-65) */
2520             sc = packFloatx80(1, 1, one_sig);
2521             fp0 = a;
2522 
2523             if (aExp < 0x0033) { /* |X| < 2^(-16382) */
2524                 fp0 = floatx80_mul(fp0, float64_to_floatx80(
2525                                    make_float64(0x48B0000000000000), status),
2526                                    status);
2527                 fp0 = floatx80_add(fp0, sc, status);
2528 
2529                 status->float_rounding_mode = user_rnd_mode;
2530                 status->floatx80_rounding_precision = user_rnd_prec;
2531 
2532                 a = floatx80_mul(fp0, float64_to_floatx80(
2533                                  make_float64(0x3730000000000000), status),
2534                                  status);
2535             } else {
2536                 status->float_rounding_mode = user_rnd_mode;
2537                 status->floatx80_rounding_precision = user_rnd_prec;
2538 
2539                 a = floatx80_add(fp0, sc, status);
2540             }
2541 
2542             float_raise(float_flag_inexact, status);
2543 
2544             return a;
2545         }
2546     }
2547 }
2548 
2549 /*
2550  * Hyperbolic tangent
2551  */
2552 
floatx80_tanh(floatx80 a,float_status * status)2553 floatx80 floatx80_tanh(floatx80 a, float_status *status)
2554 {
2555     bool aSign, vSign;
2556     int32_t aExp, vExp;
2557     uint64_t aSig, vSig;
2558 
2559     FloatRoundMode user_rnd_mode;
2560     FloatX80RoundPrec user_rnd_prec;
2561 
2562     int32_t compact;
2563     floatx80 fp0, fp1;
2564     uint32_t sign;
2565 
2566     aSig = extractFloatx80Frac(a);
2567     aExp = extractFloatx80Exp(a);
2568     aSign = extractFloatx80Sign(a);
2569 
2570     if (aExp == 0x7FFF) {
2571         if ((uint64_t) (aSig << 1)) {
2572             return propagateFloatx80NaNOneArg(a, status);
2573         }
2574         return packFloatx80(aSign, one_exp, one_sig);
2575     }
2576 
2577     if (aExp == 0 && aSig == 0) {
2578         return packFloatx80(aSign, 0, 0);
2579     }
2580 
2581     user_rnd_mode = status->float_rounding_mode;
2582     user_rnd_prec = status->floatx80_rounding_precision;
2583     status->float_rounding_mode = float_round_nearest_even;
2584     status->floatx80_rounding_precision = floatx80_precision_x;
2585 
2586     compact = floatx80_make_compact(aExp, aSig);
2587 
2588     if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
2589         /* TANHBORS */
2590         if (compact < 0x3FFF8000) {
2591             /* TANHSM */
2592             status->float_rounding_mode = user_rnd_mode;
2593             status->floatx80_rounding_precision = user_rnd_prec;
2594 
2595             a = floatx80_move(a, status);
2596 
2597             float_raise(float_flag_inexact, status);
2598 
2599             return a;
2600         } else {
2601             if (compact > 0x40048AA1) {
2602                 /* TANHHUGE */
2603                 sign = 0x3F800000;
2604                 sign |= aSign ? 0x80000000 : 0x00000000;
2605                 fp0 = float32_to_floatx80(make_float32(sign), status);
2606                 sign &= 0x80000000;
2607                 sign ^= 0x80800000; /* -SIGN(X)*EPS */
2608 
2609                 status->float_rounding_mode = user_rnd_mode;
2610                 status->floatx80_rounding_precision = user_rnd_prec;
2611 
2612                 a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
2613                                  status), status);
2614 
2615                 float_raise(float_flag_inexact, status);
2616 
2617                 return a;
2618             } else {
2619                 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2620                 fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
2621                 fp0 = floatx80_add(fp0, float32_to_floatx80(
2622                                    make_float32(0x3F800000),
2623                                    status), status); /* EXP(Y)+1 */
2624                 sign = aSign ? 0x80000000 : 0x00000000;
2625                 fp1 = floatx80_div(float32_to_floatx80(make_float32(
2626                                    sign ^ 0xC0000000), status), fp0,
2627                                    status); /* -SIGN(X)*2 / [EXP(Y)+1] */
2628                 fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
2629                                           status); /* SIGN */
2630 
2631                 status->float_rounding_mode = user_rnd_mode;
2632                 status->floatx80_rounding_precision = user_rnd_prec;
2633 
2634                 a = floatx80_add(fp1, fp0, status);
2635 
2636                 float_raise(float_flag_inexact, status);
2637 
2638                 return a;
2639             }
2640         }
2641     } else { /* 2**(-40) < |X| < (5/2)LOG2 */
2642         fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2643         fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2644         fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
2645                            status),
2646                            status); /* Z+2 */
2647 
2648         vSign = extractFloatx80Sign(fp1);
2649         vExp = extractFloatx80Exp(fp1);
2650         vSig = extractFloatx80Frac(fp1);
2651 
2652         fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);
2653 
2654         status->float_rounding_mode = user_rnd_mode;
2655         status->floatx80_rounding_precision = user_rnd_prec;
2656 
2657         a = floatx80_div(fp0, fp1, status);
2658 
2659         float_raise(float_flag_inexact, status);
2660 
2661         return a;
2662     }
2663 }
2664 
2665 /*
2666  * Hyperbolic sine
2667  */
2668 
floatx80_sinh(floatx80 a,float_status * status)2669 floatx80 floatx80_sinh(floatx80 a, float_status *status)
2670 {
2671     bool aSign;
2672     int32_t aExp;
2673     uint64_t aSig;
2674 
2675     FloatRoundMode user_rnd_mode;
2676     FloatX80RoundPrec user_rnd_prec;
2677 
2678     int32_t compact;
2679     floatx80 fp0, fp1, fp2;
2680     float32 fact;
2681 
2682     aSig = extractFloatx80Frac(a);
2683     aExp = extractFloatx80Exp(a);
2684     aSign = extractFloatx80Sign(a);
2685 
2686     if (aExp == 0x7FFF) {
2687         if ((uint64_t) (aSig << 1)) {
2688             return propagateFloatx80NaNOneArg(a, status);
2689         }
2690         return packFloatx80(aSign, floatx80_infinity.high,
2691                             floatx80_infinity.low);
2692     }
2693 
2694     if (aExp == 0 && aSig == 0) {
2695         return packFloatx80(aSign, 0, 0);
2696     }
2697 
2698     user_rnd_mode = status->float_rounding_mode;
2699     user_rnd_prec = status->floatx80_rounding_precision;
2700     status->float_rounding_mode = float_round_nearest_even;
2701     status->floatx80_rounding_precision = floatx80_precision_x;
2702 
2703     compact = floatx80_make_compact(aExp, aSig);
2704 
2705     if (compact > 0x400CB167) {
2706         /* SINHBIG */
2707         if (compact > 0x400CB2B3) {
2708             status->float_rounding_mode = user_rnd_mode;
2709             status->floatx80_rounding_precision = user_rnd_prec;
2710 
2711             return roundAndPackFloatx80(status->floatx80_rounding_precision,
2712                                         aSign, 0x8000, aSig, 0, status);
2713         } else {
2714             fp0 = floatx80_abs(a); /* Y = |X| */
2715             fp0 = floatx80_sub(fp0, float64_to_floatx80(
2716                                make_float64(0x40C62D38D3D64634), status),
2717                                status); /* (|X|-16381LOG2_LEAD) */
2718             fp0 = floatx80_sub(fp0, float64_to_floatx80(
2719                                make_float64(0x3D6F90AEB1E75CC7), status),
2720                                status); /* |X| - 16381 LOG2, ACCURATE */
2721             fp0 = floatx80_etox(fp0, status);
2722             fp2 = packFloatx80(aSign, 0x7FFB, one_sig);
2723 
2724             status->float_rounding_mode = user_rnd_mode;
2725             status->floatx80_rounding_precision = user_rnd_prec;
2726 
2727             a = floatx80_mul(fp0, fp2, status);
2728 
2729             float_raise(float_flag_inexact, status);
2730 
2731             return a;
2732         }
2733     } else { /* |X| < 16380 LOG2 */
2734         fp0 = floatx80_abs(a); /* Y = |X| */
2735         fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2736         fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
2737                            status), status); /* 1+Z */
2738         fp2 = fp0;
2739         fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
2740         fp0 = floatx80_add(fp0, fp2, status);
2741 
2742         fact = packFloat32(aSign, 0x7E, 0);
2743 
2744         status->float_rounding_mode = user_rnd_mode;
2745         status->floatx80_rounding_precision = user_rnd_prec;
2746 
2747         a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);
2748 
2749         float_raise(float_flag_inexact, status);
2750 
2751         return a;
2752     }
2753 }
2754 
2755 /*
2756  * Hyperbolic cosine
2757  */
2758 
floatx80_cosh(floatx80 a,float_status * status)2759 floatx80 floatx80_cosh(floatx80 a, float_status *status)
2760 {
2761     int32_t aExp;
2762     uint64_t aSig;
2763 
2764     FloatRoundMode user_rnd_mode;
2765     FloatX80RoundPrec user_rnd_prec;
2766 
2767     int32_t compact;
2768     floatx80 fp0, fp1;
2769 
2770     aSig = extractFloatx80Frac(a);
2771     aExp = extractFloatx80Exp(a);
2772 
2773     if (aExp == 0x7FFF) {
2774         if ((uint64_t) (aSig << 1)) {
2775             return propagateFloatx80NaNOneArg(a, status);
2776         }
2777         return packFloatx80(0, floatx80_infinity.high,
2778                             floatx80_infinity.low);
2779     }
2780 
2781     if (aExp == 0 && aSig == 0) {
2782         return packFloatx80(0, one_exp, one_sig);
2783     }
2784 
2785     user_rnd_mode = status->float_rounding_mode;
2786     user_rnd_prec = status->floatx80_rounding_precision;
2787     status->float_rounding_mode = float_round_nearest_even;
2788     status->floatx80_rounding_precision = floatx80_precision_x;
2789 
2790     compact = floatx80_make_compact(aExp, aSig);
2791 
2792     if (compact > 0x400CB167) {
2793         if (compact > 0x400CB2B3) {
2794             status->float_rounding_mode = user_rnd_mode;
2795             status->floatx80_rounding_precision = user_rnd_prec;
2796             return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2797                                         0x8000, one_sig, 0, status);
2798         } else {
2799             fp0 = packFloatx80(0, aExp, aSig);
2800             fp0 = floatx80_sub(fp0, float64_to_floatx80(
2801                                make_float64(0x40C62D38D3D64634), status),
2802                                status);
2803             fp0 = floatx80_sub(fp0, float64_to_floatx80(
2804                                make_float64(0x3D6F90AEB1E75CC7), status),
2805                                status);
2806             fp0 = floatx80_etox(fp0, status);
2807             fp1 = packFloatx80(0, 0x7FFB, one_sig);
2808 
2809             status->float_rounding_mode = user_rnd_mode;
2810             status->floatx80_rounding_precision = user_rnd_prec;
2811 
2812             a = floatx80_mul(fp0, fp1, status);
2813 
2814             float_raise(float_flag_inexact, status);
2815 
2816             return a;
2817         }
2818     }
2819 
2820     fp0 = packFloatx80(0, aExp, aSig); /* |X| */
2821     fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
2822     fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
2823                        status), status); /* (1/2)*EXP(|X|) */
2824     fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
2825     fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */
2826 
2827     status->float_rounding_mode = user_rnd_mode;
2828     status->floatx80_rounding_precision = user_rnd_prec;
2829 
2830     a = floatx80_add(fp0, fp1, status);
2831 
2832     float_raise(float_flag_inexact, status);
2833 
2834     return a;
2835 }
2836