xref: /openbmc/linux/arch/x86/math-emu/poly_sin.c (revision d0e22329)
1 // SPDX-License-Identifier: GPL-2.0
2 /*---------------------------------------------------------------------------+
3  |  poly_sin.c                                                               |
4  |                                                                           |
5  |  Computation of an approximation of the sin function and the cosine       |
6  |  function by a polynomial.                                                |
7  |                                                                           |
8  | Copyright (C) 1992,1993,1994,1997,1999                                    |
9  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
10  |                  E-mail   billm@melbpc.org.au                             |
11  |                                                                           |
12  |                                                                           |
13  +---------------------------------------------------------------------------*/
14 
15 #include "exception.h"
16 #include "reg_constant.h"
17 #include "fpu_emu.h"
18 #include "fpu_system.h"
19 #include "control_w.h"
20 #include "poly.h"
21 
/*
 * Entry counts of pos_terms_l[] and neg_terms_l[].  The users in this file
 * hand "N_COEFF_x - 1" to polynomial_Xsig() together with the table.
 */
#define	N_COEFF_P	4
#define	N_COEFF_N	4

/*
 * Coefficient tables used by the small-argument branch of poly_sine() and
 * by the large-argument branch of poly_cos().
 *
 * In both users neg_terms_l[] is evaluated first, the partial result is
 * multiplied by the squared argument and negated, and only then is
 * pos_terms_l[] accumulated on top; the "neg" entries therefore enter the
 * sum with a negative sign.
 *
 * NOTE(review): the fixed-point scaling of the individual entries is
 * defined by polynomial_Xsig(), which is not visible from this file.
 */
static const unsigned long long pos_terms_l[N_COEFF_P] = {
	0xaaaaaaaaaaaaaaabLL,
	0x00d00d00d00cf906LL,
	0x000006b99159a8bbLL,
	0x000000000d7392e6LL
};

static const unsigned long long neg_terms_l[N_COEFF_N] = {
	0x2222222222222167LL,
	0x0002e3bc74aab624LL,
	0x0000000b09229062LL,
	0x00000000000c7973LL
};
38 
/*
 * Entry counts of pos_terms_h[] and neg_terms_h[].  The users in this file
 * hand "N_COEFF_xH - 1" to polynomial_Xsig() together with the table.
 */
#define	N_COEFF_PH	4
#define	N_COEFF_NH	4

/*
 * Coefficient tables used by the large-argument branch of poly_sine() and
 * by the small-argument branch of poly_cos().
 *
 * As with the "_l" tables, neg_terms_h[] is evaluated first, multiplied by
 * the squared argument and negated before pos_terms_h[] is accumulated.
 * The leading entry of pos_terms_h[] is zero.
 *
 * NOTE(review): the fixed-point scaling of the individual entries is
 * defined by polynomial_Xsig(), which is not visible from this file.
 */
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
	0x0000000000000000LL,
	0x05b05b05b05b0406LL,
	0x000049f93edd91a9LL,
	0x00000000c9c9ed62LL
};

static const unsigned long long neg_terms_h[N_COEFF_NH] = {
	0xaaaaaaaaaaaaaa98LL,
	0x001a01a01a019064LL,
	0x0000008f76c68a77LL,
	0x0000000000d58f5eLL
};
54 
55 /*--- poly_sine() -----------------------------------------------------------+
56  |                                                                           |
57  +---------------------------------------------------------------------------*/
58 void poly_sine(FPU_REG *st0_ptr)
59 {
60 	int exponent, echange;
61 	Xsig accumulator, argSqrd, argTo4;
62 	unsigned long fix_up, adj;
63 	unsigned long long fixed_arg;
64 	FPU_REG result;
65 
66 	exponent = exponent(st0_ptr);
67 
68 	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
69 
70 	/* Split into two ranges, for arguments below and above 1.0 */
71 	/* The boundary between upper and lower is approx 0.88309101259 */
72 	if ((exponent < -1)
73 	    || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
74 		/* The argument is <= 0.88309101259 */
75 
76 		argSqrd.msw = st0_ptr->sigh;
77 		argSqrd.midw = st0_ptr->sigl;
78 		argSqrd.lsw = 0;
79 		mul64_Xsig(&argSqrd, &significand(st0_ptr));
80 		shr_Xsig(&argSqrd, 2 * (-1 - exponent));
81 		argTo4.msw = argSqrd.msw;
82 		argTo4.midw = argSqrd.midw;
83 		argTo4.lsw = argSqrd.lsw;
84 		mul_Xsig_Xsig(&argTo4, &argTo4);
85 
86 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
87 				N_COEFF_N - 1);
88 		mul_Xsig_Xsig(&accumulator, &argSqrd);
89 		negate_Xsig(&accumulator);
90 
91 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
92 				N_COEFF_P - 1);
93 
94 		shr_Xsig(&accumulator, 2);	/* Divide by four */
95 		accumulator.msw |= 0x80000000;	/* Add 1.0 */
96 
97 		mul64_Xsig(&accumulator, &significand(st0_ptr));
98 		mul64_Xsig(&accumulator, &significand(st0_ptr));
99 		mul64_Xsig(&accumulator, &significand(st0_ptr));
100 
101 		/* Divide by four, FPU_REG compatible, etc */
102 		exponent = 3 * exponent;
103 
104 		/* The minimum exponent difference is 3 */
105 		shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
106 
107 		negate_Xsig(&accumulator);
108 		XSIG_LL(accumulator) += significand(st0_ptr);
109 
110 		echange = round_Xsig(&accumulator);
111 
112 		setexponentpos(&result, exponent(st0_ptr) + echange);
113 	} else {
114 		/* The argument is > 0.88309101259 */
115 		/* We use sin(st(0)) = cos(pi/2-st(0)) */
116 
117 		fixed_arg = significand(st0_ptr);
118 
119 		if (exponent == 0) {
120 			/* The argument is >= 1.0 */
121 
122 			/* Put the binary point at the left. */
123 			fixed_arg <<= 1;
124 		}
125 		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
126 		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
127 		/* There is a special case which arises due to rounding, to fix here. */
128 		if (fixed_arg == 0xffffffffffffffffLL)
129 			fixed_arg = 0;
130 
131 		XSIG_LL(argSqrd) = fixed_arg;
132 		argSqrd.lsw = 0;
133 		mul64_Xsig(&argSqrd, &fixed_arg);
134 
135 		XSIG_LL(argTo4) = XSIG_LL(argSqrd);
136 		argTo4.lsw = argSqrd.lsw;
137 		mul_Xsig_Xsig(&argTo4, &argTo4);
138 
139 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
140 				N_COEFF_NH - 1);
141 		mul_Xsig_Xsig(&accumulator, &argSqrd);
142 		negate_Xsig(&accumulator);
143 
144 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
145 				N_COEFF_PH - 1);
146 		negate_Xsig(&accumulator);
147 
148 		mul64_Xsig(&accumulator, &fixed_arg);
149 		mul64_Xsig(&accumulator, &fixed_arg);
150 
151 		shr_Xsig(&accumulator, 3);
152 		negate_Xsig(&accumulator);
153 
154 		add_Xsig_Xsig(&accumulator, &argSqrd);
155 
156 		shr_Xsig(&accumulator, 1);
157 
158 		accumulator.lsw |= 1;	/* A zero accumulator here would cause problems */
159 		negate_Xsig(&accumulator);
160 
161 		/* The basic computation is complete. Now fix the answer to
162 		   compensate for the error due to the approximation used for
163 		   pi/2
164 		 */
165 
166 		/* This has an exponent of -65 */
167 		fix_up = 0x898cc517;
168 		/* The fix-up needs to be improved for larger args */
169 		if (argSqrd.msw & 0xffc00000) {
170 			/* Get about 32 bit precision in these: */
171 			fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
172 		}
173 		fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
174 
175 		adj = accumulator.lsw;	/* temp save */
176 		accumulator.lsw -= fix_up;
177 		if (accumulator.lsw > adj)
178 			XSIG_LL(accumulator)--;
179 
180 		echange = round_Xsig(&accumulator);
181 
182 		setexponentpos(&result, echange - 1);
183 	}
184 
185 	significand(&result) = XSIG_LL(accumulator);
186 	setsign(&result, getsign(st0_ptr));
187 	FPU_copy_to_reg0(&result, TAG_Valid);
188 
189 #ifdef PARANOID
190 	if ((exponent(&result) >= 0)
191 	    && (significand(&result) > 0x8000000000000000LL)) {
192 		EXCEPTION(EX_INTERNAL | 0x150);
193 	}
194 #endif /* PARANOID */
195 
196 }
197 
198 /*--- poly_cos() ------------------------------------------------------------+
199  |                                                                           |
200  +---------------------------------------------------------------------------*/
201 void poly_cos(FPU_REG *st0_ptr)
202 {
203 	FPU_REG result;
204 	long int exponent, exp2, echange;
205 	Xsig accumulator, argSqrd, fix_up, argTo4;
206 	unsigned long long fixed_arg;
207 
208 #ifdef PARANOID
209 	if ((exponent(st0_ptr) > 0)
210 	    || ((exponent(st0_ptr) == 0)
211 		&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
212 		EXCEPTION(EX_Invalid);
213 		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
214 		return;
215 	}
216 #endif /* PARANOID */
217 
218 	exponent = exponent(st0_ptr);
219 
220 	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
221 
222 	if ((exponent < -1)
223 	    || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
224 		/* arg is < 0.687705 */
225 
226 		argSqrd.msw = st0_ptr->sigh;
227 		argSqrd.midw = st0_ptr->sigl;
228 		argSqrd.lsw = 0;
229 		mul64_Xsig(&argSqrd, &significand(st0_ptr));
230 
231 		if (exponent < -1) {
232 			/* shift the argument right by the required places */
233 			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
234 		}
235 
236 		argTo4.msw = argSqrd.msw;
237 		argTo4.midw = argSqrd.midw;
238 		argTo4.lsw = argSqrd.lsw;
239 		mul_Xsig_Xsig(&argTo4, &argTo4);
240 
241 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
242 				N_COEFF_NH - 1);
243 		mul_Xsig_Xsig(&accumulator, &argSqrd);
244 		negate_Xsig(&accumulator);
245 
246 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
247 				N_COEFF_PH - 1);
248 		negate_Xsig(&accumulator);
249 
250 		mul64_Xsig(&accumulator, &significand(st0_ptr));
251 		mul64_Xsig(&accumulator, &significand(st0_ptr));
252 		shr_Xsig(&accumulator, -2 * (1 + exponent));
253 
254 		shr_Xsig(&accumulator, 3);
255 		negate_Xsig(&accumulator);
256 
257 		add_Xsig_Xsig(&accumulator, &argSqrd);
258 
259 		shr_Xsig(&accumulator, 1);
260 
261 		/* It doesn't matter if accumulator is all zero here, the
262 		   following code will work ok */
263 		negate_Xsig(&accumulator);
264 
265 		if (accumulator.lsw & 0x80000000)
266 			XSIG_LL(accumulator)++;
267 		if (accumulator.msw == 0) {
268 			/* The result is 1.0 */
269 			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
270 			return;
271 		} else {
272 			significand(&result) = XSIG_LL(accumulator);
273 
274 			/* will be a valid positive nr with expon = -1 */
275 			setexponentpos(&result, -1);
276 		}
277 	} else {
278 		fixed_arg = significand(st0_ptr);
279 
280 		if (exponent == 0) {
281 			/* The argument is >= 1.0 */
282 
283 			/* Put the binary point at the left. */
284 			fixed_arg <<= 1;
285 		}
286 		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
287 		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
288 		/* There is a special case which arises due to rounding, to fix here. */
289 		if (fixed_arg == 0xffffffffffffffffLL)
290 			fixed_arg = 0;
291 
292 		exponent = -1;
293 		exp2 = -1;
294 
295 		/* A shift is needed here only for a narrow range of arguments,
296 		   i.e. for fixed_arg approx 2^-32, but we pick up more... */
297 		if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
298 			fixed_arg <<= 16;
299 			exponent -= 16;
300 			exp2 -= 16;
301 		}
302 
303 		XSIG_LL(argSqrd) = fixed_arg;
304 		argSqrd.lsw = 0;
305 		mul64_Xsig(&argSqrd, &fixed_arg);
306 
307 		if (exponent < -1) {
308 			/* shift the argument right by the required places */
309 			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
310 		}
311 
312 		argTo4.msw = argSqrd.msw;
313 		argTo4.midw = argSqrd.midw;
314 		argTo4.lsw = argSqrd.lsw;
315 		mul_Xsig_Xsig(&argTo4, &argTo4);
316 
317 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
318 				N_COEFF_N - 1);
319 		mul_Xsig_Xsig(&accumulator, &argSqrd);
320 		negate_Xsig(&accumulator);
321 
322 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
323 				N_COEFF_P - 1);
324 
325 		shr_Xsig(&accumulator, 2);	/* Divide by four */
326 		accumulator.msw |= 0x80000000;	/* Add 1.0 */
327 
328 		mul64_Xsig(&accumulator, &fixed_arg);
329 		mul64_Xsig(&accumulator, &fixed_arg);
330 		mul64_Xsig(&accumulator, &fixed_arg);
331 
332 		/* Divide by four, FPU_REG compatible, etc */
333 		exponent = 3 * exponent;
334 
335 		/* The minimum exponent difference is 3 */
336 		shr_Xsig(&accumulator, exp2 - exponent);
337 
338 		negate_Xsig(&accumulator);
339 		XSIG_LL(accumulator) += fixed_arg;
340 
341 		/* The basic computation is complete. Now fix the answer to
342 		   compensate for the error due to the approximation used for
343 		   pi/2
344 		 */
345 
346 		/* This has an exponent of -65 */
347 		XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
348 		fix_up.lsw = 0;
349 
350 		/* The fix-up needs to be improved for larger args */
351 		if (argSqrd.msw & 0xffc00000) {
352 			/* Get about 32 bit precision in these: */
353 			fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
354 			fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
355 		}
356 
357 		exp2 += norm_Xsig(&accumulator);
358 		shr_Xsig(&accumulator, 1);	/* Prevent overflow */
359 		exp2++;
360 		shr_Xsig(&fix_up, 65 + exp2);
361 
362 		add_Xsig_Xsig(&accumulator, &fix_up);
363 
364 		echange = round_Xsig(&accumulator);
365 
366 		setexponentpos(&result, exp2 + echange);
367 		significand(&result) = XSIG_LL(accumulator);
368 	}
369 
370 	FPU_copy_to_reg0(&result, TAG_Valid);
371 
372 #ifdef PARANOID
373 	if ((exponent(&result) >= 0)
374 	    && (significand(&result) > 0x8000000000000000LL)) {
375 		EXCEPTION(EX_INTERNAL | 0x151);
376 	}
377 #endif /* PARANOID */
378 
379 }
380