xref: /openbmc/linux/arch/m68k/fpsp040/stan.S (revision e5451c8f8330e03ad3cfa16048b4daf961af434f)
11da177e4SLinus Torvalds|
21da177e4SLinus Torvalds|	stan.sa 3.3 7/29/91
31da177e4SLinus Torvalds|
41da177e4SLinus Torvalds|	The entry point stan computes the tangent of
51da177e4SLinus Torvalds|	an input argument;
61da177e4SLinus Torvalds|	stand does the same except for denormalized input.
71da177e4SLinus Torvalds|
81da177e4SLinus Torvalds|	Input: Double-extended number X in location pointed to
91da177e4SLinus Torvalds|		by address register a0.
101da177e4SLinus Torvalds|
111da177e4SLinus Torvalds|	Output: The value tan(X) returned in floating-point register Fp0.
121da177e4SLinus Torvalds|
131da177e4SLinus Torvalds|	Accuracy and Monotonicity: The returned result is within 3 ulp in
141da177e4SLinus Torvalds|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
151da177e4SLinus Torvalds|		result is subsequently rounded to double precision. The
161da177e4SLinus Torvalds|		result is provably monotonic in double precision.
171da177e4SLinus Torvalds|
181da177e4SLinus Torvalds|	Speed: The program sTAN takes approximately 170 cycles for
191da177e4SLinus Torvalds|		input argument X such that |X| < 15Pi, which is the usual
201da177e4SLinus Torvalds|		situation.
211da177e4SLinus Torvalds|
221da177e4SLinus Torvalds|	Algorithm:
231da177e4SLinus Torvalds|
241da177e4SLinus Torvalds|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
251da177e4SLinus Torvalds|
261da177e4SLinus Torvalds|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
271da177e4SLinus Torvalds|		k = N mod 2, so in particular, k = 0 or 1.
281da177e4SLinus Torvalds|
291da177e4SLinus Torvalds|	3. If k is odd, go to 5.
301da177e4SLinus Torvalds|
311da177e4SLinus Torvalds|	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
321da177e4SLinus Torvalds|		rational function U/V where
331da177e4SLinus Torvalds|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
341da177e4SLinus Torvalds|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
351da177e4SLinus Torvalds|		Exit.
361da177e4SLinus Torvalds|
371da177e4SLinus Torvalds|	4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
381da177e4SLinus Torvalds|		rational function U/V where
391da177e4SLinus Torvalds|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
401da177e4SLinus Torvalds|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
411da177e4SLinus Torvalds|		-Cot(r) = -V/U. Exit.
421da177e4SLinus Torvalds|
431da177e4SLinus Torvalds|	6. If |X| > 1, go to 8.
441da177e4SLinus Torvalds|
451da177e4SLinus Torvalds|	7. (|X|<2**(-40)) Tan(X) = X. Exit.
461da177e4SLinus Torvalds|
471da177e4SLinus Torvalds|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
481da177e4SLinus Torvalds|
491da177e4SLinus Torvalds
501da177e4SLinus Torvalds|		Copyright (C) Motorola, Inc. 1990
511da177e4SLinus Torvalds|			All Rights Reserved
521da177e4SLinus Torvalds|
53*e00d82d0SMatt Waddel|       For details on the license for this file, please see the
54*e00d82d0SMatt Waddel|       file, README, in this same directory.
551da177e4SLinus Torvalds
561da177e4SLinus Torvalds|STAN	idnt	2,1 | Motorola 040 Floating Point Software Package
571da177e4SLinus Torvalds
581da177e4SLinus Torvalds	|section	8
591da177e4SLinus Torvalds
601da177e4SLinus Torvalds#include "fpsp.h"
611da177e4SLinus Torvalds
621da177e4SLinus TorvaldsBOUNDS1:	.long 0x3FD78000,0x4004BC7E
631da177e4SLinus TorvaldsTWOBYPI:	.long 0x3FE45F30,0x6DC9C883
641da177e4SLinus Torvalds
651da177e4SLinus TorvaldsTANQ4:	.long 0x3EA0B759,0xF50F8688
661da177e4SLinus TorvaldsTANP3:	.long 0xBEF2BAA5,0xA8924F04
671da177e4SLinus Torvalds
681da177e4SLinus TorvaldsTANQ3:	.long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
691da177e4SLinus Torvalds
701da177e4SLinus TorvaldsTANP2:	.long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
711da177e4SLinus Torvalds
721da177e4SLinus TorvaldsTANQ2:	.long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
731da177e4SLinus Torvalds
741da177e4SLinus TorvaldsTANP1:	.long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
751da177e4SLinus Torvalds
761da177e4SLinus TorvaldsTANQ1:	.long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
771da177e4SLinus Torvalds
781da177e4SLinus TorvaldsINVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
791da177e4SLinus Torvalds
801da177e4SLinus TorvaldsTWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
811da177e4SLinus TorvaldsTWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
821da177e4SLinus Torvalds
831da177e4SLinus Torvalds|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
841da177e4SLinus Torvalds|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
851da177e4SLinus Torvalds|--MOST 69 BITS LONG.
861da177e4SLinus Torvalds	.global	PITBL
871da177e4SLinus TorvaldsPITBL:
881da177e4SLinus Torvalds  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
891da177e4SLinus Torvalds  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
901da177e4SLinus Torvalds  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
911da177e4SLinus Torvalds  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
921da177e4SLinus Torvalds  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
931da177e4SLinus Torvalds  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
941da177e4SLinus Torvalds  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
951da177e4SLinus Torvalds  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
961da177e4SLinus Torvalds  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
971da177e4SLinus Torvalds  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
981da177e4SLinus Torvalds  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
991da177e4SLinus Torvalds  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
1001da177e4SLinus Torvalds  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
1011da177e4SLinus Torvalds  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
1021da177e4SLinus Torvalds  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
1031da177e4SLinus Torvalds  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
1041da177e4SLinus Torvalds  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
1051da177e4SLinus Torvalds  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
1061da177e4SLinus Torvalds  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
1071da177e4SLinus Torvalds  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
1081da177e4SLinus Torvalds  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
1091da177e4SLinus Torvalds  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
1101da177e4SLinus Torvalds  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
1111da177e4SLinus Torvalds  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
1121da177e4SLinus Torvalds  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
1131da177e4SLinus Torvalds  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
1141da177e4SLinus Torvalds  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
1151da177e4SLinus Torvalds  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
1161da177e4SLinus Torvalds  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
1171da177e4SLinus Torvalds  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
1181da177e4SLinus Torvalds  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
1191da177e4SLinus Torvalds  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
1201da177e4SLinus Torvalds  .long  0x00000000,0x00000000,0x00000000,0x00000000
1211da177e4SLinus Torvalds  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
1221da177e4SLinus Torvalds  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
1231da177e4SLinus Torvalds  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
1241da177e4SLinus Torvalds  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
1251da177e4SLinus Torvalds  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
1261da177e4SLinus Torvalds  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
1271da177e4SLinus Torvalds  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
1281da177e4SLinus Torvalds  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
1291da177e4SLinus Torvalds  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
1301da177e4SLinus Torvalds  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
1311da177e4SLinus Torvalds  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
1321da177e4SLinus Torvalds  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
1331da177e4SLinus Torvalds  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
1341da177e4SLinus Torvalds  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
1351da177e4SLinus Torvalds  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
1361da177e4SLinus Torvalds  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
1371da177e4SLinus Torvalds  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
1381da177e4SLinus Torvalds  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
1391da177e4SLinus Torvalds  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
1401da177e4SLinus Torvalds  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
1411da177e4SLinus Torvalds  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
1421da177e4SLinus Torvalds  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
1431da177e4SLinus Torvalds  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
1441da177e4SLinus Torvalds  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
1451da177e4SLinus Torvalds  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
1461da177e4SLinus Torvalds  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
1471da177e4SLinus Torvalds  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
1481da177e4SLinus Torvalds  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
1491da177e4SLinus Torvalds  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
1501da177e4SLinus Torvalds  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
1511da177e4SLinus Torvalds  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
1521da177e4SLinus Torvalds  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
1531da177e4SLinus Torvalds
1541da177e4SLinus Torvalds	.set	INARG,FP_SCR4
1551da177e4SLinus Torvalds
1561da177e4SLinus Torvalds	.set	TWOTO63,L_SCR1
1571da177e4SLinus Torvalds	.set	ENDFLAG,L_SCR2
1581da177e4SLinus Torvalds	.set	N,L_SCR3
1591da177e4SLinus Torvalds
1601da177e4SLinus Torvalds	| xref	t_frcinx
1611da177e4SLinus Torvalds	|xref	t_extdnrm
1621da177e4SLinus Torvalds
1631da177e4SLinus Torvalds	.global	stand
1641da177e4SLinus Torvaldsstand:
1651da177e4SLinus Torvalds|--TAN(X) = X FOR DENORMALIZED X
1661da177e4SLinus Torvalds
1671da177e4SLinus Torvalds	bra		t_extdnrm
1681da177e4SLinus Torvalds
1691da177e4SLinus Torvalds	.global	stan
1701da177e4SLinus Torvaldsstan:
1711da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...LOAD INPUT
1721da177e4SLinus Torvalds
1731da177e4SLinus Torvalds	movel		(%a0),%d0
1741da177e4SLinus Torvalds	movew		4(%a0),%d0
1751da177e4SLinus Torvalds	andil		#0x7FFFFFFF,%d0
1761da177e4SLinus Torvalds
1771da177e4SLinus Torvalds	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
1781da177e4SLinus Torvalds	bges		TANOK1
1791da177e4SLinus Torvalds	bra		TANSM
1801da177e4SLinus TorvaldsTANOK1:
1811da177e4SLinus Torvalds	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
1821da177e4SLinus Torvalds	blts		TANMAIN
1831da177e4SLinus Torvalds	bra		REDUCEX
1841da177e4SLinus Torvalds
1851da177e4SLinus Torvalds
1861da177e4SLinus TorvaldsTANMAIN:
1871da177e4SLinus Torvalds|--THIS IS THE USUAL CASE, |X| <= 15 PI.
1881da177e4SLinus Torvalds|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
1891da177e4SLinus Torvalds	fmovex		%fp0,%fp1
1901da177e4SLinus Torvalds	fmuld		TWOBYPI,%fp1	| ...X*2/PI
1911da177e4SLinus Torvalds
1921da177e4SLinus Torvalds|--HIDE THE NEXT TWO INSTRUCTIONS
1931da177e4SLinus Torvalds	leal		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
1941da177e4SLinus Torvalds
1951da177e4SLinus Torvalds|--FP1 IS NOW READY
1961da177e4SLinus Torvalds	fmovel		%fp1,%d0		| ...CONVERT TO INTEGER
1971da177e4SLinus Torvalds
1981da177e4SLinus Torvalds	asll		#4,%d0
1991da177e4SLinus Torvalds	addal		%d0,%a1		| ...ADDRESS N*PIBY2 IN Y1, Y2
2001da177e4SLinus Torvalds
2011da177e4SLinus Torvalds	fsubx		(%a1)+,%fp0	| ...X-Y1
2021da177e4SLinus Torvalds|--HIDE THE NEXT ONE
2031da177e4SLinus Torvalds
2041da177e4SLinus Torvalds	fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
2051da177e4SLinus Torvalds
2061da177e4SLinus Torvalds	rorl		#5,%d0
2071da177e4SLinus Torvalds	andil		#0x80000000,%d0	| ...D0 WAS ODD IFF D0 < 0
2081da177e4SLinus Torvalds
2091da177e4SLinus TorvaldsTANCONT:
2101da177e4SLinus Torvalds
2111da177e4SLinus Torvalds	cmpil		#0,%d0
2121da177e4SLinus Torvalds	blt		NODD
2131da177e4SLinus Torvalds
2141da177e4SLinus Torvalds	fmovex		%fp0,%fp1
2151da177e4SLinus Torvalds	fmulx		%fp1,%fp1		| ...S = R*R
2161da177e4SLinus Torvalds
2171da177e4SLinus Torvalds	fmoved		TANQ4,%fp3
2181da177e4SLinus Torvalds	fmoved		TANP3,%fp2
2191da177e4SLinus Torvalds
2201da177e4SLinus Torvalds	fmulx		%fp1,%fp3		| ...SQ4
2211da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...SP3
2221da177e4SLinus Torvalds
2231da177e4SLinus Torvalds	faddd		TANQ3,%fp3	| ...Q3+SQ4
2241da177e4SLinus Torvalds	faddx		TANP2,%fp2	| ...P2+SP3
2251da177e4SLinus Torvalds
2261da177e4SLinus Torvalds	fmulx		%fp1,%fp3		| ...S(Q3+SQ4)
2271da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...S(P2+SP3)
2281da177e4SLinus Torvalds
2291da177e4SLinus Torvalds	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
2301da177e4SLinus Torvalds	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)
2311da177e4SLinus Torvalds
2321da177e4SLinus Torvalds	fmulx		%fp1,%fp3		| ...S(Q2+S(Q3+SQ4))
2331da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...S(P1+S(P2+SP3))
2341da177e4SLinus Torvalds
2351da177e4SLinus Torvalds	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
2361da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...RS(P1+S(P2+SP3))
2371da177e4SLinus Torvalds
2381da177e4SLinus Torvalds	fmulx		%fp3,%fp1		| ...S(Q1+S(Q2+S(Q3+SQ4)))
2391da177e4SLinus Torvalds
2401da177e4SLinus Torvalds
2411da177e4SLinus Torvalds	faddx		%fp2,%fp0		| ...R+RS(P1+S(P2+SP3))
2421da177e4SLinus Torvalds
2431da177e4SLinus Torvalds
2441da177e4SLinus Torvalds	fadds		#0x3F800000,%fp1	| ...1+S(Q1+...)
2451da177e4SLinus Torvalds
2461da177e4SLinus Torvalds	fmovel		%d1,%fpcr		|restore users exceptions
2471da177e4SLinus Torvalds	fdivx		%fp1,%fp0		|last inst - possible exception set
2481da177e4SLinus Torvalds
2491da177e4SLinus Torvalds	bra		t_frcinx
2501da177e4SLinus Torvalds
2511da177e4SLinus TorvaldsNODD:
2521da177e4SLinus Torvalds	fmovex		%fp0,%fp1
2531da177e4SLinus Torvalds	fmulx		%fp0,%fp0		| ...S = R*R
2541da177e4SLinus Torvalds
2551da177e4SLinus Torvalds	fmoved		TANQ4,%fp3
2561da177e4SLinus Torvalds	fmoved		TANP3,%fp2
2571da177e4SLinus Torvalds
2581da177e4SLinus Torvalds	fmulx		%fp0,%fp3		| ...SQ4
2591da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...SP3
2601da177e4SLinus Torvalds
2611da177e4SLinus Torvalds	faddd		TANQ3,%fp3	| ...Q3+SQ4
2621da177e4SLinus Torvalds	faddx		TANP2,%fp2	| ...P2+SP3
2631da177e4SLinus Torvalds
2641da177e4SLinus Torvalds	fmulx		%fp0,%fp3		| ...S(Q3+SQ4)
2651da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...S(P2+SP3)
2661da177e4SLinus Torvalds
2671da177e4SLinus Torvalds	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
2681da177e4SLinus Torvalds	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)
2691da177e4SLinus Torvalds
2701da177e4SLinus Torvalds	fmulx		%fp0,%fp3		| ...S(Q2+S(Q3+SQ4))
2711da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...S(P1+S(P2+SP3))
2721da177e4SLinus Torvalds
2731da177e4SLinus Torvalds	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
2741da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...RS(P1+S(P2+SP3))
2751da177e4SLinus Torvalds
2761da177e4SLinus Torvalds	fmulx		%fp3,%fp0		| ...S(Q1+S(Q2+S(Q3+SQ4)))
2771da177e4SLinus Torvalds
2781da177e4SLinus Torvalds
2791da177e4SLinus Torvalds	faddx		%fp2,%fp1		| ...R+RS(P1+S(P2+SP3))
2801da177e4SLinus Torvalds	fadds		#0x3F800000,%fp0	| ...1+S(Q1+...)
2811da177e4SLinus Torvalds
2821da177e4SLinus Torvalds
2831da177e4SLinus Torvalds	fmovex		%fp1,-(%sp)
2841da177e4SLinus Torvalds	eoril		#0x80000000,(%sp)
2851da177e4SLinus Torvalds
2861da177e4SLinus Torvalds	fmovel		%d1,%fpcr		|restore users exceptions
2871da177e4SLinus Torvalds	fdivx		(%sp)+,%fp0	|last inst - possible exception set
2881da177e4SLinus Torvalds
2891da177e4SLinus Torvalds	bra		t_frcinx
2901da177e4SLinus Torvalds
2911da177e4SLinus TorvaldsTANBORS:
2921da177e4SLinus Torvalds|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
2931da177e4SLinus Torvalds|--IF |X| < 2**(-40), RETURN X OR 1.
2941da177e4SLinus Torvalds	cmpil		#0x3FFF8000,%d0
2951da177e4SLinus Torvalds	bgts		REDUCEX
2961da177e4SLinus Torvalds
2971da177e4SLinus TorvaldsTANSM:
2981da177e4SLinus Torvalds
2991da177e4SLinus Torvalds	fmovex		%fp0,-(%sp)
3001da177e4SLinus Torvalds	fmovel		%d1,%fpcr		 |restore users exceptions
3011da177e4SLinus Torvalds	fmovex		(%sp)+,%fp0	|last inst - possible exception set
3021da177e4SLinus Torvalds
3031da177e4SLinus Torvalds	bra		t_frcinx
3041da177e4SLinus Torvalds
3051da177e4SLinus Torvalds
3061da177e4SLinus TorvaldsREDUCEX:
3071da177e4SLinus Torvalds|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
3081da177e4SLinus Torvalds|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
3091da177e4SLinus Torvalds|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
3101da177e4SLinus Torvalds
3111da177e4SLinus Torvalds	fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5
3121da177e4SLinus Torvalds	movel		%d2,-(%a7)
3131da177e4SLinus Torvalds        fmoves         #0x00000000,%fp1
3141da177e4SLinus Torvalds
3151da177e4SLinus Torvalds|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
3161da177e4SLinus Torvalds|--there is a danger of unwanted overflow in first LOOP iteration.  In this
3171da177e4SLinus Torvalds|--case, reduce argument by one remainder step to make subsequent reduction
3181da177e4SLinus Torvalds|--safe.
3191da177e4SLinus Torvalds	cmpil	#0x7ffeffff,%d0		|is argument dangerously large?
3201da177e4SLinus Torvalds	bnes	LOOP
3211da177e4SLinus Torvalds	movel	#0x7ffe0000,FP_SCR2(%a6)	|yes
3221da177e4SLinus Torvalds|					;create 2**16383*PI/2
3231da177e4SLinus Torvalds	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
3241da177e4SLinus Torvalds	clrl	FP_SCR2+8(%a6)
3251da177e4SLinus Torvalds	ftstx	%fp0			|test sign of argument
3261da177e4SLinus Torvalds	movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383*
3271da177e4SLinus Torvalds|					;PI/2 at FP_SCR3
3281da177e4SLinus Torvalds	movel	#0x85a308d3,FP_SCR3+4(%a6)
3291da177e4SLinus Torvalds	clrl   FP_SCR3+8(%a6)
3301da177e4SLinus Torvalds	fblt	red_neg
3311da177e4SLinus Torvalds	orw	#0x8000,FP_SCR2(%a6)	|positive arg
3321da177e4SLinus Torvalds	orw	#0x8000,FP_SCR3(%a6)
3331da177e4SLinus Torvaldsred_neg:
3341da177e4SLinus Torvalds	faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact
3351da177e4SLinus Torvalds	fmovex  %fp0,%fp1		|save high result in fp1
3361da177e4SLinus Torvalds	faddx  FP_SCR3(%a6),%fp0		|low part of reduction
3371da177e4SLinus Torvalds	fsubx  %fp0,%fp1			|determine low component of result
3381da177e4SLinus Torvalds	faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument.
3391da177e4SLinus Torvalds
3401da177e4SLinus Torvalds|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
3411da177e4SLinus Torvalds|--integer quotient will be stored in N
3421da177e4SLinus Torvalds|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
3431da177e4SLinus Torvalds
3441da177e4SLinus TorvaldsLOOP:
3451da177e4SLinus Torvalds	fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2
3461da177e4SLinus Torvalds	movew		INARG(%a6),%d0
3471da177e4SLinus Torvalds        movel          %d0,%a1		| ...save a copy of D0
3481da177e4SLinus Torvalds	andil		#0x00007FFF,%d0
3491da177e4SLinus Torvalds	subil		#0x00003FFF,%d0	| ...D0 IS K
3501da177e4SLinus Torvalds	cmpil		#28,%d0
3511da177e4SLinus Torvalds	bles		LASTLOOP
3521da177e4SLinus TorvaldsCONTLOOP:
3531da177e4SLinus Torvalds	subil		#27,%d0	 | ...D0 IS L := K-27
3541da177e4SLinus Torvalds	movel		#0,ENDFLAG(%a6)
3551da177e4SLinus Torvalds	bras		WORK
3561da177e4SLinus TorvaldsLASTLOOP:
3571da177e4SLinus Torvalds	clrl		%d0		| ...D0 IS L := 0
3581da177e4SLinus Torvalds	movel		#1,ENDFLAG(%a6)
3591da177e4SLinus Torvalds
3601da177e4SLinus TorvaldsWORK:
3611da177e4SLinus Torvalds|--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
3621da177e4SLinus Torvalds|--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
3631da177e4SLinus Torvalds
3641da177e4SLinus Torvalds|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
3651da177e4SLinus Torvalds|--2**L * (PIby2_1), 2**L * (PIby2_2)
3661da177e4SLinus Torvalds
3671da177e4SLinus Torvalds	movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI
3681da177e4SLinus Torvalds	subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI)
3691da177e4SLinus Torvalds
3701da177e4SLinus Torvalds	movel		#0xA2F9836E,FP_SCR1+4(%a6)
3711da177e4SLinus Torvalds	movel		#0x4E44152A,FP_SCR1+8(%a6)
3721da177e4SLinus Torvalds	movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI)
3731da177e4SLinus Torvalds
3741da177e4SLinus Torvalds	fmovex		%fp0,%fp2
3751da177e4SLinus Torvalds	fmulx		FP_SCR1(%a6),%fp2
3761da177e4SLinus Torvalds|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
3771da177e4SLinus Torvalds|--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
3781da177e4SLinus Torvalds|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
3791da177e4SLinus Torvalds|--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
3801da177e4SLinus Torvalds|--US THE DESIRED VALUE IN FLOATING POINT.
3811da177e4SLinus Torvalds
3821da177e4SLinus Torvalds|--HIDE SIX CYCLES OF INSTRUCTION
3831da177e4SLinus Torvalds        movel		%a1,%d2
3841da177e4SLinus Torvalds        swap		%d2
3851da177e4SLinus Torvalds	andil		#0x80000000,%d2
3861da177e4SLinus Torvalds	oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL
3871da177e4SLinus Torvalds	movel		%d2,TWOTO63(%a6)
3881da177e4SLinus Torvalds
3891da177e4SLinus Torvalds	movel		%d0,%d2
3901da177e4SLinus Torvalds	addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2)
3911da177e4SLinus Torvalds
3921da177e4SLinus Torvalds|--FP2 IS READY
3931da177e4SLinus Torvalds	fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP1 IS ROUNDED
3941da177e4SLinus Torvalds
3951da177e4SLinus Torvalds|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
3961da177e4SLinus Torvalds        movew		%d2,FP_SCR2(%a6)
3971da177e4SLinus Torvalds	clrw           FP_SCR2+2(%a6)
3981da177e4SLinus Torvalds	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
3991da177e4SLinus Torvalds	clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1
4001da177e4SLinus Torvalds
4011da177e4SLinus Torvalds|--FP2 IS READY
4021da177e4SLinus Torvalds	fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N
4031da177e4SLinus Torvalds
4041da177e4SLinus Torvalds	addil		#0x00003FDD,%d0
4051da177e4SLinus Torvalds        movew		%d0,FP_SCR3(%a6)
4061da177e4SLinus Torvalds	clrw           FP_SCR3+2(%a6)
4071da177e4SLinus Torvalds	movel		#0x85A308D3,FP_SCR3+4(%a6)
4081da177e4SLinus Torvalds	clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2
4091da177e4SLinus Torvalds
4101da177e4SLinus Torvalds	movel		ENDFLAG(%a6),%d0
4111da177e4SLinus Torvalds
4121da177e4SLinus Torvalds|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
4131da177e4SLinus Torvalds|--P2 = 2**(L) * Piby2_2
4141da177e4SLinus Torvalds	fmovex		%fp2,%fp4
4151da177e4SLinus Torvalds	fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1
4161da177e4SLinus Torvalds	fmovex		%fp2,%fp5
4171da177e4SLinus Torvalds	fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2
4181da177e4SLinus Torvalds	fmovex		%fp4,%fp3
4191da177e4SLinus Torvalds|--we want P+p = W+w  but  |p| <= half ulp of P
4201da177e4SLinus Torvalds|--Then, we need to compute  A := R-P   and  a := r-p
4211da177e4SLinus Torvalds	faddx		%fp5,%fp3			| ...FP3 is P
4221da177e4SLinus Torvalds	fsubx		%fp3,%fp4			| ...W-P
4231da177e4SLinus Torvalds
4241da177e4SLinus Torvalds	fsubx		%fp3,%fp0			| ...FP0 is A := R - P
4251da177e4SLinus Torvalds        faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w
4261da177e4SLinus Torvalds
4271da177e4SLinus Torvalds	fmovex		%fp0,%fp3			| ...FP3 A
4281da177e4SLinus Torvalds	fsubx		%fp4,%fp1			| ...FP1 is a := r - p
4291da177e4SLinus Torvalds
4301da177e4SLinus Torvalds|--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
4311da177e4SLinus Torvalds|--|r| <= half ulp of R.
4321da177e4SLinus Torvalds	faddx		%fp1,%fp0			| ...FP0 is R := A+a
4331da177e4SLinus Torvalds|--No need to calculate r if this is the last loop
4341da177e4SLinus Torvalds	cmpil		#0,%d0
4351da177e4SLinus Torvalds	bgt		RESTORE
4361da177e4SLinus Torvalds
4371da177e4SLinus Torvalds|--Need to calculate r
4381da177e4SLinus Torvalds	fsubx		%fp0,%fp3			| ...A-R
4391da177e4SLinus Torvalds	faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a
4401da177e4SLinus Torvalds	bra		LOOP
4411da177e4SLinus Torvalds
4421da177e4SLinus TorvaldsRESTORE:
4431da177e4SLinus Torvalds        fmovel		%fp2,N(%a6)
4441da177e4SLinus Torvalds	movel		(%a7)+,%d2
4451da177e4SLinus Torvalds	fmovemx	(%a7)+,%fp2-%fp5
4461da177e4SLinus Torvalds
4471da177e4SLinus Torvalds
4481da177e4SLinus Torvalds	movel		N(%a6),%d0
4491da177e4SLinus Torvalds        rorl		#1,%d0
4501da177e4SLinus Torvalds
4511da177e4SLinus Torvalds
4521da177e4SLinus Torvalds	bra		TANCONT
4531da177e4SLinus Torvalds
4541da177e4SLinus Torvalds	|end
455