xref: /openbmc/linux/arch/m68k/fpsp040/setox.S (revision e5451c8f8330e03ad3cfa16048b4daf961af434f)
11da177e4SLinus Torvalds|
21da177e4SLinus Torvalds|	setox.sa 3.1 12/10/90
31da177e4SLinus Torvalds|
41da177e4SLinus Torvalds|	The entry point setox computes the exponential of a value.
51da177e4SLinus Torvalds|	setoxd does the same except the input value is a denormalized
61da177e4SLinus Torvalds|	number.	setoxm1 computes exp(X)-1, and setoxm1d computes
71da177e4SLinus Torvalds|	exp(X)-1 for denormalized X.
81da177e4SLinus Torvalds|
91da177e4SLinus Torvalds|	INPUT
101da177e4SLinus Torvalds|	-----
111da177e4SLinus Torvalds|	Double-extended value in memory location pointed to by address
121da177e4SLinus Torvalds|	register a0.
131da177e4SLinus Torvalds|
141da177e4SLinus Torvalds|	OUTPUT
151da177e4SLinus Torvalds|	------
161da177e4SLinus Torvalds|	exp(X) or exp(X)-1 returned in floating-point register fp0.
171da177e4SLinus Torvalds|
181da177e4SLinus Torvalds|	ACCURACY and MONOTONICITY
191da177e4SLinus Torvalds|	-------------------------
201da177e4SLinus Torvalds|	The returned result is within 0.85 ulps in 64 significant bit, i.e.
211da177e4SLinus Torvalds|	within 0.5001 ulp to 53 bits if the result is subsequently rounded
221da177e4SLinus Torvalds|	to double precision. The result is provably monotonic in double
231da177e4SLinus Torvalds|	precision.
241da177e4SLinus Torvalds|
251da177e4SLinus Torvalds|	SPEED
261da177e4SLinus Torvalds|	-----
271da177e4SLinus Torvalds|	Two timings are measured, both in the copy-back mode. The
281da177e4SLinus Torvalds|	first one is measured when the function is invoked the first time
291da177e4SLinus Torvalds|	(so the instructions and data are not in cache), and the
301da177e4SLinus Torvalds|	second one is measured when the function is reinvoked at the same
311da177e4SLinus Torvalds|	input argument.
321da177e4SLinus Torvalds|
331da177e4SLinus Torvalds|	The program setox takes approximately 210/190 cycles for input
341da177e4SLinus Torvalds|	argument X whose magnitude is less than 16380 log2, which
351da177e4SLinus Torvalds|	is the usual situation.	For the less common arguments,
361da177e4SLinus Torvalds|	depending on their values, the program may run faster or slower --
371da177e4SLinus Torvalds|	but no worse than 10% slower even in the extreme cases.
381da177e4SLinus Torvalds|
391da177e4SLinus Torvalds|	The program setoxm1 takes approximately ??? / ??? cycles for input
401da177e4SLinus Torvalds|	argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
411da177e4SLinus Torvalds|	approximately ??? / ??? cycles. For the less common arguments,
421da177e4SLinus Torvalds|	depending on their values, the program may run faster or slower --
431da177e4SLinus Torvalds|	but no worse than 10% slower even in the extreme cases.
441da177e4SLinus Torvalds|
451da177e4SLinus Torvalds|	ALGORITHM and IMPLEMENTATION NOTES
461da177e4SLinus Torvalds|	----------------------------------
471da177e4SLinus Torvalds|
481da177e4SLinus Torvalds|	setoxd
491da177e4SLinus Torvalds|	------
501da177e4SLinus Torvalds|	Step 1.	Set ans := 1.0
511da177e4SLinus Torvalds|
521da177e4SLinus Torvalds|	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.
531da177e4SLinus Torvalds|	Notes:	This will always generate one exception -- inexact.
541da177e4SLinus Torvalds|
551da177e4SLinus Torvalds|
561da177e4SLinus Torvalds|	setox
571da177e4SLinus Torvalds|	-----
581da177e4SLinus Torvalds|
591da177e4SLinus Torvalds|	Step 1.	Filter out extreme cases of input argument.
601da177e4SLinus Torvalds|		1.1	If |X| >= 2^(-65), go to Step 1.3.
611da177e4SLinus Torvalds|		1.2	Go to Step 7.
621da177e4SLinus Torvalds|		1.3	If |X| < 16380 log(2), go to Step 2.
631da177e4SLinus Torvalds|		1.4	Go to Step 8.
641da177e4SLinus Torvalds|	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
651da177e4SLinus Torvalds|		 To avoid the use of floating-point comparisons, a
661da177e4SLinus Torvalds|		 compact representation of |X| is used. This format is a
671da177e4SLinus Torvalds|		 32-bit integer, the upper (more significant) 16 bits are
681da177e4SLinus Torvalds|		 the sign and biased exponent field of |X|; the lower 16
691da177e4SLinus Torvalds|		 bits are the 16 most significant fraction (including the
701da177e4SLinus Torvalds|		 explicit bit) bits of |X|. Consequently, the comparisons
711da177e4SLinus Torvalds|		 in Steps 1.1 and 1.3 can be performed by integer comparison.
721da177e4SLinus Torvalds|		 Note also that the constant 16380 log(2) used in Step 1.3
731da177e4SLinus Torvalds|		 is also in the compact form. Thus taking the branch
741da177e4SLinus Torvalds|		 to Step 2 guarantees |X| < 16380 log(2). There is no harm
751da177e4SLinus Torvalds|		 to have a small number of cases where |X| is less than,
761da177e4SLinus Torvalds|		 but close to, 16380 log(2) and the branch to Step 9 is
771da177e4SLinus Torvalds|		 taken.
781da177e4SLinus Torvalds|
791da177e4SLinus Torvalds|	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
801da177e4SLinus Torvalds|		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
811da177e4SLinus Torvalds|		2.2	N := round-to-nearest-integer( X * 64/log2 ).
821da177e4SLinus Torvalds|		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
831da177e4SLinus Torvalds|		2.4	Calculate	M = (N - J)/64; so N = 64M + J.
841da177e4SLinus Torvalds|		2.5	Calculate the address of the stored value of 2^(J/64).
851da177e4SLinus Torvalds|		2.6	Create the value Scale = 2^M.
861da177e4SLinus Torvalds|	Notes:	The calculation in 2.2 is really performed by
871da177e4SLinus Torvalds|
881da177e4SLinus Torvalds|			Z := X * constant
891da177e4SLinus Torvalds|			N := round-to-nearest-integer(Z)
901da177e4SLinus Torvalds|
911da177e4SLinus Torvalds|		 where
921da177e4SLinus Torvalds|
931da177e4SLinus Torvalds|			constant := single-precision( 64/log 2 ).
941da177e4SLinus Torvalds|
951da177e4SLinus Torvalds|		 Using a single-precision constant avoids memory access.
961da177e4SLinus Torvalds|		 Another effect of using a single-precision "constant" is
971da177e4SLinus Torvalds|		 that the calculated value Z is
981da177e4SLinus Torvalds|
991da177e4SLinus Torvalds|			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
1001da177e4SLinus Torvalds|
1011da177e4SLinus Torvalds|		 This error has to be considered later in Steps 3 and 4.
1021da177e4SLinus Torvalds|
1031da177e4SLinus Torvalds|	Step 3.	Calculate X - N*log2/64.
1041da177e4SLinus Torvalds|		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
1051da177e4SLinus Torvalds|		3.2	R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
1061da177e4SLinus Torvalds|	Notes:	a) The way L1 and L2 are chosen ensures L1+L2 approximate
1071da177e4SLinus Torvalds|		 the value	-log2/64	to 88 bits of accuracy.
1081da177e4SLinus Torvalds|		 b) N*L1 is exact because N is no longer than 22 bits and
1091da177e4SLinus Torvalds|		 L1 is no longer than 24 bits.
1101da177e4SLinus Torvalds|		 c) The calculation X+N*L1 is also exact due to cancellation.
1111da177e4SLinus Torvalds|		 Thus, R is practically X+N(L1+L2) to full 64 bits.
1121da177e4SLinus Torvalds|		 d) It is important to estimate how large can |R| be after
1131da177e4SLinus Torvalds|		 Step 3.2.
1141da177e4SLinus Torvalds|
1151da177e4SLinus Torvalds|			N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
1161da177e4SLinus Torvalds|			X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5
1171da177e4SLinus Torvalds|			X*64/log2 - N	=	f - eps*X 64/log2
1181da177e4SLinus Torvalds|			X - N*log2/64	=	f*log2/64 - eps*X
1191da177e4SLinus Torvalds|
1201da177e4SLinus Torvalds|
1211da177e4SLinus Torvalds|		 Now |X| <= 16446 log2, thus
1221da177e4SLinus Torvalds|
1231da177e4SLinus Torvalds|			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
1241da177e4SLinus Torvalds|					<= 0.57 log2/64.
1251da177e4SLinus Torvalds|		 This bound will be used in Step 4.
1261da177e4SLinus Torvalds|
1271da177e4SLinus Torvalds|	Step 4.	Approximate exp(R)-1 by a polynomial
1281da177e4SLinus Torvalds|			p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
1291da177e4SLinus Torvalds|	Notes:	a) In order to reduce memory access, the coefficients are
1301da177e4SLinus Torvalds|		 made as "short" as possible: A1 (which is 1/2), A4 and A5
1311da177e4SLinus Torvalds|		 are single precision; A2 and A3 are double precision.
1321da177e4SLinus Torvalds|		 b) Even with the restrictions above,
1331da177e4SLinus Torvalds|			|p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
1341da177e4SLinus Torvalds|		 Note that 0.0062 is slightly bigger than 0.57 log2/64.
1351da177e4SLinus Torvalds|		 c) To fully utilize the pipeline, p is separated into
1361da177e4SLinus Torvalds|		 two independent pieces of roughly equal complexities
1371da177e4SLinus Torvalds|			p = [ R + R*S*(A2 + S*A4) ]	+
1381da177e4SLinus Torvalds|				[ S*(A1 + S*(A3 + S*A5)) ]
1391da177e4SLinus Torvalds|		 where S = R*R.
1401da177e4SLinus Torvalds|
1411da177e4SLinus Torvalds|	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
1421da177e4SLinus Torvalds|				ans := T + ( T*p + t)
1431da177e4SLinus Torvalds|		 where T and t are the stored values for 2^(J/64).
1441da177e4SLinus Torvalds|	Notes:	2^(J/64) is stored as T and t where T+t approximates
1451da177e4SLinus Torvalds|		 2^(J/64) to roughly 85 bits; T is in extended precision
1461da177e4SLinus Torvalds|		 and t is in single precision. Note also that T is rounded
1471da177e4SLinus Torvalds|		 to 62 bits so that the last two bits of T are zero. The
1481da177e4SLinus Torvalds|		 reason for such a special form is that T-1, T-2, and T-8
1491da177e4SLinus Torvalds|		 will all be exact --- a property that will give much
1501da177e4SLinus Torvalds|		 more accurate computation of the function EXPM1.
1511da177e4SLinus Torvalds|
1521da177e4SLinus Torvalds|	Step 6.	Reconstruction of exp(X)
1531da177e4SLinus Torvalds|			exp(X) = 2^M * 2^(J/64) * exp(R).
1541da177e4SLinus Torvalds|		6.1	If AdjFlag = 0, go to 6.3
1551da177e4SLinus Torvalds|		6.2	ans := ans * AdjScale
1561da177e4SLinus Torvalds|		6.3	Restore the user FPCR
1571da177e4SLinus Torvalds|		6.4	Return ans := ans * Scale. Exit.
1581da177e4SLinus Torvalds|	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
1591da177e4SLinus Torvalds|		 |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
1601da177e4SLinus Torvalds|		 neither overflow nor underflow. If AdjFlag = 1, that
1611da177e4SLinus Torvalds|		 means that
1621da177e4SLinus Torvalds|			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
1631da177e4SLinus Torvalds|		 Hence, exp(X) may overflow or underflow or neither.
1641da177e4SLinus Torvalds|		 When that is the case, AdjScale = 2^(M1) where M1 is
1651da177e4SLinus Torvalds|		 approximately M. Thus 6.2 will never cause over/underflow.
1661da177e4SLinus Torvalds|		 Possible exception in 6.4 is overflow or underflow.
1671da177e4SLinus Torvalds|		 The inexact exception is not generated in 6.4. Although
1681da177e4SLinus Torvalds|		 one can argue that the inexact flag should always be
1691da177e4SLinus Torvalds|		 raised, to simulate that exception cost to much than the
1701da177e4SLinus Torvalds|		 flag is worth in practical uses.
1711da177e4SLinus Torvalds|
1721da177e4SLinus Torvalds|	Step 7.	Return 1 + X.
1731da177e4SLinus Torvalds|		7.1	ans := X
1741da177e4SLinus Torvalds|		7.2	Restore user FPCR.
1751da177e4SLinus Torvalds|		7.3	Return ans := 1 + ans. Exit
1761da177e4SLinus Torvalds|	Notes:	For non-zero X, the inexact exception will always be
1771da177e4SLinus Torvalds|		 raised by 7.3. That is the only exception raised by 7.3.
1781da177e4SLinus Torvalds|		 Note also that we use the FMOVEM instruction to move X
1791da177e4SLinus Torvalds|		 in Step 7.1 to avoid unnecessary trapping. (Although
1801da177e4SLinus Torvalds|		 the FMOVEM may not seem relevant since X is normalized,
1811da177e4SLinus Torvalds|		 the precaution will be useful in the library version of
1821da177e4SLinus Torvalds|		 this code where the separate entry for denormalized inputs
1831da177e4SLinus Torvalds|		 will be done away with.)
1841da177e4SLinus Torvalds|
1851da177e4SLinus Torvalds|	Step 8.	Handle exp(X) where |X| >= 16380log2.
1861da177e4SLinus Torvalds|		8.1	If |X| > 16480 log2, go to Step 9.
1871da177e4SLinus Torvalds|		(mimic 2.2 - 2.6)
1881da177e4SLinus Torvalds|		8.2	N := round-to-integer( X * 64/log2 )
1891da177e4SLinus Torvalds|		8.3	Calculate J = N mod 64, J = 0,1,...,63
1901da177e4SLinus Torvalds|		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
1911da177e4SLinus Torvalds|		8.5	Calculate the address of the stored value 2^(J/64).
1921da177e4SLinus Torvalds|		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.
1931da177e4SLinus Torvalds|		8.7	Go to Step 3.
1941da177e4SLinus Torvalds|	Notes:	Refer to notes for 2.2 - 2.6.
1951da177e4SLinus Torvalds|
1961da177e4SLinus Torvalds|	Step 9.	Handle exp(X), |X| > 16480 log2.
1971da177e4SLinus Torvalds|		9.1	If X < 0, go to 9.3
1981da177e4SLinus Torvalds|		9.2	ans := Huge, go to 9.4
1991da177e4SLinus Torvalds|		9.3	ans := Tiny.
2001da177e4SLinus Torvalds|		9.4	Restore user FPCR.
2011da177e4SLinus Torvalds|		9.5	Return ans := ans * ans. Exit.
2021da177e4SLinus Torvalds|	Notes:	Exp(X) will surely overflow or underflow, depending on
2031da177e4SLinus Torvalds|		 X's sign. "Huge" and "Tiny" are respectively large/tiny
2041da177e4SLinus Torvalds|		 extended-precision numbers whose square over/underflow
2051da177e4SLinus Torvalds|		 with an inexact result. Thus, 9.5 always raises the
2061da177e4SLinus Torvalds|		 inexact together with either overflow or underflow.
2071da177e4SLinus Torvalds|
2081da177e4SLinus Torvalds|
2091da177e4SLinus Torvalds|	setoxm1d
2101da177e4SLinus Torvalds|	--------
2111da177e4SLinus Torvalds|
2121da177e4SLinus Torvalds|	Step 1.	Set ans := 0
2131da177e4SLinus Torvalds|
2141da177e4SLinus Torvalds|	Step 2.	Return	ans := X + ans. Exit.
2151da177e4SLinus Torvalds|	Notes:	This will return X with the appropriate rounding
2161da177e4SLinus Torvalds|		 precision prescribed by the user FPCR.
2171da177e4SLinus Torvalds|
2181da177e4SLinus Torvalds|	setoxm1
2191da177e4SLinus Torvalds|	-------
2201da177e4SLinus Torvalds|
2211da177e4SLinus Torvalds|	Step 1.	Check |X|
2221da177e4SLinus Torvalds|		1.1	If |X| >= 1/4, go to Step 1.3.
2231da177e4SLinus Torvalds|		1.2	Go to Step 7.
2241da177e4SLinus Torvalds|		1.3	If |X| < 70 log(2), go to Step 2.
2251da177e4SLinus Torvalds|		1.4	Go to Step 10.
2261da177e4SLinus Torvalds|	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
2271da177e4SLinus Torvalds|		 However, it is conceivable |X| can be small very often
2281da177e4SLinus Torvalds|		 because EXPM1 is intended to evaluate exp(X)-1 accurately
2291da177e4SLinus Torvalds|		 when |X| is small. For further details on the comparisons,
2301da177e4SLinus Torvalds|		 see the notes on Step 1 of setox.
2311da177e4SLinus Torvalds|
2321da177e4SLinus Torvalds|	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
2331da177e4SLinus Torvalds|		2.1	N := round-to-nearest-integer( X * 64/log2 ).
2341da177e4SLinus Torvalds|		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
2351da177e4SLinus Torvalds|		2.3	Calculate	M = (N - J)/64; so N = 64M + J.
2361da177e4SLinus Torvalds|		2.4	Calculate the address of the stored value of 2^(J/64).
2371da177e4SLinus Torvalds|		2.5	Create the values Sc = 2^M and OnebySc := -2^(-M).
2381da177e4SLinus Torvalds|	Notes:	See the notes on Step 2 of setox.
2391da177e4SLinus Torvalds|
2401da177e4SLinus Torvalds|	Step 3.	Calculate X - N*log2/64.
2411da177e4SLinus Torvalds|		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
2421da177e4SLinus Torvalds|		3.2	R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
2431da177e4SLinus Torvalds|	Notes:	Applying the analysis of Step 3 of setox in this case
2441da177e4SLinus Torvalds|		 shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
2451da177e4SLinus Torvalds|		 this case).
2461da177e4SLinus Torvalds|
2471da177e4SLinus Torvalds|	Step 4.	Approximate exp(R)-1 by a polynomial
2481da177e4SLinus Torvalds|			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
2491da177e4SLinus Torvalds|	Notes:	a) In order to reduce memory access, the coefficients are
2501da177e4SLinus Torvalds|		 made as "short" as possible: A1 (which is 1/2), A5 and A6
2511da177e4SLinus Torvalds|		 are single precision; A2, A3 and A4 are double precision.
2521da177e4SLinus Torvalds|		 b) Even with the restriction above,
2531da177e4SLinus Torvalds|			|p - (exp(R)-1)| <	|R| * 2^(-72.7)
2541da177e4SLinus Torvalds|		 for all |R| <= 0.0055.
2551da177e4SLinus Torvalds|		 c) To fully utilize the pipeline, p is separated into
2561da177e4SLinus Torvalds|		 two independent pieces of roughly equal complexity
2571da177e4SLinus Torvalds|			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+
2581da177e4SLinus Torvalds|				[ R + S*(A1 + S*(A3 + S*A5)) ]
2591da177e4SLinus Torvalds|		 where S = R*R.
2601da177e4SLinus Torvalds|
2611da177e4SLinus Torvalds|	Step 5.	Compute 2^(J/64)*p by
2621da177e4SLinus Torvalds|				p := T*p
2631da177e4SLinus Torvalds|		 where T and t are the stored values for 2^(J/64).
2641da177e4SLinus Torvalds|	Notes:	2^(J/64) is stored as T and t where T+t approximates
2651da177e4SLinus Torvalds|		 2^(J/64) to roughly 85 bits; T is in extended precision
2661da177e4SLinus Torvalds|		 and t is in single precision. Note also that T is rounded
2671da177e4SLinus Torvalds|		 to 62 bits so that the last two bits of T are zero. The
2681da177e4SLinus Torvalds|		 reason for such a special form is that T-1, T-2, and T-8
2691da177e4SLinus Torvalds|		 will all be exact --- a property that will be exploited
2701da177e4SLinus Torvalds|		 in Step 6 below. The total relative error in p is no
2711da177e4SLinus Torvalds|		 bigger than 2^(-67.7) compared to the final result.
2721da177e4SLinus Torvalds|
2731da177e4SLinus Torvalds|	Step 6.	Reconstruction of exp(X)-1
2741da177e4SLinus Torvalds|			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
2751da177e4SLinus Torvalds|		6.1	If M <= 63, go to Step 6.3.
2761da177e4SLinus Torvalds|		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6
2771da177e4SLinus Torvalds|		6.3	If M >= -3, go to 6.5.
2781da177e4SLinus Torvalds|		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6
2791da177e4SLinus Torvalds|		6.5	ans := (T + OnebySc) + (p + t).
2801da177e4SLinus Torvalds|		6.6	Restore user FPCR.
2811da177e4SLinus Torvalds|		6.7	Return ans := Sc * ans. Exit.
2821da177e4SLinus Torvalds|	Notes:	The various arrangements of the expressions give accurate
2831da177e4SLinus Torvalds|		 evaluations.
2841da177e4SLinus Torvalds|
2851da177e4SLinus Torvalds|	Step 7.	exp(X)-1 for |X| < 1/4.
2861da177e4SLinus Torvalds|		7.1	If |X| >= 2^(-65), go to Step 9.
2871da177e4SLinus Torvalds|		7.2	Go to Step 8.
2881da177e4SLinus Torvalds|
2891da177e4SLinus Torvalds|	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).
2901da177e4SLinus Torvalds|		8.1	If |X| < 2^(-16312), goto 8.3
2911da177e4SLinus Torvalds|		8.2	Restore FPCR; return ans := X - 2^(-16382). Exit.
2921da177e4SLinus Torvalds|		8.3	X := X * 2^(140).
2931da177e4SLinus Torvalds|		8.4	Restore FPCR; ans := ans - 2^(-16382).
2941da177e4SLinus Torvalds|		 Return ans := ans*2^(140). Exit
2951da177e4SLinus Torvalds|	Notes:	The idea is to return "X - tiny" under the user
2961da177e4SLinus Torvalds|		 precision and rounding modes. To avoid unnecessary
2971da177e4SLinus Torvalds|		 inefficiency, we stay away from denormalized numbers the
2981da177e4SLinus Torvalds|		 best we can. For |X| >= 2^(-16312), the straightforward
2991da177e4SLinus Torvalds|		 8.2 generates the inexact exception as the case warrants.
3001da177e4SLinus Torvalds|
3011da177e4SLinus Torvalds|	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial
3021da177e4SLinus Torvalds|			p = X + X*X*(B1 + X*(B2 + ... + X*B12))
3031da177e4SLinus Torvalds|	Notes:	a) In order to reduce memory access, the coefficients are
3041da177e4SLinus Torvalds|		 made as "short" as possible: B1 (which is 1/2), B9 to B12
3051da177e4SLinus Torvalds|		 are single precision; B3 to B8 are double precision; and
3061da177e4SLinus Torvalds|		 B2 is double extended.
3071da177e4SLinus Torvalds|		 b) Even with the restriction above,
3081da177e4SLinus Torvalds|			|p - (exp(X)-1)| < |X| 2^(-70.6)
3091da177e4SLinus Torvalds|		 for all |X| <= 0.251.
3101da177e4SLinus Torvalds|		 Note that 0.251 is slightly bigger than 1/4.
3111da177e4SLinus Torvalds|		 c) To fully preserve accuracy, the polynomial is computed
3121da177e4SLinus Torvalds|		 as	X + ( S*B1 +	Q ) where S = X*X and
3131da177e4SLinus Torvalds|			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))
3141da177e4SLinus Torvalds|		 d) To fully utilize the pipeline, Q is separated into
3151da177e4SLinus Torvalds|		 two independent pieces of roughly equal complexity
3161da177e4SLinus Torvalds|			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
3171da177e4SLinus Torvalds|				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]
3181da177e4SLinus Torvalds|
3191da177e4SLinus Torvalds|	Step 10.	Calculate exp(X)-1 for |X| >= 70 log 2.
3201da177e4SLinus Torvalds|		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
3211da177e4SLinus Torvalds|		 purposes. Therefore, go to Step 1 of setox.
3221da177e4SLinus Torvalds|		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
3231da177e4SLinus Torvalds|		 ans := -1
3241da177e4SLinus Torvalds|		 Restore user FPCR
3251da177e4SLinus Torvalds|		 Return ans := ans + 2^(-126). Exit.
3261da177e4SLinus Torvalds|	Notes:	10.2 will always create an inexact and return -1 + tiny
3271da177e4SLinus Torvalds|		 in the user rounding precision and mode.
3281da177e4SLinus Torvalds|
3291da177e4SLinus Torvalds|
3301da177e4SLinus Torvalds
3311da177e4SLinus Torvalds|		Copyright (C) Motorola, Inc. 1990
3321da177e4SLinus Torvalds|			All Rights Reserved
3331da177e4SLinus Torvalds|
334*e00d82d0SMatt Waddel|       For details on the license for this file, please see the
335*e00d82d0SMatt Waddel|       file, README, in this same directory.
3361da177e4SLinus Torvalds
3371da177e4SLinus Torvalds|setox	idnt	2,1 | Motorola 040 Floating Point Software Package
3381da177e4SLinus Torvalds
3391da177e4SLinus Torvalds	|section	8
3401da177e4SLinus Torvalds
3411da177e4SLinus Torvalds#include "fpsp.h"
3421da177e4SLinus Torvalds
3431da177e4SLinus TorvaldsL2:	.long	0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
3441da177e4SLinus Torvalds
3451da177e4SLinus TorvaldsEXPA3:	.long	0x3FA55555,0x55554431
3461da177e4SLinus TorvaldsEXPA2:	.long	0x3FC55555,0x55554018
3471da177e4SLinus Torvalds
3481da177e4SLinus TorvaldsHUGE:	.long	0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
3491da177e4SLinus TorvaldsTINY:	.long	0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
3501da177e4SLinus Torvalds
3511da177e4SLinus TorvaldsEM1A4:	.long	0x3F811111,0x11174385
3521da177e4SLinus TorvaldsEM1A3:	.long	0x3FA55555,0x55554F5A
3531da177e4SLinus Torvalds
3541da177e4SLinus TorvaldsEM1A2:	.long	0x3FC55555,0x55555555,0x00000000,0x00000000
3551da177e4SLinus Torvalds
3561da177e4SLinus TorvaldsEM1B8:	.long	0x3EC71DE3,0xA5774682
3571da177e4SLinus TorvaldsEM1B7:	.long	0x3EFA01A0,0x19D7CB68
3581da177e4SLinus Torvalds
3591da177e4SLinus TorvaldsEM1B6:	.long	0x3F2A01A0,0x1A019DF3
3601da177e4SLinus TorvaldsEM1B5:	.long	0x3F56C16C,0x16C170E2
3611da177e4SLinus Torvalds
3621da177e4SLinus TorvaldsEM1B4:	.long	0x3F811111,0x11111111
3631da177e4SLinus TorvaldsEM1B3:	.long	0x3FA55555,0x55555555
3641da177e4SLinus Torvalds
3651da177e4SLinus TorvaldsEM1B2:	.long	0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
3661da177e4SLinus Torvalds	.long	0x00000000
3671da177e4SLinus Torvalds
3681da177e4SLinus TorvaldsTWO140:	.long	0x48B00000,0x00000000
3691da177e4SLinus TorvaldsTWON140:	.long	0x37300000,0x00000000
3701da177e4SLinus Torvalds
3711da177e4SLinus TorvaldsEXPTBL:
3721da177e4SLinus Torvalds	.long	0x3FFF0000,0x80000000,0x00000000,0x00000000
3731da177e4SLinus Torvalds	.long	0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
3741da177e4SLinus Torvalds	.long	0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
3751da177e4SLinus Torvalds	.long	0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
3761da177e4SLinus Torvalds	.long	0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
3771da177e4SLinus Torvalds	.long	0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
3781da177e4SLinus Torvalds	.long	0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
3791da177e4SLinus Torvalds	.long	0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
3801da177e4SLinus Torvalds	.long	0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
3811da177e4SLinus Torvalds	.long	0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
3821da177e4SLinus Torvalds	.long	0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
3831da177e4SLinus Torvalds	.long	0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
3841da177e4SLinus Torvalds	.long	0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
3851da177e4SLinus Torvalds	.long	0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
3861da177e4SLinus Torvalds	.long	0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
3871da177e4SLinus Torvalds	.long	0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
3881da177e4SLinus Torvalds	.long	0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
3891da177e4SLinus Torvalds	.long	0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
3901da177e4SLinus Torvalds	.long	0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
3911da177e4SLinus Torvalds	.long	0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
3921da177e4SLinus Torvalds	.long	0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
3931da177e4SLinus Torvalds	.long	0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
3941da177e4SLinus Torvalds	.long	0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
3951da177e4SLinus Torvalds	.long	0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
3961da177e4SLinus Torvalds	.long	0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
3971da177e4SLinus Torvalds	.long	0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
3981da177e4SLinus Torvalds	.long	0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
3991da177e4SLinus Torvalds	.long	0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
4001da177e4SLinus Torvalds	.long	0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
4011da177e4SLinus Torvalds	.long	0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
4021da177e4SLinus Torvalds	.long	0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
4031da177e4SLinus Torvalds	.long	0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
4041da177e4SLinus Torvalds	.long	0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
4051da177e4SLinus Torvalds	.long	0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
4061da177e4SLinus Torvalds	.long	0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
4071da177e4SLinus Torvalds	.long	0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
4081da177e4SLinus Torvalds	.long	0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
4091da177e4SLinus Torvalds	.long	0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
4101da177e4SLinus Torvalds	.long	0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
4111da177e4SLinus Torvalds	.long	0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
4121da177e4SLinus Torvalds	.long	0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
4131da177e4SLinus Torvalds	.long	0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
4141da177e4SLinus Torvalds	.long	0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
4151da177e4SLinus Torvalds	.long	0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
4161da177e4SLinus Torvalds	.long	0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
4171da177e4SLinus Torvalds	.long	0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
4181da177e4SLinus Torvalds	.long	0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
4191da177e4SLinus Torvalds	.long	0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
4201da177e4SLinus Torvalds	.long	0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
4211da177e4SLinus Torvalds	.long	0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
4221da177e4SLinus Torvalds	.long	0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
4231da177e4SLinus Torvalds	.long	0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
4241da177e4SLinus Torvalds	.long	0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
4251da177e4SLinus Torvalds	.long	0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
4261da177e4SLinus Torvalds	.long	0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
4271da177e4SLinus Torvalds	.long	0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
4281da177e4SLinus Torvalds	.long	0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
4291da177e4SLinus Torvalds	.long	0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
4301da177e4SLinus Torvalds	.long	0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
4311da177e4SLinus Torvalds	.long	0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
4321da177e4SLinus Torvalds	.long	0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
4331da177e4SLinus Torvalds	.long	0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
4341da177e4SLinus Torvalds	.long	0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
4351da177e4SLinus Torvalds	.long	0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
4361da177e4SLinus Torvalds
4371da177e4SLinus Torvalds	.set	ADJFLAG,L_SCR2
4381da177e4SLinus Torvalds	.set	SCALE,FP_SCR1
4391da177e4SLinus Torvalds	.set	ADJSCALE,FP_SCR2
4401da177e4SLinus Torvalds	.set	SC,FP_SCR3
4411da177e4SLinus Torvalds	.set	ONEBYSC,FP_SCR4
4421da177e4SLinus Torvalds
4431da177e4SLinus Torvalds	| xref	t_frcinx
4441da177e4SLinus Torvalds	|xref	t_extdnrm
4451da177e4SLinus Torvalds	|xref	t_unfl
4461da177e4SLinus Torvalds	|xref	t_ovfl
4471da177e4SLinus Torvalds
4481da177e4SLinus Torvalds	.global	setoxd
4491da177e4SLinus Torvaldssetoxd:
4501da177e4SLinus Torvalds|--entry point for EXP(X), X is denormalized
4511da177e4SLinus Torvalds	movel		(%a0),%d0
4521da177e4SLinus Torvalds	andil		#0x80000000,%d0
4531da177e4SLinus Torvalds	oril		#0x00800000,%d0		| ...sign(X)*2^(-126)
4541da177e4SLinus Torvalds	movel		%d0,-(%sp)
4551da177e4SLinus Torvalds	fmoves		#0x3F800000,%fp0
4561da177e4SLinus Torvalds	fmovel		%d1,%fpcr
4571da177e4SLinus Torvalds	fadds		(%sp)+,%fp0
4581da177e4SLinus Torvalds	bra		t_frcinx
4591da177e4SLinus Torvalds
4601da177e4SLinus Torvalds	.global	setox
4611da177e4SLinus Torvaldssetox:
4621da177e4SLinus Torvalds|--entry point for EXP(X), here X is finite, non-zero, and not NaN's
4631da177e4SLinus Torvalds
4641da177e4SLinus Torvalds|--Step 1.
4651da177e4SLinus Torvalds	movel		(%a0),%d0	 | ...load part of input X
4661da177e4SLinus Torvalds	andil		#0x7FFF0000,%d0	| ...biased expo. of X
4671da177e4SLinus Torvalds	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
4681da177e4SLinus Torvalds	bges		EXPC1		| ...normal case
4691da177e4SLinus Torvalds	bra		EXPSM
4701da177e4SLinus Torvalds
4711da177e4SLinus TorvaldsEXPC1:
4721da177e4SLinus Torvalds|--The case |X| >= 2^(-65)
4731da177e4SLinus Torvalds	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
4741da177e4SLinus Torvalds	cmpil		#0x400CB167,%d0	| ...16380 log2 trunc. 16 bits
4751da177e4SLinus Torvalds	blts		EXPMAIN	 | ...normal case
4761da177e4SLinus Torvalds	bra		EXPBIG
4771da177e4SLinus Torvalds
4781da177e4SLinus TorvaldsEXPMAIN:
4791da177e4SLinus Torvalds|--Step 2.
4801da177e4SLinus Torvalds|--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
4811da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...load input from (a0)
4821da177e4SLinus Torvalds
4831da177e4SLinus Torvalds	fmovex		%fp0,%fp1
4841da177e4SLinus Torvalds	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
4851da177e4SLinus Torvalds	fmovemx	%fp2-%fp2/%fp3,-(%a7)		| ...save fp2
4861da177e4SLinus Torvalds	movel		#0,ADJFLAG(%a6)
4871da177e4SLinus Torvalds	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
4881da177e4SLinus Torvalds	lea		EXPTBL,%a1
4891da177e4SLinus Torvalds	fmovel		%d0,%fp0		| ...convert to floating-format
4901da177e4SLinus Torvalds
4911da177e4SLinus Torvalds	movel		%d0,L_SCR1(%a6)	| ...save N temporarily
4921da177e4SLinus Torvalds	andil		#0x3F,%d0		| ...D0 is J = N mod 64
4931da177e4SLinus Torvalds	lsll		#4,%d0
4941da177e4SLinus Torvalds	addal		%d0,%a1		| ...address of 2^(J/64)
4951da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0
4961da177e4SLinus Torvalds	asrl		#6,%d0		| ...D0 is M
4971da177e4SLinus Torvalds	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(M)
4981da177e4SLinus Torvalds	movew		L2,L_SCR1(%a6)	| ...prefetch L2, no need in CB
4991da177e4SLinus Torvalds
5001da177e4SLinus TorvaldsEXPCONT1:
5011da177e4SLinus Torvalds|--Step 3.
5021da177e4SLinus Torvalds|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
5031da177e4SLinus Torvalds|--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
5041da177e4SLinus Torvalds	fmovex		%fp0,%fp2
5051da177e4SLinus Torvalds	fmuls		#0xBC317218,%fp0	| ...N * L1, L1 = lead(-log2/64)
5061da177e4SLinus Torvalds	fmulx		L2,%fp2		| ...N * L2, L1+L2 = -log2/64
5071da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...X + N*L1
5081da177e4SLinus Torvalds	faddx		%fp2,%fp0		| ...fp0 is R, reduced arg.
5091da177e4SLinus Torvalds|	MOVE.W		#$3FA5,EXPA3	...load EXPA3 in cache
5101da177e4SLinus Torvalds
5111da177e4SLinus Torvalds|--Step 4.
5121da177e4SLinus Torvalds|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
5131da177e4SLinus Torvalds|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
5141da177e4SLinus Torvalds|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
5151da177e4SLinus Torvalds|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
5161da177e4SLinus Torvalds
5171da177e4SLinus Torvalds	fmovex		%fp0,%fp1
5181da177e4SLinus Torvalds	fmulx		%fp1,%fp1		| ...fp1 IS S = R*R
5191da177e4SLinus Torvalds
5201da177e4SLinus Torvalds	fmoves		#0x3AB60B70,%fp2	| ...fp2 IS A5
5211da177e4SLinus Torvalds|	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
5221da177e4SLinus Torvalds
5231da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*A5
5241da177e4SLinus Torvalds	fmovex		%fp1,%fp3
5251da177e4SLinus Torvalds	fmuls		#0x3C088895,%fp3	| ...fp3 IS S*A4
5261da177e4SLinus Torvalds
5271da177e4SLinus Torvalds	faddd		EXPA3,%fp2	| ...fp2 IS A3+S*A5
5281da177e4SLinus Torvalds	faddd		EXPA2,%fp3	| ...fp3 IS A2+S*A4
5291da177e4SLinus Torvalds
5301da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*(A3+S*A5)
5311da177e4SLinus Torvalds	movew		%d0,SCALE(%a6)	| ...SCALE is 2^(M) in extended
5321da177e4SLinus Torvalds	clrw		SCALE+2(%a6)
5331da177e4SLinus Torvalds	movel		#0x80000000,SCALE+4(%a6)
5341da177e4SLinus Torvalds	clrl		SCALE+8(%a6)
5351da177e4SLinus Torvalds
5361da177e4SLinus Torvalds	fmulx		%fp1,%fp3		| ...fp3 IS S*(A2+S*A4)
5371da177e4SLinus Torvalds
5381da177e4SLinus Torvalds	fadds		#0x3F000000,%fp2	| ...fp2 IS A1+S*(A3+S*A5)
5391da177e4SLinus Torvalds	fmulx		%fp0,%fp3		| ...fp3 IS R*S*(A2+S*A4)
5401da177e4SLinus Torvalds
5411da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*(A1+S*(A3+S*A5))
5421da177e4SLinus Torvalds	faddx		%fp3,%fp0		| ...fp0 IS R+R*S*(A2+S*A4),
5431da177e4SLinus Torvalds|					...fp3 released
5441da177e4SLinus Torvalds
5451da177e4SLinus Torvalds	fmovex		(%a1)+,%fp1	| ...fp1 is lead. pt. of 2^(J/64)
5461da177e4SLinus Torvalds	faddx		%fp2,%fp0		| ...fp0 is EXP(R) - 1
5471da177e4SLinus Torvalds|					...fp2 released
5481da177e4SLinus Torvalds
5491da177e4SLinus Torvalds|--Step 5
5501da177e4SLinus Torvalds|--final reconstruction process
5511da177e4SLinus Torvalds|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
5521da177e4SLinus Torvalds
5531da177e4SLinus Torvalds	fmulx		%fp1,%fp0		| ...2^(J/64)*(Exp(R)-1)
5541da177e4SLinus Torvalds	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2 restored
5551da177e4SLinus Torvalds	fadds		(%a1),%fp0	| ...accurate 2^(J/64)
5561da177e4SLinus Torvalds
5571da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...2^(J/64) + 2^(J/64)*...
5581da177e4SLinus Torvalds	movel		ADJFLAG(%a6),%d0
5591da177e4SLinus Torvalds
5601da177e4SLinus Torvalds|--Step 6
5611da177e4SLinus Torvalds	tstl		%d0
5621da177e4SLinus Torvalds	beqs		NORMAL
5631da177e4SLinus TorvaldsADJUST:
5641da177e4SLinus Torvalds	fmulx		ADJSCALE(%a6),%fp0
5651da177e4SLinus TorvaldsNORMAL:
5661da177e4SLinus Torvalds	fmovel		%d1,%FPCR		| ...restore user FPCR
5671da177e4SLinus Torvalds	fmulx		SCALE(%a6),%fp0	| ...multiply 2^(M)
5681da177e4SLinus Torvalds	bra		t_frcinx
5691da177e4SLinus Torvalds
5701da177e4SLinus TorvaldsEXPSM:
5711da177e4SLinus Torvalds|--Step 7
5721da177e4SLinus Torvalds	fmovemx	(%a0),%fp0-%fp0	| ...in case X is denormalized
5731da177e4SLinus Torvalds	fmovel		%d1,%FPCR
5741da177e4SLinus Torvalds	fadds		#0x3F800000,%fp0	| ...1+X in user mode
5751da177e4SLinus Torvalds	bra		t_frcinx
5761da177e4SLinus Torvalds
5771da177e4SLinus TorvaldsEXPBIG:
5781da177e4SLinus Torvalds|--Step 8
5791da177e4SLinus Torvalds	cmpil		#0x400CB27C,%d0	| ...16480 log2
5801da177e4SLinus Torvalds	bgts		EXP2BIG
5811da177e4SLinus Torvalds|--Steps 8.2 -- 8.6
5821da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...load input from (a0)
5831da177e4SLinus Torvalds
5841da177e4SLinus Torvalds	fmovex		%fp0,%fp1
5851da177e4SLinus Torvalds	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
5861da177e4SLinus Torvalds	fmovemx	 %fp2-%fp2/%fp3,-(%a7)		| ...save fp2
5871da177e4SLinus Torvalds	movel		#1,ADJFLAG(%a6)
5881da177e4SLinus Torvalds	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
5891da177e4SLinus Torvalds	lea		EXPTBL,%a1
5901da177e4SLinus Torvalds	fmovel		%d0,%fp0		| ...convert to floating-format
5911da177e4SLinus Torvalds	movel		%d0,L_SCR1(%a6)			| ...save N temporarily
5921da177e4SLinus Torvalds	andil		#0x3F,%d0		 | ...D0 is J = N mod 64
5931da177e4SLinus Torvalds	lsll		#4,%d0
5941da177e4SLinus Torvalds	addal		%d0,%a1			| ...address of 2^(J/64)
5951da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0
5961da177e4SLinus Torvalds	asrl		#6,%d0			| ...D0 is K
5971da177e4SLinus Torvalds	movel		%d0,L_SCR1(%a6)			| ...save K temporarily
5981da177e4SLinus Torvalds	asrl		#1,%d0			| ...D0 is M1
5991da177e4SLinus Torvalds	subl		%d0,L_SCR1(%a6)			| ...a1 is M
6001da177e4SLinus Torvalds	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M1)
6011da177e4SLinus Torvalds	movew		%d0,ADJSCALE(%a6)		| ...ADJSCALE := 2^(M1)
6021da177e4SLinus Torvalds	clrw		ADJSCALE+2(%a6)
6031da177e4SLinus Torvalds	movel		#0x80000000,ADJSCALE+4(%a6)
6041da177e4SLinus Torvalds	clrl		ADJSCALE+8(%a6)
6051da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0			| ...D0 is M
6061da177e4SLinus Torvalds	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M)
6071da177e4SLinus Torvalds	bra		EXPCONT1		| ...go back to Step 3
6081da177e4SLinus Torvalds
6091da177e4SLinus TorvaldsEXP2BIG:
6101da177e4SLinus Torvalds|--Step 9
6111da177e4SLinus Torvalds	fmovel		%d1,%FPCR
6121da177e4SLinus Torvalds	movel		(%a0),%d0
6131da177e4SLinus Torvalds	bclrb		#sign_bit,(%a0)		| ...setox always returns positive
6141da177e4SLinus Torvalds	cmpil		#0,%d0
6151da177e4SLinus Torvalds	blt		t_unfl
6161da177e4SLinus Torvalds	bra		t_ovfl
6171da177e4SLinus Torvalds
6181da177e4SLinus Torvalds	.global	setoxm1d
6191da177e4SLinus Torvaldssetoxm1d:
6201da177e4SLinus Torvalds|--entry point for EXPM1(X), here X is denormalized
6211da177e4SLinus Torvalds|--Step 0.
6221da177e4SLinus Torvalds	bra		t_extdnrm
6231da177e4SLinus Torvalds
6241da177e4SLinus Torvalds
6251da177e4SLinus Torvalds	.global	setoxm1
6261da177e4SLinus Torvaldssetoxm1:
6271da177e4SLinus Torvalds|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
6281da177e4SLinus Torvalds
6291da177e4SLinus Torvalds|--Step 1.
6301da177e4SLinus Torvalds|--Step 1.1
6311da177e4SLinus Torvalds	movel		(%a0),%d0	 | ...load part of input X
6321da177e4SLinus Torvalds	andil		#0x7FFF0000,%d0	| ...biased expo. of X
6331da177e4SLinus Torvalds	cmpil		#0x3FFD0000,%d0	| ...1/4
6341da177e4SLinus Torvalds	bges		EM1CON1	 | ...|X| >= 1/4
6351da177e4SLinus Torvalds	bra		EM1SM
6361da177e4SLinus Torvalds
6371da177e4SLinus TorvaldsEM1CON1:
6381da177e4SLinus Torvalds|--Step 1.3
6391da177e4SLinus Torvalds|--The case |X| >= 1/4
6401da177e4SLinus Torvalds	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
6411da177e4SLinus Torvalds	cmpil		#0x4004C215,%d0	| ...70log2 rounded up to 16 bits
6421da177e4SLinus Torvalds	bles		EM1MAIN	 | ...1/4 <= |X| <= 70log2
6431da177e4SLinus Torvalds	bra		EM1BIG
6441da177e4SLinus Torvalds
6451da177e4SLinus TorvaldsEM1MAIN:
6461da177e4SLinus Torvalds|--Step 2.
6471da177e4SLinus Torvalds|--This is the case:	1/4 <= |X| <= 70 log2.
6481da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...load input from (a0)
6491da177e4SLinus Torvalds
6501da177e4SLinus Torvalds	fmovex		%fp0,%fp1
6511da177e4SLinus Torvalds	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
6521da177e4SLinus Torvalds	fmovemx	%fp2-%fp2/%fp3,-(%a7)		| ...save fp2
6531da177e4SLinus Torvalds|	MOVE.W		#$3F81,EM1A4		...prefetch in CB mode
6541da177e4SLinus Torvalds	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
6551da177e4SLinus Torvalds	lea		EXPTBL,%a1
6561da177e4SLinus Torvalds	fmovel		%d0,%fp0		| ...convert to floating-format
6571da177e4SLinus Torvalds
6581da177e4SLinus Torvalds	movel		%d0,L_SCR1(%a6)			| ...save N temporarily
6591da177e4SLinus Torvalds	andil		#0x3F,%d0		 | ...D0 is J = N mod 64
6601da177e4SLinus Torvalds	lsll		#4,%d0
6611da177e4SLinus Torvalds	addal		%d0,%a1			| ...address of 2^(J/64)
6621da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0
6631da177e4SLinus Torvalds	asrl		#6,%d0			| ...D0 is M
6641da177e4SLinus Torvalds	movel		%d0,L_SCR1(%a6)			| ...save a copy of M
6651da177e4SLinus Torvalds|	MOVE.W		#$3FDC,L2		...prefetch L2 in CB mode
6661da177e4SLinus Torvalds
6671da177e4SLinus Torvalds|--Step 3.
6681da177e4SLinus Torvalds|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
6691da177e4SLinus Torvalds|--a0 points to 2^(J/64), D0 and a1 both contain M
6701da177e4SLinus Torvalds	fmovex		%fp0,%fp2
6711da177e4SLinus Torvalds	fmuls		#0xBC317218,%fp0	| ...N * L1, L1 = lead(-log2/64)
6721da177e4SLinus Torvalds	fmulx		L2,%fp2		| ...N * L2, L1+L2 = -log2/64
6731da177e4SLinus Torvalds	faddx		%fp1,%fp0	 | ...X + N*L1
6741da177e4SLinus Torvalds	faddx		%fp2,%fp0	 | ...fp0 is R, reduced arg.
6751da177e4SLinus Torvalds|	MOVE.W		#$3FC5,EM1A2		...load EM1A2 in cache
6761da177e4SLinus Torvalds	addiw		#0x3FFF,%d0		| ...D0 is biased expo. of 2^M
6771da177e4SLinus Torvalds
6781da177e4SLinus Torvalds|--Step 4.
6791da177e4SLinus Torvalds|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
6801da177e4SLinus Torvalds|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
6811da177e4SLinus Torvalds|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
6821da177e4SLinus Torvalds|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
6831da177e4SLinus Torvalds
6841da177e4SLinus Torvalds	fmovex		%fp0,%fp1
6851da177e4SLinus Torvalds	fmulx		%fp1,%fp1		| ...fp1 IS S = R*R
6861da177e4SLinus Torvalds
6871da177e4SLinus Torvalds	fmoves		#0x3950097B,%fp2	| ...fp2 IS a6
6881da177e4SLinus Torvalds|	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
6891da177e4SLinus Torvalds
6901da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*A6
6911da177e4SLinus Torvalds	fmovex		%fp1,%fp3
6921da177e4SLinus Torvalds	fmuls		#0x3AB60B6A,%fp3	| ...fp3 IS S*A5
6931da177e4SLinus Torvalds
6941da177e4SLinus Torvalds	faddd		EM1A4,%fp2	| ...fp2 IS A4+S*A6
6951da177e4SLinus Torvalds	faddd		EM1A3,%fp3	| ...fp3 IS A3+S*A5
6961da177e4SLinus Torvalds	movew		%d0,SC(%a6)		| ...SC is 2^(M) in extended
6971da177e4SLinus Torvalds	clrw		SC+2(%a6)
6981da177e4SLinus Torvalds	movel		#0x80000000,SC+4(%a6)
6991da177e4SLinus Torvalds	clrl		SC+8(%a6)
7001da177e4SLinus Torvalds
7011da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*(A4+S*A6)
7021da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0		| ...D0 is	M
7031da177e4SLinus Torvalds	negw		%d0		| ...D0 is -M
7041da177e4SLinus Torvalds	fmulx		%fp1,%fp3		| ...fp3 IS S*(A3+S*A5)
7051da177e4SLinus Torvalds	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(-M)
7061da177e4SLinus Torvalds	faddd		EM1A2,%fp2	| ...fp2 IS A2+S*(A4+S*A6)
7071da177e4SLinus Torvalds	fadds		#0x3F000000,%fp3	| ...fp3 IS A1+S*(A3+S*A5)
7081da177e4SLinus Torvalds
7091da177e4SLinus Torvalds	fmulx		%fp1,%fp2		| ...fp2 IS S*(A2+S*(A4+S*A6))
7101da177e4SLinus Torvalds	oriw		#0x8000,%d0	| ...signed/expo. of -2^(-M)
7111da177e4SLinus Torvalds	movew		%d0,ONEBYSC(%a6)	| ...OnebySc is -2^(-M)
7121da177e4SLinus Torvalds	clrw		ONEBYSC+2(%a6)
7131da177e4SLinus Torvalds	movel		#0x80000000,ONEBYSC+4(%a6)
7141da177e4SLinus Torvalds	clrl		ONEBYSC+8(%a6)
7151da177e4SLinus Torvalds	fmulx		%fp3,%fp1		| ...fp1 IS S*(A1+S*(A3+S*A5))
7161da177e4SLinus Torvalds|					...fp3 released
7171da177e4SLinus Torvalds
7181da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 IS R*S*(A2+S*(A4+S*A6))
7191da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...fp0 IS R+S*(A1+S*(A3+S*A5))
7201da177e4SLinus Torvalds|					...fp1 released
7211da177e4SLinus Torvalds
7221da177e4SLinus Torvalds	faddx		%fp2,%fp0		| ...fp0 IS EXP(R)-1
7231da177e4SLinus Torvalds|					...fp2 released
7241da177e4SLinus Torvalds	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2 restored
7251da177e4SLinus Torvalds
7261da177e4SLinus Torvalds|--Step 5
7271da177e4SLinus Torvalds|--Compute 2^(J/64)*p
7281da177e4SLinus Torvalds
7291da177e4SLinus Torvalds	fmulx		(%a1),%fp0	| ...2^(J/64)*(Exp(R)-1)
7301da177e4SLinus Torvalds
7311da177e4SLinus Torvalds|--Step 6
7321da177e4SLinus Torvalds|--Step 6.1
7331da177e4SLinus Torvalds	movel		L_SCR1(%a6),%d0		| ...retrieve M
7341da177e4SLinus Torvalds	cmpil		#63,%d0
7351da177e4SLinus Torvalds	bles		MLE63
7361da177e4SLinus Torvalds|--Step 6.2	M >= 64
7371da177e4SLinus Torvalds	fmoves		12(%a1),%fp1	| ...fp1 is t
7381da177e4SLinus Torvalds	faddx		ONEBYSC(%a6),%fp1	| ...fp1 is t+OnebySc
7391da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...p+(t+OnebySc), fp1 released
7401da177e4SLinus Torvalds	faddx		(%a1),%fp0	| ...T+(p+(t+OnebySc))
7411da177e4SLinus Torvalds	bras		EM1SCALE
7421da177e4SLinus TorvaldsMLE63:
7431da177e4SLinus Torvalds|--Step 6.3	M <= 63
7441da177e4SLinus Torvalds	cmpil		#-3,%d0
7451da177e4SLinus Torvalds	bges		MGEN3
7461da177e4SLinus TorvaldsMLTN3:
7471da177e4SLinus Torvalds|--Step 6.4	M <= -4
7481da177e4SLinus Torvalds	fadds		12(%a1),%fp0	| ...p+t
7491da177e4SLinus Torvalds	faddx		(%a1),%fp0	| ...T+(p+t)
7501da177e4SLinus Torvalds	faddx		ONEBYSC(%a6),%fp0	| ...OnebySc + (T+(p+t))
7511da177e4SLinus Torvalds	bras		EM1SCALE
7521da177e4SLinus TorvaldsMGEN3:
7531da177e4SLinus Torvalds|--Step 6.5	-3 <= M <= 63
7541da177e4SLinus Torvalds	fmovex		(%a1)+,%fp1	| ...fp1 is T
7551da177e4SLinus Torvalds	fadds		(%a1),%fp0	| ...fp0 is p+t
7561da177e4SLinus Torvalds	faddx		ONEBYSC(%a6),%fp1	| ...fp1 is T+OnebySc
7571da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...(T+OnebySc)+(p+t)
7581da177e4SLinus Torvalds
7591da177e4SLinus TorvaldsEM1SCALE:
7601da177e4SLinus Torvalds|--Step 6.6
7611da177e4SLinus Torvalds	fmovel		%d1,%FPCR
7621da177e4SLinus Torvalds	fmulx		SC(%a6),%fp0
7631da177e4SLinus Torvalds
7641da177e4SLinus Torvalds	bra		t_frcinx
7651da177e4SLinus Torvalds
7661da177e4SLinus TorvaldsEM1SM:
7671da177e4SLinus Torvalds|--Step 7	|X| < 1/4.
7681da177e4SLinus Torvalds	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
7691da177e4SLinus Torvalds	bges		EM1POLY
7701da177e4SLinus Torvalds
7711da177e4SLinus TorvaldsEM1TINY:
7721da177e4SLinus Torvalds|--Step 8	|X| < 2^(-65)
7731da177e4SLinus Torvalds	cmpil		#0x00330000,%d0	| ...2^(-16312)
7741da177e4SLinus Torvalds	blts		EM12TINY
7751da177e4SLinus Torvalds|--Step 8.2
7761da177e4SLinus Torvalds	movel		#0x80010000,SC(%a6)	| ...SC is -2^(-16382)
7771da177e4SLinus Torvalds	movel		#0x80000000,SC+4(%a6)
7781da177e4SLinus Torvalds	clrl		SC+8(%a6)
7791da177e4SLinus Torvalds	fmovex		(%a0),%fp0
7801da177e4SLinus Torvalds	fmovel		%d1,%FPCR
7811da177e4SLinus Torvalds	faddx		SC(%a6),%fp0
7821da177e4SLinus Torvalds
7831da177e4SLinus Torvalds	bra		t_frcinx
7841da177e4SLinus Torvalds
7851da177e4SLinus TorvaldsEM12TINY:
7861da177e4SLinus Torvalds|--Step 8.3
7871da177e4SLinus Torvalds	fmovex		(%a0),%fp0
7881da177e4SLinus Torvalds	fmuld		TWO140,%fp0
7891da177e4SLinus Torvalds	movel		#0x80010000,SC(%a6)
7901da177e4SLinus Torvalds	movel		#0x80000000,SC+4(%a6)
7911da177e4SLinus Torvalds	clrl		SC+8(%a6)
7921da177e4SLinus Torvalds	faddx		SC(%a6),%fp0
7931da177e4SLinus Torvalds	fmovel		%d1,%FPCR
7941da177e4SLinus Torvalds	fmuld		TWON140,%fp0
7951da177e4SLinus Torvalds
7961da177e4SLinus Torvalds	bra		t_frcinx
7971da177e4SLinus Torvalds
7981da177e4SLinus TorvaldsEM1POLY:
7991da177e4SLinus Torvalds|--Step 9	exp(X)-1 by a simple polynomial
8001da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...fp0 is X
8011da177e4SLinus Torvalds	fmulx		%fp0,%fp0		| ...fp0 is S := X*X
8021da177e4SLinus Torvalds	fmovemx	%fp2-%fp2/%fp3,-(%a7)	| ...save fp2
8031da177e4SLinus Torvalds	fmoves		#0x2F30CAA8,%fp1	| ...fp1 is B12
8041da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*B12
8051da177e4SLinus Torvalds	fmoves		#0x310F8290,%fp2	| ...fp2 is B11
8061da177e4SLinus Torvalds	fadds		#0x32D73220,%fp1	| ...fp1 is B10+S*B12
8071da177e4SLinus Torvalds
8081da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*B11
8091da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*(B10 + ...
8101da177e4SLinus Torvalds
8111da177e4SLinus Torvalds	fadds		#0x3493F281,%fp2	| ...fp2 is B9+S*...
8121da177e4SLinus Torvalds	faddd		EM1B8,%fp1	| ...fp1 is B8+S*...
8131da177e4SLinus Torvalds
8141da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*(B9+...
8151da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*(B8+...
8161da177e4SLinus Torvalds
8171da177e4SLinus Torvalds	faddd		EM1B7,%fp2	| ...fp2 is B7+S*...
8181da177e4SLinus Torvalds	faddd		EM1B6,%fp1	| ...fp1 is B6+S*...
8191da177e4SLinus Torvalds
8201da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*(B7+...
8211da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*(B6+...
8221da177e4SLinus Torvalds
8231da177e4SLinus Torvalds	faddd		EM1B5,%fp2	| ...fp2 is B5+S*...
8241da177e4SLinus Torvalds	faddd		EM1B4,%fp1	| ...fp1 is B4+S*...
8251da177e4SLinus Torvalds
8261da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*(B5+...
8271da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*(B4+...
8281da177e4SLinus Torvalds
8291da177e4SLinus Torvalds	faddd		EM1B3,%fp2	| ...fp2 is B3+S*...
8301da177e4SLinus Torvalds	faddx		EM1B2,%fp1	| ...fp1 is B2+S*...
8311da177e4SLinus Torvalds
8321da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*(B3+...
8331da177e4SLinus Torvalds	fmulx		%fp0,%fp1		| ...fp1 is S*(B2+...
8341da177e4SLinus Torvalds
8351da177e4SLinus Torvalds	fmulx		%fp0,%fp2		| ...fp2 is S*S*(B3+...)
8361da177e4SLinus Torvalds	fmulx		(%a0),%fp1	| ...fp1 is X*S*(B2...
8371da177e4SLinus Torvalds
8381da177e4SLinus Torvalds	fmuls		#0x3F000000,%fp0	| ...fp0 is S*B1
8391da177e4SLinus Torvalds	faddx		%fp2,%fp1		| ...fp1 is Q
8401da177e4SLinus Torvalds|					...fp2 released
8411da177e4SLinus Torvalds
8421da177e4SLinus Torvalds	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2 restored
8431da177e4SLinus Torvalds
8441da177e4SLinus Torvalds	faddx		%fp1,%fp0		| ...fp0 is S*B1+Q
8451da177e4SLinus Torvalds|					...fp1 released
8461da177e4SLinus Torvalds
8471da177e4SLinus Torvalds	fmovel		%d1,%FPCR
8481da177e4SLinus Torvalds	faddx		(%a0),%fp0
8491da177e4SLinus Torvalds
8501da177e4SLinus Torvalds	bra		t_frcinx
8511da177e4SLinus Torvalds
8521da177e4SLinus TorvaldsEM1BIG:
8531da177e4SLinus Torvalds|--Step 10	|X| > 70 log2
8541da177e4SLinus Torvalds	movel		(%a0),%d0
8551da177e4SLinus Torvalds	cmpil		#0,%d0
8561da177e4SLinus Torvalds	bgt		EXPC1
8571da177e4SLinus Torvalds|--Step 10.2
8581da177e4SLinus Torvalds	fmoves		#0xBF800000,%fp0	| ...fp0 is -1
8591da177e4SLinus Torvalds	fmovel		%d1,%FPCR
8601da177e4SLinus Torvalds	fadds		#0x00800000,%fp0	| ...-1 + 2^(-126)
8611da177e4SLinus Torvalds
8621da177e4SLinus Torvalds	bra		t_frcinx
8631da177e4SLinus Torvalds
8641da177e4SLinus Torvalds	|end
865