xref: /openbmc/linux/arch/m68k/fpsp040/slogn.S (revision e8f6f3b4)
1|
2|	slogn.sa 3.1 12/10/90
3|
4|	slogn computes the natural logarithm of an
5|	input value. slognd does the same except the input value is a
6|	denormalized number. slognp1 computes log(1+X), and slognp1d
7|	computes log(1+X) for denormalized X.
8|
9|	Input: Double-extended value in memory location pointed to by address
10|		register a0.
11|
12|	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
13|
14|	Accuracy and Monotonicity: The returned result is within 2 ulps in
15|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16|		result is subsequently rounded to double precision. The
17|		result is provably monotonic in double precision.
18|
19|	Speed: The program slogn takes approximately 190 cycles for input
20|		argument X such that |X-1| >= 1/16, which is the usual
21|		situation. For those arguments, slognp1 takes approximately
22|		 210 cycles. For the less common arguments, the program will
23|		 run no worse than 10% slower.
24|
25|	Algorithm:
26|	LOGN:
27|	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28|		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
29|
30|	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31|		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32|		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
33|
34|	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35|		log(1+u) = poly.
36|
37|	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38|		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39|		beforehand and stored in the program.
40|
41|	lognp1:
42|	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43|		u where u = 2X/(2+X). Otherwise, move on to Step 2.
44|
45|	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46|		of the algorithm for LOGN and compute log(1+X) as
47|		k*log(2) + log(F) + poly where poly approximates log(1+u),
48|		u = (Y-F)/F.
49|
50|	Implementation Notes:
51|	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52|		need to be tabulated. Moreover, the values of 1/F are also
53|		tabulated so that the division in (Y-F)/F can be performed by a
54|		multiplication.
55|
56|	Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
57|		Y-F has to be calculated carefully when 1/2 <= X < 3/2.
58|
59|	Note 3. To fully exploit the pipeline, polynomials are usually separated
60|		into two parts evaluated independently before being added up.
61|
62
63|		Copyright (C) Motorola, Inc. 1990
64|			All Rights Reserved
65|
66|       For details on the license for this file, please see the
67|       file, README, in this same directory.
68
69|slogn	idnt	2,1 | Motorola 040 Floating Point Software Package
70
71	|section	8
72
73#include "fpsp.h"
74
75BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
76BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
77
78LOGOF2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
79
80one:	.long 0x3F800000
81zero:	.long 0x00000000
82infty:	.long 0x7F800000
83negone:	.long 0xBF800000
84
85LOGA6:	.long 0x3FC2499A,0xB5E4040B
86LOGA5:	.long 0xBFC555B5,0x848CB7DB
87
88LOGA4:	.long 0x3FC99999,0x987D8730
89LOGA3:	.long 0xBFCFFFFF,0xFF6F7E97
90
91LOGA2:	.long 0x3FD55555,0x555555a4
92LOGA1:	.long 0xBFE00000,0x00000008
93
94LOGB5:	.long 0x3F175496,0xADD7DAD6
95LOGB4:	.long 0x3F3C71C2,0xFE80C7E0
96
97LOGB3:	.long 0x3F624924,0x928BCCFF
98LOGB2:	.long 0x3F899999,0x999995EC
99
100LOGB1:	.long 0x3FB55555,0x55555555
101TWO:	.long 0x40000000,0x00000000
102
103LTHOLD:	.long 0x3f990000,0x80000000,0x00000000,0x00000000
104
105LOGTBL:
106	.long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
107	.long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
108	.long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
109	.long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
110	.long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
111	.long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
112	.long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
113	.long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
114	.long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
115	.long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
116	.long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
117	.long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
118	.long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
119	.long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
120	.long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
121	.long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
122	.long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
123	.long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
124	.long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
125	.long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
126	.long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
127	.long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
128	.long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
129	.long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
130	.long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
131	.long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
132	.long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
133	.long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
134	.long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
135	.long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
136	.long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
137	.long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
138	.long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
139	.long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
140	.long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
141	.long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
142	.long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
143	.long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
144	.long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
145	.long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
146	.long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
147	.long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
148	.long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
149	.long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
150	.long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
151	.long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
152	.long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
153	.long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
154	.long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
155	.long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
156	.long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
157	.long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
158	.long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
159	.long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
160	.long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
161	.long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
162	.long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
163	.long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
164	.long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
165	.long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
166	.long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
167	.long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
168	.long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
169	.long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
170	.long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
171	.long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
172	.long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
173	.long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
174	.long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
175	.long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
176	.long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
177	.long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
178	.long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
179	.long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
180	.long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
181	.long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
182	.long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
183	.long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
184	.long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
185	.long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
186	.long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
187	.long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
188	.long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
189	.long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
190	.long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
191	.long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
192	.long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
193	.long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
194	.long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
195	.long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
196	.long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
197	.long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
198	.long  0x3FFE0000,0x94458094,0x45809446,0x00000000
199	.long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
200	.long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
201	.long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
202	.long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
203	.long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
204	.long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
205	.long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
206	.long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
207	.long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
208	.long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
209	.long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
210	.long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
211	.long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
212	.long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
213	.long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
214	.long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
215	.long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
216	.long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
217	.long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
218	.long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
219	.long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
220	.long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
221	.long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
222	.long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
223	.long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
224	.long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
225	.long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
226	.long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
227	.long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
228	.long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
229	.long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
230	.long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
231	.long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
232	.long  0x3FFE0000,0x80808080,0x80808081,0x00000000
233	.long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
234
235	.set	ADJK,L_SCR1
236
237	.set	X,FP_SCR1
238	.set	XDCARE,X+2
239	.set	XFRAC,X+4
240
241	.set	F,FP_SCR2
242	.set	FFRAC,F+4
243
244	.set	KLOG2,FP_SCR3
245
246	.set	SAVEU,FP_SCR4
247
248	| xref	t_frcinx
249	|xref	t_extdnrm
250	|xref	t_operr
251	|xref	t_dz
252
253	.global	slognd
254slognd:
255|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
256
257	movel		#-100,ADJK(%a6)	| ...INPUT = 2^(ADJK) * FP0
258
259|----normalize the input value by left shifting k bits (k to be determined
260|----below), adjusting exponent and storing -k to  ADJK
261|----the value TWOTO100 is no longer needed.
262|----Note that this code assumes the denormalized input is NON-ZERO.
263
264     moveml	%d2-%d7,-(%a7)		| ...save some registers
265     movel	#0x00000000,%d3		| ...D3 is exponent of smallest norm. #
266     movel	4(%a0),%d4
267     movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
268     clrl	%d2			| ...D2 used for holding K
269
270     tstl	%d4
271     bnes	HiX_not0
272
273HiX_0:
274     movel	%d5,%d4
275     clrl	%d5
276     movel	#32,%d2
277     clrl	%d6
278     bfffo      %d4{#0:#32},%d6
279     lsll      %d6,%d4
280     addl	%d6,%d2			| ...(D3,D4,D5) is normalized
281
282     movel	%d3,X(%a6)
283     movel	%d4,XFRAC(%a6)
284     movel	%d5,XFRAC+4(%a6)
285     negl	%d2
286     movel	%d2,ADJK(%a6)
287     fmovex	X(%a6),%fp0
288     moveml	(%a7)+,%d2-%d7		| ...restore registers
289     lea	X(%a6),%a0
290     bras	LOGBGN			| ...begin regular log(X)
291
292
293HiX_not0:
294     clrl	%d6
295     bfffo	%d4{#0:#32},%d6		| ...find first 1
296     movel	%d6,%d2			| ...get k
297     lsll	%d6,%d4
298     movel	%d5,%d7			| ...a copy of D5
299     lsll	%d6,%d5
300     negl	%d6
301     addil	#32,%d6
302     lsrl	%d6,%d7
303     orl	%d7,%d4			| ...(D3,D4,D5) normalized
304
305     movel	%d3,X(%a6)
306     movel	%d4,XFRAC(%a6)
307     movel	%d5,XFRAC+4(%a6)
308     negl	%d2
309     movel	%d2,ADJK(%a6)
310     fmovex	X(%a6),%fp0
311     moveml	(%a7)+,%d2-%d7		| ...restore registers
312     lea	X(%a6),%a0
313     bras	LOGBGN			| ...begin regular log(X)
314
315
316	.global	slogn
317slogn:
318|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
319
320	fmovex		(%a0),%fp0	| ...LOAD INPUT
321	movel		#0x00000000,ADJK(%a6)
322
323LOGBGN:
324|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
325|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
326
327	movel	(%a0),%d0
328	movew	4(%a0),%d0
329
330	movel	(%a0),X(%a6)
331	movel	4(%a0),X+4(%a6)
332	movel	8(%a0),X+8(%a6)
333
334	cmpil	#0,%d0		| ...CHECK IF X IS NEGATIVE
335	blt	LOGNEG		| ...LOG OF NEGATIVE ARGUMENT IS INVALID
336	cmp2l	BOUNDS1,%d0	| ...X IS POSITIVE, CHECK IF X IS NEAR 1
337	bcc	LOGNEAR1	| ...BOUNDS IS ROUGHLY [15/16, 17/16]
338
339LOGMAIN:
340|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
341
342|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
343|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
344|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
345|--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
346|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
347|--LOG(1+U) CAN BE VERY EFFICIENT.
348|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
349|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
350
351|--GET K, Y, F, AND ADDRESS OF 1/F.
352	asrl	#8,%d0
353	asrl	#8,%d0		| ...SHIFTED 16 BITS, BIASED EXPO. OF X
354	subil	#0x3FFF,%d0	| ...THIS IS K
355	addl	ADJK(%a6),%d0	| ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
356	lea	LOGTBL,%a0	| ...BASE ADDRESS OF 1/F AND LOG(F)
357	fmovel	%d0,%fp1		| ...CONVERT K TO FLOATING-POINT FORMAT
358
359|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
360	movel	#0x3FFF0000,X(%a6)	| ...X IS NOW Y, I.E. 2^(-K)*X
361	movel	XFRAC(%a6),FFRAC(%a6)
362	andil	#0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
363	oril	#0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
364	movel	FFRAC(%a6),%d0	| ...READY TO GET ADDRESS OF 1/F
365	andil	#0x7E000000,%d0
366	asrl	#8,%d0
367	asrl	#8,%d0
368	asrl	#4,%d0		| ...SHIFTED 20, D0 IS THE DISPLACEMENT
369	addal	%d0,%a0		| ...A0 IS THE ADDRESS FOR 1/F
370
371	fmovex	X(%a6),%fp0
372	movel	#0x3fff0000,F(%a6)
373	clrl	F+8(%a6)
374	fsubx	F(%a6),%fp0		| ...Y-F
375	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2 WHILE FP0 IS NOT READY
376|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
377|--REGISTERS SAVED: FPCR, FP1, FP2
378
379LP1CONT1:
380|--AN RE-ENTRY POINT FOR LOGNP1
381	fmulx	(%a0),%fp0	| ...FP0 IS U = (Y-F)/F
382	fmulx	LOGOF2,%fp1	| ...GET K*LOG2 WHILE FP0 IS NOT READY
383	fmovex	%fp0,%fp2
384	fmulx	%fp2,%fp2		| ...FP2 IS V=U*U
385	fmovex	%fp1,KLOG2(%a6)	| ...PUT K*LOG2 IN MEMORY, FREE FP1
386
387|--LOG(1+U) IS APPROXIMATED BY
388|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
389|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
390
391	fmovex	%fp2,%fp3
392	fmovex	%fp2,%fp1
393
394	fmuld	LOGA6,%fp1	| ...V*A6
395	fmuld	LOGA5,%fp2	| ...V*A5
396
397	faddd	LOGA4,%fp1	| ...A4+V*A6
398	faddd	LOGA3,%fp2	| ...A3+V*A5
399
400	fmulx	%fp3,%fp1		| ...V*(A4+V*A6)
401	fmulx	%fp3,%fp2		| ...V*(A3+V*A5)
402
403	faddd	LOGA2,%fp1	| ...A2+V*(A4+V*A6)
404	faddd	LOGA1,%fp2	| ...A1+V*(A3+V*A5)
405
406	fmulx	%fp3,%fp1		| ...V*(A2+V*(A4+V*A6))
407	addal	#16,%a0		| ...ADDRESS OF LOG(F)
408	fmulx	%fp3,%fp2		| ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
409
410	fmulx	%fp0,%fp1		| ...U*V*(A2+V*(A4+V*A6))
411	faddx	%fp2,%fp0		| ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
412
413	faddx	(%a0),%fp1	| ...LOG(F)+U*V*(A2+V*(A4+V*A6))
414	fmovemx  (%sp)+,%fp2-%fp2/%fp3	| ...RESTORE FP2
415	faddx	%fp1,%fp0		| ...FP0 IS LOG(F) + LOG(1+U)
416
417	fmovel	%d1,%fpcr
418	faddx	KLOG2(%a6),%fp0	| ...FINAL ADD
419	bra	t_frcinx
420
421
422LOGNEAR1:
423|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
424	fmovex	%fp0,%fp1
425	fsubs	one,%fp1		| ...FP1 IS X-1
426	fadds	one,%fp0		| ...FP0 IS X+1
427	faddx	%fp1,%fp1		| ...FP1 IS 2(X-1)
428|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
429|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
430
431LP1CONT2:
432|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
433	fdivx	%fp0,%fp1		| ...FP1 IS U
434	fmovemx %fp2-%fp2/%fp3,-(%sp)	 | ...SAVE FP2
435|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
436|--LET V=U*U, W=V*V, CALCULATE
437|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
438|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
439	fmovex	%fp1,%fp0
440	fmulx	%fp0,%fp0	| ...FP0 IS V
441	fmovex	%fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
442	fmovex	%fp0,%fp1
443	fmulx	%fp1,%fp1	| ...FP1 IS W
444
445	fmoved	LOGB5,%fp3
446	fmoved	LOGB4,%fp2
447
448	fmulx	%fp1,%fp3	| ...W*B5
449	fmulx	%fp1,%fp2	| ...W*B4
450
451	faddd	LOGB3,%fp3 | ...B3+W*B5
452	faddd	LOGB2,%fp2 | ...B2+W*B4
453
454	fmulx	%fp3,%fp1	| ...W*(B3+W*B5), FP3 RELEASED
455
456	fmulx	%fp0,%fp2	| ...V*(B2+W*B4)
457
458	faddd	LOGB1,%fp1 | ...B1+W*(B3+W*B5)
459	fmulx	SAVEU(%a6),%fp0 | ...FP0 IS U*V
460
461	faddx	%fp2,%fp1	| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
462	fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
463
464	fmulx	%fp1,%fp0	| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
465
466	fmovel	%d1,%fpcr
467	faddx	SAVEU(%a6),%fp0
468	bra	t_frcinx
469	rts
470
471LOGNEG:
472|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
473	bra	t_operr
474
475	.global	slognp1d
476slognp1d:
477|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
478| Simply return the denorm
479
480	bra	t_extdnrm
481
482	.global	slognp1
483slognp1:
484|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
485
486	fmovex	(%a0),%fp0	| ...LOAD INPUT
487	fabsx	%fp0		|test magnitude
488	fcmpx	LTHOLD,%fp0	|compare with min threshold
489	fbgt	LP1REAL		|if greater, continue
490	fmovel	#0,%fpsr		|clr N flag from compare
491	fmovel	%d1,%fpcr
492	fmovex	(%a0),%fp0	|return signed argument
493	bra	t_frcinx
494
495LP1REAL:
496	fmovex	(%a0),%fp0	| ...LOAD INPUT
497	movel	#0x00000000,ADJK(%a6)
498	fmovex	%fp0,%fp1	| ...FP1 IS INPUT Z
499	fadds	one,%fp0	| ...X := ROUND(1+Z)
500	fmovex	%fp0,X(%a6)
501	movew	XFRAC(%a6),XDCARE(%a6)
502	movel	X(%a6),%d0
503	cmpil	#0,%d0
504	ble	LP1NEG0	| ...LOG OF ZERO OR -VE
505	cmp2l	BOUNDS2,%d0
506	bcs	LOGMAIN	| ...BOUNDS2 IS [1/2,3/2]
507|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
508|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
509|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
510
511LP1NEAR1:
512|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
513	cmp2l	BOUNDS1,%d0
514	bcss	LP1CARE
515
516LP1ONE16:
517|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
518|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
519	faddx	%fp1,%fp1	| ...FP1 IS 2Z
520	fadds	one,%fp0	| ...FP0 IS 1+X
521|--U = FP1/FP0
522	bra	LP1CONT2
523
524LP1CARE:
525|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
526|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
527|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
528|--THERE ARE ONLY TWO CASES.
529|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
530|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
531|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
532|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
533
534	movel	XFRAC(%a6),FFRAC(%a6)
535	andil	#0xFE000000,FFRAC(%a6)
536	oril	#0x01000000,FFRAC(%a6)	| ...F OBTAINED
537	cmpil	#0x3FFF8000,%d0	| ...SEE IF 1+Z > 1
538	bges	KISZERO
539
540KISNEG1:
541	fmoves	TWO,%fp0
542	movel	#0x3fff0000,F(%a6)
543	clrl	F+8(%a6)
544	fsubx	F(%a6),%fp0	| ...2-F
545	movel	FFRAC(%a6),%d0
546	andil	#0x7E000000,%d0
547	asrl	#8,%d0
548	asrl	#8,%d0
549	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
550	faddx	%fp1,%fp1		| ...GET 2Z
551	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2
552	faddx	%fp1,%fp0		| ...FP0 IS Y-F = (2-F)+2Z
553	lea	LOGTBL,%a0	| ...A0 IS ADDRESS OF 1/F
554	addal	%d0,%a0
555	fmoves	negone,%fp1	| ...FP1 IS K = -1
556	bra	LP1CONT1
557
558KISZERO:
559	fmoves	one,%fp0
560	movel	#0x3fff0000,F(%a6)
561	clrl	F+8(%a6)
562	fsubx	F(%a6),%fp0		| ...1-F
563	movel	FFRAC(%a6),%d0
564	andil	#0x7E000000,%d0
565	asrl	#8,%d0
566	asrl	#8,%d0
567	asrl	#4,%d0
568	faddx	%fp1,%fp0		| ...FP0 IS Y-F
569	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...FP2 SAVED
570	lea	LOGTBL,%a0
571	addal	%d0,%a0		| ...A0 IS ADDRESS OF 1/F
572	fmoves	zero,%fp1	| ...FP1 IS K = 0
573	bra	LP1CONT1
574
575LP1NEG0:
576|--FPCR SAVED. D0 IS X IN COMPACT FORM.
577	cmpil	#0,%d0
578	blts	LP1NEG
579LP1ZERO:
580	fmoves	negone,%fp0
581
582	fmovel	%d1,%fpcr
583	bra t_dz
584
585LP1NEG:
586	fmoves	zero,%fp0
587
588	fmovel	%d1,%fpcr
589	bra	t_operr
590
591	|end
592