xref: /openbmc/linux/arch/m68k/fpsp040/slogn.S (revision 1da177e4)
1|
2|	slogn.sa 3.1 12/10/90
3|
4|	slogn computes the natural logarithm of an
5|	input value. slognd does the same except the input value is a
6|	denormalized number. slognp1 computes log(1+X), and slognp1d
7|	computes log(1+X) for denormalized X.
8|
9|	Input: Double-extended value in memory location pointed to by address
10|		register a0.
11|
12|	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
13|
14|	Accuracy and Monotonicity: The returned result is within 2 ulps in
15|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16|		result is subsequently rounded to double precision. The
17|		result is provably monotonic in double precision.
18|
19|	Speed: The program slogn takes approximately 190 cycles for input
20|		argument X such that |X-1| >= 1/16, which is the usual
21|		situation. For those arguments, slognp1 takes approximately
22|		 210 cycles. For the less common arguments, the program will
23|		 run no worse than 10% slower.
24|
25|	Algorithm:
26|	LOGN:
27|	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28|		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
29|
30|	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31|		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32|		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
33|
34|	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35|		log(1+u) = poly.
36|
37|	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38|		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39|		beforehand and stored in the program.
40|
41|	lognp1:
42|	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43|		u where u = 2X/(2+X). Otherwise, move on to Step 2.
44|
45|	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46|		of the algorithm for LOGN and compute log(1+X) as
47|		k*log(2) + log(F) + poly where poly approximates log(1+u),
48|		u = (Y-F)/F.
49|
50|	Implementation Notes:
51|	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52|		need to be tabulated. Moreover, the values of 1/F are also
53|		tabulated so that the division in (Y-F)/F can be performed by a
54|		multiplication.
55|
56|	Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
57|		Y-F has to be calculated carefully when 1/2 <= X < 3/2.
58|
59|	Note 3. To fully exploit the pipeline, polynomials are usually separated
60|		into two parts evaluated independently before being added up.
61|
62
63|		Copyright (C) Motorola, Inc. 1990
64|			All Rights Reserved
65|
66|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
67|	The copyright notice above does not evidence any
68|	actual or intended publication of such source code.
69
70|slogn	idnt	2,1 | Motorola 040 Floating Point Software Package
71
72	|section	8
73
74#include "fpsp.h"
75
76BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
77BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
78
79LOGOF2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
80
81one:	.long 0x3F800000
82zero:	.long 0x00000000
83infty:	.long 0x7F800000
84negone:	.long 0xBF800000
85
86LOGA6:	.long 0x3FC2499A,0xB5E4040B
87LOGA5:	.long 0xBFC555B5,0x848CB7DB
88
89LOGA4:	.long 0x3FC99999,0x987D8730
90LOGA3:	.long 0xBFCFFFFF,0xFF6F7E97
91
92LOGA2:	.long 0x3FD55555,0x555555a4
93LOGA1:	.long 0xBFE00000,0x00000008
94
95LOGB5:	.long 0x3F175496,0xADD7DAD6
96LOGB4:	.long 0x3F3C71C2,0xFE80C7E0
97
98LOGB3:	.long 0x3F624924,0x928BCCFF
99LOGB2:	.long 0x3F899999,0x999995EC
100
101LOGB1:	.long 0x3FB55555,0x55555555
102TWO:	.long 0x40000000,0x00000000
103
104LTHOLD:	.long 0x3f990000,0x80000000,0x00000000,0x00000000
105
106LOGTBL:
107	.long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
108	.long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
109	.long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
110	.long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
111	.long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
112	.long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
113	.long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
114	.long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
115	.long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
116	.long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
117	.long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
118	.long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
119	.long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
120	.long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
121	.long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
122	.long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
123	.long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
124	.long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
125	.long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
126	.long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
127	.long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
128	.long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
129	.long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
130	.long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
131	.long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
132	.long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
133	.long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
134	.long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
135	.long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
136	.long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
137	.long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
138	.long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
139	.long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
140	.long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
141	.long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
142	.long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
143	.long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
144	.long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
145	.long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
146	.long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
147	.long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
148	.long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
149	.long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
150	.long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
151	.long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
152	.long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
153	.long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
154	.long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
155	.long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
156	.long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
157	.long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
158	.long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
159	.long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
160	.long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
161	.long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
162	.long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
163	.long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
164	.long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
165	.long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
166	.long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
167	.long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
168	.long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
169	.long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
170	.long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
171	.long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
172	.long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
173	.long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
174	.long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
175	.long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
176	.long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
177	.long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
178	.long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
179	.long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
180	.long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
181	.long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
182	.long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
183	.long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
184	.long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
185	.long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
186	.long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
187	.long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
188	.long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
189	.long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
190	.long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
191	.long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
192	.long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
193	.long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
194	.long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
195	.long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
196	.long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
197	.long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
198	.long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
199	.long  0x3FFE0000,0x94458094,0x45809446,0x00000000
200	.long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
201	.long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
202	.long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
203	.long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
204	.long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
205	.long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
206	.long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
207	.long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
208	.long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
209	.long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
210	.long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
211	.long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
212	.long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
213	.long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
214	.long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
215	.long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
216	.long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
217	.long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
218	.long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
219	.long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
220	.long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
221	.long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
222	.long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
223	.long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
224	.long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
225	.long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
226	.long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
227	.long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
228	.long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
229	.long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
230	.long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
231	.long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
232	.long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
233	.long  0x3FFE0000,0x80808080,0x80808081,0x00000000
234	.long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
235
236	.set	ADJK,L_SCR1
237
238	.set	X,FP_SCR1
239	.set	XDCARE,X+2
240	.set	XFRAC,X+4
241
242	.set	F,FP_SCR2
243	.set	FFRAC,F+4
244
245	.set	KLOG2,FP_SCR3
246
247	.set	SAVEU,FP_SCR4
248
249	| xref	t_frcinx
250	|xref	t_extdnrm
251	|xref	t_operr
252	|xref	t_dz
253
254	.global	slognd
255slognd:
256|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
257
258	movel		#-100,ADJK(%a6)	| ...INPUT = 2^(ADJK) * FP0
259
260|----normalize the input value by left shifting k bits (k to be determined
261|----below), adjusting exponent and storing -k to  ADJK
262|----the value TWOTO100 is no longer needed.
263|----Note that this code assumes the denormalized input is NON-ZERO.
264
265     moveml	%d2-%d7,-(%a7)		| ...save some registers
266     movel	#0x00000000,%d3		| ...D3 is exponent of smallest norm. #
267     movel	4(%a0),%d4
268     movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
269     clrl	%d2			| ...D2 used for holding K
270
271     tstl	%d4
272     bnes	HiX_not0
273
274HiX_0:
275     movel	%d5,%d4
276     clrl	%d5
277     movel	#32,%d2
278     clrl	%d6
279     bfffo      %d4{#0:#32},%d6
280     lsll      %d6,%d4
281     addl	%d6,%d2			| ...(D3,D4,D5) is normalized
282
283     movel	%d3,X(%a6)
284     movel	%d4,XFRAC(%a6)
285     movel	%d5,XFRAC+4(%a6)
286     negl	%d2
287     movel	%d2,ADJK(%a6)
288     fmovex	X(%a6),%fp0
289     moveml	(%a7)+,%d2-%d7		| ...restore registers
290     lea	X(%a6),%a0
291     bras	LOGBGN			| ...begin regular log(X)
292
293
294HiX_not0:
295     clrl	%d6
296     bfffo	%d4{#0:#32},%d6		| ...find first 1
297     movel	%d6,%d2			| ...get k
298     lsll	%d6,%d4
299     movel	%d5,%d7			| ...a copy of D5
300     lsll	%d6,%d5
301     negl	%d6
302     addil	#32,%d6
303     lsrl	%d6,%d7
304     orl	%d7,%d4			| ...(D3,D4,D5) normalized
305
306     movel	%d3,X(%a6)
307     movel	%d4,XFRAC(%a6)
308     movel	%d5,XFRAC+4(%a6)
309     negl	%d2
310     movel	%d2,ADJK(%a6)
311     fmovex	X(%a6),%fp0
312     moveml	(%a7)+,%d2-%d7		| ...restore registers
313     lea	X(%a6),%a0
314     bras	LOGBGN			| ...begin regular log(X)
315
316
317	.global	slogn
318slogn:
319|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
320
321	fmovex		(%a0),%fp0	| ...LOAD INPUT
322	movel		#0x00000000,ADJK(%a6)
323
324LOGBGN:
325|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
326|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
327
328	movel	(%a0),%d0
329	movew	4(%a0),%d0
330
331	movel	(%a0),X(%a6)
332	movel	4(%a0),X+4(%a6)
333	movel	8(%a0),X+8(%a6)
334
335	cmpil	#0,%d0		| ...CHECK IF X IS NEGATIVE
336	blt	LOGNEG		| ...LOG OF NEGATIVE ARGUMENT IS INVALID
337	cmp2l	BOUNDS1,%d0	| ...X IS POSITIVE, CHECK IF X IS NEAR 1
338	bcc	LOGNEAR1	| ...BOUNDS IS ROUGHLY [15/16, 17/16]
339
340LOGMAIN:
341|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
342
343|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
344|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
345|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
346|--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
347|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
348|--LOG(1+U) CAN BE VERY EFFICIENT.
349|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
350|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
351
352|--GET K, Y, F, AND ADDRESS OF 1/F.
353	asrl	#8,%d0
354	asrl	#8,%d0		| ...SHIFTED 16 BITS, BIASED EXPO. OF X
355	subil	#0x3FFF,%d0	| ...THIS IS K
356	addl	ADJK(%a6),%d0	| ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
357	lea	LOGTBL,%a0	| ...BASE ADDRESS OF 1/F AND LOG(F)
358	fmovel	%d0,%fp1		| ...CONVERT K TO FLOATING-POINT FORMAT
359
360|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
361	movel	#0x3FFF0000,X(%a6)	| ...X IS NOW Y, I.E. 2^(-K)*X
362	movel	XFRAC(%a6),FFRAC(%a6)
363	andil	#0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
364	oril	#0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
365	movel	FFRAC(%a6),%d0	| ...READY TO GET ADDRESS OF 1/F
366	andil	#0x7E000000,%d0
367	asrl	#8,%d0
368	asrl	#8,%d0
369	asrl	#4,%d0		| ...SHIFTED 20, D0 IS THE DISPLACEMENT
370	addal	%d0,%a0		| ...A0 IS THE ADDRESS FOR 1/F
371
372	fmovex	X(%a6),%fp0
373	movel	#0x3fff0000,F(%a6)
374	clrl	F+8(%a6)
375	fsubx	F(%a6),%fp0		| ...Y-F
376	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2 WHILE FP0 IS NOT READY
377|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
378|--REGISTERS SAVED: FPCR, FP1, FP2
379
380LP1CONT1:
381|--AN RE-ENTRY POINT FOR LOGNP1
382	fmulx	(%a0),%fp0	| ...FP0 IS U = (Y-F)/F
383	fmulx	LOGOF2,%fp1	| ...GET K*LOG2 WHILE FP0 IS NOT READY
384	fmovex	%fp0,%fp2
385	fmulx	%fp2,%fp2		| ...FP2 IS V=U*U
386	fmovex	%fp1,KLOG2(%a6)	| ...PUT K*LOG2 IN MEMORY, FREE FP1
387
388|--LOG(1+U) IS APPROXIMATED BY
389|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
390|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
391
392	fmovex	%fp2,%fp3
393	fmovex	%fp2,%fp1
394
395	fmuld	LOGA6,%fp1	| ...V*A6
396	fmuld	LOGA5,%fp2	| ...V*A5
397
398	faddd	LOGA4,%fp1	| ...A4+V*A6
399	faddd	LOGA3,%fp2	| ...A3+V*A5
400
401	fmulx	%fp3,%fp1		| ...V*(A4+V*A6)
402	fmulx	%fp3,%fp2		| ...V*(A3+V*A5)
403
404	faddd	LOGA2,%fp1	| ...A2+V*(A4+V*A6)
405	faddd	LOGA1,%fp2	| ...A1+V*(A3+V*A5)
406
407	fmulx	%fp3,%fp1		| ...V*(A2+V*(A4+V*A6))
408	addal	#16,%a0		| ...ADDRESS OF LOG(F)
409	fmulx	%fp3,%fp2		| ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
410
411	fmulx	%fp0,%fp1		| ...U*V*(A2+V*(A4+V*A6))
412	faddx	%fp2,%fp0		| ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
413
414	faddx	(%a0),%fp1	| ...LOG(F)+U*V*(A2+V*(A4+V*A6))
415	fmovemx  (%sp)+,%fp2-%fp2/%fp3	| ...RESTORE FP2
416	faddx	%fp1,%fp0		| ...FP0 IS LOG(F) + LOG(1+U)
417
418	fmovel	%d1,%fpcr
419	faddx	KLOG2(%a6),%fp0	| ...FINAL ADD
420	bra	t_frcinx
421
422
423LOGNEAR1:
424|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
425	fmovex	%fp0,%fp1
426	fsubs	one,%fp1		| ...FP1 IS X-1
427	fadds	one,%fp0		| ...FP0 IS X+1
428	faddx	%fp1,%fp1		| ...FP1 IS 2(X-1)
429|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
430|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
431
432LP1CONT2:
433|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
434	fdivx	%fp0,%fp1		| ...FP1 IS U
435	fmovemx %fp2-%fp2/%fp3,-(%sp)	 | ...SAVE FP2
436|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
437|--LET V=U*U, W=V*V, CALCULATE
438|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
439|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
440	fmovex	%fp1,%fp0
441	fmulx	%fp0,%fp0	| ...FP0 IS V
442	fmovex	%fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
443	fmovex	%fp0,%fp1
444	fmulx	%fp1,%fp1	| ...FP1 IS W
445
446	fmoved	LOGB5,%fp3
447	fmoved	LOGB4,%fp2
448
449	fmulx	%fp1,%fp3	| ...W*B5
450	fmulx	%fp1,%fp2	| ...W*B4
451
452	faddd	LOGB3,%fp3 | ...B3+W*B5
453	faddd	LOGB2,%fp2 | ...B2+W*B4
454
455	fmulx	%fp3,%fp1	| ...W*(B3+W*B5), FP3 RELEASED
456
457	fmulx	%fp0,%fp2	| ...V*(B2+W*B4)
458
459	faddd	LOGB1,%fp1 | ...B1+W*(B3+W*B5)
460	fmulx	SAVEU(%a6),%fp0 | ...FP0 IS U*V
461
462	faddx	%fp2,%fp1	| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
463	fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
464
465	fmulx	%fp1,%fp0	| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
466
467	fmovel	%d1,%fpcr
468	faddx	SAVEU(%a6),%fp0
469	bra	t_frcinx
470	rts
471
472LOGNEG:
473|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
474	bra	t_operr
475
476	.global	slognp1d
477slognp1d:
478|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
479| Simply return the denorm
480
481	bra	t_extdnrm
482
483	.global	slognp1
484slognp1:
485|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
486
487	fmovex	(%a0),%fp0	| ...LOAD INPUT
488	fabsx	%fp0		|test magnitude
489	fcmpx	LTHOLD,%fp0	|compare with min threshold
490	fbgt	LP1REAL		|if greater, continue
491	fmovel	#0,%fpsr		|clr N flag from compare
492	fmovel	%d1,%fpcr
493	fmovex	(%a0),%fp0	|return signed argument
494	bra	t_frcinx
495
496LP1REAL:
497	fmovex	(%a0),%fp0	| ...LOAD INPUT
498	movel	#0x00000000,ADJK(%a6)
499	fmovex	%fp0,%fp1	| ...FP1 IS INPUT Z
500	fadds	one,%fp0	| ...X := ROUND(1+Z)
501	fmovex	%fp0,X(%a6)
502	movew	XFRAC(%a6),XDCARE(%a6)
503	movel	X(%a6),%d0
504	cmpil	#0,%d0
505	ble	LP1NEG0	| ...LOG OF ZERO OR -VE
506	cmp2l	BOUNDS2,%d0
507	bcs	LOGMAIN	| ...BOUNDS2 IS [1/2,3/2]
508|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
509|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
510|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
511
512LP1NEAR1:
513|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
514	cmp2l	BOUNDS1,%d0
515	bcss	LP1CARE
516
517LP1ONE16:
518|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
519|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
520	faddx	%fp1,%fp1	| ...FP1 IS 2Z
521	fadds	one,%fp0	| ...FP0 IS 1+X
522|--U = FP1/FP0
523	bra	LP1CONT2
524
525LP1CARE:
526|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
527|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
528|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
529|--THERE ARE ONLY TWO CASES.
530|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
531|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
532|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
533|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
534
535	movel	XFRAC(%a6),FFRAC(%a6)
536	andil	#0xFE000000,FFRAC(%a6)
537	oril	#0x01000000,FFRAC(%a6)	| ...F OBTAINED
538	cmpil	#0x3FFF8000,%d0	| ...SEE IF 1+Z > 1
539	bges	KISZERO
540
541KISNEG1:
542	fmoves	TWO,%fp0
543	movel	#0x3fff0000,F(%a6)
544	clrl	F+8(%a6)
545	fsubx	F(%a6),%fp0	| ...2-F
546	movel	FFRAC(%a6),%d0
547	andil	#0x7E000000,%d0
548	asrl	#8,%d0
549	asrl	#8,%d0
550	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
551	faddx	%fp1,%fp1		| ...GET 2Z
552	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2
553	faddx	%fp1,%fp0		| ...FP0 IS Y-F = (2-F)+2Z
554	lea	LOGTBL,%a0	| ...A0 IS ADDRESS OF 1/F
555	addal	%d0,%a0
556	fmoves	negone,%fp1	| ...FP1 IS K = -1
557	bra	LP1CONT1
558
559KISZERO:
560	fmoves	one,%fp0
561	movel	#0x3fff0000,F(%a6)
562	clrl	F+8(%a6)
563	fsubx	F(%a6),%fp0		| ...1-F
564	movel	FFRAC(%a6),%d0
565	andil	#0x7E000000,%d0
566	asrl	#8,%d0
567	asrl	#8,%d0
568	asrl	#4,%d0
569	faddx	%fp1,%fp0		| ...FP0 IS Y-F
570	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...FP2 SAVED
571	lea	LOGTBL,%a0
572	addal	%d0,%a0		| ...A0 IS ADDRESS OF 1/F
573	fmoves	zero,%fp1	| ...FP1 IS K = 0
574	bra	LP1CONT1
575
576LP1NEG0:
577|--FPCR SAVED. D0 IS X IN COMPACT FORM.
578	cmpil	#0,%d0
579	blts	LP1NEG
580LP1ZERO:
581	fmoves	negone,%fp0
582
583	fmovel	%d1,%fpcr
584	bra t_dz
585
586LP1NEG:
587	fmoves	zero,%fp0
588
589	fmovel	%d1,%fpcr
590	bra	t_operr
591
592	|end
593