xref: /openbmc/linux/arch/m68k/ifpsp060/src/fpsp.S (revision 0883c2c0)
1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3M68000 Hi-Performance Microprocessor Division
4M68060 Software Package
5Production Release P1.00 -- October 10, 1994
6
7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8
9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10To the maximum extent permitted by applicable law,
11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13and any warranty against infringement with regard to the SOFTWARE
14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16To the maximum extent permitted by applicable law,
17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24so long as this entire notice is retained without alteration in any modified and/or
25redistributed versions, and that such modified versions are clearly identified as such.
26No licenses are granted by implication, estoppel or otherwise under any patents
27or trademarks of Motorola, Inc.
28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29#
30# freal.s:
31#	This file is appended to the top of the 060FPSP package
32# and contains the entry points into the package. The user, in
33# effect, branches to one of the branch table entries located
34# after _060FPSP_TABLE.
35#	Also, subroutine stubs exist in this file (_fpsp_done for
36# example) that are referenced by the FPSP package itself in order
37# to call a given routine. The stub routine actually performs the
38# callout. The FPSP code does a "bsr" to the stub routine. This
39# extra layer of hierarchy adds a slight performance penalty but
40# it makes the FPSP code easier to read and more maintainable.
41#
42
# Byte offsets of the callout slots, measured from _060FPSP_TABLE-0x80.
# Each stub further down loads the longword held in its slot and jumps to
# (_060FPSP_TABLE-0x80) + that value.
43set	_off_bsun,	0x00		# slot read by _real_bsun
44set	_off_snan,	0x04		# slot read by _real_snan
45set	_off_operr,	0x08		# slot read by _real_operr
46set	_off_ovfl,	0x0c		# slot read by _real_ovfl
47set	_off_unfl,	0x10		# slot read by _real_unfl
48set	_off_dz,	0x14		# slot read by _real_dz
49set	_off_inex,	0x18		# slot read by _real_inex
50set	_off_fline,	0x1c		# slot read by _real_fline
51set	_off_fpu_dis,	0x20		# slot read by _real_fpu_disabled
52set	_off_trap,	0x24		# slot read by _real_trap
53set	_off_trace,	0x28		# slot read by _real_trace
54set	_off_access,	0x2c		# slot read by _real_access
55set	_off_done,	0x30		# slot read by _fpsp_done
56
57set	_off_imr,	0x40		# slot read by _imem_read
58set	_off_dmr,	0x44		# slot read by _dmem_read
59set	_off_dmw,	0x48		# slot read by _dmem_write
60set	_off_irw,	0x4c		# slot read by _imem_read_word
61set	_off_irl,	0x50		# slot read by _imem_read_long
62set	_off_drb,	0x54		# slot read by _dmem_read_byte
63set	_off_drw,	0x58		# slot read by _dmem_read_word
64set	_off_drl,	0x5c		# slot read by _dmem_read_long
65set	_off_dwb,	0x60		# slot read by _dmem_write_byte
66set	_off_dww,	0x64		# slot read by _dmem_write_word
67set	_off_dwl,	0x68		# slot read by _dmem_write_long
68
# _060FPSP_TABLE: branch table through which the host enters the package.
# Every entry is a long branch to one handler followed by a zero word.
# The callout stubs after this table address their slots relative to
# _060FPSP_TABLE-0x80, i.e. 0x80 bytes in front of this label.
69_060FPSP_TABLE:
70
71###############################################################
72
73# Here's the table of ENTRY POINTS for those linking the package.
74	bra.l		_fpsp_snan
75	short		0x0000
76	bra.l		_fpsp_operr
77	short		0x0000
78	bra.l		_fpsp_ovfl		# FP Overflow exception entry point
79	short		0x0000
80	bra.l		_fpsp_unfl		# FP Underflow exception entry point
81	short		0x0000
82	bra.l		_fpsp_dz
83	short		0x0000
84	bra.l		_fpsp_inex
85	short		0x0000
86	bra.l		_fpsp_fline
87	short		0x0000
88	bra.l		_fpsp_unsupp
89	short		0x0000
90	bra.l		_fpsp_effadd
91	short		0x0000
92
# NOTE(review): presumably pads the table to 0x80 bytes
# (9 entries x 8 bytes + 56) -- confirm against the package layout.
93	space		56
94
95###############################################################
# _fpsp_done: stub for the "060FPSP exit (all work done!)" callout.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_done slot,
# with %d0 preserved; the stub leaves nothing of its own on the stack.
96	global		_fpsp_done
97_fpsp_done:
98	mov.l		%d0,-(%sp)		# save caller's d0
99	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0	# d0 = offset held in the slot
100	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
101	mov.l		0x4(%sp),%d0		# reload saved d0 (one longword up now)
102	rtd		&0x4			# pop target into pc, drop saved-d0 slot
103
# _real_ovfl: stub for the "Overflow exception enabled" callout.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_ovfl slot,
# with %d0 preserved.
104	global		_real_ovfl
105_real_ovfl:
106	mov.l		%d0,-(%sp)		# save caller's d0
107	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0	# d0 = offset held in the slot
108	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
109	mov.l		0x4(%sp),%d0		# reload saved d0
110	rtd		&0x4			# pop target into pc, drop saved-d0 slot
111
# _real_unfl: stub for the callout taken when the underflow exception is
# enabled. Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_unfl
# slot, with %d0 preserved.
112	global		_real_unfl
113_real_unfl:
114	mov.l		%d0,-(%sp)		# save caller's d0
115	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0	# d0 = offset held in the slot
116	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
117	mov.l		0x4(%sp),%d0		# reload saved d0
118	rtd		&0x4			# pop target into pc, drop saved-d0 slot
119
# _real_inex: stub for the "Inexact exception enabled" callout.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_inex slot,
# with %d0 preserved.
120	global		_real_inex
121_real_inex:
122	mov.l		%d0,-(%sp)		# save caller's d0
123	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0	# d0 = offset held in the slot
124	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
125	mov.l		0x4(%sp),%d0		# reload saved d0
126	rtd		&0x4			# pop target into pc, drop saved-d0 slot
127
# _real_bsun: callout stub for the _off_bsun slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
128	global		_real_bsun
129_real_bsun:
130	mov.l		%d0,-(%sp)		# save caller's d0
131	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0	# d0 = offset held in the slot
132	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
133	mov.l		0x4(%sp),%d0		# reload saved d0
134	rtd		&0x4			# pop target into pc, drop saved-d0 slot
135
# _real_operr: callout stub for the _off_operr slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
136	global		_real_operr
137_real_operr:
138	mov.l		%d0,-(%sp)		# save caller's d0
139	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0	# d0 = offset held in the slot
140	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
141	mov.l		0x4(%sp),%d0		# reload saved d0
142	rtd		&0x4			# pop target into pc, drop saved-d0 slot
143
# _real_snan: callout stub for the _off_snan slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
144	global		_real_snan
145_real_snan:
146	mov.l		%d0,-(%sp)		# save caller's d0
147	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0	# d0 = offset held in the slot
148	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
149	mov.l		0x4(%sp),%d0		# reload saved d0
150	rtd		&0x4			# pop target into pc, drop saved-d0 slot
151
# _real_dz: callout stub for the _off_dz slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
152	global		_real_dz
153_real_dz:
154	mov.l		%d0,-(%sp)		# save caller's d0
155	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0	# d0 = offset held in the slot
156	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
157	mov.l		0x4(%sp),%d0		# reload saved d0
158	rtd		&0x4			# pop target into pc, drop saved-d0 slot
159
# _real_fline: callout stub for the _off_fline slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
160	global		_real_fline
161_real_fline:
162	mov.l		%d0,-(%sp)		# save caller's d0
163	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0	# d0 = offset held in the slot
164	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
165	mov.l		0x4(%sp),%d0		# reload saved d0
166	rtd		&0x4			# pop target into pc, drop saved-d0 slot
167
# _real_fpu_disabled: callout stub for the _off_fpu_dis slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
168	global		_real_fpu_disabled
169_real_fpu_disabled:
170	mov.l		%d0,-(%sp)		# save caller's d0
171	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0	# d0 = offset held in the slot
172	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
173	mov.l		0x4(%sp),%d0		# reload saved d0
174	rtd		&0x4			# pop target into pc, drop saved-d0 slot
175
# _real_trap: callout stub for the _off_trap slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
176	global		_real_trap
177_real_trap:
178	mov.l		%d0,-(%sp)		# save caller's d0
179	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0	# d0 = offset held in the slot
180	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
181	mov.l		0x4(%sp),%d0		# reload saved d0
182	rtd		&0x4			# pop target into pc, drop saved-d0 slot
183
# _real_trace: stub for the "Trace exception code" callout.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_trace slot,
# with %d0 preserved.
184	global		_real_trace
185_real_trace:
186	mov.l		%d0,-(%sp)		# save caller's d0
187	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0	# d0 = offset held in the slot
188	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
189	mov.l		0x4(%sp),%d0		# reload saved d0
190	rtd		&0x4			# pop target into pc, drop saved-d0 slot
191
# _real_access: callout stub for the _off_access slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot, with %d0
# preserved.
192	global		_real_access
193_real_access:
194	mov.l		%d0,-(%sp)		# save caller's d0
195	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0	# d0 = offset held in the slot
196	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
197	mov.l		0x4(%sp),%d0		# reload saved d0
198	rtd		&0x4			# pop target into pc, drop saved-d0 slot
199
200#######################################
201
# _imem_read: callout stub for the _off_imr slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
202	global		_imem_read
203_imem_read:
204	mov.l		%d0,-(%sp)		# save caller's d0
205	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0	# d0 = offset held in the slot
206	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
207	mov.l		0x4(%sp),%d0		# reload saved d0
208	rtd		&0x4			# pop target into pc, drop saved-d0 slot
209
# _dmem_read: callout stub for the _off_dmr slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
210	global		_dmem_read
211_dmem_read:
212	mov.l		%d0,-(%sp)		# save caller's d0
213	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0	# d0 = offset held in the slot
214	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
215	mov.l		0x4(%sp),%d0		# reload saved d0
216	rtd		&0x4			# pop target into pc, drop saved-d0 slot
217
# _dmem_write: callout stub for the _off_dmw slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
218	global		_dmem_write
219_dmem_write:
220	mov.l		%d0,-(%sp)		# save caller's d0
221	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0	# d0 = offset held in the slot
222	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
223	mov.l		0x4(%sp),%d0		# reload saved d0
224	rtd		&0x4			# pop target into pc, drop saved-d0 slot
225
# _imem_read_word: callout stub for the _off_irw slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
226	global		_imem_read_word
227_imem_read_word:
228	mov.l		%d0,-(%sp)		# save caller's d0
229	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0	# d0 = offset held in the slot
230	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
231	mov.l		0x4(%sp),%d0		# reload saved d0
232	rtd		&0x4			# pop target into pc, drop saved-d0 slot
233
# _imem_read_long: stub for the "read instruction longword" callout.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in the _off_irl slot.
# The handlers in this file call it with the instruction address in %a0
# and store %d0 afterwards as the fetched longword.
234	global		_imem_read_long
235_imem_read_long:
236	mov.l		%d0,-(%sp)		# save caller's d0
237	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0	# d0 = offset held in the slot
238	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
239	mov.l		0x4(%sp),%d0		# reload saved d0
240	rtd		&0x4			# pop target into pc, drop saved-d0 slot
241
# _dmem_read_byte: callout stub for the _off_drb slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
242	global		_dmem_read_byte
243_dmem_read_byte:
244	mov.l		%d0,-(%sp)		# save caller's d0
245	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0	# d0 = offset held in the slot
246	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
247	mov.l		0x4(%sp),%d0		# reload saved d0
248	rtd		&0x4			# pop target into pc, drop saved-d0 slot
249
# _dmem_read_word: callout stub for the _off_drw slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
250	global		_dmem_read_word
251_dmem_read_word:
252	mov.l		%d0,-(%sp)		# save caller's d0
253	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0	# d0 = offset held in the slot
254	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
255	mov.l		0x4(%sp),%d0		# reload saved d0
256	rtd		&0x4			# pop target into pc, drop saved-d0 slot
257
# _dmem_read_long: callout stub for the _off_drl slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
258	global		_dmem_read_long
259_dmem_read_long:
260	mov.l		%d0,-(%sp)		# save caller's d0
261	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0	# d0 = offset held in the slot
262	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
263	mov.l		0x4(%sp),%d0		# reload saved d0
264	rtd		&0x4			# pop target into pc, drop saved-d0 slot
265
# _dmem_write_byte: callout stub for the _off_dwb slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
266	global		_dmem_write_byte
267_dmem_write_byte:
268	mov.l		%d0,-(%sp)		# save caller's d0
269	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0	# d0 = offset held in the slot
270	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
271	mov.l		0x4(%sp),%d0		# reload saved d0
272	rtd		&0x4			# pop target into pc, drop saved-d0 slot
273
# _dmem_write_word: callout stub for the _off_dww slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
274	global		_dmem_write_word
275_dmem_write_word:
276	mov.l		%d0,-(%sp)		# save caller's d0
277	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0	# d0 = offset held in the slot
278	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
279	mov.l		0x4(%sp),%d0		# reload saved d0
280	rtd		&0x4			# pop target into pc, drop saved-d0 slot
281
# _dmem_write_long: callout stub for the _off_dwl slot.
# Jumps to (_060FPSP_TABLE-0x80) + the longword in that slot. %d0 is
# restored before the jump, so the target sees the caller's register values.
282	global		_dmem_write_long
283_dmem_write_long:
284	mov.l		%d0,-(%sp)		# save caller's d0
285	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0	# d0 = offset held in the slot
286	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
287	mov.l		0x4(%sp),%d0		# reload saved d0
288	rtd		&0x4			# pop target into pc, drop saved-d0 slot
289
290#
291# This file contains a set of define statements for constants
292# in order to promote readability within the corecode itself.
293#
294
# Frame layout shared by the handlers. All offsets are relative to %a6
# after "link.w %a6,&-LOCAL_SIZE": positive offsets reach the exception
# stack frame above the saved %a6, negative ones the local save area.
295set LOCAL_SIZE,		192			# stack frame size(bytes)
296set LV,			-LOCAL_SIZE		# lowest local offset; base of the LV+n slots
297
298set EXC_SR,		0x4			# stacked status register
299set EXC_PC,		0x6			# stacked pc
300set EXC_VOFF,		0xa			# stacked vector offset
301set EXC_EA,		0xc			# stacked <ea>
302
303set EXC_FP,		0x0			# frame pointer
304
# Register save areas: data regs at -100..-69, address regs at -68..-37,
# three 12-byte fp slots at -36..-1.
305set EXC_AREGS,		-68			# offset of all address regs
306set EXC_DREGS,		-100			# offset of all data regs
307set EXC_FPREGS,		-36			# offset of all fp regs
308
309set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6 -- confirm
# that sharing the a6 slot is intended.
310set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
311set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
312set EXC_A5,		EXC_AREGS+(5*4)		# offset of saved a5
313set EXC_A4,		EXC_AREGS+(4*4)		# offset of saved a4
314set EXC_A3,		EXC_AREGS+(3*4)		# offset of saved a3
315set EXC_A2,		EXC_AREGS+(2*4)		# offset of saved a2
316set EXC_A1,		EXC_AREGS+(1*4)		# offset of saved a1
317set EXC_A0,		EXC_AREGS+(0*4)		# offset of saved a0
318set EXC_D7,		EXC_DREGS+(7*4)		# offset of saved d7
319set EXC_D6,		EXC_DREGS+(6*4)		# offset of saved d6
320set EXC_D5,		EXC_DREGS+(5*4)		# offset of saved d5
321set EXC_D4,		EXC_DREGS+(4*4)		# offset of saved d4
322set EXC_D3,		EXC_DREGS+(3*4)		# offset of saved d3
323set EXC_D2,		EXC_DREGS+(2*4)		# offset of saved d2
324set EXC_D1,		EXC_DREGS+(1*4)		# offset of saved d1
325set EXC_D0,		EXC_DREGS+(0*4)		# offset of saved d0
326
327set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
328set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
329set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
329set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
330
# Four consecutive 12-byte slots in the local frame (LV+44 .. LV+91).
# Each slot has sub-offsets +0 (_EX), +2 (_SGN), +4 (_HI) and +8 (_LO).
# The handlers fsave into and frestore from FP_SRC.
331set FP_SCR1,		LV+80			# fp scratch 1
332set FP_SCR1_EX,		FP_SCR1+0
333set FP_SCR1_SGN,	FP_SCR1+2
334set FP_SCR1_HI,		FP_SCR1+4
335set FP_SCR1_LO,		FP_SCR1+8
336
337set FP_SCR0,		LV+68			# fp scratch 0
338set FP_SCR0_EX,		FP_SCR0+0
339set FP_SCR0_SGN,	FP_SCR0+2
340set FP_SCR0_HI,		FP_SCR0+4
341set FP_SCR0_LO,		FP_SCR0+8
342
343set FP_DST,		LV+56			# fp destination operand
344set FP_DST_EX,		FP_DST+0
345set FP_DST_SGN,		FP_DST+2
346set FP_DST_HI,		FP_DST+4
347set FP_DST_LO,		FP_DST+8
348
349set FP_SRC,		LV+44			# fp source operand
350set FP_SRC_EX,		FP_SRC+0
351set FP_SRC_SGN,		FP_SRC+2
352set FP_SRC_HI,		FP_SRC+4
353set FP_SRC_LO,		FP_SRC+8
354
# Saved FP control registers. The handlers store fpcr/fpsr/fpiar as one
# 12-byte group starting at USER_FPCR, so the three slots are contiguous.
355set USER_FPIAR,		LV+40			# FP instr address register
356
357set USER_FPSR,		LV+36			# FP status register
358set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
359set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
360set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
361set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
362
363set USER_FPCR,		LV+32			# FP control register
364set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
365set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
366
367set L_SCR3,		LV+28			# integer scratch 3
368set L_SCR2,		LV+24			# integer scratch 2
369set L_SCR1,		LV+20			# integer scratch 1
370
371set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
372
373set EXC_TEMP2,		LV+24			# temporary space (same offset as L_SCR2)
374set EXC_TEMP,		LV+16			# temporary space
375
376set DTAG,		LV+15			# destination operand type
377set STAG,		LV+14			# source operand type
378
379set SPCOND_FLG,		LV+10			# flag: special case (see below)
380
381set EXC_CC,		LV+8			# saved condition codes
382set EXC_EXTWPTR,	LV+4			# saved current PC (active)
383set EXC_EXTWORD,	LV+2			# saved extension word
384set EXC_CMDREG,		LV+2			# saved extension word (alias of EXC_EXTWORD)
385set EXC_OPWORD,		LV+0			# saved operation word
386
387################################
388
389# Helpful macros
390
# Four identically laid-out name sets (FTEMP, LOCAL, DST, SRC) for the
# fields of an extended precision value in memory, addressed from a pointer.
391set FTEMP,		0			# offsets within an
392set FTEMP_EX,		0			# extended precision
393set FTEMP_SGN,		2			# value saved in memory.
394set FTEMP_HI,		4
395set FTEMP_LO,		8
396set FTEMP_GRS,		12
397
398set LOCAL,		0			# offsets within an
399set LOCAL_EX,		0			# extended precision
400set LOCAL_SGN,		2			# value saved in memory.
401set LOCAL_HI,		4
402set LOCAL_LO,		8
403set LOCAL_GRS,		12
404
405set DST,		0			# offsets within an
406set DST_EX,		0			# extended precision
407set DST_HI,		4			# value saved in memory.
408set DST_LO,		8
409
410set SRC,		0			# offsets within an
411set SRC_EX,		0			# extended precision
412set SRC_HI,		4			# value saved in memory.
413set SRC_LO,		8
414
415set SGL_LO,		0x3f81			# min sgl prec exponent
416set SGL_HI,		0x407e			# max sgl prec exponent
417set DBL_LO,		0x3c01			# min dbl prec exponent
418set DBL_HI,		0x43fe			# max dbl prec exponent
419set EXT_LO,		0x0			# min ext prec exponent
420set EXT_HI,		0x7ffe			# max ext prec exponent
421
422set EXT_BIAS,		0x3fff			# extended precision bias
423set SGL_BIAS,		0x007f			# single precision bias
424set DBL_BIAS,		0x03ff			# double precision bias
425
# Operand type tags: the handlers store the byte returned in %d0 by
# set_tag_x into STAG/DTAG and compare it against these values.
426set NORM,		0x00			# operand type for STAG/DTAG
427set ZERO,		0x01			# operand type for STAG/DTAG
428set INF,		0x02			# operand type for STAG/DTAG
429set QNAN,		0x03			# operand type for STAG/DTAG
430set DENORM,		0x04			# operand type for STAG/DTAG
431set SNAN,		0x05			# operand type for STAG/DTAG
432set UNNORM,		0x06			# operand type for STAG/DTAG
433
434##################
435# FPSR/FPCR bits #
436##################
# Bit numbers within a single byte, for use with byte-sized bit ops.
# neg/z/inf/nan index the condition code byte (they match neg_bmask etc.).
437set neg_bit,		0x3			# negative result
438set z_bit,		0x2			# zero result
439set inf_bit,		0x1			# infinite result
440set nan_bit,		0x0			# NAN result
441
442set q_sn_bit,		0x7			# sign bit of quotient byte
443
# Bit numbers within the exception status / exception enable byte.
444set bsun_bit,		7			# branch on unordered
445set snan_bit,		6			# signalling NAN
446set operr_bit,		5			# operand error
447set ovfl_bit,		4			# overflow
448set unfl_bit,		3			# underflow
449set dz_bit,		2			# divide by zero
450set inex2_bit,		1			# inexact result 2
451set inex1_bit,		0			# inexact result 1
452
# Bit numbers within the accrued exception byte.
453set aiop_bit,		7			# accrued illegal operation bit
454set aovfl_bit,		6			# accrued overflow bit
455set aunfl_bit,		5			# accrued underflow bit
456set adz_bit,		4			# accrued dz bit
457set ainex_bit,		3			# accrued inexact bit
458
459#############################
460# FPSR individual bit masks #
461#############################
# Longword (lw) masks apply to the whole 32-bit FPSR image; the byte
# masks apply to the condition code byte alone.
462set neg_mask,		0x08000000		# negative bit mask (lw)
463set inf_mask,		0x02000000		# infinity bit mask (lw)
464set z_mask,		0x04000000		# zero bit mask (lw)
465set nan_mask,		0x01000000		# nan bit mask (lw)
466
467set neg_bmask,		0x08			# negative bit mask (byte)
468set inf_bmask,		0x02			# infinity bit mask (byte)
469set z_bmask,		0x04			# zero bit mask (byte)
470set nan_bmask,		0x01			# nan bit mask (byte)
471
# Exception status byte, expressed as longword masks (bits 15..8).
472set bsun_mask,		0x00008000		# bsun exception mask
473set snan_mask,		0x00004000		# snan exception mask
474set operr_mask,		0x00002000		# operr exception mask
475set ovfl_mask,		0x00001000		# overflow exception mask
476set unfl_mask,		0x00000800		# underflow exception mask
477set dz_mask,		0x00000400		# dz exception mask
478set inex2_mask,		0x00000200		# inex2 exception mask
479set inex1_mask,		0x00000100		# inex1 exception mask
480
# Accrued exception byte, expressed as longword masks (bits 7..3).
481set aiop_mask,		0x00000080		# accrued illegal operation
482set aovfl_mask,		0x00000040		# accrued overflow
483set aunfl_mask,		0x00000020		# accrued underflow
484set adz_mask,		0x00000010		# accrued divide by zero
485set ainex_mask,		0x00000008		# accrued inexact
486
487######################################
488# FPSR combinations used in the FPSP #
489######################################
# Sums of the single-bit longword masks. The component masks do not
# overlap, so each sum equals the bitwise OR of its parts.
490set dzinf_mask,		inf_mask+dz_mask+adz_mask
491set opnan_mask,		nan_mask+operr_mask+aiop_mask
492set nzi_mask,		0x01ffffff		# AND mask: clears N, Z, and I
493set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
494set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# unfinx_mask without aunfl
495set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
496set inx1a_mask,		inex1_mask+ainex_mask
497set inx2a_mask,		inex2_mask+ainex_mask
498set snaniop_mask,	nan_mask+snan_mask+aiop_mask
499set snaniop2_mask,	snan_mask+aiop_mask	# snaniop_mask without nan
500set naniop_mask,	nan_mask+aiop_mask
501set neginf_mask,	neg_mask+inf_mask
502set infaiop_mask,	inf_mask+aiop_mask
503set negz_mask,		neg_mask+z_mask
504set opaop_mask,		operr_mask+aiop_mask
505set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# unfinx_mask without inex2
506set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# ovfinx_mask without inex2
507
508#########
509# misc. #
510#########
511set rnd_stky_bit,	29			# stky bit pos in longword
512
513set sign_bit,		0x7			# sign bit
514set signan_bit,		0x6			# signalling nan bit
515
516set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
517set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)
518
519set x_mode,		0x0			# extended precision
520set s_mode,		0x4			# single precision
521set d_mode,		0x8			# double precision
522
523set rn_mode,		0x0			# round-to-nearest
524set rz_mode,		0x1			# round-to-zero
525set rm_mode,		0x2			# round-to-minus-infinity
526set rp_mode,		0x3			# round-to-plus-infinity
527
528set mantissalen,	64			# length of mantissa in bits
529
530set BYTE,		1			# len(byte) == 1 byte
531set WORD,		2			# len(word) == 2 bytes
532set LONG,		4			# len(longword) == 4 bytes
533
# Exception vector offsets; fovfl_inex_on writes 0xc4 (INEX_VEC's value)
# into the low byte of the stacked vector offset.
534set BSUN_VEC,		0xc0			# bsun    vector offset
535set INEX_VEC,		0xc4			# inexact vector offset
536set DZ_VEC,		0xc8			# dz      vector offset
537set UNFL_VEC,		0xcc			# unfl    vector offset
538set OPERR_VEC,		0xd0			# operr   vector offset
539set OVFL_VEC,		0xd4			# ovfl    vector offset
540set SNAN_VEC,		0xd8			# snan    vector offset
541
542###########################
543# SPecial CONDition FLaGs #
544###########################
# *_flg values are byte masks; the *_bit constants below give the bit
# number of the matching mask. fmovm_flg (0x40) has no *_bit counterpart
# in this list.
545set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
546set fbsun_flg,		0x02			# flag bit: bsun exception
547set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
548set mda7_flg,		0x08			# flag bit: -(a7) <ea>
549set fmovm_flg,		0x40			# flag bit: fmovm instruction
550set immed_flg,		0x80			# flag bit: &<data> <ea>
551
552set ftrapcc_bit,	0x0			# bit number of ftrapcc_flg
553set fbsun_bit,		0x1			# bit number of fbsun_flg
554set mia7_bit,		0x2			# bit number of mia7_flg
555set mda7_bit,		0x3			# bit number of mda7_flg
556set immed_bit,		0x7			# bit number of immed_flg
557
558##################################
559# TRANSCENDENTAL "LAST-OP" FLAGS #
560##################################
# Codes naming the last operation performed; none of them is referenced
# in the part of the file visible here.
561set FMUL_OP,		0x0			# fmul instr performed last
562set FDIV_OP,		0x1			# fdiv performed last
563set FADD_OP,		0x2			# fadd performed last
564set FMOV_OP,		0x3			# fmov performed last
565
566#############
567# CONSTANTS #
568#############
# Read-only data tables.
# NOTE(review): T1/T2 and TWOBYPI are two longwords each (presumably
# IEEE double format); PI and PIBY2 are four longwords each (presumably
# extended precision plus one zero pad longword) -- confirm at the users.
569T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
570T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
571
572PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
573PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
574
575TWOBYPI:
576	long		0x3FE45F30,0x6DC9C883
577
578#########################################################################
579# XDEF ****************************************************************	#
580#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
581#									#
582#	This handler should be the first code executed upon taking the	#
583#	FP Overflow exception in an operating system.			#
584#									#
585# XREF ****************************************************************	#
586#	_imem_read_long() - read instruction longword			#
587#	fix_skewed_ops() - adjust src operand in fsave frame		#
588#	set_tag_x() - determine optype of src/dst operands		#
589#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
590#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
591#	load_fpn2() - load dst operand from FP regfile			#
592#	fout() - emulate an opclass 3 instruction			#
593#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
594#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
595#	_real_ovfl() - "callout" for Overflow exception enabled code	#
596#	_real_inex() - "callout" for Inexact exception enabled code	#
597#	_real_trace() - "callout" for Trace exception code		#
598#									#
599# INPUT ***************************************************************	#
600#	- The system stack contains the FP Ovfl exception stack frame	#
601#	- The fsave frame contains the source operand			#
602#									#
603# OUTPUT **************************************************************	#
604#	Overflow Exception enabled:					#
605#	- The system stack is unchanged					#
606#	- The fsave frame contains the adjusted src op for opclass 0,2	#
607#	Overflow Exception disabled:					#
608#	- The system stack is unchanged					#
609#	- The "exception present" flag in the fsave frame is cleared	#
610#									#
611# ALGORITHM ***********************************************************	#
612#	On the 060, if an FP overflow is present as the result of any	#
613# instruction, the 060 will take an overflow exception whether the	#
614# exception is enabled or disabled in the FPCR. For the disabled case,	#
615# This handler emulates the instruction to determine what the correct	#
616# default result should be for the operation. This default result is	#
617# then stored in either the FP regfile, data regfile, or memory.	#
618# Finally, the handler exits through the "callout" _fpsp_done()		#
619# denoting that no exceptional conditions exist within the machine.	#
620#	If the exception is enabled, then this handler must create the	#
621# exceptional operand and place it in the fsave state frame, and store	#
622# the default result (only if the instruction is opclass 3). For	#
623# exceptions enabled, this handler must exit through the "callout"	#
624# _real_ovfl() so that the operating system enabled overflow handler	#
625# can handle this case.							#
626#	Two other conditions exist. First, if overflow was disabled	#
627# but the inexact exception was enabled, this handler must exit		#
628# through the "callout" _real_inex() regardless of whether the result	#
629# was inexact.								#
630#	Also, in the case of an opclass three instruction where		#
631# overflow was disabled and the trace exception was enabled, this	#
632# handler must exit through the "callout" _real_trace().		#
633#									#
634#########################################################################
635
# _fpsp_ovfl: FP Overflow exception entry.
# Builds the local frame, fetches the faulting instruction, and for
# opclass 0/2 emulates it through tbl_unsupp and stores the result with
# store_fpreg. "fmove out" instructions are handled at fovfl_out.
# Exits via _fpsp_done, fovfl_ovfl_on or fovfl_inex_on according to the
# saved FPCR enable byte. d0-d1/a0-a1, fp0-fp1 and the FP control
# registers are saved on entry and restored before leaving.
636	global		_fpsp_ovfl
637_fpsp_ovfl:
638
639#$#	sub.l		&24,%sp			# make room for src/dst
640
641	link.w		%a6,&-LOCAL_SIZE	# init stack frame
642
643	fsave		FP_SRC(%a6)		# grab the "busy" frame
644
645	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
646	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
647	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
648
649# the FPIAR holds the "current PC" of the faulting instruction
650	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
651	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
652	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
653	bsr.l		_imem_read_long		# fetch the instruction words
654	mov.l		%d0,EXC_OPWORD(%a6)	# opword at LV+0, extension word at LV+2
655
656##############################################################################
657
658	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
659	bne.w		fovfl_out
660
661
662	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
663	bsr.l		fix_skewed_ops		# fix src op
664
665# since, I believe, only NORMs and DENORMs can come through here,
666# maybe we can avoid the subroutine call.
667	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
668	bsr.l		set_tag_x		# tag the operand type
669	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
670
671# bit five of the fp extension word separates the monadic and dyadic operations
672# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
673# will never take this exception.
674	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
675	beq.b		fovfl_extract		# monadic
676
677	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
678	bsr.l		load_fpn2		# load dst into FP_DST
679
680	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
681	bsr.l		set_tag_x		# tag the operand type
682	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
683	bne.b		fovfl_op2_done		# no
684	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
685fovfl_op2_done:
686	mov.b		%d0,DTAG(%a6)		# save dst optype tag
687
688fovfl_extract:
689
690#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
691#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
692#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
693#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
694#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
695#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
696
697	clr.l		%d0
698	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
699
700	mov.b		1+EXC_CMDREG(%a6),%d1
701	andi.w		&0x007f,%d1		# extract extension
702
703	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear cc byte and exc byte (except inex1); keep quotient/accrued
704
705	fmov.l		&0x0,%fpcr		# zero current control regs
706	fmov.l		&0x0,%fpsr
707
708	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
709	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
710
711# maybe we can make these entry points ONLY the OVFL entry points of each routine.
712	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
713	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call it; table entries are offsets from tbl_unsupp
714
715# the operation has been emulated. the result is in fp0.
716# the EXOP, if an exception occurred, is in fp1.
717# we must save the default result regardless of whether
718# traps are enabled or disabled.
719	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# pass: dst reg field, same as for load_fpn2
720	bsr.l		store_fpreg
721
722# the exceptional possibilities we have left ourselves with are ONLY overflow
723# and inexact. and, the inexact is such that overflow occurred and was disabled
724# but inexact was enabled.
725	btst		&ovfl_bit,FPCR_ENABLE(%a6)
726	bne.b		fovfl_ovfl_on
727
728	btst		&inex2_bit,FPCR_ENABLE(%a6)
729	bne.b		fovfl_inex_on
730
731	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
732	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
733	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
734
735	unlk		%a6
736#$#	add.l		&24,%sp
737	bra.l		_fpsp_done
738
739# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
740# in fp1. now, simply jump to _real_ovfl()!
# The EXOP overwrites the start of the fsave frame held at FP_SRC, and that
# frame is handed back to the FPU with frestore before the callout.
741fovfl_ovfl_on:
742	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
743
# NOTE(review): 0xe005 replaces the second word of the frame; its bit
# meaning is not visible in this part of the file.
744	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
745
746	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
747	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
748	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
749
750	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
751
752	unlk		%a6
753
754	bra.l		_real_ovfl
755
756# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
757# we must jump to real_inex().
# Same sequence as fovfl_ovfl_on, except that the stacked vector offset is
# rewritten first and a different status word goes into the frame.
758fovfl_inex_on:
759
760	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
761
762	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (value of INEX_VEC)
# NOTE(review): 0xe001 replaces the second word of the frame; its bit
# meaning is not visible in this part of the file.
763	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
764
765	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
766	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
767	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
768
769	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
770
771	unlk		%a6
772
773	bra.l		_real_inex
774
775########################################################################
# fovfl_out: overflow on an "fmove out" instruction.
# Emulates the store through fout, then either joins fovfl_ovfl_on /
# fovfl_inex_on or restores the saved state and exits. If the trace bit of
# the stacked SR is set, the stack frame is rewritten and the exit goes
# through _real_trace; otherwise it goes through _fpsp_done.
776fovfl_out:
777
778
779#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
780#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
781#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
782
783# the src operand is definitely a NORM(!), so tag it as such
784	mov.b		&NORM,STAG(%a6)		# set src optype tag
785
786	clr.l		%d0
787	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
788
789	and.l		&0xffff00ff,USER_FPSR(%a6) # clear only the exception status byte
790
791	fmov.l		&0x0,%fpcr		# zero current control regs
792	fmov.l		&0x0,%fpsr
793
794	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
795
796	bsr.l		fout
797
798	btst		&ovfl_bit,FPCR_ENABLE(%a6)
799	bne.w		fovfl_ovfl_on
800
801	btst		&inex2_bit,FPCR_ENABLE(%a6)
802	bne.w		fovfl_inex_on
803
804	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
805	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
806	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
807
808	unlk		%a6
809#$#	add.l		&24,%sp
810
811	btst		&0x7,(%sp)		# is trace on? (bit 7 of the stacked SR's high byte)
812	beq.l		_fpsp_done		# no
813
814	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
815	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
816	bra.l		_real_trace
817
818#########################################################################
819# XDEF ****************************************************************	#
820#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
821#									#
822#	This handler should be the first code executed upon taking the	#
823#	FP Underflow exception in an operating system.			#
824#									#
825# XREF ****************************************************************	#
826#	_imem_read_long() - read instruction longword			#
827#	fix_skewed_ops() - adjust src operand in fsave frame		#
828#	set_tag_x() - determine optype of src/dst operands		#
829#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
830#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
831#	load_fpn2() - load dst operand from FP regfile			#
832#	fout() - emulate an opclass 3 instruction			#
833#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
834#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
835#	_real_unfl() - "callout" for Underflow exception enabled code	#
836#	_real_inex() - "callout" for Inexact exception enabled code	#
837#	_real_trace() - "callout" for Trace exception code		#
838#									#
839# INPUT ***************************************************************	#
840#	- The system stack contains the FP Unfl exception stack frame	#
841#	- The fsave frame contains the source operand			#
842#									#
843# OUTPUT **************************************************************	#
844#	Underflow Exception enabled:					#
845#	- The system stack is unchanged					#
846#	- The fsave frame contains the adjusted src op for opclass 0,2	#
847#	Underflow Exception disabled:					#
848#	- The system stack is unchanged					#
849#	- The "exception present" flag in the fsave frame is cleared	#
850#									#
851# ALGORITHM ***********************************************************	#
852#	On the 060, if an FP underflow is present as the result of any	#
853# instruction, the 060 will take an underflow exception whether the	#
854# exception is enabled or disabled in the FPCR. For the disabled case,	#
855# This handler emulates the instruction to determine what the correct	#
856# default result should be for the operation. This default result is	#
857# then stored in either the FP regfile, data regfile, or memory.	#
858# Finally, the handler exits through the "callout" _fpsp_done()		#
859# denoting that no exceptional conditions exist within the machine.	#
860#	If the exception is enabled, then this handler must create the	#
861# exceptional operand and place it in the fsave state frame, and store	#
862# the default result (only if the instruction is opclass 3). For	#
863# exceptions enabled, this handler must exit through the "callout"	#
864# _real_unfl() so that the operating system enabled underflow handler	#
865# can handle this case.							#
866#	Two other conditions exist. First, if underflow was disabled	#
867# but the inexact exception was enabled and the result was inexact,	#
868# this handler must exit through the "callout" _real_inex().		#
869#									#
870#	Also, in the case of an opclass three instruction where		#
871# underflow was disabled and the trace exception was enabled, this	#
872# handler must exit through the "callout" _real_trace().		#
873#									#
874#########################################################################
875
876	global		_fpsp_unfl
877_fpsp_unfl:
878
879#$#	sub.l		&24,%sp			# make room for src/dst
880
881	link.w		%a6,&-LOCAL_SIZE	# init stack frame
882
883	fsave		FP_SRC(%a6)		# grab the "busy" frame
884
885	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
886	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
887	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
888
889# the FPIAR holds the "current PC" of the faulting instruction
890	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
891	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
892	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
893	bsr.l		_imem_read_long		# fetch the instruction words
894	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword
895
896##############################################################################
897
# opclass three (fmove out) is emulated separately at funfl_out.
898	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
899	bne.w		funfl_out
900
901
902	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
903	bsr.l		fix_skewed_ops		# fix src op
904
905	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
906	bsr.l		set_tag_x		# tag the operand type
907	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
908
909# bit five of the fp ext word separates the monadic and dyadic operations
910# that can pass through fpsp_unfl(). remember that fcmp, and ftst
911# will never take this exception.
912	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
913	beq.b		funfl_extract		# monadic
914
915# now, what's left that's not dyadic is fsincos. we can distinguish it
916# from all dyadics by the '0110xxx pattern
917	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
918	bne.b		funfl_extract		# yes
919
920	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
921	bsr.l		load_fpn2		# load dst into FP_DST
922
923	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
924	bsr.l		set_tag_x		# tag the operand type
925	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
926	bne.b		funfl_op2_done		# no
927	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
928funfl_op2_done:
929	mov.b		%d0,DTAG(%a6)		# save dst optype tag
930
931funfl_extract:
932
933#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
934#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
935#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
936#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
937#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
938#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
939
# set up the call to the emulation routine:
#	d0 = rounding precision/mode, a0 = ptr to src op, a1 = ptr to dst op
#	d1 = low seven bits of the extension byte, used to index tbl_unsupp
940	clr.l		%d0
941	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
942
943	mov.b		1+EXC_CMDREG(%a6),%d1
944	andi.w		&0x007f,%d1		# extract extension
945
# the mask keeps bits 23-16, bit 8, and bits 7-0 of the saved FPSR and
# clears everything else before the operation is re-done.
946	andi.l		&0x00ff01ff,USER_FPSR(%a6)
947
948	fmov.l		&0x0,%fpcr		# zero current control regs
949	fmov.l		&0x0,%fpsr
950
951	lea		FP_SRC(%a6),%a0
952	lea		FP_DST(%a6),%a1
953
954# maybe we can make these entry points ONLY the UNFL entry points of each routine.
# the table entry is an offset relative to tbl_unsupp; jsr adds it back in.
955	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
956	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
957
# d0 = dst FP register number from the command word; store the result there.
958	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
959	bsr.l		store_fpreg
960
961# The `060 FPU multiplier hardware is such that if the result of a
962# multiply operation is the smallest possible normalized number
963# (0x00000000_80000000_00000000), then the machine will take an
964# underflow exception. Since this is incorrect, we need to check
965# if our emulation, after re-doing the operation, decided that
966# no underflow was called for. We do these checks only in
967# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
968# special case will simply exit gracefully with the correct result.
969
970# the exceptional possibilities we have left ourselves with are ONLY underflow
971# and inexact. and, the inexact is such that underflow occurred and was disabled
972# but inexact was enabled.
973	btst		&unfl_bit,FPCR_ENABLE(%a6)
974	bne.b		funfl_unfl_on
975
976funfl_chkinex:
977	btst		&inex2_bit,FPCR_ENABLE(%a6)
978	bne.b		funfl_inex_on
979
# neither exception needs to be reported: restore state and leave.
980funfl_exit:
981	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
982	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
983	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
984
985	unlk		%a6
986#$#	add.l		&24,%sp
987	bra.l		_fpsp_done
988
989# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
990# in fp1 (don't forget to save fp0). what to do now?
991# well, we simply have to get to go to _real_unfl()!
992funfl_unfl_on:
993
994# The `060 FPU multiplier hardware is such that if the result of a
995# multiply operation is the smallest possible normalized number
996# (0x00000000_80000000_00000000), then the machine will take an
997# underflow exception. Since this is incorrect, we check here to see
998# if our emulation, after re-doing the operation, decided that
999# no underflow was called for.
1000	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1001	beq.w		funfl_chkinex
1002
# funfl_out also branches here directly when underflow is enabled.
1003funfl_unfl_on2:
1004	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1005
1006	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1007
1008	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1009	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1011
1012	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1013
1014	unlk		%a6
1015
1016	bra.l		_real_unfl
1017
1018# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019# we must jump to real_inex().
1020funfl_inex_on:
1021
1022# The `060 FPU multiplier hardware is such that if the result of a
1023# multiply operation is the smallest possible normalized number
1024# (0x00000000_80000000_00000000), then the machine will take an
1025# underflow exception.
1026# But, whether bogus or not, if inexact is enabled AND it occurred,
1027# then we have to branch to real_inex.
1028
1029	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1030	beq.w		funfl_exit
1031
# funfl_out also branches here directly when inexact is enabled.
1032funfl_inex_on2:
1033
1034	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1035
# only the low byte of the vector offset word is rewritten here.
1036	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1037	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1038
1039	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1040	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1042
1043	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1044
1045	unlk		%a6
1046
1047	bra.l		_real_inex
1048
1049#######################################################################
# opclass three (fmove out) path of _fpsp_unfl: emulate the move with
# fout(), then exit through _real_unfl(), _real_inex(), _real_trace(),
# or _fpsp_done().
1050funfl_out:
1051
1052
1053#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056
1057# the src operand is definitely a NORM(!), so tag it as such
1058	mov.b		&NORM,STAG(%a6)		# set src optype tag
1059
1060	clr.l		%d0
1061	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1062
1063	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1064
1065	fmov.l		&0x0,%fpcr		# zero current control regs
1066	fmov.l		&0x0,%fpsr
1067
1068	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1069
1070	bsr.l		fout
1071
# only the enable bits are tested here; the FPSR exception bits set by
# fout() are not re-checked before branching to the "_on2" exits.
1072	btst		&unfl_bit,FPCR_ENABLE(%a6)
1073	bne.w		funfl_unfl_on2
1074
1075	btst		&inex2_bit,FPCR_ENABLE(%a6)
1076	bne.w		funfl_inex_on2
1077
1078	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1079	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1081
1082	unlk		%a6
1083#$#	add.l		&24,%sp
1084
1085	btst		&0x7,(%sp)		# is trace on?
1086	beq.l		_fpsp_done		# no
1087
# trace is on: rewrite the exception frame into a Trace frame.
1088	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1089	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1090	bra.l		_real_trace
1091
1092#########################################################################
1093# XDEF ****************************************************************	#
1094#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1095#		        Data Type" exception.				#
1096#									#
1097#	This handler should be the first code executed upon taking the	#
1098#	FP Unimplemented Data Type exception in an operating system.	#
1099#									#
1100# XREF ****************************************************************	#
1101#	_imem_read_{word,long}() - read instruction word/longword	#
1102#	fix_skewed_ops() - adjust src operand in fsave frame		#
1103#	set_tag_x() - determine optype of src/dst operands		#
1104#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1105#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1106#	load_fpn2() - load dst operand from FP regfile			#
1107#	load_fpn1() - load src operand from FP regfile			#
1108#	fout() - emulate an opclass 3 instruction			#
1109#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
1110#	_real_inex() - "callout" to operating system inexact handler	#
1111#	_fpsp_done() - "callout" for exit; work all done		#
1112#	_real_trace() - "callout" for Trace enabled exception		#
1113#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1114#	_real_snan() - "callout" for SNAN exception			#
1115#	_real_operr() - "callout" for OPERR exception			#
1116#	_real_ovfl() - "callout" for OVFL exception			#
1117#	_real_unfl() - "callout" for UNFL exception			#
1118#	get_packed() - fetch packed operand from memory			#
1119#									#
1120# INPUT ***************************************************************	#
1121#	- The system stack contains the "Unimp Data Type" stk frame	#
1122#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1123#									#
1124# OUTPUT **************************************************************	#
1125#	If Inexact exception (opclass 3):				#
1126#	- The system stack is changed to an Inexact exception stk frame	#
1127#	If SNAN exception (opclass 3):					#
1128#	- The system stack is changed to an SNAN exception stk frame	#
1129#	If OPERR exception (opclass 3):					#
1130#	- The system stack is changed to an OPERR exception stk frame	#
1131#	If OVFL exception (opclass 3):					#
1132#	- The system stack is changed to an OVFL exception stk frame	#
1133#	If UNFL exception (opclass 3):					#
1134#	- The system stack is changed to an UNFL exception stack frame	#
1135#	If Trace exception enabled:					#
1136#	- The system stack is changed to a Trace exception stack frame	#
1137#	Else: (normal case)						#
1138#	- Correct result has been stored as appropriate			#
1139#									#
1140# ALGORITHM ***********************************************************	#
1141#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1142# unimplemented data types. These can be either opclass 0,2 or 3	#
1143# instructions, and (2) PACKED unimplemented data format instructions	#
1144# also of opclasses 0,2, or 3.						#
1145#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1146# operand from the fsave state frame and the dst operand (if dyadic)	#
1147# from the FP register file. The instruction is then emulated by	#
1148# choosing an emulation routine from a table of routines indexed by	#
1149# instruction type. Once the instruction has been emulated and result	#
1150# saved, then we check to see if any enabled exceptions resulted from	#
1151# instruction emulation. If none, then we exit through the "callout"	#
1152# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1153# this exception into the FPU in the fsave state frame and then exit	#
1154# through _fpsp_done().							#
1155#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1156# emulated and exceptions handled. The differences occur in how the	#
1157# handler loads the packed op (by calling get_packed() routine) and	#
1158# by the fact that a Trace exception could be pending for PACKED ops.	#
1159# If a Trace exception is pending, then the current exception stack	#
1160# frame is changed to a Trace exception stack frame and an exit is	#
1161# made through _real_trace().						#
1162#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1163# performed by calling the routine fout(). If no exception should occur	#
1164# as the result of emulation, then an exit either occurs through	#
1165# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1166# (a Trace stack frame must be created here, too). If an FP exception	#
1167# should occur, then we must create an exception stack frame of that	#
1168# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1169# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1170# emulation is performed in a similar manner.				#
1171#									#
1172#########################################################################
1173
1174#
1175# (1) DENORM and UNNORM (unimplemented) data types:
1176#
1177#				post-instruction
1178#				*****************
1179#				*      EA	*
1180#	 pre-instruction	*		*
1181#	*****************	*****************
1182#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1183#	*****************	*****************
1184#	*     Next	*	*     Next	*
1185#	*      PC	*	*      PC	*
1186#	*****************	*****************
1187#	*      SR	*	*      SR	*
1188#	*****************	*****************
1189#
1190# (2) PACKED format (unsupported) opclasses two and three:
1191#	*****************
1192#	*      EA	*
1193#	*		*
1194#	*****************
1195#	* 0x2 *  0x0dc	*
1196#	*****************
1197#	*     Next	*
1198#	*      PC	*
1199#	*****************
1200#	*      SR	*
1201#	*****************
1202#
1203	global		_fpsp_unsupp
1204_fpsp_unsupp:
1205
1206	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1207
1208	fsave		FP_SRC(%a6)		# save fp state
1209
1210	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1211	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1213
# record the a7 that was in effect at the time of the exception in EXC_A7.
1214	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1215	bne.b		fu_s
1216fu_u:
1217	mov.l		%usp,%a0		# fetch user stack pointer
1218	mov.l		%a0,EXC_A7(%a6)		# save on stack
1219	bra.b		fu_cont
1220# if the exception is an opclass zero or two unimplemented data type
1221# exception, then the a7' calculated here is wrong since it doesn't
1222# stack an ea. however, we don't need an a7' for this case anyways.
1223fu_s:
1224	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1225	mov.l		%a0,EXC_A7(%a6)		# save on stack
1226
1227fu_cont:
1228
1229# the FPIAR holds the "current PC" of the faulting instruction
1230# the FPIAR should be set correctly for ALL exceptions passing through
1231# this point.
1232	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1233	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1234	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1235	bsr.l		_imem_read_long		# fetch the instruction words
1236	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1237
1238############################
1239
1240	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1241
1242# Separate opclass three (fpn-to-mem) ops since they have a different
1243# stack frame and protocol.
1244	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1245	bne.w		fu_out			# yes
1246
1247# Separate packed opclass two instructions.
# the top six bits of the command word hold the opclass and src format
# (fix_skewed_ops tests the same field against 0x11 and 0x15 for sgl/dbl).
1248	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
1249	cmpi.b		%d0,&0x13
1250	beq.w		fu_in_pack
1251
1252
1253# I'm not sure at this point what FPSR bits are valid for this instruction.
1254# so, since the emulation routines re-create them anyways, zero exception field
1255	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1256
1257	fmov.l		&0x0,%fpcr		# zero current control regs
1258	fmov.l		&0x0,%fpsr
1259
1260# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261# precision format if the src format was single or double and the
1262# source data type was an INF, NAN, DENORM, or UNNORM
1263	lea		FP_SRC(%a6),%a0		# pass ptr to input
1264	bsr.l		fix_skewed_ops
1265
1266# we don't know whether the src operand or the dst operand (or both) is the
1267# UNNORM or DENORM. call the function that tags the operand type. if the
1268# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1270	bsr.l		set_tag_x		# tag the operand type
1271	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1272	bne.b		fu_op2			# no
1273	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1274
1275fu_op2:
1276	mov.b		%d0,STAG(%a6)		# save src optype tag
1277
# d0 = dst register number; it is only consumed by load_fpn2 on the
# dyadic path below.
1278	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1279
1280# bit five of the fp extension word separates the monadic and dyadic operations
1281# at this point
1282	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1283	beq.b		fu_extract		# monadic
1284	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1285	beq.b		fu_extract		# yes, so it's monadic, too
1286
1287	bsr.l		load_fpn2		# load dst into FP_DST
1288
1289	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1290	bsr.l		set_tag_x		# tag the operand type
1291	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1292	bne.b		fu_op2_done		# no
1293	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1294fu_op2_done:
1295	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1296
# set up the call to the emulation routine:
#	d0 = rounding mode/precision, d1 = extension (tbl_unsupp index),
#	a0 = ptr to src op, a1 = ptr to dst op
1297fu_extract:
1298	clr.l		%d0
1299	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1300
1301	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1302
1303	lea		FP_SRC(%a6),%a0
1304	lea		FP_DST(%a6),%a1
1305
# the table entry is an offset relative to tbl_unsupp; jsr adds it back in.
1306	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1307	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1308
1309#
1310# Exceptions in order of precedence:
1311#	BSUN	: none
1312#	SNAN	: all dyadic ops
1313#	OPERR	: fsqrt(-NORM)
1314#	OVFL	: all except ftst,fcmp
1315#	UNFL	: all except ftst,fcmp
1316#	DZ	: fdiv
1317#	INEX2	: all except ftst,fcmp
1318#	INEX1	: none (packed doesn't go through here)
1319#
1320
1321# we determine the highest priority exception(if any) set by the
1322# emulation routine that has also been enabled by the user.
1323	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
1324	bne.b		fu_in_ena		# some are enabled
1325
1326fu_in_cont:
1327# fcmp and ftst do not store any result.
1328	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1329	andi.b		&0x38,%d0		# extract bits 3-5
1330	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1331	beq.b		fu_in_exit		# yes
1332
1333	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1334	bsr.l		store_fpreg		# store the result
1335
1336fu_in_exit:
1337
1338	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1339	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1341
1342	unlk		%a6
1343
1344	bra.l		_fpsp_done
1345
# entered with d0 = FPCR_ENABLE byte (non-zero). find the highest priority
# exception that is both enabled and set in FPSR_EXCEPT.
1346fu_in_ena:
1347	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1348	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1349	bne.b		fu_in_exc		# there is at least one set
1350
1351#
1352# No exceptions occurred that were also enabled. Now:
1353#
1354#	if (OVFL && ovfl_disabled && inexact_enabled) {
1355#	    branch to _real_inex() (even if the result was exact!);
1356#	} else {
1357#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1358#	    return;
1359#	}
1360#
1361	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362	beq.b		fu_in_cont		# no
1363
1364fu_in_ovflchk:
1365	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366	beq.b		fu_in_cont		# no
1367	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1368
1369#
1370# An exception occurred and that exception was enabled:
1371#
1372#	shift enabled exception field into lo byte of d0;
1373#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375#		/*
1376#		 * this is the case where we must call _real_inex() now or else
1377#		 * there will be no other way to pass it the exceptional operand
1378#		 */
1379#		call _real_inex();
1380#	} else {
1381#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1382#	}
1383#
1384fu_in_exc:
1385	subi.l		&24,%d0			# fix offset to be 0-7
1386	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1387	bne.b		fu_in_exc_exit		# no
1388
1389# the enabled exception was inexact
1390	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391	bne.w		fu_in_exc_unfl		# yes
1392	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393	bne.w		fu_in_exc_ovfl		# yes
1394
1395# here, we insert the correct fsave status value into the fsave frame for the
1396# corresponding exception. the operand in the fsave frame should be the original
1397# src operand.
# d0 = index into tbl_except (preserved across the funimp_skew call).
1398fu_in_exc_exit:
1399	mov.l		%d0,-(%sp)		# save d0
1400	bsr.l		funimp_skew		# skew sgl or dbl inputs
1401	mov.l		(%sp)+,%d0		# restore d0
1402
1403	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404
1405	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1406	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1408
1409	frestore	FP_SRC(%a6)		# restore src op
1410
1411	unlk		%a6
1412
1413	bra.l		_fpsp_done
1414
# fsave status words, indexed by exception priority (same order as
# tbl_fu_out): BSUN, SNAN, OPERR, OVFL / UNFL, DZ, INEX2, INEX1.
1415tbl_except:
1416	short		0xe000,0xe006,0xe004,0xe005
1417	short		0xe003,0xe002,0xe001,0xe001
1418
# inexact was the enabled exception but a disabled underflow/overflow also
# occurred: select the UNFL (index 4) or OVFL (index 3) status instead.
1419fu_in_exc_unfl:
1420	mov.w		&0x4,%d0
1421	bra.b		fu_in_exc_exit
1422fu_in_exc_ovfl:
1423	mov.w		&0x03,%d0
1424	bra.b		fu_in_exc_exit
1425
1426# If the input operand to this operation was opclass two and a single
1427# or double precision denorm, inf, or nan, the operand needs to be
1428# "corrected" in order to have the proper equivalent extended precision
1429# number.
#
# In:   a0 = ptr to the operand to fix (callers pass FP_SRC(%a6))
#       EXC_CMDREG(%a6) = command word of the faulting instruction
# Out:  operand at (a0) rewritten in place when it was skewed
# Uses: d0 (clobbered); relies on norm() leaving a0 unchanged
1430	global		fix_skewed_ops
1431fix_skewed_ops:
1432	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1434	beq.b		fso_sgl			# yes
1435	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1436	beq.b		fso_dbl			# yes
1437	rts					# no
1438
# single precision source: only the exponents $3f80 and $407f are skewed.
1439fso_sgl:
1440	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1441	andi.w		&0x7fff,%d0		# strip sign
1442	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1443	beq.b		fso_sgl_dnrm_zero	# yes
1444	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1445	beq.b		fso_infnan		# yes
1446	rts					# no
1447
# the andi.l result also decides zero vs. denorm: if the hi mantissa
# longword is zero after clearing the j-bit, the operand is a zero.
1448fso_sgl_dnrm_zero:
1449	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450	beq.b		fso_zero		# it's a skewed zero
1451fso_sgl_dnrm:
1452# here, we count on norm not to alter a0...
1453	bsr.l		norm			# normalize mantissa
1454	neg.w		%d0			# -shft amt
1455	addi.w		&0x3f81,%d0		# adjust new exponent
1456	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1457	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1458	rts
1459
# shared by the sgl and dbl paths: keep only the sign bit of the exponent word.
1460fso_zero:
1461	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1462	rts
1463
# shared by the sgl and dbl paths.
1464fso_infnan:
1465	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1466	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1467	rts
1468
# double precision source: only the exponents $3c00 and $43ff are skewed.
1469fso_dbl:
1470	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1471	andi.w		&0x7fff,%d0		# strip sign
1472	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1473	beq.b		fso_dbl_dnrm_zero	# yes
1474	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1475	beq.b		fso_infnan		# yes
1476	rts					# no
1477
# for double, the lo mantissa longword must be checked as well before the
# operand can be called a zero.
1478fso_dbl_dnrm_zero:
1479	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1481	tst.l		LOCAL_LO(%a0)		# is it a zero?
1482	beq.b		fso_zero		# yes
1483fso_dbl_dnrm:
1484# here, we count on norm not to alter a0...
1485	bsr.l		norm			# normalize mantissa
1486	neg.w		%d0			# -shft amt
1487	addi.w		&0x3c01,%d0		# adjust new exponent
1488	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1489	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1490	rts
1491
1492#################################################################
1493
1494# fmove out took an unimplemented data type exception.
1495# the src operand is in FP_SRC. Call _fout() to write out the result and
1496# to determine which exceptions, if any, to take.
1497fu_out:
1498
1499# Separate packed move outs from the UNNORM and DENORM move outs.
# d0 = three-bit destination format field of the command word.
1500	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1501	cmpi.b		%d0,&0x3
1502	beq.w		fu_out_pack
1503	cmpi.b		%d0,&0x7
1504	beq.w		fu_out_pack
1505
1506
1507# I'm not sure at this point what FPSR bits are valid for this instruction.
1508# so, since the emulation routines re-create them anyways, zero exception field.
1509# fmove out doesn't affect ccodes.
1510	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1511
1512	fmov.l		&0x0,%fpcr		# zero current control regs
1513	fmov.l		&0x0,%fpsr
1514
1515# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516# call here. just figure out what it is...
1517	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1518	andi.w		&0x7fff,%d0		# strip sign
1519	beq.b		fu_out_denorm		# it's a DENORM
1520
# non-zero exponent, so it must be an UNNORM.
1521	lea		FP_SRC(%a6),%a0
1522	bsr.l		unnorm_fix		# yes; fix it
1523
1524	mov.b		%d0,STAG(%a6)
1525
1526	bra.b		fu_out_cont
1527fu_out_denorm:
1528	mov.b		&DENORM,STAG(%a6)
1529fu_out_cont:
1530
1531	clr.l		%d0
1532	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1533
1534	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1535
1536	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1537	bsr.l		fout			# call fmove out routine
1538
1539# Exceptions in order of precedence:
1540#	BSUN	: none
1541#	SNAN	: none
1542#	OPERR	: fmove.{b,w,l} out of large UNNORM
1543#	OVFL	: fmove.{s,d}
1544#	UNFL	: fmove.{s,d,x}
1545#	DZ	: none
1546#	INEX2	: all
1547#	INEX1	: none (packed doesn't travel through here)
1548
1549# determine the highest priority exception(if any) set by the
1550# emulation routine that has also been enabled by the user.
1551	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1552	bne.w		fu_out_ena		# some are enabled
1553
1554fu_out_done:
1555
1556	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1557
1558# on extended precision opclass three instructions using pre-decrement or
1559# post-increment addressing mode, the address register is not updated. if the
1560# address register was the stack pointer used from user mode, then let's update
1561# it here. if it was used from supervisor mode, then we have to handle this
1562# as a special case.
1563	btst		&0x5,EXC_SR(%a6)
1564	bne.b		fu_out_done_s
1565
1566	mov.l		EXC_A7(%a6),%a0		# restore a7
1567	mov.l		%a0,%usp
1568
1569fu_out_done_cont:
1570	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1571	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1573
1574	unlk		%a6
1575
1576	btst		&0x7,(%sp)		# is trace on?
1577	bne.b		fu_out_trace		# yes
1578
1579	bra.l		_fpsp_done
1580
1581# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582# ("fmov.x fpm,-(a7)") if so, the exception frame has to be shifted down and
# the result copied to where the frame used to be (done below).
1583fu_out_done_s:
1584	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1585	bne.b		fu_out_done_cont
1586
1587# the extended precision result is still in fp0. but, we need to save it
1588# somewhere on the stack until we can copy it to its final resting place.
1589# here, we're counting on the top of the stack to be the old place-holders
1590# for fp0/fp1 which have already been restored. that way, we can write
1591# over those destinations with the shifted stack frame.
1592	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1593
1594	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1595	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1597
# no unlk here: a6 is reloaded by hand and the locals are then addressed
# relative to sp (LOCAL_SIZE + the usual a6-relative offset).
1598	mov.l		(%a6),%a6		# restore frame pointer
1599
# shift two longwords of the exception frame down by 0xc bytes.
1600	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1601	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1602
1603# now, copy the result to the proper place on the stack
1604	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607
# NOTE(review): presumably sp now points at the relocated SR, since the
# btst below tests (%sp) the same way the unlk path does -- confirm against
# the EXC_SR definition.
1608	add.l		&LOCAL_SIZE-0x8,%sp
1609
1610	btst		&0x7,(%sp)
1611	bne.b		fu_out_trace
1612
1613	bra.l		_fpsp_done
1614
# entered with d0 = FPCR_ENABLE byte (non-zero). find the highest priority
# exception that is both enabled and set in FPSR_EXCEPT.
1615fu_out_ena:
1616	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1617	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1618	bne.b		fu_out_exc		# there is at least one set
1619
1620# no exceptions were set.
1621# if a disabled overflow occurred and inexact was enabled but the result
1622# was exact, then a branch to _real_inex() is made.
1623	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624	beq.w		fu_out_done		# no
1625
1626fu_out_ovflchk:
1627	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628	beq.w		fu_out_done		# no
1629	bra.w		fu_inex			# yes
1630
1631#
1632# The fp move out that took the "Unimplemented Data Type" exception was
1633# being traced. Since the stack frames are similar, get the "current" PC
1634# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635#
1636#		  UNSUPP FRAME		   TRACE FRAME
1637#		*****************	*****************
1638#		*      EA	*	*    Current	*
1639#		*		*	*      PC	*
1640#		*****************	*****************
1641#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1642#		*****************	*****************
1643#		*     Next	*	*     Next	*
1644#		*      PC	*	*      PC	*
1645#		*****************	*****************
1646#		*      SR	*	*      SR	*
1647#		*****************	*****************
1648#
# entered with a6 already restored and sp pointing at the exception frame.
1649fu_out_trace:
1650	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1651	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1652	bra.l		_real_trace
1653
1654# an exception occurred and that exception was enabled.
1655fu_out_exc:
1656	subi.l		&24,%d0			# fix offset to be 0-7
1657
1658# we don't mess with the existing fsave frame. just re-insert it and
1659# jump to the "_real_{}()" handler...
# d0 indexes tbl_fu_out; each entry is a 16-bit offset from tbl_fu_out.
1660	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1661	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1662
1663	swbeg		&0x8
1664tbl_fu_out:
1665	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1666	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
1667	short		fu_operr	- tbl_fu_out	# OPERR
1668	short		fu_ovfl		- tbl_fu_out	# OVFL
1669	short		fu_unfl		- tbl_fu_out	# UNFL
1670	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1671	short		fu_inex		- tbl_fu_out	# INEX2
1672	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1673
1674# for snan and operr, the src op is still in FP_SRC so just
1675# frestore it. (ovfl, unfl, and inex first overwrite FP_SRC with the EXOP.)
# note: tbl_fu_out has no entry that dispatches to fu_snan.
1676fu_snan:
1677	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1678	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1680
1681	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1682	mov.w		&0xe006,2+FP_SRC(%a6)	# SNAN exc status
1683
1684	frestore	FP_SRC(%a6)
1685
1686	unlk		%a6
1687
1688
1689	bra.l		_real_snan
1690
# enabled OPERR on an fmove out: FP_SRC is frestore'd as is, with only the
# status word and the frame's vector offset rewritten, then exit through
# _real_operr().
1691fu_operr:
1692	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1693	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1695
1696	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1697	mov.w		&0xe004,2+FP_SRC(%a6)	# OPERR exc status
1698
1699	frestore	FP_SRC(%a6)
1700
1701	unlk		%a6
1702
1703
1704	bra.l		_real_operr
1705
# enabled OVFL on an fmove out: the EXOP in fp1 replaces the operand in
# FP_SRC before the frame is frestore'd, then exit through _real_ovfl().
1706fu_ovfl:
1707	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1708
1709	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1710	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1712
1713	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1714	mov.w		&0xe005,2+FP_SRC(%a6)	# OVFL exc status
1715
1716	frestore	FP_SRC(%a6)		# restore EXOP
1717
1718	unlk		%a6
1719
1720	bra.l		_real_ovfl
1721
1722# underflow can happen for extended precision. extended precision opclass
1723# three instruction exceptions don't update the stack pointer. so, if the
1724# exception occurred from user mode, then simply update a7 and exit normally.
1725# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(sp) (mda7_flg); if so, the exception frame is shifted down at fu_unfl_s.
1726fu_unfl:
1727	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1728
1729	btst		&0x5,EXC_SR(%a6)
1730	bne.w		fu_unfl_s
1731
1732	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1733	mov.l		%a0,%usp		# to or not...
1734
1735fu_unfl_cont:
1736	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1737
1738	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1739	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1741
1742	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1743	mov.w		&0xe003,2+FP_SRC(%a6)	# UNFL exc status
1744
1745	frestore	FP_SRC(%a6)		# restore EXOP
1746
1747	unlk		%a6
1748
1749	bra.l		_real_unfl
1750
1751fu_unfl_s:
1752	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1753	bne.b		fu_unfl_cont
1754
1755# the extended precision result is still in fp0. but, we need to save it
1756# somewhere on the stack until we can copy it to its final resting place
1757# (where the exc frame is currently). make sure it's not at the top of the
1758# frame or it will get overwritten when the exc stack frame is shifted "down".
1759	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1760	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1761
1762	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1763	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1765
# here the EXOP lives in FP_DST (FP_SRC holds the answer), so the status
# word and the frestore both use FP_DST.
1766	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1767	mov.w		&0xe003,2+FP_DST(%a6)	# UNFL exc status
1768
1769	frestore	FP_DST(%a6)		# restore EXOP
1770
# no unlk here: a6 is reloaded by hand and the locals are then addressed
# relative to sp (LOCAL_SIZE + the usual a6-relative offset).
1771	mov.l		(%a6),%a6		# restore frame pointer
1772
# shift three longwords of the exception frame down by 0xc bytes.
1773	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1774	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1775	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776
1777# now, copy the result to the proper place on the stack
1778	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781
# NOTE(review): presumably leaves sp at the relocated exception frame --
# confirm against the EXC_SR definition.
1782	add.l		&LOCAL_SIZE-0x8,%sp
1783
1784	bra.l		_real_unfl
1785
1786# fmove in and out enter here.
# the EXOP (fp1) is saved into FP_SRC, tagged with the inexact fsave status
# (0xe001), restored into the FPU, and the handler exits via _real_inex.
1787fu_inex:
1788	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1789
1790	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1791	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1792	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1793
1794	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1795	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
1796
1797	frestore	FP_SRC(%a6)		# restore EXOP
1798
1799	unlk		%a6
1800
1801
1802	bra.l		_real_inex
1803
1804#########################################################################
1805#########################################################################
1806fu_in_pack:
1807
1808
1809# I'm not sure at this point what FPSR bits are valid for this instruction.
1810# so, since the emulation routines re-create them anyways, zero exception field
1811	andi.l		&0x0ff00ff,USER_FPSR(%a6) # clears FPSR bits 31-24 and 15-8
1812
1813	fmov.l		&0x0,%fpcr		# zero current control regs
1814	fmov.l		&0x0,%fpsr
1815
1816	bsr.l		get_packed		# fetch packed src operand
1817
1818	lea		FP_SRC(%a6),%a0		# pass ptr to src
1819	bsr.l		set_tag_x		# set src optype tag
1820
1821	mov.b		%d0,STAG(%a6)		# save src optype tag
1822
1823	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824
1825# bit five of the fp extension word separates the monadic and dyadic operations
1826# at this point
1827	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1828	beq.b		fu_extract_p		# monadic
1829	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1830	beq.b		fu_extract_p		# yes, so it's monadic, too
1831
1832	bsr.l		load_fpn2		# load dst into FP_DST
1833
1834	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1835	bsr.l		set_tag_x		# tag the operand type
1836	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1837	bne.b		fu_op2_done_p		# no
1838	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1839fu_op2_done_p:
1840	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1841
1842fu_extract_p:
1843	clr.l		%d0
1844	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1845
1846	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1847
1848	lea		FP_SRC(%a6),%a0
1849	lea		FP_DST(%a6),%a1
1850
# tbl_unsupp entries are offsets from tbl_unsupp itself: the first access loads
# the entry for this extension, the jsr adds the table base back in.
1851	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1852	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1853
1854#
1855# Exceptions in order of precedence:
1856#	BSUN	: none
1857#	SNAN	: all dyadic ops
1858#	OPERR	: fsqrt(-NORM)
1859#	OVFL	: all except ftst,fcmp
1860#	UNFL	: all except ftst,fcmp
1861#	DZ	: fdiv
1862#	INEX2	: all except ftst,fcmp
1863#	INEX1	: all
1864#
1865
1866# we determine the highest priority exception(if any) set by the
1867# emulation routine that has also been enabled by the user.
1868	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1869	bne.w		fu_in_ena_p		# some are enabled
1870
1871fu_in_cont_p:
1872# fcmp and ftst do not store any result.
1873	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1874	andi.b		&0x38,%d0		# extract bits 3-5
1875	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1876	beq.b		fu_in_exit_p		# yes
1877
1878	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1879	bsr.l		store_fpreg		# store the result
1880
1881fu_in_exit_p:
1882
1883	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1884	bne.w		fu_in_exit_s_p		# supervisor
1885
1886	mov.l		EXC_A7(%a6),%a0		# update user a7
1887	mov.l		%a0,%usp
1888
1889fu_in_exit_cont_p:
1890	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1891	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1893
1894	unlk		%a6			# unravel stack frame
1895
# after unlk, (%sp) is the first byte of the exc frame; bit 7 is tested as the
# trace flag.
1896	btst		&0x7,(%sp)		# is trace on?
1897	bne.w		fu_trace_p		# yes
1898
1899	bra.l		_fpsp_done		# exit to os
1900
1901# the exception occurred in supervisor mode. check to see if the
1902# addressing mode was (a7)+. if so, we'll need to shift the
1903# stack frame "up".
1904fu_in_exit_s_p:
1905	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906	beq.b		fu_in_exit_cont_p	# no
1907
1908	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1909	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1911
1912	unlk		%a6			# unravel stack frame
1913
1914# shift the stack frame "up". we don't really care about the <ea> field.
# the second longword is moved first so the 0xc-byte upward copy never reads a
# location it has already overwritten.
1915	mov.l		0x4(%sp),0x10(%sp)
1916	mov.l		0x0(%sp),0xc(%sp)
1917	add.l		&0xc,%sp		# sp -> relocated frame
1918
1919	btst		&0x7,(%sp)		# is trace on?
1920	bne.w		fu_trace_p		# yes
1921
1922	bra.l		_fpsp_done		# exit to os
1923
# entered with d0 = FPCR_ENABLE byte (non-zero).
1924fu_in_ena_p:
1925	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1926	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1927	bne.b		fu_in_exc_p		# at least one was set
1928
1929#
1930# No exceptions occurred that were also enabled. Now:
1931#
1932#	if (OVFL && ovfl_disabled && inexact_enabled) {
1933#	    branch to _real_inex() (even if the result was exact!);
1934#	} else {
1935#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1936#	    return;
1937#	}
1938#
# NOTE(review): in the code visible here the "inexact" branch goes to
# fu_in_exc_ovfl_p, which selects the OVFL fsave status (index 3 of
# tbl_except_p) and exits through fu_in_exc_exit_p; no direct branch to
# _real_inex() appears on this path.
1939	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940	beq.w		fu_in_cont_p		# no
1941
1942fu_in_ovflchk_p:
1943	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944	beq.w		fu_in_cont_p		# no
1945	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1946
1947#
1948# An exception occurred and that exception was enabled:
1949#
1950#	shift enabled exception field into lo byte of d0;
1951#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953#		/*
1954#		 * this is the case where we must call _real_inex() now or else
1955#		 * there will be no other way to pass it the exceptional operand
1956#		 */
1957#		call _real_inex();
1958#	} else {
1959#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1960#	}
1961#
1962fu_in_exc_p:
1963	subi.l		&24,%d0			# fix offset to be 0-7
1964	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1965	blt.b		fu_in_exc_exit_p	# no
1966
1967# the enabled exception was inexact
1968	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969	bne.w		fu_in_exc_unfl_p	# yes
1970	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971	bne.w		fu_in_exc_ovfl_p	# yes
1972
1973# here, we insert the correct fsave status value into the fsave frame for the
1974# corresponding exception. the operand in the fsave frame should be the original
1975# src operand.
1976# as a reminder for future predicted pain and agony, we are passing in fsave the
1977# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1979fu_in_exc_exit_p:
1980	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1981	bne.w		fu_in_exc_exit_s_p	# supervisor
1982
1983	mov.l		EXC_A7(%a6),%a0		# update user a7
1984	mov.l		%a0,%usp
1985
1986fu_in_exc_exit_cont_p:
# d0 = exception index (0-7); pick the matching fsave status word.
1987	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1988
1989	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1990	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1992
1993	frestore	FP_SRC(%a6)		# restore src op
1994
1995	unlk		%a6
1996
1997	btst		&0x7,(%sp)		# is trace enabled?
1998	bne.w		fu_trace_p		# yes
1999
2000	bra.l		_fpsp_done
2001
# fsave status words indexed by exception number 0-7 (bit position found by
# bfffo in the enabled-and-set exception byte, highest priority first).
# NOTE(review): entry 0 (BSUN) differs from tbl_iea_except (0xe000 vs 0xe002);
# BSUN is listed as "none" for these ops, so presumably it is never selected.
2002tbl_except_p:
2003	short		0xe000,0xe006,0xe004,0xe005	# BSUN,SNAN,OPERR,OVFL
2004	short		0xe003,0xe002,0xe001,0xe001	# UNFL,DZ,INEX2,INEX1
2005
# inexact was enabled but a disabled overflow/underflow also occurred: replace
# the exception index in d0 so that fu_in_exc_exit_p picks the OVFL (3) or
# UNFL (4) entry of tbl_except_p instead of the INEX one.
2006fu_in_exc_ovfl_p:
2007	mov.w		&0x3,%d0		# index of OVFL status
2008	bra.w		fu_in_exc_exit_p
2009
2010fu_in_exc_unfl_p:
2011	mov.w		&0x4,%d0		# index of UNFL status
2012	bra.w		fu_in_exc_exit_p
2013
# supervisor-mode exit with an enabled exception pending. only the (a7)+
# addressing mode needs special handling (frame shifted "up" 0xc bytes).
2014fu_in_exc_exit_s_p:
2015	btst		&mia7_bit,SPCOND_FLG(%a6)	# was ea mode (a7)+
2016	beq.b		fu_in_exc_exit_cont_p	# no
2017
2018	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2019
2020	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2021	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2023
2024	frestore	FP_SRC(%a6)		# restore src op
2025
2026	unlk		%a6			# unravel stack frame
2027
2028# shift stack frame "up". who cares about <ea> field.
2029	mov.l		0x4(%sp),0x10(%sp)
2030	mov.l		0x0(%sp),0xc(%sp)
2031	add.l		&0xc,%sp
2032
2033	btst		&0x7,(%sp)		# is trace on?
2034	bne.b		fu_trace_p		# yes
2035
2036	bra.l		_fpsp_done		# exit to os
2037
2038#
2039# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2040# exception was being traced. Make the "current" PC the FPIAR and put it in the
2041# trace stack frame then jump to _real_trace().
2042#
2043#		  UNSUPP FRAME		   TRACE FRAME
2044#		*****************	*****************
2045#		*      EA	*	*    Current	*
2046#		*		*	*      PC	*
2047#		*****************	*****************
2048#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2049#		*****************	*****************
2050#		*     Next	*	*     Next	*
2051#		*      PC	*	*      PC	*
2052#		*****************	*****************
2053#		*      SR	*	*      SR	*
2054#		*****************	*****************
2055fu_trace_p:
2056	mov.w		&0x2024,0x6(%sp)	# frame format 0x2, vector offset 0x024
2057	fmov.l		%fpiar,0x8(%sp)		# "Current PC" field <- FPIAR
2058
2059	bra.l		_real_trace
2060
2061#########################################################
2062#########################################################
2063fu_out_pack:
2064
2065
2066# I'm not sure at this point what FPSR bits are valid for this instruction.
2067# so, since the emulation routines re-create them anyways, zero exception field.
2068# fmove out doesn't affect ccodes.
2069	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2070
2071	fmov.l		&0x0,%fpcr		# zero current control regs
2072	fmov.l		&0x0,%fpsr
2073
2074	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# 3-bit register field -> d0
2075	bsr.l		load_fpn1
2076
2077# unlike other opclass 3, unimplemented data type exceptions, packed must be
2078# able to detect all operand types.
2079	lea		FP_SRC(%a6),%a0
2080	bsr.l		set_tag_x		# tag the operand type
2081	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2082	bne.b		fu_op2_p		# no
2083	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2084
2085fu_op2_p:
2086	mov.b		%d0,STAG(%a6)		# save src optype tag
2087
2088	clr.l		%d0
2089	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2090
2091	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2092
2093	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2094	bsr.l		fout			# call fmove out routine
2095
2096# Exceptions in order of precedence:
2097#	BSUN	: no
2098#	SNAN	: yes
2099#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2100#	OVFL	: no
2101#	UNFL	: no
2102#	DZ	: no
2103#	INEX2	: yes
2104#	INEX1	: no
2105
2106# determine the highest priority exception(if any) set by the
2107# emulation routine that has also been enabled by the user.
2108	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2109	bne.w		fu_out_ena_p		# some are enabled
2110
2111fu_out_exit_p:
2112	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2113
2114	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2115	bne.b		fu_out_exit_s_p		# supervisor
2116
2117	mov.l		EXC_A7(%a6),%a0		# update user a7
2118	mov.l		%a0,%usp
2119
2120fu_out_exit_cont_p:
2121	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2122	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2124
2125	unlk		%a6			# unravel stack frame
2126
2127	btst		&0x7,(%sp)		# is trace on?
2128	bne.w		fu_trace_p		# yes
2129
2130	bra.l		_fpsp_done		# exit to os
2131
2132# the exception occurred in supervisor mode. check to see if the
2133# addressing mode was -(a7). if so, we'll need to shift the
2134# stack frame "down".
2135fu_out_exit_s_p:
2136	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137	beq.b		fu_out_exit_cont_p	# no
2138
2139	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2140	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2142
2143	mov.l		(%a6),%a6		# restore frame pointer
2144
# a6 no longer points at the local frame; address everything off sp.
# move the first two longwords of the exc frame 0xc bytes lower (the <ea>
# longword is not copied on this path).
2145	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2146	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147
2148# now, copy the result to the proper place on the stack
2149	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152
2153	add.l		&LOCAL_SIZE-0x8,%sp
2154
2155	btst		&0x7,(%sp)		# is trace on?
2156	bne.w		fu_trace_p		# yes
2157
2158	bra.l		_fpsp_done		# exit to os
2159
# entered with d0 = FPCR_ENABLE byte (non-zero).
2160fu_out_ena_p:
2161	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2162	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2163	beq.w		fu_out_exit_p		# none enabled and set
2164
2165	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2166
2167# an exception occurred and that exception was enabled.
2168# the only exception possible on packed move out are INEX, OPERR, and SNAN.
2169fu_out_exc_p:
# d0 still holds the raw bfffo offset (24-31, not reduced to 0-7 here);
# 0x1a = 26 is the third bit of the byte, i.e. OPERR in the precedence list.
2170	cmpi.b		%d0,&0x1a
2171	bgt.w		fu_inex_p2		# lower priority than OPERR: INEX
2172	beq.w		fu_operr_p		# OPERR
# otherwise fall through: SNAN
2173
2174fu_snan_p:
2175	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2176	bne.b		fu_snan_s_p		# supervisor
2177
2178	mov.l		EXC_A7(%a6),%a0		# update user a7
2179	mov.l		%a0,%usp
2180	bra.w		fu_snan
2181
2182fu_snan_s_p:
2183	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# was the <ea> mode -(a7)?
2184	bne.w		fu_snan			# no
2185
2186# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187# the strategy is to move the exception frame "down" 12 bytes. then, we
2188# can store the default result where the exception frame was.
2189	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2190	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2192
2193	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
2194	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2195
2196	frestore	FP_SRC(%a6)		# restore src operand
2197
2198	mov.l		(%a6),%a6		# restore frame pointer
2199
2200	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2203
2204# now, we copy the default result to its proper location
2205	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2208
2209	add.l		&LOCAL_SIZE-0x8,%sp
2210
2211
2212	bra.l		_real_snan
2213
2214fu_operr_p:
2215	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2216	bne.w		fu_operr_p_s		# supervisor
2217
2218	mov.l		EXC_A7(%a6),%a0		# update user a7
2219	mov.l		%a0,%usp
2220	bra.w		fu_operr
2221
2222fu_operr_p_s:
2223	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# was the <ea> mode -(a7)?
2224	bne.w		fu_operr		# no
2225
2226# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227# the strategy is to move the exception frame "down" 12 bytes. then, we
2228# can store the default result where the exception frame was.
2229	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2230	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2232
2233	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2234	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2235
2236	frestore	FP_SRC(%a6)		# restore src operand
2237
2238	mov.l		(%a6),%a6		# restore frame pointer
2239
2240	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243
2244# now, we copy the default result to its proper location
2245	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248
2249	add.l		&LOCAL_SIZE-0x8,%sp
2250
2251
2252	bra.l		_real_operr
2253
2254fu_inex_p2:
2255	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2256	bne.w		fu_inex_s_p2		# supervisor
2257
2258	mov.l		EXC_A7(%a6),%a0		# update user a7
2259	mov.l		%a0,%usp
2260	bra.w		fu_inex
2261
2262fu_inex_s_p2:
2263	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# was the <ea> mode -(a7)?
2264	bne.w		fu_inex			# no
2265
2266# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267# the strategy is to move the exception frame "down" 12 bytes. then, we
2268# can store the default result where the exception frame was.
2269	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2270	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2272
2273	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2274	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2275
2276	frestore	FP_SRC(%a6)		# restore src operand
2277
2278	mov.l		(%a6),%a6		# restore frame pointer
2279
2280	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283
2284# now, we copy the default result to its proper location
2285	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288
2289	add.l		&LOCAL_SIZE-0x8,%sp
2290
2291
2292	bra.l		_real_inex
2293
2294#########################################################################
2295
2296#
2297# if we're stuffing a source operand back into an fsave frame then we
2298# have to make sure that for single or double source operands that the
2299# format stuffed is as weird as the hardware usually makes it.
2300#
2301	global		funimp_skew
# funimp_skew: rewrites FP_SRC(%a6) in place for sgl/dbl sources whose exponent
# is non-zero and at or below 0x3f80 (sgl) / 0x3c00 (dbl); any other source is
# left untouched. uses d0/d1 (and a0 plus dnrm_lp on the dbl path).
2302funimp_skew:
2303	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304	cmpi.b		%d0,&0x1		# was src sgl?
2305	beq.b		funimp_skew_sgl		# yes
2306	cmpi.b		%d0,&0x5		# was src dbl?
2307	beq.b		funimp_skew_dbl		# yes
2308	rts
2309
2310funimp_skew_sgl:
2311	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2312	andi.w		&0x7fff,%d0		# strip sign
2313	beq.b		funimp_skew_sgl_not	# exponent is zero; leave as is
2314	cmpi.w		%d0,&0x3f80
2315	bgt.b		funimp_skew_sgl_not	# exponent above 0x3f80; leave as is
2316	neg.w		%d0			# make exponent negative
2317	addi.w		&0x3f81,%d0		# find amt to shift
2318	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2319	lsr.l		%d0,%d1			# shift it
2320	bset		&31,%d1			# set j-bit
2321	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2322	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2323	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2324funimp_skew_sgl_not:
2325	rts
2326
2327funimp_skew_dbl:
2328	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2329	andi.w		&0x7fff,%d0		# strip sign
2330	beq.b		funimp_skew_dbl_not	# exponent is zero; leave as is
2331	cmpi.w		%d0,&0x3c00
2332	bgt.b		funimp_skew_dbl_not	# exponent above 0x3c00; leave as is
2333
# the sign is parked as 0x00/0xff in byte 2 of FP_SRC while dnrm_lp works on
# the operand, then folded back into the new exponent word below.
2334	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2335	smi.b		0x2+FP_SRC(%a6)
2336	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2337	clr.l		%d0			# clear g,r,s
2338	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2339	mov.w		&0x3c01,%d1		# pass denorm threshold
2340	bsr.l		dnrm_lp			# denorm it
2341	mov.w		&0x3c00,%d0		# new exponent
2342	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2343	beq.b		fss_dbl_denorm_done	# no
2344	bset		&15,%d0			# set sign
2345fss_dbl_denorm_done:
2346	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2347	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2348funimp_skew_dbl_not:
2349	rts
2350
2351#########################################################################
2352	global		_mem_write2
# _mem_write2: when SR bit 5 is clear (user mode) hand the write to
# _dmem_write. otherwise copy three longwords (12 bytes) from (%a0) into
# FP_DST on the local frame and return with d1 = 0.
# NOTE(review): d1 = 0 presumably means "no error", matching how d1 is tested
# after _imem_read in _fpsp_effadd -- confirm against the callers.
2353_mem_write2:
2354	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2355	beq.l		_dmem_write		# user; use the data write callout
2356	mov.l		0x0(%a0),FP_DST_EX(%a6)
2357	mov.l		0x4(%a0),FP_DST_HI(%a6)
2358	mov.l		0x8(%a0),FP_DST_LO(%a6)
2359	clr.l		%d1
2360	rts
2361
2362#########################################################################
2363# XDEF ****************************************************************	#
2364#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2365#			effective address" exception.			#
2366#									#
2367#	This handler should be the first code executed upon taking the	#
2368#	FP Unimplemented Effective Address exception in an operating	#
2369#	system.								#
2370#									#
2371# XREF ****************************************************************	#
2372#	_imem_read_long() - read instruction longword			#
2373#	fix_skewed_ops() - adjust src operand in fsave frame		#
2374#	set_tag_x() - determine optype of src/dst operands		#
2375#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2376#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2377#	load_fpn2() - load dst operand from FP regfile			#
2378#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
2379#	decbin() - convert packed data to FP binary data		#
2380#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2381#	_real_access() - "callout" for access error exception		#
2382#	_mem_read() - read extended immediate operand from memory	#
2383#	_fpsp_done() - "callout" for exit; work all done		#
2384#	_real_trace() - "callout" for Trace enabled exception		#
2385#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2386#	fmovm_ctrl() - emulate fmovm control instruction		#
2387#									#
2388# INPUT ***************************************************************	#
2389#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2390#									#
2391# OUTPUT **************************************************************	#
2392#	If access error:						#
2393#	- The system stack is changed to an access error stack frame	#
2394#	If FPU disabled:						#
2395#	- The system stack is changed to an FPU disabled stack frame	#
2396#	If Trace exception enabled:					#
2397#	- The system stack is changed to a Trace exception stack frame	#
2398#	Else: (normal case)						#
2399#	- None (correct result has been stored as appropriate)		#
2400#									#
2401# ALGORITHM ***********************************************************	#
2402#	This exception handles 3 types of operations:			#
2403# (1) FP Instructions using extended precision or packed immediate	#
2404#     addressing mode.							#
2405# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2406# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2407#									#
2408#	For immediate data operations, the data is read in w/ a		#
2409# _mem_read() "callout", converted to FP binary (if packed), and used	#
2410# as the source operand to the instruction specified by the instruction	#
2411# word. If no FP exception should be reported as a result of the	#
2412# emulation, then the result is stored to the destination register and	#
2413# the handler exits through _fpsp_done(). If an enabled exc has been	#
2414# signalled as a result of emulation, then an fsave state frame		#
2415# corresponding to the FP exception type must be entered into the 060	#
2416# FPU before exiting. In either the enabled or disabled cases, we	#
2417# must also check if a Trace exception is pending, in which case, we	#
2418# must create a Trace exception stack frame from the current exception	#
2419# stack frame. If no Trace is pending, we simply exit through		#
2420# _fpsp_done().								#
2421#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2422# decode and emulate the instruction. No FP exceptions can be pending	#
2423# as a result of this operation emulation. A Trace exception can be	#
2424# pending, though, which means the current stack frame must be changed	#
2425# to a Trace stack frame and an exit made through _real_trace().	#
2426# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2427# was executed from supervisor mode, this handler must store the FP	#
2428# register file values to the system stack by itself since		#
2429# fmovm_dynamic() can't handle this. A normal exit is made through	#
2430# _fpsp_done().								#
2431#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2432# Again, a Trace exception may be pending and an exit made through	#
2433# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2434#									#
2435#	Before any of the above is attempted, it must be checked to	#
2436# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2437# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2438# has higher priority, we check the disabled bit in the PCR. If set,	#
2439# then we must create an 8 word "FPU disabled" exception stack frame	#
2440# from the current 4 word exception stack frame. This includes		#
2441# reproducing the effective address of the instruction to put on the	#
2442# new stack frame.							#
2443#									#
2444#	In the process of all emulation work, if a _mem_read()		#
2445# "callout" returns a failing result indicating an access error, then	#
2446# we must create an access error stack frame from the current stack	#
2447# frame. This information includes a faulting address and a fault-	#
2448# status-longword. These are created within this handler.		#
2449#									#
2450#########################################################################
2451
2452	global		_fpsp_effadd
2453_fpsp_effadd:
2454
2455# This exception type takes priority over the "Line F Emulator"
2456# exception. Therefore, the FPU could be disabled when entering here.
2457# So, we must check to see if it's disabled and handle that case separately.
# note: on the branch to iea_disabled the saved d0 is still on the stack.
2458	mov.l		%d0,-(%sp)		# save d0
2459	movc		%pcr,%d0		# load proc cr
2460	btst		&0x1,%d0		# is FPU disabled?
2461	bne.w		iea_disabled		# yes
2462	mov.l		(%sp)+,%d0		# restore d0
2463
2464	link		%a6,&-LOCAL_SIZE	# init stack frame
2465
2466	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2467	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2469
2470# PC of instruction that took the exception is the PC in the frame
2471	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472
2473	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2474	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2475	bsr.l		_imem_read_long		# fetch the instruction words
2476	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2477
2478#########################################################################
2479
# the low word of d0 is the extension word; its top bit set selects fmovem.
2480	tst.w		%d0			# is operation fmovem?
2481	bmi.w		iea_fmovm		# yes
2482
2483#
2484# here, we will have:
2485#	fabs	fdabs	fsabs		facos		fmod
2486#	fadd	fdadd	fsadd		fasin		frem
2487#	fcmp				fatan		fscale
2488#	fdiv	fddiv	fsdiv		fatanh		fsin
2489#	fint				fcos		fsincos
2490#	fintrz				fcosh		fsinh
2491#	fmove	fdmove	fsmove		fetox		ftan
2492#	fmul	fdmul	fsmul		fetoxm1		ftanh
2493#	fneg	fdneg	fsneg		fgetexp		ftentox
2494#	fsgldiv				fgetman		ftwotox
2495#	fsglmul				flog10
2496#	fsqrt				flog2
2497#	fsub	fdsub	fssub		flogn
2498#	ftst				flognp1
2499# which can all use f<op>.{x,p}
2500# so, now it's immediate data extended precision AND PACKED FORMAT!
2501#
# entered with d0 = opword/extension word longword just fetched.
2502iea_op:
2503	andi.l		&0x00ff00ff,USER_FPSR(%a6)	# clears FPSR bits 31-24 and 15-8
2504
2505	btst		&0xa,%d0		# is src fmt x or p?
2506	bne.b		iea_op_pack		# packed
2507
2508
2509	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2510	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2511	mov.l		&0xc,%d0		# pass: 12 bytes
2512	bsr.l		_imem_read		# read extended immediate
2513
2514	tst.l		%d1			# did ifetch fail?
2515	bne.w		iea_iacc		# yes
2516
2517	bra.b		iea_op_setsrc
2518
# packed immediate source: read the 12 bytes, then convert with decbin unless
# the operand is an INF/NAN or a ZERO, which are passed through unconverted.
2519iea_op_pack:
2520
2521	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2522	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2523	mov.l		&0xc,%d0		# pass: 12 bytes
2524	bsr.l		_imem_read		# read packed operand
2525
2526	tst.l		%d1			# did ifetch fail?
2527	bne.w		iea_iacc		# yes
2528
2529# The packed operand is an INF or a NAN if the exponent field is all ones.
2530	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2531	cmpi.w		%d0,&0x7fff		# INF or NAN?
2532	beq.b		iea_op_setsrc		# operand is an INF or NAN
2533
2534# The packed operand is a zero if the mantissa is all zero, else it's
2535# a normal packed op.
2536	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2537	andi.b		&0x0f,%d0		# clear all but last nybble
2538	bne.b		iea_op_gp_not_spec	# not a zero
2539	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2540	bne.b		iea_op_gp_not_spec	# not a zero
2541	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2542	beq.b		iea_op_setsrc		# operand is a ZERO
2543iea_op_gp_not_spec:
2544	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2545	bsr.l		decbin			# convert to extended
2546	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2547
2548iea_op_setsrc:
# skip the 12 immediate bytes just consumed.
2549	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
2550
2551# FP_SRC now holds the src operand.
2552	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2553	bsr.l		set_tag_x		# tag the operand type
2554	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2555	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2556	bne.b		iea_op_getdst		# no
2557	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2558	mov.b		%d0,STAG(%a6)		# set new optype tag
2559iea_op_getdst:
# STORE_FLG: zero = store the result, non-zero = skip the store (it is set
# with "st" for ftst/fcmp in iea_op_spec and tested in iea_op_save).
2560	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
2561
2562	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2563	beq.b		iea_op_extract		# monadic
2564	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2565	bne.b		iea_op_spec		# yes
2566
2567iea_op_loaddst:
2568	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569	bsr.l		load_fpn2		# load dst operand
2570
2571	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2572	bsr.l		set_tag_x		# tag the operand type
2573	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2574	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2575	bne.b		iea_op_extract		# no
2576	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2577	mov.b		%d0,DTAG(%a6)		# set new optype tag
2578	bra.b		iea_op_extract
2579
2580# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581iea_op_spec:
2582	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2583	beq.b		iea_op_extract		# yes
2584# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585# store a result. then, only fcmp will branch back and pick up a dst operand.
2586	st		STORE_FLG(%a6)		# don't store a final result
2587	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2588	beq.b		iea_op_loaddst		# yes
# ftst falls through into iea_op_extract with no dst operand loaded.
2589
2590iea_op_extract:
2591	clr.l		%d0
2592	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2593
2594	mov.b		1+EXC_CMDREG(%a6),%d1
2595	andi.w		&0x007f,%d1		# extract extension
2596
2597	fmov.l		&0x0,%fpcr		# zero current control regs
2598	fmov.l		&0x0,%fpsr
2599
2600	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2601	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
2602
# tbl_unsupp entries are offsets from tbl_unsupp itself: the first access loads
# the entry for this extension, the jsr adds the table base back in.
2603	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
2605
2606#
2607# Exceptions in order of precedence:
2608#	BSUN	: none
2609#	SNAN	: all operations
2610#	OPERR	: all reg-reg or mem-reg operations that can normally operr
2611#	OVFL	: same as OPERR
2612#	UNFL	: same as OPERR
2613#	DZ	: same as OPERR
2614#	INEX2	: same as OPERR
2615#	INEX1	: all packed immediate operations
2616#
2617
2618# we determine the highest priority exception(if any) set by the
2619# emulation routine that has also been enabled by the user.
2620	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2621	bne.b		iea_op_ena		# some are enabled
2622
2623# now, we save the result, unless, of course, the operation was ftst or fcmp.
2624# these don't save results.
2625iea_op_save:
2626	tst.b		STORE_FLG(%a6)		# does this op store a result?
2627	bne.b		iea_op_exit1		# exit with no frestore
2628
2629iea_op_store:
2630	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631	bsr.l		store_fpreg		# store the result
2632
2633iea_op_exit1:
# order matters: EXC_PC must be copied to FPIAR before it is overwritten with
# the advanced extension word pointer.
2634	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636
2637	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2638	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2640
2641	unlk		%a6			# unravel the frame
2642
2643	btst		&0x7,(%sp)		# is trace on?
2644	bne.w		iea_op_trace		# yes
2645
2646	bra.l		_fpsp_done		# exit to os
2647
# at least one exception is enabled: d0 holds FPCR_ENABLE. mask it with the
# exceptions that were actually raised and pick the first set bit (scanning
# from the msb of the low byte) with bfffo.
2648iea_op_ena:
2649	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
2650	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2651	bne.b		iea_op_exc		# at least one was set
2652
2653# no exception occurred. now, did a disabled, exact overflow occur with inexact
2654# enabled? if so, then we have to stuff an overflow frame into the FPU.
2655	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2656	beq.b		iea_op_save
2657
# NOTE(review): the "no" branch below goes straight to iea_op_store and so
# skips the STORE_FLG test made at iea_op_save; presumably ftst/fcmp can
# never report overflow, so this is harmless - confirm.
2658iea_op_ovfl:
2659	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660	beq.b		iea_op_store		# no
2661	bra.b		iea_op_exc_ovfl		# yes
2662
2663# an enabled exception occurred. we have to insert the exception type back into
2664# the machine.
2665iea_op_exc:
2666	subi.l		&24,%d0			# fix offset to be 0-7
2667	cmpi.b		%d0,&0x6		# is exception INEX?
2668	bne.b		iea_op_exc_force	# no
2669
2670# the enabled exception was inexact. so, if it occurs with an overflow
2671# or underflow that was disabled, then we have to force an overflow or
2672# underflow frame.
2673	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674	bne.b		iea_op_exc_ovfl		# yes
2675	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676	bne.b		iea_op_exc_unfl		# yes
2677
# write the exception word for index d0 into the frame that iea_op_exit2
# hands to frestore.
2678iea_op_exc_force:
2679	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2680	bra.b		iea_op_exit2		# exit with frestore
2681
# one word per exception, indexed by (bfffo offset - 24).
# presumably in the precedence order listed above:
# BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1 - TODO confirm.
2682tbl_iea_except:
2683	short		0xe002, 0xe006, 0xe004, 0xe005
2684	short		0xe003, 0xe002, 0xe001, 0xe001
2685
2686iea_op_exc_ovfl:
2687	mov.w		&0xe005,2+FP_SRC(%a6)
2688	bra.b		iea_op_exit2
2689
2690iea_op_exc_unfl:
2691	mov.w		&0xe003,2+FP_SRC(%a6)
2692
# exceptional exit: same as iea_op_exit1 except that the frame at FP_SRC,
# now carrying the exception word, is frestore'd before unwinding.
2693iea_op_exit2:
2694	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696
2697	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2698	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2700
2701	frestore	FP_SRC(%a6)		# restore exceptional state
2702
2703	unlk		%a6			# unravel the frame
2704
2705	btst		&0x7,(%sp)		# is trace on?
2706	bne.b		iea_op_trace		# yes
2707
2708	bra.l		_fpsp_done		# exit to os
2709
2710#
2711# The opclass two instruction that took an "Unimplemented Effective Address"
2712# exception was being traced. Make the "current" PC the FPIAR and put it in
2713# the trace stack frame then jump to _real_trace().
2714#
2715#		 UNIMP EA FRAME		   TRACE FRAME
2716#		*****************	*****************
2717#		* 0x0 *  0x0f0	*	*    Current	*
2718#		*****************	*      PC	*
2719#		*    Current	*	*****************
2720#		*      PC	*	* 0x2 *  0x024	*
2721#		*****************	*****************
2722#		*      SR	*	*     Next	*
2723#		*****************	*      PC	*
2724#					*****************
2725#					*      SR	*
2726#					*****************
# grow the 4-word frame into a 6-word trace frame, 4 bytes lower on the stack.
# on entry the frame's PC slot already holds the "Next PC" (set by the exit
# code that branches here) and FPIAR holds the "Current PC".
2727iea_op_trace:
2728	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2729	mov.w		0x8(%sp),0x4(%sp)	# move the rest of the PC down too
2730	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2731	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2732
2733	bra.l		_real_trace
2734
2735#########################################################################
# fmovm handling. bit 14 of d0 separates the control-register form (clear)
# from the data-register form (set).
# NOTE(review): d0 is loaded before this point; presumably it holds the
# opword/extension word pair - confirm against the caller.
2736iea_fmovm:
2737	btst		&14,%d0			# ctrl or data reg
2738	beq.w		iea_fmovm_ctrl
2739
2740iea_fmovm_data:
2741
2742	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2743	bne.b		iea_fmovm_data_s
2744
# user mode: give fmovm_dynamic the user stack pointer through EXC_A7 and
# write back whatever value EXC_A7 holds afterwards.
2745iea_fmovm_data_u:
2746	mov.l		%usp,%a0
2747	mov.l		%a0,EXC_A7(%a6)		# store current a7
2748	bsr.l		fmovm_dynamic		# do dynamic fmovm
2749	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2750	mov.l		%a0,%usp		# update usp
2751	bra.w		iea_fmovm_exit
2752
# supervisor mode: a7 is the system stack that also carries the exception
# frame. SPCOND_FLG is cleared first and examined after fmovm_dynamic for
# the -(a7) and (a7)+ cases, which need the exception frame to be relocated.
2753iea_fmovm_data_s:
2754	clr.b		SPCOND_FLG(%a6)
# the address just past the format/vector word is passed as a7; presumably
# this is the supervisor a7 as it was when the exception was taken.
2755	lea		0x2+EXC_VOFF(%a6),%a0
2756	mov.l		%a0,EXC_A7(%a6)
2757	bsr.l		fmovm_dynamic		# do dynamic fmovm
2758
2759	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2760	beq.w		iea_fmovm_data_predec
2761	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2762	bne.w		iea_fmovm_exit
2763
2764# right now, d0 = the size.
2765# the data has been fetched from the supervisor stack, but we have not
2766# incremented the stack pointer by the appropriate number of bytes.
2767# do it here.
2768iea_fmovm_data_postinc:
2769	btst		&0x7,EXC_SR(%a6)
2770	bne.b		iea_fmovm_data_pi_trace
2771
# not traced: rebuild the 4-word frame d0 bytes higher up, with the Next PC
# in the PC slot.
2772	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2773	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2774	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2775
# the address of the new frame is parked in the old SR slot; the
# "mov.l (%sp)+,%sp" after the unlk below loads it into sp.
2776	lea		(EXC_SR,%a6,%d0),%a0
2777	mov.l		%a0,EXC_SR(%a6)
2778
2779	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2780	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2782
2783	unlk		%a6
2784	mov.l		(%sp)+,%sp
2785	bra.l		_fpsp_done
2786
# traced: build a 6-word trace frame instead (fmt/voff = 0x2024), starting
# 4 bytes lower than the 4-word frame would. the PC slot gets the Next PC
# and the extra longword gets the Current PC.
2787iea_fmovm_data_pi_trace:
2788	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2789	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2790	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2791	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2792
2793	lea		(EXC_SR-0x4,%a6,%d0),%a0
2794	mov.l		%a0,EXC_SR(%a6)
2795
2796	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2797	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2799
2800	unlk		%a6
2801	mov.l		(%sp)+,%sp
2802	bra.l		_real_trace
2803
2804# right now, d0 = the size and d1 = the strg.
# the "fmovm.x <list>,-(a7)" case from supervisor mode: the register data
# must land where the exception frame currently sits, so the frame is
# rebuilt "size" bytes lower and the registers are then stored above it.
# strg and size are parked in the two bytes of the EXC_VOFF word because
# d0/d1 are about to be reloaded with the user's values.
2805iea_fmovm_data_predec:
2806	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2807	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2808
2809	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2810	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2812
# d0/d1 now hold the user's values; they and the caller's a6 are pushed
# here and reloaded at fm_end.
2813	mov.l		(%a6),-(%sp)		# make a copy of a6
2814	mov.l		%d0,-(%sp)		# save d0
2815	mov.l		%d1,-(%sp)		# save d1
2816	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2817
2818	clr.l		%d0
2819	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2820	neg.l		%d0			# get negative of size
2821
2822	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2823	beq.b		iea_fmovm_data_p2
2824
# traced: 6-word trace frame, placed a further 4 bytes lower.
2825	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2826	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2827	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
2828	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2829
2830	pea		(%a6,%d0)		# create final sp
2831	bra.b		iea_fmovm_data_p3
2832
# not traced: plain 4-word frame with the Next PC popped into its PC slot.
2833iea_fmovm_data_p2:
2834	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2835	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
2836	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2837
2838	pea		(0x4,%a6,%d0)		# create final sp
2839
# store the selected registers. strg is scanned one bit at a time from the
# msb; every set bit stores one extended register (0xc bytes) and advances
# the displacement in d0.
2840iea_fmovm_data_p3:
2841	clr.l		%d1
2842	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2843
2844	tst.b		%d1
2845	bpl.b		fm_1
2846	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
2847	addi.l		&0xc,%d0
2848fm_1:
2849	lsl.b		&0x1,%d1
2850	bpl.b		fm_2
2851	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
2852	addi.l		&0xc,%d0
2853fm_2:
2854	lsl.b		&0x1,%d1
2855	bpl.b		fm_3
2856	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
2857	addi.l		&0xc,%d0
2858fm_3:
2859	lsl.b		&0x1,%d1
2860	bpl.b		fm_4
2861	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
2862	addi.l		&0xc,%d0
2863fm_4:
2864	lsl.b		&0x1,%d1
2865	bpl.b		fm_5
2866	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
2867	addi.l		&0xc,%d0
2868fm_5:
2869	lsl.b		&0x1,%d1
2870	bpl.b		fm_6
2871	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
2872	addi.l		&0xc,%d0
2873fm_6:
2874	lsl.b		&0x1,%d1
2875	bpl.b		fm_7
2876	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
2877	addi.l		&0xc,%d0
2878fm_7:
2879	lsl.b		&0x1,%d1
2880	bpl.b		fm_end
2881	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# the stack now holds, from (%sp) upward: final sp, d1, d0, caller's a6.
2882fm_end:
2883	mov.l		0x4(%sp),%d1
2884	mov.l		0x8(%sp),%d0
2885	mov.l		0xc(%sp),%a6
2886	mov.l		(%sp)+,%sp
2887
2888	btst		&0x7,(%sp)		# is trace enabled?
2889	beq.l		_fpsp_done
2890	bra.l		_real_trace
2891
2892#########################################################################
# fmovm of control registers: fmovm_ctrl does the load, then the common
# fmovm exit below is taken.
2893iea_fmovm_ctrl:
2894
2895	bsr.l		fmovm_ctrl		# load ctrl regs
2896
# common fmovm exit (also reached from the data-register paths that did not
# need the exception frame moved).
2897iea_fmovm_exit:
2898	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2899	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2901
2902	btst		&0x7,EXC_SR(%a6)	# is trace on?
2903	bne.b		iea_fmovm_trace		# yes
2904
2905	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2906
2907	unlk		%a6			# unravel the frame
2908
2909	bra.l		_fpsp_done		# exit to os
2910
2911#
2912# The control reg instruction that took an "Unimplemented Effective Address"
2913# exception was being traced. The "Current PC" for the trace frame is the
2914# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915# After fixing the stack frame, jump to _real_trace().
2916#
2917#		 UNIMP EA FRAME		   TRACE FRAME
2918#		*****************	*****************
2919#		* 0x0 *  0x0f0	*	*    Current	*
2920#		*****************	*      PC	*
2921#		*    Current	*	*****************
2922#		*      PC	*	* 0x2 *  0x024	*
2923#		*****************	*****************
2924#		*      SR	*	*     Next	*
2925#		*****************	*      PC	*
2926#					*****************
2927#					*      SR	*
2928#					*****************
2929# this ain't a pretty solution, but it works:
2930# -restore a6 (not with unlk)
2931# -shift stack frame down over where old a6 used to be
2932# -add LOCAL_SIZE to stack pointer
# sp still points at the bottom of the local frame here, so every field is
# addressed as <offset>+LOCAL_SIZE(%sp). the trace frame starts in the slot
# that held the saved a6. the order of the moves matters because source and
# destination ranges overlap.
2933iea_fmovm_trace:
2934	mov.l		(%a6),%a6		# restore frame pointer
2935	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR
2936	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# "Current PC"
2937	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# "Next PC"
2938	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2940
2941	bra.l		_real_trace
2942
2943#########################################################################
2944# The FPU is disabled and so we should really have taken the "Line
2945# F Emulator" exception. So, here we create an 8-word stack frame
2946# from our 4-word stack frame. This means we must calculate the length
2947# of the faulting instruction to get the "next PC". This is trivial for
2948# immediate operands but requires some extra work for fmovm dynamic
2949# which can use most addressing modes.
2950iea_disabled:
2951	mov.l		(%sp)+,%d0		# restore d0
2952
2953	link		%a6,&-LOCAL_SIZE	# init stack frame
2954
2955	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2956
2957# PC of instruction that took the exception is the PC in the frame
2958	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2960	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2961	bsr.l		_imem_read_long		# fetch the instruction words
2962	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2963
# the low word of d0 is the extension word; its sign bit distinguishes fmovm.
2964	tst.w		%d0			# is instr fmovm?
2965	bmi.b		iea_dis_fmovm		# yes
2966# instruction is using an extended precision immediate operand. Therefore,
2967# the total instruction length is 16 bytes.
2968iea_dis_immed:
2969	mov.l		&0x10,%d0		# 16 bytes of instruction
2970	bra.b		iea_dis_cont
2971iea_dis_fmovm:
2972	btst		&0xe,%d0		# is instr fmovm ctrl
2973	bne.b		iea_dis_fmovm_data	# no
2974# the instruction is a fmovm.l with 2 or 3 registers.
# length is 0xc bytes, or 0x10 when all three register-select bits are set.
2975	bfextu		%d0{&19:&3},%d1
2976	mov.l		&0xc,%d0
2977	cmpi.b		%d1,&0x7		# move all regs?
2978	bne.b		iea_dis_cont
2979	addq.l		&0x4,%d0
2980	bra.b		iea_dis_cont
2981# the instruction is an fmovm.x dynamic which can use many addressing
2982# modes and thus can have several different total instruction lengths.
2983# call fmovm_calc_ea which will go through the ea calc process and,
2984# as a by-product, will tell us how long the instruction is.
2985iea_dis_fmovm_data:
2986	clr.l		%d0
2987	bsr.l		fmovm_calc_ea
2988	mov.l		EXC_EXTWPTR(%a6),%d0
2989	sub.l		EXC_PC(%a6),%d0
# d0 = instruction length in bytes. it is parked in the frame's vector
# offset word so that it survives the register restore and the unlk.
2990iea_dis_cont:
2991	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
2992
2993	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2994
2995	unlk		%a6
2996
2997# here, we actually create the 8-word frame from the 4-word frame,
2998# with the "next PC" as additional info.
2999# the <ea> field is left as undefined.
# as coded, the frame's PC slot ends up holding the Next PC (Current PC +
# length) and the last longword of the new frame holds the Current PC.
3000	subq.l		&0x8,%sp		# make room for new stack
3001	mov.l		%d0,-(%sp)		# save d0
3002	mov.w		0xc(%sp),0x4(%sp)	# move SR
3003	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3004	clr.l		%d0
3005	mov.w		0x12(%sp),%d0		# fetch length stored above
3006	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3007	add.l		%d0,0x6(%sp)		# make Next PC
3008	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3009	mov.l		(%sp)+,%d0		# restore d0
3010
3011	bra.l		_real_fpu_disabled
3012
3013##########
3014
# an instruction fetch failed: turn the current 4-word frame into an 8-word
# access error frame (fmt/voff = 0x4008) and exit through _real_access.
# NOTE(review): bit 1 of the PCR presumably means "FPU disabled"; the FP
# state is only restored when that bit is clear - confirm.
3015iea_iacc:
3016	movc		%pcr,%d0
3017	btst		&0x1,%d0
3018	bne.b		iea_iacc_cont
3019	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
3021iea_iacc_cont:
3022	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3023
3024	unlk		%a6
3025
3026	subq.w		&0x8,%sp		# make stack frame bigger
3027	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3028	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3029	mov.w		&0x4008,0x6(%sp)	# store voff
3030	mov.l		0x2(%sp),0x8(%sp)	# store ea
3031	mov.l		&0x09428001,0xc(%sp)	# store fslw
3032
# common tail for both access error exits: sp points at the finished frame.
3033iea_acc_done:
3034	btst		&0x5,(%sp)		# user or supervisor mode?
3035	beq.b		iea_acc_done2		# user
3036	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3037
3038iea_acc_done2:
3039	bra.l		_real_access
3040
# a data access failed: build an 8-word access error frame 8 bytes below
# the current exception frame. on entry a0 and d0 are stored into the frame
# as the <ea> and the upper fslw word respectively, which is why the PCR is
# read into d1 rather than d0.
# NOTE(review): a0/d0 are presumably the fault address and fslw bits passed
# in by the code that branches here - confirm.
3041iea_dacc:
3042	lea		-LOCAL_SIZE(%a6),%sp
3043
3044	movc		%pcr,%d1
3045	btst		&0x1,%d1
3046	bne.b		iea_dacc_cont
3047	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
3048	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3049iea_dacc_cont:
3050	mov.l		(%a6),%a6
3051
# sp = bottom of the local frame, so the old exception frame is addressed
# as <offset>+LOCAL_SIZE(%sp) and the new one sits 8 bytes lower.
3052	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3053	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3054	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3055	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
3056	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
3057	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3058
3059	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3060	add.w		&LOCAL_SIZE-0x4,%sp
3061
3062	bra.b		iea_acc_done
3063
3064#########################################################################
3065# XDEF ****************************************************************	#
3066#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3067#									#
3068#	This handler should be the first code executed upon taking the	#
3069#	FP Operand Error exception in an operating system.		#
3070#									#
3071# XREF ****************************************************************	#
3072#	_imem_read_long() - read instruction longword			#
3073#	fix_skewed_ops() - adjust src operand in fsave frame		#
3074#	_real_operr() - "callout" to operating system operr handler	#
3075#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3076#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3077#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3078#									#
3079# INPUT ***************************************************************	#
3080#	- The system stack contains the FP Operr exception frame	#
3081#	- The fsave frame contains the source operand			#
3082#									#
3083# OUTPUT **************************************************************	#
3084#	No access error:						#
3085#	- The system stack is unchanged					#
3086#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3087#									#
3088# ALGORITHM ***********************************************************	#
3089#	In a system where the FP Operr exception is enabled, the goal	#
3090# is to get to the handler specified at _real_operr(). But, on the 060,	#
3091# for opclass zero and two instructions taking this exception, the	#
3092# input operand in the fsave frame may be incorrect for some cases	#
3093# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3094# do just this and then exits through _real_operr().			#
3095#	For opclass 3 instructions, the 060 doesn't store the default	#
3096# operr result out to memory or data register file as it should.	#
3097# This code must emulate the move out before finally exiting through	#
3098# _real_operr(). The move out, if to memory, is performed using		#
3099# _mem_write() "callout" routines that may return a failing result.	#
3100# In this special case, the handler must exit through facc_out()	#
3101# which creates an access error stack frame from the current operr	#
3102# stack frame.								#
3103#									#
3104#########################################################################
3105
3106	global		_fpsp_operr
# entry point for the FP Operand Error exception. saves the FPU state and
# the scratch registers, fetches the faulting instruction through FPIAR,
# then either emulates the missing "fmove out" store (foperr_out) or fixes
# the source operand in the fsave frame, and leaves through _real_operr.
3107_fpsp_operr:
3108
3109	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3110
3111	fsave		FP_SRC(%a6)		# grab the "busy" frame
3112
3113	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3114	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3116
3117# the FPIAR holds the "current PC" of the faulting instruction
3118	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119
3120	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3121	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3122	bsr.l		_imem_read_long		# fetch the instruction words
3123	mov.l		%d0,EXC_OPWORD(%a6)
3124
3125##############################################################################
3126
3127	btst		&13,%d0			# is instr an fmove out?
3128	bne.b		foperr_out		# fmove out
3129
3130
3131# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132# this would be the case for opclass two operations with a source infinity or
3133# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134# cause an operr so we don't need to check for them here.
3135	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3136	bsr.l		fix_skewed_ops		# fix src op
3137
# common exit: put everything back, hand the (possibly adjusted) frame back
# to the FPU and pass control to the operating system's operr handler.
3138foperr_exit:
3139	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3140	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3142
3143	frestore	FP_SRC(%a6)
3144
3145	unlk		%a6
3146	bra.l		_real_operr
3147
3148########################################################################
3149
3150#
3151# the hardware does not save the default result to memory on enabled
3152# operand error exceptions. we do this here before passing control to
3153# the user operand error handler.
3154#
3155# byte, word, and long destination format operations can pass
3156# through here. we simply need to test the sign of the src
3157# operand and save the appropriate minimum or maximum integer value
3158# to the effective address as pointed to by the stacked effective address.
3159#
3160# although packed opclass three operations can take operand error
3161# exceptions, they won't pass through here since they are caught
3162# first by the unsupported data format exception handler. that handler
3163# sends them directly to _real_operr() if necessary.
3164#
# compute the default result into L_SCR1, then dispatch on the destination
# format. d0 still holds the opword/extension word pair read at entry.
#	source is a NAN  : L_SCR1 = upper longword of the source mantissa
#	source otherwise : L_SCR1 = 0x7fffffff if positive, 0x80000000 if negative
3165foperr_out:
3166
3167	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3168	andi.w		&0x7fff,%d1
3169	cmpi.w		%d1,&0x7fff
3170	bne.b		foperr_out_not_qnan
3171# the operand is either an infinity or a QNAN.
# it is treated as an infinity only when the mantissa is zero apart from
# the msb of the upper longword.
3172	tst.l		FP_SRC_LO(%a6)
3173	bne.b		foperr_out_qnan
3174	mov.l		FP_SRC_HI(%a6),%d1
3175	andi.l		&0x7fffffff,%d1
3176	beq.b		foperr_out_not_qnan
3177foperr_out_qnan:
3178	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
3179	bra.b		foperr_out_jmp
3180
3181foperr_out_not_qnan:
3182	mov.l		&0x7fffffff,%d1		# largest positive long
3183	tst.b		FP_SRC_EX(%a6)		# is the source negative?
3184	bpl.b		foperr_out_not_qnan2	# no
3185	addq.l		&0x1,%d1		# yes; make it 0x80000000
3186foperr_out_not_qnan2:
3187	mov.l		%d1,L_SCR1(%a6)
3188
3189foperr_out_jmp:
3190	bfextu		%d0{&19:&3},%d0		# extract dst format field
3191	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3192	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
3193	jmp		(tbl_operr.b,%pc,%a0)
3194
# word offsets relative to tbl_operr, indexed by the dst format field.
# the slots holding offset 0 would jump to tbl_operr itself; per the
# comments they are never expected to be taken.
3195tbl_operr:
3196	short		foperr_out_l - tbl_operr # long word integer
3197	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3198	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3199	short		foperr_exit  - tbl_operr # packed won't enter here
3200	short		foperr_out_w - tbl_operr # word integer
3201	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3202	short		foperr_out_b - tbl_operr # byte integer
3203	short		tbl_operr    - tbl_operr # packed won't enter here
3204
# byte destination: store the most significant byte of L_SCR1.
# d1 = <ea> mode,reg byte; a value of 7 or less selects a data register.
3205foperr_out_b:
3206	mov.b		L_SCR1(%a6),%d0		# load default result (upper byte)
3207	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3208	ble.b		foperr_out_b_save_dn	# yes
3209	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3210	bsr.l		_dmem_write_byte	# write the default result
3211
3212	tst.l		%d1			# did dstore fail?
3213	bne.l		facc_out_b		# yes
3214
3215	bra.w		foperr_exit
3216foperr_out_b_save_dn:
3217	andi.w		&0x0007,%d1		# keep only the register number
3218	bsr.l		store_dreg_b		# store result to regfile
3219	bra.w		foperr_exit
3220
# word destination: store the most significant word of L_SCR1.
# d1 = <ea> mode,reg byte; a value of 7 or less selects a data register.
3221foperr_out_w:
3222	mov.w		L_SCR1(%a6),%d0		# load default result (upper word)
3223	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3224	ble.b		foperr_out_w_save_dn	# yes
3225	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3226	bsr.l		_dmem_write_word	# write the default result
3227
3228	tst.l		%d1			# did dstore fail?
3229	bne.l		facc_out_w		# yes
3230
3231	bra.w		foperr_exit
3232foperr_out_w_save_dn:
3233	andi.w		&0x0007,%d1		# keep only the register number
3234	bsr.l		store_dreg_w		# store result to regfile
3235	bra.w		foperr_exit
3236
# long destination: store all of L_SCR1.
# d1 = <ea> mode,reg byte; a value of 7 or less selects a data register.
3237foperr_out_l:
3238	mov.l		L_SCR1(%a6),%d0		# load default result
3239	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3240	ble.b		foperr_out_l_save_dn	# yes
3241	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3242	bsr.l		_dmem_write_long	# write the default result
3243
3244	tst.l		%d1			# did dstore fail?
3245	bne.l		facc_out_l		# yes
3246
3247	bra.w		foperr_exit
3248foperr_out_l_save_dn:
3249	andi.w		&0x0007,%d1		# keep only the register number
3250	bsr.l		store_dreg_l		# store result to regfile
3251	bra.w		foperr_exit
3252
3253#########################################################################
3254# XDEF ****************************************************************	#
3255#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3256#									#
3257#	This handler should be the first code executed upon taking the	#
3258#	FP Signalling NAN exception in an operating system.		#
3259#									#
3260# XREF ****************************************************************	#
3261#	_imem_read_long() - read instruction longword			#
3262#	fix_skewed_ops() - adjust src operand in fsave frame		#
3263#	_real_snan() - "callout" to operating system SNAN handler	#
3264#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3265#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3266#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3267#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3268#									#
3269# INPUT ***************************************************************	#
3270#	- The system stack contains the FP SNAN exception frame		#
3271#	- The fsave frame contains the source operand			#
3272#									#
3273# OUTPUT **************************************************************	#
3274#	No access error:						#
3275#	- The system stack is unchanged					#
3276#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3277#									#
3278# ALGORITHM ***********************************************************	#
3279#	In a system where the FP SNAN exception is enabled, the goal	#
3280# is to get to the handler specified at _real_snan(). But, on the 060,	#
3281# for opclass zero and two instructions taking this exception, the	#
3282# input operand in the fsave frame may be incorrect for some cases	#
3283# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3284# do just this and then exits through _real_snan().			#
3285#	For opclass 3 instructions, the 060 doesn't store the default	#
3286# SNAN result out to memory or data register file as it should.		#
3287# This code must emulate the move out before finally exiting through	#
3288# _real_snan(). The move out, if to memory, is performed using		#
3289# _mem_write() "callout" routines that may return a failing result.	#
3290# In this special case, the handler must exit through facc_out()	#
3291# which creates an access error stack frame from the current SNAN	#
3292# stack frame.								#
3293#	For the case of an extended precision opclass 3 instruction,	#
3294# if the effective addressing mode was -() or ()+, then the address	#
3295# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3296# was -(a7) from supervisor mode, then the exception frame currently	#
3297# on the system stack must be carefully moved "down" to make room	#
3298# for the operand being moved.						#
3299#									#
3300#########################################################################
3301
3302	global		_fpsp_snan
# entry point for the FP Signalling NAN exception. saves the FPU state and
# the scratch registers, fetches the faulting instruction through FPIAR,
# then either emulates the missing "fmove out" store (fsnan_out) or fixes
# the source operand in the fsave frame, and leaves through _real_snan.
3303_fpsp_snan:
3304
3305	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3306
3307	fsave		FP_SRC(%a6)		# grab the "busy" frame
3308
3309	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3310	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3312
3313# the FPIAR holds the "current PC" of the faulting instruction
3314	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3315
3316	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3317	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3318	bsr.l		_imem_read_long		# fetch the instruction words
3319	mov.l		%d0,EXC_OPWORD(%a6)
3320
3321##############################################################################
3322
3323	btst		&13,%d0			# is instr an fmove out?
3324	bne.w		fsnan_out		# fmove out
3325
3326
3327# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328# this would be the case for opclass two operations with a source infinity or
3329# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330# fixed here.
3331	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3332	bsr.l		fix_skewed_ops		# fix src op
3333
# common exit: put everything back, hand the (possibly adjusted) frame back
# to the FPU and pass control to the operating system's SNAN handler.
3334fsnan_exit:
3335	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3336	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3338
3339	frestore	FP_SRC(%a6)
3340
3341	unlk		%a6
3342	bra.l		_real_snan
3343
3344########################################################################
3345
3346#
3347# the hardware does not save the default result to memory on enabled
3348# snan exceptions. we do this here before passing control to
3349# the user snan handler.
3350#
3351# every non-packed destination format can pass through here. packed
3352# format operations already were handled by fpsp_unsupp(), so nothing is
3353# done for them here (their table slots below hold offset 0).
3354# for byte, word, and long, the upper byte/word/longword of the SNAN
3355# mantissa is stored with the SNAN bit set. for sgl, dbl, and ext, a NAN of
3356# that precision is built from the source sign and mantissa and stored.
3357#
3358fsnan_out:
3359
3360	bfextu		%d0{&19:&3},%d0		# extract dst format field
3361	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3362	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3363	jmp		(tbl_snan.b,%pc,%a0)
3364
# word offsets relative to tbl_snan, indexed by the dst format field.
3365tbl_snan:
3366	short		fsnan_out_l - tbl_snan # long word integer
3367	short		fsnan_out_s - tbl_snan # single precision
3368	short		fsnan_out_x - tbl_snan # extended precision
3369	short		tbl_snan    - tbl_snan # packed needs no help
3370	short		fsnan_out_w - tbl_snan # word integer
3371	short		fsnan_out_d - tbl_snan # double precision
3372	short		fsnan_out_b - tbl_snan # byte integer
3373	short		tbl_snan    - tbl_snan # packed needs no help
3374
# byte destination: the result is the upper byte of the source mantissa
# with bit 6 set. d1 = <ea> mode,reg byte; 7 or less selects a data register.
3375fsnan_out_b:
3376	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3377	bset		&6,%d0			# set SNAN bit
3378	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3379	ble.b		fsnan_out_b_dn		# yes
3380	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3381	bsr.l		_dmem_write_byte	# write the default result
3382
3383	tst.l		%d1			# did dstore fail?
3384	bne.l		facc_out_b		# yes
3385
3386	bra.w		fsnan_exit
3387fsnan_out_b_dn:
3388	andi.w		&0x0007,%d1		# keep only the register number
3389	bsr.l		store_dreg_b		# store result to regfile
3390	bra.w		fsnan_exit
3391
# word destination: the result is the upper word of the source mantissa
# with bit 14 set. d1 = <ea> mode,reg byte; 7 or less selects a data register.
3392fsnan_out_w:
3393	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3394	bset		&14,%d0			# set SNAN bit
3395	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3396	ble.b		fsnan_out_w_dn		# yes
3397	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3398	bsr.l		_dmem_write_word	# write the default result
3399
3400	tst.l		%d1			# did dstore fail?
3401	bne.l		facc_out_w		# yes
3402
3403	bra.w		fsnan_exit
3404fsnan_out_w_dn:
3405	andi.w		&0x0007,%d1		# keep only the register number
3406	bsr.l		store_dreg_w		# store result to regfile
3407	bra.w		fsnan_exit
3408
# long destination: the result is the upper longword of the source mantissa
# with bit 30 set. d1 = <ea> mode,reg byte; 7 or less selects a data register.
3409fsnan_out_l:
3410	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3411	bset		&30,%d0			# set SNAN bit
3412	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3413	ble.b		fsnan_out_l_dn		# yes
3414	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3415	bsr.l		_dmem_write_long	# write the default result
3416
3417	tst.l		%d1			# did dstore fail?
3418	bne.l		facc_out_l		# yes
3419
3420	bra.w		fsnan_exit
3421fsnan_out_l_dn:
3422	andi.w		&0x0007,%d1		# keep only the register number
3423	bsr.l		store_dreg_l		# store result to regfile
3424	bra.w		fsnan_exit
3425
# single precision destination. the result is
#	(source sign) | 0x7fc00000 | (upper mantissa longword >> 8)
# and is written either to memory or to a data register.
3426fsnan_out_s:
3427	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3428	ble.b		fsnan_out_d_dn		# yes
3429	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3430	andi.l		&0x80000000,%d0		# keep sign
3431	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3432	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3433	lsr.l		&0x8,%d1		# shift mantissa for sgl
3434	or.l		%d1,%d0			# create sgl SNAN
3435	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3436	bsr.l		_dmem_write_long	# write the default result
3437
3438	tst.l		%d1			# did dstore fail?
3439	bne.l		facc_out_l		# yes
3440
3441	bra.w		fsnan_exit
# despite the "_d_" in its name, this is the data register path of the
# single precision case: in the code visible here it is reached only from
# fsnan_out_s. d1 (the <ea> byte) is kept on the stack while d1 is reused
# for the mantissa.
3442fsnan_out_d_dn:
3443	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3444	andi.l		&0x80000000,%d0		# keep sign
3445	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3446	mov.l		%d1,-(%sp)		# save <ea> mode,reg
3447	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3448	lsr.l		&0x8,%d1		# shift mantissa for sgl
3449	or.l		%d1,%d0			# create sgl SNAN
3450	mov.l		(%sp)+,%d1		# restore <ea> mode,reg
3451	andi.w		&0x0007,%d1		# keep only the register number
3452	bsr.l		store_dreg_l		# store result to regfile
3453	bra.w		fsnan_exit
3454
# double precision destination. the 8-byte result is assembled in
# FP_SCR0_EX/FP_SCR0_HI and written to memory with _dmem_write:
#	hi long = (source sign) | 0x7ff80000 | (upper mantissa >> 11)
#	lo long = (low 11 bits of upper mantissa, moved to the top)
#		  | (lower mantissa >> 11)
3455fsnan_out_d:
3456	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3457	andi.l		&0x80000000,%d0		# keep sign
3458	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3459	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3460	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3461	mov.l		&11,%d0			# load shift amt
3462	lsr.l		%d0,%d1
3463	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3464	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3465	andi.l		&0x000007ff,%d1		# bits shifted out of dbl hi
3466	ror.l		%d0,%d1			# rotate them to the top of dbl lo
3467	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3468	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3469	lsr.l		%d0,%d1
3470	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3471	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3472	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3473	movq.l		&0x8,%d0		# pass: size of 8 bytes
3474	bsr.l		_dmem_write		# write the default result
3475
3476	tst.l		%d1			# did dstore fail?
3477	bne.l		facc_out_d		# yes
3478
3479	bra.w		fsnan_exit
3480
3481# for extended precision, if the addressing mode is pre-decrement or
3482# post-increment, then the address register did not get updated.
3483# in addition, for pre-decrement, the stacked <ea> is incorrect.
3484fsnan_out_x:
3485	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3486
# build the 12-byte result in FP_SCR0: the source operand with a zeroed pad
# word and bit 30 of the upper mantissa longword set.
3487	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3488	clr.w		2+FP_SCR0(%a6)
3489	mov.l		FP_SRC_HI(%a6),%d0
3490	bset		&30,%d0
3491	mov.l		%d0,FP_SCR0_HI(%a6)
3492	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3493
3494	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3495	bne.b		fsnan_out_x_s		# yes
3496
# user mode: a7 (usp) and a6 are staged in EXC_A7/EXC_A6 around the call
# and written back afterwards.
3497	mov.l		%usp,%a0		# fetch user stack pointer
3498	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3499	mov.l		(%a6),EXC_A6(%a6)
3500
3501	bsr.l		_calc_ea_fout		# find the correct ea,update An
3502	mov.l		%a0,%a1			# pass: dst addr
3503	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3504
3505	mov.l		EXC_A7(%a6),%a0
3506	mov.l		%a0,%usp		# restore user stack pointer
3507	mov.l		EXC_A6(%a6),(%a6)
3508
# a1 = destination address, as set by either caller.
3509fsnan_out_x_save:
3510	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3511	movq.l		&0xc,%d0		# pass: size of extended
3512	bsr.l		_dmem_write		# write the default result
3513
3514	tst.l		%d1			# did dstore fail?
3515	bne.l		facc_out_x		# yes
3516
3517	bra.w		fsnan_exit
3518
# supervisor mode: same as above without the usp handling, plus the special
# -(a7) case in which the destination overlaps the exception frame.
3519fsnan_out_x_s:
3520	mov.l		(%a6),EXC_A6(%a6)
3521
3522	bsr.l		_calc_ea_fout		# find the correct ea,update An
3523	mov.l		%a0,%a1			# pass: dst addr
3524	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3525
3526	mov.l		EXC_A6(%a6),(%a6)
3527
3528	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3529	bne.b		fsnan_out_x_save	# no
3530
3531# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3532	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3533	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3534	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3535
3536	frestore	FP_SRC(%a6)
3537
3538	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3539
# sp still points at the bottom of the local frame. first copy the three
# longwords of the exception frame 0xc bytes lower ...
3540	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3541	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3542	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3543
# ... then drop the 12-byte result into the space the frame occupied.
3544	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3545	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3546	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3547
3548	add.l		&LOCAL_SIZE-0x8,%sp	# sp = start of the moved frame
3549
3550	bra.l		_real_snan
3551
3552#########################################################################
3553# XDEF ****************************************************************	#
3554#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3555#									#
3556#	This handler should be the first code executed upon taking the	#
3557#	FP Inexact exception in an operating system.			#
3558#									#
3559# XREF ****************************************************************	#
3560#	_imem_read_long() - read instruction longword			#
3561#	fix_skewed_ops() - adjust src operand in fsave frame		#
3562#	set_tag_x() - determine optype of src/dst operands		#
3563#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3564#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3565#	load_fpn2() - load dst operand from FP regfile			#
3566#	smovcr() - emulate an "fmovcr" instruction			#
3567#	fout() - emulate an opclass 3 instruction			#
3568#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
3569#	_real_inex() - "callout" to operating system inexact handler	#
3570#									#
3571# INPUT ***************************************************************	#
3572#	- The system stack contains the FP Inexact exception frame	#
3573#	- The fsave frame contains the source operand			#
3574#									#
3575# OUTPUT **************************************************************	#
3576#	- The system stack is unchanged					#
3577#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3578#									#
3579# ALGORITHM ***********************************************************	#
3580#	In a system where the FP Inexact exception is enabled, the goal	#
3581# is to get to the handler specified at _real_inex(). But, on the 060,	#
3582# for opclass zero and two instructions taking this exception, the	#
3583# hardware doesn't store the correct result to the destination FP	#
3584# register as did the '040 and '881/2. This handler must emulate the	#
3585# instruction in order to get this value and then store it to the	#
3586# correct register before calling _real_inex().				#
3587#	For opclass 3 instructions, the 060 doesn't store the default	#
3588# inexact result out to memory or data register file as it should.	#
3589# This code must emulate the move out by calling fout() before finally	#
3590# exiting through _real_inex().						#
3591#									#
3592#########################################################################
3593
3594	global		_fpsp_inex
3595_fpsp_inex:
3596
# entry: build the local frame, capture the FPU state frame in FP_SRC, and
# save the scratch integer regs, the FP control regs and fp0-fp1. every
# exit from this handler goes through finex_exit, which undoes these steps
# in the opposite order before branching to _real_inex.
3597	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3598
3599	fsave		FP_SRC(%a6)		# grab the "busy" frame
3600
3601	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3602	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3603	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3604
3605# the FPIAR holds the "current PC" of the faulting instruction
3606	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3607
# fetch the opword and extension word as one longword; d0 = opword in the
# upper word, extension word in the lower word. EXC_EXTWPTR is advanced
# past both.
3608	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3609	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3610	bsr.l		_imem_read_long		# fetch the instruction words
3611	mov.l		%d0,EXC_OPWORD(%a6)
3612
3613##############################################################################
3614
# bit 13 of the longword is bit 13 of the extension word
3615	btst		&13,%d0			# is instr an fmove out?
3616	bne.w		finex_out		# fmove out
3617
3618
3619# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3620# longword integer directly into the upper longword of the mantissa along
3621# w/ an exponent value of 0x401e. we convert this to extended precision here.
# the 3-bit field tested is extension word bits 12-10; zero selects long.
3622	bfextu		%d0{&19:&3},%d0		# fetch instr size
3623	bne.b		finex_cont		# instr size is not long
3624	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3625	bne.b		finex_cont		# no
# FPCR is zeroed before the integer is loaded and stored back as extended.
# the extended store presumably overwrites the word at FP_SRC+2 as well,
# which is why that word is written again afterwards. 0xe001 is the value
# tbl_funimp_except (in _fpsp_unimp) holds at index 6, the index that
# funimp_exc treats as INEX.
3626	fmov.l		&0x0,%fpcr
3627	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3628	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
3629	mov.w		&0xe001,0x2+FP_SRC(%a6)
3630
3631finex_cont:
3632	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3633	bsr.l		fix_skewed_ops		# fix src op
3634
3635# Here, we zero the ccode and exception byte field since we're going to
3636# emulate the whole instruction. Notice, though, that we don't kill the
3637# INEX1 bit. This is because a packed op has long since been converted
3638# to extended before arriving here. Therefore, we need to retain the
3639# INEX1 bit from when the operand was first converted.
# (the mask keeps bits 23-16, bit 8 and bits 7-0 of the saved FPSR.)
3640	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3641
3642	fmov.l		&0x0,%fpcr		# zero current control regs
3643	fmov.l		&0x0,%fpsr
3644
3645	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3646	cmpi.b		%d1,&0x17		# is op an fmovecr?
3647	beq.w		finex_fmovcr		# yes
3648
3649	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3650	bsr.l		set_tag_x		# tag the operand type
3651	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3652
3653# bits four and five of the fp extension word separate the monadic and dyadic
3654# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3655# will never take this exception, but fsincos will.
3656	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3657	beq.b		finex_extract		# monadic
3658
3659	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3660	bne.b		finex_extract		# yes
3661
# dyadic: the dst register number is the 3-bit field at bit offset 6 of the
# command word. load that register into FP_DST and tag it; an UNNORM dst is
# first passed through unnorm_fix(), and d0 is then stored as the dst tag.
3662	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3663	bsr.l		load_fpn2		# load dst into FP_DST
3664
3665	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3666	bsr.l		set_tag_x		# tag the operand type
3667	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3668	bne.b		finex_op2_done		# no
3669	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3670finex_op2_done:
3671	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3672
# set up the emulation call: d0 = rounding precision/mode byte (zero
# extended), d1 = low 7 bits of the command word, a0 = ptr to src operand,
# a1 = ptr to dst operand.
3673finex_extract:
3674	clr.l		%d0
3675	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3676
3677	mov.b		1+EXC_CMDREG(%a6),%d1
3678	andi.w		&0x007f,%d1		# extract extension
3679
3680	lea		FP_SRC(%a6),%a0
3681	lea		FP_DST(%a6),%a1
3682
# tbl_unsupp is indexed by d1*4 and yields a longword that is used as an
# offset from tbl_unsupp itself to reach the emulation routine.
3683	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3684	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
3685
3686# the operation has been emulated. the result is in fp0.
# d0 = dst register number for store_fpreg().
3687finex_save:
3688	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
3689	bsr.l		store_fpreg
3690
# common exit: restore everything saved at entry, put the (possibly
# adjusted) state frame back into the FPU, and hand off to _real_inex.
3691finex_exit:
3692	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3693	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3694	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3695
3696	frestore	FP_SRC(%a6)
3697
3698	unlk		%a6
3699	bra.l		_real_inex
3700
# fmovecr: no operands are tagged or loaded; smovcr() is called with the
# rounding info in d0 and the rom offset in d1, and the result is stored
# through finex_save like any other opclass 0/2 result.
3701finex_fmovcr:
3702	clr.l		%d0
3703	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3704	mov.b		1+EXC_CMDREG(%a6),%d1
3705	andi.l		&0x0000007f,%d1		# pass rom offset
3706	bsr.l		smovcr
3707	bra.b		finex_save
3708
3709########################################################################
3710
3711#
3712# the hardware does not save the default result to memory on enabled
3713# inexact exceptions. we do this here before passing control to
3714# the user inexact handler.
3715#
3716# byte, word, and long destination format operations can pass
3717# through here. so can double and single precision.
3718# although packed opclass three operations can take inexact
3719# exceptions, they won't pass through here since they are caught
3720# first by the unsupported data format exception handler. that handler
3721# sends them directly to _real_inex() if necessary.
3722#
# note that this path skips fix_skewed_ops() and set_tag_x(): the source
# tag is simply set to NORM, and the whole exception byte of the saved FPSR
# (bits 15-8) is cleared before fout() is called.
3723finex_out:
3724
3725	mov.b		&NORM,STAG(%a6)		# src is a NORM
3726
3727	clr.l		%d0
3728	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3729
3730	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3731
3732	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3733
3734	bsr.l		fout			# store the default result
3735
3736	bra.b		finex_exit
3737
3738#########################################################################
3739# XDEF ****************************************************************	#
3740#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3741#									#
3742#	This handler should be the first code executed upon taking	#
3743#	the FP DZ exception in an operating system.			#
3744#									#
3745# XREF ****************************************************************	#
3746#	_imem_read_long() - read instruction longword from memory	#
3747#	fix_skewed_ops() - adjust fsave operand				#
3748#	_real_dz() - "callout" exit point from FP DZ handler		#
3749#									#
3750# INPUT ***************************************************************	#
3751#	- The system stack contains the FP DZ exception stack.		#
3752#	- The fsave frame contains the source operand.			#
3753#									#
3754# OUTPUT **************************************************************	#
3755#	- The system stack contains the FP DZ exception stack.		#
3756#	- The fsave frame contains the adjusted source operand.		#
3757#									#
3758# ALGORITHM ***********************************************************	#
3759#	In a system where the DZ exception is enabled, the goal is to	#
3760# get to the handler specified at _real_dz(). But, on the 060, when the	#
3761# exception is taken, the input operand in the fsave state frame may	#
3762# be incorrect for some cases and need to be adjusted. So, this package	#
3763# adjusts the operand using fix_skewed_ops() and then branches to	#
3764# _real_dz().								#
3765#									#
3766#########################################################################
3767
3768	global		_fpsp_dz
3769_fpsp_dz:
3770
# entry: same save sequence as the other handlers in this file - local
# frame, FPU state frame into FP_SRC, then d0-d1/a0-a1, the FP control
# regs and fp0-fp1.
3771	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3772
3773	fsave		FP_SRC(%a6)		# grab the "busy" frame
3774
3775	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3776	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3777	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3778
3779# the FPIAR holds the "current PC" of the faulting instruction
3780	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3781
# the fetched instruction longword is only recorded in EXC_OPWORD; this
# routine does not decode it itself.
3782	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3783	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3784	bsr.l		_imem_read_long		# fetch the instruction words
3785	mov.l		%d0,EXC_OPWORD(%a6)
3786
3787##############################################################################
3788
3789
3790# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3791# this would be the case for opclass two operations with a source zero
3792# in the sgl or dbl format.
3793	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3794	bsr.l		fix_skewed_ops		# fix src op
3795
# exit: restore the saved registers, reload the (possibly adjusted) state
# frame into the FPU, drop the local frame and continue in _real_dz.
3796fdz_exit:
3797	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3798	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3799	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3800
3801	frestore	FP_SRC(%a6)
3802
3803	unlk		%a6
3804	bra.l		_real_dz
3805
3806#########################################################################
3807# XDEF ****************************************************************	#
3808#	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
3809#									#
3810#	This handler should be the first code executed upon taking the	#
3811#	"Line F Emulator" exception in an operating system.		#
3812#									#
3813# XREF ****************************************************************	#
3814#	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
3815#	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
3816#	_real_fline() - handle "FLINE" exceptions			#
3817#	_imem_read_long() - read instruction longword			#
3818#									#
3819# INPUT ***************************************************************	#
3820#	- The system stack contains a "Line F Emulator" exception	#
3821#	  stack frame.							#
3822#									#
3823# OUTPUT **************************************************************	#
3824#	- The system stack is unchanged					#
3825#									#
3826# ALGORITHM ***********************************************************	#
3827#	When a "Line F Emulator" exception occurs, there are 3 possible	#
3828# exception types, denoted by the exception stack frame format number:	#
3829#	(1) FPU unimplemented instruction (6 word stack frame)		#
3830#	(2) FPU disabled (8 word stack frame)				#
3831#	(3) Line F (4 word stack frame)					#
3832#									#
3833#	This module determines which and forks the flow off to the	#
3834# appropriate "callout" (for "disabled" and "Line F") or to the		#
3835# correct emulation code (for "FPU unimplemented").			#
3836#	This code also must check for "fmovecr" instructions w/ a	#
3837# non-zero <ea> field. These may get flagged as "Line F" but should	#
3838# really be flagged as "FPU Unimplemented". (This is a "feature" on	#
3839# the '060.)								#
3840#									#
3841#########################################################################
3842
3843	global		_fpsp_fline
3844_fpsp_fline:
3845
# nothing has been saved yet, so 0x6(%sp) is the format/vector offset word
# of the exception frame as stacked by the processor.
3846# check to see if this exception is a "FP Unimplemented Instruction"
3847# exception. if so, branch directly to that handler's entry point.
3848	cmpi.w		0x6(%sp),&0x202c
3849	beq.l		_fpsp_unimp
3850
3851# check to see if the FPU is disabled. if so, jump to the OS entry
3852# point for that condition.
3853	cmpi.w		0x6(%sp),&0x402c
3854	beq.l		_real_fpu_disabled
3855
3856# the exception was an "F-Line Illegal" exception. we check to see
3857# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3858# so, convert the F-Line exception stack frame to an FP Unimplemented
3859# Instruction exception stack frame else branch to the OS entry
3860# point for the F-Line exception handler.
3861	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3862
3863	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3864
# the instruction address is taken from the stacked PC here (not from the
# FPIAR as in the other handlers). d0 = opword in the upper word,
# extension word in the lower word.
3865	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
3866	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3867	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3868	bsr.l		_imem_read_long		# fetch instruction words
3869
# first test: the upper 10 bits of the opword must be 0x3c8, i.e. the
# opword is 0xf200 with its low 6 bits (the <ea> field) ignored.
3870	bfextu		%d0{&0:&10},%d1		# is it an fmovecr?
3871	cmpi.w		%d1,&0x03c8
3872	bne.b		fline_fline		# no
3873
# second test: the upper 6 bits of the extension word must be 0x17.
3874	bfextu		%d0{&16:&6},%d1		# is ext word an fmovecr cmd?
3875	cmpi.b		%d1,&0x17
3876	bne.b		fline_fline		# no
3877
3878# it's an fmovecr w/ a non-zero <ea> that has entered through
3879# the F-Line Illegal exception.
3880# so, we need to convert the F-Line exception stack frame into an
3881# FP Unimplemented Instruction stack frame and jump to that entry
3882# point.
3883#
3884# but, if the FPU is disabled, then we need to jump to the FPU disabled
3885# entry point.
# bit 1 of the PCR clear => FPU enabled => emulate via fline_fmovcr.
3886	movc		%pcr,%d0
3887	btst		&0x1,%d0
3888	beq.b		fline_fmovcr
3889
3890	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3891
3892	unlk		%a6
3893
# FPU disabled: grow the F-Line frame by 8 bytes. the SR word and the PC
# are copied down to the new top, the format/vector word becomes 0x402c
# (the value tested for at the top of this routine), the unmodified PC is
# stored at offset 0xc as the "Current PC", and the PC at offset 0x2 is
# advanced by 4 to become the "Next PC".
# NOTE(review): the longword at 0x8(%sp) of the new frame is not written
# here and still holds bytes of the old frame - confirm that
# _real_fpu_disabled does not rely on it.
3894	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
3895	mov.w		0x8(%sp),(%sp)
3896	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
3897	mov.w		&0x402c,0x6(%sp)
3898	mov.l		0x2(%sp),0xc(%sp)
3899	addq.l		&0x4,0x2(%sp)		# set "Next PC"
3900
3901	bra.l		_real_fpu_disabled
3902
# FPU enabled: rebuild the frame as the 0x202c frame that _fpsp_unimp is
# entered with. the stacked PC goes to the FPIAR (where _fpsp_unimp reads
# the "current PC" from) and is then advanced by 4.
3903fline_fmovcr:
3904	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3905
3906	unlk		%a6
3907
3908	fmov.l		0x2(%sp),%fpiar		# set current PC
3909	addq.l		&0x4,0x2(%sp)		# set Next PC
3910
# push a copy of the first longword (SR + upper PC word), then slide the
# second longword (lower PC word + format/vector word) down behind it; the
# frame is now 4 bytes longer. finally set the upper byte of the
# format/vector word to 0x20. the added longword at 0x8(%sp) keeps the
# bytes that were already there.
3911	mov.l		(%sp),-(%sp)
3912	mov.l		0x8(%sp),0x4(%sp)
3913	mov.b		&0x20,0x6(%sp)
3914
3915	bra.l		_fpsp_unimp
3916
# not an fmovecr: undo the local frame and pass the untouched F-Line frame
# to the OS handler.
3917fline_fline:
3918	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3919
3920	unlk		%a6
3921
3922	bra.l		_real_fline
3923
3924#########################################################################
3925# XDEF ****************************************************************	#
3926#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
3927#		       Instruction" exception.				#
3928#									#
3929#	This handler should be the first code executed upon taking the	#
3930#	FP Unimplemented Instruction exception in an operating system.	#
3931#									#
3932# XREF ****************************************************************	#
3933#	_imem_read_{word,long}() - read instruction word/longword	#
3934#	load_fop() - load src/dst ops from memory and/or FP regfile	#
3935#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3936#	tbl_trans - addr of table of emulation routines for trnscndls	#
3937#	_real_access() - "callout" for access error exception		#
3938#	_fpsp_done() - "callout" for exit; work all done		#
3939#	_real_trace() - "callout" for Trace enabled exception		#
3940#	smovcr() - emulate "fmovecr" instruction			#
3941#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
3942#	_ftrapcc() - emulate an "ftrapcc" instruction			#
3943#	_fdbcc() - emulate an "fdbcc" instruction			#
3944#	_fscc() - emulate an "fscc" instruction				#
3945#	_real_trap() - "callout" for Trap exception			#
3946#	_real_bsun() - "callout" for enabled Bsun exception		#
3947#									#
3948# INPUT ***************************************************************	#
3949#	- The system stack contains the "Unimplemented Instr" stk frame	#
3950#									#
3951# OUTPUT **************************************************************	#
3952#	If access error:						#
3953#	- The system stack is changed to an access error stack frame	#
3954#	If Trace exception enabled:					#
3955#	- The system stack is changed to a Trace exception stack frame	#
3956#	Else: (normal case)						#
3957#	- Correct result has been stored as appropriate			#
3958#									#
3959# ALGORITHM ***********************************************************	#
3960#	There are two main cases of instructions that may enter here to	#
3961# be emulated: (1) the FPgen instructions, most of which were also	#
3962# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
3963#	For the first set, this handler calls the routine load_fop()	#
3964# to load the source and destination (for dyadic) operands to be used	#
3965# for instruction emulation. The correct emulation routine is then	#
3966# chosen by decoding the instruction type and indexing into an		#
3967# emulation subroutine index table. After emulation returns, this	#
3968# handler checks to see if an exception should occur as a result of the #
3969# FP instruction emulation. If so, then an FP exception of the correct	#
3970# type is inserted into the FPU state frame using the "frestore"	#
3971# instruction before exiting through _fpsp_done(). In either the	#
3972# exceptional or non-exceptional cases, we must check to see if the	#
3973# Trace exception is enabled. If so, then we must create a Trace	#
3974# exception frame from the current exception frame and exit through	#
3975# _real_trace().							#
3976#	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
3977# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
3978# may flag that a BSUN exception should be taken. If so, then the	#
3979# current exception stack frame is converted into a BSUN exception	#
3980# stack frame and an exit is made through _real_bsun(). If the		#
3981# instruction was "ftrapcc" and a Trap exception should result, a Trap	#
3982# exception stack frame is created from the current frame and an exit	#
3983# is made through _real_trap(). If a Trace exception is pending, then	#
3984# a Trace exception frame is created from the current frame and a jump	#
3985# is made to _real_trace(). Finally, if none of these conditions exist,	#
3986# then the handler exits through the callout _fpsp_done().		#
3987#									#
3988#	In any of the above scenarios, if a _mem_read() or _mem_write()	#
3989# "callout" returns a failing value, then an access error stack frame	#
3990# is created from the current stack frame and an exit is made through	#
3991# _real_access().							#
3992#									#
3993#########################################################################
3994
3995#
3996# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3997#
3998#	*****************
3999#	*		* => <ea> of fp unimp instr.
4000#	-      EA	-
4001#	*		*
4002#	*****************
4003#	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
4004#	*****************
4005#	*		*
4006#	-    Next PC	- => PC of instr to execute after exc handling
4007#	*		*
4008#	*****************
4009#	*      SR	* => SR at the time the exception was taken
4010#	*****************
4011#
4012# Note: the !NULL bit does not get set in the fsave frame when the
4013# machine encounters an fp unimp exception. Therefore, it must be set
4014# before leaving this handler.
4015#
4016	global		_fpsp_unimp
4017_fpsp_unimp:
4018
# entry: build the local frame and save d0-d1/a0-a1, the FP control regs
# and fp0-fp1. unlike _fpsp_inex and _fpsp_dz, no fsave is done here.
4019	link.w		%a6,&-LOCAL_SIZE	# init stack frame
4020
4021	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
4022	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4023	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1
4024
4025	btst		&0x5,EXC_SR(%a6)	# user mode exception?
4026	bne.b		funimp_s		# no; supervisor mode
4027
4028# save the value of the user stack pointer onto the stack frame
4029funimp_u:
4030	mov.l		%usp,%a0		# fetch user stack pointer
4031	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
4032	bra.b		funimp_cont
4033
4034# store the value of the supervisor stack pointer BEFORE the exc occurred.
4035# old_sp is address just above stacked effective address.
# the second copy in OLD_A7 is what funimp_gen_exit_a7_s later subtracts
# from EXC_A7 to find out how far a7 moved.
4036funimp_s:
4037	lea		4+EXC_EA(%a6),%a0	# load old a7'
4038	mov.l		%a0,EXC_A7(%a6)		# store a7'
4039	mov.l		%a0,OLD_A7(%a6)		# make a copy
4040
4041funimp_cont:
4042
4043# the FPIAR holds the "current PC" of the faulting instruction.
4044	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4045
# d0 = opword in the upper word, extension word in the lower word.
# NOTE(review): the status this fetch leaves in d1 is not tested here,
# whereas funimp_fdbcc does test d1 after _imem_read_word - confirm that
# this is intentional.
4046	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4047	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
4048	bsr.l		_imem_read_long		# fetch the instruction words
4049	mov.l		%d0,EXC_OPWORD(%a6)
4050
4051############################################################################
4052
4053	fmov.l		&0x0,%fpcr		# clear FPCR
4054	fmov.l		&0x0,%fpsr		# clear FPSR
4055
4056	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag
4057
4058# Divide the fp instructions into 8 types based on the TYPE field in
4059# bits 6-8 of the opword(classes 6,7 are undefined).
4060# (for the '060, only two types  can take this exception)
# bit 22 of the fetched longword is bit 6 of the opword, i.e. the low bit
# of the TYPE field, which is enough to tell type 0 from type 1.
4061#	bftst		%d0{&7:&3}		# test TYPE
4062	btst		&22,%d0			# type 0 or 1 ?
4063	bne.w		funimp_misc		# type 1
4064
4065#########################################
4066# TYPE == 0: General instructions	#
4067#########################################
4068funimp_gen:
4069
4070	clr.b		STORE_FLG(%a6)		# clear "store result" flag
4071
4072# clear the ccode byte and exception status byte
4073	andi.l		&0x00ff00ff,USER_FPSR(%a6)
4074
# d0 still holds the fetched instruction longword; bit offset 16, width 6
# is the upper 6 bits of the extension word.
4075	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
4076	cmpi.b		%d1,&0x17		# is op an fmovecr?
4077	beq.w		funimp_fmovcr		# yes
4078
4079funimp_gen_op:
4080	bsr.l		_load_fop		# load
4081
4082	clr.l		%d0
4083	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
4084
# build the tbl_trans index in d1: (low 6 bits of the command word << 3)
# OR'ed with the src operand tag.
4085	mov.b		1+EXC_CMDREG(%a6),%d1
4086	andi.w		&0x003f,%d1		# extract extension bits
4087	lsl.w		&0x3,%d1		# shift left 3 bits
4088	or.b		STAG(%a6),%d1		# insert src optag bits
4089
4090	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
4091	lea		FP_SRC(%a6),%a0		# pass src ptr in a0
4092
# tbl_trans is indexed by d1*2 and yields a word that is used as an offset
# from tbl_trans itself to reach the emulation routine.
4093	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
4094	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate
4095
# after emulation: if any exception enable bit is set, funimp_ena decides
# whether an exception has to be inserted (it expects the enable byte in
# d0); otherwise the result is simply stored.
4096funimp_fsave:
4097	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
4098	bne.w		funimp_ena		# some are enabled
4099
4100funimp_store:
4101	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4102	bsr.l		store_fpreg		# store result to fp regfile
4103
# normal exit: restore the saved registers. no frestore is done on this
# path (compare funimp_gen_exit2, which joins at funimp_gen_exit_cmp after
# its own restore and frestore).
4104funimp_gen_exit:
4105	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4106	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4107	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4108
4109funimp_gen_exit_cmp:
4110	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4111	beq.b		funimp_gen_exit_a7	# yes
4112
4113	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4114	beq.b		funimp_gen_exit_a7	# yes
4115
4116funimp_gen_exit_cont:
4117	unlk		%a6
4118
# the local frame is gone, so (%sp) is the upper byte of the stacked SR.
4119funimp_gen_exit_cont2:
4120	btst		&0x7,(%sp)		# is trace on?
4121	beq.l		_fpsp_done		# no
4122
4123# this catches a problem with the case where an exception will be re-inserted
4124# into the machine. the frestore has already been executed...so, the fmov.l
4125# alone of the control register would trigger an unwanted exception.
4126# until I feel like fixing this, we'll sidestep the exception.
# the FPIAR is read while the FPU state is temporarily saved on the stack.
# 0x14(%sp) is presumably the exception frame's address slot (offset 0x8)
# seen past a 0xc-byte fsave frame - TODO confirm the fsave frame size.
# the format/vector word is then rewritten for the trace exception.
4127	fsave		-(%sp)
4128	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
4129	frestore	(%sp)+
4130	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
4131	bra.l		_real_trace
4132
# a7 was the address register of an (a7)+ or -(a7) operand.
4133funimp_gen_exit_a7:
4134	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
4135	bne.b		funimp_gen_exit_a7_s	# supervisor
4136
# user mode: write the updated a7 from EXC_A7 back to the USP. a0 has
# already been restored, so it is preserved on the stack around the move.
4137	mov.l		%a0,-(%sp)
4138	mov.l		EXC_A7(%a6),%a0
4139	mov.l		%a0,%usp
4140	mov.l		(%sp)+,%a0
4141	bra.b		funimp_gen_exit_cont
4142
4143# if the instruction was executed from supervisor mode and the addressing
4144# mode was (a7)+, then the stack frame for the rte must be shifted "up"
4145# "n" bytes where "n" is the size of the src operand type.
4146# f<op>.{b,w,l,s,d,x,p}
# d0 = EXC_A7 - OLD_A7 is the distance a7 moved. the longwords at
# EXC_PC+0x2 and EXC_SR are copied that many bytes away (the higher one
# first), and the distance is left as a word at EXC_SR so that it can be
# added to sp once the local frame is gone.
4147funimp_gen_exit_a7_s:
4148	mov.l		%d0,-(%sp)		# save d0
4149	mov.l		EXC_A7(%a6),%d0		# load new a7'
4150	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
4151	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4152	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4153	mov.w		%d0,EXC_SR(%a6)		# store incr number
4154	mov.l		(%sp)+,%d0		# restore d0
4155
4156	unlk		%a6
4157
# (%sp) is read as the increment stored above; this presumes EXC_SR is the
# first location above the unlinked frame - TODO confirm.
4158	add.w		(%sp),%sp		# stack frame shifted
4159	bra.b		funimp_gen_exit_cont2
4160
4161######################
4162# fmovecr.x #ccc,fpn #
4163######################
# no operands are loaded for fmovecr: smovcr() is called with the zero
# extended FPCR_MODE byte in d0 and the low 7 bits of the command word in
# d1, then the normal post-emulation path at funimp_fsave is rejoined.
4164funimp_fmovcr:
4165	clr.l		%d0
4166	mov.b		FPCR_MODE(%a6),%d0
4167	mov.b		1+EXC_CMDREG(%a6),%d1
4168	andi.l		&0x0000007f,%d1		# pass rom offset in d1
4169	bsr.l		smovcr
4170	bra.w		funimp_fsave
4171
4172#########################################################################
4173
4174#
4175# the user has enabled some exceptions. we figure not to see this too
4176# often so that's why it gets lower priority.
4177#
# on entry d0.b = FPCR_ENABLE (loaded at funimp_fsave).
4178funimp_ena:
4179
4180# was an exception set that was also enabled?
# bfffo scans the low byte of d0 starting at its most significant bit, so
# on a hit d0 becomes 24 plus the position of the first set bit.
4181	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
4182	bfffo		%d0{&24:&8},%d0		# find highest priority exception
4183	bne.b		funimp_exc		# at least one was set
4184
4185# no exception that was enabled was set BUT if we got an exact overflow
4186# and overflow wasn't enabled but inexact was (yech!) then this is
4187# an inexact exception; otherwise, return to normal non-exception flow.
4188	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189	beq.w		funimp_store		# no; return to normal flow
4190
4191# the overflow w/ exact result happened but was inexact set in the FPCR?
4192funimp_ovfl:
4193	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194	beq.w		funimp_store		# no; return to normal flow
4195	bra.b		funimp_exc_ovfl		# yes
4196
4197# some exception happened that was actually enabled.
4198# we'll insert this new exception into the FPU and then return.
4199funimp_exc:
4200	subi.l		&24,%d0			# fix offset to be 0-7
4201	cmpi.b		%d0,&0x6		# is exception INEX?
4202	bne.b		funimp_exc_force	# no
4203
4204# the enabled exception was inexact. so, if it occurs with an overflow
4205# or underflow that was disabled, then we have to force an overflow or
4206# underflow frame. the eventual overflow or underflow handler will see that
4207# it's actually an inexact and act appropriately. this is the only easy
4208# way to have the EXOP available for the enabled inexact handler when
4209# a disabled overflow or underflow has also happened.
4210	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211	bne.b		funimp_exc_ovfl		# yes
4212	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213	bne.b		funimp_exc_unfl		# yes
4214
4215# force the fsave exception status bits to signal an exception of the
4216# appropriate type. don't forget to "skew" the source operand in case we
4217# "unskewed" the one the hardware initially gave us.
# d0 (the table index) is preserved across the funimp_skew call; the
# selected table word is then written to the frame at FP_SRC+2.
4218funimp_exc_force:
4219	mov.l		%d0,-(%sp)		# save d0
4220	bsr.l		funimp_skew		# check for special case
4221	mov.l		(%sp)+,%d0		# restore d0
4222	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4223	bra.b		funimp_gen_exit2	# exit with frestore
4224
# one status word per exception, indexed by the 0-7 value computed in
# funimp_exc (0 = most significant bit of the exception byte). entries 3
# and 4 are the same values that funimp_exc_ovfl and funimp_exc_unfl store
# directly.
4225tbl_funimp_except:
4226	short		0xe002, 0xe006, 0xe004, 0xe005
4227	short		0xe003, 0xe002, 0xe001, 0xe001
4228
4229# insert an overflow frame
4230funimp_exc_ovfl:
4231	bsr.l		funimp_skew		# check for special case
4232	mov.w		&0xe005,2+FP_SRC(%a6)
4233	bra.b		funimp_gen_exit2
4234
4235# insert an underflow frame
# (falls through into funimp_gen_exit2)
4236funimp_exc_unfl:
4237	bsr.l		funimp_skew		# check for special case
4238	mov.w		&0xe003,2+FP_SRC(%a6)
4239
4240# this is the general exit point for an enabled exception that will be
4241# restored into the machine for the instruction just emulated.
# the result is not stored through store_fpreg on this path.
4242funimp_gen_exit2:
4243	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4244	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4246
4247	frestore	FP_SRC(%a6)		# insert exceptional status
4248
4249	bra.w		funimp_gen_exit_cmp
4250
4251############################################################################
4252
4253#
4254# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4255#
4256# These instructions were implemented on the '881/2 and '040 in hardware but
4257# are emulated in software on the '060.
4258#
# decode on the opword's <ea> field (d0 still holds the fetched longword):
#	mode == 1			-> fdb<cc>
#	mode != 7			-> fs<cc>
#	mode == 7, reg < 2		-> fs<cc>
#	mode == 7, reg >= 2		-> ftrap<cc> (falls through)
4259funimp_misc:
4260	bfextu		%d0{&10:&3},%d1		# extract mode field
4261	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
4262	beq.w		funimp_fdbcc		# yes
4263	cmpi.b		%d1,&0x7		# is mode something other than 7?
4264	bne.w		funimp_fscc		# yes; it is an fs<cc>
4265	bfextu		%d0{&13:&3},%d1		# extract reg field
4266	cmpi.b		%d1,&0x2		# mode 7 with reg < 2?
4267	blt.w		funimp_fscc		# yes; it is an fs<cc>
4268
4269#########################
4270# ftrap<cc>		#
4271# ftrap<cc>.w #<data>	#
4272# ftrap<cc>.l #<data>	#
4273#########################
# _ftrapcc() reports its outcome through SPCOND_FLG: fbsun_flg sends us to
# funimp_bsun, ftrapcc_flg means the trap must be taken, and anything else
# finishes normally through funimp_done.
4274funimp_ftrapcc:
4275
4276	bsr.l		_ftrapcc		# FTRAP<cc>()
4277
4278	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279	beq.w		funimp_bsun		# yes
4280
4281	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282	bne.w		funimp_done		# no
4283
4284#	 FP UNIMP FRAME		   TRAP  FRAME
4285#	*****************	*****************
4286#	**    <EA>     **	**  Current PC **
4287#	*****************	*****************
4288#	* 0x2 *  0x02c	*	* 0x2 *  0x01c  *
4289#	*****************	*****************
4290#	**   Next PC   **	**   Next PC   **
4291#	*****************	*****************
4292#	*      SR	*	*      SR	*
4293#	*****************	*****************
4294#	    (6 words)		    (6 words)
4295#
4296# the ftrapcc instruction should take a trap. so, here we must create a
4297# trap stack frame from an unimplemented fp instruction stack frame and
4298# jump to the user supplied entry point for the trap exception
# only two fields of the frame change; it keeps its size and position.
4299funimp_ftrapcc_tp:
4300	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c
4302
4303	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4304	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4306
4307	unlk		%a6
4308	bra.l		_real_trap
4309
4310#########################
4311# fdb<cc> Dn,<label>	#
4312#########################
# Emulate fdb<cc>. The 16-bit branch displacement is read from the
# instruction stream at the address held in EXC_EXTWPTR(%a6); that pointer is
# advanced by 2 before the read is attempted.
#   _imem_read_word: called with a0 = address to read. The code below expects
#                    the word back in d0 and a non-zero d1 if the read failed.
#   _fdbcc:          called with d0 = displacement sign-extended to a long.
# Exits:
#   funimp_iacc  - the displacement fetch failed
#   funimp_bsun  - SPCOND_FLG == fbsun_flg after _fdbcc
#   funimp_done  - everything else
4313funimp_fdbcc:
4314
4315	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4316	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4317	bsr.l		_imem_read_word		# read displacement
4318
4319	tst.l		%d1			# did ifetch fail?
4320	bne.w		funimp_iacc		# yes
4321
4322	ext.l		%d0			# sign extend displacement
4323
4324	bsr.l		_fdbcc			# FDB<cc>()
4325
4326	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4327	beq.w		funimp_bsun
4328
4329	bra.w		funimp_done		# branch to finish
4330
4331#################
4332# fs<cc>.b <ea>	#
4333#################
# Emulate fs<cc>.b <ea> through _fscc (defined elsewhere), then choose the
# exit path:
#   SPCOND_FLG == fbsun_flg          -> funimp_bsun
#   bit 5 of the byte at EXC_SR set  -> funimp_fscc_s (supervisor-mode frame)
#   otherwise (user-mode frame)      -> funimp_fscc_u
# funimp_fscc_u copies EXC_A7(%a6) into the USP unconditionally (a0 is used
# as the carrier and is reloaded later by funimp_done) and then leaves
# through funimp_done.
4334funimp_fscc:
4335
4336	bsr.l		_fscc			# FS<cc>()
4337
4338# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339# does not need to update "An" before taking a bsun exception.
4340	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4341	beq.w		funimp_bsun
4342
4343	btst		&0x5,EXC_SR(%a6)	# yes; is it a user mode exception?
4344	bne.b		funimp_fscc_s		# no
4345
4346funimp_fscc_u:
4347	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
4348	mov.l		%a0,%usp
4349	bra.w		funimp_done		# branch to finish
4350
4351# remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352# so, the least significant WORD of the stacked effective address got
4353# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354# so that the rte will work correctly without destroying the result.
4355# even though the operation size is byte, the stack ptr is decr by 2.
4356#
4357# remember, also, this instruction may be traced.
# Supervisor-mode exit for fs<cc>. When SPCOND_FLG is not mda7_flg the shared
# funimp_done exit is used. Otherwise the saved registers are reloaded, the
# link frame is dropped, and the exception frame is rebuilt 2 bytes lower:
#   trace off: SR/hi(PC) and lo(PC)/fmt-voff are copied down as two longs,
#              then control goes to _fpsp_done.
#   trace on:  (bit 7 of the byte at (%sp)) SR and PC are copied down, the
#              fmt/voff word is replaced by 0x2024, %fpiar is stored at
#              0x8(%sp), then control goes to _real_trace.
# The copies run from low to high addresses and every destination lies 2
# bytes below its source, so no copy overwrites data a later copy still has
# to read. Do not reorder them.
4358funimp_fscc_s:
4359	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360	bne.w		funimp_done		# no
4361
4362	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4363	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4365
4366	unlk		%a6
4367
4368	btst		&0x7,(%sp)		# is trace enabled?
4369	bne.b		funimp_fscc_s_trace	# yes
4370
4371	subq.l		&0x2,%sp
4372	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
4373	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
4374	bra.l		_fpsp_done
4375
# Trace variant: same 2-byte shift, but the result is a trace frame
# (fmt/voff 0x2024) whose address slot holds the contents of %fpiar.
4376funimp_fscc_s_trace:
4377	subq.l		&0x2,%sp
4378	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
4379	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
4380	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
4381	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"
4382
4383	bra.l		_real_trace
4384
4385#
4386# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387# the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388# restore a bsun exception into the machine, and branch to the user
4389# supplied bsun hook.
4390#
4391#	 FP UNIMP FRAME		   BSUN FRAME
4392#	*****************	*****************
4393#	**    <EA>     **	* 0x0 * 0x0c0	*
4394#	*****************	*****************
4395#	* 0x2 *  0x02c  *	** Current PC  **
4396#	*****************	*****************
4397#	**   Next PC   **	*      SR	*
4398#	*****************	*****************
4399#	*      SR	*	    (4 words)
4400#	*****************
4401#	    (6 words)
4402#
# Shared bsun exit for ftrap<cc>, fdb<cc> and fs<cc>. Sequence:
#   1. Rewrite the exception frame in place (a6-relative, so before unlk):
#        0x00c0      -> word at 2+EXC_EA   (new fmt/voff)
#        USER_FPIAR  -> long at EXC_VOFF   (new PC; the long store also
#                                           covers the 2 bytes after the old
#                                           fmt/voff word)
#        EXC_SR      -> word at 2+EXC_PC   (new SR position)
#   2. Store 0xe000 at 2+FP_SRC(%a6) and, once the saved registers are
#      reloaded, frestore from FP_SRC(%a6). FP_SRC lives in the link frame,
#      which is why the frestore precedes "unlk %a6".
#   3. Drop the link frame and pop 4 bytes so %sp points at the relocated SR.
#      NOTE(review): the 4 bytes are presumably the old SR and hi(PC) words;
#      the EXC_* offsets are defined outside this excerpt -- confirm there.
#   4. Branch to _real_bsun.
4403funimp_bsun:
4404	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
4405	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4407
4408	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled
4409
4410	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4411	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4413
4414	frestore	FP_SRC(%a6)		# restore bsun exception
4415
4416	unlk		%a6
4417
4418	addq.l		&0x4,%sp		# erase sludge
4419
4420	bra.l		_real_bsun		# branch to user bsun hook
4421
4422#
4423# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4424# and return.
4425#
4426# as usual, we have to check for trace mode being on here. since instructions
4427# modifying the supervisor stack frame don't pass through here, this is a
4428# relatively easy task.
4429#
# Common exit for the fdb<cc>/fs<cc>/ftrap<cc> paths that keep the exception
# frame where it is. Reloads fp0-fp1, the FP control registers and
# d0-d1/a0-a1 from the save area, drops the link frame, then tests bit 7 of
# the byte now at (%sp):
#   set   -> funimp_trace (turn the frame into a trace frame)
#   clear -> _fpsp_done
4430funimp_done:
4431	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4432	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4434
4435	unlk		%a6
4436
4437	btst		&0x7,(%sp)		# is trace enabled?
4438	bne.b		funimp_trace		# yes
4439
4440	bra.l		_fpsp_done
4441
4442#	 FP UNIMP FRAME		  TRACE  FRAME
4443#	*****************	*****************
4444#	**    <EA>     **	**  Current PC **
4445#	*****************	*****************
4446#	* 0x2 *  0x02c	*	* 0x2 *  0x024  *
4447#	*****************	*****************
4448#	**   Next PC   **	**   Next PC   **
4449#	*****************	*****************
4450#	*      SR	*	*      SR	*
4451#	*****************	*****************
4452#	    (6 words)		    (6 words)
4453#
4454# the fscc instruction should take a trace trap. so, here we must create a
4455# trace stack frame from an unimplemented fp instruction stack frame and
4456# jump to the user supplied entry point for the trace exception
# Reached from funimp_done, after "unlk %a6", when the trace bit test there
# succeeds. funimp_done is shared by the ftrap<cc>, fdb<cc> and fs<cc> paths,
# so this code is not specific to fs<cc>.
# Two fields are written: the long at 0x8(%sp) receives %fpiar, and the byte
# at 0x7(%sp) -- the low byte of the fmt/voff word -- becomes 0x24, turning
# the 0x202c of the diagram into 0x2024. Then control goes to _real_trace.
4457funimp_trace:
4458	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
4459	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024
4460
4461	bra.l		_real_trace
4462
4463################################################################
4464
# tbl_trans: table of 16-bit entries, each written as "routine - tbl_trans",
# i.e. the distance of a routine from the tbl_trans label itself.
#   - There are 0x38 groups ($00-$37) of 8 entries each, 0x1c0 entries in
#     total, which is the count passed to the swbeg directive below.
#   - The "$xx-n" tag in each comment names the group ($xx) and an entry
#     number (n), followed by the instruction and the source operand class
#     that the entry serves.
#   - "tbl_trans - tbl_trans" assembles to 0. It fills the slots marked ERROR
#     or unnorm, and every slot of the instructions that have no routine
#     listed in this table (fmovecr, fint, fintrz, fsqrt, fabs, fneg, fdiv,
#     fadd, fmul, fsgldiv, fsglmul, fsub).
#   - Groups $30-$37 are eight identical copies of the fsincos entries.
# NOTE(review): the code that indexes this table is outside this excerpt;
# presumably the index is (group * 8 + operand class) -- confirm at the
# lookup site before adding or moving entries.
# NOTE(review): in the named groups the fifth and sixth entries are commented
# "-5 ... denorm" and "-4 ... snan", so the numeric suffix is out of step with
# the entry's position, while the ERROR groups count 4 then 5. Check the
# operand tag definitions before relying on the suffix.
4465	global		tbl_trans
4466	swbeg		&0x1c0
4467tbl_trans:
4468	short		tbl_trans - tbl_trans	# $00-0 fmovecr all
4469	short		tbl_trans - tbl_trans	# $00-1 fmovecr all
4470	short		tbl_trans - tbl_trans	# $00-2 fmovecr all
4471	short		tbl_trans - tbl_trans	# $00-3 fmovecr all
4472	short		tbl_trans - tbl_trans	# $00-4 fmovecr all
4473	short		tbl_trans - tbl_trans	# $00-5 fmovecr all
4474	short		tbl_trans - tbl_trans	# $00-6 fmovecr all
4475	short		tbl_trans - tbl_trans	# $00-7 fmovecr all
4476
4477	short		tbl_trans - tbl_trans	# $01-0 fint norm
4478	short		tbl_trans - tbl_trans	# $01-1 fint zero
4479	short		tbl_trans - tbl_trans	# $01-2 fint inf
4480	short		tbl_trans - tbl_trans	# $01-3 fint qnan
4481	short		tbl_trans - tbl_trans	# $01-5 fint denorm
4482	short		tbl_trans - tbl_trans	# $01-4 fint snan
4483	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
4484	short		tbl_trans - tbl_trans	# $01-7 ERROR
4485
4486	short		ssinh	 - tbl_trans	# $02-0 fsinh norm
4487	short		src_zero - tbl_trans	# $02-1 fsinh zero
4488	short		src_inf	 - tbl_trans	# $02-2 fsinh inf
4489	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
4490	short		ssinhd	 - tbl_trans	# $02-5 fsinh denorm
4491	short		src_snan - tbl_trans	# $02-4 fsinh snan
4492	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
4493	short		tbl_trans - tbl_trans	# $02-7 ERROR
4494
4495	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
4496	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
4497	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
4498	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
4499	short		tbl_trans - tbl_trans	# $03-5 fintrz denorm
4500	short		tbl_trans - tbl_trans	# $03-4 fintrz snan
4501	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
4502	short		tbl_trans - tbl_trans	# $03-7 ERROR
4503
4504	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
4505	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
4506	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
4507	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
4508	short		tbl_trans - tbl_trans	# $04-5 fsqrt denorm
4509	short		tbl_trans - tbl_trans	# $04-4 fsqrt snan
4510	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
4511	short		tbl_trans - tbl_trans	# $04-7 ERROR
4512
4513	short		tbl_trans - tbl_trans	# $05-0 ERROR
4514	short		tbl_trans - tbl_trans	# $05-1 ERROR
4515	short		tbl_trans - tbl_trans	# $05-2 ERROR
4516	short		tbl_trans - tbl_trans	# $05-3 ERROR
4517	short		tbl_trans - tbl_trans	# $05-4 ERROR
4518	short		tbl_trans - tbl_trans	# $05-5 ERROR
4519	short		tbl_trans - tbl_trans	# $05-6 ERROR
4520	short		tbl_trans - tbl_trans	# $05-7 ERROR
4521
4522	short		slognp1	 - tbl_trans	# $06-0 flognp1 norm
4523	short		src_zero - tbl_trans	# $06-1 flognp1 zero
4524	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
4525	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
4526	short		slognp1d - tbl_trans	# $06-5 flognp1 denorm
4527	short		src_snan - tbl_trans	# $06-4 flognp1 snan
4528	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
4529	short		tbl_trans - tbl_trans	# $06-7 ERROR
4530
4531	short		tbl_trans - tbl_trans	# $07-0 ERROR
4532	short		tbl_trans - tbl_trans	# $07-1 ERROR
4533	short		tbl_trans - tbl_trans	# $07-2 ERROR
4534	short		tbl_trans - tbl_trans	# $07-3 ERROR
4535	short		tbl_trans - tbl_trans	# $07-4 ERROR
4536	short		tbl_trans - tbl_trans	# $07-5 ERROR
4537	short		tbl_trans - tbl_trans	# $07-6 ERROR
4538	short		tbl_trans - tbl_trans	# $07-7 ERROR
4539
4540	short		setoxm1	 - tbl_trans	# $08-0 fetoxm1 norm
4541	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
4542	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
4543	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
4544	short		setoxm1d - tbl_trans	# $08-5 fetoxm1 denorm
4545	short		src_snan - tbl_trans	# $08-4 fetoxm1 snan
4546	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
4547	short		tbl_trans - tbl_trans	# $08-7 ERROR
4548
4549	short		stanh	 - tbl_trans	# $09-0 ftanh norm
4550	short		src_zero - tbl_trans	# $09-1 ftanh zero
4551	short		src_one	 - tbl_trans	# $09-2 ftanh inf
4552	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
4553	short		stanhd	 - tbl_trans	# $09-5 ftanh denorm
4554	short		src_snan - tbl_trans	# $09-4 ftanh snan
4555	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
4556	short		tbl_trans - tbl_trans	# $09-7 ERROR
4557
4558	short		satan	 - tbl_trans	# $0a-0 fatan norm
4559	short		src_zero - tbl_trans	# $0a-1 fatan zero
4560	short		spi_2	 - tbl_trans	# $0a-2 fatan inf
4561	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
4562	short		satand	 - tbl_trans	# $0a-5 fatan denorm
4563	short		src_snan - tbl_trans	# $0a-4 fatan snan
4564	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
4565	short		tbl_trans - tbl_trans	# $0a-7 ERROR
4566
4567	short		tbl_trans - tbl_trans	# $0b-0 ERROR
4568	short		tbl_trans - tbl_trans	# $0b-1 ERROR
4569	short		tbl_trans - tbl_trans	# $0b-2 ERROR
4570	short		tbl_trans - tbl_trans	# $0b-3 ERROR
4571	short		tbl_trans - tbl_trans	# $0b-4 ERROR
4572	short		tbl_trans - tbl_trans	# $0b-5 ERROR
4573	short		tbl_trans - tbl_trans	# $0b-6 ERROR
4574	short		tbl_trans - tbl_trans	# $0b-7 ERROR
4575
4576	short		sasin	 - tbl_trans	# $0c-0 fasin norm
4577	short		src_zero - tbl_trans	# $0c-1 fasin zero
4578	short		t_operr	 - tbl_trans	# $0c-2 fasin inf
4579	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
4580	short		sasind	 - tbl_trans	# $0c-5 fasin denorm
4581	short		src_snan - tbl_trans	# $0c-4 fasin snan
4582	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
4583	short		tbl_trans - tbl_trans	# $0c-7 ERROR
4584
4585	short		satanh	 - tbl_trans	# $0d-0 fatanh norm
4586	short		src_zero - tbl_trans	# $0d-1 fatanh zero
4587	short		t_operr	 - tbl_trans	# $0d-2 fatanh inf
4588	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
4589	short		satanhd	 - tbl_trans	# $0d-5 fatanh denorm
4590	short		src_snan - tbl_trans	# $0d-4 fatanh snan
4591	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
4592	short		tbl_trans - tbl_trans	# $0d-7 ERROR
4593
4594	short		ssin	 - tbl_trans	# $0e-0 fsin norm
4595	short		src_zero - tbl_trans	# $0e-1 fsin zero
4596	short		t_operr	 - tbl_trans	# $0e-2 fsin inf
4597	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
4598	short		ssind	 - tbl_trans	# $0e-5 fsin denorm
4599	short		src_snan - tbl_trans	# $0e-4 fsin snan
4600	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
4601	short		tbl_trans - tbl_trans	# $0e-7 ERROR
4602
4603	short		stan	 - tbl_trans	# $0f-0 ftan norm
4604	short		src_zero - tbl_trans	# $0f-1 ftan zero
4605	short		t_operr	 - tbl_trans	# $0f-2 ftan inf
4606	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
4607	short		stand	 - tbl_trans	# $0f-5 ftan denorm
4608	short		src_snan - tbl_trans	# $0f-4 ftan snan
4609	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
4610	short		tbl_trans - tbl_trans	# $0f-7 ERROR
4611
4612	short		setox	 - tbl_trans	# $10-0 fetox norm
4613	short		ld_pone	 - tbl_trans	# $10-1 fetox zero
4614	short		szr_inf	 - tbl_trans	# $10-2 fetox inf
4615	short		src_qnan - tbl_trans	# $10-3 fetox qnan
4616	short		setoxd	 - tbl_trans	# $10-5 fetox denorm
4617	short		src_snan - tbl_trans	# $10-4 fetox snan
4618	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
4619	short		tbl_trans - tbl_trans	# $10-7 ERROR
4620
4621	short		stwotox	 - tbl_trans	# $11-0 ftwotox norm
4622	short		ld_pone	 - tbl_trans	# $11-1 ftwotox zero
4623	short		szr_inf	 - tbl_trans	# $11-2 ftwotox inf
4624	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
4625	short		stwotoxd - tbl_trans	# $11-5 ftwotox denorm
4626	short		src_snan - tbl_trans	# $11-4 ftwotox snan
4627	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
4628	short		tbl_trans - tbl_trans	# $11-7 ERROR
4629
4630	short		stentox	 - tbl_trans	# $12-0 ftentox norm
4631	short		ld_pone	 - tbl_trans	# $12-1 ftentox zero
4632	short		szr_inf	 - tbl_trans	# $12-2 ftentox inf
4633	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
4634	short		stentoxd - tbl_trans	# $12-5 ftentox denorm
4635	short		src_snan - tbl_trans	# $12-4 ftentox snan
4636	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
4637	short		tbl_trans - tbl_trans	# $12-7 ERROR
4638
4639	short		tbl_trans - tbl_trans	# $13-0 ERROR
4640	short		tbl_trans - tbl_trans	# $13-1 ERROR
4641	short		tbl_trans - tbl_trans	# $13-2 ERROR
4642	short		tbl_trans - tbl_trans	# $13-3 ERROR
4643	short		tbl_trans - tbl_trans	# $13-4 ERROR
4644	short		tbl_trans - tbl_trans	# $13-5 ERROR
4645	short		tbl_trans - tbl_trans	# $13-6 ERROR
4646	short		tbl_trans - tbl_trans	# $13-7 ERROR
4647
4648	short		slogn	 - tbl_trans	# $14-0 flogn norm
4649	short		t_dz2	 - tbl_trans	# $14-1 flogn zero
4650	short		sopr_inf - tbl_trans	# $14-2 flogn inf
4651	short		src_qnan - tbl_trans	# $14-3 flogn qnan
4652	short		slognd	 - tbl_trans	# $14-5 flogn denorm
4653	short		src_snan - tbl_trans	# $14-4 flogn snan
4654	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
4655	short		tbl_trans - tbl_trans	# $14-7 ERROR
4656
4657	short		slog10	 - tbl_trans	# $15-0 flog10 norm
4658	short		t_dz2	 - tbl_trans	# $15-1 flog10 zero
4659	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
4660	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
4661	short		slog10d	 - tbl_trans	# $15-5 flog10 denorm
4662	short		src_snan - tbl_trans	# $15-4 flog10 snan
4663	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
4664	short		tbl_trans - tbl_trans	# $15-7 ERROR
4665
4666	short		slog2	 - tbl_trans	# $16-0 flog2 norm
4667	short		t_dz2	 - tbl_trans	# $16-1 flog2 zero
4668	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
4669	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
4670	short		slog2d	 - tbl_trans	# $16-5 flog2 denorm
4671	short		src_snan - tbl_trans	# $16-4 flog2 snan
4672	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
4673	short		tbl_trans - tbl_trans	# $16-7 ERROR
4674
4675	short		tbl_trans - tbl_trans	# $17-0 ERROR
4676	short		tbl_trans - tbl_trans	# $17-1 ERROR
4677	short		tbl_trans - tbl_trans	# $17-2 ERROR
4678	short		tbl_trans - tbl_trans	# $17-3 ERROR
4679	short		tbl_trans - tbl_trans	# $17-4 ERROR
4680	short		tbl_trans - tbl_trans	# $17-5 ERROR
4681	short		tbl_trans - tbl_trans	# $17-6 ERROR
4682	short		tbl_trans - tbl_trans	# $17-7 ERROR
4683
4684	short		tbl_trans - tbl_trans	# $18-0 fabs norm
4685	short		tbl_trans - tbl_trans	# $18-1 fabs zero
4686	short		tbl_trans - tbl_trans	# $18-2 fabs inf
4687	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
4688	short		tbl_trans - tbl_trans	# $18-5 fabs denorm
4689	short		tbl_trans - tbl_trans	# $18-4 fabs snan
4690	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
4691	short		tbl_trans - tbl_trans	# $18-7 ERROR
4692
4693	short		scosh	 - tbl_trans	# $19-0 fcosh norm
4694	short		ld_pone	 - tbl_trans	# $19-1 fcosh zero
4695	short		ld_pinf	 - tbl_trans	# $19-2 fcosh inf
4696	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
4697	short		scoshd	 - tbl_trans	# $19-5 fcosh denorm
4698	short		src_snan - tbl_trans	# $19-4 fcosh snan
4699	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
4700	short		tbl_trans - tbl_trans	# $19-7 ERROR
4701
4702	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
4703	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
4704	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
4705	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
4706	short		tbl_trans - tbl_trans	# $1a-5 fneg denorm
4707	short		tbl_trans - tbl_trans	# $1a-4 fneg snan
4708	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
4709	short		tbl_trans - tbl_trans	# $1a-7 ERROR
4710
4711	short		tbl_trans - tbl_trans	# $1b-0 ERROR
4712	short		tbl_trans - tbl_trans	# $1b-1 ERROR
4713	short		tbl_trans - tbl_trans	# $1b-2 ERROR
4714	short		tbl_trans - tbl_trans	# $1b-3 ERROR
4715	short		tbl_trans - tbl_trans	# $1b-4 ERROR
4716	short		tbl_trans - tbl_trans	# $1b-5 ERROR
4717	short		tbl_trans - tbl_trans	# $1b-6 ERROR
4718	short		tbl_trans - tbl_trans	# $1b-7 ERROR
4719
4720	short		sacos	 - tbl_trans	# $1c-0 facos norm
4721	short		ld_ppi2	 - tbl_trans	# $1c-1 facos zero
4722	short		t_operr	 - tbl_trans	# $1c-2 facos inf
4723	short		src_qnan - tbl_trans	# $1c-3 facos qnan
4724	short		sacosd	 - tbl_trans	# $1c-5 facos denorm
4725	short		src_snan - tbl_trans	# $1c-4 facos snan
4726	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
4727	short		tbl_trans - tbl_trans	# $1c-7 ERROR
4728
4729	short		scos	 - tbl_trans	# $1d-0 fcos norm
4730	short		ld_pone	 - tbl_trans	# $1d-1 fcos zero
4731	short		t_operr	 - tbl_trans	# $1d-2 fcos inf
4732	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
4733	short		scosd	 - tbl_trans	# $1d-5 fcos denorm
4734	short		src_snan - tbl_trans	# $1d-4 fcos snan
4735	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
4736	short		tbl_trans - tbl_trans	# $1d-7 ERROR
4737
4738	short		sgetexp	 - tbl_trans	# $1e-0 fgetexp norm
4739	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
4740	short		t_operr	 - tbl_trans	# $1e-2 fgetexp inf
4741	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
4742	short		sgetexpd - tbl_trans	# $1e-5 fgetexp denorm
4743	short		src_snan - tbl_trans	# $1e-4 fgetexp snan
4744	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
4745	short		tbl_trans - tbl_trans	# $1e-7 ERROR
4746
4747	short		sgetman	 - tbl_trans	# $1f-0 fgetman norm
4748	short		src_zero - tbl_trans	# $1f-1 fgetman zero
4749	short		t_operr	 - tbl_trans	# $1f-2 fgetman inf
4750	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
4751	short		sgetmand - tbl_trans	# $1f-5 fgetman denorm
4752	short		src_snan - tbl_trans	# $1f-4 fgetman snan
4753	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
4754	short		tbl_trans - tbl_trans	# $1f-7 ERROR
4755
4756	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
4757	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
4758	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
4759	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
4760	short		tbl_trans - tbl_trans	# $20-5 fdiv denorm
4761	short		tbl_trans - tbl_trans	# $20-4 fdiv snan
4762	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
4763	short		tbl_trans - tbl_trans	# $20-7 ERROR
4764
4765	short		smod_snorm - tbl_trans	# $21-0 fmod norm
4766	short		smod_szero - tbl_trans	# $21-1 fmod zero
4767	short		smod_sinf - tbl_trans	# $21-2 fmod inf
4768	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
4769	short		smod_sdnrm - tbl_trans	# $21-5 fmod denorm
4770	short		sop_ssnan - tbl_trans	# $21-4 fmod snan
4771	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
4772	short		tbl_trans - tbl_trans	# $21-7 ERROR
4773
4774	short		tbl_trans - tbl_trans	# $22-0 fadd norm
4775	short		tbl_trans - tbl_trans	# $22-1 fadd zero
4776	short		tbl_trans - tbl_trans	# $22-2 fadd inf
4777	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
4778	short		tbl_trans - tbl_trans	# $22-5 fadd denorm
4779	short		tbl_trans - tbl_trans	# $22-4 fadd snan
4780	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
4781	short		tbl_trans - tbl_trans	# $22-7 ERROR
4782
4783	short		tbl_trans - tbl_trans	# $23-0 fmul norm
4784	short		tbl_trans - tbl_trans	# $23-1 fmul zero
4785	short		tbl_trans - tbl_trans	# $23-2 fmul inf
4786	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
4787	short		tbl_trans - tbl_trans	# $23-5 fmul denorm
4788	short		tbl_trans - tbl_trans	# $23-4 fmul snan
4789	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
4790	short		tbl_trans - tbl_trans	# $23-7 ERROR
4791
4792	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
4793	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
4794	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
4795	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
4796	short		tbl_trans - tbl_trans	# $24-5 fsgldiv denorm
4797	short		tbl_trans - tbl_trans	# $24-4 fsgldiv snan
4798	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
4799	short		tbl_trans - tbl_trans	# $24-7 ERROR
4800
4801	short		srem_snorm - tbl_trans	# $25-0 frem norm
4802	short		srem_szero - tbl_trans	# $25-1 frem zero
4803	short		srem_sinf - tbl_trans	# $25-2 frem inf
4804	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
4805	short		srem_sdnrm - tbl_trans	# $25-5 frem denorm
4806	short		sop_ssnan - tbl_trans	# $25-4 frem snan
4807	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
4808	short		tbl_trans - tbl_trans	# $25-7 ERROR
4809
4810	short		sscale_snorm - tbl_trans # $26-0 fscale norm
4811	short		sscale_szero - tbl_trans # $26-1 fscale zero
4812	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
4813	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
4814	short		sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4815	short		sop_ssnan - tbl_trans	# $26-4 fscale snan
4816	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
4817	short		tbl_trans - tbl_trans	# $26-7 ERROR
4818
4819	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
4820	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
4821	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
4822	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
4823	short		tbl_trans - tbl_trans	# $27-5 fsglmul denorm
4824	short		tbl_trans - tbl_trans	# $27-4 fsglmul snan
4825	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
4826	short		tbl_trans - tbl_trans	# $27-7 ERROR
4827
4828	short		tbl_trans - tbl_trans	# $28-0 fsub norm
4829	short		tbl_trans - tbl_trans	# $28-1 fsub zero
4830	short		tbl_trans - tbl_trans	# $28-2 fsub inf
4831	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
4832	short		tbl_trans - tbl_trans	# $28-5 fsub denorm
4833	short		tbl_trans - tbl_trans	# $28-4 fsub snan
4834	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
4835	short		tbl_trans - tbl_trans	# $28-7 ERROR
4836
4837	short		tbl_trans - tbl_trans	# $29-0 ERROR
4838	short		tbl_trans - tbl_trans	# $29-1 ERROR
4839	short		tbl_trans - tbl_trans	# $29-2 ERROR
4840	short		tbl_trans - tbl_trans	# $29-3 ERROR
4841	short		tbl_trans - tbl_trans	# $29-4 ERROR
4842	short		tbl_trans - tbl_trans	# $29-5 ERROR
4843	short		tbl_trans - tbl_trans	# $29-6 ERROR
4844	short		tbl_trans - tbl_trans	# $29-7 ERROR
4845
4846	short		tbl_trans - tbl_trans	# $2a-0 ERROR
4847	short		tbl_trans - tbl_trans	# $2a-1 ERROR
4848	short		tbl_trans - tbl_trans	# $2a-2 ERROR
4849	short		tbl_trans - tbl_trans	# $2a-3 ERROR
4850	short		tbl_trans - tbl_trans	# $2a-4 ERROR
4851	short		tbl_trans - tbl_trans	# $2a-5 ERROR
4852	short		tbl_trans - tbl_trans	# $2a-6 ERROR
4853	short		tbl_trans - tbl_trans	# $2a-7 ERROR
4854
4855	short		tbl_trans - tbl_trans	# $2b-0 ERROR
4856	short		tbl_trans - tbl_trans	# $2b-1 ERROR
4857	short		tbl_trans - tbl_trans	# $2b-2 ERROR
4858	short		tbl_trans - tbl_trans	# $2b-3 ERROR
4859	short		tbl_trans - tbl_trans	# $2b-4 ERROR
4860	short		tbl_trans - tbl_trans	# $2b-5 ERROR
4861	short		tbl_trans - tbl_trans	# $2b-6 ERROR
4862	short		tbl_trans - tbl_trans	# $2b-7 ERROR
4863
4864	short		tbl_trans - tbl_trans	# $2c-0 ERROR
4865	short		tbl_trans - tbl_trans	# $2c-1 ERROR
4866	short		tbl_trans - tbl_trans	# $2c-2 ERROR
4867	short		tbl_trans - tbl_trans	# $2c-3 ERROR
4868	short		tbl_trans - tbl_trans	# $2c-4 ERROR
4869	short		tbl_trans - tbl_trans	# $2c-5 ERROR
4870	short		tbl_trans - tbl_trans	# $2c-6 ERROR
4871	short		tbl_trans - tbl_trans	# $2c-7 ERROR
4872
4873	short		tbl_trans - tbl_trans	# $2d-0 ERROR
4874	short		tbl_trans - tbl_trans	# $2d-1 ERROR
4875	short		tbl_trans - tbl_trans	# $2d-2 ERROR
4876	short		tbl_trans - tbl_trans	# $2d-3 ERROR
4877	short		tbl_trans - tbl_trans	# $2d-4 ERROR
4878	short		tbl_trans - tbl_trans	# $2d-5 ERROR
4879	short		tbl_trans - tbl_trans	# $2d-6 ERROR
4880	short		tbl_trans - tbl_trans	# $2d-7 ERROR
4881
4882	short		tbl_trans - tbl_trans	# $2e-0 ERROR
4883	short		tbl_trans - tbl_trans	# $2e-1 ERROR
4884	short		tbl_trans - tbl_trans	# $2e-2 ERROR
4885	short		tbl_trans - tbl_trans	# $2e-3 ERROR
4886	short		tbl_trans - tbl_trans	# $2e-4 ERROR
4887	short		tbl_trans - tbl_trans	# $2e-5 ERROR
4888	short		tbl_trans - tbl_trans	# $2e-6 ERROR
4889	short		tbl_trans - tbl_trans	# $2e-7 ERROR
4890
4891	short		tbl_trans - tbl_trans	# $2f-0 ERROR
4892	short		tbl_trans - tbl_trans	# $2f-1 ERROR
4893	short		tbl_trans - tbl_trans	# $2f-2 ERROR
4894	short		tbl_trans - tbl_trans	# $2f-3 ERROR
4895	short		tbl_trans - tbl_trans	# $2f-4 ERROR
4896	short		tbl_trans - tbl_trans	# $2f-5 ERROR
4897	short		tbl_trans - tbl_trans	# $2f-6 ERROR
4898	short		tbl_trans - tbl_trans	# $2f-7 ERROR
4899
4900	short		ssincos	 - tbl_trans	# $30-0 fsincos norm
4901	short		ssincosz - tbl_trans	# $30-1 fsincos zero
4902	short		ssincosi - tbl_trans	# $30-2 fsincos inf
4903	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
4904	short		ssincosd - tbl_trans	# $30-5 fsincos denorm
4905	short		ssincossnan - tbl_trans	# $30-4 fsincos snan
4906	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
4907	short		tbl_trans - tbl_trans	# $30-7 ERROR
4908
4909	short		ssincos	 - tbl_trans	# $31-0 fsincos norm
4910	short		ssincosz - tbl_trans	# $31-1 fsincos zero
4911	short		ssincosi - tbl_trans	# $31-2 fsincos inf
4912	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
4913	short		ssincosd - tbl_trans	# $31-5 fsincos denorm
4914	short		ssincossnan - tbl_trans	# $31-4 fsincos snan
4915	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
4916	short		tbl_trans - tbl_trans	# $31-7 ERROR
4917
4918	short		ssincos	 - tbl_trans	# $32-0 fsincos norm
4919	short		ssincosz - tbl_trans	# $32-1 fsincos zero
4920	short		ssincosi - tbl_trans	# $32-2 fsincos inf
4921	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
4922	short		ssincosd - tbl_trans	# $32-5 fsincos denorm
4923	short		ssincossnan - tbl_trans	# $32-4 fsincos snan
4924	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
4925	short		tbl_trans - tbl_trans	# $32-7 ERROR
4926
4927	short		ssincos	 - tbl_trans	# $33-0 fsincos norm
4928	short		ssincosz - tbl_trans	# $33-1 fsincos zero
4929	short		ssincosi - tbl_trans	# $33-2 fsincos inf
4930	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
4931	short		ssincosd - tbl_trans	# $33-5 fsincos denorm
4932	short		ssincossnan - tbl_trans	# $33-4 fsincos snan
4933	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
4934	short		tbl_trans - tbl_trans	# $33-7 ERROR
4935
4936	short		ssincos	 - tbl_trans	# $34-0 fsincos norm
4937	short		ssincosz - tbl_trans	# $34-1 fsincos zero
4938	short		ssincosi - tbl_trans	# $34-2 fsincos inf
4939	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
4940	short		ssincosd - tbl_trans	# $34-5 fsincos denorm
4941	short		ssincossnan - tbl_trans	# $34-4 fsincos snan
4942	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
4943	short		tbl_trans - tbl_trans	# $34-7 ERROR
4944
4945	short		ssincos	 - tbl_trans	# $35-0 fsincos norm
4946	short		ssincosz - tbl_trans	# $35-1 fsincos zero
4947	short		ssincosi - tbl_trans	# $35-2 fsincos inf
4948	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
4949	short		ssincosd - tbl_trans	# $35-5 fsincos denorm
4950	short		ssincossnan - tbl_trans	# $35-4 fsincos snan
4951	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
4952	short		tbl_trans - tbl_trans	# $35-7 ERROR
4953
4954	short		ssincos	 - tbl_trans	# $36-0 fsincos norm
4955	short		ssincosz - tbl_trans	# $36-1 fsincos zero
4956	short		ssincosi - tbl_trans	# $36-2 fsincos inf
4957	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
4958	short		ssincosd - tbl_trans	# $36-5 fsincos denorm
4959	short		ssincossnan - tbl_trans	# $36-4 fsincos snan
4960	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
4961	short		tbl_trans - tbl_trans	# $36-7 ERROR
4962
4963	short		ssincos	 - tbl_trans	# $37-0 fsincos norm
4964	short		ssincosz - tbl_trans	# $37-1 fsincos zero
4965	short		ssincosi - tbl_trans	# $37-2 fsincos inf
4966	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
4967	short		ssincosd - tbl_trans	# $37-5 fsincos denorm
4968	short		ssincossnan - tbl_trans	# $37-4 fsincos snan
4969	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
4970	short		tbl_trans - tbl_trans	# $37-7 ERROR
4971
4972##########
4973
4974# the instruction fetch access for the displacement word for the
4975# fdbcc emulation failed. here, we create an access error frame
4976# from the current frame and branch to _real_access().
# Reached from funimp_fdbcc when _imem_read_word reports a failure.
# Sequence:
#   1. Reload d0-d1/a0-a1, the FP control registers and fp0-fp1 from the
#      save area, and replace the stacked PC with USER_FPIAR (done through
#      a6, so before "unlk %a6").
#   2. After unlk, grow the frame by 4 bytes: the long at (%sp) is pushed as
#      a copy 4 bytes lower, then the remaining fields are written relative
#      to the new %sp:
#        0x0  SR
#        0x2  PC        (hi word from the pushed long, lo word from 0x8)
#        0x6  0x4008    (fmt/voff)
#        0x8  address   (copied from 0x2(%sp), i.e. the same value as the PC)
#        0xc  0x09428001 (FSLW)
#      The stores run in this order because each one reads a field that a
#      later store overwrites; do not reorder them.
#   3. If bit 5 of the byte at (%sp) is set (supervisor, per the existing
#      comment), bit 2 of the byte at 0xd(%sp) -- the second byte of the
#      FSLW long -- is set as well.
#   4. Branch to _real_access.
# NOTE(review): the meaning of the individual FSLW bits in 0x09428001 is not
# shown in this excerpt; check the processor manual before changing it.
4977funimp_iacc:
4978	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4979	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4980	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
4981
4982	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
4983
4984	unlk		%a6
4985
4986	mov.l		(%sp),-(%sp)		# store SR,hi(PC)
4987	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC)
4988	mov.w		&0x4008,0x6(%sp)	# store voff
4989	mov.l		0x2(%sp),0x8(%sp)	# store EA
4990	mov.l		&0x09428001,0xc(%sp)	# store FSLW
4991
4992	btst		&0x5,(%sp)		# user or supervisor mode?
4993	beq.b		funimp_iacc_end		# user
4994	bset		&0x2,0xd(%sp)		# set supervisor TM bit
4995
4996funimp_iacc_end:
4997	bra.l		_real_access
4998
4999#########################################################################
5000# ssin():     computes the sine of a normalized input			#
5001# ssind():    computes the sine of a denormalized input			#
5002# scos():     computes the cosine of a normalized input			#
5003# scosd():    computes the cosine of a denormalized input		#
5004# ssincos():  computes the sine and cosine of a normalized input	#
5005# ssincosd(): computes the sine and cosine of a denormalized input	#
5006#									#
5007# INPUT *************************************************************** #
5008#	a0 = pointer to extended precision input			#
5009#	d0 = round precision,mode					#
5010#									#
5011# OUTPUT ************************************************************** #
5012#	fp0 = sin(X) or cos(X)						#
5013#									#
5014#    For ssincos(X):							#
5015#	fp0 = sin(X)							#
5016#	fp1 = cos(X)							#
5017#									#
5018# ACCURACY and MONOTONICITY ******************************************* #
5019#	The returned result is within 1 ulp in 64 significant bits,	#
5020#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
5021#	rounded to double precision. The result is provably monotonic	#
5022#	in double precision.						#
5023#									#
5024# ALGORITHM ***********************************************************	#
5025#									#
5026#	SIN and COS:							#
5027#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
5028#									#
5029#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
5030#									#
5031#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5032#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5033#		Overwrite k by k := k + AdjN.				#
5034#									#
5035#	4. If k is even, go to 6.					#
5036#									#
5037#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.			#
5038#		Return sgn*cos(r) where cos(r) is approximated by an	#
5039#		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
5040#		s = r*r.						#
5041#		Exit.							#
5042#									#
5043#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
5044#		where sin(r) is approximated by an odd polynomial in r	#
5045#		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.	#
5046#		Exit.							#
5047#									#
5048#	7. If |X| > 1, go to 9.						#
5049#									#
5050#	8. (|X|<2**(-40)) If SIN is invoked, return X;			#
5051#		otherwise return 1.					#
5052#									#
5053#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
5054#		go back to 3.						#
5055#									#
5056#	SINCOS:								#
5057#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5058#									#
5059#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5060#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5061#									#
5062#	3. If k is even, go to 5.					#
5063#									#
5064#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.	#
5065#		j1 exclusive or with the l.s.b. of k.			#
5066#		sgn1 := (-1)**j1, sgn2 := (-1)**j2.			#
5067#		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
5068#		sin(r) and cos(r) are computed as odd and even		#
5069#		polynomials in r, respectively. Exit			#
5070#									#
5071#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
5072#		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
5073#		sin(r) and cos(r) are computed as odd and even		#
5074#		polynomials in r, respectively. Exit			#
5075#									#
5076#	6. If |X| > 1, go to 8.						#
5077#									#
5078#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
5079#									#
5080#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
5081#		go back to 2.						#
5082#									#
5083#########################################################################
5084
#--Coefficients of the sine (SINA1..SINA7) and cosine (COSB1..COSB8)
#--polynomials evaluated by SINPOLY, COSPOLY, SNODD and NEVEN.
#--Each entry is stored in the format its readers load it with:
#--	SINA3-SINA7, COSB4-COSB8   : double   (fmov.d/fadd.d); SINA3 and
#--				     COSB4 carry two trailing zero longs
#--	SINA1, SINA2, COSB2, COSB3 : extended (fadd.x)
#--	COSB1			   : single   (fadd.s), value is -1/2
5085SINA7:	long		0xBD6AAA77,0xCCC994F5
5086SINA6:	long		0x3DE61209,0x7AAE8DA1
5087SINA5:	long		0xBE5AE645,0x2A118AE4
5088SINA4:	long		0x3EC71DE3,0xA5341531
5089SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
5091SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5092
5093COSB8:	long		0x3D2AC4D0,0xD6011EE3
5094COSB7:	long		0xBDA9396F,0x9F45AC19
5095COSB6:	long		0x3E21EED9,0x0612C972
5096COSB5:	long		0xBE927E4F,0xB79D9FCF
5097COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100COSB1:	long		0xBF000000
5101
#--Scratch names used by the sin/cos/sincos code; every one is used as a
#--displacement off %a6. Several names alias the same slot, so they must
#--not be live at the same time:
#--	INARG, X, RPRIME -> FP_SCR0
#--	POSNEG1, TWOTO63 -> L_SCR1
#--	ENDFLAG, INT	 -> L_SCR2
5102	set		INARG,FP_SCR0
5103
5104	set		X,FP_SCR0
5105#	set		XDCARE,X+2
5106	set		XFRAC,X+4
5107
5108	set		RPRIME,FP_SCR0
5109	set		SPRIME,FP_SCR1
5110
5111	set		POSNEG1,L_SCR1
5112	set		TWOTO63,L_SCR1
5113
5114	set		ENDFLAG,L_SCR2
5115	set		INT,L_SCR2
5116
5117	set		ADJN,L_SCR3
5118
5119############################################
#--ssin: sine of a normalized input.
#--	a0 = pointer to extended precision input
#--	d0 = round precision,mode
#--Records ADJN = 0 and joins the code shared with scos at SINBGN.
5120	global		ssin
5121ssin:
5122	mov.l		&0,ADJN(%a6)		# ADJN = 0 selects sine
5123	bra.b		SINBGN
5124
5125############################################
#--scos: cosine of a normalized input.
#--	a0 = pointer to extended precision input
#--	d0 = round precision,mode
#--Records ADJN = 1 and falls through into SINBGN (no branch needed).
5126	global		scos
5127scos:
5128	mov.l		&1,ADJN(%a6)		# ADJN = 1 selects cosine
5129
5130############################################
#--SINBGN: common start of ssin/scos. Loads X, keeps a copy at X(%a6) and
#--dispatches on the magnitude of X: tiny -> SINSM, usual -> SINMAIN,
#--large -> SREDUCEX.
5131SINBGN:
5132#--LOAD X AND CHECK IF |X| IS TOO SMALL OR LARGE
5133
5134	fmov.x		(%a0),%fp0		# LOAD INPUT
5135	fmov.x		%fp0,X(%a6)		# save input at X
5136
5137# "COMPACTIFY" X: d1 = sign/exponent word in the high half, top 16 bits
#--of the mantissa in the low half, then the sign bit is cleared.
5138	mov.l		(%a0),%d1		# put exp in hi word
5139	mov.w		4(%a0),%d1		# fetch hi(man)
5140	and.l		&0x7FFFFFFF,%d1		# strip sign
5141
5142	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
5143	bge.b		SOK1			# yes; go check the upper bound
5144	bra.w		SINSM			# no; input is very small
5145
5146SOK1:
5147	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
5148	blt.b		SINMAIN			# yes; usual case
5149	bra.w		SREDUCEX		# no; input is very large
5150
5151#--THIS IS THE USUAL CASE, |X| < 15 PI.
5152#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--On exit fp0 = R = X - N*(PI/2) and INT(%a6) = N; execution falls
#--through into SINCONT.
5153SINMAIN:
5154	fmov.x		%fp0,%fp1
5155	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5156
#--PITBL+0x200 is 32 sixteen-byte entries into the table, i.e. the N = 0 entry.
5157	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
5158
5159	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5160
5161	mov.l		INT(%a6),%d1		# make a copy of N
5162	asl.l		&4,%d1			# N *= 16
5163	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)
5164
5165# A1 IS THE ADDRESS OF N*PIBY2
5166# ...WHICH IS IN TWO PIECES Y1 & Y2
5167	fsub.x		(%a1)+,%fp0		# X-Y1
5168	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2
5169
5170SINCONT:
5171#--continuation from SREDUCEX (reached via SRESTORE) and fall-through
#--target of SINMAIN. Expects fp0 = R and INT(%a6) = N.
5172
5173#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5174	mov.l		INT(%a6),%d1
5175	add.l		ADJN(%a6),%d1		# SEE IF D1 IS ODD OR EVEN
5176	ror.l		&1,%d1			# D1 WAS ODD IFF D1 IS NOW NEGATIVE
5177	cmp.l		%d1,&0
5178	blt.w		COSPOLY			# odd: cosine polynomial; even: fall into SINPOLY
5179
5180#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--(D1 WAS ALREADY ROTATED RIGHT ONCE IN SINCONT, SO J IS BIT 1 OF N+ADJN.)
5181#--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5185#--WHERE T=S*S.
5186#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--IN:  fp0 = R, d1 = N+ADJN rotated right by one, d0 = user round prec,mode
#--OUT: fp0 = SGN*SIN(R); leaves through t_inx2. fp2/fp3 are preserved.
5188SINPOLY:
5189	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5190
5191	fmov.x		%fp0,X(%a6)		# X IS R
5192	fmul.x		%fp0,%fp0		# FP0 IS S
5193
5194	fmov.d		SINA7(%pc),%fp3
5195	fmov.d		SINA6(%pc),%fp2
5196
5197	fmov.x		%fp0,%fp1
5198	fmul.x		%fp1,%fp1		# FP1 IS T
5199
5200	ror.l		&1,%d1
5201	and.l		&0x80000000,%d1
5202# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5203	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R
5204
5205	fmul.x		%fp1,%fp3		# TA7
5206	fmul.x		%fp1,%fp2		# TA6
5207
5208	fadd.d		SINA5(%pc),%fp3		# A5+TA7
5209	fadd.d		SINA4(%pc),%fp2		# A4+TA6
5210
5211	fmul.x		%fp1,%fp3		# T(A5+TA7)
5212	fmul.x		%fp1,%fp2		# T(A4+TA6)
5213
5214	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
5215	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)
5216
5217	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
5218
5219	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
5220	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
5221	fmul.x		X(%a6),%fp0		# R'*S
5222
5223	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5224
5225	fmul.x		%fp1,%fp0		# SIN(R')-R'
5226
5227	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5228
#--Only the final add runs under the caller's rounding precision/mode.
5229	fmov.l		%d0,%fpcr		# restore users round mode,prec
5230	fadd.x		X(%a6),%fp0		# last inst - possible exception set
5231	bra		t_inx2
5232
5233#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--(D1 WAS ALREADY ROTATED RIGHT ONCE IN SINCONT, SO J IS BIT 1 OF N+ADJN.)
5234#--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5238#--WHERE T=S*S.
5239#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241#--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--IN:  fp0 = R, d1 = N+ADJN rotated right by one, d0 = user round prec,mode
#--OUT: fp0 = SGN*COS(R); leaves through t_inx2. fp2/fp3 are preserved.
5242COSPOLY:
5243	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5244
5245	fmul.x		%fp0,%fp0		# FP0 IS S
5246
5247	fmov.d		COSB8(%pc),%fp2
5248	fmov.d		COSB7(%pc),%fp3
5249
5250	fmov.x		%fp0,%fp1
5251	fmul.x		%fp1,%fp1		# FP1 IS T
5252
5253	fmov.x		%fp0,X(%a6)		# X IS S
5254	ror.l		&1,%d1
5255	and.l		&0x80000000,%d1
5256# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5257
5258	fmul.x		%fp1,%fp2		# TB8
5259
5260	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
5261	and.l		&0x80000000,%d1		# no effect: d1 already holds only the sign bit
5262
5263	fmul.x		%fp1,%fp3		# TB7
5264
5265	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
5266	mov.l		%d1,POSNEG1(%a6)
5267
5268	fadd.d		COSB6(%pc),%fp2		# B6+TB8
5269	fadd.d		COSB5(%pc),%fp3		# B5+TB7
5270
5271	fmul.x		%fp1,%fp2		# T(B6+TB8)
5272	fmul.x		%fp1,%fp3		# T(B5+TB7)
5273
5274	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
5275	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)
5276
5277	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
5278	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))
5279
5280	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
5281	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))
5282
5283	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))
5284
5285	fadd.x		%fp1,%fp0		# [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5286
5287	fmul.x		X(%a6),%fp0		# S'*(...) = SGN*(COS(R)-1)
5288
5289	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5290
#--Only the final add runs under the caller's rounding precision/mode.
5291	fmov.l		%d0,%fpcr		# restore users round mode,prec
5292	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
5293	bra		t_inx2
5294
5295##############################################
5296
5297# SINe: Big OR Small?
5298#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299#--IF |X| < 2**(-40), RETURN X OR 1.
#--d1 holds the compact form of |X|; 0x3FFF8000 is the compact form of 1.0.
#--NOTE(review): nothing in this section branches to SINBORS (SINBGN goes
#--straight to SINSM or SREDUCEX) - confirm before relying on it.
5300SINBORS:
5301	cmp.l		%d1,&0x3FFF8000
5302	bgt.l		SREDUCEX
5303
#--SINSM: |X| < 2**(-40). ADJN > 0 (cosine) returns 1 via COSTINY,
#--otherwise (sine) X itself is returned via SINTINY.
5304SINSM:
5305	mov.l		ADJN(%a6),%d1
5306	cmp.l		%d1,&0
5307	bgt.b		COSTINY
5308
5309# here, the operation may underflow iff the precision is sgl or dbl.
5310# extended denorms are handled through another entry point.
5311SINTINY:
5312#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE
5313
5314	fmov.l		%d0,%fpcr		# restore users round mode,prec
5315	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5316	fmov.x		X(%a6),%fp0		# last inst - possible exception set
5317	bra		t_catch
5318
#--COSTINY: return 1.0 + (-2**(-126)), added under the user's rounding
#--mode and precision (0x80800000 is -2**(-126) in single precision).
5319COSTINY:
5320	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5321	fmov.l		%d0,%fpcr		# restore users round mode,prec
5322	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
5323	bra		t_pinx2
5324
5325################################################
5326	global		ssind
5327#--SIN(X) = X FOR DENORMALIZED X
#--ssind: sine of a denormalized input; all work is done by t_extdnrm.
5328ssind:
5329	bra		t_extdnrm
5330
5331############################################
5332	global		scosd
5333#--COS(X) = 1 FOR DENORMALIZED X
#--scosd: cosine of a denormalized input; loads 1.0 and exits via t_pinx2.
5334scosd:
5335	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5336	bra		t_pinx2
5337
5338##################################################
5339
#--ssincos: sine and cosine of a normalized input.
#--	a0 = pointer to extended precision input
#--	d0 = round precision,mode
#--Dispatches on the magnitude of X exactly like SINBGN:
#--tiny -> SCSM, usual -> SCMAIN, large -> SREDUCEX.
5340	global		ssincos
5341ssincos:
5342#--SET ADJN TO 4 (SRESTORE uses ADJN >= 4 to return to SCCONT)
5343	mov.l		&4,ADJN(%a6)
5344
5345	fmov.x		(%a0),%fp0		# LOAD INPUT
5346	fmov.x		%fp0,X(%a6)		# save input at X
5347
5348	mov.l		(%a0),%d1		# sign/exponent word into the high half
5349	mov.w		4(%a0),%d1		# top 16 mantissa bits into the low half
5350	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X
5351
5352	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
5353	bge.b		SCOK1			# yes; go check the upper bound
5354	bra.w		SCSM			# no; input is very small
5355
5356SCOK1:
5357	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5358	blt.b		SCMAIN			# yes; usual case
5359	bra.w		SREDUCEX		# no; input is very large
5360
5361
5362#--THIS IS THE USUAL CASE, |X| < 15 PI.
5363#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--On exit fp0 = R = X - N*(PI/2) and INT(%a6) = N; execution falls
#--through into SCCONT.
5364SCMAIN:
5365	fmov.x		%fp0,%fp1
5366
5367	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5368
#--PITBL+0x200 is 32 sixteen-byte entries into the table, i.e. the N = 0 entry.
5369	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
5370
5371	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5372
5373	mov.l		INT(%a6),%d1		# make a copy of N
5374	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
5375	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2
5376
5377	fsub.x		(%a1)+,%fp0		# X-Y1
5378	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5379
5380SCCONT:
5381#--continuation point from SREDUCEX (reached via SRESTORE) and
#--fall-through target of SCMAIN. Expects fp0 = R and INT(%a6) = N.
5382
5383	mov.l		INT(%a6),%d1
5384	ror.l		&1,%d1			# lsb of N moves into the sign bit
5385	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
5386	bge.w		NEVEN			# N even; otherwise fall into SNODD
5387
#--SNODD: sincos for odd N. SIN(X) = sgn1*cos(r), COS(X) = sgn2*sin(r).
#--IN:  fp0 = R, d1 = N rotated right by one (bit 31 = lsb of N, bit 0 =
#--     bit 1 of N), d0 = user round prec,mode
#--OUT: fp1 = COS(X), handed to sto_cos; fp0 = SIN(X); leaves via t_inx2.
5388SNODD:
5389#--Only fp2 is saved here; d2 is borrowed below and restored from the stack.
5390	fmovm.x		&0x04,-(%sp)		# save fp2
5391
5392	fmov.x		%fp0,RPRIME(%a6)
5393	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5394	fmov.d		SINA7(%pc),%fp1		# A7
5395	fmov.d		COSB8(%pc),%fp2		# B8
5396	fmul.x		%fp0,%fp1		# SA7
5397	fmul.x		%fp0,%fp2		# SB8
5398
#--d2 sign bit := (bit 0 of d1) EOR (bit 31 of d1): the sign applied to
#--R for the cosine result.
5399	mov.l		%d2,-(%sp)
5400	mov.l		%d1,%d2
5401	ror.l		&1,%d2
5402	and.l		&0x80000000,%d2
5403	eor.l		%d1,%d2
5404	and.l		&0x80000000,%d2
5405
5406	fadd.d		SINA6(%pc),%fp1		# A6+SA7
5407	fadd.d		COSB7(%pc),%fp2		# B7+SB8
5408
5409	fmul.x		%fp0,%fp1		# S(A6+SA7)
5410	eor.l		%d2,RPRIME(%a6)		# R' = sgn2*R
5411	mov.l		(%sp)+,%d2		# restore d2
5412	fmul.x		%fp0,%fp2		# S(B7+SB8)
#--d1 sign bit := bit 0 of d1 (bit 1 of N): the sign of the sine result.
5413	ror.l		&1,%d1
5414	and.l		&0x80000000,%d1
5415	mov.l		&0x3F800000,POSNEG1(%a6)
5416	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +-1.0 in single
5417
5418	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
5419	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)
5420
5421	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
5422	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
5423	fmov.x		%fp0,SPRIME(%a6)
5424
5425	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
5426	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
5427	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))
5428
5429	fmul.x		%fp0,%fp1		# S(A4+...)
5430	fmul.x		%fp0,%fp2		# S(B5+...)
5431
5432	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
5433	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)
5434
5435	fmul.x		%fp0,%fp1		# S(A3+...)
5436	fmul.x		%fp0,%fp2		# S(B4+...)
5437
5438	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
5439	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)
5440
5441	fmul.x		%fp0,%fp1		# S(A2+...)
5442	fmul.x		%fp0,%fp2		# S(B3+...)
5443
5444	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
5445	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)
5446
5447	fmul.x		%fp0,%fp1		# S(A1+...)
5448	fmul.x		%fp2,%fp0		# S(B2+...)
5449
5450	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
5451	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
5452	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))
5453
5454	fmovm.x		(%sp)+,&0x20		# restore fp2
5455
#--Both final adds run under the caller's rounding precision/mode.
5456	fmov.l		%d0,%fpcr		# restore users round mode,prec
5457	fadd.x		RPRIME(%a6),%fp1	# COS(X)
5458	bsr		sto_cos			# store cosine result
5459	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
5460	bra		t_inx2
5461
#--NEVEN: sincos for even N. SIN(X) = sgn1*sin(r), COS(X) = sgn1*cos(r).
#--IN:  fp0 = R, d1 = N rotated right by one (bit 0 = bit 1 of N),
#--     d0 = user round prec,mode
#--OUT: fp1 = COS(X), handed to sto_cos; fp0 = SIN(X); leaves via t_inx2.
5462NEVEN:
5463#--REGISTERS SAVED SO FAR: FP2.
5464	fmovm.x		&0x04,-(%sp)		# save fp2
5465
5466	fmov.x		%fp0,RPRIME(%a6)
5467	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5468
5469	fmov.d		COSB8(%pc),%fp1		# B8
5470	fmov.d		SINA7(%pc),%fp2		# A7
5471
5472	fmul.x		%fp0,%fp1		# SB8
5473	fmov.x		%fp0,SPRIME(%a6)
5474	fmul.x		%fp0,%fp2		# SA7
5475
#--d1 sign bit := bit 0 of d1 (bit 1 of N): the common sign sgn1.
5476	ror.l		&1,%d1
5477	and.l		&0x80000000,%d1
5478
5479	fadd.d		COSB7(%pc),%fp1		# B7+SB8
5480	fadd.d		SINA6(%pc),%fp2		# A6+SA7
5481
5482	eor.l		%d1,RPRIME(%a6)		# R' = sgn1*R
5483	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
5484
5485	fmul.x		%fp0,%fp1		# S(B7+SB8)
5486
5487	or.l		&0x3F800000,%d1		# d1 = sgn1 (+-1.0) in single
5488	mov.l		%d1,POSNEG1(%a6)
5489
5490	fmul.x		%fp0,%fp2		# S(A6+SA7)
5491
5492	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
5493	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)
5494
5495	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
5496	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))
5497
5498	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
5499	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))
5500
5501	fmul.x		%fp0,%fp1		# S(B5+...)
5502	fmul.x		%fp0,%fp2		# S(A4+...)
5503
5504	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
5505	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)
5506
5507	fmul.x		%fp0,%fp1		# S(B4+...)
5508	fmul.x		%fp0,%fp2		# S(A3+...)
5509
5510	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
5511	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)
5512
5513	fmul.x		%fp0,%fp1		# S(B3+...)
5514	fmul.x		%fp0,%fp2		# S(A2+...)
5515
5516	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
5517	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)
5518
5519	fmul.x		%fp0,%fp1		# S(B2+...)
5520	fmul.x		%fp2,%fp0		# S(A1+...)
5521
5522
5523	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
5524	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
5525	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))
5526
5527	fmovm.x		(%sp)+,&0x20		# restore fp2
5528
#--Both final adds run under the caller's rounding precision/mode.
5529	fmov.l		%d0,%fpcr		# restore users round mode,prec
5530	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
5531	bsr		sto_cos			# store cosine result
5532	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
5533	bra		t_inx2
5534
5535################################################
5536
#--SinCos: Big OR Small? d1 holds the compact form of |X|; 0x3FFF8000 is
#--the compact form of 1.0.
#--NOTE(review): nothing in this section branches to SCBORS (ssincos goes
#--straight to SCSM or SREDUCEX) - confirm before relying on it.
5537SCBORS:
5538	cmp.l		%d1,&0x3FFF8000
5539	bgt.w		SREDUCEX
5540
5541################################################
5542
#--SCSM: |X| < 2**(-40). COS(X) is 1.0 - 2**(-126), subtracted under the
#--user's rounding mode (0x00800000 is 2**(-126) in single precision), and
#--SIN(X) is X itself, moved under the user's rounding mode.
5543SCSM:
5544#	mov.w		&0x0000,XDCARE(%a6)
5545	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
5546
5547	fmov.l		%d0,%fpcr		# restore users round mode,prec
5548	fsub.s		&0x00800000,%fp1	# COS(X)
5549	bsr		sto_cos			# store cosine result
5550	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
5551	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5552	fmov.x		X(%a6),%fp0		# SIN(X) = X
5553	bra		t_catch
5554
5555##############################################
5556
5557	global		ssincosd
5558#--SIN AND COS OF X FOR DENORMALIZED X
#--The cosine result is 1.0, passed to sto_cos in fp1; the sine result is
#--produced by t_extdnrm. d0 is preserved across the sto_cos call.
5559ssincosd:
5560	mov.l		%d0,-(%sp)		# save d0
5561	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
5562	bsr		sto_cos			# store cosine result
5563	mov.l		(%sp)+,%d0		# restore d0
5564	bra		t_extdnrm
5565
5566############################################
5567
5568#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--SREDUCEX: general argument reduction for sin/cos/sincos.
#--IN:  fp0 = X, d1 = compact form of |X|
#--Saves fp2-fp5 and d2 on the stack (SRESTORE pops them) and falls
#--through into SLOOP with the running remainder in (fp0,fp1).
5571SREDUCEX:
5572	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
5573	mov.l		%d2,-(%sp)		# save d2
5574	fmov.s		&0x00000000,%fp1	# fp1 = 0
5575
5576#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5577#--there is a danger of unwanted overflow in first LOOP iteration.  In this
5578#--case, reduce argument by one remainder step to make subsequent reduction
5579#--safe.
5580	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
5581	bne.b		SLOOP			# no
5582
5583# yes; create 2**16383*PI/2
5584	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
5585	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
5586	clr.l		FP_SCR0_LO(%a6)
5587
5588# create low half of 2**16383*PI/2 at FP_SCR1
5589	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
5590	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
5591	clr.l		FP_SCR1_LO(%a6)
5592
5593	ftest.x		%fp0			# test sign of argument
5594	fblt.w		sred_neg
5595
#--Positive argument: set the sign bit of both constants so that the
#--adds below subtract them.
5596	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
5597	or.b		&0x80,FP_SCR1_EX(%a6)
5598sred_neg:
5599	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
5600	fmov.x		%fp0,%fp1		# save high result in fp1
5601	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
5602	fsub.x		%fp0,%fp1		# determine low component of result
5603	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
5604
5605#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606#--integer quotient will be stored in N
5607#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--Each pass removes N * 2**L * (PI/2) from (R,r). The pass in which
#--K <= 28 uses L = 0, sets ENDFLAG and leaves through SRESTORE.
5608SLOOP:
5609	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
5610	mov.w		INARG(%a6),%d1		# sign/exponent word of R
5611	mov.l		%d1,%a1			# save a copy of D1
5612	and.l		&0x00007FFF,%d1
5613	sub.l		&0x00003FFF,%d1		# d1 = K
5614	cmp.l		%d1,&28
5615	ble.b		SLASTLOOP
5616SCONTLOOP:
5617	sub.l		&27,%d1			# d1 = L := K-27
5618	mov.b		&0,ENDFLAG(%a6)
5619	bra.b		SWORK
5620SLASTLOOP:
5621	clr.l		%d1			# d1 = L := 0
5622	mov.b		&1,ENDFLAG(%a6)
5623
5624SWORK:
5625#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
5626#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
5627
5628#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629#--2**L * (PIby2_1), 2**L * (PIby2_2)
5630
5631	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
5632	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
5633
5634	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
5635	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
5636	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
5637
5638	fmov.x		%fp0,%fp2
5639	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
5640
5641#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
5643#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
5645#--US THE DESIRED VALUE IN FLOATING POINT.
5646	mov.l		%a1,%d2			# saved sign/exponent word
5647	swap		%d2			# move it to the high half
5648	and.l		&0x80000000,%d2		# keep only the sign bit
5649	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
5650	mov.l		%d2,TWOTO63(%a6)
5651	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
5652	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
5653#	fint.x		%fp2
5654
5655#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656	mov.l		%d1,%d2			# d2 = L
5657
5658	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
5659	mov.w		%d2,FP_SCR0_EX(%a6)
5660	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
5661	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
5662
5663	add.l		&0x00003FDD,%d1
5664	mov.w		%d1,FP_SCR1_EX(%a6)
5665	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
5666	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
5667
#--L is no longer needed; the low byte of d1 now carries the end-of-loop flag.
5668	mov.b		ENDFLAG(%a6),%d1
5669
5670#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671#--P2 = 2**(L) * Piby2_2
5672	fmov.x		%fp2,%fp4		# fp4 = N
5673	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
5674	fmov.x		%fp2,%fp5		# fp5 = N
5675	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
5676	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
5677
5678#--we want P+p = W+w  but  |p| <= half ulp of P
5679#--Then, we need to compute  A := R-P   and  a := r-p
5680	fadd.x		%fp5,%fp3		# fp3 = P
5681	fsub.x		%fp3,%fp4		# fp4 = W-P
5682
5683	fsub.x		%fp3,%fp0		# fp0 = A := R - P
5684	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
5685
5686	fmov.x		%fp0,%fp3		# fp3 = A
5687	fsub.x		%fp4,%fp1		# fp1 = a := r - p
5688
5689#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
5690#--|r| <= half ulp of R.
5691	fadd.x		%fp1,%fp0		# fp0 = R := A+a
5692#--No need to calculate r if this is the last loop
5693	cmp.b		%d1,&0
5694	bgt.w		SRESTORE
5695
5696#--Need to calculate r
5697	fsub.x		%fp0,%fp3		# fp3 = A-R
5698	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
5699	bra.w		SLOOP
5700
#--SRESTORE: end of the general reduction. Stores N (fp2) as an integer in
#--INT(%a6), pops what SREDUCEX pushed, then resumes the caller's path:
#--ADJN < 4 (ssin/scos) -> SINCONT, ADJN >= 4 (ssincos) -> SCCONT.
5701SRESTORE:
5702	fmov.l		%fp2,INT(%a6)
5703	mov.l		(%sp)+,%d2		# restore d2
5704	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
5705
5706	mov.l		ADJN(%a6),%d1
5707	cmp.l		%d1,&4
5708
5709	blt.w		SINCONT
5710	bra.w		SCCONT
5711
5712#########################################################################
5713# stan():  computes the tangent of a normalized input			#
5714# stand(): computes the tangent of a denormalized input			#
5715#									#
5716# INPUT *************************************************************** #
5717#	a0 = pointer to extended precision input			#
5718#	d0 = round precision,mode					#
5719#									#
5720# OUTPUT ************************************************************** #
5721#	fp0 = tan(X)							#
5722#									#
5723# ACCURACY and MONOTONICITY ******************************************* #
5724#	The returned result is within 3 ulp in 64 significant bits,	#
5725#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
5726#	rounded to double precision. The result is provably monotonic	#
5727#	in double precision.						#
5728#									#
5729# ALGORITHM *********************************************************** #
5730#									#
5731#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5732#									#
5733#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5734#		k = N mod 2, so in particular, k = 0 or 1.		#
5735#									#
5736#	3. If k is odd, go to 5.					#
5737#									#
5738#	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
5739#		rational function U/V where				#
5740#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5741#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.	#
5742#		Exit.							#
5743#									#
5744#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5745#		a rational function U/V where				#
5746#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5747#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
5748#		-Cot(r) = -V/U. Exit.					#
5749#									#
5750#	6. If |X| > 1, go to 8.						#
5751#									#
5752#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
5753#									#
5754#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
5755#		to 2.							#
5756#									#
5757#########################################################################
5758
#--Coefficients of the rational approximation U/V evaluated by TANCONT and
#--NODD. Each entry is stored in the format its readers load it with:
#--	TANQ4, TANP3, TANQ3	   : double   (fmov.d/fadd.d); TANQ3 carries
#--				     two trailing zero longs
#--	TANP2, TANQ2, TANP1, TANQ1 : extended (fadd.x)
5759TANQ4:
5760	long		0x3EA0B759,0xF50F8688
5761TANP3:
5762	long		0xBEF2BAA5,0xA8924F04
5763
5764TANQ3:
5765	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5766
5767TANP2:
5768	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5769
5770TANQ2:
5771	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5772
5773TANP1:
5774	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5775
5776TANQ1:
5777	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5778
#--NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced by the
#--sin/cos/tan code in this part of the file - confirm their users elsewhere.
5779INVTWOPI:
5780	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5781
5782TWOPI1:
5783	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
5784TWOPI2:
5785	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5786
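5787#--N*PI/2, -32 <= N <= 32, EACH STORED AS A 16-BYTE ENTRY: A LEADING
5788#--TERM IN EXT. (12 BYTES) FOLLOWED BY A TRAILING TERM IN SGL. (4 BYTES).
5789#--NOTE THAT PI IS 64 BITS LONG, THUS N*PI/2 IS AT MOST 69 BITS LONG.
#--USERS INDEX FROM PITBL+0x200, THE ALL-ZERO N = 0 ENTRY, BY N*16.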
5790#	global		PITBL
5791PITBL:
5792	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
5802	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824	long		0x00000000,0x00000000,0x00000000,0x00000000
5825	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
5848	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5857
#--Scratch names used by the tangent code, all displacements off %a6.
#--These redefine the names used by the sin/cos code above: INT now shares
#--L_SCR1 with TWOTO63 (it shared L_SCR2 with ENDFLAG before).
5858	set		INARG,FP_SCR0
5859
5860	set		TWOTO63,L_SCR1
5861	set		INT,L_SCR1
5862	set		ENDFLAG,L_SCR2
5863
#--stan: tangent of a normalized input.
#--	a0 = pointer to extended precision input
#--	d0 = round precision,mode
#--Dispatches on the magnitude of X: tiny -> TANSM, usual -> TANMAIN,
#--large -> REDUCEX.
5864	global		stan
5865stan:
5866	fmov.x		(%a0),%fp0		# LOAD INPUT
5867
5868	mov.l		(%a0),%d1		# sign/exponent word into the high half
5869	mov.w		4(%a0),%d1		# top 16 mantissa bits into the low half
5870	and.l		&0x7FFFFFFF,%d1		# strip sign: d1 = compact form of |X|
5871
5872	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
5873	bge.b		TANOK1			# yes; go check the upper bound
5874	bra.w		TANSM			# no; input is very small
5875TANOK1:
5876	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5877	blt.b		TANMAIN			# yes; usual case
5878	bra.w		REDUCEX			# no; input is very large
5879
5880TANMAIN:
5881#--THIS IS THE USUAL CASE, |X| < 15 PI.
5882#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--On exit fp0 = R = X - N*(PI/2) and the sign bit of d1 is the lsb of N;
#--execution falls through into TANCONT.
5883	fmov.x		%fp0,%fp1
5884	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5885
#--PITBL+0x200 is 32 sixteen-byte entries into the table, i.e. the N = 0 entry.
5886	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
5887
5888	fmov.l		%fp1,%d1		# CONVERT TO INTEGER
5889
5890	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
5891	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2
5892
5893	fsub.x		(%a1)+,%fp0		# X-Y1
5894
5895	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5896
#--d1 holds N*16, so rotating right by 5 brings the lsb of N into bit 31.
5897	ror.l		&5,%d1
5898	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0
5899
#--TANCONT: evaluate tan(X) from the reduced argument.
#--IN:  fp0 = R, d1 negative iff N is odd, d0 = user round prec,mode
#--N odd goes to NODD (with fp2/fp3 already saved); N even computes
#--tan(R) = U/V here and leaves through t_inx2 with the result in fp0.
#--NOTE(review): presumably also the re-entry point after REDUCEX - the
#--branch is outside this part of the file; confirm.
5900TANCONT:
5901	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3
5902
5903	cmp.l		%d1,&0
5904	blt.w		NODD
5905
5906	fmov.x		%fp0,%fp1
5907	fmul.x		%fp1,%fp1		# S = R*R
5908
5909	fmov.d		TANQ4(%pc),%fp3
5910	fmov.d		TANP3(%pc),%fp2
5911
5912	fmul.x		%fp1,%fp3		# SQ4
5913	fmul.x		%fp1,%fp2		# SP3
5914
5915	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
5916	fadd.x		TANP2(%pc),%fp2		# P2+SP3
5917
5918	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
5919	fmul.x		%fp1,%fp2		# S(P2+SP3)
5920
5921	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
5922	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
5923
5924	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
5925	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))
5926
5927	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
5928	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))
5929
5930	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))
5931
5932	fadd.x		%fp2,%fp0		# U = R+RS(P1+S(P2+SP3))
5933
5934	fadd.s		&0x3F800000,%fp1	# V = 1+S(Q1+...)
5935
5936	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
5937
#--Only the final divide runs under the caller's rounding precision/mode.
5938	fmov.l		%d0,%fpcr		# restore users round mode,prec
5939	fdiv.x		%fp1,%fp0		# last inst - possible exception set
5940	bra		t_inx2
5941
#--NODD: N is odd, so tan(X) = -cot(R) = -V/U.
#--IN:  fp0 = R, fp2/fp3 already saved on the stack by TANCONT,
#--     d0 = user round prec,mode
#--OUT: fp0 = V/(-U); leaves through t_inx2.
5942NODD:
5943	fmov.x		%fp0,%fp1		# fp1 = R
5944	fmul.x		%fp0,%fp0		# S = R*R
5945
5946	fmov.d		TANQ4(%pc),%fp3
5947	fmov.d		TANP3(%pc),%fp2
5948
5949	fmul.x		%fp0,%fp3		# SQ4
5950	fmul.x		%fp0,%fp2		# SP3
5951
5952	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
5953	fadd.x		TANP2(%pc),%fp2		# P2+SP3
5954
5955	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
5956	fmul.x		%fp0,%fp2		# S(P2+SP3)
5957
5958	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
5959	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
5960
5961	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
5962	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))
5963
5964	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
5965	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))
5966
5967	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))
5968
5969	fadd.x		%fp2,%fp1		# U = R+RS(P1+S(P2+SP3))
5970	fadd.s		&0x3F800000,%fp0	# V = 1+S(Q1+...)
5971
5972	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
5973
#--Push U and flip its sign bit in memory so the divide below sees -U.
5974	fmov.x		%fp1,-(%sp)
5975	eor.l		&0x80000000,(%sp)
5976
#--Only the final divide runs under the caller's rounding precision/mode.
5977	fmov.l		%d0,%fpcr		# restore users round mode,prec
5978	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
5979	bra		t_inx2
5980
5981TANBORS:
5982#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5983#--IF |X| < 2**(-40), RETURN X.
#--d1 holds the compact form of |X|; 0x3FFF8000 is the compact form of 1.0.
#--NOTE(review): nothing in this section branches to TANBORS (stan goes
#--straight to TANSM or REDUCEX) - confirm before relying on it.
5984	cmp.l		%d1,&0x3FFF8000
5985	bgt.b		REDUCEX
5986
#--TANSM: |X| < 2**(-40), so tan(X) = X. X goes through the stack so that
#--the final move runs under the user's rounding mode and precision.
5987TANSM:
5988	fmov.x		%fp0,-(%sp)
5989	fmov.l		%d0,%fpcr		# restore users round mode,prec
5990	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5991	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
5992	bra		t_catch
5993
5994	global		stand
5995#--TAN(X) = X FOR DENORMALIZED X
#--stand: tangent of a denormalized input; all work is done by t_extdnrm.
5996stand:
5997	bra		t_extdnrm
5998
5999#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6000#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6001#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--REDUCEX: general argument reduction for the tangent code.
#--IN:  fp0 = X, d1 = compact form of |X|
#--Saves fp2-fp5 and d2 on the stack and falls through into LOOP with the
#--running remainder in (fp0,fp1).
6002REDUCEX:
6003	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
6004	mov.l		%d2,-(%sp)		# save d2
6005	fmov.s		&0x00000000,%fp1	# fp1 = 0
6006
6007#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
6008#--there is a danger of unwanted overflow in first LOOP iteration.  In this
6009#--case, reduce argument by one remainder step to make subsequent reduction
6010#--safe.
6011	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
6012	bne.b		LOOP			# no
6013
6014# yes; create 2**16383*PI/2
6015	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
6016	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
6017	clr.l		FP_SCR0_LO(%a6)
6018
6019# create low half of 2**16383*PI/2 at FP_SCR1
6020	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
6021	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
6022	clr.l		FP_SCR1_LO(%a6)
6023
6024	ftest.x		%fp0			# test sign of argument
6025	fblt.w		red_neg
6026
#--Positive argument: set the sign bit of both constants so that the
#--adds below subtract them.
6027	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
6028	or.b		&0x80,FP_SCR1_EX(%a6)
6029red_neg:
6030	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
6031	fmov.x		%fp0,%fp1		# save high result in fp1
6032	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
6033	fsub.x		%fp0,%fp1		# determine low component of result
6034	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
6035
6036#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6037#--integer quotient will be stored in N
6038#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
6039LOOP:
6040	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
6041	mov.w		INARG(%a6),%d1
6042	mov.l		%d1,%a1			# save a copy of D0
6043	and.l		&0x00007FFF,%d1
6044	sub.l		&0x00003FFF,%d1		# d0 = K
6045	cmp.l		%d1,&28
6046	ble.b		LASTLOOP
6047CONTLOOP:
6048	sub.l		&27,%d1			# d0 = L := K-27
6049	mov.b		&0,ENDFLAG(%a6)
6050	bra.b		WORK
6051LASTLOOP:
6052	clr.l		%d1			# d0 = L := 0
6053	mov.b		&1,ENDFLAG(%a6)
6054
6055WORK:
6056#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
6057#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
6058
6059#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6060#--2**L * (PIby2_1), 2**L * (PIby2_2)
6061
6062	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
6063	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
6064
6065	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
6066	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
6067	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
6068
6069	fmov.x		%fp0,%fp2
6070	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
6071
6072#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6073#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
6074#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6075#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
6076#--US THE DESIRED VALUE IN FLOATING POINT.
6077	mov.l		%a1,%d2
6078	swap		%d2
6079	and.l		&0x80000000,%d2
6080	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
6081	mov.l		%d2,TWOTO63(%a6)
6082	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP1 IS ROUNDED
6083	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
6084#	fintrz.x	%fp2,%fp2
6085
6086#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6087	mov.l		%d1,%d2			# d2 = L
6088
6089	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
6090	mov.w		%d2,FP_SCR0_EX(%a6)
6091	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
6092	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
6093
6094	add.l		&0x00003FDD,%d1
6095	mov.w		%d1,FP_SCR1_EX(%a6)
6096	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
6097	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
6098
6099	mov.b		ENDFLAG(%a6),%d1
6100
6101#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6102#--P2 = 2**(L) * Piby2_2
6103	fmov.x		%fp2,%fp4		# fp4 = N
6104	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
6105	fmov.x		%fp2,%fp5		# fp5 = N
6106	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
6107	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
6108
6109#--we want P+p = W+w  but  |p| <= half ulp of P
6110#--Then, we need to compute  A := R-P   and  a := r-p
6111	fadd.x		%fp5,%fp3		# fp3 = P
6112	fsub.x		%fp3,%fp4		# fp4 = W-P
6113
6114	fsub.x		%fp3,%fp0		# fp0 = A := R - P
6115	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
6116
6117	fmov.x		%fp0,%fp3		# fp3 = A
6118	fsub.x		%fp4,%fp1		# fp1 = a := r - p
6119
6120#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
6121#--|r| <= half ulp of R.
6122	fadd.x		%fp1,%fp0		# fp0 = R := A+a
6123#--No need to calculate r if this is the last loop
6124	cmp.b		%d1,&0
6125	bgt.w		RESTORE
6126
6127#--Need to calculate r
6128	fsub.x		%fp0,%fp3		# fp3 = A-R
6129	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
6130	bra.w		LOOP
6131
6132RESTORE:
6133	fmov.l		%fp2,INT(%a6)
6134	mov.l		(%sp)+,%d2		# restore d2
6135	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
6136
6137	mov.l		INT(%a6),%d1
6138	ror.l		&1,%d1
6139
6140	bra.w		TANCONT
6141
6142#########################################################################
6143# satan():  computes the arctangent of a normalized number		#
6144# satand(): computes the arctangent of a denormalized number		#
6145#									#
6146# INPUT	*************************************************************** #
6147#	a0 = pointer to extended precision input			#
6148#	d0 = round precision,mode					#
6149#									#
6150# OUTPUT ************************************************************** #
6151#	fp0 = arctan(X)							#
6152#									#
6153# ACCURACY and MONOTONICITY ******************************************* #
6154#	The returned result is within 2 ulps in	64 significant bit,	#
6155#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6156#	rounded to double precision. The result is provably monotonic	#
6157#	in double precision.						#
6158#									#
6159# ALGORITHM *********************************************************** #
6160#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
6161#									#
6162#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
6163#		Note that k = -4, -3,..., or 3.				#
6164#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
6165#		significant bits of X with a bit-1 attached at the 6-th	#
6166#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
6167#									#
6168#	Step 3. Approximate arctan(u) by a polynomial poly.		#
6169#									#
6170#	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a	#
6171#		table of values calculated beforehand. Exit.		#
6172#									#
6173#	Step 5. If |X| >= 16, go to Step 7.				#
6174#									#
6175#	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
6176#									#
6177#	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd	#
6178#		polynomial in X'.					#
6179#		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
6180#									#
6181#########################################################################
6182
#--POLYNOMIAL COEFFICIENTS AND CONSTANTS FOR satan().
#--ATANAn, ATANBn AND ATANCn ARE EACH TWO LONGWORDS (8 BYTES) AND ARE READ
#--WITH DOUBLE-PRECISION (.d) OPERATIONS:
#--	ATANA1-3 ARE USED AT ATANMAIN,
#--	ATANB1-6 ARE USED AT ATANSM,
#--	ATANC1-5 ARE USED AT ATANBIG.
6183ATANA3:	long		0xBFF6687E,0x314987D8
6184ATANA2:	long		0x4002AC69,0x34A26DB3
6185ATANA1:	long		0xBFC2476F,0x4E1DA28E
6186
6187ATANB6:	long		0x3FB34444,0x7F876989
6188ATANB5:	long		0xBFB744EE,0x7FAF45DB
6189ATANB4:	long		0x3FBC71C6,0x46940220
6190ATANB3:	long		0xBFC24924,0x921872F9
6191ATANB2:	long		0x3FC99999,0x99998FA9
6192ATANB1:	long		0xBFD55555,0x55555555
6193
6194ATANC5:	long		0xBFB70BF3,0x98539E6A
6195ATANC4:	long		0x3FBC7187,0x962D1D7D
6196ATANC3:	long		0xBFC24924,0x827107B8
6197ATANC2:	long		0x3FC99999,0x9996263E
6198ATANC1:	long		0xBFD55555,0x55555536
6199
#--+PI/2 AND -PI/2, READ WITH EXTENDED-PRECISION (.x) OPERATIONS BY
#--neg_big/pos_big AND neg_huge/pos_huge.
6200PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6201NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6202
#--+-TINY: BIASED EXPONENT 0x0001 WITH ONLY THE EXPLICIT INTEGER BIT SET.
#--ADDED TO +-PI/2 (WITH OPPOSITE SIGN) AT neg_huge/pos_huge.
6203PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
6204NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
6205
#--ATANTBL: 128 VALUES OF ATAN(|F|), ONE PER 16-BYTE ENTRY (THREE LONGWORDS
#--OF EXTENDED-PRECISION DATA FOLLOWED BY A ZERO LONGWORD).
#--ATANMAIN INDEXES IT WITH THE BYTE OFFSET 256*(K+4) + 16*J, WHERE K IS THE
#--EXPONENT OF X (K = -4,...,3) AND J IS THE 4 FRACTION BITS THAT FOLLOW THE
#--LEADING 1, AND COPIES ONLY THE FIRST THREE LONGWORDS OF THE ENTRY.
6206ATANTBL:
6207	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6208	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6209	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6210	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6211	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6212	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6213	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6214	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6215	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6216	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6217	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6218	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6219	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6220	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6221	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6222	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6223	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6224	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6225	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6226	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6227	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6228	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6229	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6230	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6231	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6232	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6233	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6234	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6235	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6236	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6237	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6238	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6239	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6240	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6241	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6242	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6243	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6244	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6245	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6246	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6247	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6248	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6249	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6250	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6251	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6252	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6253	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6254	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6255	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6256	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6257	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6258	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6259	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6260	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
6261	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6262	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6263	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6264	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6265	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6266	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6267	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6268	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6269	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6270	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6271	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6272	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6273	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6274	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6275	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6276	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6277	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6278	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6279	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6280	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6281	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6282	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6283	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6284	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6285	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6286	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6287	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6288	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6289	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6290	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6291	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6292	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6293	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6294	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6295	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6296	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6297	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6298	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6299	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6300	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6301	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6302	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6303	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6304	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6305	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6306	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6307	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6308	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6309	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6310	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6311	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6312	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6313	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6314	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6315	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6316	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6317	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6318	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6319	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6320	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6321	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6322	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6323	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6324	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6325	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6326	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6327	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6328	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6329	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6330	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6331	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6332	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6333	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6334	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6335
#--FRAME-RELATIVE ALIASES USED BY satan(), ADDRESSED AS OFFSETS FROM %a6.
#--X OVERLAYS FP_SCR0: IT FIRST HOLDS THE INPUT AND IS REWRITTEN INTO F AT
#--ATANMAIN (OR INTO X' = -1/X AT ATANBIG).
6336	set		X,FP_SCR0
6337	set		XDCARE,X+2		# NOT REFERENCED IN satan(); NOTE(review): presumably the unused word after sign/exponent
6338	set		XFRAC,X+4		# UPPER LONGWORD OF THE FRACTION
6339	set		XFRACLO,X+8		# LOWER LONGWORD OF THE FRACTION
6340
#--ATANF OVERLAYS FP_SCR1 AND RECEIVES SIGN(F)*ATAN(|F|) FROM ATANTBL.
6341	set		ATANF,FP_SCR1
6342	set		ATANFHI,ATANF+4
6343	set		ATANFLO,ATANF+8
6344
6345	global		satan
6346#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--  IN:   a0 = POINTER TO THE EXTENDED-PRECISION INPUT,
#--        d0 = VALUE LOADED INTO FPCR JUST BEFORE THE FINAL OPERATION.
#--  OUT:  fp0 = RESULT. EVERY PATH LEAVES BY BRANCHING TO ONE OF
#--        t_inx2, t_pinx2, t_minx2 OR t_catch (DEFINED ELSEWHERE).
#--  USES: d1, a1, fp1 AND THE SCRATCH AREAS X AND ATANF ARE OVERWRITTEN;
#--        d2, fp2 AND fp3 ARE SAVED ON THE STACK AND RESTORED WHERE USED.
#--  NOTE ON fmovm MASKS: THE REGISTER-LIST BIT ORDER IS REVERSED FOR THE
#--  PREDECREMENT FORM, SO &0x04/-(%sp) PAIRS WITH (%sp)+/&0x20 (fp2) AND
#--  &0x0c/-(%sp) PAIRS WITH (%sp)+/&0x30 (fp2/fp3).
6347satan:
6348	fmov.x		(%a0),%fp0		# LOAD INPUT
6349
#--BUILD THE COMPACT FORM OF |X| IN d1: SIGN/EXPONENT WORD IN THE UPPER
#--HALF, TOP 16 FRACTION BITS IN THE LOWER HALF, SIGN BIT CLEARED.
6350	mov.l		(%a0),%d1
6351	mov.w		4(%a0),%d1
6352	fmov.x		%fp0,X(%a6)
6353	and.l		&0x7FFFFFFF,%d1
6354
6355	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
6356	bge.b		ATANOK1
6357	bra.w		ATANSM
6358
6359ATANOK1:
6360	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
6361	ble.b		ATANMAIN
6362	bra.w		ATANBIG
6363
6364#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6365#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6366#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6367#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6368#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6369#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6370#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6371#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6372#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6373#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6374#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6375#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6376#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6377
6378#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6379#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
6380#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6381#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6382#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6383#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6384
6385ATANMAIN:
6386
6387	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
6388	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
6389	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6390
6391	fmov.x		%fp0,%fp1		# FP1 IS X
6392	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
6393	fsub.x		X(%a6),%fp0		# FP0 IS X-F
6394	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
6395	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)
6396
6397#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6398#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6399#--SAVE REGISTERS FP2.
6400
#--THE TABLE OFFSET BUILT IN d1 BELOW IS 256*(K+4) + 16*(4 FRACTION BITS),
#--I.E. ONE 16-BYTE ATANTBL ENTRY PER F.
6401	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
6402	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
6403	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
6404	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
6405	sub.l		&0x3FFB0000,%d2		# K+4
6406	asr.l		&1,%d2			# K+4 NOW SITS JUST ABOVE THE 4 FRACTION BITS
6407	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
6408	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
6409	lea		ATANTBL(%pc),%a1
6410	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
6411	mov.l		(%a1)+,ATANF(%a6)
6412	mov.l		(%a1)+,ATANFHI(%a6)
6413	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
6414	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
6415	and.l		&0x80000000,%d1		# SIGN(F)
6416	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
6417	mov.l		(%sp)+,%d2		# RESTORE d2
6418
6419#--THAT'S ALL I HAVE TO DO FOR NOW,
6420#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6421
6422#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6423#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6424#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6425#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6426#--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
6427#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6428#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6429
6430	fmovm.x		&0x04,-(%sp)		# save fp2
6431
6432	fmov.x		%fp0,%fp1
6433	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
6434	fmov.d		ATANA3(%pc),%fp2
6435	fadd.x		%fp1,%fp2		# A3+V
6436	fmul.x		%fp1,%fp2		# V*(A3+V)
6437	fmul.x		%fp0,%fp1		# U*V
6438	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
6439	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
6440	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
6441	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED
6442
6443	fmovm.x		(%sp)+,&0x20		# restore fp2
6444
6445	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6446	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
6447	bra		t_inx2
6448
6449ATANBORS:
6450#--|X| IS IN d1 IN COMPACT FORM.
6451#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
#--NOTE(review): NO BRANCH TO ATANBORS IS VISIBLE IN satan(); THE ENTRY CODE
#--GOES TO ATANSM OR ATANBIG DIRECTLY. CONFIRM BEFORE RELYING ON THIS LABEL.
6452	cmp.l		%d1,&0x3FFF8000
6453	bgt.w		ATANBIG			# I.E. |X| >= 16
6454
6455ATANSM:
6456#--|X| < 1/16
6457#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6458#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6459#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
6460#--WHERE Y = X*X, AND Z = Y*Y.
6461
6462	cmp.l		%d1,&0x3FD78000
6463	blt.w		ATANTINY
6464
6465#--COMPUTE POLYNOMIAL
6466	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
6467
6468	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X
6469
6470	fmov.x		%fp0,%fp1
6471	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
6472
6473	fmov.d		ATANB6(%pc),%fp2
6474	fmov.d		ATANB5(%pc),%fp3
6475
6476	fmul.x		%fp1,%fp2		# Z*B6
6477	fmul.x		%fp1,%fp3		# Z*B5
6478
6479	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
6480	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5
6481
6482	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
6483	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)
6484
6485	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
6486	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)
6487
6488	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
6489	fmul.x		X(%a6),%fp0		# X*Y
6490
6491	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6492
6493	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6494
6495	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
6496
6497	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6498	fadd.x		X(%a6),%fp0		# ATAN(X) = X + X*Y*(...)
6499	bra		t_inx2
6500
6501ATANTINY:
6502#--|X| < 2^(-40), ATAN(X) = X
6503
6504	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6505	mov.b		&FMOV_OP,%d1		# last inst is MOVE
6506	fmov.x		X(%a6),%fp0		# last inst - possible exception set
6507
6508	bra		t_catch
6509
6510ATANBIG:
6511#--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
6512#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6513	cmp.l		%d1,&0x40638000
6514	bgt.w		ATANHUGE
6515
6516#--APPROXIMATE ATAN(-1/X) BY
6517#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6518#--THIS CAN BE RE-WRITTEN AS
6519#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6520
6521	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
6522
6523	fmov.s		&0xBF800000,%fp1	# LOAD -1
6524	fdiv.x		%fp0,%fp1		# FP1 IS -1/X
6525
6526#--DIVIDE IS STILL CRANKING
6527
6528	fmov.x		%fp1,%fp0		# FP0 IS X'
6529	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
6530	fmov.x		%fp1,X(%a6)		# X IS REALLY X'
6531
6532	fmov.x		%fp0,%fp1
6533	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
6534
6535	fmov.d		ATANC5(%pc),%fp3
6536	fmov.d		ATANC4(%pc),%fp2
6537
6538	fmul.x		%fp1,%fp3		# Z*C5
6539	fmul.x		%fp1,%fp2		# Z*C4
6540
6541	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
6542	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4
6543
6544	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
6545	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)
6546
6547	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
6548	fmul.x		X(%a6),%fp0		# X'*Y
6549
6550	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6551
6552	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
6553#					...	+[Y*(C2+Z*C4)])
6554	fadd.x		X(%a6),%fp0		# ATAN(X') = X' + X'*Y*(...)
6555
6556	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
6557
6558	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6559	tst.b		(%a0)			# SIGN OF THE ORIGINAL X
6560	bpl.b		pos_big
6561
6562neg_big:
6563	fadd.x		NPIBY2(%pc),%fp0	# -PI/2 + ATAN(-1/X)
6564	bra		t_minx2
6565
6566pos_big:
6567	fadd.x		PPIBY2(%pc),%fp0	# +PI/2 + ATAN(-1/X)
6568	bra		t_pinx2
6569
6570ATANHUGE:
6571#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6572	tst.b		(%a0)
6573	bpl.b		pos_huge
6574
6575neg_huge:
6576	fmov.x		NPIBY2(%pc),%fp0
6577	fmov.l		%d0,%fpcr		# FINAL ADD RUNS UNDER THE MODE GIVEN IN d0
6578	fadd.x		PTINY(%pc),%fp0		# -PI/2 + TINY
6579	bra		t_minx2
6580
6581pos_huge:
6582	fmov.x		PPIBY2(%pc),%fp0
6583	fmov.l		%d0,%fpcr		# FINAL ADD RUNS UNDER THE MODE GIVEN IN d0
6584	fadd.x		NTINY(%pc),%fp0		# +PI/2 - TINY
6585	bra		t_pinx2
6586
6587	global		satand
6588#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
#--NO COMPUTATION IS DONE HERE; THE INPUT IS PASSED STRAIGHT TO t_extdnrm
#--(DEFINED ELSEWHERE IN THE PACKAGE).
6589satand:
6590	bra		t_extdnrm
6591
6592#########################################################################
6593# sasin():  computes the inverse sine of a normalized input		#
6594# sasind(): computes the inverse sine of a denormalized input		#
6595#									#
6596# INPUT ***************************************************************	#
6597#	a0 = pointer to extended precision input			#
6598#	d0 = round precision,mode					#
6599#									#
6600# OUTPUT **************************************************************	#
6601#	fp0 = arcsin(X)							#
6602#									#
6603# ACCURACY and MONOTONICITY *******************************************	#
6604#	The returned result is within 3 ulps in	64 significant bit,	#
6605#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6606#	rounded to double precision. The result is provably monotonic	#
6607#	in double precision.						#
6608#									#
6609# ALGORITHM ***********************************************************	#
6610#									#
6611#	ASIN								#
6612#	1. If |X| >= 1, go to 3.					#
6613#									#
6614#	2. (|X| < 1) Calculate asin(X) by				#
6615#		z := sqrt( [1-X][1+X] )					#
6616#		asin(X) = atan( x / z ).				#
6617#		Exit.							#
6618#									#
6619#	3. If |X| > 1, go to 5.						#
6620#									#
6621#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6622#									#
6623#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6624#		Exit.							#
6625#									#
6626#########################################################################
6627
6628	global		sasin
#--ENTRY POINT FOR ASIN(X), NORMALIZED X.
#--  IN:   a0 = POINTER TO THE EXTENDED-PRECISION INPUT,
#--        d0 = ROUND PRECISION,MODE (PASSED ON UNCHANGED TO satan).
#--  OUT:  fp0 = RESULT; EXITS VIA t_inx2, t_catch OR t_operr.
#--  USES: d1 AND fp1; fp2 IS SAVED AND RESTORED; a0 IS REPOINTED AT THE
#--        STACK FOR THE CALL TO satan.
6629sasin:
6630	fmov.x		(%a0),%fp0		# LOAD INPUT
6631
#--BUILD THE COMPACT FORM OF |X| IN d1 (EXPONENT WORD : TOP 16 FRACTION BITS)
6632	mov.l		(%a0),%d1
6633	mov.w		4(%a0),%d1
6634	and.l		&0x7FFFFFFF,%d1
6635	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
6636	bge.b		ASINBIG
6637
6638# This catch is added here for the '060 QSP. Originally, the call to
6639# satan() would handle this case by causing the exception which would
6640# not be caught until gen_except(). Now, with the exceptions being
6641# detected inside of satan(), the exception would have been handled there
6642# instead of inside sasin() as expected.
6643	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
6644	blt.w		ASINTINY
6645
6646#--THIS IS THE USUAL CASE, |X| < 1
6647#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6648
6649ASINMAIN:
6650	fmov.s		&0x3F800000,%fp1
6651	fsub.x		%fp0,%fp1		# 1-X
6652	fmovm.x		&0x4,-(%sp)		#  {fp2}
6653	fmov.s		&0x3F800000,%fp2
6654	fadd.x		%fp0,%fp2		# 1+X
6655	fmul.x		%fp2,%fp1		# (1+X)(1-X)
6656	fmovm.x		(%sp)+,&0x20		#  {fp2}
6657	fsqrt.x		%fp1			# SQRT([1-X][1+X])
6658	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
6659	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
6660	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
6661	bsr		satan			# fp0 = ATAN(X/SQRT(...)); d0 passed through
6662	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
6663	bra		t_inx2
6664
6665ASINBIG:
6666	fabs.x		%fp0			# |X|
6667	fcmp.s		%fp0,&0x3F800000
6668	fbgt		t_operr			# cause an operr exception
6669
6670#--|X| = 1, ASIN(X) = +- PI/2.
6671ASINONE:
6672	fmov.x		PIBY2(%pc),%fp0
6673	mov.l		(%a0),%d1
6674	and.l		&0x80000000,%d1		# SIGN BIT OF X
6675	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
6676	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
6677	fmov.l		%d0,%fpcr		# final multiply runs under the mode in d0
6678	fmul.s		(%sp)+,%fp0		# SIGN(X) * PI/2
6679	bra		t_inx2
6680
6681#--|X| < 2^(-40), ASIN(X) = X
6682ASINTINY:
6683	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6684	mov.b		&FMOV_OP,%d1		# last inst is MOVE
6685	fmov.x		(%a0),%fp0		# last inst - possible exception
6686	bra		t_catch
6687
6688	global		sasind
6689#--ASIN(X) = X FOR DENORMALIZED X
#--NO COMPUTATION IS DONE HERE; THE INPUT IS PASSED STRAIGHT TO t_extdnrm
#--(DEFINED ELSEWHERE IN THE PACKAGE).
6690sasind:
6691	bra		t_extdnrm
6692
6693#########################################################################
6694# sacos():  computes the inverse cosine of a normalized input		#
6695# sacosd(): computes the inverse cosine of a denormalized input		#
6696#									#
6697# INPUT ***************************************************************	#
6698#	a0 = pointer to extended precision input			#
6699#	d0 = round precision,mode					#
6700#									#
6701# OUTPUT ************************************************************** #
6702#	fp0 = arccos(X)							#
6703#									#
6704# ACCURACY and MONOTONICITY *******************************************	#
6705#	The returned result is within 3 ulps in	64 significant bit,	#
6706#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6707#	rounded to double precision. The result is provably monotonic	#
6708#	in double precision.						#
6709#									#
6710# ALGORITHM *********************************************************** #
6711#									#
6712#	ACOS								#
6713#	1. If |X| >= 1, go to 3.					#
6714#									#
6715#	2. (|X| < 1) Calculate acos(X) by				#
6716#		z := (1-X) / (1+X)					#
6717#		acos(X) = 2 * atan( sqrt(z) ).				#
6718#		Exit.							#
6719#									#
6720#	3. If |X| > 1, go to 5.						#
6721#									#
6722#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
6723#									#
6724#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6725#		Exit.							#
6726#									#
6727#########################################################################
6728
6729	global		sacos
#--ENTRY POINT FOR ACOS(X), NORMALIZED X.
#--  IN:   a0 = POINTER TO THE EXTENDED-PRECISION INPUT,
#--        d0 = ROUND PRECISION,MODE.
#--  OUT:  fp0 = RESULT; EXITS VIA t_pinx2, t_operr OR ld_pzero.
#--  USES: d1 AND fp1; ON THE MAIN PATH d0 IS CLEARED AND a0 IS REPOINTED AT
#--        THE STACK FOR THE CALL TO satan.
6730sacos:
6731	fmov.x		(%a0),%fp0		# LOAD INPUT
6732
6733	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
6734	mov.w		4(%a0),%d1
6735	and.l		&0x7FFFFFFF,%d1
6736	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
6737	bge.b		ACOSBIG
6738
6739#--THIS IS THE USUAL CASE, |X| < 1
6740#--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) ) )
6741
6742ACOSMAIN:
6743	fmov.s		&0x3F800000,%fp1
6744	fadd.x		%fp0,%fp1		# 1+X
6745	fneg.x		%fp0			# -X
6746	fadd.s		&0x3F800000,%fp0	# 1-X
6747	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
6748	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
6749	mov.l		%d0,-(%sp)		# save original users fpcr
6750	clr.l		%d0			# satan loads d0 into fpcr, so it runs with fpcr = 0
6751	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
6752	lea		(%sp),%a0		# pass ptr to sqrt
6753	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
6754	add.l		&0xc,%sp		# clear SQRT(...) from stack
6755
6756	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
6757	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
6758	bra		t_pinx2
6759
6760ACOSBIG:
6761	fabs.x		%fp0
6762	fcmp.s		%fp0,&0x3F800000
6763	fbgt		t_operr			# cause an operr exception
6764
6765#--|X| = 1, ACOS(X) = 0 OR PI
6766	tst.b		(%a0)			# is X positive or negative?
6767	bpl.b		ACOSP1
6768
6769#--X = -1
6770#Returns PI and inexact exception
6771ACOSM1:
6772	fmov.x		PI(%pc),%fp0		# load PI
6773	fmov.l		%d0,%fpcr		# load round mode,prec
6774	fadd.s		&0x00800000,%fp0	# add a small value
6775	bra		t_pinx2
6776
6777ACOSP1:
6778	bra		ld_pzero		# answer is positive zero
6779
6780	global		sacosd
6781#--ACOS(X) = PI/2 FOR DENORMALIZED X
#--THE INPUT VALUE IS NOT READ. PIBY2 IS LOADED AFTER FPCR HAS BEEN SET FROM
#--d0, SO THE LOAD RUNS UNDER THE CALLER'S ROUNDING PRECISION AND MODE.
6782sacosd:
6783	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
6784	fmov.x		PIBY2(%pc),%fp0
6785	bra		t_pinx2
6786
6787#########################################################################
6788# setox():    computes the exponential for a normalized input		#
6789# setoxd():   computes the exponential for a denormalized input		#
6790# setoxm1():  computes the exponential minus 1 for a normalized input	#
6791# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
6792#									#
6793# INPUT	*************************************************************** #
6794#	a0 = pointer to extended precision input			#
6795#	d0 = round precision,mode					#
6796#									#
6797# OUTPUT ************************************************************** #
6798#	fp0 = exp(X) or exp(X)-1					#
6799#									#
6800# ACCURACY and MONOTONICITY ******************************************* #
6801#	The returned result is within 0.85 ulps in 64 significant bit,	#
6802#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803#	rounded to double precision. The result is provably monotonic	#
6804#	in double precision.						#
6805#									#
6806# ALGORITHM and IMPLEMENTATION **************************************** #
6807#									#
6808#	setoxd								#
6809#	------								#
6810#	Step 1.	Set ans := 1.0						#
6811#									#
6812#	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
6813#	Notes:	This will always generate one exception -- inexact.	#
6814#									#
6815#									#
6816#	setox								#
6817#	-----								#
6818#									#
6819#	Step 1.	Filter out extreme cases of input argument.		#
6820#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
6821#		1.2	Go to Step 7.					#
6822#		1.3	If |X| < 16380 log(2), go to Step 2.		#
6823#		1.4	Go to Step 8.					#
6824#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825#		To avoid the use of floating-point comparisons, a	#
6826#		compact representation of |X| is used. This format is a	#
6827#		32-bit integer, the upper (more significant) 16 bits	#
6828#		are the sign and biased exponent field of |X|; the	#
6829#		lower 16 bits are the 16 most significant fraction	#
6830#		(including the explicit bit) bits of |X|. Consequently,	#
6831#		the comparisons in Steps 1.1 and 1.3 can be performed	#
6832#		by integer comparison. Note also that the constant	#
6833#		16380 log(2) used in Step 1.3 is also in the compact	#
6834#		form. Thus taking the branch to Step 2 guarantees	#
6835#		|X| < 16380 log(2). There is no harm to have a small	#
6836#		number of cases where |X| is less than,	but close to,	#
6837#		16380 log(2) and the branch to Step 9 is taken.		#
6838#									#
6839#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
6840#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6841#			was taken)					#
6842#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
6843#		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
6844#			or 63.						#
6845#		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
6846#		2.5	Calculate the address of the stored value of	#
6847#			2^(J/64).					#
6848#		2.6	Create the value Scale = 2^M.			#
6849#	Notes:	The calculation in 2.2 is really performed by		#
6850#			Z := X * constant				#
6851#			N := round-to-nearest-integer(Z)		#
6852#		where							#
6853#			constant := single-precision( 64/log 2 ).	#
6854#									#
6855#		Using a single-precision constant avoids memory		#
6856#		access. Another effect of using a single-precision	#
6857#		"constant" is that the calculated value Z is		#
6858#									#
6859#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
6860#									#
6861#		This error has to be considered later in Steps 3 and 4.	#
6862#									#
6863#	Step 3.	Calculate X - N*log2/64.				#
6864#		3.1	R := X + N*L1,					#
6865#				where L1 := single-precision(-log2/64).	#
6866#		3.2	R := R + N*L2,					#
6867#				L2 := extended-precision(-log2/64 - L1).#
6868#	Notes:	a) The way L1 and L2 are chosen ensures L1+L2		#
6869#		approximate the value -log2/64 to 88 bits of accuracy.	#
6870#		b) N*L1 is exact because N is no longer than 22 bits	#
6871#		and L1 is no longer than 24 bits.			#
6872#		c) The calculation X+N*L1 is also exact due to		#
6873#		cancellation. Thus, R is practically X+N(L1+L2) to full	#
6874#		64 bits.						#
6875#		d) It is important to estimate how large can |R| be	#
6876#		after Step 3.2.						#
6877#									#
6878#		N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)	#
6879#		X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5	#
6880#		X*64/log2 - N	=	f - eps*X 64/log2		#
6881#		X - N*log2/64	=	f*log2/64 - eps*X		#
6882#									#
6883#									#
6884#		Now |X| <= 16446 log2, thus				#
6885#									#
6886#			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64	#
6887#					<= 0.57 log2/64.		#
6888#		 This bound will be used in Step 4.			#
6889#									#
6890#	Step 4.	Approximate exp(R)-1 by a polynomial			#
6891#		p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))	#
6892#	Notes:	a) In order to reduce memory access, the coefficients	#
6893#		are made as "short" as possible: A1 (which is 1/2), A4	#
6894#		and A5 are single precision; A2 and A3 are double	#
6895#		precision.						#
6896#		b) Even with the restrictions above,			#
6897#		   |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.	#
6898#		Note that 0.0062 is slightly bigger than 0.57 log2/64.	#
6899#		c) To fully utilize the pipeline, p is separated into	#
6900#		two independent pieces of roughly equal complexities	#
6901#			p = [ R + R*S*(A2 + S*A4) ]	+		#
6902#				[ S*(A1 + S*(A3 + S*A5)) ]		#
6903#		where S = R*R.						#
6904#									#
6905#	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by		#
6906#				ans := T + ( T*p + t)			#
6907#		where T and t are the stored values for 2^(J/64).	#
6908#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
6909#		2^(J/64) to roughly 85 bits; T is in extended precision	#
6910#		and t is in single precision. Note also that T is	#
6911#		rounded to 62 bits so that the last two bits of T are	#
6912#		zero. The reason for such a special form is that T-1,	#
6913#		T-2, and T-8 will all be exact --- a property that will	#
6914#		give much more accurate computation of the function	#
6915#		EXPM1.							#
6916#									#
6917#	Step 6.	Reconstruction of exp(X)				#
6918#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
6919#		6.1	If AdjFlag = 0, go to 6.3			#
6920#		6.2	ans := ans * AdjScale				#
6921#		6.3	Restore the user FPCR				#
6922#		6.4	Return ans := ans * Scale. Exit.		#
6923#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
6924#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
6925#		neither overflow nor underflow. If AdjFlag = 1, that	#
6926#		means that						#
6927#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
6928#		Hence, exp(X) may overflow or underflow or neither.	#
6929#		When that is the case, AdjScale = 2^(M1) where M1 is	#
6930#		approximately M. Thus 6.2 will never cause		#
6931#		over/underflow. Possible exception in 6.4 is overflow	#
6932#		or underflow. The inexact exception is not generated in	#
6933#		6.4. Although one can argue that the inexact flag	#
6934#		should always be raised, simulating that exception	#
6935#		costs more than the flag is worth in practical use.	#
6936#									#
6937#	Step 7.	Return 1 + X.						#
6938#		7.1	ans := X					#
6939#		7.2	Restore user FPCR.				#
6940#		7.3	Return ans := 1 + ans. Exit			#
6941#	Notes:	For non-zero X, the inexact exception will always be	#
6942#		raised by 7.3. That is the only exception raised by 7.3.#
6943#		Note also that we use the FMOVEM instruction to move X	#
6944#		in Step 7.1 to avoid unnecessary trapping. (Although	#
6945#		the FMOVEM may not seem relevant since X is normalized,	#
6946#		the precaution will be useful in the library version of	#
6947#		this code where the separate entry for denormalized	#
6948#		inputs will be done away with.)				#
6949#									#
6950#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
6951#		8.1	If |X| > 16480 log2, go to Step 9.		#
6952#		(mimic 2.2 - 2.6)					#
6953#		8.2	N := round-to-integer( X * 64/log2 )		#
6954#		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
6955#		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1,	#
6956#			AdjFlag := 1.					#
6957#		8.5	Calculate the address of the stored value	#
6958#			2^(J/64).					#
6959#		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
6960#		8.7	Go to Step 3.					#
6961#	Notes:	Refer to notes for 2.2 - 2.6.				#
6962#									#
6963#	Step 9.	Handle exp(X), |X| > 16480 log2.			#
6964#		9.1	If X < 0, go to 9.3				#
6965#		9.2	ans := Huge, go to 9.4				#
6966#		9.3	ans := Tiny.					#
6967#		9.4	Restore user FPCR.				#
6968#		9.5	Return ans := ans * ans. Exit.			#
6969#	Notes:	Exp(X) will surely overflow or underflow, depending on	#
6970#		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
6971#		extended-precision numbers whose square over/underflow	#
6972#		with an inexact result. Thus, 9.5 always raises the	#
6973#		inexact together with either overflow or underflow.	#
6974#									#
6975#	setoxm1d							#
6976#	--------							#
6977#									#
6978#	Step 1.	Set ans := 0						#
6979#									#
6980#	Step 2.	Return	ans := X + ans. Exit.				#
6981#	Notes:	This will return X with the appropriate rounding	#
6982#		 precision prescribed by the user FPCR.			#
6983#									#
6984#	setoxm1								#
6985#	-------								#
6986#									#
6987#	Step 1.	Check |X|						#
6988#		1.1	If |X| >= 1/4, go to Step 1.3.			#
6989#		1.2	Go to Step 7.					#
6990#		1.3	If |X| < 70 log(2), go to Step 2.		#
6991#		1.4	Go to Step 10.					#
6992#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993#		However, it is conceivable |X| can be small very often	#
6994#		because EXPM1 is intended to evaluate exp(X)-1		#
6995#		accurately when |X| is small. For further details on	#
6996#		the comparisons, see the notes on Step 1 of setox.	#
6997#									#
6998#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
6999#		2.1	N := round-to-nearest-integer( X * 64/log2 ).	#
7000#		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., #
7001#			or 63.						#
7002#		2.3	Calculate	M = (N - J)/64; so N = 64M + J.	#
7003#		2.4	Calculate the address of the stored value of	#
7004#			2^(J/64).					#
7005#		2.5	Create the values Sc = 2^M and			#
7006#			OnebySc := -2^(-M).				#
7007#	Notes:	See the notes on Step 2 of setox.			#
7008#									#
7009#	Step 3.	Calculate X - N*log2/64.				#
7010#		3.1	R := X + N*L1,					#
7011#				where L1 := single-precision(-log2/64).	#
7012#		3.2	R := R + N*L2,					#
7013#				L2 := extended-precision(-log2/64 - L1).#
7014#	Notes:	Applying the analysis of Step 3 of setox in this case	#
7015#		shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
7016#		this case).						#
7017#									#
7018#	Step 4.	Approximate exp(R)-1 by a polynomial			#
7019#			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))	#
7020#	Notes:	a) In order to reduce memory access, the coefficients	#
7021#		are made as "short" as possible: A1 (which is 1/2), A5	#
7022#		and A6 are single precision; A2, A3 and A4 are double	#
7023#		precision.						#
7024#		b) Even with the restriction above,			#
7025#			|p - (exp(R)-1)| <	|R| * 2^(-72.7)		#
7026#		for all |R| <= 0.0055.					#
7027#		c) To fully utilize the pipeline, p is separated into	#
7028#		two independent pieces of roughly equal complexity	#
7029#			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+	#
7030#				[ R + S*(A1 + S*(A3 + S*A5)) ]		#
7031#		where S = R*R.						#
7032#									#
7033#	Step 5.	Compute 2^(J/64)*p by					#
7034#				p := T*p				#
7035#		where T and t are the stored values for 2^(J/64).	#
7036#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
7037#		2^(J/64) to roughly 85 bits; T is in extended precision	#
7038#		and t is in single precision. Note also that T is	#
7039#		rounded to 62 bits so that the last two bits of T are	#
7040#		zero. The reason for such a special form is that T-1,	#
7041#		T-2, and T-8 will all be exact --- a property that will	#
7042#		be exploited in Step 6 below. The total relative error	#
7043#		in p is no bigger than 2^(-67.7) compared to the final	#
7044#		result.							#
7045#									#
7046#	Step 6.	Reconstruction of exp(X)-1				#
7047#			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).	#
7048#		6.1	If M <= 63, go to Step 6.3.			#
7049#		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
7050#		6.3	If M >= -3, go to 6.5.				#
7051#		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
7052#		6.5	ans := (T + OnebySc) + (p + t).			#
7053#		6.6	Restore user FPCR.				#
7054#		6.7	Return ans := Sc * ans. Exit.			#
7055#	Notes:	The various arrangements of the expressions give	#
7056#		accurate evaluations.					#
7057#									#
7058#	Step 7.	exp(X)-1 for |X| < 1/4.					#
7059#		7.1	If |X| >= 2^(-65), go to Step 9.		#
7060#		7.2	Go to Step 8.					#
7061#									#
7062#	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
7063#		8.1	If |X| < 2^(-16312), goto 8.3			#
7064#		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
7065#			Exit.						#
7066#		8.3	X := X * 2^(140).				#
7067#		8.4	Restore FPCR; ans := ans - 2^(-16382).		#
7068#		 Return ans := ans*2^(140). Exit			#
7069#	Notes:	The idea is to return "X - tiny" under the user		#
7070#		precision and rounding modes. To avoid unnecessary	#
7071#		inefficiency, we stay away from denormalized numbers	#
7072#		the best we can. For |X| >= 2^(-16312), the		#
7073#		straightforward 8.2 generates the inexact exception as	#
7074#		the case warrants.					#
7075#									#
7076#	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
7077#			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
7078#	Notes:	a) In order to reduce memory access, the coefficients	#
7079#		are made as "short" as possible: B1 (which is 1/2), B9	#
7080#		to B12 are single precision; B3 to B8 are double	#
7081#		precision; and B2 is double extended.			#
7082#		b) Even with the restriction above,			#
7083#			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
7084#		for all |X| <= 0.251.					#
7085#		Note that 0.251 is slightly bigger than 1/4.		#
7086#		c) To fully preserve accuracy, the polynomial is	#
7087#		computed as						#
7088#			X + ( S*B1 +	Q ) where S = X*X and		#
7089#			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))	#
7090#		d) To fully utilize the pipeline, Q is separated into	#
7091#		two independent pieces of roughly equal complexity	#
7092#			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +	#
7093#				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]	#
7094#									#
7095#	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
7096#		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all	#
7097#		practical purposes. Therefore, go to Step 1 of setox.	#
7098#		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
7099#		purposes.						#
7100#		ans := -1						#
7101#		Restore user FPCR					#
7102#		Return ans := ans + 2^(-126). Exit.			#
7103#	Notes:	10.2 will always create an inexact and return -1 + tiny	#
7104#		in the user rounding precision and mode.		#
7105#									#
7106#########################################################################
7107
# Constants for setox/setoxm1, all referenced PC-relative by the code below.
#
# L2: extended-precision tail of -log2/64 (read with fmul.x; L1+L2 = -log2/64,
# where L1 is the single-precision immediate 0xBC317218 used in Step 3).
# The trailing zero long is not part of the 12-byte extended operand --
# presumably alignment padding.
7108L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7109
# setox Step 4 coefficients, read with fadd.d (double precision).
7110EEXPA3:	long		0x3FA55555,0x55554CC1	# A3
7111EEXPA2:	long		0x3FC55555,0x55554A54	# A2
7112
# setoxm1 Step 4 coefficients, read with fadd.d (double precision).
7113EM1A4:	long		0x3F811111,0x11174385	# A4
7114EM1A3:	long		0x3FA55555,0x55554F5A	# A3
7115
# A2 for setoxm1 (double); the two trailing zero longs are not read by the
# fadd.d that uses it -- presumably padding.
7116EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000
7117
# setoxm1 Step 9 (EM1POLY) coefficients B8..B3, read with fadd.d (double).
7118EM1B8:	long		0x3EC71DE3,0xA5774682
7119EM1B7:	long		0x3EFA01A0,0x19D7CB68
7120
7121EM1B6:	long		0x3F2A01A0,0x1A019DF3
7122EM1B5:	long		0x3F56C16C,0x16C170E2
7123
7124EM1B4:	long		0x3F811111,0x11111111
7125EM1B3:	long		0x3FA55555,0x55555555
7126
# B2 is the only Step 9 coefficient kept in extended precision (fadd.x);
# the following zero long lies outside that 12-byte operand.
7127EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7128	long		0x00000000
7129
# Double-precision 2^(140) and 2^(-140), used with fmul.d in EM12TINY to
# scale a very small X up before the subtraction and back down afterwards.
7130TWO140:	long		0x48B00000,0x00000000
7131TWON140:
7132	long		0x37300000,0x00000000
7133
# EEXPTBL: 64 entries, one per J = 0..63, giving 2^(J/64) as a pair (T, t).
# Each entry is 16 bytes (the code indexes it with J << 4):
#   bytes 0-11  T, extended precision (sign/exponent word, pad word,
#               64-bit mantissa), loaded with fmov.x / fadd.x
#   bytes 12-15 t, single-precision correction, loaded with fadd.s / fmov.s
# so that T + t approximates 2^(J/64) (see Step 5 of the algorithm notes).
7134EEXPTBL:
7135	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
7136	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7199
# Scratch-slot aliases (offsets from a6). setox uses ADJFLAG/SCALE/ADJSCALE;
# setoxm1 uses SC/ONEBYSC. SCALE and SC name the same slot (FP_SCR0), as do
# ADJSCALE and ONEBYSC (FP_SCR1).
7200	set		ADJFLAG,L_SCR2		# setox: 1 if ADJSCALE must be applied
7201	set		SCALE,FP_SCR0		# setox: 2^(M) in extended format
7202	set		ADJSCALE,FP_SCR1	# setox: 2^(M1) in extended format
7203	set		SC,FP_SCR0		# setoxm1: 2^(M) in extended format
7204	set		ONEBYSC,FP_SCR1		# setoxm1: -2^(-M) in extended format
7205
7206	global		setox
7207setox:
7208#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
#--In:  a0 = pointer to extended-precision X; d0 = value written to FPCR
#--     just before the final rounding operation of each path.
#--Out: fp0 = result. Every path leaves by branching to t_catch, t_pinx2,
#--     t_unfl2 or t_ovfl2 (all defined outside this section).
#--Scratch: d1, a1, fp1, L_SCR1, ADJFLAG, SCALE, ADJSCALE. fp2/fp3 are
#--     pushed before the polynomial and popped again in Step 5.
#--EXPC1 is also entered directly from setoxm1 (EM1BIG) for large positive X.
7209
7210#--Step 1.
7211	mov.l		(%a0),%d1		# load part of input X
7212	and.l		&0x7FFF0000,%d1		# biased expo. of X
7213	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
7214	bge.b		EXPC1			# normal case
7215	bra		EXPSM
7216
7217EXPC1:
7218#--The case |X| >= 2^(-65)
#--d1 upper word already holds the biased exponent; the lower word is
#--filled with the top 16 mantissa bits so one compare orders |X|.
7219	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7220	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
7221	blt.b		EXPMAIN			# normal case
7222	bra		EEXPBIG
7223
7224EXPMAIN:
7225#--Step 2.
7226#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
7227	fmov.x		(%a0),%fp0		# load input from (a0)
7228
7229	fmov.x		%fp0,%fp1
7230	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7231	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
7232	mov.l		&0,ADJFLAG(%a6)		# no ADJSCALE multiply needed
7233	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7234	lea		EEXPTBL(%pc),%a1
7235	fmov.l		%d1,%fp0		# convert to floating-format
7236
7237	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7238	and.l		&0x3F,%d1		# d1 is J = N mod 64
7239	lsl.l		&4,%d1			# J*16 = byte offset of table entry
7240	add.l		%d1,%a1			# address of 2^(J/64)
7241	mov.l		L_SCR1(%a6),%d1
7242	asr.l		&6,%d1			# d1 is M
7243	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7244	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB
7245
7246EXPCONT1:
7247#--Step 3.
7248#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7249#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7250	fmov.x		%fp0,%fp2
7251	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7252	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7253	fadd.x		%fp1,%fp0		# X + N*L1
7254	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7255
7256#--Step 4.
7257#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7261
7262	fmov.x		%fp0,%fp1
7263	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7264
7265	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5
7266
7267	fmul.x		%fp1,%fp2		# fp2 IS S*A5
7268	fmov.x		%fp1,%fp3
7269	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4
7270
7271	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
7272	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4
7273
7274	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
#--Build SCALE = 2^(M) in memory between FP operations: exponent word
#--from d1, mantissa 0x80000000_00000000.
7275	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
7276	mov.l		&0x80000000,SCALE+4(%a6)
7277	clr.l		SCALE+8(%a6)
7278
7279	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)
7280
7281	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
7282	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)
7283
7284	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
7285	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),
7286
7287	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
7288	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1
7289
7290#--Step 5
7291#--final reconstruction process
7292#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7293
7294	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
7295	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}
7296	fadd.s		(%a1),%fp0		# accurate 2^(J/64)
7297
7298	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
7299	mov.l		ADJFLAG(%a6),%d1
7300
7301#--Step 6
7302	tst.l		%d1
7303	beq.b		NORMAL
7304ADJUST:
7305	fmul.x		ADJSCALE(%a6),%fp0
7306NORMAL:
7307	fmov.l		%d0,%fpcr		# restore user FPCR
7308	mov.b		&FMUL_OP,%d1		# last inst is MUL
7309	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
7310	bra		t_catch
7311
7312EXPSM:
7313#--Step 7
7314	fmovm.x		(%a0),&0x80		# load X
7315	fmov.l		%d0,%fpcr
7316	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
7317	bra		t_pinx2
7318
7319EEXPBIG:
7320#--Step 8
7321	cmp.l		%d1,&0x400CB27C		# 16480 log2
7322	bgt.b		EXP2BIG
7323#--Steps 8.2 -- 8.6
7324	fmov.x		(%a0),%fp0		# load input from (a0)
7325
7326	fmov.x		%fp0,%fp1
7327	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7328	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
7329	mov.l		&1,ADJFLAG(%a6)		# Step 6 must multiply by ADJSCALE
7330	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7331	lea		EEXPTBL(%pc),%a1
7332	fmov.l		%d1,%fp0		# convert to floating-format
7333	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7334	and.l		&0x3F,%d1		# d1 is J = N mod 64
7335	lsl.l		&4,%d1			# J*16 = byte offset of table entry
7336	add.l		%d1,%a1			# address of 2^(J/64)
7337	mov.l		L_SCR1(%a6),%d1
7338	asr.l		&6,%d1			# d1 is K
7339	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
7340	asr.l		&1,%d1			# d1 is M1
7341	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
7342	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
7343	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
7344	mov.l		&0x80000000,ADJSCALE+4(%a6)
7345	clr.l		ADJSCALE+8(%a6)
7346	mov.l		L_SCR1(%a6),%d1		# d1 is M
7347	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7348	bra.w		EXPCONT1		# go back to Step 3
7349
7350EXP2BIG:
7351#--Step 9
7352	tst.b		(%a0)			# is X positive or negative?
7353	bmi		t_unfl2
7354	bra		t_ovfl2
7355
7356	global		setoxd
7357setoxd:
7358#--entry point for EXP(X), X is denormalized
#--Returns 1 + sign(X)*2^(-126), rounded under the FPCR value passed in d0.
#--The single-precision addend is built on the stack from the sign bit of
#--X and popped again by the fadd.s. Exits through t_pinx2.
7359	mov.l		(%a0),-(%sp)		# push sign/expo. longword of X
7360	andi.l		&0x80000000,(%sp)	# keep only the sign bit
7361	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)
7362
7363	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0
7364
7365	fmov.l		%d0,%fpcr		# FPCR := d0 before final add
7366	fadd.s		(%sp)+,%fp0		# 1 + sign(X)*2^(-126); pops temp
7367	bra		t_pinx2
7368
7369	global		setoxm1
7370setoxm1:
7371#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
#--In:  a0 = pointer to extended-precision X; d0 = value written to FPCR
#--     just before the final rounding operation of each path.
#--Out: fp0 = result. Paths leave through t_inx2, t_catch or t_minx2, or
#--     join setox at EXPC1 for large positive X (Step 10.1).
#--Scratch: d1, a1, fp1, L_SCR1, SC, ONEBYSC. fp2/fp3 are pushed and popped
#--     around each polynomial evaluation.
7372
7373#--Step 1.
7374#--Step 1.1
7375	mov.l		(%a0),%d1		# load part of input X
7376	and.l		&0x7FFF0000,%d1		# biased expo. of X
7377	cmp.l		%d1,&0x3FFD0000		# 1/4
7378	bge.b		EM1CON1			# |X| >= 1/4
7379	bra		EM1SM
7380
7381EM1CON1:
7382#--Step 1.3
7383#--The case |X| >= 1/4
7384	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7385	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
7386	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
7387	bra		EM1BIG
7388
7389EM1MAIN:
7390#--Step 2.
7391#--This is the case:	1/4 <= |X| <= 70 log2.
7392	fmov.x		(%a0),%fp0		# load input from (a0)
7393
7394	fmov.x		%fp0,%fp1
7395	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7396	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
7397	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7398	lea		EEXPTBL(%pc),%a1
7399	fmov.l		%d1,%fp0		# convert to floating-format
7400
7401	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7402	and.l		&0x3F,%d1		# d1 is J = N mod 64
7403	lsl.l		&4,%d1			# J*16 = byte offset of table entry
7404	add.l		%d1,%a1			# address of 2^(J/64)
7405	mov.l		L_SCR1(%a6),%d1
7406	asr.l		&6,%d1			# d1 is M
7407	mov.l		%d1,L_SCR1(%a6)		# save a copy of M
7408
7409#--Step 3.
7410#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7411#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7412	fmov.x		%fp0,%fp2
7413	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7414	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7415	fadd.x		%fp1,%fp0		# X + N*L1
7416	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7417	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M
7418
7419#--Step 4.
7420#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7424
7425	fmov.x		%fp0,%fp1
7426	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7427
7428	fmov.s		&0x3950097B,%fp2	# fp2 IS A6
7429
7430	fmul.x		%fp1,%fp2		# fp2 IS S*A6
7431	fmov.x		%fp1,%fp3
7432	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5
7433
7434	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
7435	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
7436	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
7437	mov.l		&0x80000000,SC+4(%a6)
7438	clr.l		SC+8(%a6)
7439
7440	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
7441	mov.l		L_SCR1(%a6),%d1		# d1 is M
7442	neg.w		%d1			# d1 (low word) is -M
7443	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
7444	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
7445	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
7446	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)
7447
7448	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
7449	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
7450	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
7451	mov.l		&0x80000000,ONEBYSC+4(%a6)
7452	clr.l		ONEBYSC+8(%a6)
7453	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))
7454
7455	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
7456	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))
7457
7458	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1
7459
7460	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}
7461
7462#--Step 5
7463#--Compute 2^(J/64)*p
7464
7465	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)
7466
7467#--Step 6
7468#--Step 6.1
#--The three summation orders below differ only in where OnebySc = -2^(-M)
#--is added; a1 still points at T, with t at offset 12.
7469	mov.l		L_SCR1(%a6),%d1		# retrieve M
7470	cmp.l		%d1,&63
7471	ble.b		MLE63
7472#--Step 6.2	M >= 64
7473	fmov.s		12(%a1),%fp1		# fp1 is t
7474	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
7475	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
7476	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
7477	bra		EM1SCALE
7478MLE63:
7479#--Step 6.3	M <= 63
7480	cmp.l		%d1,&-3
7481	bge.b		MGEN3
7482MLTN3:
7483#--Step 6.4	M <= -4
7484	fadd.s		12(%a1),%fp0		# p+t
7485	fadd.x		(%a1),%fp0		# T+(p+t)
7486	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
7487	bra		EM1SCALE
7488MGEN3:
7489#--Step 6.5	-3 <= M <= 63
7490	fmov.x		(%a1)+,%fp1		# fp1 is T
7491	fadd.s		(%a1),%fp0		# fp0 is p+t
7492	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
7493	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)
7494
7495EM1SCALE:
7496#--Step 6.6
7497	fmov.l		%d0,%fpcr		# FPCR := d0 before final multiply
7498	fmul.x		SC(%a6),%fp0		# Step 6.7: ans := Sc * ans
7499	bra		t_inx2
7500
7501EM1SM:
7502#--Step 7	|X| < 1/4.
#--d1 still holds the masked biased exponent from Step 1.1.
7503	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
7504	bge.b		EM1POLY
7505
7506EM1TINY:
7507#--Step 8	|X| < 2^(-65)
#--NOTE(review): biased exponent 0x0033 corresponds to 2^(0x33-0x3FFF) =
#--2^(-16332), not the 2^(-16312) named here and in the algorithm notes
#--(which would be 0x0047). Confirm which of the two is intended.
7508	cmp.l		%d1,&0x00330000		# 2^(-16312)
7509	blt.b		EM12TINY
7510#--Step 8.2
7511	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7512	mov.l		&0x80000000,SC+4(%a6)
7513	clr.l		SC+8(%a6)
7514	fmov.x		(%a0),%fp0
7515	fmov.l		%d0,%fpcr
7516	mov.b		&FADD_OP,%d1		# last inst is ADD
7517	fadd.x		SC(%a6),%fp0		# X - 2^(-16382)
7518	bra		t_catch
7519
7520EM12TINY:
7521#--Step 8.3
#--Scale X up by 2^(140), subtract 2^(-16382), then restore FPCR and
#--scale back down by 2^(-140) as the final rounded operation.
7522	fmov.x		(%a0),%fp0
7523	fmul.d		TWO140(%pc),%fp0
7524	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7525	mov.l		&0x80000000,SC+4(%a6)
7526	clr.l		SC+8(%a6)
7527	fadd.x		SC(%a6),%fp0
7528	fmov.l		%d0,%fpcr
7529	mov.b		&FMUL_OP,%d1		# last inst is MUL
7530	fmul.d		TWON140(%pc),%fp0
7531	bra		t_catch
7532
7533EM1POLY:
7534#--Step 9	exp(X)-1 by a simple polynomial
#--fp1 accumulates the even-index chain (B12,B10,...,B2) and fp2 the
#--odd-index chain (B11,B9,...,B3), both in powers of S = X*X.
7535	fmov.x		(%a0),%fp0		# fp0 is X
7536	fmul.x		%fp0,%fp0		# fp0 is S := X*X
7537	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
7538	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
7539	fmul.x		%fp0,%fp1		# fp1 is S*B12
7540	fmov.s		&0x310F8290,%fp2	# fp2 is B11
7541	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12
7542
7543	fmul.x		%fp0,%fp2		# fp2 is S*B11
7544	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...
7545
7546	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
7547	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...
7548
7549	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
7550	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...
7551
7552	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
7553	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...
7554
7555	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
7556	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...
7557
7558	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
7559	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...
7560
7561	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
7562	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...
7563
7564	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
7565	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...
7566
7567	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
7568	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...
7569
7570	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
7571	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...
7572
7573	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
7574	fadd.x		%fp2,%fp1		# fp1 is Q
7575
7576	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}
7577
7578	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q
7579
7580	fmov.l		%d0,%fpcr
7581	fadd.x		(%a0),%fp0		# X + (S*B1+Q)
7582	bra		t_inx2
7583
7584EM1BIG:
7585#--Step 10	|X| > 70 log2
#--Step 10.1: for X > 0 continue in setox at EXPC1, which reloads the low
#--word of d1 itself; the upper word loaded here is the biased exponent
#--because the sign bit is clear.
7586	mov.l		(%a0),%d1
7587	cmp.l		%d1,&0
7588	bgt.w		EXPC1
7589#--Step 10.2
7590	fmov.s		&0xBF800000,%fp0	# fp0 is -1
7591	fmov.l		%d0,%fpcr
7592	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
7593	bra		t_minx2
7594
7595	global		setoxm1d
7596setoxm1d:
7597#--entry point for EXPM1(X), here X is denormalized
#--All work is delegated to t_extdnrm (defined outside this section).
#--NOTE(review): the algorithm notes describe this entry as "ans := 0;
#--return X + ans" -- presumably what t_extdnrm implements; confirm there.
7598#--Step 0.
7599	bra		t_extdnrm
7600
7601#########################################################################
7602# sgetexp():  returns the exponent portion of the input argument.	#
7603#	      The exponent bias is removed and the exponent value is	#
7604#	      returned as an extended precision number in fp0.		#
7605# sgetexpd(): handles denormalized numbers.				#
7606#									#
7607# sgetman():  extracts the mantissa of the input argument. The		#
7608#	      mantissa is converted to an extended precision number w/	#
7609#	      an exponent of $3fff and is returned in fp0. The range of #
7610#	      the result is [1.0 - 2.0).				#
7611# sgetmand(): handles denormalized numbers.				#
7612#									#
7613# INPUT *************************************************************** #
7614#	a0  = pointer to extended precision input			#
7615#									#
7616# OUTPUT ************************************************************** #
7617#	fp0 = exponent(X) or mantissa(X)				#
7618#									#
7619#########################################################################
7620
7621	global		sgetexp
# sgetexp: return the unbiased exponent of the operand at (a0) in fp0.
# Clobbers d0. When the unbiased exponent is negative, the 'N' bit is
# also written to FPSR_CC(a6).
7622sgetexp:
7623	mov.w		SRC_EX(%a0),%d0		# get the exponent
7624	bclr		&0xf,%d0		# clear the sign bit
7625	subi.w		&0x3fff,%d0		# subtract off the bias
7626	fmov.w		%d0,%fp0		# return exp in fp0
# the branch below tests the condition codes left by the subi.w above
7627	blt.b		sgetexpn		# it's negative
7628	rts
7629
7630sgetexpn:
7631	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7632	rts
7633
7634	global		sgetexpd
# sgetexpd: exponent extraction for a denormalized operand at (a0).
# norm is defined outside this section; per the comment below it is
# expected to leave the normalization shift count in d0 (confirm there).
# The 'N' bit is always written to FPSR_CC(a6) on this path.
7635sgetexpd:
7636	bsr.l		norm			# normalize
7637	neg.w		%d0			# new exp = -(shft amt)
7638	subi.w		&0x3fff,%d0		# subtract off the bias
7639	fmov.w		%d0,%fp0		# return exp in fp0
7640	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7641	rts
7642
7643	global		sgetman
# sgetman: return the mantissa of the operand at (a0) in fp0, keeping the
# operand's sign and forcing the exponent field to $3fff. The result is
# assembled in FP_SCR0 so the source operand is left untouched. Clobbers d0.
7644sgetman:
7645	mov.w		SRC_EX(%a0),%d0		# get the exp
7646	ori.w		&0x7fff,%d0		# set all 15 expo. bits, keep sign
7647	bclr		&0xe,%d0		# make it the new exp +-3fff
7648
7649# here, we build the result in a tmp location so as not to disturb the input
7650	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7653	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
# the branch below tests the sign of the exponent word stored by the mov.w
7654	bmi.b		sgetmann		# it's negative
7655	rts
7656
7657sgetmann:
7658	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7659	rts
7660
7661#
7662# For denormalized numbers, shift the mantissa until the j-bit = 1,
7663# then load the exponent with +/- $3fff.
# The shifting is done by norm (defined outside this section; presumably
# it normalizes the operand at (a0) in place -- confirm there). sgetman
# then overwrites the exponent field regardless of its current value.
7664#
7665	global		sgetmand
7666sgetmand:
7667	bsr.l		norm			# normalize exponent
7668	bra.b		sgetman
7669
7670#########################################################################
7671# scosh():  computes the hyperbolic cosine of a normalized input	#
7672# scoshd(): computes the hyperbolic cosine of a denormalized input	#
7673#									#
7674# INPUT ***************************************************************	#
7675#	a0 = pointer to extended precision input			#
7676#	d0 = round precision,mode					#
7677#									#
7678# OUTPUT **************************************************************	#
7679#	fp0 = cosh(X)							#
7680#									#
7681# ACCURACY and MONOTONICITY *******************************************	#
7682#	The returned result is within 3 ulps in 64 significant bit,	#
7683#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
7684#	rounded to double precision. The result is provably monotonic	#
7685#	in double precision.						#
7686#									#
7687# ALGORITHM ***********************************************************	#
7688#									#
7689#	COSH								#
7690#	1. If |X| > 16380 log2, go to 3.				#
7691#									#
7692#	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
7693#		y = |X|, z = exp(Y), and				#
7694#		cosh(X) = (1/2)*( z + 1/z ).				#
7695#		Exit.							#
7696#									#
7697#	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
7698#									#
7699#	4. (16380 log2 < |X| <= 16480 log2)				#
7700#		cosh(X) = exp(|X|)/2 (cosh is even; no sign factor).	#
7701#		However, invoking exp(|X|) may cause premature		#
7702#		overflow. Thus, we calculate cosh(X) as follows:	#
7703#		Y	:= |X|						#
7704#		Fact	:=	2**(16380)				#
7705#		Y'	:= Y - 16381 log2				#
7706#		cosh(X) := Fact * exp(Y').				#
7707#		Exit.							#
7708#									#
7709#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7710#		Huge*Huge to generate overflow and an infinity with	#
7711#		the appropriate sign. Huge is the largest finite number	#
7712#		in extended format. Exit.				#
7713#									#
7714#########################################################################
7715
# 2^(16380) in extended precision (biased exponent 0x7FFB, mantissa 1.0);
# multiplied into exp(|X| - 16381 log2) by COSHBIG. The trailing zero long
# is outside the 12-byte operand read by fmul.x.
7716TWO16380:
7717	long		0x7FFB0000,0x80000000,0x00000000,0x00000000
7718
7719	global		scosh
# scosh: cosh(X) for the normalized extended-precision operand at (a0).
# d0 is the value written to FPCR before the final operation; it is saved
# on the stack and cleared around the internal call to setox, so the
# intermediate exp() is computed with FPCR = 0.
# Result in fp0; exits through t_catch or t_ovfl2 (defined elsewhere).
# Clobbers d1, a0, fp1 and whatever setox uses.
7720scosh:
7721	fmov.x		(%a0),%fp0		# LOAD INPUT
7722
# d1 := (biased exponent << 16) | top 16 mantissa bits, sign removed
7723	mov.l		(%a0),%d1
7724	mov.w		4(%a0),%d1
7725	and.l		&0x7FFFFFFF,%d1
7726	cmp.l		%d1,&0x400CB167		# 16380 log2 (same bound as setox)
7727	bgt.b		COSHBIG
7728
7729#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7730#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7731
7732	fabs.x		%fp0			# |X|
7733
7734	mov.l		%d0,-(%sp)		# save caller's d0
7735	clr.l		%d0			# setox loads d0 into FPCR
7736	fmovm.x		&0x01,-(%sp)		# save |X| to stack
7737	lea		(%sp),%a0		# pass ptr to |X|
7738	bsr		setox			# FP0 IS EXP(|X|)
7739	add.l		&0xc,%sp		# erase |X| from stack
7740	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
7741	mov.l		(%sp)+,%d0		# restore caller's d0
7742
7743	fmov.s		&0x3E800000,%fp1	# (1/4)
7744	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))
7745
7746	fmov.l		%d0,%fpcr
7747	mov.b		&FADD_OP,%d1		# last inst is ADD
7748	fadd.x		%fp1,%fp0
7749	bra		t_catch
7750
7751COSHBIG:
# NOTE(review): 0x400CB2B3 is not the 0x400CB27C that setox uses for
# "16480 log2" -- the cut-off here is slightly larger than the header's
# documented bound; confirm that this is intentional.
7752	cmp.l		%d1,&0x400CB2B3
7753	bgt.b		COSHHUGE
7754
# T1/T2 are defined outside this section; per the comments below they
# hold the leading and trailing parts of 16381 log2.
7755	fabs.x		%fp0
7756	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
7757	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
7758
7759	mov.l		%d0,-(%sp)		# save caller's d0
7760	clr.l		%d0			# setox loads d0 into FPCR
7761	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
7762	lea		(%sp),%a0		# pass ptr to fp0
7763	bsr		setox
7764	add.l		&0xc,%sp		# clear fp0 from stack
7765	mov.l		(%sp)+,%d0		# restore caller's d0
7766
7767	fmov.l		%d0,%fpcr
7768	mov.b		&FMUL_OP,%d1		# last inst is MUL
7769	fmul.x		TWO16380(%pc),%fp0	# 2^(16380) * exp(|X| - 16381 log2)
7770	bra		t_catch
7771
7772COSHHUGE:
7773	bra		t_ovfl2
7774
7775	global		scoshd
7776#--COSH(X) = 1 FOR DENORMALIZED X
# The result is built as 1.0 + 2**(-126) (both single-precision
# immediates), with the add executed after d0 has been loaded into FPCR,
# then the routine leaves through t_pinx2.
7777scoshd:
7778	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
7779
7780	fmov.l		%d0,%fpcr		# restore users round prec,mode
7781	fadd.s		&0x00800000,%fp0	# add 2**(-126)
7782	bra		t_pinx2
7783
7784#########################################################################
7785# ssinh():  computes the hyperbolic sine of a normalized input		#
7786# ssinhd(): computes the hyperbolic sine of a denormalized input	#
7787#									#
7788# INPUT *************************************************************** #
7789#	a0 = pointer to extended precision input			#
7790#	d0 = round precision,mode					#
7791#									#
7792# OUTPUT ************************************************************** #
7793#	fp0 = sinh(X)							#
7794#									#
7795# ACCURACY and MONOTONICITY *******************************************	#
7796#	The returned result is within 3 ulps in 64 significant bit,	#
7797#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798#	rounded to double precision. The result is provably monotonic	#
7799#	in double precision.						#
7800#									#
7801# ALGORITHM *********************************************************** #
7802#									#
7803#       SINH								#
7804#       1. If |X| > 16380 log2, go to 3.				#
7805#									#
7806#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula	#
7807#               y = |X|, sgn = sign(X), and z = expm1(Y),		#
7808#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).			#
7809#          Exit.							#
7810#									#
7811#       3. If |X| > 16480 log2, go to 5.				#
7812#									#
7813#       4. (16380 log2 < |X| <= 16480 log2)				#
7814#               sinh(X) = sign(X) * exp(|X|)/2.				#
7815#          However, invoking exp(|X|) may cause premature overflow.	#
7816#          Thus, we calculate sinh(X) as follows:			#
7817#             Y       := |X|						#
7818#             sgn     := sign(X)					#
7819#             sgnFact := sgn * 2**(16380)				#
7820#             Y'      := Y - 16381 log2					#
7821#             sinh(X) := sgnFact * exp(Y').				#
7822#          Exit.							#
7823#									#
7824#       5. (|X| > 16480 log2) sinh(X) must overflow. Return		#
7825#          sign(X)*Huge*Huge to generate overflow and an infinity with	#
7826#          the appropriate sign. Huge is the largest finite number in	#
7827#          extended format. Exit.					#
7828#									#
7829#########################################################################
7830
7831	global		ssinh
# ssinh: a0 points to the extended-precision input X and d0 holds the
# value loaded into FPCR before the last FP instruction. d1 and a1 are
# used as scratch; a1 keeps the signed compact form of X so the sign can
# be re-applied at the end.
7832ssinh:
7833	fmov.x		(%a0),%fp0		# LOAD INPUT
7834
7835	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
7836	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
7837	mov.l		%d1,%a1			# save (compacted) operand
7838	and.l		&0x7FFFFFFF,%d1		# clear the sign bit: compact |X|
7839	cmp.l		%d1,&0x400CB167		# is |X| > 16380 log2?
7840	bgt.b		SINHBIG			# yes
7841
7842#--THIS IS THE USUAL CASE, |X| <= 16380 LOG2
7843#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7844
7845	fabs.x		%fp0			# Y = |X|
7846
7847	movm.l		&0x8040,-(%sp)		# {a1/d0}
7848	fmovm.x		&0x01,-(%sp)		# save Y on stack
7849	lea		(%sp),%a0		# pass ptr to Y
7850	clr.l		%d0			# setoxm1 is called with d0 = 0
7851	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
7852	add.l		&0xc,%sp		# clear Y from stack
7853	fmov.l		&0,%fpcr		# write zero to FPCR for the intermediate steps
7854	movm.l		(%sp)+,&0x0201		# {a1/d0}
7855
7856	fmov.x		%fp0,%fp1
7857	fadd.s		&0x3F800000,%fp1	# 1+Z
7858	fmov.x		%fp0,-(%sp)		# push Z
7859	fdiv.x		%fp1,%fp0		# Z/(1+Z)
7860	mov.l		%a1,%d1			# signed compact X
7861	and.l		&0x80000000,%d1		# keep only the sign bit
7862	or.l		&0x3F000000,%d1		# d1 = SIGN(X)*(1/2) IN SGL FMT
7863	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z), pops Z
7864	mov.l		%d1,-(%sp)		# push SIGN(X)*(1/2) as the multiplier
7865
7866	fmov.l		%d0,%fpcr		# restore users round prec,mode
7867	mov.b		&FMUL_OP,%d1		# last inst is MUL
7868	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
7869	bra		t_catch
7870
# 16380 log2 < |X|: overflow exit if |X| > 16480 log2, otherwise compute
# sgn * 2**16380 * exp(|X| - 16381 log2). The scale factor sgn*2**16380
# is assembled on the stack as an extended value (low mantissa long is
# pushed first, sign/exponent long last) and popped by the final fmul.x.
7871SINHBIG:
7872	cmp.l		%d1,&0x400CB2B3		# is |X| > 16480 log2?
7873	bgt		t_ovfl			# yes - overflow exit
7874	fabs.x		%fp0
7875	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
7876	mov.l		&0,-(%sp)		# low mantissa long = 0
7877	mov.l		&0x80000000,-(%sp)	# high mantissa long = 1.0
7878	mov.l		%a1,%d1			# signed compact X
7879	and.l		&0x80000000,%d1		# keep only the sign bit
7880	or.l		&0x7FFB0000,%d1		# biased exponent of 2**16380
7881	mov.l		%d1,-(%sp)		# EXTENDED FMT
7882	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
7883
7884	mov.l		%d0,-(%sp)		# keep caller's d0 across the call
7885	clr.l		%d0			# setox is called with d0 = 0
7886	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
7887	lea		(%sp),%a0		# pass ptr to fp0
7888	bsr		setox			# FP0 IS EXP(|X| - 16381 LOG2)
7889	add.l		&0xc,%sp		# clear fp0 from stack
7890
7891	mov.l		(%sp)+,%d0		# recover caller's d0
7892	fmov.l		%d0,%fpcr		# restore users round prec,mode
7893	mov.b		&FMUL_OP,%d1		# last inst is MUL
7894	fmul.x		(%sp)+,%fp0		# possible exception
7895	bra		t_catch
7896
7897	global		ssinhd
7898#--SINH(X) = X FOR DENORMALIZED X
# No computation is done here; control goes straight to t_extdnrm.
7899ssinhd:
7900	bra		t_extdnrm
7901
7902#########################################################################
7903# stanh():  computes the hyperbolic tangent of a normalized input	#
7904# stanhd(): computes the hyperbolic tangent of a denormalized input	#
7905#									#
7906# INPUT ***************************************************************	#
7907#	a0 = pointer to extended precision input			#
7908#	d0 = round precision,mode					#
7909#									#
7910# OUTPUT **************************************************************	#
7911#	fp0 = tanh(X)							#
7912#									#
7913# ACCURACY and MONOTONICITY *******************************************	#
7914#	The returned result is within 3 ulps in 64 significant bit,	#
7915#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916#	rounded to double precision. The result is provably monotonic	#
7917#	in double precision.						#
7918#									#
7919# ALGORITHM ***********************************************************	#
7920#									#
7921#	TANH								#
7922#	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.		#
7923#									#
7924#	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by		#
7925#		sgn := sign(X), y := 2|X|, z := expm1(Y), and		#
7926#		tanh(X) = sgn*( z/(2+z) ).				#
7927#		Exit.							#
7928#									#
7929#	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
7930#		go to 7.						#
7931#									#
7932#	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.		#
7933#									#
7934#	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by		#
7935#		sgn := sign(X), y := 2|X|, z := exp(Y),			#
7936#		tanh(X) = sgn - [ sgn*2/(1+z) ].			#
7937#		Exit.							#
7938#									#
7939#	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we	#
7940#		calculate Tanh(X) by					#
7941#		sgn := sign(X), Tiny := 2**(-126),			#
7942#		tanh(X) := sgn - sgn*Tiny.				#
7943#		Exit.							#
7944#									#
7945#	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.			#
7946#									#
7947#########################################################################
7948
# Scratch layout used by stanh. X and V both name FP_SCR0, so V reuses the
# storage of X once X is no longer needed. SGN is a longword in L_SCR3.
7949	set		X,FP_SCR0
7950	set		XFRAC,X+4
7951
7952	set		SGN,L_SCR3
7953
7954	set		V,FP_SCR0
7955
7956	global		stanh
# stanh: a0 points to the extended-precision input X and d0 holds the
# value loaded into FPCR before the last FP instruction. d1 is scratch.
# Paths, selected on the compact form of |X|:
#   2**(-40) <= |X| <= (5/2)log2	-> sgn * z/(z+2), z = expm1(2|X|)
#   |X| < 2**(-40)			-> TANHSM, returns X
#   (5/2)log2 < |X| <= 50 log2		-> sgn - sgn*2/(exp(2|X|)+1)
#   |X| > 50 log2			-> TANHHUGE, sgn - sgn*2**(-126)
7957stanh:
7958	fmov.x		(%a0),%fp0		# LOAD INPUT
7959
7960	fmov.x		%fp0,X(%a6)		# keep a copy of X in scratch
7961	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
7962	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
# The first longword of the saved X is overwritten with the compact form.
# NOTE(review): the low word now holds mantissa bits; presumably that word
# is ignored when X(%a6) is later loaded as extended - confirm.
7963	mov.l		%d1,X(%a6)
7964	and.l		&0x7FFFFFFF,%d1		# clear the sign bit: compact |X|
7965	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
7966	blt.w		TANHBORS		# yes
7967	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
7968	bgt.w		TANHBORS		# yes
7969
7970#--THIS IS THE USUAL CASE
7971#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7972
7973	mov.l		X(%a6),%d1		# signed compact X
7974	mov.l		%d1,SGN(%a6)		# stash it; reduced to the sign bit below
7975	and.l		&0x7FFF0000,%d1		# biased exponent of |X| only
7976	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
7977	mov.l		%d1,X(%a6)		# X(%a6) now holds 2|X|
7978	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X
7979	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|
7980
7981	mov.l		%d0,-(%sp)		# keep caller's d0 across the call
7982	clr.l		%d0			# setoxm1 is called with d0 = 0
7983	fmovm.x		&0x1,-(%sp)		# save Y on stack
7984	lea		(%sp),%a0		# pass ptr to Y
7985	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
7986	add.l		&0xc,%sp		# clear Y from stack
7987	mov.l		(%sp)+,%d0		# recover caller's d0
7988
7989	fmov.x		%fp0,%fp1
7990	fadd.s		&0x40000000,%fp1	# Z+2
7991	mov.l		SGN(%a6),%d1		# sign bit of X
7992	fmov.x		%fp1,V(%a6)		# V = Z+2
7993	eor.l		%d1,V(%a6)		# V = SIGN(X)*(Z+2): flip sign bit if X < 0
7994
7995	fmov.l		%d0,%fpcr		# restore users round prec,mode
7996	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
7997	bra		t_inx2
7998
# |X| < 2**(-40) or |X| > (5/2)log2: split on |X| < 1 and on 50 log2.
7999TANHBORS:
8000	cmp.l		%d1,&0x3FFF8000		# is |X| < 1?
8001	blt.w		TANHSM			# yes - this is the tiny case
8002
8003	cmp.l		%d1,&0x40048AA1		# is |X| > 50 LOG2?
8004	bgt.w		TANHHUGE		# yes
8005
8006#-- (5/2) LOG2 < |X| <= 50 LOG2,
8007#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8008#--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
8009
8010	mov.l		X(%a6),%d1		# signed compact X
8011	mov.l		%d1,SGN(%a6)		# stash it; reduced to the sign bit below
8012	and.l		&0x7FFF0000,%d1		# biased exponent of |X| only
8013	add.l		&0x00010000,%d1		# EXPO OF 2|X|
8014	mov.l		%d1,X(%a6)		# Y = 2|X|
8015	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X
8016	mov.l		SGN(%a6),%d1		# (d1 is loaded from SGN again after setox)
8017	fmov.x		X(%a6),%fp0		# Y = 2|X|
8018
8019	mov.l		%d0,-(%sp)		# keep caller's d0 across the call
8020	clr.l		%d0			# setox is called with d0 = 0
8021	fmovm.x		&0x01,-(%sp)		# save Y on stack
8022	lea		(%sp),%a0		# pass ptr to Y
8023	bsr		setox			# FP0 IS EXP(Y)
8024	add.l		&0xc,%sp		# clear Y from stack
8025	mov.l		(%sp)+,%d0		# recover caller's d0
8026	mov.l		SGN(%a6),%d1		# sign bit of X
8027	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1
8028
# sign bit ^ 0xC0000000 gives 0xC0000000 (-2.0) for X >= 0 and
# 0x40000000 (+2.0) for X < 0, i.e. -SIGN(X)*2 in single format.
8029	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
8030	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
8031	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]
8032
8033	mov.l		SGN(%a6),%d1		# sign bit of X
8034	or.l		&0x3F800000,%d1		# SGN
8035	fmov.s		%d1,%fp0		# SGN IN SGL FMT
8036
8037	fmov.l		%d0,%fpcr		# restore users round prec,mode
8038	mov.b		&FADD_OP,%d1		# last inst is ADD
8039	fadd.x		%fp1,%fp0		# SGN - SGN*2/[EXP(Y)+1]
8040	bra		t_inx2
8041
# |X| < 2**(-40): the saved X is reloaded after d0 has been loaded into
# FPCR and returned as the result.
8042TANHSM:
8043	fmov.l		%d0,%fpcr		# restore users round prec,mode
8044	mov.b		&FMOV_OP,%d1		# last inst is MOVE
8045	fmov.x		X(%a6),%fp0		# last inst - possible exception set
8046	bra		t_catch
8047
8048#---RETURN SGN(X) - SGN(X)EPS
# EPS is 2**(-126) in single format (0x00800000). sign ^ 0x80800000
# yields -EPS for X >= 0 and +EPS for X < 0.
8049TANHHUGE:
8050	mov.l		X(%a6),%d1		# signed compact X
8051	and.l		&0x80000000,%d1		# keep only the sign bit
8052	or.l		&0x3F800000,%d1		# SIGN(X)*1.0 IN SGL FMT
8053	fmov.s		%d1,%fp0		# fp0 = SGN
8054	and.l		&0x80000000,%d1		# back to the bare sign bit
8055	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS
8056
8057	fmov.l		%d0,%fpcr		# restore users round prec,mode
8058	fadd.s		%d1,%fp0		# SGN - SGN*EPS
8059	bra		t_inx2
8060
8061	global		stanhd
8062#--TANH(X) = X FOR DENORMALIZED X
# No computation is done here; control goes straight to t_extdnrm.
8063stanhd:
8064	bra		t_extdnrm
8065
8066#########################################################################
8067# slogn():    computes the natural logarithm of a normalized input	#
8068# slognd():   computes the natural logarithm of a denormalized input	#
8069# slognp1():  computes the log(1+X) of a normalized input		#
8070# slognp1d(): computes the log(1+X) of a denormalized input		#
8071#									#
8072# INPUT ***************************************************************	#
8073#	a0 = pointer to extended precision input			#
8074#	d0 = round precision,mode					#
8075#									#
8076# OUTPUT **************************************************************	#
8077#	fp0 = log(X) or log(1+X)					#
8078#									#
8079# ACCURACY and MONOTONICITY *******************************************	#
8080#	The returned result is within 2 ulps in 64 significant bit,	#
8081#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8082#	rounded to double precision. The result is provably monotonic	#
8083#	in double precision.						#
8084#									#
8085# ALGORITHM ***********************************************************	#
8086#	LOGN:								#
8087#	Step 1. If |X-1| < 1/16, approximate log(X) by an odd		#
8088#		polynomial in u, where u = 2(X-1)/(X+1). Otherwise,	#
8089#		move on to Step 2.					#
8090#									#
8091#	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first	#
8092#		seven significant bits of Y plus 2**(-7), i.e.		#
8093#		F = 1.xxxxxx1 in base 2 where the six "x" match those	#
8094#		of Y. Note that |Y-F| <= 2**(-7).			#
8095#									#
8096#	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a		#
8097#		polynomial in u, log(1+u) = poly.			#
8098#									#
8099#	Step 4. Reconstruct						#
8100#		log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)	#
8101#		by k*log(2) + (log(F) + poly). The values of log(F) are	#
8102#		calculated beforehand and stored in the program.	#
8103#									#
8104#	lognp1:								#
8105#	Step 1: If |X| < 1/16, approximate log(1+X) by an odd		#
8106#		polynomial in u where u = 2X/(2+X). Otherwise, move on	#
8107#		to Step 2.						#
8108#									#
8109#	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done	#
8110#		in Step 2 of the algorithm for LOGN and compute		#
8111#		log(1+X) as k*log(2) + log(F) + poly where poly		#
8112#		approximates log(1+u), u = (Y-F)/F.			#
8113#									#
8114#	Implementation Notes:						#
8115#	Note 1. There are 64 different possible values for F, thus 64	#
8116#		log(F)'s need to be tabulated. Moreover, the values of	#
8117#		1/F are also tabulated so that the division in (Y-F)/F	#
8118#		can be performed by a multiplication.			#
8119#									#
8120#	Note 2. In Step 2 of lognp1, in order to preserve accuracy,	#
8121#		the value Y-F has to be calculated carefully when	#
8122#		1/2 <= X < 3/2.						#
8123#									#
8124#	Note 3. To fully exploit the pipeline, polynomials are usually	#
8125#		separated into two parts evaluated independently before	#
8126#		being added up.						#
8127#									#
8128#########################################################################
# Constants for slogn/slognp1.
# LOGOF2 is an extended-precision value (used with fmul.x); the
# coefficient comments below call it LOG2 / log(2).
8129LOGOF2:
8130	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8131
# Single-precision constants: 1.0, 0.0, +infinity, -1.0.
8132one:
8133	long		0x3F800000
8134zero:
8135	long		0x00000000
8136infty:
8137	long		0x7F800000
8138negone:
8139	long		0xBF800000
8140
# LOGA1..LOGA6: double-precision coefficients A1..A6 of the polynomial
# evaluated at LP1CONT1 (read with fmul.d / fadd.d).
8141LOGA6:
8142	long		0x3FC2499A,0xB5E4040B
8143LOGA5:
8144	long		0xBFC555B5,0x848CB7DB
8145
8146LOGA4:
8147	long		0x3FC99999,0x987D8730
8148LOGA3:
8149	long		0xBFCFFFFF,0xFF6F7E97
8150
8151LOGA2:
8152	long		0x3FD55555,0x555555A4
8153LOGA1:
8154	long		0xBFE00000,0x00000008
8155
# LOGB1..LOGB5: double-precision coefficients B1..B5 of the odd
# polynomial evaluated at LP1CONT2 (read with fmov.d / fadd.d).
8156LOGB5:
8157	long		0x3F175496,0xADD7DAD6
8158LOGB4:
8159	long		0x3F3C71C2,0xFE80C7E0
8160
8161LOGB3:
8162	long		0x3F624924,0x928BCCFF
8163LOGB2:
8164	long		0x3F899999,0x999995EC
8165
8166LOGB1:
8167	long		0x3FB55555,0x55555555
# TWO is read with fmov.s in KISNEG1, so only its first long (2.0 in
# single format) is used there.
8168TWO:
8169	long		0x40000000,0x00000000
8170
# LTHOLD: extended-precision threshold 2**(-102) (biased exponent 0x3F99,
# mantissa 1.0). slognp1 returns its argument when |X| is not greater.
8171LTHOLD:
8172	long		0x3f990000,0x80000000,0x00000000,0x00000000
8173
# LOGTBL: 64 entries of 32 bytes, two rows per entry. LOGMAIN/LP1CARE form
# the byte offset as (FFRAC & 0x7E000000) >> 20, i.e. the six fraction
# bits after the leading 1 of F, times 32. The first row of an entry is
# read as 1/F (fmul.x (%a0)); the second row, 16 bytes further on, is read
# as log(F) (fadd.x (%a0) after add.l &16,%a0). Each row is an extended
# value followed by one zero long.
8174LOGTBL:
8175	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189	long		0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190	long		0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191	long		0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192	long		0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193	long		0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194	long		0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195	long		0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196	long		0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197	long		0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198	long		0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199	long		0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200	long		0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201	long		0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202	long		0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203	long		0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204	long		0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205	long		0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206	long		0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207	long		0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208	long		0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209	long		0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210	long		0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211	long		0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212	long		0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213	long		0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214	long		0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215	long		0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216	long		0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217	long		0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218	long		0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219	long		0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220	long		0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221	long		0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222	long		0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223	long		0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224	long		0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225	long		0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226	long		0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227	long		0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228	long		0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229	long		0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230	long		0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231	long		0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232	long		0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233	long		0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234	long		0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235	long		0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236	long		0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237	long		0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238	long		0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239	long		0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240	long		0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241	long		0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242	long		0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243	long		0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244	long		0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245	long		0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246	long		0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247	long		0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248	long		0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249	long		0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250	long		0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251	long		0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252	long		0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253	long		0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254	long		0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255	long		0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256	long		0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257	long		0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258	long		0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259	long		0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260	long		0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261	long		0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262	long		0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263	long		0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264	long		0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265	long		0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266	long		0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267	long		0x3FFE0000,0x94458094,0x45809446,0x00000000
8268	long		0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269	long		0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270	long		0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271	long		0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272	long		0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273	long		0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274	long		0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275	long		0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276	long		0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277	long		0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278	long		0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279	long		0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280	long		0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281	long		0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282	long		0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283	long		0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284	long		0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285	long		0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286	long		0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287	long		0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288	long		0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289	long		0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290	long		0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291	long		0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292	long		0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293	long		0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294	long		0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295	long		0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296	long		0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297	long		0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298	long		0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299	long		0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300	long		0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
8302	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8303
# Scratch layout for the log routines. X, KLOG2 and SAVEU all name
# FP_SCR0 and are used at different times; F is FP_SCR1. XDCARE is the
# second word of X, XFRAC/FFRAC are the high mantissa longwords.
8304	set		ADJK,L_SCR1
8305
8306	set		X,FP_SCR0
8307	set		XDCARE,X+2
8308	set		XFRAC,X+4
8309
8310	set		F,FP_SCR1
8311	set		FFRAC,F+4
8312
8313	set		KLOG2,FP_SCR0
8314
8315	set		SAVEU,FP_SCR0
8316
8317	global		slogn
8318#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# a0 points to the extended-precision input, d0 holds the value loaded
# into FPCR before the final add. d1 and a0 are used as scratch.
8319slogn:
8320	fmov.x		(%a0),%fp0		# LOAD INPUT
8321	mov.l		&0x00000000,ADJK(%a6)	# no exponent adjustment for normalized input
8322
# LOGBGN is also entered from slognd with a0 pointing at the normalized
# copy of the input and ADJK holding the exponent adjustment.
8323LOGBGN:
8324#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8325#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8326
8327	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
8328	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
8329
8330	mov.l		(%a0),X(%a6)		# copy the 12-byte operand into X
8331	mov.l		4(%a0),X+4(%a6)
8332	mov.l		8(%a0),X+8(%a6)
8333
8334	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
8335	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
8336# X IS POSITIVE, CHECK IF X IS NEAR 1
8337	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16?
8338	blt.b		LOGMAIN			# YES
8339	cmp.l		%d1,&0x3fff8841		# IS X > 17/16?
8340	ble.w		LOGNEAR1		# NO
8341
# LOGMAIN is also entered from LP1REAL (slognp1) with d1 = compact form
# of X = ROUND(1+Z), X(%a6) holding that value and ADJK = 0.
8342LOGMAIN:
8343#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8344
8345#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8346#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8347#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8348#--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8349#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8350#--LOG(1+U) CAN BE VERY EFFICIENT.
8351#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8352#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8353
8354#--GET K, Y, F, AND ADDRESS OF 1/F.
8355	asr.l		&8,%d1
8356	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
8357	sub.l		&0x3FFF,%d1		# THIS IS K
8358	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8359	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
8360	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT
8361
8362#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8363	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
8364	mov.l		XFRAC(%a6),FFRAC(%a6)
8365	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
8366	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
8367	mov.l		FFRAC(%a6),%d1	# READY TO GET ADDRESS OF 1/F
8368	and.l		&0x7E000000,%d1		# the six bits after the leading 1
8369	asr.l		&8,%d1
8370	asr.l		&8,%d1
8371	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT
8372	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F
8373
8374	fmov.x		X(%a6),%fp0		# FP0 IS Y
8375	mov.l		&0x3fff0000,F(%a6)	# exponent of F: same as Y
8376	clr.l		F+8(%a6)		# low mantissa long of F is zero
8377	fsub.x		F(%a6),%fp0		# Y-F
8378	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
8379#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8380#--REGISTERS SAVED: FPCR, FP1, FP2
8381
8382LP1CONT1:
8383#--A RE-ENTRY POINT FOR LOGNP1
# On entry: FP0 = Y-F, FP1 = K, A0 = address of 1/F, FP2-3 saved on stack.
8384	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
8385	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
8386	fmov.x		%fp0,%fp2
8387	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
8388	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1
8389
8390#--LOG(1+U) IS APPROXIMATED BY
8391#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8392#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
8393
8394	fmov.x		%fp2,%fp3		# FP3 keeps V for the later multiplies
8395	fmov.x		%fp2,%fp1
8396
8397	fmul.d		LOGA6(%pc),%fp1		# V*A6
8398	fmul.d		LOGA5(%pc),%fp2		# V*A5
8399
8400	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
8401	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5
8402
8403	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
8404	fmul.x		%fp3,%fp2		# V*(A3+V*A5)
8405
8406	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
8407	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)
8408
8409	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
8410	add.l		&16,%a0			# ADDRESS OF LOG(F)
8411	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))
8412
8413	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
8414	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))
8415
8416	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
8417	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
8418	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)
8419
8420	fmov.l		%d0,%fpcr		# restore users round prec,mode
8421	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
8422	bra		t_inx2
8423
8424
# LOGNEAR1: reached from LOGBGN when the compact form of X lies between
# the two near-1 thresholds. FP0 holds X on entry.
8425LOGNEAR1:
8426
8427# if the input is exactly equal to one, then exit through ld_pzero.
8428# if these 2 lines weren't here, the correct answer would be returned
8429# but the INEX2 bit would be set.
8430	fcmp.b		%fp0,&0x1		# is it equal to one?
8431	fbeq.l		ld_pzero		# yes
8432
8433#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8434	fmov.x		%fp0,%fp1
8435	fsub.s		one(%pc),%fp1		# FP1 IS X-1
8436	fadd.s		one(%pc),%fp0		# FP0 IS X+1
8437	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
8438#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8440
8441LP1CONT2:
8442#--THIS IS A RE-ENTRY POINT FOR LOGNP1
# On entry: FP1 = numerator, FP0 = denominator of U.
8443	fdiv.x		%fp0,%fp1		# FP1 IS U
8444	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
8445#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446#--LET V=U*U, W=V*V, CALCULATE
8447#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
8449	fmov.x		%fp1,%fp0
8450	fmul.x		%fp0,%fp0		# FP0 IS V
8451	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
8452	fmov.x		%fp0,%fp1
8453	fmul.x		%fp1,%fp1		# FP1 IS W
8454
8455	fmov.d		LOGB5(%pc),%fp3		# B5
8456	fmov.d		LOGB4(%pc),%fp2		# B4
8457
8458	fmul.x		%fp1,%fp3		# W*B5
8459	fmul.x		%fp1,%fp2		# W*B4
8460
8461	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
8462	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4
8463
8464	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED
8465
8466	fmul.x		%fp0,%fp2		# V*(B2+W*B4)
8467
8468	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
8469	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V
8470
8471	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED
8473
8474	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8475
8476	fmov.l		%d0,%fpcr		# restore users round prec,mode
8477	fadd.x		SAVEU(%a6),%fp0		# final add: U + U*V*(...)
8478	bra		t_inx2
8479
8480#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
# Reached from LOGBGN when the compact form of X is negative; leaves
# through the operand-error exit t_operr.
8481LOGNEG:
8482	bra		t_operr
8483
8484	global		slognd
# slognd: a0 points to a non-zero denormalized extended input. The
# mantissa is shifted left by k bits until its top bit is set, the result
# is written to X(%a6), ADJK is set to -k, and control continues at LOGBGN
# with a0 pointing to X(%a6). d2-d7 are saved and restored here.
8485slognd:
8486#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8487
# This initial value is overwritten with -k on both paths below.
8488	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0
8489
8490#----normalize the input value by left shifting k bits (k to be determined
8491#----below), adjusting exponent and storing -k to  ADJK
8492#----the value TWOTO100 is no longer needed.
8493#----Note that this code assumes the denormalized input is NON-ZERO.
8494
8495	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
8496	mov.l		(%a0),%d3		# D3 = first longword (sign/exponent) of X
8497	mov.l		4(%a0),%d4
8498	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
8499	clr.l		%d2			# D2 used for holding K
8500
8501	tst.l		%d4			# is the high mantissa long zero?
8502	bne.b		Hi_not0			# no
8503
# High mantissa long is zero: move the low long up (a 32-bit shift) and
# then shift out the remaining leading zeros.
8504Hi_0:
8505	mov.l		%d5,%d4
8506	clr.l		%d5
8507	mov.l		&32,%d2			# k starts at 32
8508	clr.l		%d6
8509	bfffo		%d4{&0:&32},%d6		# d6 = offset of first 1 in d4
8510	lsl.l		%d6,%d4
8511	add.l		%d6,%d2			# (D3,D4,D5) is normalized
8512
8513	mov.l		%d3,X(%a6)
8514	mov.l		%d4,XFRAC(%a6)
8515	mov.l		%d5,XFRAC+4(%a6)
8516	neg.l		%d2
8517	mov.l		%d2,ADJK(%a6)		# ADJK = -k
8518	fmov.x		X(%a6),%fp0		# FP0 = normalized input
8519	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8520	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
8521	bra.w		LOGBGN			# begin regular log(X)
8522
# High mantissa long is non-zero: shift the 64-bit mantissa left by the
# number of leading zeros in d4, carrying bits from d5 into d4.
8523Hi_not0:
8524	clr.l		%d6
8525	bfffo		%d4{&0:&32},%d6		# find first 1
8526	mov.l		%d6,%d2			# get k
8527	lsl.l		%d6,%d4
8528	mov.l		%d5,%d7			# a copy of D5
8529	lsl.l		%d6,%d5
8530	neg.l		%d6
8531	add.l		&32,%d6			# d6 = 32 - k
8532	lsr.l		%d6,%d7			# top k bits of the old D5
8533	or.l		%d7,%d4			# (D3,D4,D5) normalized
8534
8535	mov.l		%d3,X(%a6)
8536	mov.l		%d4,XFRAC(%a6)
8537	mov.l		%d5,XFRAC+4(%a6)
8538	neg.l		%d2
8539	mov.l		%d2,ADJK(%a6)		# ADJK = -k
8540	fmov.x		X(%a6),%fp0		# FP0 = normalized input
8541	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8542	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
8543	bra.w		LOGBGN			# begin regular log(X)
8544
8545	global		slognp1
8546#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# a0 points to the extended-precision input Z, d0 holds the value loaded
# into FPCR before the last FP instruction. When |Z| is not greater than
# LTHOLD the argument itself is returned; otherwise LP1REAL dispatches on
# X = ROUND(1+Z).
8547slognp1:
8548	fmov.x		(%a0),%fp0		# LOAD INPUT
8549	fabs.x		%fp0			# test magnitude
8550	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
8551	fbgt.w		LP1REAL			# if greater, continue
8552	fmov.l		%d0,%fpcr		# restore users round prec,mode
8553	mov.b		&FMOV_OP,%d1		# last inst is MOVE
8554	fmov.x		(%a0),%fp0		# return signed argument
8555	bra		t_catch
8556
8557LP1REAL:
8558	fmov.x		(%a0),%fp0		# LOAD INPUT
8559	mov.l		&0x00000000,ADJK(%a6)	# no exponent adjustment
8560	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
8561	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
8562	fmov.x		%fp0,X(%a6)
8563	mov.w		XFRAC(%a6),XDCARE(%a6)	# top 16 mantissa bits into 2nd word of X
8564	mov.l		X(%a6),%d1		# d1 = compact form of X
8565	cmp.l		%d1,&0
8566	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
8567	cmp.l		%d1,&0x3ffe8000		# IS BOUNDS [1/2,3/2]?
8568	blt.w		LOGMAIN			# X < 1/2
8569	cmp.l		%d1,&0x3fffc000
8570	bgt.w		LOGMAIN			# X > 3/2
8571#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8574
8575LP1NEAR1:
8576#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577	cmp.l		%d1,&0x3ffef07d
8578	blt.w		LP1CARE			# below the lower bound
8579	cmp.l		%d1,&0x3fff8841
8580	bgt.w		LP1CARE			# above the upper bound
8581
8582LP1ONE16:
8583#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585	fadd.x		%fp1,%fp1		# FP1 IS 2Z
8586	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
8587#--U = FP1/FP0
8588	bra.w		LP1CONT2
8589
# LP1CARE: reached from LP1NEAR1 with d1 = compact form of X = ROUND(1+Z),
# X(%a6) holding X and FP1 holding Z.
8590LP1CARE:
8591#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594#--THERE ARE ONLY TWO CASES.
8595#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
8597#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8599
8600	mov.l		XFRAC(%a6),FFRAC(%a6)
8601	and.l		&0xFE000000,FFRAC(%a6)	# first 7 bits of the mantissa
8602	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
8603	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
8604	bge.b		KISZERO
8605
8606KISNEG1:
8607	fmov.s		TWO(%pc),%fp0		# FP0 = 2.0
8608	mov.l		&0x3fff0000,F(%a6)	# exponent of F
8609	clr.l		F+8(%a6)		# low mantissa long of F is zero
8610	fsub.x		F(%a6),%fp0		# 2-F
8611	mov.l		FFRAC(%a6),%d1
8612	and.l		&0x7E000000,%d1		# the six bits after the leading 1
8613	asr.l		&8,%d1
8614	asr.l		&8,%d1
8615	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
8616	fadd.x		%fp1,%fp1		# GET 2Z
8617	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
8618	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
8619	lea		LOGTBL(%pc),%a0		# base of the 1/F, LOG(F) table
8620	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
8621	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
8622	bra.w		LP1CONT1
8623
8624KISZERO:
8625	fmov.s		one(%pc),%fp0		# FP0 = 1.0
8626	mov.l		&0x3fff0000,F(%a6)	# exponent of F
8627	clr.l		F+8(%a6)		# low mantissa long of F is zero
8628	fsub.x		F(%a6),%fp0		# 1-F
8629	mov.l		FFRAC(%a6),%d1
8630	and.l		&0x7E000000,%d1		# the six bits after the leading 1
8631	asr.l		&8,%d1
8632	asr.l		&8,%d1
8633	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
8634	fadd.x		%fp1,%fp0		# FP0 IS Y-F
8635	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
8636	lea		LOGTBL(%pc),%a0		# base of the 1/F, LOG(F) table
8637	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
8638	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
8639	bra.w		LP1CONT1
8640
# LP1NEG0: reached from LP1REAL when the compact form of X = ROUND(1+Z)
# is <= 0. A zero leaves through t_dz, a negative value through t_operr.
8641LP1NEG0:
8642#--FPCR SAVED. D1 IS X IN COMPACT FORM.
8643	cmp.l		%d1,&0
8644	blt.b		LP1NEG			# X < 0
8645LP1ZERO:
8646	fmov.s		negone(%pc),%fp0	# FP0 = -1.0 before the exit
8647
8648	fmov.l		%d0,%fpcr		# restore users round prec,mode
8649	bra		t_dz
8650
8651LP1NEG:
8652	fmov.s		zero(%pc),%fp0		# FP0 = 0.0 before the exit
8653
8654	fmov.l		%d0,%fpcr		# restore users round prec,mode
8655	bra		t_operr
8656
8657	global		slognp1d
8658#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8659# Simply return the denorm
# No computation is done here; control goes straight to t_extdnrm.
8660slognp1d:
8661	bra		t_extdnrm
8662
8663#########################################################################
8664# satanh():  computes the inverse hyperbolic tangent of a norm input	#
8665# satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
8666#									#
8667# INPUT ***************************************************************	#
8668#	a0 = pointer to extended precision input			#
8669#	d0 = round precision,mode					#
8670#									#
8671# OUTPUT **************************************************************	#
8672#	fp0 = arctanh(X)						#
8673#									#
8674# ACCURACY and MONOTONICITY *******************************************	#
8675#	The returned result is within 3 ulps in	64 significant bit,	#
8676#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8677#	rounded to double precision. The result is provably monotonic	#
8678#	in double precision.						#
8679#									#
8680# ALGORITHM ***********************************************************	#
8681#									#
8682#	ATANH								#
8683#	1. If |X| >= 1, go to 3.					#
8684#									#
8685#	2. (|X| < 1) Calculate atanh(X) by				#
8686#		sgn := sign(X)						#
8687#		y := |X|						#
8688#		z := 2y/(1-y)						#
8689#		atanh(X) := sgn * (1/2) * logp1(z)			#
8690#		Exit.							#
8691#									#
8692#	3. If |X| > 1, go to 5.						#
8693#									#
8694#	4. (|X| = 1) Generate infinity with an appropriate sign and	#
8695#		divide-by-zero by					#
8696#		sgn := sign(X)						#
8697#		atanh(X) := sgn / (+0).					#
8698#		Exit.							#
8699#									#
8700#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
8701#		Exit.							#
8702#									#
8703#########################################################################
8704
8705	global		satanh
# satanh: fp0 = atanh(X) for the extended-precision X at (%a0);
# d0 = user rnd prec,mode.
# |X| < 1 is computed as SIGN(X) * 1/2 * LOG1P(2|X|/(1-|X|)) using slognp1.
# |X| >= 1 is dispatched to t_dz (|X| = 1) or t_operr (|X| > 1).
8706satanh:
8707	mov.l		(%a0),%d1		# d1 hi word = sign/exponent word of X
8708	mov.w		4(%a0),%d1		# d1 lo word = top 16 mantissa bits
8709	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact form of |X|
8710	cmp.l		%d1,&0x3FFF8000		# 0x3FFF8000 = 1.0 in compact form
8711	bge.b		ATANHBIG		# |X| >= 1
8712
8713#--THIS IS THE USUAL CASE, |X| < 1
8714#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8715
8716	fabs.x		(%a0),%fp0		# Y = |X|
8717	fmov.x		%fp0,%fp1		# fp1 = Y
8718	fneg.x		%fp1			# -Y
8719	fadd.x		%fp0,%fp0		# 2Y
8720	fadd.s		&0x3F800000,%fp1	# 1-Y (0x3F800000 = single 1.0)
8721	fdiv.x		%fp1,%fp0		# Z = 2Y/(1-Y)
8722	mov.l		(%a0),%d1		# refetch sign/exponent of X
8723	and.l		&0x80000000,%d1		# keep only the sign bit
8724	or.l		&0x3F000000,%d1		# single-precision SIGN(X)*HALF
8725	mov.l		%d1,-(%sp)		# park it for the final fmul.s
8726
8727	mov.l		%d0,-(%sp)		# save rnd prec,mode
8728	clr.l		%d0			# pass ext prec,RN
8729	fmovm.x		&0x01,-(%sp)		# save Z on stack
8730	lea		(%sp),%a0		# pass ptr to Z
8731	bsr		slognp1			# LOG1P(Z)
8732	add.l		&0xc,%sp		# clear Z from stack
8733
8734	mov.l		(%sp)+,%d0		# fetch old prec,mode
8735	fmov.l		%d0,%fpcr		# load it
8736	mov.b		&FMUL_OP,%d1		# last inst is MUL
8737	fmul.s		(%sp)+,%fp0		# fp0 *= SIGN(X)*HALF; pops that long
8738	bra		t_catch			# exit; presumably t_catch uses the d1 op tag -- confirm
8739
8740ATANHBIG:
# |X| >= 1: exactly 1 goes to t_dz, anything larger goes to t_operr.
8741	fabs.x		(%a0),%fp0		# |X|
8742	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
8743	fbgt		t_operr			# |X| > 1
8744	bra		t_dz			# |X| = 1
8745
8746	global		satanhd
8747#--ATANH(X) = X FOR DENORMALIZED X
# Same stub as the other denorm entry points in this file: hand off to
# t_extdnrm (defined elsewhere) without doing any arithmetic here.
8748satanhd:
8749	bra		t_extdnrm		# plain branch, not bsr: no return here
8750
8751#########################################################################
8752# slog10():  computes the base-10 logarithm of a normalized input	#
8753# slog10d(): computes the base-10 logarithm of a denormalized input	#
8754# slog2():   computes the base-2 logarithm of a normalized input	#
8755# slog2d():  computes the base-2 logarithm of a denormalized input	#
8756#									#
8757# INPUT *************************************************************** #
8758#	a0 = pointer to extended precision input			#
8759#	d0 = round precision,mode					#
8760#									#
8761# OUTPUT **************************************************************	#
8762#	fp0 = log_10(X) or log_2(X)					#
8763#									#
8764# ACCURACY and MONOTONICITY *******************************************	#
8765#	The returned result is within 1.7 ulps in 64 significant bit,	#
8766#	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
8767#	rounded to double precision. The result is provably monotonic	#
8768#	in double precision.						#
8769#									#
8770# ALGORITHM ***********************************************************	#
8771#									#
8772#       slog10d:							#
8773#									#
8774#       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
8775#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8776#       Notes:  Default means round-to-nearest mode, no floating-point	#
8777#               traps, and precision control = double extended.		#
8778#									#
8779#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8780#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8781#									#
8782#       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
8783#            2.1 Restore the user FPCR					#
8784#            2.2 Return ans := Y * INV_L10.				#
8785#									#
8786#       slog10:								#
8787#									#
8788#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8789#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8790#       Notes:  Default means round-to-nearest mode, no floating-point	#
8791#               traps, and precision control = double extended.		#
8792#									#
8793#       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
8794#									#
8795#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
8796#            2.1  Restore the user FPCR					#
8797#            2.2  Return ans := Y * INV_L10.				#
8798#									#
8799#       sLog2d:								#
8800#									#
8801#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8802#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8803#       Notes:  Default means round-to-nearest mode, no floating-point	#
8804#               traps, and precision control = double extended.		#
8805#									#
8806#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8807#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8808#									#
8809#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
8810#            2.1  Restore the user FPCR					#
8811#            2.2  Return ans := Y * INV_L2.				#
8812#									#
8813#       sLog2:								#
8814#									#
8815#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8816#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8817#       Notes:  Default means round-to-nearest mode, no floating-point	#
8818#               traps, and precision control = double extended.		#
8819#									#
8820#       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
8821#               go to Step 3.						#
8822#									#
8823#       Step 2.   Return k.						#
8824#            2.1  Get integer k, X = 2^k.				#
8825#            2.2  Restore the user FPCR.				#
8826#            2.3  Return ans := convert-to-double-extended(k).		#
8827#									#
8828#       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
8829#									#
8830#       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
8831#            4.1  Restore the user FPCR					#
8832#            4.2  Return ans := Y * INV_L2.				#
8833#									#
8834#########################################################################
8835
# Extended-precision multipliers used by the slog10*/slog2* routines via
# fmul.x.  Each is three longwords of data followed by a zero longword.
# The first three longwords equal the SMALRN table entries for log10(e)
# and log2(e) respectively.
8836INV_L10:
8837	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000	# 1/log(10)
8838
8839INV_L2:
8840	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000	# 1/log(2)
8841
8842	global		slog10
8843#--entry point for Log10(X), X is normalized
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = log_10(X).
# X == 1 returns an exact +0 through ld_pzero.  X < 0 goes to t_operr via
# the shared "invalid" stub.  Otherwise the result is slogn(X) * INV_L10.
8844slog10:
8845	fmov.b		&0x1,%fp0		# fp0 = 1
8846	fcmp.x		%fp0,(%a0)		# if operand == 1,
8847	fbeq.l		ld_pzero		# return an EXACT zero
8848
8849	mov.l		(%a0),%d1		# sign/exponent word in hi half; N set if X < 0
8850	blt.w		invalid			# negative operand
8851	mov.l		%d0,-(%sp)		# save user rnd prec,mode
8852	clr.l		%d0			# pass ext prec,RN
8853	bsr		slogn			# log(X), X normal.
8854	fmov.l		(%sp)+,%fpcr		# pop user rnd prec,mode into FPCR
8855	fmul.x		INV_L10(%pc),%fp0	# log(X) * (1/log(10))
8856	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
8857
8858	global		slog10d
8859#--entry point for Log10(X), X is denormalized
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = log_10(X).
# Same shape as slog10, but calls slognd and exits through t_minx2.
# There is no X == 1 shortcut on this path.
8860slog10d:
8861	mov.l		(%a0),%d1		# sign/exponent word in hi half; N set if X < 0
8862	blt.w		invalid			# negative operand
8863	mov.l		%d0,-(%sp)		# save user rnd prec,mode
8864	clr.l		%d0			# pass ext prec,RN
8865	bsr		slognd			# log(X), X denorm.
8866	fmov.l		(%sp)+,%fpcr		# pop user rnd prec,mode into FPCR
8867	fmul.x		INV_L10(%pc),%fp0	# log(X) * (1/log(10))
8868	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
8869
8870	global		slog2
8871#--entry point for Log2(X), X is normalized
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = log_2(X).
# X < 0 goes to t_operr.  If the mantissa is exactly 1.0 (X = 2^k) the
# unbiased exponent k is returned directly.  Otherwise the result is
# slogn(X) * INV_L2.
8872slog2:
8873	mov.l		(%a0),%d1		# sign/exponent word in hi half; N set if X < 0
8874	blt.w		invalid			# negative operand
8875
8876	mov.l		8(%a0),%d1		# low 32 mantissa bits
8877	bne.b		continue		# X is not 2^k
8878
8879	mov.l		4(%a0),%d1		# high 32 mantissa bits
8880	and.l		&0x7FFFFFFF,%d1		# ignore the explicit integer bit
8881	bne.b		continue		# fraction non-zero: X is not 2^k
8882
8883#--X = 2^k.
# d1 is zero here (the bne above fell through), so its hi word is clean.
8884	mov.w		(%a0),%d1		# sign/exponent word
8885	and.l		&0x00007FFF,%d1		# biased exponent only
8886	sub.l		&0x3FFF,%d1		# k = exponent - bias
8887	beq.l		ld_pzero		# k = 0 (X = 1): return +0 via ld_pzero
8888	fmov.l		%d0,%fpcr		# install user rnd prec,mode
8889	fmov.l		%d1,%fp0		# fp0 = k, integer converted to FP
8890	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
8891
8892continue:
8893	mov.l		%d0,-(%sp)		# save user rnd prec,mode
8894	clr.l		%d0			# pass ext prec,RN
8895	bsr		slogn			# log(X), X normal.
8896	fmov.l		(%sp)+,%fpcr		# pop user rnd prec,mode into FPCR
8897	fmul.x		INV_L2(%pc),%fp0	# log(X) * (1/log(2))
8898	bra		t_inx2
8899
# Shared negative-operand exit for slog10, slog10d, slog2 and slog2d.
8900invalid:
8901	bra		t_operr
8902
8903	global		slog2d
8904#--entry point for Log2(X), X is denormalized
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = log_2(X).
# Same shape as slog10d but multiplies by INV_L2.  There is no 2^k
# shortcut on this path.
8905slog2d:
8906	mov.l		(%a0),%d1		# sign/exponent word in hi half; N set if X < 0
8907	blt.w		invalid			# negative operand
8908	mov.l		%d0,-(%sp)		# save user rnd prec,mode
8909	clr.l		%d0			# pass ext prec,RN
8910	bsr		slognd			# log(X), X denorm.
8911	fmov.l		(%sp)+,%fpcr		# pop user rnd prec,mode into FPCR
8912	fmul.x		INV_L2(%pc),%fp0	# log(X) * (1/log(2))
8913	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
8914
8915#########################################################################
8916# stwotox():  computes 2**X for a normalized input			#
8917# stwotoxd(): computes 2**X for a denormalized input			#
8918# stentox():  computes 10**X for a normalized input			#
8919# stentoxd(): computes 10**X for a denormalized input			#
8920#									#
8921# INPUT ***************************************************************	#
8922#	a0 = pointer to extended precision input			#
8923#	d0 = round precision,mode					#
8924#									#
8925# OUTPUT **************************************************************	#
8926#	fp0 = 2**X or 10**X						#
8927#									#
8928# ACCURACY and MONOTONICITY *******************************************	#
8929#	The returned result is within 2 ulps in 64 significant bit,	#
8930#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8931#	rounded to double precision. The result is provably monotonic	#
8932#	in double precision.						#
8933#									#
8934# ALGORITHM ***********************************************************	#
8935#									#
8936#	twotox								#
8937#	1. If |X| > 16480, go to ExpBig.				#
8938#									#
8939#	2. If |X| < 2**(-70), go to ExpSm.				#
8940#									#
8941#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
8942#		decompose N as						#
8943#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8944#									#
8945#	4. Overwrite r := r * log2. Then				#
8946#		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8947#		Go to expr to compute that expression.			#
8948#									#
8949#	tentox								#
8950#	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.	#
8951#									#
8952#	2. If |X| < 2**(-70), go to ExpSm.				#
8953#									#
8954#	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set		#
8955#		N := round-to-int(y). Decompose N as			#
8956#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8957#									#
8958#	4. Define r as							#
8959#		r := ((X - N*L1)-N*L2) * L10				#
8960#		where L1, L2 are the leading and trailing parts of	#
8961#		log_10(2)/64 and L10 is the natural log of 10. Then	#
8962#		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8963#		Go to expr to compute that expression.			#
8964#									#
8965#	expr								#
8966#	1. Fetch 2**(j/64) from table as Fact1 and Fact2.		#
8967#									#
8968#	2. Overwrite Fact1 and Fact2 by					#
8969#		Fact1 := 2**(M) * Fact1					#
8970#		Fact2 := 2**(M) * Fact2					#
8971#		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).		#
8972#									#
8973#	3. Calculate P where 1 + P approximates exp(r):			#
8974#		P = r + r*r*(A1+r*(A2+...+r*A5)).			#
8975#									#
8976#	4. Let AdjFact := 2**(M'). Return				#
8977#		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).		#
8978#		Exit.							#
8979#									#
8980#	ExpBig								#
8981#	1. Generate overflow by Huge * Huge if X > 0; otherwise,	#
8982#	        generate underflow by Tiny * Tiny.			#
8983#									#
8984#	ExpSm								#
8985#	1. Return 1 + X.						#
8986#									#
8987#########################################################################
8988
# Constants for stwotox/stentox/expr.  The operand size used by the code
# tells the storage format: L2TEN64, L10TWO1 and EXPA1-EXPA5 are read as
# doubles (fmul.d / fmov.d / fadd.d); L10TWO2, LOG10 and LOG2 are read as
# extended (fmul.x), stored as three longwords plus a zero longword.
8989L2TEN64:
8990	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
8991L10TWO1:
8992	long		0x3F734413,0x509F8000	# LOG2/64LOG10
8993
# Trailing (low-order) part of LOG2/64LOG10; L10TWO1 above is the lead.
8994L10TWO2:
8995	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8996
# Same bit patterns as the ln(10) and ln(2) entries of the BIGRN table.
8997LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
8998
8999LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9000
# Polynomial coefficients consumed by expr (A5 first, A1 last).
9001EXPA5:	long		0x3F56C16D,0x6F7BD0B2
9002EXPA4:	long		0x3F811112,0x302C712C
9003EXPA3:	long		0x3FA55555,0x55554CC1
9004EXPA2:	long		0x3FC55555,0x55554A54
# EXPA1 is read with fadd.d, so only the first two longwords (double 0.5)
# are used by the code visible here; the two zero longwords are padding.
9005EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000
9006
# Table of 2^(J/64), J = 0..63, 16 bytes per entry (the code indexes it
# with J << 4).  As read by stwotox/stentox:
#   longwords 0-2 : extended-precision leading part, copied to FACT1
#   longword 3    : packed trailing part; its upper word is stored as the
#                   sign/exponent word of FACT2 and its lower word as the
#                   top 16 bits of FACT2's mantissa (remaining mantissa
#                   bits are cleared by the code).
9007TEXPTBL:
9008	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
9009	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9010	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9011	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9012	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9013	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9014	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9015	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9016	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9017	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9018	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9019	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9020	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9021	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9022	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9023	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9024	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9025	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9026	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9027	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9028	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9029	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9030	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9031	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9032	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9033	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9034	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9035	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9036	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9037	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9038	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9039	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9040	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9041	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9042	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9043	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9044	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9045	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9046	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9047	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9048	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9049	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9050	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9051	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9052	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9053	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9054	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9055	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9056	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9057	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9058	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9059	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9060	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9061	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9062	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9063	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9064	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9065	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9066	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9067	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9068	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9069	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9070	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9071	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9072
# Scratch-area aliases for stwotox/stentox/expr (all addressed off %a6).
# NOTE: X, ADJFACT and FACT1 all name the same FP_SCR0 slot, so each one
# is only valid until the next one is written.
9073	set		INT,L_SCR1		# integer N
9074
9075	set		X,FP_SCR0		# copy of the input operand
9076	set		XDCARE,X+2
9077	set		XFRAC,X+4
9078
9079	set		ADJFACT,FP_SCR0		# 2^(M'), built in expr
9080
9081	set		FACT1,FP_SCR0		# leading part of 2^(J/64), later scaled by 2^M
9082	set		FACT1HI,FACT1+4
9083	set		FACT1LOW,FACT1+8
9084
9085	set		FACT2,FP_SCR1		# trailing part of 2^(J/64), later scaled by 2^M
9086	set		FACT2HI,FACT2+4
9087	set		FACT2LOW,FACT2+8
9088
9089	global		stwotox
9090#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = 2**X.
# The main path pushes d2 and then fp2/fp3 and joins "expr" (located after
# TENMAIN), which pops both again.  The EXPBORS paths push nothing.
9091stwotox:
9092	fmovm.x		(%a0),&0x80		# LOAD INPUT
9093
9094	mov.l		(%a0),%d1		# d1 hi word = sign/exponent word of X
9095	mov.w		4(%a0),%d1		# d1 lo word = top 16 mantissa bits
9096	fmov.x		%fp0,X(%a6)		# keep a copy of X (TEXPBIG reads its sign)
9097	and.l		&0x7FFFFFFF,%d1		# compact form of |X|
9098
9099	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9100	bge.b		TWOOK1
9101	bra.w		EXPBORS			# too small
9102
9103TWOOK1:
9104	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
9105	ble.b		TWOMAIN
9106	bra.w		EXPBORS			# too big
9107
9108TWOMAIN:
9109#--USUAL CASE, 2^(-70) <= |X| <= 16480
9110
9111	fmov.x		%fp0,%fp1
9112	fmul.s		&0x42800000,%fp1	# 64 * X
9113	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
9114	mov.l		%d2,-(%sp)		# save d2; popped at the end of expr
9115	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9116	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9117	mov.l		INT(%a6),%d1
9118	mov.l		%d1,%d2
9119	and.l		&0x3F,%d1		# d1 IS J
9120	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64), 16 bytes/entry
9121	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9122	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9123	mov.l		%d2,%d1
9124	asr.l		&1,%d1			# d1 IS M
9125	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9126	add.l		&0x3FFF,%d2		# bias M': exponent word for ADJFACT
9127
9128#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130#--d2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
9131#--SAVED ON THE STACK SO FAR: d2 ONLY (fp2/fp3 FOLLOW BELOW).
9132
9133	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9134
9135	fmul.s		&0x3C800000,%fp1	# (1/64)*N
9136	mov.l		(%a1)+,FACT1(%a6)	# FACT1 overwrites the X copy (same slot)
9137	mov.l		(%a1)+,FACT1HI(%a6)
9138	mov.l		(%a1)+,FACT1LOW(%a6)
9139	mov.w		(%a1)+,FACT2(%a6)	# FACT2 sign/exponent word
9140
9141	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)
9142
9143	mov.w		(%a1)+,FACT2HI(%a6)	# top 16 bits of FACT2 mantissa
9144	clr.w		FACT2HI+2(%a6)		# rest of the FACT2 mantissa is zero
9145	clr.l		FACT2LOW(%a6)
9146	add.w		%d1,FACT1(%a6)		# FACT1 := 2^M * FACT1 (add M to exponent)
9147	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
9148	add.w		%d1,FACT2(%a6)		# FACT2 := 2^M * FACT2
9149
9150	bra.w		expr
9151
9152EXPBORS:
9153#--REACHED BEFORE ANYTHING IS PUSHED. d0 = USER MODE, d1 = COMPACT |X|.
# |X| is either < 2^(-70) or above the caller's upper bound here, so a
# comparison against 1.0 is enough to tell the two cases apart.
9154	cmp.l		%d1,&0x3FFF8000		# compact form of 1.0
9155	bgt.b		TEXPBIG
9156
9157#--|X| IS SMALL, RETURN 1 + X
9158
9159	fmov.l		%d0,%fpcr		# restore users round prec,mode
9160	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
9161	bra		t_pinx2
9162
9163TEXPBIG:
9164#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165#--NOTHING HAS BEEN PUSHED ON THIS PATH; d0 STILL HOLDS THE USER MODE.
9166	mov.l		X(%a6),%d1		# sign/exponent of the saved X
# NOTE(review): mov.l has already set N from the loaded value, so the
# compare against zero below is redundant (harmless).
9167	cmp.l		%d1,&0
9168	blt.b		EXPNEG			# X < 0
9169
9170	bra		t_ovfl2			# t_ovfl expects positive value
9171
9172EXPNEG:
9173	bra		t_unfl2			# t_unfl expects positive value
9174
9175	global		stwotoxd
9176stwotoxd:
9177#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
# The denormalized X itself is never used arithmetically.  A tiny
# single-precision value is built from the first longword of X with
# 0x00800001 OR-ed in; the high half of that longword is X's
# sign/exponent word, so X's sign bit is carried over.  It is added
# to 1.0 under the user's rounding mode.
# Byte-for-byte the same sequence as stentoxd.
9178
9179	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9180	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9181	mov.l		(%a0),%d1		# sign/exponent longword of X
9182	or.l		&0x00800001,%d1		# force a non-zero single with X's sign
9183	fadd.s		%d1,%fp0		# 1 + (signed tiny), rounded per user mode
9184	bra		t_pinx2
9185
9186	global		stentox
9187#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
# In:  a0 -> extended X, d0 = user rnd prec,mode.   Out: fp0 = 10**X.
# Out-of-range magnitudes share stwotox's EXPBORS path.  The main path
# falls through into "expr", which stwotox also branches to.
9188stentox:
9189	fmovm.x		(%a0),&0x80		# LOAD INPUT
9190
9191	mov.l		(%a0),%d1		# d1 hi word = sign/exponent word of X
9192	mov.w		4(%a0),%d1		# d1 lo word = top 16 mantissa bits
9193	fmov.x		%fp0,X(%a6)		# keep a copy of X (TEXPBIG reads its sign)
9194	and.l		&0x7FFFFFFF,%d1		# compact form of |X|
9195
9196	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9197	bge.b		TENOK1
9198	bra.w		EXPBORS			# too small
9199
9200TENOK1:
9201	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
9202	ble.b		TENMAIN
9203	bra.w		EXPBORS			# too big
9204
9205TENMAIN:
9206#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9207
9208	fmov.x		%fp0,%fp1
9209	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
9210	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
9211	mov.l		%d2,-(%sp)		# save d2; popped at the end of expr
9212	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9213	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9214	mov.l		INT(%a6),%d1
9215	mov.l		%d1,%d2
9216	and.l		&0x3F,%d1		# d1 IS J
9217	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64), 16 bytes/entry
9218	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9219	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9220	mov.l		%d2,%d1
9221	asr.l		&1,%d1			# d1 IS M
9222	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9223	add.l		&0x3FFF,%d2		# bias M': exponent word for ADJFACT
9224
9225#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227#--d2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
9228#--SAVED ON THE STACK SO FAR: d2 ONLY (fp2/fp3 FOLLOW BELOW).
9229	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9230
9231	fmov.x		%fp1,%fp2		# fp2 = N as well
9232
9233	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
9234	mov.l		(%a1)+,FACT1(%a6)	# FACT1 overwrites the X copy (same slot)
9235
9236	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL
9237
9238	mov.l		(%a1)+,FACT1HI(%a6)
9239	mov.l		(%a1)+,FACT1LOW(%a6)
9240	fsub.x		%fp1,%fp0		# X - N L_LEAD
9241	mov.w		(%a1)+,FACT2(%a6)	# FACT2 sign/exponent word
9242
9243	fsub.x		%fp2,%fp0		# (X - N L_LEAD) - N L_TRAIL
9244
9245	mov.w		(%a1)+,FACT2HI(%a6)	# top 16 bits of FACT2 mantissa
9246	clr.w		FACT2HI+2(%a6)		# rest of the FACT2 mantissa is zero
9247	clr.l		FACT2LOW(%a6)
9248
9249	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
9250	add.w		%d1,FACT1(%a6)		# FACT1 := 2^M * FACT1 (add M to exponent)
9251	add.w		%d1,FACT2(%a6)		# FACT2 := 2^M * FACT2
9252
9253expr:
9254#--ON ENTRY THE STACK HOLDS fp2/fp3 ON TOP OF THE SAVED d2; d0 = USER MODE.
9255#--d2 IS THE BIASED EXPONENT OF 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256#--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257#--	2**(M'+M) * 2**(J/64) * EXP(R)
9258
9259	fmov.x		%fp0,%fp1
9260	fmul.x		%fp1,%fp1		# FP1 IS S = R*R
9261
9262	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
9263	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4
9264
9265	fmul.x		%fp1,%fp2		# FP2 IS S*A5
9266	fmul.x		%fp1,%fp3		# FP3 IS S*A4
9267
9268	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
9269	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4
9270
9271	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
9272	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)
9273
9274	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
9275	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)
9276
9277	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
9278	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
9279	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1
9280
9281	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
9282
9283#--FINAL RECONSTRUCTION PROCESS
9284#--RESULT = 2^(M') * ( FACT1 + (FACT1*(EXP(R)-1) + FACT2) )
9285
9286	fmul.x		FACT1(%a6),%fp0		# FACT1*(EXP(R)-1)
9287	fadd.x		FACT2(%a6),%fp0		# ... + FACT2
9288	fadd.x		FACT1(%a6),%fp0		# ... + FACT1
9289
9290	fmov.l		%d0,%fpcr		# restore users round prec,mode
9291	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT (FACT1 slot is reused)
9292	mov.l		(%sp)+,%d2		# restore caller's d2
9293	mov.l		&0x80000000,ADJFACT+4(%a6)	# mantissa = 1.0
9294	clr.l		ADJFACT+8(%a6)
9295	mov.b		&FMUL_OP,%d1		# last inst is MUL
9296	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
9297	bra		t_catch
9298
9299	global		stentoxd
9300stentoxd:
9301#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
# Byte-for-byte the same sequence as stwotoxd: add a tiny single-precision
# value carrying X's sign to 1.0 under the user's rounding mode.  The
# denormalized X itself is never used arithmetically.
9302
9303	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9304	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9305	mov.l		(%a0),%d1		# sign/exponent longword of X
9306	or.l		&0x00800001,%d1		# force a non-zero single with X's sign
9307	fadd.s		%d1,%fp0		# 1 + (signed tiny), rounded per user mode
9308	bra		t_pinx2
9309
9310#########################################################################
9311# smovcr(): returns the ROM constant at the offset specified in d1	#
9312#	    rounded to the mode and precision specified in d0.		#
9313#									#
9314# INPUT	***************************************************************	#
9315#	d0 = rnd prec,mode						#
9316#	d1 = ROM offset							#
9317#									#
9318# OUTPUT **************************************************************	#
9319#	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
9320#									#
9321#########################################################################
9322
9323	global		smovcr
# smovcr: return in fp0 the constant selected by the ROM offset in the low
# byte of d1, rounded per the rnd prec,mode in d0.
# Offsets $00, $0b-$0e and $30-$3f have table entries.  Every other
# offset yields +0 through ld_pzero.
# INEX2/AINEX are OR-ed into USER_FPSR for entries treated as inexact.
# Writes d0, d1 and a0; the single/double path also writes FP_SCR1.
9324smovcr:
9325	mov.l		%d1,-(%sp)		# save rom offset for a sec
9326
# Repack d0 so that the rnd prec sits in the hi word and the rnd mode in
# the lo word.
9327	lsr.b		&0x4,%d0		# shift ctrl bits to lo
9328	mov.l		%d0,%d1			# make a copy
9329	andi.w		&0x3,%d1		# extract rnd mode
9330	andi.w		&0xc,%d0		# extract rnd prec
9331	swap		%d0			# put rnd prec in hi
9332	mov.w		%d1,%d0			# put rnd mode in lo
9333
9334	mov.l		(%sp)+,%d1		# get rom offset
9335
9336#
9337# check range of offset
9338# (byte-sized, signed compares: an offset byte with bit 7 set takes the
# first "ble" and returns zero)
9339	tst.b		%d1			# if zero, offset is to pi
9340	beq.b		pi_tbl			# it is pi
9341	cmpi.b		%d1,&0x0a		# check range $01 - $0a
9342	ble.b		z_val			# if in this range, return zero
9343	cmpi.b		%d1,&0x0e		# check range $0b - $0e
9344	ble.b		sm_tbl			# valid constants in this range
9345	cmpi.b		%d1,&0x2f		# check range $0f - $2f
9346	ble.b		z_val			# if in this range, return zero
9347	cmpi.b		%d1,&0x3f		# check range $30 - $3f
9348	ble.b		bg_tbl			# valid constants in this range
9349
# offsets above $3f fall through to here as well
9350z_val:
9351	bra.l		ld_pzero		# return a zero
9352
9353#
9354# the answer is PI rounded to the proper precision.
9355#
9356# fetch a pointer to the answer table relating to the proper rounding
9357# precision.
9358#
9359pi_tbl:
9360	tst.b		%d0			# is rmode RN?
9361	bne.b		pi_not_rn		# no
9362pi_rn:
9363	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
9364	bra.w		set_finx
9365pi_not_rn:
9366	cmpi.b		%d0,&rp_mode		# is rmode RP?
9367	beq.b		pi_rp			# yes
9368pi_rzrm:
9369	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
9370	bra.b		set_finx
9371pi_rp:
9372	lea.l		PIRP(%pc),%a0		# load PI RP table addr
9373	bra.b		set_finx
9374
9375#
9376# the answer is one of:
9377#	$0B	log10(2)	(inexact)
9378#	$0C	e		(inexact)
9379#	$0D	log2(e)		(inexact)
9380#	$0E	log10(e)	(exact)
9381#
9382# fetch a pointer to the answer table relating to the proper rounding
9383# precision.
9384#
9385sm_tbl:
9386	subi.b		&0xb,%d1		# make offset in 0-3 range
9387	tst.b		%d0			# is rmode RN?
9388	bne.b		sm_not_rn		# no
9389sm_rn:
9390	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
9391sm_tbl_cont:
9392	cmpi.b		%d1,&0x2		# is result log10(e)?
9393	ble.b		set_finx		# no; answer is inexact
9394	bra.b		no_finx			# yes; answer is exact
9395sm_not_rn:
9396	cmpi.b		%d0,&rp_mode		# is rmode RP?
9397	beq.b		sm_rp			# yes
9398sm_rzrm:
9399	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
9400	bra.b		sm_tbl_cont
9401sm_rp:
9402	lea.l		SMALRP(%pc),%a0		# load RP table addr
9403	bra.b		sm_tbl_cont
9404
9405#
9406# the answer is one of:
9407#	$30	ln(2)		(inexact)
9408#	$31	ln(10)		(inexact)
9409#	$32	10^0		(exact)
9410#	$33	10^1		(exact)
9411#	$34	10^2		(exact)
9412#	$35	10^4		(exact)
9413#	$36	10^8		(exact)
9414#	$37	10^16		(exact)
9415#	$38	10^32		(inexact)
9416#	$39	10^64		(inexact)
9417#	$3A	10^128		(inexact)
9418#	$3B	10^256		(inexact)
9419#	$3C	10^512		(inexact)
9420#	$3D	10^1024		(inexact)
9421#	$3E	10^2048		(inexact)
9422#	$3F	10^4096		(inexact)
9423#
9424# fetch a pointer to the answer table relating to the proper rounding
9425# precision.
9426#
9427bg_tbl:
9428	subi.b		&0x30,%d1		# make offset in 0-f range
9429	tst.b		%d0			# is rmode RN?
9430	bne.b		bg_not_rn		# no
9431bg_rn:
9432	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
9433bg_tbl_cont:
9434	cmpi.b		%d1,&0x1		# is offset <= $31?
9435	ble.b		set_finx		# yes; answer is inexact
9436	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
9437	ble.b		no_finx			# yes; answer is exact
9438	bra.b		set_finx		# no; answer is inexact
9439bg_not_rn:
9440	cmpi.b		%d0,&rp_mode		# is rmode RP?
9441	beq.b		bg_rp			# yes
9442bg_rzrm:
9443	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
9444	bra.b		bg_tbl_cont
9445bg_rp:
9446	lea.l		BIGRP(%pc),%a0		# load RP table addr
9447	bra.b		bg_tbl_cont
9448
9449# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9450set_finx:
9451	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
# here: a0 = table chosen for the rounding mode, d1 = entry index
9452no_finx:
9453	mulu.w		&0xc,%d1		# offset points into tables (12 bytes/entry)
9454	swap		%d0			# put rnd prec in lo word
9455	tst.b		%d0			# is precision extended?
9456
9457	bne.b		not_ext			# single/double: must call round
9458
9459# Precision is extended
9460	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
9461	rts
9462
9463# Precision is single or double
9464not_ext:
9465	swap		%d0			# rnd prec in upper word
9466
9467# call round() to round the answer to the proper precision.
9468# exponents out of range for single or double DO NOT cause underflow
9469# or overflow.
9470	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9471	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9472	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9473	mov.l		%d0,%d1			# d1 = rnd prec (hi word), rnd mode (lo word)
9474	clr.l		%d0			# clear g,r,s
9475	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
9476	clr.w		LOCAL_SGN(%a0)		# sign always positive
9477	bsr.l		_round			# round the mantissa
9478
9479	fmovm.x		(%a0),&0x80		# return rounded result in fp0
9480	rts
9481
9482	align		0x4
9483
# smovcr constant tables.  Every entry is 12 bytes: a longword whose hi
# word is the sign/exponent, followed by the 64-bit mantissa.  There is one
# table per rounding-mode class: RN, RZ/RM and RP.
9484PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi (RN)
9485PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi (RZ,RM)
9486PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi (RP)
9487
# Tables for ROM offsets $0b-$0e, indexed by (offset - $0b) * 12.
# smovcr's range checks only ever produce indices 0-3, so the trailing
# 0.0 entry of each table is not reached from the code visible here.
9488SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
9489	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
9490	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
9491	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9492	long		0x00000000,0x00000000,0x00000000	# 0.0
9493
# RZ,RM variant: differs from SMALRN only in log2(e).
9494SMALRZRM:
9495	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
9496	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
9497	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
9498	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9499	long		0x00000000,0x00000000,0x00000000	# 0.0
9500
# RP variant: differs from SMALRN in log10(2) and e.
9501SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
9502	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
9503	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
9504	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9505	long		0x00000000,0x00000000,0x00000000	# 0.0
9506
# Round-to-nearest table for ROM offsets $30-$3f, indexed by
# (offset - $30) * 12.  Entries 2-7 (10^0 .. 10^16) are the ones smovcr
# treats as exact.
9507BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
9508	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
9509
9510	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9511	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9512	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9513	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9514	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9515	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9516	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
9517	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
9518	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
9519	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
9520	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
9521	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
9522	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
9523	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
9524
# Round-to-zero / round-to-minus table for ROM offsets $30-$3f; same
# layout and indexing as BIGRN.
9525BIGRZRM:
9526	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
9527	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)
9528
9529	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9530	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9531	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9532	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9533	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9534	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9535	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
9536	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
9537	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
9538	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
9539	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
9540	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
9541	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
9542	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
9543
# Round-to-plus table for ROM offsets $30-$3f; same layout and indexing
# as BIGRN.
9544BIGRP:
9545	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
9546	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
9547
9548	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9549	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9550	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9551	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9552	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9553	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9554	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
9555	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
9556	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
9557	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
9558	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
9559	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
9560	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
9561	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
9562
9563#########################################################################
9564# sscale(): computes the destination operand scaled by the source	#
9565#	    operand. If the absolute value of the source operand is	#
9566#	    >= 2^14, an overflow or underflow is returned.		#
9567#									#
9568# INPUT *************************************************************** #
9569#	a0  = pointer to double-extended source operand X		#
9570#	a1  = pointer to double-extended destination operand Y		#
9571#									#
9572# OUTPUT ************************************************************** #
9573#	fp0 =  scale(X,Y)						#
9574#									#
9575#########################################################################
9576
9577set	SIGN,		L_SCR1		# scratch location that records the dst sign
9578
9579	global		sscale
9580sscale:
9581	mov.l		%d0,-(%sp)		# store off ctrl bits for now
9582
9583	mov.w		DST_EX(%a1),%d1		# get dst exponent
9584	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
9585	andi.l		&0x00007fff,%d1		# strip sign from dst exp
9586
9587	mov.w		SRC_EX(%a0),%d0		# check src bounds
9588	andi.w		&0x7fff,%d0		# clr src sign bit
9589	cmpi.w		%d0,&0x3fff		# is src ~ ZERO?
9590	blt.w		src_small		# yes
9591	cmpi.w		%d0,&0x400c		# no; is src too big?
9592	bgt.w		src_out			# yes
9593
9594#
9595# Source is within 2^14 range.
9596#
9597src_ok:
9598	fintrz.x	SRC(%a0),%fp0		# calc int of src
9599	fmov.l		%fp0,%d0		# int src to d0
9600# don't want any accrued bits from the fintrz showing up later since
9601# we may need to read the fpsr for the last fp op in t_catch2().
9602	fmov.l		&0x0,%fpsr
9603
9604	tst.b		DST_HI(%a1)		# is dst denormalized?
9605	bmi.b		sok_norm		# no; top mantissa bit is set
9606
9607# the dst is a DENORM. normalize the DENORM and add the adjustment to
9608# the src value. then, jump to the norm part of the routine.
9609sok_dnrm:
9610	mov.l		%d0,-(%sp)		# save src for now
9611
9612	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9613	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
9614	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
9615
9616	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
9617	bsr.l		norm			# normalize the DENORM
9618	neg.l		%d0			# negate the value norm left in d0
9619	add.l		(%sp)+,%d0		# add adjustment to src
9620
9621	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM
9622
9623	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
9624	bge.b		sok_norm2		# thank goodness no
9625
9626# the multiply factor that we're trying to create should be a denorm
9627# for the multiply to work. Therefore, we're going to actually do a
9628# multiply with a denorm which will cause an unimplemented data type
9629# exception to be put into the machine which will be caught and corrected
9630# later. we don't do this with the DENORMs above because this method
9631# is slower. but, don't fret, I don't see it being used much either.
9632	fmov.l		(%sp)+,%fpcr		# restore user fpcr
9633	mov.l		&0x80000000,%d1		# load normalized mantissa
9634	subi.l		&-0x3fff,%d0		# how many should we shift?
9635	neg.l		%d0			# make it positive
9636	cmpi.b		%d0,&0x20		# is it >= 32?
9637	bge.b		sok_dnrm_32		# yes
9638	lsr.l		%d0,%d1			# no; bit stays in upper lw
9639	clr.l		-(%sp)			# insert zero low mantissa
9640	mov.l		%d1,-(%sp)		# insert new high mantissa
9641	clr.l		-(%sp)			# make zero exponent
9642	bra.b		sok_norm_cont
9643sok_dnrm_32:
9644	subi.b		&0x20,%d0		# get shift count
9645	lsr.l		%d0,%d1			# make low mantissa longword
9646	mov.l		%d1,-(%sp)		# insert new low mantissa
9647	clr.l		-(%sp)			# insert zero high mantissa
9648	clr.l		-(%sp)			# make zero exponent
9649	bra.b		sok_norm_cont
9650
9651# the src will force the dst to a DENORM value or worse. so, let's
9652# create an fp multiply that will create the result.
9653sok_norm:
9654	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
9655sok_norm2:
9656	fmov.l		(%sp)+,%fpcr		# restore user fpcr
9657
9658	addi.w		&0x3fff,%d0		# turn src amt into exp value
9659	swap		%d0			# put exponent in high word
9660	clr.l		-(%sp)			# insert zero lo mantissa (pushed first, highest address)
9661	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
9662	mov.l		%d0,-(%sp)		# insert new exponent (pushed last, lowest address)
9663
9664sok_norm_cont:
9665	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
9666	mov.b		&FMUL_OP,%d1		# last inst is MUL
9667	fmul.x		(%sp)+,%fp0		# do the multiply
9668	bra		t_catch2		# catch any exceptions
9669
9670#
9671# Source is outside of 2^14 range.  Test the sign and branch
9672# to the appropriate exception handler.
9673#
9674src_out:
9675	mov.l		(%sp)+,%d0		# restore ctrl bits
9676	exg		%a0,%a1			# swap src,dst ptrs
9677	tst.b		SRC_EX(%a1)		# is src negative?
9678	bmi		t_unfl			# yes; underflow
9679	bra		t_ovfl_sc		# no; overflow
9680
9681#
9682# The source input is below 1, so we check for denormalized numbers
9683# and set unfl.
9684#
9685src_small:
9686	tst.b		DST_HI(%a1)		# is dst denormalized?
9687	bpl.b		ssmall_done		# yes
9688
9689	mov.l		(%sp)+,%d0		# restore ctrl bits into d0
9690	fmov.l		%d0,%fpcr		# no; load control bits
9691	mov.b		&FMOV_OP,%d1		# last inst is MOVE
9692	fmov.x		DST(%a1),%fp0		# simply return dest
9693	bra		t_catch2
9694ssmall_done:
9695	mov.l		(%sp)+,%d0		# restore control bits into d0
9696	mov.l		%a1,%a0			# pass ptr to dst
9697	bra		t_resdnrm
9698
9699#########################################################################
9700# smod(): computes the fp MOD of the input values X,Y.			#
9701# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
9702#									#
9703# INPUT *************************************************************** #
9704#	a0 = pointer to extended precision source operand Y		#
9705#	a1 = pointer to extended precision destination operand X	#
9706#	d0 = round precision,mode					#
9707#									#
9708#	The input operands X and Y can be either normalized or		#
9709#	denormalized.							#
9710#									#
9711# OUTPUT ************************************************************** #
9712#      fp0 = FREM(X,Y) or FMOD(X,Y)					#
9713#									#
9714# ALGORITHM *********************************************************** #
9715#									#
9716#       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
9717#                signY := sign(Y), X := |X|, Y := |Y|,			#
9718#                signQ := signX EOR signY. Record whether MOD or REM	#
9719#                is requested.						#
9720#									#
9721#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
9722#                If (L < 0) then					#
9723#                   R := X, go to Step 4.				#
9724#                else							#
9725#                   R := 2^(-L)X, j := L.				#
9726#                endif							#
9727#									#
9728#       Step 3.  Perform MOD(X,Y)					#
9729#            3.1 If R = Y, go to Step 9.				#
9730#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
9731#            3.3 If j = 0, go to Step 4.				#
9732#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
9733#                Step 3.1.						#
9734#									#
9735#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
9736#                Last_Subtract := false (used in Step 7 below). If	#
9737#                MOD is requested, go to Step 6.			#
9738#									#
9739#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
9740#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
9741#                Step 6.						#
9742#            5.2 If R > Y/2, then { set Last_Subtract := true,		#
9743#                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
9744#            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
9745#                then { Q := Q + 1, signX := -signX }.			#
9746#									#
9747#       Step 6.  R := signX*R.						#
9748#									#
9749#       Step 7.  If Last_Subtract = true, R := R - Y.			#
9750#									#
9751#       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
9752#									#
9753#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
9754#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
9755#                R := 0. Return signQ, last 7 bits of Q, and R.		#
9756#									#
9757#########################################################################
9758
9759	set		Mod_Flag,L_SCR3		# 0 = MOD requested (smod), 1 = REM requested (srem)
9760	set		Sc_Flag,L_SCR3+1	# nonzero when the result must be multiplied by Scale
9761
9762	set		SignY,L_SCR2		# sign/exponent word of Y
9763	set		SignX,L_SCR2+2		# sign/exponent word of X
9764	set		SignQ,L_SCR3+2		# sign of the quotient (bit 15)
9765
9766	set		Y,FP_SCR0		# scratch extended copy of |Y|
9767	set		Y_Hi,Y+4
9768	set		Y_Lo,Y+8
9769
9770	set		R,FP_SCR1		# scratch extended copy of the remainder R
9771	set		R_Hi,R+4
9772	set		R_Lo,R+8
9773
9774Scale:						# extended constant, exponent 0x0001, mantissa 0x80000000_00000000
9775	long		0x00010000,0x80000000,0x00000000,0x00000000
9776
9777	global		smod
9778smod:
9779	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
9780	mov.l		%d0,-(%sp)		# save ctrl bits
9781	clr.b		Mod_Flag(%a6)		# flag = 0: MOD
9782	bra.b		Mod_Rem
9783
9784	global		srem
9785srem:
9786	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
9787	mov.l		%d0,-(%sp)		# save ctrl bits
9788	mov.b		&0x1,Mod_Flag(%a6)	# flag = 1: REM
9789
9790Mod_Rem:
9791#..Save sign of X and Y
9792	movm.l		&0x3f00,-(%sp)		# save data registers
9793	mov.w		SRC_EX(%a0),%d3		# sign/exponent word of Y (src)
9794	mov.w		%d3,SignY(%a6)
9795	and.l		&0x00007FFF,%d3		# Y := |Y|
9796
9797#
9798	mov.l		SRC_HI(%a0),%d4
9799	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|
9800
9801	tst.l		%d3			# zero exponent means Y is denormalized
9802	bne.b		Y_Normal
9803
9804	mov.l		&0x00003FFE,%d3		# $3FFD + 1
9805	tst.l		%d4			# is the hi mantissa longword zero?
9806	bne.b		HiY_not0
9807
9808HiY_0:
9809	mov.l		%d5,%d4			# shift mantissa left by a whole longword
9810	clr.l		%d5
9811	sub.l		&32,%d3			# account for the 32-bit shift
9812	clr.l		%d6
9813	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit
9814	lsl.l		%d6,%d4			# shift that bit up to the msb
9815	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
9816#	                                        ...with bias $7FFD
9817	bra.b		Chk_X
9818
9819HiY_not0:
9820	clr.l		%d6
9821	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit in hi(Y)
9822	sub.l		%d6,%d3			# adjust exponent by the shift count
9823	lsl.l		%d6,%d4
9824	mov.l		%d5,%d7			# a copy of D5
9825	lsl.l		%d6,%d5
9826	neg.l		%d6
9827	add.l		&32,%d6			# d6 = 32 - shift count
9828	lsr.l		%d6,%d7			# bits of lo(Y) that move into hi(Y)
9829	or.l		%d7,%d4			# (D3,D4,D5) normalized
9830#                                       ...with bias $7FFD
9831	bra.b		Chk_X
9832
9833Y_Normal:
9834	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
9835#                                       ...with bias $7FFD
9836
9837Chk_X:
9838	mov.w		DST_EX(%a1),%d0		# sign/exponent word of X (dst)
9839	mov.w		%d0,SignX(%a6)
9840	mov.w		SignY(%a6),%d1
9841	eor.l		%d0,%d1			# bit 15 = signX EOR signY
9842	and.l		&0x00008000,%d1		# keep only the sign bit
9843	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
9844	and.l		&0x00007FFF,%d0		# X := |X|
9845	mov.l		DST_HI(%a1),%d1
9846	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
9847	tst.l		%d0			# zero exponent means X is denormalized
9848	bne.b		X_Normal
9849	mov.l		&0x00003FFE,%d0
9850	tst.l		%d1			# is the hi mantissa longword zero?
9851	bne.b		HiX_not0
9852
9853HiX_0:
9854	mov.l		%d2,%d1			# shift mantissa left by a whole longword
9855	clr.l		%d2
9856	sub.l		&32,%d0			# account for the 32-bit shift
9857	clr.l		%d6
9858	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit
9859	lsl.l		%d6,%d1
9860	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
9861#                                       ...with bias $7FFD
9862	bra.b		Init
9863
9864HiX_not0:
9865	clr.l		%d6
9866	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in hi(X)
9867	sub.l		%d6,%d0			# adjust exponent by the shift count
9868	lsl.l		%d6,%d1
9869	mov.l		%d2,%d7			# a copy of D2
9870	lsl.l		%d6,%d2
9871	neg.l		%d6
9872	add.l		&32,%d6			# d6 = 32 - shift count
9873	lsr.l		%d6,%d7			# bits of lo(X) that move into hi(X)
9874	or.l		%d7,%d1			# (D0,D1,D2) normalized
9875#                                       ...with bias $7FFD
9876	bra.b		Init
9877
9878X_Normal:
9879	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
9880#                                       ...with bias $7FFD
9881
9882Init:
9883#
9884	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
9885	mov.l		%d0,-(%sp)		# save biased exp(X)
9886	sub.l		%d3,%d0			# L := expo(X)-expo(Y)
9887
9888	clr.l		%d6			# D6 := carry <- 0
9889	clr.l		%d3			# D3 is Q
9890	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0
9891
9892#..(Carry,D1,D2) is R
9893	tst.l		%d0			# is L >= 0?
9894	bge.b		Mod_Loop_pre		# yes; run the subtract/shift loop
9895
9896#..expo(X) < expo(Y). Thus X = mod(X,Y)
9897#
9898	mov.l		(%sp)+,%d0		# restore d0
9899	bra.w		Get_Mod
9900
9901Mod_Loop_pre:
9902	addq.l		&0x4,%sp		# erase exp(X)
9903#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
9904Mod_Loop:
9905	tst.l		%d6			# test carry bit
9906	bgt.b		R_GT_Y			# carry set by the previous doubling of R
9907
9908#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9909	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
9910	bne.b		R_NE_Y
9911	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
9912	bne.b		R_NE_Y
9913
9914#..At this point, R = Y
9915	bra.w		Rem_is_0
9916
9917R_NE_Y:
9918#..use the borrow of the previous compare
9919	bcs.b		R_LT_Y			# borrow is set iff R < Y
9920
9921R_GT_Y:
9922#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9923#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9924	sub.l		%d5,%d2			# lo(R) - lo(Y)
9925	subx.l		%d4,%d1			# hi(R) - hi(Y)
9926	clr.l		%d6			# clear carry
9927	addq.l		&1,%d3			# Q := Q + 1
9928
9929R_LT_Y:
9930#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9931	tst.l		%d0			# see if j = 0.
9932	beq.b		PostLoop		# yes; loop finished
9933
9934	add.l		%d3,%d3			# Q := 2Q
9935	add.l		%d2,%d2			# lo(R) = 2lo(R)
9936	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
9937	scs		%d6			# set Carry if 2(R) overflows
9938	addq.l		&1,%a1			# k := k+1
9939	subq.l		&1,%d0			# j := j - 1
9940#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9941
9942	bra.b		Mod_Loop
9943
9944PostLoop:
9945#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9946
9947#..normalize R.
9948	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
9949	tst.l		%d1			# is hi(R) zero?
9950	bne.b		HiR_not0
9951
9952HiR_0:
9953	mov.l		%d2,%d1			# shift mantissa left by a whole longword
9954	clr.l		%d2
9955	sub.l		&32,%d0			# account for the 32-bit shift
9956	clr.l		%d6
9957	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit
9958	lsl.l		%d6,%d1
9959	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
9960#                                       ...with bias $7FFD
9961	bra.b		Get_Mod
9962
9963HiR_not0:
9964	clr.l		%d6
9965	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in hi(R)
9966	bmi.b		Get_Mod			# already normalized
9967	sub.l		%d6,%d0			# adjust exponent by the shift count
9968	lsl.l		%d6,%d1
9969	mov.l		%d2,%d7			# a copy of D2
9970	lsl.l		%d6,%d2
9971	neg.l		%d6
9972	add.l		&32,%d6			# d6 = 32 - shift count
9973	lsr.l		%d6,%d7			# bits of lo(R) that move into hi(R)
9974	or.l		%d7,%d1			# (D0,D1,D2) normalized
9975
9976#
9977Get_Mod:
9978	cmp.l		%d0,&0x000041FE		# is biased expo(R) >= $41FE?
9979	bge.b		No_Scale		# yes; the extra bias can be removed directly
9980Do_Scale:
9981	mov.w		%d0,R(%a6)		# store R with the extra bias still in its exponent
9982	mov.l		%d1,R_Hi(%a6)
9983	mov.l		%d2,R_Lo(%a6)
9984	mov.l		L_SCR1(%a6),%d6		# biased expo(Y)
9985	mov.w		%d6,Y(%a6)		# store Y with the same extra bias
9986	mov.l		%d4,Y_Hi(%a6)
9987	mov.l		%d5,Y_Lo(%a6)
9988	fmov.x		R(%a6),%fp0		# no exception
9989	mov.b		&1,Sc_Flag(%a6)		# result must be multiplied by Scale later
9990	bra.b		ModOrRem
9991No_Scale:
9992	mov.l		%d1,R_Hi(%a6)
9993	mov.l		%d2,R_Lo(%a6)
9994	sub.l		&0x3FFE,%d0		# remove the extra bias from expo(R)
9995	mov.w		%d0,R(%a6)
9996	mov.l		L_SCR1(%a6),%d6
9997	sub.l		&0x3FFE,%d6		# remove the extra bias from expo(Y)
9998	mov.l		%d6,L_SCR1(%a6)
9999	fmov.x		R(%a6),%fp0		# fp0 = R
10000	mov.w		%d6,Y(%a6)
10001	mov.l		%d4,Y_Hi(%a6)
10002	mov.l		%d5,Y_Lo(%a6)
10003	clr.b		Sc_Flag(%a6)		# no scaling required
10004
10005#
10006ModOrRem:
10007	tst.b		Mod_Flag(%a6)		# was MOD requested (flag = 0)?
10008	beq.b		Fix_Sign		# yes; R is already the answer
10009
10010	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
10011	subq.l		&1,%d6			# biased expo(Y/2)
10012	cmp.l		%d0,%d6			# compare expo(R) with expo(Y/2)
10013	blt.b		Fix_Sign		# expo(R) smaller: R < Y/2
10014	bgt.b		Last_Sub		# expo(R) larger: R > Y/2
10015
10016	cmp.l		%d1,%d4			# exponents equal: compare hi mantissas
10017	bne.b		Not_EQ
10018	cmp.l		%d2,%d5			# then lo mantissas
10019	bne.b		Not_EQ
10020	bra.w		Tie_Case		# R = Y/2 exactly
10021
10022Not_EQ:
10023	bcs.b		Fix_Sign		# borrow from the compare: R < Y/2
10024
10025Last_Sub:
10026#
10027	fsub.x		Y(%a6),%fp0		# no exceptions
10028	addq.l		&1,%d3			# Q := Q + 1
10029
10030#
10031Fix_Sign:
10032#..Get sign of X
10033	mov.w		SignX(%a6),%d6
10034	bge.b		Get_Q			# X non-negative: leave fp0 as is
10035	fneg.x		%fp0			# X negative: negate the result
10036
10037#..Get Q
10038#
10039Get_Q:
10040	clr.l		%d6
10041	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
10042	mov.l		&8,%d7
10043	lsr.l		%d7,%d6			# move sign from bit 15 down to bit 7
10044	and.l		&0x0000007F,%d3		# 7 bits of Q
10045	or.l		%d6,%d3			# sign and bits of Q
10046#	swap		%d3
10047#	fmov.l		%fpsr,%d6
10048#	and.l		&0xFF00FFFF,%d6
10049#	or.l		%d3,%d6
10050#	fmov.l		%d6,%fpsr		# put Q in fpsr
10051	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr
10052
10053#
10054Restore:
10055	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
10056	mov.l		(%sp)+,%d0		# recover the ctrl bits saved on entry
10057	fmov.l		%d0,%fpcr		# load them into fpcr for the final op
10058	tst.b		Sc_Flag(%a6)		# does the result need scaling?
10059	beq.b		Finish			# no
10060	mov.b		&FMUL_OP,%d1		# last inst is MUL
10061	fmul.x		Scale(%pc),%fp0		# may cause underflow
10062	bra		t_catch2
10063# the '040 package did this apparently to see if the dst operand for the
10064# preceding fmul was a denorm. but, it better not have been since the
10065# algorithm just got done playing with fp0 and expected no exceptions
10066# as a result. trust me...
10067#	bra		t_avoid_unsupp		# check for denorm as a
10068#						;result of the scaling
10069
10070Finish:
10071	mov.b		&FMOV_OP,%d1		# last inst is MOVE
10072	fmov.x		%fp0,%fp0		# capture exceptions & round
10073	bra		t_catch2
10074
10075Rem_is_0:
10076#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10077	addq.l		&1,%d3			# Q := Q + 1
10078	cmp.l		%d0,&8			# D0 is j
10079	bge.b		Q_Big			# j >= 8: shifted quotient has no low bits left
10080
10081	lsl.l		%d0,%d3			# Q := 2^j (Q+1)
10082	bra.b		Set_R_0
10083
10084Q_Big:
10085	clr.l		%d3			# low quotient bits are all zero
10086
10087Set_R_0:
10088	fmov.s		&0x00000000,%fp0	# R := +0 (sign is applied in Fix_Sign)
10089	clr.b		Sc_Flag(%a6)		# a zero result needs no scaling
10090	bra.w		Fix_Sign
10091
10092Tie_Case:
10093#..Check parity of Q
10094	mov.l		%d3,%d6
10095	and.l		&0x00000001,%d6		# isolate the low bit of Q
10096	tst.l		%d6
10097	beq.w		Fix_Sign		# Q is even
10098
10099#..Q is odd, Q := Q + 1, signX := -signX
10100	addq.l		&1,%d3
10101	mov.w		SignX(%a6),%d6
10102	eor.l		&0x00008000,%d6		# flip the saved sign bit of X
10103	mov.w		%d6,SignX(%a6)
10104	bra.w		Fix_Sign
10105
10106qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff	# default NAN returned by t_operr
10107
10108#########################################################################
10109# XDEF ****************************************************************	#
10110#	t_dz(): Handle DZ exception during transcendental emulation.	#
10111#	        Sets N bit according to sign of source operand.		#
10112#	t_dz2(): Handle DZ exception during transcendental emulation.	#
10113#		 Sets N bit always.					#
10114#									#
10115# XREF ****************************************************************	#
10116#	None								#
10117#									#
10118# INPUT ***************************************************************	#
10119#	a0 = pointer to source operand					#
10120#									#
10121# OUTPUT **************************************************************	#
10122#	fp0 = default result						#
10123#									#
10124# ALGORITHM ***********************************************************	#
10125#	- Store properly signed INF into fp0.				#
10126#	- Set FPSR exception status dz bit, ccode inf bit, and		#
10127#	  accrued dz bit.						#
10128#									#
10129#########################################################################
10130
10131	global		t_dz
10132t_dz:
10133	tst.b		SRC_EX(%a0)		# is src negative?
10134	bmi.b		t_dz2			# yes
10135
10136dz_pinf:
10137	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
10138	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10139	rts
10140
10141	global		t_dz2
10142t_dz2:
10143	fmov.s		&0xff800000,%fp0	# return -INF in fp0
10144	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10145	rts
10146
10147#################################################################
10148# OPERR exception:						#
10149#	- set FPSR exception status operr bit, condition code	#
10150#	  nan bit; Store default NAN into fp0			#
10151#################################################################
10152	global		t_operr
10153t_operr:
10154	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155	fmovm.x		qnan(%pc),&0x80		# return default NAN in fp0
10156	rts					# fp0 = qnan constant
10157
10158#################################################################
10159# Extended DENORM:						#
10160#	- For all functions that have a denormalized input and	#
10161#	  that f(x)=x, this is the entry point.			#
10162#	- we only return the EXOP here if either underflow or	#
10163#	  inexact is enabled.					#
10164#################################################################
10165
10166# Entry point for scale w/ extended denorm. The function does
10167# NOT set INEX2/AUNFL/AINEX.
10168	global		t_resdnrm
10169t_resdnrm:
10170	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
10171	bra.b		xdnrm_con
10172
10173	global		t_extdnrm
10174t_extdnrm:
10175	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10176
10177xdnrm_con:
10178	mov.l		%a0,%a1			# make copy of src ptr
10179	mov.l		%d0,%d1			# make copy of rnd prec,mode
10180	andi.b		&0xc0,%d1		# extended precision?
10181	bne.b		xdnrm_sd		# no
10182
10183# result precision is extended.
10184	tst.b		LOCAL_EX(%a0)		# is denorm negative?
10185	bpl.b		xdnrm_exit		# no
10186
10187	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
10188	bra.b		xdnrm_exit
10189
10190# result precision is single or double
10191xdnrm_sd:
10192	mov.l		%a1,-(%sp)		# save operand ptr across unf_sub
10193	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
10194	smi.b		%d1			# set d1 accordingly
10195	bsr.l		unf_sub			# a0 = ptr to default underflow result
10196	mov.l		(%sp)+,%a1		# restore operand ptr
10197xdnrm_exit:
10198	fmovm.x		(%a0),&0x80		# return default result in fp0
10199
10200	mov.b		FPCR_ENABLE(%a6),%d0
10201	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
10202	bne.b		xdnrm_ena		# yes
10203	rts
10204
10205################
10206# unfl enabled #
10207################
10208# we have a DENORM that needs to be converted into an EXOP.
10209# so, normalize the mantissa, add 0x6000 to the new exponent,
10210# and return the result in fp1.
10211xdnrm_ena:
10212	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)	# copy the operand into FP_SCR0
10213	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10214	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10215
10216	lea		FP_SCR0(%a6),%a0
10217	bsr.l		norm			# normalize mantissa
10218	addi.l		&0x6000,%d0		# add extra bias
10219	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
10220	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
10221
10222	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10223	rts
10224
10225#################################################################
10226# UNFL exception:						#
10227#	- This routine is for cases where even an EXOP isn't	#
10228#	  large enough to hold the range of this result.	#
10229#	  In such a case, the EXOP equals zero.			#
10230#	- Return the default result to the proper precision	#
10231#	  with the sign of this result being the same as that	#
10232#	  of the src operand.					#
10233#	- t_unfl2() is provided to force the result sign to	#
10234#	  positive which is the desired result for fetox().	#
10235#################################################################
10236	global		t_unfl
10237t_unfl:
10238	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10239
10240	tst.b		(%a0)			# is result pos or neg?
10241	smi.b		%d1			# set d1 accordingly
10242	bsr.l		unf_sub			# calc default unfl result
10243	fmovm.x		(%a0),&0x80		# return default result in fp0
10244
10245	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10246	rts
10247
10248# t_unfl2 ALWAYS tells unf_sub to create a positive result
10249	global		t_unfl2
10250t_unfl2:
10251	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10252
10253	sf.b		%d1			# set d1 to represent positive
10254	bsr.l		unf_sub			# calc default unfl result
10255	fmovm.x		(%a0),&0x80		# return default result in fp0
10256
10257	fmov.s		&0x0000000,%fp1		# return EXOP in fp1
10258	rts
10259
10260#################################################################
10261# OVFL exception:						#
10262#	- This routine is for cases where even an EXOP isn't	#
10263#	  large enough to hold the range of this result.	#
10264#	- Return the default result to the proper precision	#
10265#	  with the sign of this result being the same as that	#
10266#	  of the src operand.					#
10267#	- t_ovfl2() is provided to force the result sign to	#
10268#	  positive which is the desired result for fcosh().	#
10269#	- t_ovfl_sc() is provided for scale() which only sets	#
10270#	  the inexact bits if the number is inexact for the	#
10271#	  precision indicated.					#
10272#################################################################
10273
10274	global		t_ovfl_sc
10275t_ovfl_sc:
10276	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10277
10278	mov.b		%d0,%d1			# fetch rnd mode/prec
10279	andi.b		&0xc0,%d1		# extract rnd prec
10280	beq.b		ovfl_work		# prec is extended
10281
10282	tst.b		LOCAL_HI(%a0)		# is dst a DENORM?
10283	bmi.b		ovfl_sc_norm		# no
10284
10285# dst op is a DENORM. we have to normalize the mantissa to see if the
10286# result would be inexact for the given precision. make a copy of the
10287# dst so we don't screw up the version passed to us.
10288	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
10292	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
10293	bsr.l		norm			# normalize mantissa
10294	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0
10295
10296ovfl_sc_norm:
10297	cmpi.b		%d1,&0x40		# is prec sgl?
10298	bne.b		ovfl_sc_dbl		# no; dbl
10299ovfl_sc_sgl:
10300	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
10301	bne.b		ovfl_sc_inx		# yes
10302	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
10303	bne.b		ovfl_sc_inx		# yes
10304	bra.b		ovfl_work		# don't set INEX2
10305ovfl_sc_dbl:
10306	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
10307	andi.l		&0x7ff,%d1		# dbl mantissa set?
10308	beq.b		ovfl_work		# no; don't set INEX2
10309ovfl_sc_inx:
10310	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
10311	bra.b		ovfl_work		# continue
10312
10313	global		t_ovfl
10314t_ovfl:
10315	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10316
10317ovfl_work:
10318	tst.b		LOCAL_EX(%a0)		# what is the sign?
10319	smi.b		%d1			# set d1 accordingly
10320	bsr.l		ovf_res			# calc default ovfl result
10321	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
10322	fmovm.x		(%a0),&0x80		# return default result in fp0
10323
10324	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10325	rts
10326
10327# t_ovfl2 ALWAYS tells ovf_res to create a positive result
10328	global		t_ovfl2
10329t_ovfl2:
10330	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10331
10332	sf.b		%d1			# clear sign flag for positive
10333	bsr.l		ovf_res			# calc default ovfl result
10334	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
10335	fmovm.x		(%a0),&0x80		# return default result in fp0
10336
10337	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10338	rts
10339
10340#################################################################
10341# t_catch():							#
10342#	- the last operation of a transcendental emulation	#
10343#	  routine may have caused an underflow or overflow.	#
10344#	  we find out if this occurred by doing an fsave and	#
10345#	  checking the exception bit. if one did occur, then we	#
10346#	  jump to fgen_except() which creates the default	#
10347#	  result and EXOP for us.				#
10348#################################################################
10349	global		t_catch
10350t_catch:
10351
10352	fsave		-(%sp)			# push the FPU state frame
10353	tst.b		0x2(%sp)		# is the exception bit set in the frame?
10354	bmi.b		catch			# yes; build default result and EXOP
10355	add.l		&0xc,%sp		# no; discard the 0xc bytes pushed by fsave
10356						# and fall through into t_inx2
10357#################################################################
10358# INEX2 exception:						#
10359#	- The inex2 and ainex bits are set.			#
10360#################################################################
10361	global		t_inx2
10362t_inx2:
10363	fblt.w		t_minx2			# FPU condition "less than": negative result
10364	fbeq.w		inx2_zero		# FPU condition "equal": zero result
10365
10366	global		t_pinx2
10367t_pinx2:
10368	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10369	rts
10370
10371	global		t_minx2
10372t_minx2:
10373	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10374	rts
10375
10376inx2_zero:
10377	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10378	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10379	rts
10380
10381# an underflow or overflow exception occurred.
10382# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10383catch:
10384	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)
10385catch2:
10386	bsr.l		fgen_except		# create default result and EXOP
10387	add.l		&0xc,%sp		# discard the 0xc bytes pushed by fsave
10388	rts
10389
10390	global		t_catch2
10391t_catch2:
10392
10393	fsave		-(%sp)			# push the FPU state frame
10394
10395	tst.b		0x2(%sp)		# is the exception bit set in the frame?
10396	bmi.b		catch2			# yes; handle it without forcing INEX2/AINEX
10397	add.l		&0xc,%sp		# no; discard the 0xc bytes pushed by fsave
10398
10399	fmov.l		%fpsr,%d0		# read status of the last fp op
10400	or.l		%d0,USER_FPSR(%a6)	# merge it into the user's FPSR image
10401
10402	rts
10403
10404#########################################################################
10405
10406#########################################################################
10407# unf_sub(): underflow default result calculation for transcendentals	#
10408#									#
10409# INPUT:								#
10410#	d0   : rnd mode,precision					#
10411#	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
10412# OUTPUT:								#
10413#	a0   : points to result (in instruction memory)			#
10414#########################################################################
10415unf_sub:
10416	ori.l		&unfinx_mask,USER_FPSR(%a6)
10417
10418	andi.w		&0x10,%d1		# keep sign bit in 4th spot
10419
10420	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
10421	andi.b		&0xf,%d0		# strip hi rnd mode bit
10422	or.b		%d1,%d0			# concat {sgn,mode,prec}
10423
10424	mov.l		%d0,%d1			# make a copy
10425	lsl.b		&0x1,%d1		# mult index 2 by 2
10426
10427	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10429	rts
10430
10431tbl_unf_cc:					# one ccode byte per {sgn,prec,mode} index
10432	byte		0x4, 0x4, 0x4, 0x0
10433	byte		0x4, 0x4, 0x4, 0x0
10434	byte		0x4, 0x4, 0x4, 0x0
10435	byte		0x0, 0x0, 0x0, 0x0
10436	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10437	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10438	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10439
10440tbl_unf_result:					# one 16-byte entry per {sgn,prec,mode} index
10441	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10445
10446	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10450
10451	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10455
10456	long		0x0,0x0,0x0,0x0
10457	long		0x0,0x0,0x0,0x0
10458	long		0x0,0x0,0x0,0x0
10459	long		0x0,0x0,0x0,0x0
10460
10461	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10462	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10463	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10464	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10465
10466	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10467	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10468	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10469	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10470
10471	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10472	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10473	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10474	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10475
10476############################################################
10477
10478#########################################################################
10479# src_zero(): Return signed zero according to sign of src operand.	#
10480#########################################################################
10481	global		src_zero
10482src_zero:
10483	tst.b		SRC_EX(%a0)		# get sign of src operand
10484	bmi.b		ld_mzero		# if neg, load neg zero
10485						# otherwise fall through into ld_pzero
10486#
10487# ld_pzero(): return a positive zero.
10488#
10489	global		ld_pzero
10490ld_pzero:
10491	fmov.s		&0x00000000,%fp0	# load +0
10492	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10493	rts
10494
10495# ld_mzero(): return a negative zero.
10496	global		ld_mzero
10497ld_mzero:
10498	fmov.s		&0x80000000,%fp0	# load -0
10499	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10500	rts
10501
10502#########################################################################
10503# dst_zero(): Return signed zero according to sign of dst operand.	#
10504#	      a1 = pointer to dst operand; result in fp0.		#
10505	global		dst_zero
10506dst_zero:
10507	tst.b		DST_EX(%a1)		# get sign of dst operand
10508	bmi.b		ld_mzero		# if neg, load neg zero
10509	bra.b		ld_pzero		# load positive zero
10510
10511#########################################################################
10512# src_inf(): Return signed inf according to sign of src operand.	#
10513#########################################################################
10514	global		src_inf
10515src_inf:
10516	tst.b		SRC_EX(%a0)		# get sign of src operand
10517	bmi.b		ld_minf			# if negative branch
10518						# otherwise fall through into ld_pinf
10519#
10520# ld_pinf(): return a positive infinity.
10521#
10522	global		ld_pinf
10523ld_pinf:
10524	fmov.s		&0x7f800000,%fp0	# load +INF
10525	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
10526	rts
10527
10528#
10529# ld_minf():return a negative infinity.
10530#
10531	global		ld_minf
10532ld_minf:
10533	fmov.s		&0xff800000,%fp0	# load -INF
10534	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10535	rts
10536
10537#########################################################################
10538# dst_inf(): Return in fp0 an infinity carrying the sign of the dst op.	#
10539#########################################################################
10540	global		dst_inf
10541dst_inf:
10542	tst.b		DST_EX(%a1)		# get sign of dst operand
10543	bmi.b		ld_minf			# if negative branch
10544	bra.b		ld_pinf			# otherwise load positive infinity
10545
10546	global		szr_inf
10547#################################################################
10548# szr_inf(): Return +ZERO for a negative src operand or		#
10549#	            +INF for a positive src operand.		#
10550#	     Routine used for fetox, ftwotox, and ftentox.	#
10551#################################################################
10552szr_inf:
10553	tst.b		SRC_EX(%a0)		# check sign of source
10554	bmi.b		ld_pzero		# negative src: result is +0
10555	bra.b		ld_pinf			# otherwise: result is +INF
10556
10557#########################################################################
10558# sopr_inf(): Return +INF for a positive src operand or			#
10559#	      jump to operand error routine for a negative src operand.	#
10560#	      Routine used for flogn, flognp1, flog10, and flog2.	#
10561#########################################################################
10562	global		sopr_inf
10563sopr_inf:
10564	tst.b		SRC_EX(%a0)		# check sign of source
10565	bmi.w		t_operr			# negative src: operand error
10566	bra.b		ld_pinf			# otherwise: result is +INF
10567
10568#################################################################
10569# setoxm1i(): Return minus one for a negative src operand or	#
10570#	      positive infinity for a positive src operand.	#
10571#	      Routine used for fetoxm1 when the src is an INF.	#
10572#################################################################
10573	global		setoxm1i
10574setoxm1i:
10575	tst.b		SRC_EX(%a0)		# check sign of source
10576	bmi.b		ld_mone			# negative src: result is -1
10577	bra.b		ld_pinf			# otherwise: result is +INF
10578
10579#########################################################################
10580# src_one(): Return in fp0 a one carrying the sign of the src operand.	#
10581#########################################################################
10582	global		src_one
10583src_one:
10584	tst.b		SRC_EX(%a0)		# check sign of source
10585	bmi.b		ld_mone			# negative src: result is -1
10586
10587#
10588# ld_pone(): return positive one in fp0 and clear every FPSR_CC bit.
10589#	     src_one() falls through to here when the src is not negative.
10590	global		ld_pone
10591ld_pone:
10592	fmov.s		&0x3f800000,%fp0	# load +1
10593	clr.b		FPSR_CC(%a6)		# no ccode bits set
10594	rts
10595
10596#
10597# ld_mone(): return negative one in fp0; FPSR_CC is overwritten with 'N' only.
10598#
10599	global		ld_mone
10600ld_mone:
10601	fmov.s		&0xbf800000,%fp0	# load -1
10602	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10603	rts
10604
10605ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2, loaded with fmov.x by ld_ppi2
10606mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2, loaded with fmov.x by ld_mpi2
10607
10608#################################################################
10609# spi_2(): Return signed PI/2 according to sign of src operand.	#
10610#################################################################
10611	global		spi_2
10612spi_2:
10613	tst.b		SRC_EX(%a0)		# check sign of source
10614	bmi.b		ld_mpi2			# negative src: result is -pi/2
10615
10616#
10617# ld_ppi2(): return positive PI/2 (spi_2() falls through to here for src >= 0).
10618#	     Exits through t_pinx2 rather than rts.
10619	global		ld_ppi2
10620ld_ppi2:
10621	fmov.l		%d0,%fpcr		# FPCR = d0 (presumably caller's rnd prec,mode - confirm)
10622	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
10623	bra.w		t_pinx2			# set INEX2
10624
10625#
10626# ld_mpi2(): return negative PI/2. Exits through t_minx2 rather than rts.
10627#
10628	global		ld_mpi2
10629ld_mpi2:
10630	fmov.l		%d0,%fpcr		# FPCR = d0 (presumably caller's rnd prec,mode - confirm)
10631	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
10632	bra.w		t_minx2			# set INEX2
10633
10634####################################################
10635# The following routines give support for fsincos. #
10636####################################################
10637
10638#
10639# ssincosz(): When the src operand is ZERO, store a one in the
10640#	      cosine register and return a ZERO in fp0 w/ the same sign
10641#	      as the src operand. Exits through sto_cos (tail branch).
10642#
10643	global		ssincosz
10644ssincosz:
10645	fmov.s		&0x3f800000,%fp1	# fp1 = +1, the cosine result
10646	tst.b		SRC_EX(%a0)		# test sign
10647	bpl.b		sincoszp		# src not negative: return +0
10648	fmov.s		&0x80000000,%fp0	# return sin result in fp0 (-0)
10649	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'N','Z' ccode bits
10650	bra.b		sto_cos			# store cosine result
10651sincoszp:
10652	fmov.s		&0x00000000,%fp0	# return sin result in fp0 (+0)
10653	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10654	bra.b		sto_cos			# store cosine result
10655
10656#
10657# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658#	      register and jump to the operand error routine. The branch is
10659#	      unconditional: the sign of the src operand is not tested here.
10660#
10661	global		ssincosi
10662ssincosi:
10663	fmov.x		qnan(%pc),%fp1		# load NAN
10664	bsr.l		sto_cos			# store cosine result
10665	bra.w		t_operr			# then take the operand error path
10666
10667#
10668# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669#		 register and branch to the src QNAN routine.
10670#
10671	global		ssincosqnan
10672ssincosqnan:
10673	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand (the QNAN)
10674	bsr.l		sto_cos			# store it as the cosine result
10675	bra.w		src_qnan		# sine result: src QNAN in fp0, ccodes set there
10676
10677#
10678# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679#		 in the cosine register and branch to the src SNAN routine.
10680#
10681	global		ssincossnan
10682ssincossnan:
10683	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand (the SNAN)
10684	bsr.l		sto_cos			# store it as the cosine result
10685	bra.w		src_snan		# sine result: src SNAN in fp0, status saved there
10686
10687########################################################################
10688
10689#########################################################################
10690# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
10691#	     fp1 holds the result of the cosine portion of ssincos().	#
10692#	     the value in fp1 will not take any exceptions when moved.	#
10693# INPUT:								#
10694#	fp1 : fp value to store						#
10695# MODIFIED:								#
10696#	d0								#
10697#########################################################################
10698	global		sto_cos
10699sto_cos:
10700	mov.b		1+EXC_CMDREG(%a6),%d0	# byte at offset 1 of the saved command word
10701	andi.w		&0x7,%d0		# keep low 3 bits: register index 0-7
10702	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0	# fetch 16-bit offset for that index
10703	jmp		(tbl_sto_cos.b,%pc,%d0.w*1)	# jump to tbl_sto_cos + offset
10704
10705tbl_sto_cos:					# offsets of the store stubs, relative to this table
10706	short		sto_cos_0 - tbl_sto_cos
10707	short		sto_cos_1 - tbl_sto_cos
10708	short		sto_cos_2 - tbl_sto_cos
10709	short		sto_cos_3 - tbl_sto_cos
10710	short		sto_cos_4 - tbl_sto_cos
10711	short		sto_cos_5 - tbl_sto_cos
10712	short		sto_cos_6 - tbl_sto_cos
10713	short		sto_cos_7 - tbl_sto_cos
10714
10715sto_cos_0:
10716	fmovm.x		&0x40,EXC_FP0(%a6)	# fp1 -> EXC_FP0 frame slot (presumably the saved fp0 - confirm)
10717	rts
10718sto_cos_1:
10719	fmovm.x		&0x40,EXC_FP1(%a6)	# fp1 -> EXC_FP1 frame slot (presumably the saved fp1 - confirm)
10720	rts
10721sto_cos_2:
10722	fmov.x		%fp1,%fp2		# fp2-fp7 are written directly
10723	rts
10724sto_cos_3:
10725	fmov.x		%fp1,%fp3
10726	rts
10727sto_cos_4:
10728	fmov.x		%fp1,%fp4
10729	rts
10730sto_cos_5:
10731	fmov.x		%fp1,%fp5
10732	rts
10733sto_cos_6:
10734	fmov.x		%fp1,%fp6
10735	rts
10736sto_cos_7:
10737	fmov.x		%fp1,%fp7
10738	rts
10739
10740##################################################################
10741	global		smod_sdnrm
10742	global		smod_snorm
10743smod_sdnrm:					# fmod, src tagged DENORM: same dispatch as below
10744smod_snorm:					# fmod, src tag 0: dispatch on the dst tag
10745	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10746	beq.l		smod			# dst tag 0 (presumably NORM): do the fmod
10747	cmpi.b		%d1,&ZERO
10748	beq.w		smod_zro		# dst ZERO: signed zero result, quotient sign set
10749	cmpi.b		%d1,&INF
10750	beq.l		t_operr			# dst INF: operand error
10751	cmpi.b		%d1,&DENORM
10752	beq.l		smod			# dst DENORM: do the fmod
10753	cmpi.b		%d1,&SNAN
10754	beq.l		dst_snan		# dst SNAN: return it
10755	bra.l		dst_qnan		# any other tag is handled as a dst QNAN
10756
10757	global		smod_szero
10758smod_szero:					# fmod, src ZERO: operand error unless dst is a NAN
10759	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10760	beq.l		t_operr			# dst tag 0 (presumably NORM)
10761	cmpi.b		%d1,&ZERO
10762	beq.l		t_operr
10763	cmpi.b		%d1,&INF
10764	beq.l		t_operr
10765	cmpi.b		%d1,&DENORM
10766	beq.l		t_operr
10767	cmpi.b		%d1,&QNAN
10768	beq.l		dst_qnan		# dst QNAN: return it
10769	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10770
10771	global		smod_sinf
10772smod_sinf:					# fmod, src INF: dispatch on the dst tag
10773	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10774	beq.l		smod_fpn		# dst tag 0 (presumably NORM): result is the dst
10775	cmpi.b		%d1,&ZERO
10776	beq.l		smod_zro		# dst ZERO: signed zero result
10777	cmpi.b		%d1,&INF
10778	beq.l		t_operr			# dst INF: operand error
10779	cmpi.b		%d1,&DENORM
10780	beq.l		smod_fpn		# dst DENORM: result is the dst
10781	cmpi.b		%d1,&QNAN
10782	beq.l		dst_qnan		# dst QNAN: return it
10783	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10784
10785smod_zro:					# fmod/frem with a ZERO dst: store the quotient sign,
10786srem_zro:					# then return a zero with the sign of the dst
10787	mov.b		SRC_EX(%a0),%d1		# get src sign
10788	mov.b		DST_EX(%a1),%d0		# get dst sign
10789	eor.b		%d0,%d1			# get qbyte sign
10790	andi.b		&0x80,%d1		# keep only the sign bit (bit 7)
10791	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit, low 7 bits zero
10792	tst.b		%d0			# d0 still holds the dst sign byte
10793	bpl.w		ld_pzero		# dst not negative: return +0
10794	bra.w		ld_mzero		# dst negative: return -0
10795
10796smod_fpn:					# fmod/frem with an INF src and a tag-0 or DENORM dst:
10797srem_fpn:					# the result is the dst operand itself
10798	clr.b		FPSR_QBYTE(%a6)		# cleared here; overwritten again below before any read
10799	mov.l		%d0,-(%sp)		# save d0 (d0.b is used as scratch below)
10800	mov.b		SRC_EX(%a0),%d1		# get src sign
10801	mov.b		DST_EX(%a1),%d0		# get dst sign
10802	eor.b		%d0,%d1			# get qbyte sign
10803	andi.b		&0x80,%d1		# keep only the sign bit (bit 7)
10804	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit, low 7 bits zero
10805	cmpi.b		DTAG(%a6),&DENORM	# is the dst a DENORM?
10806	bne.b		smod_nrm		# no
10807	lea		DST(%a1),%a0		# a0 = ptr to dst operand for t_resdnrm
10808	mov.l		(%sp)+,%d0		# restore saved d0
10809	bra		t_resdnrm		# DENORM result is produced by t_resdnrm
10810smod_nrm:
10811	fmov.l		(%sp)+,%fpcr		# pop the saved d0 straight into FPCR
10812	fmov.x		DST(%a1),%fp0		# result = dst operand
10813	tst.b		DST_EX(%a1)		# check sign of dst
10814	bmi.b		smod_nrm_neg		# negative: also set 'N'
10815	rts					# non-negative: FPSR_CC is not written on this path
10816
10817smod_nrm_neg:
10818	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
10819	rts
10820
10821#########################################################################
10822	global		srem_snorm
10823	global		srem_sdnrm
10824srem_sdnrm:					# frem, src tagged DENORM: same dispatch as below
10825srem_snorm:					# frem, src tag 0: dispatch on the dst tag
10826	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10827	beq.l		srem			# dst tag 0 (presumably NORM): do the frem
10828	cmpi.b		%d1,&ZERO
10829	beq.w		srem_zro		# dst ZERO: signed zero result, quotient sign set
10830	cmpi.b		%d1,&INF
10831	beq.l		t_operr			# dst INF: operand error
10832	cmpi.b		%d1,&DENORM
10833	beq.l		srem			# dst DENORM: do the frem
10834	cmpi.b		%d1,&QNAN
10835	beq.l		dst_qnan		# dst QNAN: return it
10836	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10837
10838	global		srem_szero
10839srem_szero:					# frem, src ZERO: operand error unless dst is a NAN
10840	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10841	beq.l		t_operr			# dst tag 0 (presumably NORM)
10842	cmpi.b		%d1,&ZERO
10843	beq.l		t_operr
10844	cmpi.b		%d1,&INF
10845	beq.l		t_operr
10846	cmpi.b		%d1,&DENORM
10847	beq.l		t_operr
10848	cmpi.b		%d1,&QNAN
10849	beq.l		dst_qnan		# dst QNAN: return it
10850	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10851
10852	global		srem_sinf
10853srem_sinf:					# frem, src INF: dispatch on the dst tag
10854	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10855	beq.w		srem_fpn		# dst tag 0 (presumably NORM): result is the dst
10856	cmpi.b		%d1,&ZERO
10857	beq.w		srem_zro		# dst ZERO: signed zero result
10858	cmpi.b		%d1,&INF
10859	beq.l		t_operr			# dst INF: operand error
10860	cmpi.b		%d1,&DENORM
10861	beq.l		srem_fpn		# dst DENORM: result is the dst
10862	cmpi.b		%d1,&QNAN
10863	beq.l		dst_qnan		# dst QNAN: return it
10864	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10865
10866#########################################################################
10867	global		sscale_snorm
10868	global		sscale_sdnrm
10869sscale_snorm:					# fscale, src tag 0: dispatch on the dst tag
10870sscale_sdnrm:					# fscale, src tagged DENORM: same dispatch
10871	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10872	beq.l		sscale			# dst tag 0 (presumably NORM): do the fscale
10873	cmpi.b		%d1,&ZERO
10874	beq.l		dst_zero		# dst ZERO: return zero w/ sign of dst
10875	cmpi.b		%d1,&INF
10876	beq.l		dst_inf			# dst INF: return inf w/ sign of dst
10877	cmpi.b		%d1,&DENORM
10878	beq.l		sscale			# dst DENORM: do the fscale
10879	cmpi.b		%d1,&QNAN
10880	beq.l		dst_qnan		# dst QNAN: return it
10881	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10882
10883	global		sscale_szero
10884sscale_szero:					# fscale, src ZERO: same dst dispatch as sscale_snorm
10885	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10886	beq.l		sscale			# dst tag 0 (presumably NORM): do the fscale
10887	cmpi.b		%d1,&ZERO
10888	beq.l		dst_zero		# dst ZERO: return zero w/ sign of dst
10889	cmpi.b		%d1,&INF
10890	beq.l		dst_inf			# dst INF: return inf w/ sign of dst
10891	cmpi.b		%d1,&DENORM
10892	beq.l		sscale			# dst DENORM: do the fscale
10893	cmpi.b		%d1,&QNAN
10894	beq.l		dst_qnan		# dst QNAN: return it
10895	bra.l		dst_snan		# any other tag is handled as a dst SNAN
10896
10897	global		sscale_sinf
10898sscale_sinf:					# fscale, src INF: operand error unless dst is a NAN
10899	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag == 0
10900	beq.l		t_operr			# dst tag 0 (presumably NORM): operand error
10901	cmpi.b		%d1,&QNAN
10902	beq.l		dst_qnan		# dst QNAN: return it
10903	cmpi.b		%d1,&SNAN
10904	beq.l		dst_snan		# dst SNAN: return it
10905	bra.l		t_operr			# every remaining dst tag: operand error
10906
10907########################################################################
10908
10909#
10910# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10911#	       A NAN in the dst takes priority over the src QNAN.
10912	global		sop_sqnan
10913sop_sqnan:
10914	mov.b		DTAG(%a6),%d1		# d1 = dst tag
10915	cmpi.b		%d1,&QNAN
10916	beq.b		dst_qnan		# dst QNAN: return the dst
10917	cmpi.b		%d1,&SNAN
10918	beq.b		dst_snan		# dst SNAN: return the dst
10919	bra.b		src_qnan		# dst is not a NAN: return the src QNAN
10920
10921#
10922# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10923#	       A NAN in the dst is still the value returned in fp0.
10924	global		sop_ssnan
10925sop_ssnan:
10926	mov.b		DTAG(%a6),%d1		# d1 = dst tag
10927	cmpi.b		%d1,&QNAN
10928	beq.b		dst_qnan_src_snan	# dst QNAN: return it, but flag the src SNAN first
10929	cmpi.b		%d1,&SNAN
10930	beq.b		dst_snan		# dst SNAN: return the dst
10931	bra.b		src_snan		# dst is not a NAN: return the src SNAN
10932
10933dst_qnan_src_snan:
10934	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10935	bra.b		dst_qnan		# result in fp0 is the dst QNAN
10936
10937#
10938# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10939#	      d0 is overwritten with the FPSR read back after the move.
10940	global		dst_snan
10941dst_snan:
10942	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
10943	fmov.l		%fpsr,%d0		# catch resulting status
10944	or.l		%d0,USER_FPSR(%a6)	# store status (OR-ed in; existing bits are kept)
10945	rts
10946
10947#
10948# dst_qnan(): Return the dst QNAN in fp0; FPSR_CC = 'NAN', plus 'N' if negative.
10949#
10950	global		dst_qnan
10951dst_qnan:
10952	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
10953	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
10954	bmi.b		dst_qnan_m		# negative QNAN
10955dst_qnan_p:
10956	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
10957	rts
10958dst_qnan_m:
10959	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# set 'N','NAN' ccode bits
10960	rts
10961
10962#
10963# src_snan(): Return the src SNAN w/ the SNAN bit set.
10964#	      d0 is overwritten with the FPSR read back after the move.
10965	global		src_snan
10966src_snan:
10967	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
10968	fmov.l		%fpsr,%d0		# catch resulting status
10969	or.l		%d0,USER_FPSR(%a6)	# store status (OR-ed in; existing bits are kept)
10970	rts
10971
10972#
10973# src_qnan(): Return the src QNAN in fp0; FPSR_CC = 'NAN', plus 'N' if negative.
10974#
10975	global		src_qnan
10976src_qnan:
10977	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
10978	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
10979	bmi.b		src_qnan_m		# negative QNAN: use this routine's own tail
10980src_qnan_p:
10981	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
10982	rts
10983src_qnan_m:
10984	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# set 'N','NAN' ccode bits
10985	rts
10986
10987#
10988# fkern2.s:
10989#	These entry points are used by the exception handler
10990# routines where an instruction is selected by an index into
10991# a large jump table corresponding to a given instruction which
10992# has been decoded. Flow continues here where we now decode
10993# further according to the source operand type.
10994#
10995
10996	global		fsinh
10997fsinh:						# fsinh: dispatch on the src operand tag
10998	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
10999	beq.l		ssinh			# tag 0 (presumably NORM)
11000	cmpi.b		%d1,&ZERO
11001	beq.l		src_zero		# ZERO: zero w/ sign of src
11002	cmpi.b		%d1,&INF
11003	beq.l		src_inf			# INF: inf w/ sign of src
11004	cmpi.b		%d1,&DENORM
11005	beq.l		ssinhd			# DENORM entry point
11006	cmpi.b		%d1,&QNAN
11007	beq.l		src_qnan		# QNAN: return the src QNAN
11008	bra.l		src_snan		# any other tag is handled as an SNAN
11009
11010	global		flognp1
11011flognp1:					# flognp1: dispatch on the src operand tag
11012	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11013	beq.l		slognp1			# tag 0 (presumably NORM)
11014	cmpi.b		%d1,&ZERO
11015	beq.l		src_zero		# ZERO: zero w/ sign of src
11016	cmpi.b		%d1,&INF
11017	beq.l		sopr_inf		# INF: +INF, or operand error if negative
11018	cmpi.b		%d1,&DENORM
11019	beq.l		slognp1d		# DENORM entry point
11020	cmpi.b		%d1,&QNAN
11021	beq.l		src_qnan		# QNAN: return the src QNAN
11022	bra.l		src_snan		# any other tag is handled as an SNAN
11023
11024	global		fetoxm1
11025fetoxm1:					# fetoxm1: dispatch on the src operand tag
11026	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11027	beq.l		setoxm1			# tag 0 (presumably NORM)
11028	cmpi.b		%d1,&ZERO
11029	beq.l		src_zero		# ZERO: zero w/ sign of src
11030	cmpi.b		%d1,&INF
11031	beq.l		setoxm1i		# INF: -1 if negative, else +INF
11032	cmpi.b		%d1,&DENORM
11033	beq.l		setoxm1d		# DENORM entry point
11034	cmpi.b		%d1,&QNAN
11035	beq.l		src_qnan		# QNAN: return the src QNAN
11036	bra.l		src_snan		# any other tag is handled as an SNAN
11037
11038	global		ftanh
11039ftanh:						# ftanh: dispatch on the src operand tag
11040	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11041	beq.l		stanh			# tag 0 (presumably NORM)
11042	cmpi.b		%d1,&ZERO
11043	beq.l		src_zero		# ZERO: zero w/ sign of src
11044	cmpi.b		%d1,&INF
11045	beq.l		src_one			# INF: one w/ sign of src
11046	cmpi.b		%d1,&DENORM
11047	beq.l		stanhd			# DENORM entry point
11048	cmpi.b		%d1,&QNAN
11049	beq.l		src_qnan		# QNAN: return the src QNAN
11050	bra.l		src_snan		# any other tag is handled as an SNAN
11051
11052	global		fatan
11053fatan:						# fatan: dispatch on the src operand tag
11054	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11055	beq.l		satan			# tag 0 (presumably NORM)
11056	cmpi.b		%d1,&ZERO
11057	beq.l		src_zero		# ZERO: zero w/ sign of src
11058	cmpi.b		%d1,&INF
11059	beq.l		spi_2			# INF: pi/2 w/ sign of src
11060	cmpi.b		%d1,&DENORM
11061	beq.l		satand			# DENORM entry point
11062	cmpi.b		%d1,&QNAN
11063	beq.l		src_qnan		# QNAN: return the src QNAN
11064	bra.l		src_snan		# any other tag is handled as an SNAN
11065
11066	global		fasin
11067fasin:						# fasin: dispatch on the src operand tag
11068	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11069	beq.l		sasin			# tag 0 (presumably NORM)
11070	cmpi.b		%d1,&ZERO
11071	beq.l		src_zero		# ZERO: zero w/ sign of src
11072	cmpi.b		%d1,&INF
11073	beq.l		t_operr			# INF: operand error
11074	cmpi.b		%d1,&DENORM
11075	beq.l		sasind			# DENORM entry point
11076	cmpi.b		%d1,&QNAN
11077	beq.l		src_qnan		# QNAN: return the src QNAN
11078	bra.l		src_snan		# any other tag is handled as an SNAN
11079
11080	global		fatanh
11081fatanh:						# fatanh: dispatch on the src operand tag
11082	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11083	beq.l		satanh			# tag 0 (presumably NORM)
11084	cmpi.b		%d1,&ZERO
11085	beq.l		src_zero		# ZERO: zero w/ sign of src
11086	cmpi.b		%d1,&INF
11087	beq.l		t_operr			# INF: operand error
11088	cmpi.b		%d1,&DENORM
11089	beq.l		satanhd			# DENORM entry point
11090	cmpi.b		%d1,&QNAN
11091	beq.l		src_qnan		# QNAN: return the src QNAN
11092	bra.l		src_snan		# any other tag is handled as an SNAN
11093
11094	global		fsine
11095fsine:						# fsin (table entry 0e): dispatch on the src operand tag
11096	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11097	beq.l		ssin			# tag 0 (presumably NORM)
11098	cmpi.b		%d1,&ZERO
11099	beq.l		src_zero		# ZERO: zero w/ sign of src
11100	cmpi.b		%d1,&INF
11101	beq.l		t_operr			# INF: operand error
11102	cmpi.b		%d1,&DENORM
11103	beq.l		ssind			# DENORM entry point
11104	cmpi.b		%d1,&QNAN
11105	beq.l		src_qnan		# QNAN: return the src QNAN
11106	bra.l		src_snan		# any other tag is handled as an SNAN
11107
11108	global		ftan
11109ftan:						# ftan: dispatch on the src operand tag
11110	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11111	beq.l		stan			# tag 0 (presumably NORM)
11112	cmpi.b		%d1,&ZERO
11113	beq.l		src_zero		# ZERO: zero w/ sign of src
11114	cmpi.b		%d1,&INF
11115	beq.l		t_operr			# INF: operand error
11116	cmpi.b		%d1,&DENORM
11117	beq.l		stand			# DENORM entry point
11118	cmpi.b		%d1,&QNAN
11119	beq.l		src_qnan		# QNAN: return the src QNAN
11120	bra.l		src_snan		# any other tag is handled as an SNAN
11121
11122	global		fetox
11123fetox:						# fetox: dispatch on the src operand tag
11124	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11125	beq.l		setox			# tag 0 (presumably NORM)
11126	cmpi.b		%d1,&ZERO
11127	beq.l		ld_pone			# ZERO: result is +1
11128	cmpi.b		%d1,&INF
11129	beq.l		szr_inf			# INF: +0 if negative, else +INF
11130	cmpi.b		%d1,&DENORM
11131	beq.l		setoxd			# DENORM entry point
11132	cmpi.b		%d1,&QNAN
11133	beq.l		src_qnan		# QNAN: return the src QNAN
11134	bra.l		src_snan		# any other tag is handled as an SNAN
11135
11136	global		ftwotox
11137ftwotox:					# ftwotox: dispatch on the src operand tag
11138	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11139	beq.l		stwotox			# tag 0 (presumably NORM)
11140	cmpi.b		%d1,&ZERO
11141	beq.l		ld_pone			# ZERO: result is +1
11142	cmpi.b		%d1,&INF
11143	beq.l		szr_inf			# INF: +0 if negative, else +INF
11144	cmpi.b		%d1,&DENORM
11145	beq.l		stwotoxd		# DENORM entry point
11146	cmpi.b		%d1,&QNAN
11147	beq.l		src_qnan		# QNAN: return the src QNAN
11148	bra.l		src_snan		# any other tag is handled as an SNAN
11149
11150	global		ftentox
11151ftentox:					# ftentox: dispatch on the src operand tag
11152	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11153	beq.l		stentox			# tag 0 (presumably NORM)
11154	cmpi.b		%d1,&ZERO
11155	beq.l		ld_pone			# ZERO: result is +1
11156	cmpi.b		%d1,&INF
11157	beq.l		szr_inf			# INF: +0 if negative, else +INF
11158	cmpi.b		%d1,&DENORM
11159	beq.l		stentoxd		# DENORM entry point
11160	cmpi.b		%d1,&QNAN
11161	beq.l		src_qnan		# QNAN: return the src QNAN
11162	bra.l		src_snan		# any other tag is handled as an SNAN
11163
11164	global		flogn
11165flogn:						# flogn: dispatch on the src operand tag
11166	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11167	beq.l		slogn			# tag 0 (presumably NORM)
11168	cmpi.b		%d1,&ZERO
11169	beq.l		t_dz2			# ZERO: t_dz2 (presumably the divide-by-zero path - confirm)
11170	cmpi.b		%d1,&INF
11171	beq.l		sopr_inf		# INF: +INF, or operand error if negative
11172	cmpi.b		%d1,&DENORM
11173	beq.l		slognd			# DENORM entry point
11174	cmpi.b		%d1,&QNAN
11175	beq.l		src_qnan		# QNAN: return the src QNAN
11176	bra.l		src_snan		# any other tag is handled as an SNAN
11177
11178	global		flog10
11179flog10:						# flog10: dispatch on the src operand tag
11180	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11181	beq.l		slog10			# tag 0 (presumably NORM)
11182	cmpi.b		%d1,&ZERO
11183	beq.l		t_dz2			# ZERO: t_dz2 (presumably the divide-by-zero path - confirm)
11184	cmpi.b		%d1,&INF
11185	beq.l		sopr_inf		# INF: +INF, or operand error if negative
11186	cmpi.b		%d1,&DENORM
11187	beq.l		slog10d			# DENORM entry point
11188	cmpi.b		%d1,&QNAN
11189	beq.l		src_qnan		# QNAN: return the src QNAN
11190	bra.l		src_snan		# any other tag is handled as an SNAN
11191
11192	global		flog2
11193flog2:						# flog2: dispatch on the src operand tag
11194	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11195	beq.l		slog2			# tag 0 (presumably NORM)
11196	cmpi.b		%d1,&ZERO
11197	beq.l		t_dz2			# ZERO: t_dz2 (presumably the divide-by-zero path - confirm)
11198	cmpi.b		%d1,&INF
11199	beq.l		sopr_inf		# INF: +INF, or operand error if negative
11200	cmpi.b		%d1,&DENORM
11201	beq.l		slog2d			# DENORM entry point
11202	cmpi.b		%d1,&QNAN
11203	beq.l		src_qnan		# QNAN: return the src QNAN
11204	bra.l		src_snan		# any other tag is handled as an SNAN
11205
11206	global		fcosh
11207fcosh:						# fcosh: dispatch on the src operand tag
11208	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11209	beq.l		scosh			# tag 0 (presumably NORM)
11210	cmpi.b		%d1,&ZERO
11211	beq.l		ld_pone			# ZERO: result is +1
11212	cmpi.b		%d1,&INF
11213	beq.l		ld_pinf			# INF of either sign: result is +INF
11214	cmpi.b		%d1,&DENORM
11215	beq.l		scoshd			# DENORM entry point
11216	cmpi.b		%d1,&QNAN
11217	beq.l		src_qnan		# QNAN: return the src QNAN
11218	bra.l		src_snan		# any other tag is handled as an SNAN
11219
11220	global		facos
11221facos:						# facos: dispatch on the src operand tag
11222	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11223	beq.l		sacos			# tag 0 (presumably NORM)
11224	cmpi.b		%d1,&ZERO
11225	beq.l		ld_ppi2			# ZERO of either sign: result is +pi/2
11226	cmpi.b		%d1,&INF
11227	beq.l		t_operr			# INF: operand error
11228	cmpi.b		%d1,&DENORM
11229	beq.l		sacosd			# DENORM entry point
11230	cmpi.b		%d1,&QNAN
11231	beq.l		src_qnan		# QNAN: return the src QNAN
11232	bra.l		src_snan		# any other tag is handled as an SNAN
11233
11234	global		fcos
11235fcos:						# fcos: dispatch on the src operand tag
11236	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11237	beq.l		scos			# tag 0 (presumably NORM)
11238	cmpi.b		%d1,&ZERO
11239	beq.l		ld_pone			# ZERO: result is +1
11240	cmpi.b		%d1,&INF
11241	beq.l		t_operr			# INF: operand error
11242	cmpi.b		%d1,&DENORM
11243	beq.l		scosd			# DENORM entry point
11244	cmpi.b		%d1,&QNAN
11245	beq.l		src_qnan		# QNAN: return the src QNAN
11246	bra.l		src_snan		# any other tag is handled as an SNAN
11247
11248	global		fgetexp
11249fgetexp:					# fgetexp: dispatch on the src operand tag
11250	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11251	beq.l		sgetexp			# tag 0 (presumably NORM)
11252	cmpi.b		%d1,&ZERO
11253	beq.l		src_zero		# ZERO: zero w/ sign of src
11254	cmpi.b		%d1,&INF
11255	beq.l		t_operr			# INF: operand error
11256	cmpi.b		%d1,&DENORM
11257	beq.l		sgetexpd		# DENORM entry point
11258	cmpi.b		%d1,&QNAN
11259	beq.l		src_qnan		# QNAN: return the src QNAN
11260	bra.l		src_snan		# any other tag is handled as an SNAN
11261
11262	global		fgetman
11263fgetman:					# fgetman: dispatch on the src operand tag
11264	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11265	beq.l		sgetman			# tag 0 (presumably NORM)
11266	cmpi.b		%d1,&ZERO
11267	beq.l		src_zero		# ZERO: zero w/ sign of src
11268	cmpi.b		%d1,&INF
11269	beq.l		t_operr			# INF: operand error
11270	cmpi.b		%d1,&DENORM
11271	beq.l		sgetmand		# DENORM entry point
11272	cmpi.b		%d1,&QNAN
11273	beq.l		src_qnan		# QNAN: return the src QNAN
11274	bra.l		src_snan		# any other tag is handled as an SNAN
11275
11276	global		fsincos
11277fsincos:					# fsincos: dispatch on the src operand tag
11278	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11279	beq.l		ssincos			# tag 0 (presumably NORM)
11280	cmpi.b		%d1,&ZERO
11281	beq.l		ssincosz		# ZERO: cos = +1, sin = zero w/ sign of src
11282	cmpi.b		%d1,&INF
11283	beq.l		ssincosi		# INF: cos = QNAN, then operand error
11284	cmpi.b		%d1,&DENORM
11285	beq.l		ssincosd		# DENORM entry point
11286	cmpi.b		%d1,&QNAN
11287	beq.l		ssincosqnan		# QNAN: stored as cos and returned as sin
11288	bra.l		ssincossnan		# any other tag is handled as an SNAN
11289
11290	global		fmod
11291fmod:						# fmod: dispatch on the src tag; each target then checks DTAG
11292	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11293	beq.l		smod_snorm		# tag 0 (presumably NORM)
11294	cmpi.b		%d1,&ZERO
11295	beq.l		smod_szero		# src ZERO
11296	cmpi.b		%d1,&INF
11297	beq.l		smod_sinf		# src INF
11298	cmpi.b		%d1,&DENORM
11299	beq.l		smod_sdnrm		# src DENORM
11300	cmpi.b		%d1,&QNAN
11301	beq.l		sop_sqnan		# src QNAN
11302	bra.l		sop_ssnan		# any other tag is handled as a src SNAN
11303
11304	global		frem
11305frem:						# frem: dispatch on the src tag; each target then checks DTAG
11306	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11307	beq.l		srem_snorm		# tag 0 (presumably NORM)
11308	cmpi.b		%d1,&ZERO
11309	beq.l		srem_szero		# src ZERO
11310	cmpi.b		%d1,&INF
11311	beq.l		srem_sinf		# src INF
11312	cmpi.b		%d1,&DENORM
11313	beq.l		srem_sdnrm		# src DENORM
11314	cmpi.b		%d1,&QNAN
11315	beq.l		sop_sqnan		# src QNAN
11316	bra.l		sop_ssnan		# any other tag is handled as a src SNAN
11317
11318	global		fscale
11319fscale:						# fscale: dispatch on the src tag; each target then checks DTAG
11320	mov.b		STAG(%a6),%d1		# d1 = src tag; Z set if tag == 0
11321	beq.l		sscale_snorm		# tag 0 (presumably NORM)
11322	cmpi.b		%d1,&ZERO
11323	beq.l		sscale_szero		# src ZERO
11324	cmpi.b		%d1,&INF
11325	beq.l		sscale_sinf		# src INF
11326	cmpi.b		%d1,&DENORM
11327	beq.l		sscale_sdnrm		# src DENORM
11328	cmpi.b		%d1,&QNAN
11329	beq.l		sop_sqnan		# src QNAN
11330	bra.l		sop_ssnan		# any other tag is handled as a src SNAN
11331
11332#########################################################################
11333# XDEF ****************************************************************	#
11334#	fgen_except(): catch an exception during transcendental		#
11335#		       emulation					#
11336#									#
11337# XREF ****************************************************************	#
11338#	fmul() - emulate a multiply instruction				#
11339#	fadd() - emulate an add instruction				#
11340#	fin() - emulate an fmove instruction				#
11341#									#
11342# INPUT ***************************************************************	#
11343#	fp0 = destination operand					#
11344#	d1  = type of instruction that took exception (the code below	#
11345#	      tests d1, not d0); fsave frame on stack = source operand	#
11346#									#
11347# OUTPUT **************************************************************	#
11348#	fp0 = result							#
11349#	fp1 = EXOP							#
11350#									#
11351# ALGORITHM ***********************************************************	#
11352#	An exception occurred on the last instruction of the		#
11353# transcendental emulation. hopefully, this won't be happening much	#
11354# because it will be VERY slow.						#
11355#	The only exceptions capable of passing through here are		#
11356# Overflow, Underflow, and Unsupported Data Type.			#
11357#									#
11358#########################################################################
11359
11360	global		fgen_except
11361fgen_except:
11362	cmpi.b		0x3(%sp),&0x7		# is exception UNSUPP?
11363	beq.b		fge_unsupp		# yes
11364
11365	mov.b		&NORM,STAG(%a6)		# otherwise tag the src as NORM
11366
11367fge_cont:
11368	mov.b		&NORM,DTAG(%a6)		# the dst is always tagged NORM
11369
11370# ok, I have a problem with putting the dst op at FP_DST. the emulation
11371# routines aren't supposed to alter the operands but we've just squashed
11372# FP_DST here...
11373
11374# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375# than a potential bug. to begin with, only the dyadic functions
11376# frem,fmod, and fscale would get the dst trashed here. But, for
11377# the 060SP, the FP_DST is never used again anyways.
11378	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0
11379
11380	lea		0x4(%sp),%a0		# pass: ptr to src op
11381	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
11382
11383	cmpi.b		%d1,&FMOV_OP
11384	beq.b		fge_fin			# it was an "fmov"
11385	cmpi.b		%d1,&FADD_OP
11386	beq.b		fge_fadd		# it was an "fadd"
11387fge_fmul:					# any other d1 value is treated as an "fmul"
11388	bsr.l		fmul			# d0 has not been modified here; fmul reads it
11389	rts
11390fge_fadd:
11391	bsr.l		fadd
11392	rts
11393fge_fin:
11394	bsr.l		fin
11395	rts
11396
11397fge_unsupp:
11398	mov.b		&DENORM,STAG(%a6)	# UNSUPP exception: tag the src as DENORM
11399	bra.b		fge_cont
11400
11401#
11402# This table holds the offsets of the emulation routines for each individual
11403# math operation relative to the address of this table. Included are
11404# routines like fadd/fmul/fabs as well as the transcendentals.
11405# The location within the table is determined by the extension bits of the
11406# operation longword. Entries written "tbl_unsupp - tbl_unsupp" hold offset 0
11407# and name no routine.
11408
11409	swbeg		&109				# 109 entries: 0x00 - 0x6c
11410tbl_unsupp:
11411	long		fin		- tbl_unsupp	# 00: fmove
11412	long		fint		- tbl_unsupp	# 01: fint
11413	long		fsinh		- tbl_unsupp	# 02: fsinh
11414	long		fintrz		- tbl_unsupp	# 03: fintrz
11415	long		fsqrt		- tbl_unsupp	# 04: fsqrt
11416	long		tbl_unsupp	- tbl_unsupp	# 05: (offset 0, no routine)
11417	long		flognp1		- tbl_unsupp	# 06: flognp1
11418	long		tbl_unsupp	- tbl_unsupp	# 07: (offset 0, no routine)
11419	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
11420	long		ftanh		- tbl_unsupp	# 09: ftanh
11421	long		fatan		- tbl_unsupp	# 0a: fatan
11422	long		tbl_unsupp	- tbl_unsupp	# 0b: (offset 0, no routine)
11423	long		fasin		- tbl_unsupp	# 0c: fasin
11424	long		fatanh		- tbl_unsupp	# 0d: fatanh
11425	long		fsine		- tbl_unsupp	# 0e: fsin (routine is named fsine)
11426	long		ftan		- tbl_unsupp	# 0f: ftan
11427	long		fetox		- tbl_unsupp	# 10: fetox
11428	long		ftwotox		- tbl_unsupp	# 11: ftwotox
11429	long		ftentox		- tbl_unsupp	# 12: ftentox
11430	long		tbl_unsupp	- tbl_unsupp	# 13: (offset 0, no routine)
11431	long		flogn		- tbl_unsupp	# 14: flogn
11432	long		flog10		- tbl_unsupp	# 15: flog10
11433	long		flog2		- tbl_unsupp	# 16: flog2
11434	long		tbl_unsupp	- tbl_unsupp	# 17: (offset 0, no routine)
11435	long		fabs		- tbl_unsupp	# 18: fabs
11436	long		fcosh		- tbl_unsupp	# 19: fcosh
11437	long		fneg		- tbl_unsupp	# 1a: fneg
11438	long		tbl_unsupp	- tbl_unsupp	# 1b: (offset 0, no routine)
11439	long		facos		- tbl_unsupp	# 1c: facos
11440	long		fcos		- tbl_unsupp	# 1d: fcos
11441	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
11442	long		fgetman		- tbl_unsupp	# 1f: fgetman
11443	long		fdiv		- tbl_unsupp	# 20: fdiv
11444	long		fmod		- tbl_unsupp	# 21: fmod
11445	long		fadd		- tbl_unsupp	# 22: fadd
11446	long		fmul		- tbl_unsupp	# 23: fmul
11447	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
11448	long		frem		- tbl_unsupp	# 25: frem
11449	long		fscale		- tbl_unsupp	# 26: fscale
11450	long		fsglmul		- tbl_unsupp	# 27: fsglmul
11451	long		fsub		- tbl_unsupp	# 28: fsub
11452	long		tbl_unsupp	- tbl_unsupp	# 29: (offset 0, no routine)
11453	long		tbl_unsupp	- tbl_unsupp	# 2a: (offset 0, no routine)
11454	long		tbl_unsupp	- tbl_unsupp	# 2b: (offset 0, no routine)
11455	long		tbl_unsupp	- tbl_unsupp	# 2c: (offset 0, no routine)
11456	long		tbl_unsupp	- tbl_unsupp	# 2d: (offset 0, no routine)
11457	long		tbl_unsupp	- tbl_unsupp	# 2e: (offset 0, no routine)
11458	long		tbl_unsupp	- tbl_unsupp	# 2f: (offset 0, no routine)
11459	long		fsincos		- tbl_unsupp	# 30: fsincos
11460	long		fsincos		- tbl_unsupp	# 31: fsincos
11461	long		fsincos		- tbl_unsupp	# 32: fsincos
11462	long		fsincos		- tbl_unsupp	# 33: fsincos
11463	long		fsincos		- tbl_unsupp	# 34: fsincos
11464	long		fsincos		- tbl_unsupp	# 35: fsincos
11465	long		fsincos		- tbl_unsupp	# 36: fsincos
11466	long		fsincos		- tbl_unsupp	# 37: fsincos
11467	long		fcmp		- tbl_unsupp	# 38: fcmp
11468	long		tbl_unsupp	- tbl_unsupp	# 39: (offset 0, no routine)
11469	long		ftst		- tbl_unsupp	# 3a: ftst
11470	long		tbl_unsupp	- tbl_unsupp	# 3b: (offset 0, no routine)
11471	long		tbl_unsupp	- tbl_unsupp	# 3c: (offset 0, no routine)
11472	long		tbl_unsupp	- tbl_unsupp	# 3d: (offset 0, no routine)
11473	long		tbl_unsupp	- tbl_unsupp	# 3e: (offset 0, no routine)
11474	long		tbl_unsupp	- tbl_unsupp	# 3f: (offset 0, no routine)
11475	long		fsin		- tbl_unsupp	# 40: fsmove (routine fsin; the sine routine is fsine)
11476	long		fssqrt		- tbl_unsupp	# 41: fssqrt
11477	long		tbl_unsupp	- tbl_unsupp	# 42: (offset 0, no routine)
11478	long		tbl_unsupp	- tbl_unsupp	# 43: (offset 0, no routine)
11479	long		fdin		- tbl_unsupp	# 44: fdmove
11480	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
11481	long		tbl_unsupp	- tbl_unsupp	# 46: (offset 0, no routine)
11482	long		tbl_unsupp	- tbl_unsupp	# 47: (offset 0, no routine)
11483	long		tbl_unsupp	- tbl_unsupp	# 48: (offset 0, no routine)
11484	long		tbl_unsupp	- tbl_unsupp	# 49: (offset 0, no routine)
11485	long		tbl_unsupp	- tbl_unsupp	# 4a: (offset 0, no routine)
11486	long		tbl_unsupp	- tbl_unsupp	# 4b: (offset 0, no routine)
11487	long		tbl_unsupp	- tbl_unsupp	# 4c: (offset 0, no routine)
11488	long		tbl_unsupp	- tbl_unsupp	# 4d: (offset 0, no routine)
11489	long		tbl_unsupp	- tbl_unsupp	# 4e: (offset 0, no routine)
11490	long		tbl_unsupp	- tbl_unsupp	# 4f: (offset 0, no routine)
11491	long		tbl_unsupp	- tbl_unsupp	# 50: (offset 0, no routine)
11492	long		tbl_unsupp	- tbl_unsupp	# 51: (offset 0, no routine)
11493	long		tbl_unsupp	- tbl_unsupp	# 52: (offset 0, no routine)
11494	long		tbl_unsupp	- tbl_unsupp	# 53: (offset 0, no routine)
11495	long		tbl_unsupp	- tbl_unsupp	# 54: (offset 0, no routine)
11496	long		tbl_unsupp	- tbl_unsupp	# 55: (offset 0, no routine)
11497	long		tbl_unsupp	- tbl_unsupp	# 56: (offset 0, no routine)
11498	long		tbl_unsupp	- tbl_unsupp	# 57: (offset 0, no routine)
11499	long		fsabs		- tbl_unsupp	# 58: fsabs
11500	long		tbl_unsupp	- tbl_unsupp	# 59: (offset 0, no routine)
11501	long		fsneg		- tbl_unsupp	# 5a: fsneg
11502	long		tbl_unsupp	- tbl_unsupp	# 5b: (offset 0, no routine)
11503	long		fdabs		- tbl_unsupp	# 5c: fdabs
11504	long		tbl_unsupp	- tbl_unsupp	# 5d: (offset 0, no routine)
11505	long		fdneg		- tbl_unsupp	# 5e: fdneg
11506	long		tbl_unsupp	- tbl_unsupp	# 5f: (offset 0, no routine)
11507	long		fsdiv		- tbl_unsupp	# 60: fsdiv
11508	long		tbl_unsupp	- tbl_unsupp	# 61: (offset 0, no routine)
11509	long		fsadd		- tbl_unsupp	# 62: fsadd
11510	long		fsmul		- tbl_unsupp	# 63: fsmul
11511	long		fddiv		- tbl_unsupp	# 64: fddiv
11512	long		tbl_unsupp	- tbl_unsupp	# 65: (offset 0, no routine)
11513	long		fdadd		- tbl_unsupp	# 66: fdadd
11514	long		fdmul		- tbl_unsupp	# 67: fdmul
11515	long		fssub		- tbl_unsupp	# 68: fssub
11516	long		tbl_unsupp	- tbl_unsupp	# 69: (offset 0, no routine)
11517	long		tbl_unsupp	- tbl_unsupp	# 6a: (offset 0, no routine)
11518	long		tbl_unsupp	- tbl_unsupp	# 6b: (offset 0, no routine)
11519	long		fdsub		- tbl_unsupp	# 6c: fdsub
11520
11521#########################################################################
11522# XDEF ****************************************************************	#
11523#	fmul(): emulates the fmul instruction				#
11524#	fsmul(): emulates the fsmul instruction				#
11525#	fdmul(): emulates the fdmul instruction				#
11526#									#
11527# XREF ****************************************************************	#
11528#	scale_to_zero_src() - scale src exponent to zero		#
11529#	scale_to_zero_dst() - scale dst exponent to zero		#
11530#	unf_res() - return default underflow result			#
11531#	ovf_res() - return default overflow result			#
11532#	res_qnan() - return QNAN result					#
11533#	res_snan() - return SNAN result					#
11534#									#
11535# INPUT ***************************************************************	#
11536#	a0 = pointer to extended precision source operand		#
11537#	a1 = pointer to extended precision destination operand		#
11538#	d0  rnd prec,mode						#
11539#									#
11540# OUTPUT **************************************************************	#
11541#	fp0 = result							#
11542#	fp1 = EXOP (if exception occurred)				#
11543#									#
11544# ALGORITHM ***********************************************************	#
11545#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11546# norms/denorms into ext/sgl/dbl precision.				#
11547#	For norms/denorms, scale the exponents such that a multiply	#
11548# instruction won't cause an exception. Use the regular fmul to		#
11549# compute a result. Check if the regular operands would have taken	#
11550# an exception. If so, return the default overflow/underflow result	#
11551# and return the EXOP if exceptions are enabled. Else, scale the	#
11552# result operand to the proper exponent.				#
11553#									#
11554#########################################################################
11555
11556	align		0x10
11557tbl_fmul_ovfl:					# fmul compares its scale factor against these,
11558	long		0x3fff - 0x7ffe		# ext_max  (indexed by rounding precision * 4)
11559	long		0x3fff - 0x407e		# sgl_max
11560	long		0x3fff - 0x43fe		# dbl_max
11561tbl_fmul_unfl:					# same indexing, underflow thresholds
11562	long		0x3fff + 0x0001		# ext_unfl
11563	long		0x3fff - 0x3f80		# sgl_unfl
11564	long		0x3fff - 0x3c00		# dbl_unfl
11565
11566	global		fsmul
11567fsmul:						# fsmul: force single precision, then share fmul's body
11568	andi.b		&0x30,%d0		# clear rnd prec (only bits 5-4 of d0.b survive)
11569	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11570	bra.b		fmul
11571
11572	global		fdmul
11573fdmul:						# fdmul: force double precision, then fall through into fmul
11574	andi.b		&0x30,%d0		# clear rnd prec (only bits 5-4 of d0.b survive)
11575	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11576
11577	global		fmul
11578fmul:
11579	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11580
11581	clr.w		%d1
11582	mov.b		DTAG(%a6),%d1
11583	lsl.b		&0x3,%d1
11584	or.b		STAG(%a6),%d1		# combine src tags
11585	bne.w		fmul_not_norm		# optimize on non-norm input
11586
11587fmul_norm:
11588	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11589	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11590	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11591
11592	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11593	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11594	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11595
11596	bsr.l		scale_to_zero_src	# scale src exponent
11597	mov.l		%d0,-(%sp)		# save scale factor 1
11598
11599	bsr.l		scale_to_zero_dst	# scale dst exponent
11600
11601	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
11602
11603	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
11604	lsr.b		&0x6,%d1		# shift to lo bits
11605	mov.l		(%sp)+,%d0		# load S.F.
11606	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607	beq.w		fmul_may_ovfl		# result may rnd to overflow
11608	blt.w		fmul_ovfl		# result will overflow
11609
11610	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611	beq.w		fmul_may_unfl		# result may rnd to no unfl
11612	bgt.w		fmul_unfl		# result will underflow
11613
11614#
11615# NORMAL:
11616# - the result of the multiply operation will neither overflow nor underflow.
11617# - do the multiply to the proper precision and rounding mode.
11618# - scale the result exponent using the scale factor. if both operands were
11619# normalized then we really don't need to go through this scaling. but for now,
11620# this will do.
11621#
11622fmul_normal:
11623	fmovm.x		FP_SCR1(%a6),&0x80	# load scaled dst operand into fp0
11624
11625	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
11626	fmov.l		&0x0,%fpsr		# clear FPSR
11627
11628	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply (by scaled src)
11629
11630	fmov.l		%fpsr,%d1		# save status
11631	fmov.l		&0x0,%fpcr		# clear FPCR
11632
11633	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11634# also entered from fmul_may_ovfl/fmul_may_unfl with fp0 = result, d0 = S.F.
11635fmul_normal_exit:
11636	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11637	mov.l		%d2,-(%sp)		# save d2
11638	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
11639	mov.l		%d1,%d2			# make a copy
11640	andi.l		&0x7fff,%d1		# strip sign
11641	andi.w		&0x8000,%d2		# keep old sign
11642	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
11643	or.w		%d2,%d1			# concat old sign,new exp
11644	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11645	mov.l		(%sp)+,%d2		# restore d2
11646	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
11647	rts
11648
11649#
11650# OVERFLOW:
11651# - the result of the multiply operation is an overflow.
11652# - do the multiply to the proper precision and rounding mode in order to
11653# set the inexact bits.
11654# - calculate the default result and return it in fp0.
11655# - if overflow or inexact is enabled, we need a multiply result rounded to
11656# extended precision. if the original operation was extended, then we have this
11657# result. if the original operation was single or double, we have to do another
11658# multiply using extended precision and the correct rounding mode. the result
11659# of this operation then has its exponent scaled by -0x6000 to create the
11660# exceptional operand.
11661#
11662fmul_ovfl:
11663	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
11664
11665	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11666	fmov.l		&0x0,%fpsr		# clear FPSR
11667
11668	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11669
11670	fmov.l		%fpsr,%d1		# save status
11671	fmov.l		&0x0,%fpcr		# clear FPCR
11672
11673	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11674
11675# the ovfl bits are set only from here on because fmul_may_ovfl jumps in here
11676fmul_ovfl_tst:
11677	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11678
11679	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
11680	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11681	bne.b		fmul_ovfl_ena		# yes; go create the EXOP first
11682
11683# calculate the default result (fmul_ovfl_ena_cont also branches back here)
11684fmul_ovfl_dis:
11685	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11686	sne		%d1			# sign param: 0xff if 'N' set, else 0
11687	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
11688	bsr.l		ovf_res			# calculate default result
11689	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11690	fmovm.x		(%a0),&0x80		# return default result in fp0 (a0 presumably set by ovf_res)
11691	rts
11692
11693#
11694# OVFL or INEX is enabled; Create EXOP:
11695# - if precision is extended, then we have the EXOP. simply bias the exponent
11696# with an extra -0x6000. if the precision is single or double, we need to
11697# calculate a result rounded to extended precision.
11698#
11699fmul_ovfl_ena:
11700	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
11701	andi.b		&0xc0,%d1		# test the rnd prec
11702	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
11703
11704fmul_ovfl_ena_cont:
11705	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
11706
11707	mov.l		%d2,-(%sp)		# save d2
11708	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11709	mov.w		%d1,%d2			# make a copy
11710	andi.l		&0x7fff,%d1		# strip sign
11711	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
11712	subi.l		&0x6000,%d1		# subtract bias
11713	andi.w		&0x7fff,%d1		# clear sign bit
11714	andi.w		&0x8000,%d2		# keep old sign
11715	or.w		%d2,%d1			# concat old sign,new exp
11716	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11717	mov.l		(%sp)+,%d2		# restore d2
11718	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11719	bra.b		fmul_ovfl_dis		# now go calculate the default result
11720# sgl/dbl: redo the multiply with the prec bits cleared (i.e. extended) for the EXOP
11721fmul_ovfl_ena_sd:
11722	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
11723
11724	mov.l		L_SCR3(%a6),%d1
11725	andi.b		&0x30,%d1		# keep rnd mode only
11726	fmov.l		%d1,%fpcr		# set FPCR
11727
11728	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11729
11730	fmov.l		&0x0,%fpcr		# clear FPCR
11731	bra.b		fmul_ovfl_ena_cont
11732
11733#
11734# may OVERFLOW:
11735# - the result of the multiply operation MAY overflow.
11736# - do the multiply to the proper precision and rounding mode in order to
11737# set the inexact bits.
11738# - if |scaled result| >= 2 it overflowed (fmul_ovfl_tst); else unscale, return.
11739#
11740fmul_may_ovfl:
11741	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
11742
11743	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11744	fmov.l		&0x0,%fpsr		# clear FPSR
11745
11746	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11747
11748	fmov.l		%fpsr,%d1		# save status
11749	fmov.l		&0x0,%fpcr		# clear FPCR
11750
11751	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11752
11753	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
11754	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
11755	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
11756
11757# no, it didn't overflow; we have correct result
11758	bra.w		fmul_normal_exit	# unscale the exponent and return
11759
11760#
11761# UNDERFLOW:
11762# - the result of the multiply operation is an underflow.
11763# - do the multiply in extended precision, round-to-zero, in order to
11764# set the inexact bits.
11765# - calculate the default result and return it in fp0.
11766# - if underflow or inexact is enabled, we need a multiply result rounded to
11767# extended precision. if the original operation was extended, then we have this
11768# result. if the original operation was single or double, we have to do another
11769# multiply using extended precision and the correct rounding mode. the result
11770# of this operation then has its exponent scaled by +0x6000 to create the
11771# exceptional operand.
11772#
11773fmul_unfl:
11774	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11775
11776# for fun, let's use only extended precision, round to zero. then, let
11777# the unf_res() routine figure out all the rest.
11778# will we get the correct answer.
11779	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
11780
11781	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11782	fmov.l		&0x0,%fpsr		# clear FPSR
11783
11784	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11785
11786	fmov.l		%fpsr,%d1		# save status
11787	fmov.l		&0x0,%fpcr		# clear FPCR
11788
11789	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11790
11791	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
11792	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11793	bne.b		fmul_unfl_ena		# yes; go create the EXOP first
11794
11795fmul_unfl_dis:
11796	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11797
11798	lea		FP_SCR0(%a6),%a0	# pass: result addr
11799	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11800	bsr.l		unf_res			# calculate default result
11801	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
11802	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11803	rts
11804
11805#
11806# UNFL or INEX is enabled; create the EXOP in fp1 (fp0 is left untouched).
11807#
11808fmul_unfl_ena:
11809	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11810
11811	mov.l		L_SCR3(%a6),%d1
11812	andi.b		&0xc0,%d1		# is precision extended?
11813	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
11814
11815# if the rnd mode is anything but RZ, then we have to re-do the above
11816# multiplication because we used RZ for all.
11817	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11818
11819fmul_unfl_ena_cont:
11820	fmov.l		&0x0,%fpsr		# clear FPSR
11821
11822	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11823
11824	fmov.l		&0x0,%fpcr		# clear FPCR
11825
11826	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11827	mov.l		%d2,-(%sp)		# save d2
11828	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11829	mov.l		%d1,%d2			# make a copy
11830	andi.l		&0x7fff,%d1		# strip sign
11831	andi.w		&0x8000,%d2		# keep old sign
11832	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
11833	addi.l		&0x6000,%d1		# add bias
11834	andi.w		&0x7fff,%d1		# clear sign bit
11835	or.w		%d2,%d1			# concat old sign,new exp
11836	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11837	mov.l		(%sp)+,%d2		# restore d2
11838	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11839	bra.w		fmul_unfl_dis		# now go calculate the default result
11840# sgl/dbl: use the user's rnd mode with the prec bits cleared (i.e. extended)
11841fmul_unfl_ena_sd:
11842	mov.l		L_SCR3(%a6),%d1
11843	andi.b		&0x30,%d1		# use only rnd mode
11844	fmov.l		%d1,%fpcr		# set FPCR
11845
11846	bra.b		fmul_unfl_ena_cont
11847
11848# MAY UNDERFLOW:
11849# -use the correct rounding mode and precision. this code favors operations
11850# that do not underflow.
11851fmul_may_unfl:
11852	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
11853
11854	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11855	fmov.l		&0x0,%fpsr		# clear FPSR
11856
11857	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11858
11859	fmov.l		%fpsr,%d1		# save status
11860	fmov.l		&0x0,%fpcr		# clear FPCR
11861
11862	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11863
11864	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
11865	fcmp.b		%fp1,&0x2		# compare |result| against 2.b
11866	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
11867	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
11868
11869#
11870# we still don't know if underflow occurred. result is ~ equal to 2. but,
11871# we don't know if the result was an underflow that rounded up to a 2 or
11872# a normalized number that rounded down to a 2. so, redo the entire operation
11873# using RZ as the rounding mode to see what the pre-rounded result is.
11874# this case should be relatively rare.
11875#
11876	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
11877
11878	mov.l		L_SCR3(%a6),%d1
11879	andi.b		&0xc0,%d1		# keep rnd prec
11880	ori.b		&rz_mode*0x10,%d1	# insert RZ
11881
11882	fmov.l		%d1,%fpcr		# set FPCR
11883	fmov.l		&0x0,%fpsr		# clear FPSR
11884
11885	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11886
11887	fmov.l		&0x0,%fpcr		# clear FPCR
11888	fabs.x		%fp1			# make absolute value
11889	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
11890	fbge.w		fmul_normal_exit	# no; no underflow occurred
11891	bra.w		fmul_unfl		# yes, underflow occurred
11892
11893################################################################################
11894
11895#
11896# Multiply: inputs are not both normalized; what are they?
11897# d1 = combined operand tags. table rows are the dst type, columns the src type.
11898fmul_not_norm:
11899	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
11900	jmp		(tbl_fmul_op.b,%pc,%d1.w)	# jump to tbl_fmul_op + offset
11901
11902	swbeg		&48
11903tbl_fmul_op:
11904	short		fmul_norm	- tbl_fmul_op # NORM x NORM
11905	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
11906	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
11907	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
11908	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
11909	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
11910	short		tbl_fmul_op	- tbl_fmul_op #
11911	short		tbl_fmul_op	- tbl_fmul_op #
11912
11913	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
11914	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
11915	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
11916	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
11917	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
11918	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
11919	short		tbl_fmul_op	- tbl_fmul_op #
11920	short		tbl_fmul_op	- tbl_fmul_op #
11921
11922	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
11923	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
11924	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
11925	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
11926	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
11927	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
11928	short		tbl_fmul_op	- tbl_fmul_op #
11929	short		tbl_fmul_op	- tbl_fmul_op #
11930
11931	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
11932	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
11933	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
11934	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
11935	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
11936	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
11937	short		tbl_fmul_op	- tbl_fmul_op #
11938	short		tbl_fmul_op	- tbl_fmul_op #
11939
11940	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
11941	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
11942	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
11943	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
11944	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
11945	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
11946	short		tbl_fmul_op	- tbl_fmul_op #
11947	short		tbl_fmul_op	- tbl_fmul_op #
11948
11949	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
11950	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
11951	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
11952	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
11953	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
11954	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
11955	short		tbl_fmul_op	- tbl_fmul_op #
11956	short		tbl_fmul_op	- tbl_fmul_op #
11957# stubs targeted by tbl_fmul_op; each just branches to the shared handler
11958fmul_res_operr:
11959	bra.l		res_operr		# ZERO x INF or INF x ZERO
11960fmul_res_snan:
11961	bra.l		res_snan		# at least one operand is an SNAN
11962fmul_res_qnan:
11963	bra.l		res_qnan		# a QNAN operand and no SNAN
11964
11964
11965#
11966# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11967# the result is a ZERO whose sign is the exclusive-or of the operand signs.
11968	global		fmul_zero		# global for fsglmul
11969fmul_zero:
11970	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11971	mov.b		DST_EX(%a1),%d1
11972	eor.b		%d0,%d1			# bit 7 of d1 = result sign
11973	bpl.b		fmul_zero_p		# result ZERO is pos.
11974fmul_zero_n:
11975	fmov.s		&0x80000000,%fp0	# load -ZERO
11976	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11977	rts
11978fmul_zero_p:
11979	fmov.s		&0x00000000,%fp0	# load +ZERO
11980	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11981	rts
11982
11983#
11984# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11985#
11986# Note: The j-bit for an infinity is a don't-care. However, to be
11987# strictly compatible w/ the 68881/882, we make sure to return an
11988# INF w/ the j-bit set if the input INF j-bit was set. Destination
11989# INFs take priority.
11990#
11991	global		fmul_inf_dst		# global for fsglmul
11992fmul_inf_dst:
11993	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
11994	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11995	mov.b		DST_EX(%a1),%d1
11996	eor.b		%d0,%d1			# bit 7 of d1 = result sign
11997	bpl.b		fmul_inf_dst_p		# result INF is pos.
11998fmul_inf_dst_n:
11999	fabs.x		%fp0			# clear result sign
12000	fneg.x		%fp0			# set result sign
12001	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12002	rts
12003fmul_inf_dst_p:
12004	fabs.x		%fp0			# clear result sign
12005	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
12006	rts
12007# only the src is an INF: return the src INF; share the sign handling above
12008	global		fmul_inf_src		# global for fsglmul
12009fmul_inf_src:
12010	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
12011	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
12012	mov.b		DST_EX(%a1),%d1
12013	eor.b		%d0,%d1			# bit 7 of d1 = result sign
12014	bpl.b		fmul_inf_dst_p		# result INF is pos.
12015	bra.b		fmul_inf_dst_n		# result INF is neg.
12016
12017#########################################################################
12018# XDEF ****************************************************************	#
12019#	fin(): emulates the fmove instruction				#
12020#	fsin(): emulates the fsmove instruction				#
12021#	fdin(): emulates the fdmove instruction				#
12022#									#
12023# XREF ****************************************************************	#
12024#	norm() - normalize mantissa for EXOP on denorm			#
12025#	scale_to_zero_src() - scale src exponent to zero		#
12026#	ovf_res() - return default overflow result			#
12027#	unf_res() - return default underflow result			#
12028#	res_qnan_1op() - return QNAN result				#
12029#	res_snan_1op() - return SNAN result				#
12030#									#
12031# INPUT ***************************************************************	#
12032#	a0 = pointer to extended precision source operand		#
12033#	d0 = round prec/mode						#
12034#									#
12035# OUTPUT **************************************************************	#
12036#	fp0 = result							#
12037#	fp1 = EXOP (if exception occurred)				#
12038#									#
12039# ALGORITHM ***********************************************************	#
12040#	Handle NANs, infinities, and zeroes as special cases. Divide	#
12041# norms into extended, single, and double precision.			#
12042#	Norms can be emulated w/ a regular fmove instruction. For	#
12043# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
12044# if the result would have overflowed/underflowed. If so, use unf_res()	#
12045# or ovf_res() to return the default result. Also return EXOP if	#
12046# exception is enabled. If no exception, return the default result.	#
12047#	Unnorms don't pass through here.				#
12048#									#
12049#########################################################################
12050
12051	global		fsin
12052fsin:
12053	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
12054	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12055	bra.b		fin
12056
12057	global		fdin
12058fdin:
12059	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
12060	ori.b		&d_mode*0x10,%d0	# insert dbl precision
12061# fdin falls through into fin
12062	global		fin
12063fin:
12064	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12065
12066	mov.b		STAG(%a6),%d1		# fetch src optype tag
12067	bne.w		fin_not_norm		# optimize on non-norm input
12068
12069#
12070# FP MOVE IN: NORMs and DENORMs ONLY!
12071#
12072fin_norm:
12073	andi.b		&0xc0,%d0		# is precision extended?
12074	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12075
12076#
12077# precision selected is extended. so...we cannot get an underflow
12078# or overflow because of rounding to the correct precision. so...
12079# skip the scaling and unscaling...
12080#
12081	tst.b		SRC_EX(%a0)		# is the operand negative?
12082	bpl.b		fin_norm_done		# no
12083	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12084fin_norm_done:
12085	fmovm.x		SRC(%a0),&0x80		# return the operand unchanged in fp0
12086	rts
12087
12088#
12089# for an extended precision DENORM, the UNFL exception bit is set
12090# the accrued bit is NOT set in this instance(no inexactness!)
12091#
12092fin_denorm:
12093	andi.b		&0xc0,%d0		# is precision extended?
12094	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12095
12096	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097	tst.b		SRC_EX(%a0)		# is the operand negative?
12098	bpl.b		fin_denorm_done		# no
12099	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12100fin_denorm_done:
12101	fmovm.x		SRC(%a0),&0x80		# return the DENORM unchanged in fp0
12102	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103	bne.b		fin_denorm_unfl_ena	# yes; go create the EXOP
12104	rts
12105
12106#
12107# the input is an extended DENORM and underflow is enabled in the FPCR.
12108# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109# exponent and insert back into the operand.
12110#
12111fin_denorm_unfl_ena:
12112	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy operand to FP_SCR0
12113	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12114	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12115	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12116	bsr.l		norm			# normalize result
12117	neg.w		%d0			# new exponent = -(shft val)
12118	addi.w		&0x6000,%d0		# add new bias to exponent
12119	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12120	andi.w		&0x8000,%d1		# keep old sign
12121	andi.w		&0x7fff,%d0		# clear sign position
12122	or.w		%d1,%d0			# concat new exp,old sign
12123	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12124	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12125	rts
12126
12127#
12128# operand is to be rounded to single or double precision
12129#
12130fin_not_ext:
12131	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12132	bne.b		fin_dbl			# not sgl, so it's dbl
12133
12134#
12135# operand is to be rounded to single precision
12136#
12137fin_sgl:
12138	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy operand to FP_SCR0
12139	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12140	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12141	bsr.l		scale_to_zero_src	# calculate scale factor
12142
12143	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12144	bge.w		fin_sd_unfl		# yes; go handle underflow
12145	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12146	beq.w		fin_sd_may_ovfl		# maybe; go check
12147	blt.w		fin_sd_ovfl		# yes; go handle overflow
12148
12149#
12150# operand will NOT overflow or underflow when moved into the fp reg file
12151#
12152fin_sd_normal:
12153	fmov.l		&0x0,%fpsr		# clear FPSR
12154	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12155
12156	fmov.x		FP_SCR0(%a6),%fp0	# perform move (rounds scaled operand)
12157
12158	fmov.l		%fpsr,%d1		# save FPSR
12159	fmov.l		&0x0,%fpcr		# clear FPCR
12160
12161	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12162# also entered from fin_sd_may_ovfl with fp0 = result, d0 = scale factor
12163fin_sd_normal_exit:
12164	mov.l		%d2,-(%sp)		# save d2
12165	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12166	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12167	mov.w		%d1,%d2			# make a copy
12168	andi.l		&0x7fff,%d1		# strip sign
12169	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
12170	andi.w		&0x8000,%d2		# keep old sign
12171	or.w		%d1,%d2			# concat old sign,new exponent
12172	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12173	mov.l		(%sp)+,%d2		# restore d2
12174	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12175	rts
12176
12177#
12178# operand is to be rounded to double precision
12179#
12180fin_dbl:
12181	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy operand to FP_SCR0
12182	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12183	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12184	bsr.l		scale_to_zero_src	# calculate scale factor
12185
12186	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12187	bge.w		fin_sd_unfl		# yes; go handle underflow
12188	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12189	beq.w		fin_sd_may_ovfl		# maybe; go check
12190	blt.w		fin_sd_ovfl		# yes; go handle overflow
12191	bra.w		fin_sd_normal		# no; go handle normalized op
12192
12193#
12194# operand WILL underflow when moved in to the fp register file
12195#
12196fin_sd_unfl:
12197	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12198
12199	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
12200	bpl.b		fin_sd_unfl_tst		# no
12201	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12202
12203# if underflow or inexact is enabled, then go calculate the EXOP first.
12204fin_sd_unfl_tst:
12205	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
12206	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12207	bne.b		fin_sd_unfl_ena		# yes
12208
12209fin_sd_unfl_dis:
12210	lea		FP_SCR0(%a6),%a0	# pass: result addr
12211	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12212	bsr.l		unf_res			# calculate default result
12213	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
12214	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12215	rts
12216
12217#
12218# operand will underflow AND underflow or inexact is enabled.
12219# Therefore, we must return the result rounded to extended precision.
12220# the EXOP is built in FP_SCR1; FP_SCR0 is left intact for fin_sd_unfl_dis.
12221fin_sd_unfl_ena:
12222	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy mantissa to FP_SCR1
12223	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12224	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12225
12226	mov.l		%d2,-(%sp)		# save d2
12227	mov.w		%d1,%d2			# make a copy
12228	andi.l		&0x7fff,%d1		# strip sign
12229	sub.l		%d0,%d1			# subtract scale factor
12230	andi.w		&0x8000,%d2		# extract old sign
12231	addi.l		&0x6000,%d1		# add new bias
12232	andi.w		&0x7fff,%d1		# clear sign bit
12233	or.w		%d1,%d2			# concat old sign,new exp
12234	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
12235	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12236	mov.l		(%sp)+,%d2		# restore d2
12237	bra.b		fin_sd_unfl_dis		# now go calculate the default result
12238
12239#
12240# operand WILL overflow.
12241#
12242fin_sd_ovfl:
12243	fmov.l		&0x0,%fpsr		# clear FPSR
12244	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12245
12246	fmov.x		FP_SCR0(%a6),%fp0	# perform move
12247
12248	fmov.l		&0x0,%fpcr		# clear FPCR
12249	fmov.l		%fpsr,%d1		# save FPSR
12250
12251	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12252# fin_sd_may_ovfl jumps in here once it knows the move overflowed
12253fin_sd_ovfl_tst:
12254	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12255
12256	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
12257	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12258	bne.b		fin_sd_ovfl_ena		# yes
12259
12260#
12261# create the default result by calling ovf_res(). reached directly when neither
12262# OVFL nor INEX is enabled, and from fin_sd_ovfl_ena after the EXOP is built.
12263#
12264fin_sd_ovfl_dis:
12265	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12266	sne		%d1			# set sign param accordingly
12267	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12268	bsr.l		ovf_res			# calculate default result
12269	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12270	fmovm.x		(%a0),&0x80		# return default result in fp0 (a0 presumably set by ovf_res)
12271	rts
12272
12273#
12274# OVFL or INEX is enabled.
12275# the INEX2 bit has already been updated by the round to the correct precision.
12276# the EXOP is the operand in FP_SCR0, exponent unscaled and biased by -0x6000.
12277#
12278fin_sd_ovfl_ena:
12279	mov.l		%d2,-(%sp)		# save d2
12280	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12281	mov.l		%d1,%d2			# make a copy
12282	andi.l		&0x7fff,%d1		# strip sign
12283	andi.w		&0x8000,%d2		# keep old sign
12284	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
12285	sub.l		&0x6000,%d1		# subtract bias
12286	andi.w		&0x7fff,%d1		# clear sign bit
12287	or.w		%d2,%d1			# concat old sign,new exp
12288	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12289	mov.l		(%sp)+,%d2		# restore d2
12290	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12291	bra.b		fin_sd_ovfl_dis		# now go calculate the default result
12292
12293#
12294# the move in MAY overflow. so round it and see if |result| reached 2.
12295#
12296fin_sd_may_ovfl:
12297	fmov.l		&0x0,%fpsr		# clear FPSR
12298	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12299
12300	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
12301
12302	fmov.l		%fpsr,%d1		# save status
12303	fmov.l		&0x0,%fpcr		# clear FPCR
12304
12305	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12306
12307	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
12308	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
12309	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
12310
12311# no, it didn't overflow; we have correct result
12312	bra.w		fin_sd_normal_exit	# unscale the exponent and return
12313
12314##########################################################################
12315
12316#
12317# operand is not a NORM: check its optype and branch accordingly
12318# (d1 = src optype tag, loaded at fin)
12319fin_not_norm:
12320	cmpi.b		%d1,&DENORM		# weed out DENORM
12321	beq.w		fin_denorm
12322	cmpi.b		%d1,&SNAN		# weed out SNANs
12323	beq.l		res_snan_1op
12324	cmpi.b		%d1,&QNAN		# weed out QNANs
12325	beq.l		res_qnan_1op
12326
12327#
12328# do the fmove in; at this point, only possible ops are ZERO and INF.
12329# use fmov to determine ccodes.
12330# prec:mode should be zero at this point but it won't affect answer anyways.
12331#
12332	fmov.x		SRC(%a0),%fp0		# do fmove in
12333	fmov.l		%fpsr,%d0		# no exceptions possible
12334	rol.l		&0x8,%d0		# put ccodes in lo byte
12335	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
12336	rts
12337
12338#########################################################################
12339# XDEF ****************************************************************	#
12340#	fdiv(): emulates the fdiv instruction				#
12341#	fsdiv(): emulates the fsdiv instruction				#
12342#	fddiv(): emulates the fddiv instruction				#
12343#									#
12344# XREF ****************************************************************	#
12345#	scale_to_zero_src() - scale src exponent to zero		#
12346#	scale_to_zero_dst() - scale dst exponent to zero		#
12347#	unf_res() - return default underflow result			#
12348#	ovf_res() - return default overflow result			#
12349#	res_qnan() - return QNAN result					#
12350#	res_snan() - return SNAN result					#
12351#									#
12352# INPUT ***************************************************************	#
12353#	a0 = pointer to extended precision source operand		#
12354#	a1 = pointer to extended precision destination operand		#
12355#	d0 = rnd prec,mode						#
12356#									#
12357# OUTPUT **************************************************************	#
12358#	fp0 = result							#
12359#	fp1 = EXOP (if exception occurred)				#
12360#									#
12361# ALGORITHM ***********************************************************	#
12362#	Handle NANs, infinities, and zeroes as special cases. Divide	#
12363# norms/denorms into ext/sgl/dbl precision.				#
12364#	For norms/denorms, scale the exponents such that a divide	#
12365# instruction won't cause an exception. Use the regular fdiv to		#
12366# compute a result. Check if the regular operands would have taken	#
12367# an exception. If so, return the default overflow/underflow result	#
12368# and return the EXOP if exceptions are enabled. Else, scale the	#
12369# result operand to the proper exponent.				#
12370#									#
12371#########################################################################
12372# scale factor thresholds for fdiv_norm; index is rnd prec (ext, sgl, dbl)
12373	align		0x10
12374tbl_fdiv_unfl:
12375	long		0x3fff - 0x0000		# ext_unfl
12376	long		0x3fff - 0x3f81		# sgl_unfl
12377	long		0x3fff - 0x3c01		# dbl_unfl
12378
12379tbl_fdiv_ovfl:
12380	long		0x3fff - 0x7ffe		# ext overflow exponent
12381	long		0x3fff - 0x407e		# sgl overflow exponent
12382	long		0x3fff - 0x43fe		# dbl overflow exponent
12383
12384	global		fsdiv
12385fsdiv:
12386	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
12387	ori.b		&s_mode*0x10,%d0	# insert sgl prec
12388	bra.b		fdiv
12389
12390	global		fddiv
12391fddiv:
12392	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
12393	ori.b		&d_mode*0x10,%d0	# insert dbl prec
12394# fddiv falls through into fdiv
12395	global		fdiv
12396fdiv:
12397	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12398
12399	clr.w		%d1
12400	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
12401	lsl.b		&0x3,%d1		# dst tag goes in bits 5-3
12402	or.b		STAG(%a6),%d1		# combine dst,src tags
12403
12404	bne.w		fdiv_not_norm		# optimize on non-norm input
12405
12406#
12407# DIVIDE: NORMs and DENORMs ONLY!
12408#
12409fdiv_norm:
12410	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
12411	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
12412	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
12413
12414	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
12415	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12416	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12417
12418	bsr.l		scale_to_zero_src	# scale src exponent
12419	mov.l		%d0,-(%sp)		# save scale factor 1
12420
12421	bsr.l		scale_to_zero_dst	# scale dst exponent
12422
12423	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
12424	add.l		%d0,(%sp)		# (dst scale minus src scale)
12425
12426	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
12427	lsr.b		&0x6,%d1		# shift to lo bits
12428	mov.l		(%sp)+,%d0		# load S.F.
12429	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430	ble.w		fdiv_may_ovfl		# result may overflow; go check
12431
12432	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433	beq.w		fdiv_may_unfl		# maybe
12434	bgt.w		fdiv_unfl		# yes; go handle underflow
12435
12436fdiv_normal:
12437	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
12438
12439	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12440	fmov.l		&0x0,%fpsr		# clear FPSR
12441
12442	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
12443
12444	fmov.l		%fpsr,%d1		# save FPSR
12445	fmov.l		&0x0,%fpcr		# clear FPCR
12446
12447	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12448# also entered from fdiv_no_ovfl with fp0 = result, d0 = scale factor
12449fdiv_normal_exit:
12450	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
12451	mov.l		%d2,-(%sp)		# store d2
12452	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12453	mov.l		%d1,%d2			# make a copy
12454	andi.l		&0x7fff,%d1		# strip sign
12455	andi.w		&0x8000,%d2		# keep old sign
12456	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
12457	or.w		%d2,%d1			# concat old sign,new exp
12458	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12459	mov.l		(%sp)+,%d2		# restore d2
12460	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12461	rts
12462# exponent limits for the check in fdiv_may_ovfl; index is rnd prec
12463tbl_fdiv_ovfl2:
12464	long		0x7fff			# ext
12465	long		0x407f			# sgl
12466	long		0x43ff			# dbl
12467
12468fdiv_no_ovfl:
12469	mov.l		(%sp)+,%d0		# restore scale factor
12470	bra.b		fdiv_normal_exit	# unscale the exponent and return
12471# may OVERFLOW: divide, then test the unscaled result exponent against tbl_fdiv_ovfl2
12472fdiv_may_ovfl:
12473	mov.l		%d0,-(%sp)		# save scale factor
12474
12475	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
12476
12477	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12478	fmov.l		&0x0,%fpsr		# clear FPSR
12479
12480	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12481
12482	fmov.l		%fpsr,%d0		# save status
12483	fmov.l		&0x0,%fpcr		# clear FPCR
12484
12485	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
12486
12487	fmovm.x		&0x01,-(%sp)		# save result to stack
12488	mov.w		(%sp),%d0		# fetch new exponent
12489	add.l		&0xc,%sp		# clear result from stack
12490	andi.l		&0x7fff,%d0		# strip sign
12491	sub.l		(%sp),%d0		# exp - scale factor (undo the scaling)
12492	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # d1 = rnd prec index from fdiv_norm
12493	blt.b		fdiv_no_ovfl		# exponent below limit; no overflow
12494	mov.l		(%sp)+,%d0		# overflow: restore scale factor
12495# OVERFLOW: fp0 = scaled result, d0 = scale factor
12496fdiv_ovfl_tst:
12497	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12498
12499	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
12500	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12501	bne.b		fdiv_ovfl_ena		# yes
12502
12503fdiv_ovfl_dis:
12504	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12505	sne		%d1			# set sign param accordingly
12506	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
12507	bsr.l		ovf_res			# calculate default result
12508	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
12509	fmovm.x		(%a0),&0x80		# return default result in fp0 (a0 presumably set by ovf_res)
12510	rts
12511# OVFL or INEX is enabled: create the EXOP in fp1, then get the default result
12512fdiv_ovfl_ena:
12513	mov.l		L_SCR3(%a6),%d1
12514	andi.b		&0xc0,%d1		# is precision extended?
12515	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
12516
12517fdiv_ovfl_ena_cont:
12518	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
12519
12520	mov.l		%d2,-(%sp)		# save d2
12521	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12522	mov.w		%d1,%d2			# make a copy
12523	andi.l		&0x7fff,%d1		# strip sign
12524	sub.l		%d0,%d1			# exp - scale factor (undo the scaling)
12525	subi.l		&0x6000,%d1		# subtract bias
12526	andi.w		&0x7fff,%d1		# clear sign bit
12527	andi.w		&0x8000,%d2		# keep old sign
12528	or.w		%d2,%d1			# concat old sign,new exp
12529	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12530	mov.l		(%sp)+,%d2		# restore d2
12531	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12532	bra.b		fdiv_ovfl_dis		# now go calculate the default result
12533# sgl/dbl: redo the divide with the prec bits cleared (i.e. extended) for the EXOP
12534fdiv_ovfl_ena_sd:
12535	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
12536
12537	mov.l		L_SCR3(%a6),%d1
12538	andi.b		&0x30,%d1		# keep rnd mode
12539	fmov.l		%d1,%fpcr		# set FPCR
12540
12541	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12542
12543	fmov.l		&0x0,%fpcr		# clear FPCR
12544	bra.b		fdiv_ovfl_ena_cont
12545
# The divide underflows. Redo it in round-to-zero mode, merge the resulting
# FPSR bits into the user FPSR, then either build an EXOP (UNFL or INEX
# enabled) or fall into the default-result code.
12546fdiv_unfl:
12547	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12548
12549	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12550
12551	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12552	fmov.l		&0x0,%fpsr		# clear FPSR
12553
12554	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12555
12556	fmov.l		%fpsr,%d1		# save status
12557	fmov.l		&0x0,%fpcr		# clear FPCR
12558
12559	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12560
12561	mov.b		FPCR_ENABLE(%a6),%d1
12562	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12563	bne.b		fdiv_unfl_ena		# yes
12564
# Default underflow result: fp0 is written to FP_SCR0, unf_res is called with
# a0 = &FP_SCR0 and d1 = rnd prec,mode, and FP_SCR0 is reloaded into fp0.
# Also entered from the EXOP path, which leaves fp0 untouched.
12565fdiv_unfl_dis:
12566	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12567
12568	lea		FP_SCR0(%a6),%a0	# pass: result addr
12569	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12570	bsr.l		unf_res			# calculate default result
12571	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
12572	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12573	rts
12574
12575#
12576# UNFL is enabled.
# Redo the divide into fp1 (rounding mode only for sgl/dbl, full L_SCR3 for
# extended) and turn it into the EXOP by subtracting the scale factor from the
# exponent and adding 0x6000. fp0 is not modified; d0 = scale factor.
12577#
12578fdiv_unfl_ena:
12579	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
12580
12581	mov.l		L_SCR3(%a6),%d1
12582	andi.b		&0xc0,%d1		# is precision extended?
12583	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
12584
12585	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12586
12587fdiv_unfl_ena_cont:
12588	fmov.l		&0x0,%fpsr		# clear FPSR
12589
12590	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12591
12592	fmov.l		&0x0,%fpcr		# clear FPCR
12593
12594	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
12595	mov.l		%d2,-(%sp)		# save d2
12596	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12597	mov.l		%d1,%d2			# make a copy
12598	andi.l		&0x7fff,%d1		# strip sign
12599	andi.w		&0x8000,%d2		# keep old sign
12600	sub.l		%d0,%d1			# subtract scale factor
12601	addi.l		&0x6000,%d1		# add bias
12602	andi.w		&0x7fff,%d1		# clear sign position
12603	or.w		%d2,%d1			# concat old sign,new exp
12604	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
12605	mov.l		(%sp)+,%d2		# restore d2
12606	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12607	bra.w		fdiv_unfl_dis		# now produce the default result
12608
# sgl/dbl: load only the rounding mode so the redo is not rounded to sgl/dbl.
12609fdiv_unfl_ena_sd:
12610	mov.l		L_SCR3(%a6),%d1
12611	andi.b		&0x30,%d1		# use only rnd mode
12612	fmov.l		%d1,%fpcr		# set FPCR
12613
12614	bra.b		fdiv_unfl_ena_cont
12615
12616#
12617# the divide operation MAY underflow:
# perform it with the caller's rounding info and compare |result| with 1:
#   > 1  : no underflow, take the normal exit
#   < 1  : underflow
#   == 1 : undecided, redo the divide in RZ mode (see below)
12618#
12619fdiv_may_unfl:
12620	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12621
12622	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12623	fmov.l		&0x0,%fpsr		# clear FPSR
12624
12625	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12626
12627	fmov.l		%fpsr,%d1		# save status
12628	fmov.l		&0x0,%fpcr		# clear FPCR
12629
12630	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12631
12632	fabs.x		%fp0,%fp1		# make a copy of result
12633	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
12634	fbgt.w		fdiv_normal_exit	# yes; no underflow occurred
12635	fblt.w		fdiv_unfl		# |result| < 1.b; underflow occurred
12636
12637#
12638# we still don't know if underflow occurred. result is ~ equal to 1. but,
12639# we don't know if the result was an underflow that rounded up to a 1
12640# or a normalized number that rounded down to a 1. so, redo the entire
12641# operation using RZ as the rounding mode to see what the pre-rounded
12642# result is. this case should be relatively rare.
12643#
12644	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
12645
12646	mov.l		L_SCR3(%a6),%d1
12647	andi.b		&0xc0,%d1		# keep rnd prec
12648	ori.b		&rz_mode*0x10,%d1	# insert RZ
12649
12650	fmov.l		%d1,%fpcr		# set FPCR
12651	fmov.l		&0x0,%fpsr		# clear FPSR
12652
12653	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12654
12655	fmov.l		&0x0,%fpcr		# clear FPCR
12656	fabs.x		%fp1			# make absolute value
12657	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
12658	fbge.w		fdiv_normal_exit	# no; no underflow occurred
12659	bra.w		fdiv_unfl		# yes; underflow occurred
12660
12661############################################################################
12662
12663#
12664# Divide: inputs are not both normalized; what are they?
12665#
# Dispatch on the operand-type index in d1 (computed before this point):
# fetch the 16-bit offset at tbl_fdiv_op[d1] and jump to tbl_fdiv_op+offset.
12666fdiv_not_norm:
12667	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12668	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
12669
# 48 word offsets relative to tbl_fdiv_op: six groups of eight slots. The last
# two slots of every group hold a zero offset (tbl_fdiv_op - tbl_fdiv_op).
# Entry comments read "DST / SRC" (dst is divided by src).
12670	swbeg		&48
12671tbl_fdiv_op:
12672	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
12673	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
12674	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
12675	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
12676	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
12677	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
12678	short		tbl_fdiv_op	- tbl_fdiv_op #
12679	short		tbl_fdiv_op	- tbl_fdiv_op #
12680
12681	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
12682	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
12683	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
12684	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
12685	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
12686	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
12687	short		tbl_fdiv_op	- tbl_fdiv_op #
12688	short		tbl_fdiv_op	- tbl_fdiv_op #
12689
12690	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
12691	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
12692	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
12693	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
12694	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
12695	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
12696	short		tbl_fdiv_op	- tbl_fdiv_op #
12697	short		tbl_fdiv_op	- tbl_fdiv_op #
12698
12699	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
12700	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
12701	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
12702	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
12703	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
12704	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
12705	short		tbl_fdiv_op	- tbl_fdiv_op #
12706	short		tbl_fdiv_op	- tbl_fdiv_op #
12707
12708	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
12709	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
12710	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
12711	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
12712	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
12713	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
12714	short		tbl_fdiv_op	- tbl_fdiv_op #
12715	short		tbl_fdiv_op	- tbl_fdiv_op #
12716
12717	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
12718	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
12719	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
12720	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
12721	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
12722	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
12723	short		tbl_fdiv_op	- tbl_fdiv_op #
12724	short		tbl_fdiv_op	- tbl_fdiv_op #
12725
# Local trampolines: tbl_fdiv_op stores 16-bit offsets, so the table points at
# these nearby labels, which then long-branch to the shared result routines.
12726fdiv_res_qnan:
12727	bra.l		res_qnan
12728fdiv_res_snan:
12729	bra.l		res_snan
12730fdiv_res_operr:
12731	bra.l		res_operr
12732
# Return a ZERO in fp0 whose sign is the XOR of the operand signs and set the
# matching Z (and N) condition codes.
#   in: a0 = src operand, a1 = dst operand; d0 and d1 are clobbered
12733	global		fdiv_zero_load		# global for fsgldiv
12734fdiv_zero_load:
12735	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
12736	mov.b		DST_EX(%a1),%d1		# or of input signs.
12737	eor.b		%d0,%d1
12738	bpl.b		fdiv_zero_load_p	# result is positive
12739	fmov.s		&0x80000000,%fp0	# load a -ZERO
12740	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
12741	rts
12742fdiv_zero_load_p:
12743	fmov.s		&0x00000000,%fp0	# load a +ZERO
12744	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
12745	rts
12746
12747#
12748# The destination was In Range and the source was a ZERO. The result,
12749# Therefore, is an INF w/ the proper sign.
12750# So, determine the sign and return a new INF (w/ the j-bit cleared).
# The DZ and ADZ bits are set in the user FPSR first.
#   in: a0 = src operand, a1 = dst operand; d0 and d1 are clobbered
12751#
12752	global		fdiv_inf_load		# global for fsgldiv
12753fdiv_inf_load:
12754	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12755	mov.b		SRC_EX(%a0),%d0		# load both signs
12756	mov.b		DST_EX(%a1),%d1
12757	eor.b		%d0,%d1			# result sign = XOR of input signs
12758	bpl.b		fdiv_inf_load_p		# result is positive
12759	fmov.s		&0xff800000,%fp0	# make result -INF
12760	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12761	rts
12762fdiv_inf_load_p:
12763	fmov.s		&0x7f800000,%fp0	# make result +INF
12764	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
12765	rts
12766
12767#
12768# The destination was an INF w/ an In Range or ZERO source, the result is
12769# an INF w/ the proper sign.
12770# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12771# dst INF is set, then the j-bit of the result INF is also set).
#   in: a0 = src operand, a1 = dst operand; d0 and d1 are clobbered
12772#
12773	global		fdiv_inf_dst		# global for fsgldiv
12774fdiv_inf_dst:
12775	mov.b		DST_EX(%a1),%d0		# load both signs
12776	mov.b		SRC_EX(%a0),%d1
12777	eor.b		%d0,%d1			# result sign = XOR of input signs
12778	bpl.b		fdiv_inf_dst_p		# result is positive
12779
12780	fmovm.x		DST(%a1),&0x80		# return result in fp0
12781	fabs.x		%fp0			# clear sign bit
12782	fneg.x		%fp0			# set sign bit
12783	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12784	rts
12785
12786fdiv_inf_dst_p:
12787	fmovm.x		DST(%a1),&0x80		# return result in fp0
12788	fabs.x		%fp0			# return positive INF
12789	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
12790	rts
12791
12792#########################################################################
12793# XDEF ****************************************************************	#
12794#	fneg(): emulates the fneg instruction				#
12795#	fsneg(): emulates the fsneg instruction				#
12796#	fdneg(): emulates the fdneg instruction				#
12797#									#
12798# XREF ****************************************************************	#
12799#	norm() - normalize a denorm to provide EXOP			#
12800#	scale_to_zero_src() - scale sgl/dbl source exponent		#
12801#	ovf_res() - return default overflow result			#
12802#	unf_res() - return default underflow result			#
12803#	res_qnan_1op() - return QNAN result				#
12804#	res_snan_1op() - return SNAN result				#
12805#									#
12806# INPUT ***************************************************************	#
12807#	a0 = pointer to extended precision source operand		#
12808#	d0 = rnd prec,mode						#
12809#									#
12810# OUTPUT **************************************************************	#
12811#	fp0 = result							#
12812#	fp1 = EXOP (if exception occurred)				#
12813#									#
12814# ALGORITHM ***********************************************************	#
12815#	Handle NANs, zeroes, and infinities as special cases. Separate	#
12816# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
12817# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
12818# and an actual fneg performed to see if overflow/underflow would have	#
12819# occurred. If so, return default underflow/overflow result. Else,	#
12820# scale the result exponent and return result. FPSR gets set based on	#
12821# the result value.							#
12822#									#
12823#########################################################################
12824
# Entry points. fsneg and fdneg keep only the rounding mode of d0 and force
# single resp. double precision before joining fneg, which stores the rounding
# info in L_SCR3 and dispatches on the source tag.
#   in: a0 = source operand, d0 = rnd prec,mode
12825	global		fsneg
12826fsneg:
12827	andi.b		&0x30,%d0		# clear rnd prec
12828	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12829	bra.b		fneg
12830
12831	global		fdneg
12832fdneg:
12833	andi.b		&0x30,%d0		# clear rnd prec
12834	ori.b		&d_mode*0x10,%d0	# insert dbl prec
12835
12836	global		fneg
12837fneg:
12838	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12839	mov.b		STAG(%a6),%d1		# fetch source tag; zero falls into fneg_norm
12840	bne.w		fneg_not_norm		# optimize on non-norm input
12841
12842#
12843# NEGATE SIGN : norms and denorms ONLY!
12844#
12845fneg_norm:
12846	andi.b		&0xc0,%d0		# is precision extended?
12847	bne.w		fneg_not_ext		# no; go handle sgl or dbl
12848
12849#
12850# precision selected is extended. so...we can not get an underflow
12851# or overflow because of rounding to the correct precision. so...
12852# skip the scaling and unscaling...
# the operand is copied to FP_SCR0 with its sign bit inverted; FPSR_CC is
# written only when the result is negative.
12853#
12854	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12855	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12856	mov.w		SRC_EX(%a0),%d0
12857	eori.w		&0x8000,%d0		# negate sign
12858	bpl.b		fneg_norm_load		# sign is positive
12859	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
12860fneg_norm_load:
12861	mov.w		%d0,FP_SCR0_EX(%a6)
12862	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12863	rts
12864
12865#
12866# for an extended precision DENORM, the UNFL exception bit is set
12867# the accrued bit is NOT set in this instance(no inexactness!)
# the sign-inverted operand is returned in fp0; if UNFL is enabled an EXOP is
# built as well (fneg_ext_unfl_ena).
12868#
12869fneg_denorm:
12870	andi.b		&0xc0,%d0		# is precision extended?
12871	bne.b		fneg_not_ext		# no; go handle sgl or dbl
12872
12873	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12874
12875	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12876	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12877	mov.w		SRC_EX(%a0),%d0
12878	eori.w		&0x8000,%d0		# negate sign
12879	bpl.b		fneg_denorm_done	# result is positive
12880	mov.b		&neg_bmask,FPSR_CC(%a6)	# result is negative; set 'N' ccode bit
12881fneg_denorm_done:
12882	mov.w		%d0,FP_SCR0_EX(%a6)
12883	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12884
12885	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886	bne.b		fneg_ext_unfl_ena	# yes
12887	rts
12888
12889#
12890# the input is an extended DENORM and underflow is enabled in the FPCR.
12891# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892# exponent and insert back into the operand.
# norm is called with a0 = &FP_SCR0; its d0 is used as the shift count.
# FP_SCR0 already holds the sign-inverted operand; the EXOP goes to fp1.
12893#
12894fneg_ext_unfl_ena:
12895	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12896	bsr.l		norm			# normalize result
12897	neg.w		%d0			# new exponent = -(shft val)
12898	addi.w		&0x6000,%d0		# add new bias to exponent
12899	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12900	andi.w		&0x8000,%d1		# keep old sign
12901	andi.w		&0x7fff,%d0		# clear sign position
12902	or.w		%d1,%d0			# concat old sign, new exponent
12903	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12904	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12905	rts
12906
12907#
12908# operand is either single or double
# d0 holds only the precision bits here (masked with 0xc0 by the caller).
12909#
12910fneg_not_ext:
12911	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12912	bne.b		fneg_dbl
12913
12914#
12915# operand is to be rounded to single precision
# copy the source to FP_SCR0, let scale_to_zero_src compute the scale factor
# (used from d0 below) and classify it against the sgl exponent limits.
12916#
12917fneg_sgl:
12918	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12919	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12920	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12921	bsr.l		scale_to_zero_src	# calculate scale factor
12922
12923	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12924	bge.w		fneg_sd_unfl		# yes; go handle underflow
12925	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12926	beq.w		fneg_sd_may_ovfl	# maybe; go check
12927	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12928
12929#
12930# operand will NOT overflow or underflow when moved in to the fp reg file
12931#
12932fneg_sd_normal:
12933	fmov.l		&0x0,%fpsr		# clear FPSR
12934	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12935
12936	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
12937
12938	fmov.l		%fpsr,%d1		# save FPSR
12939	fmov.l		&0x0,%fpcr		# clear FPCR
12940
12941	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12942
# write fp0 to FP_SCR0, subtract the scale factor (d0) from its exponent while
# keeping the sign, and reload the result into fp0.
12943fneg_sd_normal_exit:
12944	mov.l		%d2,-(%sp)		# save d2
12945	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12946	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12947	mov.w		%d1,%d2			# make a copy
12948	andi.l		&0x7fff,%d1		# strip sign
12949	sub.l		%d0,%d1			# subtract scale factor
12950	andi.w		&0x8000,%d2		# keep old sign
12951	or.w		%d1,%d2			# concat old sign,new exp
12952	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12953	mov.l		(%sp)+,%d2		# restore d2
12954	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12955	rts
12956
12957#
12958# operand is to be rounded to double precision
# same scheme as fneg_sgl, with the dbl exponent limits.
12959#
12960fneg_dbl:
12961	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12962	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12963	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12964	bsr.l		scale_to_zero_src	# calculate scale factor
12965
12966	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12967	bge.b		fneg_sd_unfl		# yes; go handle underflow
12968	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12969	beq.w		fneg_sd_may_ovfl	# maybe; go check
12970	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12971	bra.w		fneg_sd_normal		# no; go handle normalized op
12972
12973#
12974# operand WILL underflow when moved in to the fp register file
# the sign is inverted directly in FP_SCR0 (no fneg instruction is executed on
# this path); d0 still holds the scale factor from scale_to_zero_src.
12975#
12976fneg_sd_unfl:
12977	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12978
12979	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
12980	bpl.b		fneg_sd_unfl_tst	# result is positive
12981	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12982
12983# if underflow or inexact is enabled, go calculate EXOP first.
12984fneg_sd_unfl_tst:
12985	mov.b		FPCR_ENABLE(%a6),%d1
12986	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12987	bne.b		fneg_sd_unfl_ena	# yes
12988
# default underflow result: unf_res is called with a0 = &FP_SCR0 and
# d1 = rnd prec,mode; FP_SCR0 is then loaded into fp0.
12989fneg_sd_unfl_dis:
12990	lea		FP_SCR0(%a6),%a0	# pass: result addr
12991	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12992	bsr.l		unf_res			# calculate default result
12993	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
12994	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12995	rts
12996
12997#
12998# operand will underflow AND underflow is enabled.
12999# Therefore, we must return the result rounded to extended precision.
# the EXOP is assembled in FP_SCR1 from the (already sign-inverted) FP_SCR0:
# exponent = scaled exp - scale factor + 0x6000. FP_SCR0 itself is left intact
# for the default-result code that follows.
13000#
13001fneg_sd_unfl_ena:
13002	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13005
13006	mov.l		%d2,-(%sp)		# save d2
13007	mov.l		%d1,%d2			# make a copy
13008	andi.l		&0x7fff,%d1		# strip sign
13009	andi.w		&0x8000,%d2		# keep old sign
13010	sub.l		%d0,%d1			# subtract scale factor
13011	addi.l		&0x6000,%d1		# add new bias
13012	andi.w		&0x7fff,%d1		# clear sign position
13013	or.w		%d2,%d1			# concat new sign,new exp
13014	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13015	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13016	mov.l		(%sp)+,%d2		# restore d2
13017	bra.b		fneg_sd_unfl_dis	# now produce the default result
13018
13019#
13020# operand WILL overflow.
# the negation is still executed so that the FPSR bits it produces can be
# merged into the user FPSR; the result lands in fp0, FP_SCR0 is not modified.
13021#
13022fneg_sd_ovfl:
13023	fmov.l		&0x0,%fpsr		# clear FPSR
13024	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13025
13026	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13027
13028	fmov.l		&0x0,%fpcr		# clear FPCR
13029	fmov.l		%fpsr,%d1		# save FPSR
13030
13031	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13032
13033fneg_sd_ovfl_tst:
13034	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13035
13036	mov.b		FPCR_ENABLE(%a6),%d1
13037	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13038	bne.b		fneg_sd_ovfl_ena	# yes
13039
13040#
13041# OVFL is not enabled; therefore, we must create the default result by
13042# calling ovf_res().
# ovf_res is called with d1 = sign flag and d0 = prec,mode; its d0 is OR-ed
# into the ccodes and the value at (a0) is loaded into fp0.
13043#
13044fneg_sd_ovfl_dis:
13045	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13046	sne		%d1			# set sign param accordingly
13047	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13048	bsr.l		ovf_res			# calculate default result
13049	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13050	fmovm.x		(%a0),&0x80		# return default result in fp0
13051	rts
13052
13053#
13054# OVFL is enabled.
13055# the INEX2 bit has already been updated by the round to the correct precision.
13056# now, build the extended precision EXOP (and don't alter the FPSR).
# FP_SCR0 still holds the scaled SOURCE operand: the fneg result was written
# to fp0 only. The sign fetched from FP_SCR0_EX is therefore the source sign
# and has to be inverted so that the EXOP carries the sign of the negated
# result, just as fneg_sd_unfl inverts it before its EXOP is built.
#   in:  d0 = scale factor
#   out: fp1 = EXOP (exponent = scaled exp - scale factor - 0x6000)
#        d1 clobbered; continues at fneg_sd_ovfl_dis for the default result
13057#
13058fneg_sd_ovfl_ena:
13059	mov.l		%d2,-(%sp)		# save d2
13060	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp} of scaled source
13061	mov.l		%d1,%d2			# make a copy
13062	andi.l		&0x7fff,%d1		# strip sign
13063	andi.w		&0x8000,%d2		# isolate source sign
	eori.w		&0x8000,%d2		# negate it; EXOP takes the result sign
13064	sub.l		%d0,%d1			# subtract scale factor
13065	subi.l		&0x6000,%d1		# subtract bias
13066	andi.w		&0x7fff,%d1		# clear sign position
13067	or.w		%d2,%d1			# concat sign,exp
13068	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13069	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13070	mov.l		(%sp)+,%d2		# restore d2
13071	bra.b		fneg_sd_ovfl_dis	# now produce the default result
13072
13073#
13074# the move in MAY overflow. so...
# perform the negation with the caller's rounding info and look at the
# magnitude of the result: >= 2 means overflow occurred.
13075#
13076fneg_sd_may_ovfl:
13077	fmov.l		&0x0,%fpsr		# clear FPSR
13078	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13079
13080	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13081
13082	fmov.l		%fpsr,%d1		# save status
13083	fmov.l		&0x0,%fpcr		# clear FPCR
13084
13085	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13086
13087	fabs.x		%fp0,%fp1		# make a copy of result
13088	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13089	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
13090
13091# no, it didn't overflow; we have correct result
13092	bra.w		fneg_sd_normal_exit
13093
13094##########################################################################
13095
13096#
13097# input is not normalized; what is it?
# d1 = source tag (STAG) as loaded by fneg.
13098#
13099fneg_not_norm:
13100	cmpi.b		%d1,&DENORM		# weed out DENORM
13101	beq.w		fneg_denorm
13102	cmpi.b		%d1,&SNAN		# weed out SNAN
13103	beq.l		res_snan_1op
13104	cmpi.b		%d1,&QNAN		# weed out QNAN
13105	beq.l		res_qnan_1op
13106
13107#
13108# do the fneg; at this point, only possible ops are ZERO and INF.
13109# use fneg to determine ccodes.
13110# prec:mode should be zero at this point but it won't affect answer anyways.
13111#
13112	fneg.x		SRC_EX(%a0),%fp0	# do fneg
13113	fmov.l		%fpsr,%d0
13114	rol.l		&0x8,%d0		# put ccodes in lo byte
13115	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
13116	rts
13117
13118#########################################################################
13119# XDEF ****************************************************************	#
13120#	ftst(): emulates the ftst instruction				#
13121#									#
13122# XREF ****************************************************************	#
13123#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
13124#									#
13125# INPUT ***************************************************************	#
13126#	a0 = pointer to extended precision source operand		#
13127#									#
13128# OUTPUT **************************************************************	#
13129#	none								#
13130#									#
13131# ALGORITHM ***********************************************************	#
13132#	Check the source operand tag (STAG) and set the FPSR condition	#
13133# codes according to the operand type and sign.				#
13134#									#
13135#########################################################################
13136
# ftst: set the condition code byte FPSR_CC from the type (STAG) and sign of
# the source operand at a0. No fp register is written. For a positive NORM or
# DENORM the routine returns without touching FPSR_CC.
13137	global		ftst
13138ftst:
13139	mov.b		STAG(%a6),%d1
13140	bne.b		ftst_not_norm		# optimize on non-norm input
13141
13142#
13143# Norm:
13144#
13145ftst_norm:
13146	tst.b		SRC_EX(%a0)		# is operand negative?
13147	bmi.b		ftst_norm_m		# yes
13148	rts
13149ftst_norm_m:
13150	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13151	rts
13152
13153#
13154# input is not normalized; what is it?
13155#
13156ftst_not_norm:
13157	cmpi.b		%d1,&ZERO		# weed out ZERO
13158	beq.b		ftst_zero
13159	cmpi.b		%d1,&INF		# weed out INF
13160	beq.b		ftst_inf
13161	cmpi.b		%d1,&SNAN		# weed out SNAN
13162	beq.l		res_snan_1op
13163	cmpi.b		%d1,&QNAN		# weed out QNAN
13164	beq.l		res_qnan_1op
13165
13166#
13167# Denorm:
13168#
13169ftst_denorm:
13170	tst.b		SRC_EX(%a0)		# is operand negative?
13171	bmi.b		ftst_denorm_m		# yes
13172	rts
13173ftst_denorm_m:
13174	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13175	rts
13176
13177#
13178# Infinity:
13179#
13180ftst_inf:
13181	tst.b		SRC_EX(%a0)		# is operand negative?
13182	bmi.b		ftst_inf_m		# yes
13183ftst_inf_p:
13184	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13185	rts
13186ftst_inf_m:
13187	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13188	rts
13189
13190#
13191# Zero:
13192#
13193ftst_zero:
13194	tst.b		SRC_EX(%a0)		# is operand negative?
13195	bmi.b		ftst_zero_m		# yes
13196ftst_zero_p:
13197	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13198	rts
13199ftst_zero_m:
13200	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
13201	rts
13202
13203#########################################################################
13204# XDEF ****************************************************************	#
13205#	fint(): emulates the fint instruction				#
13206#									#
13207# XREF ****************************************************************	#
13208#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13209#									#
13210# INPUT ***************************************************************	#
13211#	a0 = pointer to extended precision source operand		#
13212#	d0 = round precision/mode					#
13213#									#
13214# OUTPUT **************************************************************	#
13215#	fp0 = result							#
13216#									#
13217# ALGORITHM ***********************************************************	#
13218#	Separate according to operand type. Unnorms don't pass through	#
13219# here. For norms, load the rounding mode/prec, execute a "fint", then	#
13220# store the resulting FPSR bits.					#
13221#	For denorms, force the j-bit to a one and do the same as for	#
13222# norms. Denorms are so low that the answer will either be a zero or a	#
13223# one.									#
13224#	For zeroes/infs/NANs, return the same while setting the FPSR	#
13225# as appropriate.							#
13226#									#
13227#########################################################################
13228
# fint: round the source operand at a0 to an integral value using the rounding
# mode passed in d0 (the precision bits are cleared, i.e. extended). The result
# is returned in fp0 and the FPSR produced by the hardware fint is OR-ed into
# USER_FPSR. ZERO and INF are returned unchanged with ccodes set by hand.
13229	global		fint
13230fint:
13231	mov.b		STAG(%a6),%d1
13232	bne.b		fint_not_norm		# optimize on non-norm input
13233
13234#
13235# Norm:
13236#
13237fint_norm:
13238	andi.b		&0x30,%d0		# set prec = ext
13239
13240	fmov.l		%d0,%fpcr		# set FPCR
13241	fmov.l		&0x0,%fpsr		# clear FPSR
13242
13243	fint.x		SRC(%a0),%fp0		# execute fint
13244
13245	fmov.l		&0x0,%fpcr		# clear FPCR
13246	fmov.l		%fpsr,%d0		# save FPSR
13247	or.l		%d0,USER_FPSR(%a6)	# set exception bits
13248
13249	rts
13250
13251#
13252# input is not normalized; what is it?
13253#
13254fint_not_norm:
13255	cmpi.b		%d1,&ZERO		# weed out ZERO
13256	beq.b		fint_zero
13257	cmpi.b		%d1,&INF		# weed out INF
13258	beq.b		fint_inf
13259	cmpi.b		%d1,&DENORM		# weed out DENORM
13260	beq.b		fint_denorm
13261	cmpi.b		%d1,&SNAN		# weed out SNAN
13262	beq.l		res_snan_1op
13263	bra.l		res_qnan_1op		# weed out QNAN
13264
13265#
13266# Denorm:
13267#
13268# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13269# also, the INEX2 and AINEX exception bits will be set.
13270# so, we could either set these manually or force the DENORM
13271# to a very small NORM and ship it to the NORM routine.
13272# I do the latter.
# only the exponent word and the top mantissa byte of FP_SCR0 are written
# here; a0 is redirected to FP_SCR0 before joining fint_norm.
13273#
13274fint_denorm:
13275	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13276	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
13277	lea		FP_SCR0(%a6),%a0
13278	bra.b		fint_norm
13279
13280#
13281# Zero:
13282#
13283fint_zero:
13284	tst.b		SRC_EX(%a0)		# is ZERO negative?
13285	bmi.b		fint_zero_m		# yes
13286fint_zero_p:
13287	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
13288	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13289	rts
13290fint_zero_m:
13291	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
13292	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13293	rts
13294
13295#
13296# Infinity:
13297#
13298fint_inf:
13299	fmovm.x		SRC(%a0),&0x80		# return result in fp0
13300	tst.b		SRC_EX(%a0)		# is INF negative?
13301	bmi.b		fint_inf_m		# yes
13302fint_inf_p:
13303	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13304	rts
13305fint_inf_m:
13306	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13307	rts
13308
13309#########################################################################
13310# XDEF ****************************************************************	#
13311#	fintrz(): emulates the fintrz instruction			#
13312#									#
13313# XREF ****************************************************************	#
13314#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13315#									#
13316# INPUT ***************************************************************	#
13317#	a0 = pointer to extended precision source operand		#
13318#	d0 = round precision/mode					#
13319#									#
13320# OUTPUT **************************************************************	#
13321#	fp0 = result							#
13322#									#
13323# ALGORITHM ***********************************************************	#
13324#	Separate according to operand type. Unnorms don't pass through	#
13325# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
13326# then store the resulting FPSR bits.					#
13327#	For denorms, force the j-bit to a one and do the same as for	#
13328# norms. Denorms are so low that the answer will either be a zero or a	#
13329# one.									#
13330#	For zeroes/infs/NANs, return the same while setting the FPSR	#
13331# as appropriate.							#
13332#									#
13333#########################################################################
13334
# fintrz: round the source operand at a0 to an integral value toward zero.
# Unlike fint_norm, no FPCR value is loaded before the hardware fintrz is
# executed. The result is returned in fp0 and the resulting FPSR is OR-ed into
# USER_FPSR. ZERO and INF are returned unchanged with ccodes set by hand.
13335	global		fintrz
13336fintrz:
13337	mov.b		STAG(%a6),%d1
13338	bne.b		fintrz_not_norm		# optimize on non-norm input
13339
13340#
13341# Norm:
13342#
13343fintrz_norm:
13344	fmov.l		&0x0,%fpsr		# clear FPSR
13345
13346	fintrz.x	SRC(%a0),%fp0		# execute fintrz
13347
13348	fmov.l		%fpsr,%d0		# save FPSR
13349	or.l		%d0,USER_FPSR(%a6)	# set exception bits
13350
13351	rts
13352
13353#
13354# input is not normalized; what is it?
13355#
13356fintrz_not_norm:
13357	cmpi.b		%d1,&ZERO		# weed out ZERO
13358	beq.b		fintrz_zero
13359	cmpi.b		%d1,&INF		# weed out INF
13360	beq.b		fintrz_inf
13361	cmpi.b		%d1,&DENORM		# weed out DENORM
13362	beq.b		fintrz_denorm
13363	cmpi.b		%d1,&SNAN		# weed out SNAN
13364	beq.l		res_snan_1op
13365	bra.l		res_qnan_1op		# weed out QNAN
13366
13367#
13368# Denorm:
13369#
13370# for DENORMs, the result will be (+/-)ZERO.
13371# also, the INEX2 and AINEX exception bits will be set.
13372# so, we could either set these manually or force the DENORM
13373# to a very small NORM and ship it to the NORM routine.
13374# I do the latter.
# only the exponent word and the top mantissa byte of FP_SCR0 are written
# here; a0 is redirected to FP_SCR0 before joining fintrz_norm.
13375#
13376fintrz_denorm:
13377	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13378	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
13379	lea		FP_SCR0(%a6),%a0
13380	bra.b		fintrz_norm
13381
13382#
13383# Zero:
13384#
13385fintrz_zero:
13386	tst.b		SRC_EX(%a0)		# is ZERO negative?
13387	bmi.b		fintrz_zero_m		# yes
13388fintrz_zero_p:
13389	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
13390	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13391	rts
13392fintrz_zero_m:
13393	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
13394	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13395	rts
13396
13397#
13398# Infinity:
13399#
13400fintrz_inf:
13401	fmovm.x		SRC(%a0),&0x80		# return result in fp0
13402	tst.b		SRC_EX(%a0)		# is INF negative?
13403	bmi.b		fintrz_inf_m		# yes
13404fintrz_inf_p:
13405	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13406	rts
13407fintrz_inf_m:
13408	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13409	rts
13410
13411#########################################################################
13412# XDEF ****************************************************************	#
13413#	fabs():  emulates the fabs instruction				#
13414#	fsabs(): emulates the fsabs instruction				#
13415#	fdabs(): emulates the fdabs instruction				#
13416#									#
13417# XREF **************************************************************** #
13418#	norm() - normalize denorm mantissa to provide EXOP		#
13419#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
13420#	unf_res() - calculate underflow result				#
13421#	ovf_res() - calculate overflow result				#
13422#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13423#									#
13424# INPUT *************************************************************** #
13425#	a0 = pointer to extended precision source operand		#
13426#	d0 = rnd precision/mode						#
13427#									#
13428# OUTPUT ************************************************************** #
13429#	fp0 = result							#
13430#	fp1 = EXOP (if exception occurred)				#
13431#									#
13432# ALGORITHM ***********************************************************	#
13433#	Handle NANs, infinities, and zeroes as special cases. Divide	#
13434# norms into extended, single, and double precision.			#
13435#	Simply clear sign for extended precision norm. Ext prec denorm	#
13436# gets an EXOP created for it since it's an underflow.			#
13437#	Double and single precision can overflow and underflow. First,	#
13438# scale the operand such that the exponent is zero. Perform an "fabs"	#
13439# using the correct rnd mode/prec. Check to see if the original		#
13440# exponent would take an exception. If so, use unf_res() or ovf_res()	#
13441# to calculate the default result. Also, create the EXOP for the	#
13442# exceptional case. If no exception should occur, insert the correct	#
13443# result exponent and return.						#
13444#	Unnorms don't pass through here.				#
13445#									#
13446#########################################################################
13447
# fsabs: entry point that forces single-precision rounding. It rewrites the
# precision field of the rounding byte in d0, then joins the common fabs code.
13448	global		fsabs
13449fsabs:
13450	andi.b		&0x30,%d0		# clear rnd prec (only the 0x30 rnd-mode bits survive)
13451	ori.b		&s_mode*0x10,%d0	# insert sgl precision
13452	bra.b		fabs			# continue in the shared routine
13453
# fdabs: entry point that forces double-precision rounding. There is no
# branch at the end: execution falls straight through into fabs below.
13454	global		fdabs
13455fdabs:
13456	andi.b		&0x30,%d0		# clear rnd prec (only the 0x30 rnd-mode bits survive)
13457	ori.b		&d_mode*0x10,%d0	# insert dbl precision
13458
# fabs: common entry. Saves the rounding info for later use and dispatches on
# the source operand tag; a zero tag falls through into fabs_norm.
#   in:  a0 = ptr to extended precision source, d0 = rnd prec/mode
13459	global		fabs
13460fabs:
13461	mov.l		%d0,L_SCR3(%a6)		# store rnd info
13462	mov.b		STAG(%a6),%d1		# fetch src tag; zero sets the Z flag
13463	bne.w		fabs_not_norm		# optimize on non-norm input
13464
13465#
13466# ABSOLUTE VALUE: norms and denorms ONLY!
13467#
13468fabs_norm:
13469	andi.b		&0xc0,%d0		# is precision extended? (isolate the 0xc0 prec bits)
13470	bne.b		fabs_not_ext		# no; go handle sgl or dbl
13471
13472#
13473# precision selected is extended. so...we can not get an underflow
13474# or overflow because of rounding to the correct precision. so...
13475# skip the scaling and unscaling...
# The operand is copied to FP_SCR0 with the sign bit cleared and loaded
# into fp0; the FPSR is not touched on this path.
13476#
13477	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13478	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13479	mov.w		SRC_EX(%a0),%d1		# fetch {sgn,exp} word
13480	bclr		&15,%d1			# force absolute value (bit 15 = sign)
13481	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
13482	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
13483	rts
13484
13485#
13486# for an extended precision DENORM, the UNFL exception bit is set
13487# the accrued bit is NOT set in this instance(no inexactness!)
# The default result is the source with its sign cleared. If UNFL is enabled
# in the FPCR, an EXOP is additionally produced by fabs_ext_unfl_ena.
13488#
13489fabs_denorm:
13490	andi.b		&0xc0,%d0		# is precision extended?
13491	bne.b		fabs_not_ext		# no; sgl/dbl DENORMs take the scaled path
13492
13493	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13494
13495	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13496	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13497	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp} word
13498	bclr		&15,%d0			# clear sign
13499	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
13500
13501	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13502
13503	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504	bne.b		fabs_ext_unfl_ena	# yes; also build the EXOP
13505	rts
13506
13507#
13508# the input is an extended DENORM and underflow is enabled in the FPCR.
13509# normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510# exponent and insert back into the operand.
# The operand worked on is the copy in FP_SCR0, whose sign was already cleared
# by fabs_denorm, so the "old sign" kept below is zero on this path.
# NOTE(review): norm() is not visible here; this code treats d0 on return
# as the shift count -- confirm against norm().
13511#
13512fabs_ext_unfl_ena:
13513	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
13514	bsr.l		norm			# normalize result
13515	neg.w		%d0			# new exponent = -(shft val)
13516	addi.w		&0x6000,%d0		# add new bias to exponent
13517	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
13518	andi.w		&0x8000,%d1		# keep old sign
13519	andi.w		&0x7fff,%d0		# clear sign position
13520	or.w		%d1,%d0			# concat old sign, new exponent
13521	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
13522	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13523	rts
13524
13525#
13526# operand is either single or double
# (d0 holds only the 0xc0 precision bits here; both callers masked it)
13527#
13528fabs_not_ext:
13529	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
13530	bne.b		fabs_dbl		# not sgl; must be dbl
13531
13532#
13533# operand is to be rounded to single precision
# The source is copied to FP_SCR0 and scaled; the scale factor left in d0 is
# then compared against the limits below to pick the unfl / ovfl / normal path.
# Falls through into fabs_sd_normal when neither limit is hit.
13534#
13535fabs_sgl:
13536	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13537	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13538	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13539	bsr.l		scale_to_zero_src	# calculate scale factor
13540
13541	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
13542	bge.w		fabs_sd_unfl		# yes; go handle underflow
13543	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
13544	beq.w		fabs_sd_may_ovfl	# maybe; go check
13545	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13546
13547#
13548# operand will NOT overflow or underflow when moved in to the fp reg file
# Runs fabs on the scaled operand under the caller's rounding control, merges
# the resulting FPSR into USER_FPSR, then fabs_sd_normal_exit undoes the
# scaling by subtracting the scale factor (still in d0) from the exponent.
13549#
13550fabs_sd_normal:
13551	fmov.l		&0x0,%fpsr		# clear FPSR
13552	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13553
13554	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13555
13556	fmov.l		%fpsr,%d1		# save FPSR
13557	fmov.l		&0x0,%fpcr		# clear FPCR
13558
13559	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13560
# also entered from fabs_sd_may_ovfl when the rounded result did not overflow
13561fabs_sd_normal_exit:
13562	mov.l		%d2,-(%sp)		# save d2
13563	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
13564	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
13565	mov.l		%d1,%d2			# make a copy
13566	andi.l		&0x7fff,%d1		# strip sign
13567	sub.l		%d0,%d1			# subtract scale factor to undo the scaling
13568	andi.w		&0x8000,%d2		# keep old sign
13569	or.w		%d1,%d2			# concat old sign,new exp
13570	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
13571	mov.l		(%sp)+,%d2		# restore d2
13572	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
13573	rts
13574
13575#
13576# operand is to be rounded to double precision
# Same flow as fabs_sgl, with the double-precision exponent limits.
13577#
13578fabs_dbl:
13579	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13580	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13581	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13582	bsr.l		scale_to_zero_src	# calculate scale factor
13583
13584	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
13585	bge.b		fabs_sd_unfl		# yes; go handle underflow
13586	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
13587	beq.w		fabs_sd_may_ovfl	# maybe; go check
13588	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13589	bra.w		fabs_sd_normal		# no; go handle normalized op
13590
13591#
13592# operand WILL underflow when moved in to the fp register file
# The sign of the scaled copy in FP_SCR0 is cleared first, so both the EXOP
# (if built) and the default result are derived from a non-negative operand.
13593#
13594fabs_sd_unfl:
13595	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13596
13597	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value (bit 7 of the top byte = sign)
13598
13599# if underflow or inexact is enabled, go calculate EXOP first.
13600	mov.b		FPCR_ENABLE(%a6),%d1
13601	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
13602	bne.b		fabs_sd_unfl_ena	# yes
13603
# default result; fabs_sd_unfl_ena also branches back here after the EXOP
13604fabs_sd_unfl_dis:
13605	lea		FP_SCR0(%a6),%a0	# pass: result addr
13606	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
13607	bsr.l		unf_res			# calculate default result
13608	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
13609	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13610	rts
13611
13612#
13613# operand will underflow AND underflow is enabled.
13614# Therefore, we must return the result rounded to extended precision.
# The EXOP is assembled in FP_SCR1 so that FP_SCR0 stays intact for the
# default-result code: mantissa copied as is, exponent = scaled exponent
# - scale factor (d0) + 0x6000, truncated to 15 bits.
13615#
13616fabs_sd_unfl_ena:
13617	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13618	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13619	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13620
13621	mov.l		%d2,-(%sp)		# save d2
13622	mov.l		%d1,%d2			# make a copy
13623	andi.l		&0x7fff,%d1		# strip sign
13624	andi.w		&0x8000,%d2		# keep old sign (already cleared by fabs_sd_unfl)
13625	sub.l		%d0,%d1			# subtract scale factor
13626	addi.l		&0x6000,%d1		# add new bias
13627	andi.w		&0x7fff,%d1		# keep the exponent within 15 bits
13628	or.w		%d2,%d1			# concat new sign,new exp
13629	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13630	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13631	mov.l		(%sp)+,%d2		# restore d2
13632	bra.b		fabs_sd_unfl_dis	# now create the default result
13633
13634#
13635# operand WILL overflow.
# fabs is still executed on the scaled operand so that INEX2/N from rounding
# to the requested precision are captured in USER_FPSR.
13636#
13637fabs_sd_ovfl:
13638	fmov.l		&0x0,%fpsr		# clear FPSR
13639	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13640
13641	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13642
13643	fmov.l		&0x0,%fpcr		# clear FPCR
13644	fmov.l		%fpsr,%d1		# save FPSR
13645
13646	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13647
# fabs_sd_may_ovfl joins here once it has determined that overflow occurred
13648fabs_sd_ovfl_tst:
13649	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13650
13651	mov.b		FPCR_ENABLE(%a6),%d1
13652	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13653	bne.b		fabs_sd_ovfl_ena	# yes
13654
13655#
13656# OVFL is not enabled; therefore, we must create the default result by
13657# calling ovf_res().
# fabs_sd_ovfl_ena also branches back here after building the EXOP.
# After ovf_res returns, the default result is loaded through a0 and d0 is
# OR-ed into the condition code byte.
13658#
13659fabs_sd_ovfl_dis:
13660	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13661	sne		%d1			# set sign param accordingly
13662	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13663	bsr.l		ovf_res			# calculate default result
13664	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13665	fmovm.x		(%a0),&0x80		# return default result in fp0
13666	rts
13667
13668#
13669# OVFL (or INEX) is enabled: build the EXOP in fp1, then create the default
13670# result. The INEX2 bit has already been updated by the round to the correct
13671# precision, so the FPSR is not altered here.
# The EXOP is the scaled operand held in FP_SCR0 with its exponent rewritten
# as: scaled exponent - scale factor (d0) - 0x6000, truncated to 15 bits.
# Because this is an absolute-value operation the EXOP must not be negative.
# The scaled copy in FP_SCR0 is only sign-cleared on the underflow path
# (fabs_sd_unfl), so the sign is forced to zero here rather than re-inserted.
# FP_SCR0 is not read again after this point (the default result is loaded
# through a0 after ovf_res), so rewriting its exponent word is safe.
13672#
13673fabs_sd_ovfl_ena:
13674	mov.l		%d2,-(%sp)		# save d2
13675	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
13676	mov.l		%d1,%d2			# make a copy
13677	andi.l		&0x7fff,%d1		# strip sign
13678	clr.w		%d2			# |x| is never negative: EXOP sign = 0
13679	sub.l		%d0,%d1			# subtract scale factor to undo the scaling
13680	subi.l		&0x6000,%d1		# subtract bias
13681	andi.w		&0x7fff,%d1		# keep the exponent within 15 bits
13682	or.w		%d2,%d1			# concat sign(=0),exp
13683	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13684	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13685	mov.l		(%sp)+,%d2		# restore d2
13686	bra.b		fabs_sd_ovfl_dis	# now create the default result
13687
13688#
13689# the move in MAY overflow. so...
# perform the operation on the scaled operand and look at the rounded result:
# a magnitude of 2 or more means rounding carried into the exponent, i.e. the
# real result overflows; otherwise the result is unscaled and returned as is.
13690#
13691fabs_sd_may_ovfl:
13692	fmov.l		&0x0,%fpsr		# clear FPSR
13693	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13694
13695	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13696
13697	fmov.l		%fpsr,%d1		# save status
13698	fmov.l		&0x0,%fpcr		# clear FPCR
13699
13700	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13701
13702	fabs.x		%fp0,%fp1		# make a copy of result
13703	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13704	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
13705
13706# no, it didn't overflow; we have correct result
13707	bra.w		fabs_sd_normal_exit
13708
13709##########################################################################
13710
13711#
13712# input is not normalized; what is it?
# d1 holds the source tag loaded at fabs. DENORM, SNAN and QNAN are routed to
# their handlers; whatever remains is handled inline as INF or ZERO: the
# absolute value is taken directly and only the ccode byte is written.
13713#
13714fabs_not_norm:
13715	cmpi.b		%d1,&DENORM		# weed out DENORM
13716	beq.w		fabs_denorm
13717	cmpi.b		%d1,&SNAN		# weed out SNAN
13718	beq.l		res_snan_1op
13719	cmpi.b		%d1,&QNAN		# weed out QNAN
13720	beq.l		res_qnan_1op
13721
13722	fabs.x		SRC(%a0),%fp0		# force absolute value
13723
13724	cmpi.b		%d1,&INF		# weed out INF
13725	beq.b		fabs_inf
13726fabs_zero:
13727	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13728	rts
13729fabs_inf:
13730	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13731	rts
13732
13733#########################################################################
13734# XDEF ****************************************************************	#
13735#	fcmp(): fp compare op routine					#
13736#									#
13737# XREF ****************************************************************	#
13738#	res_qnan() - return QNAN result					#
13739#	res_snan() - return SNAN result					#
13740#									#
13741# INPUT ***************************************************************	#
13742#	a0 = pointer to extended precision source operand		#
13743#	a1 = pointer to extended precision destination operand		#
13744#	d0 = round prec/mode						#
13745#									#
13746# OUTPUT ************************************************************** #
13747#	None								#
13748#									#
13749# ALGORITHM ***********************************************************	#
13750#	Handle NANs and denorms as special cases. For everything else,	#
13751# just use the actual fcmp instruction to produce the correct condition	#
13752# codes.								#
13753#									#
13754#########################################################################
13755
# fcmp: entry. Builds a combined operand-tag index in d1 as (DTAG << 3) | STAG.
# An index of zero falls through into fcmp_norm; anything else is dispatched
# through tbl_fcmp_op.
13756	global		fcmp
13757fcmp:
13758	clr.w		%d1			# clear the low word so it can be used as an index
13759	mov.b		DTAG(%a6),%d1		# dst tag
13760	lsl.b		&0x3,%d1		# dst tag selects the group of 8 entries
13761	or.b		STAG(%a6),%d1		# src tag selects the entry within the group
13762	bne.b		fcmp_not_norm		# optimize on non-norm input
13763
13764#
13765# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
# in: a1 = ptr to dst operand, a0 = ptr to src operand (the DENORM helpers
# re-point these at patched copies before branching here).
# The compare is done by the fcmp instruction itself; only the condition
# code byte of the resulting FPSR is copied to FPSR_CC.
13766#
13767fcmp_norm:
13768	fmovm.x		DST(%a1),&0x80		# load dst op
13769
13770	fcmp.x		%fp0,SRC(%a0)		# do compare
13771
13772	fmov.l		%fpsr,%d0		# save FPSR
13773	rol.l		&0x8,%d0		# extract ccode bits (rotate top byte into low byte)
13774	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
13775
13776	rts
13777
13778#
13779# fcmp: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG. The table holds 16-bit offsets relative to
# tbl_fcmp_op: the first instruction fetches the offset for this tag pair and
# the second jumps to tbl_fcmp_op + offset. Entry comments read "dst - src".
# Each group of 8 has 6 named entries; the last two slots are filler with
# offset 0.
13780#
13781fcmp_not_norm:
13782	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13783	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
13784
13785	swbeg		&48
13786tbl_fcmp_op:
13787	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
13788	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
13789	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
13790	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
13791	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
13792	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
13793	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13794	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13795
13796	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
13797	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
13798	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
13799	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
13800	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
13801	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
13802	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13803	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13804
13805	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
13806	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
13807	short		fcmp_norm	- tbl_fcmp_op # INF - INF
13808	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
13809	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
13810	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
13811	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13812	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13813
13814	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
13815	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
13816	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
13817	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
13818	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
13819	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
13820	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13821	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13822
13823	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
13824	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
13825	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
13826	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
13827	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
13828	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
13829	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13830	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13831
13832	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
13833	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
13834	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
13835	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
13836	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
13837	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
13838	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13839	short		tbl_fcmp_op	- tbl_fcmp_op # filler (offset 0)
13840
13841# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13842# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# Both stubs call the generic NAN result routine and then clear bit 3 of the
# condition code byte (mask 0xf7).
13843fcmp_res_qnan:
13844	bsr.l		res_qnan
13845	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N'
13846	rts
13847fcmp_res_snan:
13848	bsr.l		res_snan
13849	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N'
13850	rts
13851
13852#
13853# DENORMs are a little more difficult.
13854# If you have 2 DENORMs, then you can just force the j-bit to a one
13855# and use the fcmp_norm routine.
13856# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857# and use the fcmp_norm routine.
13858# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859# But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860# (1) signs are (+) and the DENORM is the dst or
13861# (2) signs are (-) and the DENORM is the src
13862#
13863
# fcmp_dnrm_s: the src operand is the DENORM. Copy it to FP_SCR0 with bit 31
# of the upper mantissa longword (the j-bit) set, point a0 at the copy and
# let fcmp_norm do the compare. The dst pointer in a1 is left untouched.
13864fcmp_dnrm_s:
13865	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13866	mov.l		SRC_HI(%a0),%d0
13867	bset		&31,%d0			# DENORM src; make into small norm
13868	mov.l		%d0,FP_SCR0_HI(%a6)
13869	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13870	lea		FP_SCR0(%a6),%a0	# src now refers to the patched copy
13871	bra.w		fcmp_norm
13872
# fcmp_dnrm_d: the dst operand is the DENORM. Copy it to FP_SCR0 with bit 31
# of the upper mantissa longword (the j-bit) set, point a1 at the copy and
# let fcmp_norm do the compare. The src pointer in a0 is left untouched.
# The {sgn,exp} field is copied as a word, matching fcmp_dnrm_s and
# fcmp_dnrm_sd.
13873fcmp_dnrm_d:
13874	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
13875	mov.l		DST_HI(%a1),%d0
13876	bset		&31,%d0			# DENORM dst; make into small norm
13877	mov.l		%d0,FP_SCR0_HI(%a6)
13878	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
13879	lea		FP_SCR0(%a6),%a1	# dst now refers to the patched copy
13880	bra.w		fcmp_norm
13881
# fcmp_dnrm_sd: both operands are DENORMs. The dst is copied to FP_SCR1 and
# the src to FP_SCR0, each with the j-bit forced to one; a1/a0 are then
# pointed at the copies before the common compare.
13882fcmp_dnrm_sd:
13883	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
13884	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13885	mov.l		DST_HI(%a1),%d0
13886	bset		&31,%d0			# DENORM dst; make into small norm
13887	mov.l		%d0,FP_SCR1_HI(%a6)
13888	mov.l		SRC_HI(%a0),%d0
13889	bset		&31,%d0			# DENORM src; make into small norm
13890	mov.l		%d0,FP_SCR0_HI(%a6)
13891	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
13892	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13893	lea		FP_SCR1(%a6),%a1	# dst -> patched copy
13894	lea		FP_SCR0(%a6),%a0	# src -> patched copy
13895	bra.w		fcmp_norm
13896
# fcmp_nrm_dnrm: dst is a NORM, src is a DENORM.
# Opposite signs: patch the src DENORM and use the real compare.
# Same signs: the NORM has the larger magnitude, so dst - src is negative
# only when both are negative. In that case the ccode byte is written with
# 'N' alone; otherwise this code returns without writing FPSR_CC.
13897fcmp_nrm_dnrm:
13898	mov.b		SRC_EX(%a0),%d0		# determine if like signs
13899	mov.b		DST_EX(%a1),%d1
13900	eor.b		%d0,%d1			# bit 7 set <=> signs differ
13901	bmi.w		fcmp_dnrm_s
13902
13903# signs are the same, so must determine the answer ourselves.
13904	tst.b		%d0			# is src op negative?
13905	bmi.b		fcmp_nrm_dnrm_m		# yes
13906	rts
13907fcmp_nrm_dnrm_m:
13908	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13909	rts
13910
# fcmp_dnrm_nrm: dst is a DENORM, src is a NORM.
# Opposite signs: patch the dst DENORM and use the real compare.
# Same signs: the NORM has the larger magnitude, so dst - src is negative
# only when both are positive. In that case the ccode byte is written with
# 'N' alone; otherwise this code returns without writing FPSR_CC.
13911fcmp_dnrm_nrm:
13912	mov.b		SRC_EX(%a0),%d0		# determine if like signs
13913	mov.b		DST_EX(%a1),%d1
13914	eor.b		%d0,%d1			# bit 7 set <=> signs differ
13915	bmi.w		fcmp_dnrm_d
13916
13917# signs are the same, so must determine the answer ourselves.
13918	tst.b		%d0			# is src op negative?
13919	bpl.b		fcmp_dnrm_nrm_m		# no
13920	rts
13921fcmp_dnrm_nrm_m:
13922	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13923	rts
13924
13925#########################################################################
13926# XDEF ****************************************************************	#
13927#	fsglmul(): emulates the fsglmul instruction			#
13928#									#
13929# XREF ****************************************************************	#
13930#	scale_to_zero_src() - scale src exponent to zero		#
13931#	scale_to_zero_dst() - scale dst exponent to zero		#
13932#	unf_res4() - return default underflow result for sglop		#
13933#	ovf_res() - return default overflow result			#
13934#	res_qnan() - return QNAN result					#
13935#	res_snan() - return SNAN result					#
13936#									#
13937# INPUT ***************************************************************	#
13938#	a0 = pointer to extended precision source operand		#
13939#	a1 = pointer to extended precision destination operand		#
13940#	d0 = rnd prec,mode						#
13941#									#
13942# OUTPUT **************************************************************	#
13943#	fp0 = result							#
13944#	fp1 = EXOP (if exception occurred)				#
13945#									#
13946# ALGORITHM ***********************************************************	#
13947#	Handle NANs, infinities, and zeroes as special cases. Divide	#
13948# norms/denorms into ext/sgl/dbl precision.				#
13949#	For norms/denorms, scale the exponents such that a multiply	#
13950# instruction won't cause an exception. Use the regular fsglmul to	#
13951# compute a result. Check if the regular operands would have taken	#
13952# an exception. If so, return the default overflow/underflow result	#
13953# and return the EXOP if exceptions are enabled. Else, scale the	#
13954# result operand to the proper exponent.				#
13955#									#
13956#########################################################################
13957
# fsglmul: entry. Saves the rounding info and builds the combined tag index
# d1 = (DTAG << 3) | STAG. Zero falls through into fsglmul_norm; anything
# else is dispatched through tbl_fsglmul_op.
13958	global		fsglmul
13959fsglmul:
13960	mov.l		%d0,L_SCR3(%a6)		# store rnd info
13961
13962	clr.w		%d1			# clear the low word so it can be used as an index
13963	mov.b		DTAG(%a6),%d1		# dst tag
13964	lsl.b		&0x3,%d1		# dst tag selects the group of 8 entries
13965	or.b		STAG(%a6),%d1		# src tag selects the entry within the group
13966
13967	bne.w		fsglmul_not_norm	# optimize on non-norm input
13968
# fsglmul_norm: NORM/DENORM operands. dst is copied to FP_SCR1 and src to
# FP_SCR0, both are scaled, and the two scale factors are summed in d0. The
# sum is compared against the limits below to choose the ovfl / may_ovfl /
# unfl / may_unfl path; otherwise execution falls through to fsglmul_normal.
13969fsglmul_norm:
13970	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
13971	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
13972	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
13973
13974	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13975	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13976	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13977
13978	bsr.l		scale_to_zero_src	# scale exponent
13979	mov.l		%d0,-(%sp)		# save scale factor 1
13980
13981	bsr.l		scale_to_zero_dst	# scale dst exponent
13982
13983	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
13984
13985	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
13986	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
13987	blt.w		fsglmul_ovfl		# result will overflow
13988
13989	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
13990	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
13991	bgt.w		fsglmul_unfl		# result will underflow
13992
# fsglmul_normal: no exception expected. Multiply the scaled operands under
# the caller's rounding control and merge the FPSR into USER_FPSR; then
# fsglmul_normal_exit subtracts the scale factor (d0) from the result
# exponent and returns the result in fp0.
13993fsglmul_normal:
13994	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
13995
13996	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13997	fmov.l		&0x0,%fpsr		# clear FPSR
13998
13999	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14000
14001	fmov.l		%fpsr,%d1		# save status
14002	fmov.l		&0x0,%fpcr		# clear FPCR
14003
14004	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14005
# also entered from fsglmul_may_ovfl / fsglmul_may_unfl when the borderline
# result turned out not to be exceptional; fp0 holds the scaled result
14006fsglmul_normal_exit:
14007	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14008	mov.l		%d2,-(%sp)		# save d2
14009	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
14010	mov.l		%d1,%d2			# make a copy
14011	andi.l		&0x7fff,%d1		# strip sign
14012	andi.w		&0x8000,%d2		# keep old sign
14013	sub.l		%d0,%d1			# subtract scale factor to undo the scaling
14014	or.w		%d2,%d1			# concat old sign,new exp
14015	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14016	mov.l		(%sp)+,%d2		# restore d2
14017	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
14018	rts
14019
# fsglmul_ovfl: the result will overflow. The multiply is still executed on
# the scaled operands so that INEX2/N are captured in USER_FPSR.
14020fsglmul_ovfl:
14021	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14022
14023	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14024	fmov.l		&0x0,%fpsr		# clear FPSR
14025
14026	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14027
14028	fmov.l		%fpsr,%d1		# save status
14029	fmov.l		&0x0,%fpcr		# clear FPCR
14030
14031	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14032
14033fsglmul_ovfl_tst:
14034
14035# save setting this until now because this is where fsglmul_may_ovfl may jump in
14036	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14037
14038	mov.b		FPCR_ENABLE(%a6),%d1
14039	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14040	bne.b		fsglmul_ovfl_ena	# yes
14041
# default overflow result; fsglmul_ovfl_ena branches back here after the
# EXOP. After ovf_res returns, the result is loaded through a0 and d0 is
# OR-ed into the condition code byte.
14042fsglmul_ovfl_dis:
14043	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
14044	sne		%d1			# set sign param accordingly
14045	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14046	andi.b		&0x30,%d0		# force prec = ext
14047	bsr.l		ovf_res			# calculate default result
14048	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
14049	fmovm.x		(%a0),&0x80		# return default result in fp0
14050	rts
14051
# fsglmul_ovfl_ena: OVFL or INEX is enabled. Build the EXOP from the product
# in fp0: same sign and mantissa, exponent = scaled exponent - scale factor
# (d0) - 0x6000, truncated to 15 bits. The EXOP is returned in fp1, then the
# default result is created by fsglmul_ovfl_dis.
14052fsglmul_ovfl_ena:
14053	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
14054
14055	mov.l		%d2,-(%sp)		# save d2
14056	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14057	mov.l		%d1,%d2			# make a copy
14058	andi.l		&0x7fff,%d1		# strip sign
14059	sub.l		%d0,%d1			# subtract scale factor to undo the scaling
14060	subi.l		&0x6000,%d1		# subtract bias
14061	andi.w		&0x7fff,%d1		# keep the exponent within 15 bits
14062	andi.w		&0x8000,%d2		# keep old sign
14063	or.w		%d2,%d1			# concat old sign,new exp
14064	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14065	mov.l		(%sp)+,%d2		# restore d2
14066	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14067	bra.b		fsglmul_ovfl_dis	# now create the default result
14068
# fsglmul_may_ovfl: the scale factor sits exactly on the overflow limit.
# Do the multiply and inspect the scaled result: a magnitude of 2 or more
# means the real result overflows; otherwise it is unscaled and returned.
14069fsglmul_may_ovfl:
14070	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14071
14072	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14073	fmov.l		&0x0,%fpsr		# clear FPSR
14074
14075	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14076
14077	fmov.l		%fpsr,%d1		# save status
14078	fmov.l		&0x0,%fpcr		# clear FPCR
14079
14080	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14081
14082	fabs.x		%fp0,%fp1		# make a copy of result
14083	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
14084	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
14085
14086# no, it didn't overflow; we have correct result
14087	bra.w		fsglmul_normal_exit
14088
# fsglmul_unfl: the result will underflow. The multiply is executed with the
# FPCR set to rz_mode*0x10 only (round-to-zero, no other control bits), and
# the resulting status is merged into USER_FPSR.
14089fsglmul_unfl:
14090	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14091
14092	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14093
14094	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
14095	fmov.l		&0x0,%fpsr		# clear FPSR
14096
14097	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14098
14099	fmov.l		%fpsr,%d1		# save status
14100	fmov.l		&0x0,%fpcr		# clear FPCR
14101
14102	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14103
14104	mov.b		FPCR_ENABLE(%a6),%d1
14105	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14106	bne.b		fsglmul_unfl_ena	# yes
14107
# default underflow result, computed from the product still held in fp0;
# fsglmul_unfl_ena branches back here after building the EXOP in fp1
14108fsglmul_unfl_dis:
14109	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14110
14111	lea		FP_SCR0(%a6),%a0	# pass: result addr
14112	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14113	bsr.l		unf_res4		# calculate default result
14114	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14115	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14116	rts
14117
14118#
14119# UNFL is enabled.
# (reached when UNFL or INEX is enabled.) The multiply is redone in fp1 under
# the caller's rounding control to form the EXOP: exponent = scaled exponent
# - scale factor (d0) + 0x6000, truncated to 15 bits. fp0 is not touched, so
# fsglmul_unfl_dis can still build the default result from it afterwards.
14120#
14121fsglmul_unfl_ena:
14122	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14123
14124	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14125	fmov.l		&0x0,%fpsr		# clear FPSR
14126
14127	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
14128
14129	fmov.l		&0x0,%fpcr		# clear FPCR
14130
14131	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14132	mov.l		%d2,-(%sp)		# save d2
14133	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14134	mov.l		%d1,%d2			# make a copy
14135	andi.l		&0x7fff,%d1		# strip sign
14136	andi.w		&0x8000,%d2		# keep old sign
14137	sub.l		%d0,%d1			# subtract scale factor to undo the scaling
14138	addi.l		&0x6000,%d1		# add bias
14139	andi.w		&0x7fff,%d1		# keep the exponent within 15 bits
14140	or.w		%d2,%d1			# concat old sign,new exp
14141	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14142	mov.l		(%sp)+,%d2		# restore d2
14143	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14144	bra.w		fsglmul_unfl_dis	# now create the default result
14145
# fsglmul_may_unfl: the scale factor sits exactly on the underflow limit.
# Do the multiply and compare the magnitude of the scaled result with 2:
#   > 2  no underflow, unscale and return
#   < 2  underflow
#   = 2  undecided; redo the multiply with round-to-zero (see below)
14146fsglmul_may_unfl:
14147	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14148
14149	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14150	fmov.l		&0x0,%fpsr		# clear FPSR
14151
14152	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14153
14154	fmov.l		%fpsr,%d1		# save status
14155	fmov.l		&0x0,%fpcr		# clear FPCR
14156
14157	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14158
14159	fabs.x		%fp0,%fp1		# make a copy of result
14160	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
14161	fbgt.w		fsglmul_normal_exit	# yes; no underflow occurred
14162	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred
14163
14164#
14165# we still don't know if underflow occurred. result is ~ equal to 2. but,
14166# we don't know if the result was an underflow that rounded up to a 2 or
14167# a normalized number that rounded down to a 2. so, redo the entire operation
14168# using RZ as the rounding mode to see what the pre-rounded result is.
14169# this case should be relatively rare.
# The redo is done in fp1 only; fp0 keeps the result of the first multiply.
14170#
14171	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
14172
14173	mov.l		L_SCR3(%a6),%d1
14174	andi.b		&0xc0,%d1		# keep rnd prec
14175	ori.b		&rz_mode*0x10,%d1	# insert RZ
14176
14177	fmov.l		%d1,%fpcr		# set FPCR
14178	fmov.l		&0x0,%fpsr		# clear FPSR
14179
14180	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
14181
14182	fmov.l		&0x0,%fpcr		# clear FPCR
14183	fabs.x		%fp1			# make absolute value
14184	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
14185	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
14186	bra.w		fsglmul_unfl		# yes, underflow occurred
14187
14188##############################################################################
14189
14190#
14191# Single Precision Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG. The table holds 16-bit offsets relative to
# tbl_fsglmul_op: the first instruction fetches the offset for this tag pair
# and the second jumps to tbl_fsglmul_op + offset. Entry comments read
# "dst x src". Each group of 8 has 6 named entries; the last two slots are
# filler with offset 0.
14192#
14193fsglmul_not_norm:
14194	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14195	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
14196
14197	swbeg		&48
14198tbl_fsglmul_op:
14199	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
14200	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
14201	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
14202	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
14203	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
14204	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
14205	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14206	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14207
14208	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
14209	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
14210	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
14211	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
14212	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
14213	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
14214	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14215	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14216
14217	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
14218	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
14219	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
14220	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
14221	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
14222	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
14223	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14224	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14225
14226	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
14227	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
14228	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
14229	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
14230	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
14231	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
14232	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14233	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14234
14235	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
14236	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
14237	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
14238	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
14239	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
14240	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
14241	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14242	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14243
14244	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
14245	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
14246	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
14247	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
14248	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
14249	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
14250	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14251	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler (offset 0)
14252
# Local branch islands for the table above: each one forwards with a long
# branch to the shared handler. The zero and infinity cases reuse the fmul
# handlers.
14253fsglmul_res_operr:
14254	bra.l		res_operr
14255fsglmul_res_snan:
14256	bra.l		res_snan
14257fsglmul_res_qnan:
14258	bra.l		res_qnan
14259fsglmul_zero:
14260	bra.l		fmul_zero
14261fsglmul_inf_src:
14262	bra.l		fmul_inf_src
14263fsglmul_inf_dst:
14264	bra.l		fmul_inf_dst
14265
14266#########################################################################
14267# XDEF ****************************************************************	#
14268#	fsgldiv(): emulates the fsgldiv instruction			#
14269#									#
14270# XREF ****************************************************************	#
14271#	scale_to_zero_src() - scale src exponent to zero		#
14272#	scale_to_zero_dst() - scale dst exponent to zero		#
14273#	unf_res4() - return default underflow result for sglop		#
14274#	ovf_res() - return default overflow result			#
14275#	res_qnan() - return QNAN result					#
14276#	res_snan() - return SNAN result					#
14277#									#
14278# INPUT ***************************************************************	#
14279#	a0 = pointer to extended precision source operand		#
14280#	a1 = pointer to extended precision destination operand		#
14281#	d0 = rnd prec,mode						#
14282#									#
14283# OUTPUT **************************************************************	#
14284#	fp0 = result							#
14285#	fp1 = EXOP (if exception occurred)				#
14286#									#
14287# ALGORITHM ***********************************************************	#
14288#	Handle NANs, infinities, and zeroes as special cases. Divide	#
14289# norms/denorms into ext/sgl/dbl precision.				#
14290#	For norms/denorms, scale the exponents such that a divide	#
14291# instruction won't cause an exception. Use the regular fsgldiv to	#
14292# compute a result. Check if the regular operands would have taken	#
14293# an exception. If so, return the default overflow/underflow result	#
14294# and return the EXOP if exceptions are enabled. Else, scale the	#
14295# result operand to the proper exponent.				#
14296#									#
14297#########################################################################
14298
# fsgldiv: entry. Saves the rounding info and builds the combined tag index
# d1 = (DTAG << 3) | STAG. Zero falls through into fsgldiv_norm; anything
# else goes to fsgldiv_not_norm.
14299	global		fsgldiv
14300fsgldiv:
14301	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14302
14303	clr.w		%d1			# clear the low word so it can be used as an index
14304	mov.b		DTAG(%a6),%d1		# dst tag
14305	lsl.b		&0x3,%d1		# dst tag selects the group of 8 entries
14306	or.b		STAG(%a6),%d1		# combine src tags
14307
14308	bne.w		fsgldiv_not_norm	# optimize on non-norm input
14309
14310#
14311# DIVIDE: NORMs and DENORMs ONLY!
14312#
14313fsgldiv_norm:
14314	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
14315	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
14316	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
14317
14318	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
14319	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
14320	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
14321
14322	bsr.l		scale_to_zero_src	# calculate scale factor 1
14323	mov.l		%d0,-(%sp)		# save scale factor 1
14324
14325	bsr.l		scale_to_zero_dst	# calculate scale factor 2
14326
14327	neg.l		(%sp)			# S.F. = scale1 - scale2
14328	add.l		%d0,(%sp)
14329
14330	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
14331	lsr.b		&0x6,%d1
14332	mov.l		(%sp)+,%d0
14333	cmpi.l		%d0,&0x3fff-0x7ffe
14334	ble.w		fsgldiv_may_ovfl
14335
14336	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
14337	beq.w		fsgldiv_may_unfl	# maybe
14338	bgt.w		fsgldiv_unfl		# yes; go handle underflow
14339
#
# No exception is possible: divide the scaled operands under the user's
# rounding info, merge the resulting FPSR bits into USER_FPSR, then
# (fsgldiv_normal_exit) re-bias the result exponent by the scale factor in
# d0 while preserving the sign, and return the result in fp0.
# fsgldiv_normal_exit is also entered from fsgldiv_may_ovfl and
# fsgldiv_may_unfl with the result in fp0 and the scale factor in d0.
#
14340fsgldiv_normal:
14341	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14342
14343	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14344	fmov.l		&0x0,%fpsr		# clear FPSR
14345
14346	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
14347
14348	fmov.l		%fpsr,%d1		# save FPSR
14349	fmov.l		&0x0,%fpcr		# clear FPCR
14350
14351	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14352
14353fsgldiv_normal_exit:
14354	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
14355	mov.l		%d2,-(%sp)		# save d2
14356	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
14357	mov.l		%d1,%d2			# make a copy
14358	andi.l		&0x7fff,%d1		# strip sign
14359	andi.w		&0x8000,%d2		# keep old sign
14360	sub.l		%d0,%d1			# add scale factor
14361	or.w		%d2,%d1			# concat old sign,new exp
14362	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14363	mov.l		(%sp)+,%d2		# restore d2
14364	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
14365	rts
14366
#
# The divide MAY overflow: perform it on the scaled operands, then apply the
# scale factor (d0) to the result exponent and compare against 0x7fff. If
# the re-biased exponent is below 0x7fff the result is returned through
# fsgldiv_normal_exit; otherwise control falls into fsgldiv_ovfl_tst.
#
14367fsgldiv_may_ovfl:
14368	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14369
14370	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14371	fmov.l		&0x0,%fpsr		# set FPSR
14372
14373	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
14374
14375	fmov.l		%fpsr,%d1		# save status
14376	fmov.l		&0x0,%fpcr		# clear FPCR
14377
14378	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14379
14380	fmovm.x		&0x01,-(%sp)		# save result to stack
14381	mov.w		(%sp),%d1		# fetch new exponent
14382	add.l		&0xc,%sp		# clear result
14383	andi.l		&0x7fff,%d1		# strip sign
14384	sub.l		%d0,%d1			# add scale factor
14385	cmp.l		%d1,&0x7fff		# did divide overflow?
14386	blt.b		fsgldiv_normal_exit	# no; finish as a normal result
14387
#
# Overflow handling. Sets the overflow/accrued bits in USER_FPSR, then:
#  - fsgldiv_ovfl_dis: asks ovf_res() for the default result, using the
#    rounding mode only (precision bits are cleared from d0) and the sign
#    taken from the N bit in FPSR_CC; the result is loaded from (a0).
#  - fsgldiv_ovfl_ena: OVFL or INEX is enabled, so the EXOP is built in fp1
#    from the scaled result in fp0 (exponent re-biased by the scale factor
#    and an extra -0x6000) before joining the default-result path.
#
14388fsgldiv_ovfl_tst:
14389	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14390
14391	mov.b		FPCR_ENABLE(%a6),%d1
14392	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14393	bne.b		fsgldiv_ovfl_ena	# yes
14394
14395fsgldiv_ovfl_dis:
14396	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
14397	sne		%d1			# set sign param accordingly
14398	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14399	andi.b		&0x30,%d0		# kill precision
14400	bsr.l		ovf_res			# calculate default result
14401	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
14402	fmovm.x		(%a0),&0x80		# return default result in fp0
14403	rts
14404
14405fsgldiv_ovfl_ena:
14406	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
14407
14408	mov.l		%d2,-(%sp)		# save d2
14409	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14410	mov.l		%d1,%d2			# make a copy
14411	andi.l		&0x7fff,%d1		# strip sign
14412	andi.w		&0x8000,%d2		# keep old sign
14413	sub.l		%d0,%d1			# add scale factor
14414	subi.l		&0x6000,%d1		# subtract new bias
14415	andi.w		&0x7fff,%d1		# clear ms bit
14416	or.w		%d2,%d1			# concat old sign,new exp
14417	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14418	mov.l		(%sp)+,%d2		# restore d2
14419	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14420	bra.b		fsgldiv_ovfl_dis	# now build the default result
14421
#
# Underflow handling. Sets the UNFL exception bit, redoes the divide in RZ
# mode to obtain the pre-rounded result in fp0, then:
#  - fsgldiv_unfl_dis: hands that result (via FP_SCR0) and the rounding
#    info to unf_res4() and returns the default result in fp0.
#  - fsgldiv_unfl_ena: UNFL or INEX is enabled, so the divide is repeated
#    into fp1 under the user's rounding info and the EXOP exponent is
#    re-biased by the scale factor plus 0x6000 before joining the
#    default-result path.
# d0 must still hold the scale factor when fsgldiv_unfl_ena runs.
#
14422fsgldiv_unfl:
14423	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14424
14425	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14426
14427	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
14428	fmov.l		&0x0,%fpsr		# clear FPSR
14429
14430	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14431
14432	fmov.l		%fpsr,%d1		# save status
14433	fmov.l		&0x0,%fpcr		# clear FPCR
14434
14435	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14436
14437	mov.b		FPCR_ENABLE(%a6),%d1
14438	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14439	bne.b		fsgldiv_unfl_ena	# yes
14440
14441fsgldiv_unfl_dis:
14442	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14443
14444	lea		FP_SCR0(%a6),%a0	# pass: result addr
14445	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14446	bsr.l		unf_res4		# calculate default result
14447	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14448	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14449	rts
14450
14451#
14452# UNFL is enabled.
14453#
14454fsgldiv_unfl_ena:
14455	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14456
14457	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14458	fmov.l		&0x0,%fpsr		# clear FPSR
14459
14460	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14461
14462	fmov.l		&0x0,%fpcr		# clear FPCR
14463
14464	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14465	mov.l		%d2,-(%sp)		# save d2
14466	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14467	mov.l		%d1,%d2			# make a copy
14468	andi.l		&0x7fff,%d1		# strip sign
14469	andi.w		&0x8000,%d2		# keep old sign
14470	sub.l		%d0,%d1			# add scale factor
14471	addi.l		&0x6000,%d1		# add bias
14472	andi.w		&0x7fff,%d1		# clear top bit
14473	or.w		%d2,%d1			# concat old sign, new exp
14474	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14475	mov.l		(%sp)+,%d2		# restore d2
14476	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14477	bra.b		fsgldiv_unfl_dis	# now build the default result
14478
14479#
14480# the divide operation MAY underflow:
14481#
#
# Borderline underflow case: do the divide under the user's rounding info
# and compare |result| (of the scaled operands) against 1. Greater than 1
# means no underflow, less than 1 means underflow; exactly 1 is ambiguous
# and is resolved by repeating the divide in RZ mode.
#
14482fsgldiv_may_unfl:
14483	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14484
14485	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14486	fmov.l		&0x0,%fpsr		# clear FPSR
14487
14488	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14489
14490	fmov.l		%fpsr,%d1		# save status
14491	fmov.l		&0x0,%fpcr		# clear FPCR
14492
14493	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14494
14495	fabs.x		%fp0,%fp1		# make a copy of result
14496	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
14497	fbgt.w		fsgldiv_normal_exit	# yes; no underflow occurred
14498	fblt.w		fsgldiv_unfl		# |result| < 1.b; underflow occurred
14499
14500#
14501# we still don't know if underflow occurred. result is ~ equal to 1. but,
14502# we don't know if the result was an underflow that rounded up to a 1
14503# or a normalized number that rounded down to a 1. so, redo the entire
14504# operation using RZ as the rounding mode to see what the pre-rounded
14505# result is. this case should be relatively rare.
14506#
14507	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
14508
14509	clr.l		%d1			# clear scratch register
14510	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
14511
14512	fmov.l		%d1,%fpcr		# set FPCR
14513	fmov.l		&0x0,%fpsr		# clear FPSR
14514
14515	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14516
14517	fmov.l		&0x0,%fpcr		# clear FPCR
14518	fabs.x		%fp1			# make absolute value
14519	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
14520	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
14521	bra.w		fsgldiv_unfl		# yes; underflow occurred
14522
14523############################################################################
14524
14525#
14526# Divide: inputs are not both normalized; what are they?
14527#
#
# Dispatch on the operand classes. d1 holds (DTAG << 3) | STAG, so each
# group of eight entries below is one destination class and the position
# within the group is the source class (labelled "dst / src"). Entries are
# 16-bit offsets relative to tbl_fsgldiv_op. The two trailing entries of
# each group are zero offsets (they point back at the table itself).
# The stubs after the table forward to shared handlers; the inf/zero cases
# reuse the fdiv_* handlers.
#
14528fsgldiv_not_norm:
14529	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
14530	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)		# jump to handler
14531
14532	swbeg		&48
14533tbl_fsgldiv_op:
14534	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
14535	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
14536	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
14537	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
14538	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
14539	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
14540	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14541	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14542
14543	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
14544	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
14545	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
14546	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
14547	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
14548	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
14549	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14550	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14551
14552	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
14553	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
14554	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
14555	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
14556	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
14557	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
14558	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14559	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14560
14561	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
14562	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
14563	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
14564	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
14565	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
14566	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
14567	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14568	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14569
14570	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
14571	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
14572	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
14573	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
14574	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
14575	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
14576	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14577	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14578
14579	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
14580	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
14581	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
14582	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
14583	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
14584	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
14585	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14586	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14587
14588fsgldiv_res_qnan:
14589	bra.l		res_qnan		# shared QNAN result handler
14590fsgldiv_res_snan:
14591	bra.l		res_snan		# shared SNAN result handler
14592fsgldiv_res_operr:
14593	bra.l		res_operr		# shared OPERR result handler
14594fsgldiv_inf_load:
14595	bra.l		fdiv_inf_load		# reuse fdiv handler
14596fsgldiv_zero_load:
14597	bra.l		fdiv_zero_load		# reuse fdiv handler
14598fsgldiv_inf_dst:
14599	bra.l		fdiv_inf_dst		# reuse fdiv handler
14600
14601#########################################################################
14602# XDEF ****************************************************************	#
14603#	fadd(): emulates the fadd instruction				#
14604#	fsadd(): emulates the fsadd instruction				#
14605#	fdadd(): emulates the fdadd instruction				#
14606#									#
14607# XREF ****************************************************************	#
14608#	addsub_scaler2() - scale the operands so they won't take exc	#
14609#	ovf_res() - return default overflow result			#
14610#	unf_res() - return default underflow result			#
14611#	res_qnan() - set QNAN result					#
14612#	res_snan() - set SNAN result					#
14613#	res_operr() - set OPERR result					#
14614#	scale_to_zero_src() - set src operand exponent equal to zero	#
14615#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
14616#									#
14617# INPUT ***************************************************************	#
14618#	a0 = pointer to extended precision source operand		#
14619#	a1 = pointer to extended precision destination operand		#
14620#									#
14621# OUTPUT **************************************************************	#
14622#	fp0 = result							#
14623#	fp1 = EXOP (if exception occurred)				#
14624#									#
14625# ALGORITHM ***********************************************************	#
14626#	Handle NANs, infinities, and zeroes as special cases. Divide	#
14627# norms into extended, single, and double precision.			#
14628#	Do addition after scaling exponents such that exception won't	#
14629# occur. Then, check result exponent to see if exception would have	#
14630# occurred. If so, return default result and maybe EXOP. Else, insert	#
14631# the correct result exponent and return. Set FPSR bits as appropriate.	#
14632#									#
14633#########################################################################
14634
#
# Entry points. fsadd and fdadd replace the rounding-precision bits of d0
# (keeping only the 0x30 rounding-mode field) with single or double
# precision and continue at fadd; fdadd does so by falling through.
# fadd stores the rounding info in L_SCR3, builds the operand-class index
# (DTAG << 3) | STAG in d1 and dispatches through fadd_not_norm unless the
# index is zero, in which case it falls into fadd_norm.
#
14635	global		fsadd
14636fsadd:
14637	andi.b		&0x30,%d0		# clear rnd prec
14638	ori.b		&s_mode*0x10,%d0	# insert sgl prec
14639	bra.b		fadd
14640
14641	global		fdadd
14642fdadd:
14643	andi.b		&0x30,%d0		# clear rnd prec
14644	ori.b		&d_mode*0x10,%d0	# insert dbl prec
14645
14646	global		fadd
14647fadd:
14648	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14649
14650	clr.w		%d1
14651	mov.b		DTAG(%a6),%d1
14652	lsl.b		&0x3,%d1		# index = (DTAG << 3) | STAG
14653	or.b		STAG(%a6),%d1		# combine src tags
14654
14655	bne.w		fadd_not_norm		# optimize on non-norm input
14656
14657#
14658# ADD: norms and denorms
14659#
#
# Main add path. After scaling (scale factor left in d0), the add is done
# under the user's rounding info. A zero result returns immediately with
# fp0 as produced by the add. Otherwise d2 is saved, the result is pushed
# on the stack, and its exponent with the scale factor applied (d2) is
# compared against the per-precision thresholds in tbl_fadd_ovfl and
# tbl_fadd_unfl (indexed by the rounding precision in d1).
#
# Stack layout for every path leaving here towards fadd_ovfl, fadd_unfl or
# fadd_may_unfl: 12-byte result at (sp), saved d2 above it.
#
# fadd_zero_entry is also entered from fadd_zero_dst/fadd_zero_src with the
# operands already placed in FP_SCR0/FP_SCR1.
#
14660fadd_norm:
14661	bsr.l		addsub_scaler2		# scale exponents
14662
14663fadd_zero_entry:
14664	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14665
14666	fmov.l		&0x0,%fpsr		# clear FPSR
14667	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14668
14669	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14670
14671	fmov.l		&0x0,%fpcr		# clear FPCR
14672	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
14673
14674	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
14675
14676	fbeq.w		fadd_zero_exit		# if result is zero, end now
14677
14678	mov.l		%d2,-(%sp)		# save d2
14679
14680	fmovm.x		&0x01,-(%sp)		# save result to stack
14681
14682	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
14683	lsr.b		&0x6,%d1		# d1 = rnd prec (table index)
14684
14685	mov.w		(%sp),%d2		# fetch new sign, exp
14686	andi.l		&0x7fff,%d2		# strip sign
14687	sub.l		%d0,%d2			# add scale factor
14688
14689	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690	bge.b		fadd_ovfl		# yes
14691
14692	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693	blt.w		fadd_unfl		# yes
14694	beq.w		fadd_may_unfl		# maybe; go find out
14695
# no exception: put the true exponent (d2) back into the stacked result,
# pop it into fp0 and restore d2.
14696fadd_normal:
14697	mov.w		(%sp),%d1
14698	andi.w		&0x8000,%d1		# keep sign
14699	or.w		%d2,%d1			# concat sign,new exp
14700	mov.w		%d1,(%sp)		# insert new exponent
14701
14702	fmovm.x		(%sp)+,&0x80		# return result in fp0
14703
14704	mov.l		(%sp)+,%d2		# restore d2
14705	rts
14706
14707fadd_zero_exit:
14708#	fmov.s		&0x00000000,%fp0	# return zero in fp0
14709	rts
14710
# exponent thresholds, one longword per rounding precision (ext, sgl, dbl)
14711tbl_fadd_ovfl:
14712	long		0x7fff			# ext ovfl
14713	long		0x407f			# sgl ovfl
14714	long		0x43ff			# dbl ovfl
14715
14716tbl_fadd_unfl:
14717	long	        0x0000			# ext unfl
14718	long		0x3f81			# sgl unfl
14719	long		0x3c01			# dbl unfl
14720
#
# Overflow. On entry the 12-byte result and the saved d2 are on the stack.
# If neither OVFL nor INEX is enabled the stacked result is discarded and
# the default overflow result from ovf_res() is returned in fp0.
# fadd_ovfl_dis is also reached from fadd_ovfl_ena after the EXOP has been
# popped, so in both cases only the saved d2 remains on the stack here.
#
14721fadd_ovfl:
14722	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14723
14724	mov.b		FPCR_ENABLE(%a6),%d1
14725	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14726	bne.b		fadd_ovfl_ena		# yes
14727
14728	add.l		&0xc,%sp		# discard stacked result
14729fadd_ovfl_dis:
14730	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
14731	sne		%d1			# set sign param accordingly
14732	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14733	bsr.l		ovf_res			# calculate default result
14734	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
14735	fmovm.x		(%a0),&0x80		# return default result in fp0
14736	mov.l		(%sp)+,%d2		# restore d2
14737	rts
14738
#
# OVFL or INEX is enabled: build the EXOP in fp1, then join fadd_ovfl_dis
# for the default result.
#
# On entry: 12-byte result at (sp), saved d2 above it, d2 = result exponent
# with the scale factor applied.
#  - extended precision: the stacked result already is the EXOP mantissa;
#    only the exponent is re-biased (-0x6000) in fadd_ovfl_ena_cont.
#  - single/double precision: fadd_ovfl_ena_sd redoes the add with the
#    precision bits cleared (rounding mode only), replaces the stacked
#    result with it and then re-biases the exponent the same way.
#
# FIX: the rounding precision lives in the low byte of the L_SCR3 longword
# (offset 3 on this big-endian target; the rest of this file reads it as
# 3+L_SCR3, as the low byte of the 2+L_SCR3 word, or via mov.l + andi.b).
# The original "mov.b L_SCR3(%a6),%d1" fetched the most significant byte
# instead, so the precision test below did not look at the precision bits.
#
14739fadd_ovfl_ena:
14740	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
14741	andi.b		&0xc0,%d1		# is precision extended?
14742	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
14743
14744fadd_ovfl_ena_cont:
14745	mov.w		(%sp),%d1
14746	andi.w		&0x8000,%d1		# keep sign
14747	subi.l		&0x6000,%d2		# add extra bias
14748	andi.w		&0x7fff,%d2		# clear top bit
14749	or.w		%d2,%d1			# concat sign,new exp
14750	mov.w		%d1,(%sp)		# insert new exponent
14751
14752	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
14753	bra.b		fadd_ovfl_dis
14754
14755fadd_ovfl_ena_sd:
14756	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14757
14758	mov.l		L_SCR3(%a6),%d1
14759	andi.b		&0x30,%d1		# keep rnd mode
14760	fmov.l		%d1,%fpcr		# set FPCR
14761
14762	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14763
14764	fmov.l		&0x0,%fpcr		# clear FPCR
14765
14766	add.l		&0xc,%sp		# drop old stacked result
14767	fmovm.x		&0x01,-(%sp)		# push new result in its place
14768	bra.b		fadd_ovfl_ena_cont
14769
#
# Underflow. On entry the 12-byte result and the saved d2 are on the stack;
# the stacked result is discarded and the add is redone in RZ mode.
#  - fadd_unfl_dis: passes that result (via FP_SCR0) and the rounding info
#    to unf_res(), returns the default result in fp0 and restores d2.
#  - fadd_unfl_ena: UNFL or INEX is enabled, so the add is repeated into
#    fp1 (with the precision bits cleared for sgl/dbl, see
#    fadd_unfl_ena_sd) and the EXOP exponent is re-biased by the scale
#    factor plus 0x6000. d2 is used as scratch here; it is restored from
#    the stack in fadd_unfl_dis.
# d0 must still hold the scale factor when fadd_unfl_ena_cont runs.
#
14770fadd_unfl:
14771	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14772
14773	add.l		&0xc,%sp		# discard stacked result
14774
14775	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14776
14777	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
14778	fmov.l		&0x0,%fpsr		# clear FPSR
14779
14780	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14781
14782	fmov.l		&0x0,%fpcr		# clear FPCR
14783	fmov.l		%fpsr,%d1		# save status
14784
14785	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14786
14787	mov.b		FPCR_ENABLE(%a6),%d1
14788	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14789	bne.b		fadd_unfl_ena		# yes
14790
14791fadd_unfl_dis:
14792	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14793
14794	lea		FP_SCR0(%a6),%a0	# pass: result addr
14795	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14796	bsr.l		unf_res			# calculate default result
14797	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14798	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14799	mov.l		(%sp)+,%d2		# restore d2
14800	rts
14801
14802fadd_unfl_ena:
14803	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14804
14805	mov.l		L_SCR3(%a6),%d1
14806	andi.b		&0xc0,%d1		# is precision extended?
14807	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
14808
14809	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14810
14811fadd_unfl_ena_cont:
14812	fmov.l		&0x0,%fpsr		# clear FPSR
14813
14814	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14815
14816	fmov.l		&0x0,%fpcr		# clear FPCR
14817
14818	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14819	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14820	mov.l		%d1,%d2			# make a copy
14821	andi.l		&0x7fff,%d1		# strip sign
14822	andi.w		&0x8000,%d2		# keep old sign
14823	sub.l		%d0,%d1			# add scale factor
14824	addi.l		&0x6000,%d1		# add new bias
14825	andi.w		&0x7fff,%d1		# clear top bit
14826	or.w		%d2,%d1			# concat sign,new exp
14827	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14828	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14829	bra.w		fadd_unfl_dis
14830
14831fadd_unfl_ena_sd:
14832	mov.l		L_SCR3(%a6),%d1
14833	andi.b		&0x30,%d1		# use only rnd mode
14834	fmov.l		%d1,%fpcr		# set FPCR
14835
14836	bra.b		fadd_unfl_ena_cont
14837
14838#
14839# result is equal to the smallest normalized number in the selected precision
14840# if the precision is extended, this result could not have come from an
14841# underflow that rounded up.
14842#
#
# Borderline underflow case (result exponent equals the unfl threshold).
# On entry: 12-byte result at (sp), saved d2 above it, fp0 = result.
# Every "not an underflow" exit goes to fadd_normal; the underflow exit
# goes to fadd_unfl. Both expect that stack layout unchanged.
#
14843fadd_may_unfl:
14844	mov.l		L_SCR3(%a6),%d1
14845	andi.b		&0xc0,%d1		# is precision extended?
14846	beq.w		fadd_normal		# yes; no underflow occurred
14847
14848	mov.l		0x4(%sp),%d1		# extract hi(man)
14849	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
14850	bne.w		fadd_normal		# no; no underflow occurred
14851
14852	tst.l		0x8(%sp)		# is lo(man) = 0x0?
14853	bne.w		fadd_normal		# no; no underflow occurred
14854
14855	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856	beq.w		fadd_normal		# no; no underflow occurred
14857
14858#
14859# ok, so now the result has an exponent equal to the smallest normalized
14860# exponent for the selected precision. also, the mantissa is equal to
14861# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862# g,r,s.
14863# now, we must determine whether the pre-rounded result was an underflow
14864# rounded "up" or a normalized number rounded "down".
14865# so, we do this by re-executing the add using RZ as the rounding mode and
14866# seeing if the new result is smaller or equal to the current result.
14867#
14868	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
14869
14870	mov.l		L_SCR3(%a6),%d1
14871	andi.b		&0xc0,%d1		# keep rnd prec
14872	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
14873	fmov.l		%d1,%fpcr		# set FPCR
14874	fmov.l		&0x0,%fpsr		# clear FPSR
14875
14876	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14877
14878	fmov.l		&0x0,%fpcr		# clear FPCR
14879
14880	fabs.x		%fp0			# compare absolute values
14881	fabs.x		%fp1
14882	fcmp.x		%fp0,%fp1		# is first result > second?
14883
14884	fbgt.w		fadd_unfl		# yes; it's an underflow
14885	bra.w		fadd_normal		# no; it's not an underflow
14886
14887##########################################################################
14888
14889#
14890# Add: inputs are not both normalized; what are they?
14891#
#
# Dispatch on the operand classes. d1 holds (DTAG << 3) | STAG, so each
# group of eight entries below is one destination class and the position
# within the group is the source class (labelled "dst + src"). Entries are
# 16-bit offsets relative to tbl_fadd_op. The two trailing entries of each
# group are zero offsets (they point back at the table itself).
#
14892fadd_not_norm:
14893	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
14894	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# jump to handler
14895
14896	swbeg		&48
14897tbl_fadd_op:
14898	short		fadd_norm	- tbl_fadd_op # NORM + NORM
14899	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
14900	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
14901	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14902	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
14903	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14904	short		tbl_fadd_op	- tbl_fadd_op #
14905	short		tbl_fadd_op	- tbl_fadd_op #
14906
14907	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
14908	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
14909	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
14910	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
14911	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
14912	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
14913	short		tbl_fadd_op	- tbl_fadd_op #
14914	short		tbl_fadd_op	- tbl_fadd_op #
14915
14916	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
14917	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
14918	short		fadd_inf_2	- tbl_fadd_op # INF + INF
14919	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
14920	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
14921	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
14922	short		tbl_fadd_op	- tbl_fadd_op #
14923	short		tbl_fadd_op	- tbl_fadd_op #
14924
14925	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
14926	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
14927	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
14928	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
14929	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
14930	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
14931	short		tbl_fadd_op	- tbl_fadd_op #
14932	short		tbl_fadd_op	- tbl_fadd_op #
14933
14934	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
14935	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
14936	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
14937	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
14938	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
14939	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
14940	short		tbl_fadd_op	- tbl_fadd_op #
14941	short		tbl_fadd_op	- tbl_fadd_op #
14942
14943	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
14944	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
14945	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
14946	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
14947	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
14948	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
14949	short		tbl_fadd_op	- tbl_fadd_op #
14950	short		tbl_fadd_op	- tbl_fadd_op #
14951
14952fadd_res_qnan:
14953	bra.l		res_qnan		# shared QNAN result handler
14954fadd_res_snan:
14955	bra.l		res_snan		# shared SNAN result handler
14955	bra.l		res_snan
14956
14957#
14958# both operands are ZEROes
14959#
#
# ZERO + ZERO. Same signs: return a zero of that sign. Opposite signs:
# return -ZERO only when the rounding mode is RM, +ZERO otherwise.
# Each exit loads fp0 and overwrites FPSR_CC with Z (and NEG for -ZERO).
#
14960fadd_zero_2:
14961	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
14962	mov.b		DST_EX(%a1),%d1
14963	eor.b		%d0,%d1			# msb set if signs differ
14964	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
14965
14966# the signs are the same. so determine whether they are positive or negative
14967# and return the appropriately signed zero.
14968	tst.b		%d0			# are ZEROes positive or negative?
14969	bmi.b		fadd_zero_rm		# negative
14970	fmov.s		&0x00000000,%fp0	# return +ZERO
14971	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14972	rts
14973
14974#
14975# the ZEROes have opposite signs:
14976# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977# - -ZERO is returned in the case of RM.
14978#
14979fadd_zero_2_chk_rm:
14980	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
14981	andi.b		&0x30,%d1		# extract rnd mode
14982	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
14983	beq.b		fadd_zero_rm		# yes
14984	fmov.s		&0x00000000,%fp0	# return +ZERO
14985	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14986	rts
14987
14988fadd_zero_rm:
14989	fmov.s		&0x80000000,%fp0	# return -ZERO
14990	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991	rts
14992
14993#
14994# one operand is a ZERO and the other is a DENORM or NORM. scale
14995# the DENORM or NORM and jump to the regular fadd routine.
14996#
#
# Exactly one operand is a ZERO. The non-zero operand is copied to its
# scratch slot and scaled (scale factor left in d0), the other scratch slot
# is cleared, and the regular add is entered at fadd_zero_entry.
#   fadd_zero_dst: dst is ZERO, src goes to FP_SCR0, FP_SCR1 is cleared.
#   fadd_zero_src: src is ZERO, dst goes to FP_SCR1, FP_SCR0 is cleared.
#
14997fadd_zero_dst:
14998	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
14999	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15000	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15001	bsr.l		scale_to_zero_src	# scale the operand
15002	clr.w		FP_SCR1_EX(%a6)		# dst scratch = zero
15003	clr.l		FP_SCR1_HI(%a6)
15004	clr.l		FP_SCR1_LO(%a6)
15005	bra.w		fadd_zero_entry		# go execute fadd
15006
15007fadd_zero_src:
15008	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst op to FP_SCR1
15009	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
15010	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15011	bsr.l		scale_to_zero_dst	# scale the operand
15012	clr.w		FP_SCR0_EX(%a6)		# src scratch = zero
15013	clr.l		FP_SCR0_HI(%a6)
15014	clr.l		FP_SCR0_LO(%a6)
15015	bra.w		fadd_zero_entry		# go execute fadd
15016
15017#
15018# both operands are INFs. an OPERR will result if the INFs have
15019# different signs. else, an INF of the same sign is returned
15020#
#
# INF handling. fadd_inf_2 (both operands INF) branches to res_operr when
# the signs differ and otherwise falls through into fadd_inf_src.
# fadd_inf_src / fadd_inf_dst return the src / dst INF in fp0 and overwrite
# FPSR_CC with INF, plus NEG when the returned INF is negative.
#
15021fadd_inf_2:
15022	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15023	mov.b		DST_EX(%a1),%d1
15024	eor.b		%d1,%d0			# msb set if signs differ
15025	bmi.l		res_operr		# weed out (-INF)+(+INF)
15026
15027# ok, so it's not an OPERR. but, we do have to remember to return the
15028# src INF since that's where the 881/882 gets the j-bit from...
15029
15030#
15031# operands are INF and one of {ZERO, INF, DENORM, NORM}
15032#
15033fadd_inf_src:
15034	fmovm.x		SRC(%a0),&0x80		# return src INF
15035	tst.b		SRC_EX(%a0)		# is INF positive?
15036	bpl.b		fadd_inf_done		# yes; we're done
15037	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038	rts
15039
15040#
15041# operands are INF and one of {ZERO, INF, DENORM, NORM}
15042#
15043fadd_inf_dst:
15044	fmovm.x		DST(%a1),&0x80		# return dst INF
15045	tst.b		DST_EX(%a1)		# is INF positive?
15046	bpl.b		fadd_inf_done		# yes; we're done
15047	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048	rts
15049
15050fadd_inf_done:
15051	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
15052	rts
15053
15054#########################################################################
15055# XDEF ****************************************************************	#
15056#	fsub(): emulates the fsub instruction				#
15057#	fssub(): emulates the fssub instruction				#
15058#	fdsub(): emulates the fdsub instruction				#
15059#									#
15060# XREF ****************************************************************	#
15061#	addsub_scaler2() - scale the operands so they won't take exc	#
15062#	ovf_res() - return default overflow result			#
15063#	unf_res() - return default underflow result			#
15064#	res_qnan() - set QNAN result					#
15065#	res_snan() - set SNAN result					#
15066#	res_operr() - set OPERR result					#
15067#	scale_to_zero_src() - set src operand exponent equal to zero	#
15068#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
15069#									#
15070# INPUT ***************************************************************	#
15071#	a0 = pointer to extended precision source operand		#
15072#	a1 = pointer to extended precision destination operand		#
15073#									#
15074# OUTPUT **************************************************************	#
15075#	fp0 = result							#
15076#	fp1 = EXOP (if exception occurred)				#
15077#									#
15078# ALGORITHM ***********************************************************	#
15079#	Handle NANs, infinities, and zeroes as special cases. Divide	#
15080# norms into extended, single, and double precision.			#
15081#	Do subtraction after scaling exponents such that exception won't#
15082# occur. Then, check result exponent to see if exception would have	#
15083# occurred. If so, return default result and maybe EXOP. Else, insert	#
15084# the correct result exponent and return. Set FPSR bits as appropriate.	#
15085#									#
15086#########################################################################
15087
#
# Entry points. fssub and fdsub replace the rounding-precision bits of d0
# (keeping only the 0x30 rounding-mode field) with single or double
# precision and continue at fsub; fdsub does so by falling through.
# fsub stores the rounding info in L_SCR3, builds the operand-class index
# (DTAG << 3) | STAG in d1 and dispatches through fsub_not_norm unless the
# index is zero, in which case it falls into fsub_norm.
#
15088	global		fssub
15089fssub:
15090	andi.b		&0x30,%d0		# clear rnd prec
15091	ori.b		&s_mode*0x10,%d0	# insert sgl prec
15092	bra.b		fsub
15093
15094	global		fdsub
15095fdsub:
15096	andi.b		&0x30,%d0		# clear rnd prec
15097	ori.b		&d_mode*0x10,%d0	# insert dbl prec
15098
15099	global		fsub
15100fsub:
15101	mov.l		%d0,L_SCR3(%a6)		# store rnd info
15102
15103	clr.w		%d1
15104	mov.b		DTAG(%a6),%d1
15105	lsl.b		&0x3,%d1		# index = (DTAG << 3) | STAG
15106	or.b		STAG(%a6),%d1		# combine src tags
15107
15108	bne.w		fsub_not_norm		# optimize on non-norm input
15109
15110#
15111# SUB: norms and denorms
15112#
#
# Main subtract path. After scaling (scale factor left in d0), the subtract
# is done under the user's rounding info. A zero result returns immediately
# with fp0 as produced by the subtract. Otherwise d2 is saved, the result
# is pushed on the stack, and its exponent with the scale factor applied
# (d2) is compared against the per-precision thresholds in tbl_fsub_ovfl
# and tbl_fsub_unfl (indexed by the rounding precision in d1).
#
# Stack layout for every path leaving here towards fsub_ovfl, fsub_unfl or
# fsub_may_unfl: 12-byte result at (sp), saved d2 above it.
#
15113fsub_norm:
15114	bsr.l		addsub_scaler2		# scale exponents
15115
15116fsub_zero_entry:
15117	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
15118
15119	fmov.l		&0x0,%fpsr		# clear FPSR
15120	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15121
15122	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
15123
15124	fmov.l		&0x0,%fpcr		# clear FPCR
15125	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
15126
15127	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
15128
15129	fbeq.w		fsub_zero_exit		# if result zero, end now
15130
15131	mov.l		%d2,-(%sp)		# save d2
15132
15133	fmovm.x		&0x01,-(%sp)		# save result to stack
15134
15135	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
15136	lsr.b		&0x6,%d1		# d1 = rnd prec (table index)
15137
15138	mov.w		(%sp),%d2		# fetch new exponent
15139	andi.l		&0x7fff,%d2		# strip sign
15140	sub.l		%d0,%d2			# add scale factor
15141
15142	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15143	bge.b		fsub_ovfl		# yes
15144
15145	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15146	blt.w		fsub_unfl		# yes
15147	beq.w		fsub_may_unfl		# maybe; go find out
15148
# no exception: put the true exponent (d2) back into the stacked result,
# pop it into fp0 and restore d2.
15149fsub_normal:
15150	mov.w		(%sp),%d1
15151	andi.w		&0x8000,%d1		# keep sign
15152	or.w		%d2,%d1			# concat sign,new exp
15153	mov.w		%d1,(%sp)		# insert new exponent
15154
15155	fmovm.x		(%sp)+,&0x80		# return result in fp0
15156
15157	mov.l		(%sp)+,%d2		# restore d2
15158	rts
15159
15160fsub_zero_exit:
15161#	fmov.s		&0x00000000,%fp0	# return zero in fp0
15162	rts
15163
# exponent thresholds, one longword per rounding precision (ext, sgl, dbl)
15164tbl_fsub_ovfl:
15165	long		0x7fff			# ext ovfl
15166	long		0x407f			# sgl ovfl
15167	long		0x43ff			# dbl ovfl
15168
15169tbl_fsub_unfl:
15170	long	        0x0000			# ext unfl
15171	long		0x3f81			# sgl unfl
15172	long		0x3c01			# dbl unfl
15173
#
# Overflow. On entry the 12-byte result and the saved d2 are on the stack.
# If neither OVFL nor INEX is enabled the stacked result is discarded and
# the default overflow result from ovf_res() is returned in fp0.
# fsub_ovfl_dis is also reached from fsub_ovfl_ena after the EXOP has been
# popped, so in both cases only the saved d2 remains on the stack here.
#
15174fsub_ovfl:
15175	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15176
15177	mov.b		FPCR_ENABLE(%a6),%d1
15178	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
15179	bne.b		fsub_ovfl_ena		# yes
15180
15181	add.l		&0xc,%sp		# discard stacked result
15182fsub_ovfl_dis:
15183	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
15184	sne		%d1			# set sign param accordingly
15185	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
15186	bsr.l		ovf_res			# calculate default result
15187	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
15188	fmovm.x		(%a0),&0x80		# return default result in fp0
15189	mov.l		(%sp)+,%d2		# restore d2
15190	rts
15191
#
# OVFL or INEX is enabled: build the EXOP in fp1, then join fsub_ovfl_dis
# for the default result.
#
# On entry: 12-byte result at (sp), saved d2 above it, d2 = result exponent
# with the scale factor applied.
#  - extended precision: the stacked result already is the EXOP mantissa;
#    only the exponent is re-biased (-0x6000) in fsub_ovfl_ena_cont.
#  - single/double precision: fsub_ovfl_ena_sd redoes the subtract with the
#    precision bits cleared (rounding mode only), replaces the stacked
#    result with it and then re-biases the exponent the same way.
#
# FIX: the rounding precision lives in the low byte of the L_SCR3 longword
# (offset 3 on this big-endian target; the rest of this file reads it as
# 3+L_SCR3, as the low byte of the 2+L_SCR3 word, or via mov.l + andi.b).
# The original "mov.b L_SCR3(%a6),%d1" fetched the most significant byte
# instead, so the precision test below did not look at the precision bits.
#
15192fsub_ovfl_ena:
15193	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
15194	andi.b		&0xc0,%d1		# is precision extended?
15195	bne.b		fsub_ovfl_ena_sd	# no
15196
15197fsub_ovfl_ena_cont:
15198	mov.w		(%sp),%d1		# fetch {sgn,exp}
15199	andi.w		&0x8000,%d1		# keep sign
15200	subi.l		&0x6000,%d2		# subtract new bias
15201	andi.w		&0x7fff,%d2		# clear top bit
15202	or.w		%d2,%d1			# concat sign,exp
15203	mov.w		%d1,(%sp)		# insert new exponent
15204
15205	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
15206	bra.b		fsub_ovfl_dis
15207
15208fsub_ovfl_ena_sd:
15209	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
15210
15211	mov.l		L_SCR3(%a6),%d1
15212	andi.b		&0x30,%d1		# clear rnd prec
15213	fmov.l		%d1,%fpcr		# set FPCR
15214
15215	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
15216
15217	fmov.l		&0x0,%fpcr		# clear FPCR
15218
15219	add.l		&0xc,%sp		# drop old stacked result
15220	fmovm.x		&0x01,-(%sp)		# push new result in its place
15221	bra.b		fsub_ovfl_ena_cont
15222
#
# Underflow. On entry the 12-byte result and the saved d2 are on the stack;
# the stacked result is discarded and the subtract is redone in RZ mode.
#  - fsub_unfl_dis: passes that result (via FP_SCR0) and the rounding info
#    to unf_res(), returns the default result in fp0 and restores d2.
#  - fsub_unfl_ena: UNFL or INEX is enabled, so the subtract is repeated
#    into fp1 (with the precision bits cleared for sgl/dbl, see
#    fsub_unfl_ena_sd) and the EXOP exponent is re-biased by the scale
#    factor plus 0x6000. d2 is used as scratch here; it is restored from
#    the stack in fsub_unfl_dis.
# d0 must still hold the scale factor when fsub_unfl_ena_cont runs.
#
15223fsub_unfl:
15224	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15225
15226	add.l		&0xc,%sp		# discard stacked result
15227
15228	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
15229
15230	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
15231	fmov.l		&0x0,%fpsr		# clear FPSR
15232
15233	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
15234
15235	fmov.l		&0x0,%fpcr		# clear FPCR
15236	fmov.l		%fpsr,%d1		# save status
15237
15238	or.l		%d1,USER_FPSR(%a6)	# merge status into USER_FPSR
15239
15240	mov.b		FPCR_ENABLE(%a6),%d1
15241	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
15242	bne.b		fsub_unfl_ena		# yes
15243
15244fsub_unfl_dis:
15245	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
15246
15247	lea		FP_SCR0(%a6),%a0	# pass: result addr
15248	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
15249	bsr.l		unf_res			# calculate default result
15250	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
15251	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
15252	mov.l		(%sp)+,%d2		# restore d2
15253	rts
15254
15255fsub_unfl_ena:
15256	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
15257
15258	mov.l		L_SCR3(%a6),%d1
15259	andi.b		&0xc0,%d1		# is precision extended?
15260	bne.b		fsub_unfl_ena_sd	# no
15261
15262	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15263
15264fsub_unfl_ena_cont:
15265	fmov.l		&0x0,%fpsr		# clear FPSR
15266
15267	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
15268
15269	fmov.l		&0x0,%fpcr		# clear FPCR
15270
15271	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
15272	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
15273	mov.l		%d1,%d2			# make a copy
15274	andi.l		&0x7fff,%d1		# strip sign
15275	andi.w		&0x8000,%d2		# keep old sign
15276	sub.l		%d0,%d1			# add scale factor
15277	addi.l		&0x6000,%d1		# add new bias
15278	andi.w		&0x7fff,%d1		# clear top bit
15279	or.w		%d2,%d1			# concat sgn,exp
15280	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
15281	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
15282	bra.w		fsub_unfl_dis
15283
15284fsub_unfl_ena_sd:
15285	mov.l		L_SCR3(%a6),%d1
15286	andi.b		&0x30,%d1		# clear rnd prec
15287	fmov.l		%d1,%fpcr		# set FPCR
15288
15289	bra.b		fsub_unfl_ena_cont
15289	bra.b		fsub_unfl_ena_cont
15290
15291#
15292# result is equal to the smallest normalized number in the selected precision
15293# if the precision is extended, this result could not have come from an
15294# underflow that rounded up.
15295#
15296fsub_may_unfl:
15297	mov.l		L_SCR3(%a6),%d1
15298	andi.b		&0xc0,%d1		# fetch rnd prec
15299	beq.w		fsub_normal		# extended; no underflow occurred
15300
15301	mov.l		0x4(%sp),%d1		# fetch hi(man) from the stack
15302	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
15303	bne.w		fsub_normal		# no; no underflow occurred
15304
15305	tst.l		0x8(%sp)		# is lo(man) = 0x0?
15306	bne.w		fsub_normal		# no; no underflow occurred
15307
15308	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309	beq.w		fsub_normal		# no; no underflow occurred
15310
15311#
15312# ok, so now the result has an exponent equal to the smallest normalized
15313# exponent for the selected precision. also, the mantissa is equal to
15314# 0x8000000000000000 and this mantissa is the result of rounding non-zero
15315# g,r,s.
15316# now, we must determine whether the pre-rounded result was an underflow
15317# rounded "up" or a normalized number rounded "down".
15318# so, we do this by re-executing the subtract using RZ as the rounding mode
15319# and seeing if the new result is smaller or equal to the current result.
15320#
15321	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
15322
15323	mov.l		L_SCR3(%a6),%d1
15324	andi.b		&0xc0,%d1		# keep rnd prec
15325	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
15326	fmov.l		%d1,%fpcr		# set FPCR
15327	fmov.l		&0x0,%fpsr		# clear FPSR
15328
15329	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
15330
15331	fmov.l		&0x0,%fpcr		# clear FPCR
15332
15333	fabs.x		%fp0			# compare absolute values
15334	fabs.x		%fp1
15335	fcmp.x		%fp0,%fp1		# is first result > second?
15336
15337	fbgt.w		fsub_unfl		# yes; it's an underflow
15338	bra.w		fsub_normal		# no; it's not an underflow
15339
15340##########################################################################
15341
15342#
15343# Sub: inputs are not both normalized; what are they?
15344#
# fsub_not_norm: dispatch on the operand-type index in d1 through
# tbl_fsub_op. Each table entry is a 16-bit offset relative to tbl_fsub_op.
# Rows are grouped by dst type (8 entries per row, columns are the src
# type); the last two entries of each row are filler with offset 0.
15345fsub_not_norm:
15346	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
15347	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to handler
15348
15349	swbeg		&48
15350tbl_fsub_op:
15351	short		fsub_norm	- tbl_fsub_op # NORM - NORM
15352	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
15353	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
15354	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15355	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
15356	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15357	short		tbl_fsub_op	- tbl_fsub_op #
15358	short		tbl_fsub_op	- tbl_fsub_op #
15359
15360	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
15361	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
15362	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
15363	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
15364	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
15365	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
15366	short		tbl_fsub_op	- tbl_fsub_op #
15367	short		tbl_fsub_op	- tbl_fsub_op #
15368
15369	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
15370	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
15371	short		fsub_inf_2	- tbl_fsub_op # INF - INF
15372	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
15373	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
15374	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
15375	short		tbl_fsub_op	- tbl_fsub_op #
15376	short		tbl_fsub_op	- tbl_fsub_op #
15377
15378	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
15379	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
15380	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
15381	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
15382	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
15383	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
15384	short		tbl_fsub_op	- tbl_fsub_op #
15385	short		tbl_fsub_op	- tbl_fsub_op #
15386
15387	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
15388	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
15389	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
15390	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
15391	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
15392	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
15393	short		tbl_fsub_op	- tbl_fsub_op #
15394	short		tbl_fsub_op	- tbl_fsub_op #
15395
15396	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
15397	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
15398	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
15399	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
15400	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
15401	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
15402	short		tbl_fsub_op	- tbl_fsub_op #
15403	short		tbl_fsub_op	- tbl_fsub_op #
15404
# long-branch trampolines so the 16-bit table offsets can reach the shared
# NAN handlers
15405fsub_res_qnan:
15406	bra.l		res_qnan
15407fsub_res_snan:
15408	bra.l		res_snan
15409
15410#
15411# both operands are ZEROes
15412#
# a0 points to the src operand and a1 to the dst operand. The result
# (dst - src) is returned in fp0 and the Z (and possibly N) condition code
# is written to FPSR_CC.
15413fsub_zero_2:
15414	mov.b		SRC_EX(%a0),%d0		# d0 = src sign byte
15415	mov.b		DST_EX(%a1),%d1		# d1 = dst sign byte
15416	eor.b		%d1,%d0			# d0 = src sign ^ dst sign
15417	bpl.b		fsub_zero_2_chk_rm	# signs are the same
15418
15419# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# d0 now holds the XOR of the two signs and is always negative on this path,
# so the dst sign has to be tested in d1, which still holds the dst byte.
15420	tst.b		%d1			# is dst negative?
15421	bmi.b		fsub_zero_2_rm		# yes
15422	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15423	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15424	rts
15425
15426#
15427# the ZEROes have the same signs:
15428# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429# - -ZERO is returned in the case of RM.
15430#
15431fsub_zero_2_chk_rm:
15432	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
15433	andi.b		&0x30,%d1		# extract rnd mode
15434	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
15435	beq.b		fsub_zero_2_rm		# yes
15436	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15437	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15438	rts
15439
15440fsub_zero_2_rm:
15441	fmov.s		&0x80000000,%fp0	# return -ZERO
15442	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
15443	rts
15444
15445#
15446# one operand is a ZERO and the other is a DENORM or a NORM.
15447# scale the DENORM or NORM and jump to the regular fsub routine.
15448#
# fsub_zero_dst: dst is the ZERO. The src operand is copied to FP_SCR0 and
# scaled; FP_SCR1 is cleared completely, so the dst used by fsub_zero_entry
# is an all-zero extended value.
15449fsub_zero_dst:
15450	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp}
15451	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src hi(man)
15452	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy src lo(man)
15453	bsr.l		scale_to_zero_src	# scale the operand
15454	clr.w		FP_SCR1_EX(%a6)		# dst = all-zero operand
15455	clr.l		FP_SCR1_HI(%a6)
15456	clr.l		FP_SCR1_LO(%a6)
15457	bra.w		fsub_zero_entry		# go execute fsub
15458
# fsub_zero_src: src is the ZERO. Mirror image of the above: the dst operand
# is copied to FP_SCR1 and scaled, and FP_SCR0 is cleared completely.
15459fsub_zero_src:
15460	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst {sgn,exp}
15461	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst hi(man)
15462	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)	# copy dst lo(man)
15463	bsr.l		scale_to_zero_dst	# scale the operand
15464	clr.w		FP_SCR0_EX(%a6)		# src = all-zero operand
15465	clr.l		FP_SCR0_HI(%a6)
15466	clr.l		FP_SCR0_LO(%a6)
15467	bra.w		fsub_zero_entry		# go execute fsub
15468
15469#
15470# both operands are INFs. an OPERR will result if the INFs have the
15471# same signs. else, the src INF is returned with its sign inverted.
15472#
15473fsub_inf_2:
15474	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15475	mov.b		DST_EX(%a1),%d1
15476	eor.b		%d1,%d0
15477	bpl.l		res_operr		# weed out INFs with equal signs
15478
15479# ok, so it's not an OPERR. but we do have to remember to return
15480# the src INF since that's where the 881/882 gets the j-bit.
15481
# fsub_inf_src: src is an INF; the result is the src INF with inverted sign.
15482fsub_inf_src:
15483	fmovm.x		SRC(%a0),&0x80		# return src INF
15484	fneg.x		%fp0			# invert sign
15485	fbge.w		fsub_inf_done		# sign is now positive
15486	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15487	rts
15488
# fsub_inf_dst: dst is an INF and src is not; the result is the dst INF.
15489fsub_inf_dst:
15490	fmovm.x		DST(%a1),&0x80		# return dst INF
15491	tst.b		DST_EX(%a1)		# is INF negative?
15492	bpl.b		fsub_inf_done		# no
15493	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15494	rts
15495
# common exit for a positive INF result
15496fsub_inf_done:
15497	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
15498	rts
15499
15500#########################################################################
15501# XDEF ****************************************************************	#
15502#	fsqrt(): emulates the fsqrt instruction				#
15503#	fssqrt(): emulates the fssqrt instruction			#
15504#	fdsqrt(): emulates the fdsqrt instruction			#
15505#									#
15506# XREF ****************************************************************	#
15507#	scale_sqrt() - scale the source operand				#
15508#	unf_res() - return default underflow result			#
15509#	ovf_res() - return default overflow result			#
15510#	res_qnan_1op() - return QNAN result				#
15511#	res_snan_1op() - return SNAN result				#
15512#									#
15513# INPUT ***************************************************************	#
15514#	a0 = pointer to extended precision source operand		#
15515#	d0 = rnd prec,mode						#
15516#									#
15517# OUTPUT **************************************************************	#
15518#	fp0 = result							#
15519#	fp1 = EXOP (if exception occurred)				#
15520#									#
15521# ALGORITHM ***********************************************************	#
15522#	Handle NANs, infinities, and zeroes as special cases. Divide	#
15523# norms/denorms into ext/sgl/dbl precision.				#
15524#	For norms/denorms, scale the exponents such that a sqrt		#
15525# instruction won't cause an exception. Use the regular fsqrt to	#
15526# compute a result. Check if the regular operands would have taken	#
15527# an exception. If so, return the default overflow/underflow result	#
15528# and return the EXOP if exceptions are enabled. Else, scale the	#
15529# result operand to the proper exponent.				#
15530#									#
15531#########################################################################
15532
# fssqrt: force single precision in d0 (rounding mode bits 0x30 are kept),
# then continue in fsqrt.
15533	global		fssqrt
15534fssqrt:
15535	andi.b		&0x30,%d0		# clear rnd prec
15536	ori.b		&s_mode*0x10,%d0	# insert sgl precision
15537	bra.b		fsqrt
15538
# fdsqrt: force double precision in d0 and fall through into fsqrt.
15539	global		fdsqrt
15540fdsqrt:
15541	andi.b		&0x30,%d0		# clear rnd prec
15542	ori.b		&d_mode*0x10,%d0	# insert dbl precision
15543
# fsqrt: common entry. Saves the rounding info in L_SCR3 and dispatches on
# the source tag; a non-zero STAG goes to fsqrt_not_norm.
15544	global		fsqrt
15545fsqrt:
15546	mov.l		%d0,L_SCR3(%a6)		# store rnd info
15547	clr.w		%d1			# clear low word before byte load
15548	mov.b		STAG(%a6),%d1		# fetch source operand tag
15549	bne.w		fsqrt_not_norm		# optimize on non-norm input
15550
15551#
15552# SQUARE ROOT: norms and denorms ONLY!
15553#
# A negative operand yields OPERR. For extended precision the square root is
# executed directly on the operand at (a0); sgl/dbl go to fsqrt_not_ext.
# NOTE(review): unlike fsqrt_sd_normal, this path does not clear FPCR again
# before returning - confirm the caller resets it.
15554fsqrt_norm:
15555	tst.b		SRC_EX(%a0)		# is operand negative?
15556	bmi.l		res_operr		# yes
15557
15558	andi.b		&0xc0,%d0		# is precision extended?
15559	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
15560
15561	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15562	fmov.l		&0x0,%fpsr		# clear FPSR
15563
15564	fsqrt.x		(%a0),%fp0		# execute square root
15565
15566	fmov.l		%fpsr,%d1		# fetch resulting status
15567	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
15568
15569	rts
15570
# fsqrt_denorm: DENORM source operand. A negative operand yields OPERR.
# sgl/dbl precision is handled by fsqrt_not_ext; for extended precision the
# operand is copied to FP_SCR0, scaled by scale_sqrt() and processed by the
# common fsqrt_sd_normal path, which unscales the result with d0.
15571fsqrt_denorm:
15572	tst.b		SRC_EX(%a0)		# is operand negative?
15573	bmi.l		res_operr		# yes
15574
15575	andi.b		&0xc0,%d0		# is precision extended?
15576	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
15577
15578	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy {sgn,exp}
15579	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy hi(man)
15580	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy lo(man)
15581
15582	bsr.l		scale_sqrt		# calculate scale factor
15583
15584	bra.w		fsqrt_sd_normal
15585
15586#
15587# operand is either single or double
15588#
# d0 holds the precision bits (already masked with 0xc0 by the caller).
15589fsqrt_not_ext:
15590	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
15591	bne.w		fsqrt_dbl
15592
15593#
15594# operand is to be rounded to single precision
15595#
# The operand is copied to FP_SCR0 and scaled; the scale factor returned in
# d0 is then compared against the single-precision limits to classify the
# result as underflow, overflow, borderline or normal. The normal case falls
# through into fsqrt_sd_normal.
15596fsqrt_sgl:
15597	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy {sgn,exp}
15598	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy hi(man)
15599	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy lo(man)
15600
15601	bsr.l		scale_sqrt		# calculate scale factor
15602
15603	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
15604	beq.w		fsqrt_sd_may_unfl	# maybe; go check
15605	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
15606	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
15607	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15608	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15609
15610#
15611# operand will NOT overflow or underflow when moved in to the fp reg file
15612#
# Executes the square root on the scaled operand in FP_SCR0 with the user's
# rounding info, merges the status into USER_FPSR, then removes the scaling
# from the result exponent (d0 = scale factor) and returns the result in fp0.
15613fsqrt_sd_normal:
15614	fmov.l		&0x0,%fpsr		# clear FPSR
15615	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15616
15617	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15618
15619	fmov.l		%fpsr,%d1		# save FPSR
15620	fmov.l		&0x0,%fpcr		# clear FPCR
15621
15622	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15623
# also entered from fsqrt_sd_may_ovfl with the result already in fp0
15624fsqrt_sd_normal_exit:
15625	mov.l		%d2,-(%sp)		# save d2
15626	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
15627	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
15628	mov.l		%d1,%d2			# make a copy
15629	andi.l		&0x7fff,%d1		# strip sign
15630	sub.l		%d0,%d1			# subtract scale factor
15631	andi.w		&0x8000,%d2		# keep old sign
15632	or.w		%d1,%d2			# concat old sign,new exp
15633	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
15634	mov.l		(%sp)+,%d2		# restore d2
15635	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
15636	rts
15637
15638#
15639# operand is to be rounded to double precision
15640#
# Same classification as fsqrt_sgl, but against the double-precision limits.
15641fsqrt_dbl:
15642	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy {sgn,exp}
15643	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy hi(man)
15644	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy lo(man)
15645
15646	bsr.l		scale_sqrt		# calculate scale factor
15647
15648	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
15649	beq.w		fsqrt_sd_may_unfl	# maybe; go check
15650	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
15651	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
15652	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15653	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15654	bra.w		fsqrt_sd_normal		# no; go handle normalized op
15655
15656# we're on the line here and the distinguishing characteristic is whether
15657# the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number
15658# elsewise fall through to underflow.
15659fsqrt_sd_may_unfl:
15660	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
15661	bne.w		fsqrt_sd_normal		# yes, so no underflow
15662
15663#
15664# operand WILL underflow when moved in to the fp register file
15665#
# Sets the UNFL exception bit, computes the square root in round-to-zero
# mode and lets unf_res() produce the default result returned in fp0. If
# UNFL or INEX is enabled, fsqrt_sd_unfl_ena first builds an EXOP in fp1.
15666fsqrt_sd_unfl:
15667	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15668
15669	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
15670	fmov.l		&0x0,%fpsr		# clear FPSR
15671
15672	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
15673
15674	fmov.l		%fpsr,%d1		# save status
15675	fmov.l		&0x0,%fpcr		# clear FPCR
15676
15677	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15678
15679# if underflow or inexact is enabled, go calculate EXOP first.
15680	mov.b		FPCR_ENABLE(%a6),%d1
15681	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
15682	bne.b		fsqrt_sd_unfl_ena	# yes
15683
15684fsqrt_sd_unfl_dis:
15685	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
15686
15687	lea		FP_SCR0(%a6),%a0	# pass: result addr
15688	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
15689	bsr.l		unf_res			# calculate default result
15690	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
15691	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
15692	rts
15693
15694#
15695# operand will underflow AND underflow is enabled.
15696# Therefore, we must return the result rounded to extended precision.
15697#
# Builds the EXOP in FP_SCR1 from the contents of FP_SCR0, with the exponent
# unscaled by d0 and re-biased by +0x6000, and loads it into fp1.
# NOTE(review): fsqrt_sd_unfl has not stored fp0 back to FP_SCR0 when it
# branches here, so the mantissa and exponent copied below appear to be
# those of the scaled source operand rather than of the square-root
# result - confirm this is intended.
15698fsqrt_sd_unfl_ena:
15699	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy hi(man)
15700	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)	# copy lo(man)
15701	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
15702
15703	mov.l		%d2,-(%sp)		# save d2
15704	mov.l		%d1,%d2			# make a copy
15705	andi.l		&0x7fff,%d1		# strip sign
15706	andi.w		&0x8000,%d2		# keep old sign
15707	sub.l		%d0,%d1			# subtract scale factor
15708	addi.l		&0x6000,%d1		# add new bias
15709	andi.w		&0x7fff,%d1		# clear top bit
15710	or.w		%d2,%d1			# concat new sign,new exp
15711	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
15712	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
15713	mov.l		(%sp)+,%d2		# restore d2
15714	bra.b		fsqrt_sd_unfl_dis
15715
15716#
15717# operand WILL overflow.
15718#
# Executes the square root with the user's rounding info to collect the
# status bits, then continues in fsqrt_sd_ovfl_tst.
15719fsqrt_sd_ovfl:
15720	fmov.l		&0x0,%fpsr		# clear FPSR
15721	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15722
15723	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15724
15725	fmov.l		&0x0,%fpcr		# clear FPCR
15726	fmov.l		%fpsr,%d1		# save FPSR
15727
15728	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15729
# also entered from fsqrt_sd_may_ovfl once an overflow has been confirmed
15730fsqrt_sd_ovfl_tst:
15731	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15732
15733	mov.b		FPCR_ENABLE(%a6),%d1
15734	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
15735	bne.b		fsqrt_sd_ovfl_ena	# yes
15736
15737#
15738# OVFL is not enabled; therefore, we must create the default result by
15739# calling ovf_res().
15740#
15741fsqrt_sd_ovfl_dis:
15742	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
15743	sne		%d1			# set sign param accordingly
15744	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
15745	bsr.l		ovf_res			# calculate default result
15746	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
15747	fmovm.x		(%a0),&0x80		# return default result in fp0
15748	rts
15749
15750#
15751# OVFL is enabled.
15752# the INEX2 bit has already been updated by the round to the correct precision.
15753# now, round to extended(and don't alter the FPSR).
15754#
# Builds the EXOP in place in FP_SCR0: the exponent is unscaled by d0 and
# re-biased by -0x6000, then the value is loaded into fp1.
# NOTE(review): none of the paths leading here stores fp0 back to FP_SCR0,
# so the value used below appears to be the scaled source operand rather
# than the square-root result - confirm this is intended.
15755fsqrt_sd_ovfl_ena:
15756	mov.l		%d2,-(%sp)		# save d2
15757	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
15758	mov.l		%d1,%d2			# make a copy
15759	andi.l		&0x7fff,%d1		# strip sign
15760	andi.w		&0x8000,%d2		# keep old sign
15761	sub.l		%d0,%d1			# subtract scale factor
15762	subi.l		&0x6000,%d1		# subtract bias
15763	andi.w		&0x7fff,%d1		# clear top bit
15764	or.w		%d2,%d1			# concat sign,exp
15765	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
15766	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
15767	mov.l		(%sp)+,%d2		# restore d2
15768	bra.b		fsqrt_sd_ovfl_dis
15769
15770#
15771# the result MAY overflow. so...
15772#
# A scaled exponent of 0x3fff is treated as a certain overflow. Otherwise
# the square root is executed and the still-scaled result is compared with
# 1: if it is >= 1 an overflow has occurred, else the result is unscaled and
# returned by fsqrt_sd_normal_exit.
15773fsqrt_sd_may_ovfl:
15774	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
15775	bne.w		fsqrt_sd_ovfl		# yes, so overflow
15776
15777	fmov.l		&0x0,%fpsr		# clear FPSR
15778	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15779
15780	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15781
15782	fmov.l		%fpsr,%d1		# save status
15783	fmov.l		&0x0,%fpcr		# clear FPCR
15784
15785	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15786
15787	fmov.x		%fp0,%fp1		# make a copy of result
15788	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
15789	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
15790
15791# no, it didn't overflow; we have correct result
15792	bra.w		fsqrt_sd_normal_exit
15793
15794##########################################################################
15795
15796#
15797# input is not normalized; what is it?
15798#
# fsqrt_not_norm: d1 holds the source tag loaded by fsqrt. Dispatch to the
# handler for DENORM, ZERO, INF or SNAN; any remaining tag is handled as a
# QNAN.
15799fsqrt_not_norm:
15800	cmpi.b		%d1,&DENORM		# weed out DENORM
15801	beq.w		fsqrt_denorm
15802	cmpi.b		%d1,&ZERO		# weed out ZERO
15803	beq.b		fsqrt_zero
15804	cmpi.b		%d1,&INF		# weed out INF
15805	beq.b		fsqrt_inf
15806	cmpi.b		%d1,&SNAN		# weed out SNAN
15807	beq.l		res_snan_1op
15808	bra.l		res_qnan_1op		# everything else: QNAN
15809
15810#
15811#	fsqrt(+0) = +0
15812#	fsqrt(-0) = -0
15813#	fsqrt(+INF) = +INF
15814#	fsqrt(-INF) = OPERR
15815#
# fsqrt_zero: return a ZERO with the sign of the source operand in fp0 and
# set the matching condition code bits.
15816fsqrt_zero:
15817	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
15818	bmi.b		fsqrt_zero_m		# negative
15819fsqrt_zero_p:
15820	fmov.s		&0x00000000,%fp0	# return +ZERO
15821	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
15822	rts
15823fsqrt_zero_m:
15824	fmov.s		&0x80000000,%fp0	# return -ZERO
15825	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
15826	rts
15827
# fsqrt_inf: -INF yields OPERR; +INF is returned unchanged in fp0.
15828fsqrt_inf:
15829	tst.b		SRC_EX(%a0)		# is INF positive or negative?
15830	bmi.l		res_operr		# negative
15831fsqrt_inf_p:
15832	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
15833	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
15834	rts
15835
15836##########################################################################
15837
15838#########################################################################
15839# XDEF ****************************************************************	#
15840#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
15841#			  OVFL/UNFL exceptions will result		#
15842#									#
15843# XREF ****************************************************************	#
15844#	norm() - normalize mantissa after adjusting exponent		#
15845#									#
15846# INPUT ***************************************************************	#
15847#	FP_SRC(a6) = fp op1(src)					#
15848#	FP_DST(a6) = fp op2(dst)					#
15849#									#
15850# OUTPUT **************************************************************	#
15851#	FP_SRC(a6) = fp op1 scaled(src)					#
15852#	FP_DST(a6) = fp op2 scaled(dst)					#
15853#	d0         = scale amount					#
15854#									#
15855# ALGORITHM ***********************************************************	#
15856#	If the DST exponent is > the SRC exponent, set the DST exponent	#
15857# equal to 0x3fff and scale the SRC exponent by the value that the	#
15858# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
15859# do the opposite. Return this scale factor in d0.			#
15860#	If the two exponents differ by > the number of mantissa bits	#
15861# plus two, then set the smallest exponent to a very small value as a	#
15862# quick shortcut.							#
15863#									#
15864#########################################################################
15865
# Register/scratch usage visible in this routine:
#	a0 / a1        = pointers to the src / dst operands on entry
#	FP_SCR0 / FP_SCR1 = working copies of src / dst
#	L_SCR1(a6)     = src exponent (word), 2+L_SCR1(a6) = dst exponent (word)
#	d0             = scale factor on return (kept on the stack meanwhile)
15866	global		addsub_scaler2
15867addsub_scaler2:
15868	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src mantissa
15869	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst mantissa
15870	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15871	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15872	mov.w		SRC_EX(%a0),%d0		# d0 = src {sgn,exp}
15873	mov.w		DST_EX(%a1),%d1		# d1 = dst {sgn,exp}
15874	mov.w		%d0,FP_SCR0_EX(%a6)
15875	mov.w		%d1,FP_SCR1_EX(%a6)
15876
15877	andi.w		&0x7fff,%d0		# strip src sign
15878	andi.w		&0x7fff,%d1		# strip dst sign
15879	mov.w		%d0,L_SCR1(%a6)		# store src exponent
15880	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
15881
15882	cmp.w		%d0, %d1		# is src exp >= dst exp?
15883	bge.l		src_exp_ge2
15884
15885# dst exp is >  src exp; scale dst to exp = 0x3fff
15886dst_exp_gt2:
15887	bsr.l		scale_to_zero_dst
15888	mov.l		%d0,-(%sp)		# save scale factor
15889
15890	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
15891	bne.b		cmpexp12
15892
15893	lea		FP_SCR0(%a6),%a0
15894	bsr.l		norm			# normalize the denorm; result is new exp
15895	neg.w		%d0			# new exp = -(shft val)
15896	mov.w		%d0,L_SCR1(%a6)		# insert new exp
15897
15898cmpexp12:
15899	mov.w		2+L_SCR1(%a6),%d0
15900	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
15901
15902	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
15903	bge.b		quick_scale12
15904
15905	mov.w		L_SCR1(%a6),%d0
15906	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
15907	mov.w		FP_SCR0_EX(%a6),%d1
15908	and.w		&0x8000,%d1		# keep src sign
15909	or.w		%d1,%d0			# concat {sgn,new exp}
15910	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
15911
15912	mov.l		(%sp)+,%d0		# return SCALE factor
15913	rts
15914
# exponents are too far apart: give src the exponent 1, keeping its sign
15915quick_scale12:
15916	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
15917	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
15918
15919	mov.l		(%sp)+,%d0		# return SCALE factor
15920	rts
15921
15922# src exp is >= dst exp; scale src to exp = 0x3fff
15923src_exp_ge2:
15924	bsr.l		scale_to_zero_src
15925	mov.l		%d0,-(%sp)		# save scale factor
15926
15927	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
15928	bne.b		cmpexp22
15929	lea		FP_SCR1(%a6),%a0
15930	bsr.l		norm			# normalize the denorm; result is new exp
15931	neg.w		%d0			# new exp = -(shft val)
15932	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp
15933
15934cmpexp22:
15935	mov.w		L_SCR1(%a6),%d0
15936	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
15937
15938	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
15939	bge.b		quick_scale22
15940
15941	mov.w		2+L_SCR1(%a6),%d0
15942	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
15943	mov.w		FP_SCR1_EX(%a6),%d1
15944	andi.w		&0x8000,%d1		# keep dst sign
15945	or.w		%d1,%d0			# concat {sgn,new exp}
15946	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
15947
15948	mov.l		(%sp)+,%d0		# return SCALE factor
15949	rts
15950
# exponents are too far apart: give dst the exponent 1, keeping its sign
15951quick_scale22:
15952	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
15953	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
15954
15955	mov.l		(%sp)+,%d0		# return SCALE factor
15956	rts
15957
15958##########################################################################
15959
15960#########################################################################
15961# XDEF ****************************************************************	#
15962#	scale_to_zero_src(): scale the exponent of extended precision	#
15963#			     value at FP_SCR0(a6).			#
15964#									#
15965# XREF ****************************************************************	#
15966#	norm() - normalize the mantissa if the operand was a DENORM	#
15967#									#
15968# INPUT ***************************************************************	#
15969#	FP_SCR0(a6) = extended precision operand to be scaled		#
15970#									#
15971# OUTPUT **************************************************************	#
15972#	FP_SCR0(a6) = scaled extended precision operand			#
15973#	d0	    = scale value					#
15974#									#
15975# ALGORITHM ***********************************************************	#
15976#	Set the exponent of the input operand to 0x3fff. Save the value	#
15977# of the difference between the original and new exponent. Then,	#
15978# normalize the operand if it was a DENORM. Add this normalization	#
15979# value to the previous value. Return the result.			#
15980#									#
15981#########################################################################
15982
# Replaces the exponent of FP_SCR0 with 0x3fff (sign preserved) and returns
# d0 = 0x3fff - exp, where exp is the original exponent, or minus the shift
# count returned by norm() when STAG says the operand is a DENORM.
15983	global		scale_to_zero_src
15984scale_to_zero_src:
15985	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
15986	mov.w		%d1,%d0			# make a copy
15987
15988	andi.l		&0x7fff,%d1		# extract operand's exponent
15989
15990	andi.w		&0x8000,%d0		# extract operand's sgn
15991	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
15992
15993	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
15994
15995	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
15996	beq.b		stzs_denorm		# yes; normalize the DENORM
15997
15998stzs_norm:
15999	mov.l		&0x3fff,%d0
16000	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16001
16002	rts
16003
16004stzs_denorm:
16005	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
16006	bsr.l		norm			# normalize denorm
16007	neg.l		%d0			# new exponent = -(shft val)
16008	mov.l		%d0,%d1			# d1 = exponent used by stzs_norm
16009	bra.b		stzs_norm		# finish scaling
16010
16011###
16012
16013#########################################################################
16014# XDEF ****************************************************************	#
16015#	scale_sqrt(): scale the input operand exponent so a subsequent	#
16016#		      fsqrt operation won't take an exception.		#
16017#									#
16018# XREF ****************************************************************	#
16019#	norm() - normalize the mantissa if the operand was a DENORM	#
16020#									#
16021# INPUT ***************************************************************	#
16022#	FP_SCR0(a6) = extended precision operand to be scaled		#
16023#									#
16024# OUTPUT **************************************************************	#
16025#	FP_SCR0(a6) = scaled extended precision operand			#
16026#	d0	    = scale value					#
16027#									#
16028# ALGORITHM ***********************************************************	#
16029#	If the input operand is a DENORM, normalize it.			#
16030#	If the exponent of the input operand is even, set the exponent	#
16031# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
16032# exponent of the input operand is odd, set the exponent to 0x3fff and	#
16033# return a scale factor of "(0x3fff-exp)/2".				#
16034#									#
16035#########################################################################
16036
# The new exponent keeps the parity of the original one (odd -> 0x3fff,
# even -> 0x3ffe), so the difference is always even and the scale factor
# returned in d0 is exactly half of it. The sign bit of FP_SCR0_EX is
# preserved on the NORM path.
16037	global		scale_sqrt
16038scale_sqrt:
16039	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
16040	beq.b		ss_denorm		# yes; normalize the DENORM
16041
16042	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
16043	andi.l		&0x7fff,%d1		# extract operand's exponent
16044
16045	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
16046
16047	btst		&0x0,%d1		# is exp even or odd?
16048	beq.b		ss_norm_even
16049
16050	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
16051
16052	mov.l		&0x3fff,%d0
16053	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16054	asr.l		&0x1,%d0		# divide scale factor by 2
16055	rts
16056
16057ss_norm_even:
16058	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=-1)
16059
16060	mov.l		&0x3ffe,%d0
16061	sub.l		%d1,%d0			# scale = (BIAS-1) + (-exp)
16062	asr.l		&0x1,%d0		# divide scale factor by 2
16063	rts
16064
# DENORM operand: norm() returns the shift count in d0; its parity selects
# the new exponent and the count is added into the scale factor.
16065ss_denorm:
16066	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
16067	bsr.l		norm			# normalize denorm
16068
16069	btst		&0x0,%d0		# is shift count even or odd?
16070	beq.b		ss_denorm_even
16071
16072	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
16073
16074	add.l		&0x3fff,%d0		# scale = BIAS + shift count
16075	asr.l		&0x1,%d0		# divide scale factor by 2
16076	rts
16077
16078ss_denorm_even:
16079	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=-1)
16080
16081	add.l		&0x3ffe,%d0		# scale = (BIAS-1) + shift count
16082	asr.l		&0x1,%d0		# divide scale factor by 2
16083	rts
16084
16085###
16086
16087#########################################################################
16088# XDEF ****************************************************************	#
16089#	scale_to_zero_dst(): scale the exponent of extended precision	#
16090#			     value at FP_SCR1(a6).			#
16091#									#
16092# XREF ****************************************************************	#
16093#	norm() - normalize the mantissa if the operand was a DENORM	#
16094#									#
16095# INPUT ***************************************************************	#
16096#	FP_SCR1(a6) = extended precision operand to be scaled		#
16097#									#
16098# OUTPUT **************************************************************	#
16099#	FP_SCR1(a6) = scaled extended precision operand			#
16100#	d0	    = scale value					#
16101#									#
16102# ALGORITHM ***********************************************************	#
16103#	Set the exponent of the input operand to 0x3fff. Save the value	#
16104# of the difference between the original and new exponent. Then,	#
16105# normalize the operand if it was a DENORM. Add this normalization	#
16106# value to the previous value. Return the result.			#
16107#									#
16108#########################################################################
16109
# Replaces the exponent of FP_SCR1 with 0x3fff (sign preserved) and returns
# d0 = 0x3fff - exp, where exp is the original exponent, or minus the shift
# count returned by norm() when DTAG says the operand is a DENORM.
16110	global		scale_to_zero_dst
16111scale_to_zero_dst:
16112	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
16113	mov.w		%d1,%d0			# make a copy
16114
16115	andi.l		&0x7fff,%d1		# extract operand's exponent
16116
16117	andi.w		&0x8000,%d0		# extract operand's sgn
16118	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
16119
16120	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
16121
16122	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
16123	beq.b		stzd_denorm		# yes; normalize the DENORM
16124
16125stzd_norm:
16126	mov.l		&0x3fff,%d0
16127	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16128	rts
16129
16130stzd_denorm:
16131	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
16132	bsr.l		norm			# normalize denorm
16133	neg.l		%d0			# new exponent = -(shft val)
16134	mov.l		%d0,%d1			# d1 = exponent used by stzd_norm
16135	bra.b		stzd_norm		# finish scaling
16136
16137##########################################################################
16138
16139#########################################################################
16140# XDEF ****************************************************************	#
16141#	res_qnan(): return default result w/ QNAN operand for dyadic	#
16142#	res_snan(): return default result w/ SNAN operand for dyadic	#
16143#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
16144#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
16145#									#
16146# XREF ****************************************************************	#
16147#	None								#
16148#									#
16149# INPUT ***************************************************************	#
16150#	FP_SRC(a6) = pointer to extended precision src operand		#
16151#	FP_DST(a6) = pointer to extended precision dst operand		#
16152#									#
16153# OUTPUT **************************************************************	#
16154#	fp0 = default result						#
16155#									#
16156# ALGORITHM ***********************************************************	#
16157#	If either operand (but not both operands) of an operation is a	#
16158# nonsignalling NAN, then that NAN is returned as the result. If both	#
16159# operands are nonsignalling NANs, then the destination operand		#
16160# nonsignalling NAN is returned as the result.				#
16161#	If either operand to an operation is a signalling NAN (SNAN),	#
16162# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
16163# enable bit is set in the FPCR, then the trap is taken and the		#
16164# destination is not modified. If the SNAN trap enable bit is not set,	#
16165# then the SNAN is converted to a nonsignalling NAN (by setting the	#
16166# SNAN bit in the operand to one), and the operation continues as	#
16167# described in the preceding paragraph, for nonsignalling NANs.		#
16168#	Make sure the appropriate FPSR bits are set before exiting.	#
16169#									#
16170#########################################################################
16171
16172	global		res_qnan
16173	global		res_snan
# Dyadic entry points (shared). The operand is chosen in this order:
# SNAN dst, QNAN dst, QNAN src, otherwise the src is handled as an SNAN.
# a0 ends up pointing at the chosen operand, which nan_comp loads into fp0.
16174res_qnan:
16175res_snan:
16176	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
16177	beq.b		dst_snan2
16178	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
16179	beq.b		dst_qnan2
16180src_nan:
16181	cmp.b		STAG(%a6), &QNAN	# is the src a  QNAN?
16182	beq.b		src_qnan2
16183	global		res_snan_1op
# Monadic SNAN entry point; also the fall-through when the src is not a QNAN.
16184res_snan_1op:
16185src_snan2:
16186	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
16187	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16188	lea		FP_SRC(%a6), %a0	# result = modified src
16189	bra.b		nan_comp
16190	global		res_qnan_1op
# Monadic QNAN entry point: only the NAN cc bit is recorded.
16191res_qnan_1op:
16192src_qnan2:
16193	or.l		&nan_mask, USER_FPSR(%a6)
16194	lea		FP_SRC(%a6), %a0	# result = src
16195	bra.b		nan_comp
16196dst_snan2:
16197	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16198	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
16199	lea		FP_DST(%a6), %a0	# result = modified dst
16200	bra.b		nan_comp
16201dst_qnan2:
16202	lea		FP_DST(%a6), %a0	# result = dst QNAN
16203	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
16204	bne		nan_done		# no; only NAN needs recording
16205	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6) # yes; record SNAN + AIOP too
16206nan_done:
16207	or.l		&nan_mask, USER_FPSR(%a6)
# Common exit: a0 = operand to return.
16208nan_comp:
16209	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
16210	beq.b		nan_not_neg
16211	or.l		&neg_mask, USER_FPSR(%a6)
16212nan_not_neg:
16213	fmovm.x		(%a0), &0x80		# load the selected operand as the result
16214	rts
16215
16216#########################################################################
16217# XDEF ****************************************************************	#
16218#	res_operr(): return default result during operand error		#
16219#									#
16220# XREF ****************************************************************	#
16221#	None								#
16222#									#
16223# INPUT ***************************************************************	#
16224#	None								#
16225#									#
16226# OUTPUT **************************************************************	#
16227#	fp0 = default operand error result				#
16228#									#
16229# ALGORITHM ***********************************************************	#
16230#	An nonsignalling NAN is returned as the default result when	#
16231# an operand error occurs for the following cases:			#
16232#									#
16233#	Multiply: (Infinity x Zero)					#
16234#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
16235#									#
16236#########################################################################
16237
16238	global		res_operr
# Records NAN, OPERR and AIOP in USER_FPSR and loads the constant at
# nan_return as the result.
16239res_operr:
16240	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16241	fmovm.x		nan_return(%pc), &0x80	# load default NAN (pc-relative)
16242	rts
16243
# Extended-precision constant: exponent word 0x7fff, sign clear, both
# mantissa longwords all ones.
16244nan_return:
16245	long		0x7fff0000, 0xffffffff, 0xffffffff
16246
16247#########################################################################
16248# fdbcc(): routine to emulate the fdbcc instruction			#
16249#									#
16250# XDEF **************************************************************** #
16251#	_fdbcc()							#
16252#									#
16253# XREF **************************************************************** #
16254#	fetch_dreg() - fetch Dn value					#
16255#	store_dreg_l() - store updated Dn value				#
16256#									#
16257# INPUT ***************************************************************	#
16258#	d0 = displacement						#
16259#									#
16260# OUTPUT ************************************************************** #
16261#	none								#
16262#									#
16263# ALGORITHM ***********************************************************	#
16264#	This routine checks which conditional predicate is specified by	#
16265# the stacked fdbcc instruction opcode and then branches to a routine	#
16266# for that predicate. The corresponding fbcc instruction is then used	#
16267# to see whether the condition (specified by the stacked FPSR) is true	#
16268# or false.								#
16269#	If a BSUN exception should be indicated, the BSUN and AIOP	#
16270# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
16271# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
16272# enabled BSUN should not be flagged and the predicate is false, then	#
16273# Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
16274# the displacement value to the stacked PC so that when an "rte" is	#
16275# finally executed, the branch occurs.					#
16276#									#
16277#########################################################################
16278	global		_fdbcc
# Dispatcher: saves the displacement, copies the saved fp condition codes
# into the live FPSR so the handlers can test them with fbcc, then jumps
# through tbl_fdbcc using the word fetched from EXC_CMDREG as the index.
16279_fdbcc:
16280	mov.l		%d0,L_SCR1(%a6)		# save displacement
16281
16282	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
# NOTE(review): d0.w is used unmasked as the index into the 32-entry table
# below; presumably the caller guarantees a value in 0..31 -- confirm.
16283
16284	clr.l		%d1			# clear scratch reg
16285	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
16286	ror.l		&0x8,%d1		# rotate to top byte
16287	fmov.l		%d1,%fpsr		# insert into FPSR
16288
16289	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
16290	jmp		(tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16291
# One 16-bit offset (handler - tbl_fdbcc) per predicate value 0..31.
16292tbl_fdbcc:
16293	short		fdbcc_f		-	tbl_fdbcc	# 00
16294	short		fdbcc_eq	-	tbl_fdbcc	# 01
16295	short		fdbcc_ogt	-	tbl_fdbcc	# 02
16296	short		fdbcc_oge	-	tbl_fdbcc	# 03
16297	short		fdbcc_olt	-	tbl_fdbcc	# 04
16298	short		fdbcc_ole	-	tbl_fdbcc	# 05
16299	short		fdbcc_ogl	-	tbl_fdbcc	# 06
16300	short		fdbcc_or	-	tbl_fdbcc	# 07
16301	short		fdbcc_un	-	tbl_fdbcc	# 08
16302	short		fdbcc_ueq	-	tbl_fdbcc	# 09
16303	short		fdbcc_ugt	-	tbl_fdbcc	# 10
16304	short		fdbcc_uge	-	tbl_fdbcc	# 11
16305	short		fdbcc_ult	-	tbl_fdbcc	# 12
16306	short		fdbcc_ule	-	tbl_fdbcc	# 13
16307	short		fdbcc_neq	-	tbl_fdbcc	# 14
16308	short		fdbcc_t		-	tbl_fdbcc	# 15
16309	short		fdbcc_sf	-	tbl_fdbcc	# 16
16310	short		fdbcc_seq	-	tbl_fdbcc	# 17
16311	short		fdbcc_gt	-	tbl_fdbcc	# 18
16312	short		fdbcc_ge	-	tbl_fdbcc	# 19
16313	short		fdbcc_lt	-	tbl_fdbcc	# 20
16314	short		fdbcc_le	-	tbl_fdbcc	# 21
16315	short		fdbcc_gl	-	tbl_fdbcc	# 22
16316	short		fdbcc_gle	-	tbl_fdbcc	# 23
16317	short		fdbcc_ngle	-	tbl_fdbcc	# 24
16318	short		fdbcc_ngl	-	tbl_fdbcc	# 25
16319	short		fdbcc_nle	-	tbl_fdbcc	# 26
16320	short		fdbcc_nlt	-	tbl_fdbcc	# 27
16321	short		fdbcc_nge	-	tbl_fdbcc	# 28
16322	short		fdbcc_ngt	-	tbl_fdbcc	# 29
16323	short		fdbcc_sneq	-	tbl_fdbcc	# 30
16324	short		fdbcc_st	-	tbl_fdbcc	# 31
16325
16326#########################################################################
16327#									#
16328# IEEE Nonaware tests							#
16329#									#
16330# For the IEEE nonaware tests, only the false branch changes the	#
16331# counter. However, the true branch may set bsun so we check to see	#
16332# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16333#									#
16334# The cases EQ and NE are shared by the Aware and Nonaware groups	#
16335# and are incapable of setting the BSUN exception bit.			#
16336#									#
16337# Typically, only one of the two possible branch directions could	#
16338# have the NAN bit set.							#
16339# (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
16340#  is preserved.)							#
16341#									#
16342#########################################################################
16343
16344#
16345# equal:
16346#
16347#	Z
16348#
# True: return with nothing changed. False: handle the counter.
16349fdbcc_eq:
16350	fbeq.w		fdbcc_eq_yes		# equal?
16351fdbcc_eq_no:
16352	bra.w		fdbcc_false		# no; go handle counter
16353fdbcc_eq_yes:
16354	rts					# yes; do nothing
16355
16356#
16357# not equal:
16358#	_
16359#	Z
16360#
# True: return with nothing changed. False: handle the counter.
16361fdbcc_neq:
16362	fbneq.w		fdbcc_neq_yes		# not equal?
16363fdbcc_neq_no:
16364	bra.w		fdbcc_false		# no; go handle counter
16365fdbcc_neq_yes:
16366	rts					# yes; do nothing
16367
16368#
16369# greater than:
16370#	_______
16371#	NANvZvN
16372#
# True: return. False: if NAN is set in the saved ccodes, record BSUN+AIOP
# and go to fdbcc_bsun when BSUN is enabled; otherwise handle the counter.
16373fdbcc_gt:
16374	fbgt.w		fdbcc_gt_yes		# greater than?
16375	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16376	beq.w		fdbcc_false		# no;go handle counter
16377	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16378	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379	bne.w		fdbcc_bsun		# yes; we have an exception
16380	bra.w		fdbcc_false		# no; go handle counter
16381fdbcc_gt_yes:
16382	rts					# do nothing
16383
16384#
16385# not greater than:
16386#
16387#	NANvZvN
16388#
# False: handle the counter. True: if NAN is set, record BSUN+AIOP and go
# to fdbcc_bsun when BSUN is enabled; every other true case just returns.
16389fdbcc_ngt:
16390	fbngt.w		fdbcc_ngt_yes		# not greater than?
16391fdbcc_ngt_no:
16392	bra.w		fdbcc_false		# no; go handle counter
16393fdbcc_ngt_yes:
16394	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16395	beq.b		fdbcc_ngt_done		# no;go finish
16396	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16397	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398	bne.w		fdbcc_bsun		# yes; we have an exception
16399fdbcc_ngt_done:
16400	rts					# no; do nothing
16401
16402#
16403# greater than or equal:
16404#	   _____
16405#	Zv(NANvN)
16406#
# Both outcomes check NAN first: if set, BSUN+AIOP are recorded and an
# enabled BSUN goes to fdbcc_bsun. Otherwise false handles the counter and
# true returns.
16407fdbcc_ge:
16408	fbge.w		fdbcc_ge_yes		# greater than or equal?
16409fdbcc_ge_no:
16410	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16411	beq.w		fdbcc_false		# no;go handle counter
16412	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16413	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414	bne.w		fdbcc_bsun		# yes; we have an exception
16415	bra.w		fdbcc_false		# no; go handle counter
16416fdbcc_ge_yes:
16417	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16418	beq.b		fdbcc_ge_yes_done	# no;go do nothing
16419	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16420	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421	bne.w		fdbcc_bsun		# yes; we have an exception
16422fdbcc_ge_yes_done:
16423	rts					# do nothing
16424
16425#
16426# not (greater than or equal):
16427#	       _
16428#	NANv(N^Z)
16429#
# False: handle the counter. True: if NAN is set, record BSUN+AIOP and go
# to fdbcc_bsun when BSUN is enabled; every other true case just returns.
16430fdbcc_nge:
16431	fbnge.w		fdbcc_nge_yes		# not (greater than or equal)?
16432fdbcc_nge_no:
16433	bra.w		fdbcc_false		# no; go handle counter
16434fdbcc_nge_yes:
16435	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16436	beq.b		fdbcc_nge_done		# no;go finish
16437	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16438	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439	bne.w		fdbcc_bsun		# yes; we have an exception
16440fdbcc_nge_done:
16441	rts					# no; do nothing
16442
16443#
16444# less than:
16445#	   _____
16446#	N^(NANvZ)
16447#
# True: return. False: if NAN is set in the saved ccodes, record BSUN+AIOP
# and go to fdbcc_bsun when BSUN is enabled; otherwise handle the counter.
16448fdbcc_lt:
16449	fblt.w		fdbcc_lt_yes		# less than?
16450fdbcc_lt_no:
16451	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16452	beq.w		fdbcc_false		# no; go handle counter
16453	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16454	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455	bne.w		fdbcc_bsun		# yes; we have an exception
16456	bra.w		fdbcc_false		# no; go handle counter
16457fdbcc_lt_yes:
16458	rts					# do nothing
16459
16460#
16461# not less than:
16462#	       _
16463#	NANv(ZvN)
16464#
# False: handle the counter. True: if NAN is set, record BSUN+AIOP and go
# to fdbcc_bsun when BSUN is enabled; every other true case just returns.
16465fdbcc_nlt:
16466	fbnlt.w		fdbcc_nlt_yes		# not less than?
16467fdbcc_nlt_no:
16468	bra.w		fdbcc_false		# no; go handle counter
16469fdbcc_nlt_yes:
16470	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16471	beq.b		fdbcc_nlt_done		# no;go finish
16472	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16473	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474	bne.w		fdbcc_bsun		# yes; we have an exception
16475fdbcc_nlt_done:
16476	rts					# no; do nothing
16477
16478#
16479# less than or equal:
16480#	     ___
16481#	Zv(N^NAN)
16482#
# Both outcomes check NAN first: if set, BSUN+AIOP are recorded and an
# enabled BSUN goes to fdbcc_bsun. Otherwise false handles the counter and
# true returns.
16483fdbcc_le:
16484	fble.w		fdbcc_le_yes		# less than or equal?
16485fdbcc_le_no:
16486	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16487	beq.w		fdbcc_false		# no; go handle counter
16488	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16489	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490	bne.w		fdbcc_bsun		# yes; we have an exception
16491	bra.w		fdbcc_false		# no; go handle counter
16492fdbcc_le_yes:
16493	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16494	beq.b		fdbcc_le_yes_done	# no; go do nothing
16495	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16496	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497	bne.w		fdbcc_bsun		# yes; we have an exception
16498fdbcc_le_yes_done:
16499	rts					# do nothing
16500
16501#
16502# not (less than or equal):
16503#	     ___
16504#	NANv(NvZ)
16505#
# False: handle the counter. True: if NAN is set, record BSUN+AIOP and go
# to fdbcc_bsun when BSUN is enabled; every other true case just returns.
16506fdbcc_nle:
16507	fbnle.w		fdbcc_nle_yes		# not (less than or equal)?
16508fdbcc_nle_no:
16509	bra.w		fdbcc_false		# no; go handle counter
16510fdbcc_nle_yes:
16511	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16512	beq.w		fdbcc_nle_done		# no; go finish
16513	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16514	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515	bne.w		fdbcc_bsun		# yes; we have an exception
16516fdbcc_nle_done:
16517	rts					# no; do nothing
16518
16519#
16520# greater or less than:
16521#	_____
16522#	NANvZ
16523#
# True: return. False: if NAN is set in the saved ccodes, record BSUN+AIOP
# and go to fdbcc_bsun when BSUN is enabled; otherwise handle the counter.
16524fdbcc_gl:
16525	fbgl.w		fdbcc_gl_yes		# greater or less than?
16526fdbcc_gl_no:
16527	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16528	beq.w		fdbcc_false		# no; handle counter
16529	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16530	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531	bne.w		fdbcc_bsun		# yes; we have an exception
16532	bra.w		fdbcc_false		# no; go handle counter
16533fdbcc_gl_yes:
16534	rts					# do nothing
16535
16536#
16537# not (greater or less than):
16538#
16539#	NANvZ
16540#
# False: handle the counter. True: if NAN is set, record BSUN+AIOP and go
# to fdbcc_bsun when BSUN is enabled; every other true case just returns.
16541fdbcc_ngl:
16542	fbngl.w		fdbcc_ngl_yes		# not (greater or less than)?
16543fdbcc_ngl_no:
16544	bra.w		fdbcc_false		# no; go handle counter
16545fdbcc_ngl_yes:
16546	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16547	beq.b		fdbcc_ngl_done		# no; go finish
16548	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16549	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550	bne.w		fdbcc_bsun		# yes; we have an exception
16551fdbcc_ngl_done:
16552	rts					# no; do nothing
16553
16554#
16555# greater, less, or equal:
16556#	___
16557#	NAN
16558#
# True: return. False: BSUN+AIOP are recorded without a separate NAN test
# (the predicate itself is "NAN clear"); an enabled BSUN goes to
# fdbcc_bsun, otherwise the counter is handled.
16559fdbcc_gle:
16560	fbgle.w		fdbcc_gle_yes		# greater, less, or equal?
16561fdbcc_gle_no:
16562	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16563	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564	bne.w		fdbcc_bsun		# yes; we have an exception
16565	bra.w		fdbcc_false		# no; go handle counter
16566fdbcc_gle_yes:
16567	rts					# do nothing
16568
16569#
16570# not (greater, less, or equal):
16571#
16572#	NAN
16573#
# False: handle the counter. True: BSUN+AIOP are recorded without a
# separate NAN test (the predicate itself is "NAN set"); an enabled BSUN
# goes to fdbcc_bsun, otherwise return.
16574fdbcc_ngle:
16575	fbngle.w	fdbcc_ngle_yes		# not (greater, less, or equal)?
16576fdbcc_ngle_no:
16577	bra.w		fdbcc_false		# no; go handle counter
16578fdbcc_ngle_yes:
16579	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16580	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581	bne.w		fdbcc_bsun		# yes; we have an exception
16582	rts					# no; do nothing
16583
16584#########################################################################
16585#									#
16586# Miscellaneous tests							#
16587#									#
16588# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16589#									#
16590#########################################################################
16591
16592#
16593# false:
16594#
16595#	False
16596#
# Predicate is always false, so the counter is always handled.
16597fdbcc_f:					# no bsun possible
16598	bra.w		fdbcc_false		# go handle counter
16599
16600#
16601# true:
16602#
16603#	True
16604#
# Predicate is always true, so nothing is changed.
16605fdbcc_t:					# no bsun possible
16606	rts					# do nothing
16607
16608#
16609# signalling false:
16610#
16611#	False
16612#
# Always false. If NAN is set, BSUN+AIOP are recorded and an enabled BSUN
# goes to fdbcc_bsun; in every other case the counter is handled.
16613fdbcc_sf:
16614	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16615	beq.w		fdbcc_false		# no;go handle counter
16616	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16617	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618	bne.w		fdbcc_bsun		# yes; we have an exception
16619	bra.w		fdbcc_false		# go handle counter
16620
16621#
16622# signalling true:
16623#
16624#	True
16625#
# Always true. If NAN is set, BSUN+AIOP are recorded and an enabled BSUN
# goes to fdbcc_bsun; in every other case return without changes.
16626fdbcc_st:
16627	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16628	beq.b		fdbcc_st_done		# no;go finish
16629	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16630	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631	bne.w		fdbcc_bsun		# yes; we have an exception
16632fdbcc_st_done:
16633	rts					# do nothing
16634
16635#
16636# signalling equal:
16637#
16638#	Z
16639#
# Both outcomes check NAN first: if set, BSUN+AIOP are recorded and an
# enabled BSUN goes to fdbcc_bsun. Otherwise false handles the counter and
# true returns.
16640fdbcc_seq:
16641	fbseq.w		fdbcc_seq_yes		# signalling equal?
16642fdbcc_seq_no:
16643	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16644	beq.w		fdbcc_false		# no;go handle counter
16645	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16646	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647	bne.w		fdbcc_bsun		# yes; we have an exception
16648	bra.w		fdbcc_false		# go handle counter
16649fdbcc_seq_yes:
16650	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16651	beq.b		fdbcc_seq_yes_done	# no;go do nothing
16652	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16653	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654	bne.w		fdbcc_bsun		# yes; we have an exception
16655fdbcc_seq_yes_done:
16656	rts					# yes; do nothing
16657
16658#
16659# signalling not equal:
16660#	_
16661#	Z
16662#
# Both outcomes check NAN first: if set, BSUN+AIOP are recorded and an
# enabled BSUN goes to fdbcc_bsun. Otherwise false handles the counter and
# true returns.
16663fdbcc_sneq:
16664	fbsneq.w	fdbcc_sneq_yes		# signalling not equal?
16665fdbcc_sneq_no:
16666	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16667	beq.w		fdbcc_false		# no;go handle counter
16668	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16669	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670	bne.w		fdbcc_bsun		# yes; we have an exception
16671	bra.w		fdbcc_false		# go handle counter
16672fdbcc_sneq_yes:
16673	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16674	beq.w		fdbcc_sneq_done		# no;go finish
16675	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16676	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677	bne.w		fdbcc_bsun		# yes; we have an exception
16678fdbcc_sneq_done:
16679	rts					# do nothing
16680
16681#########################################################################
16682#									#
16683# IEEE Aware tests							#
16684#									#
16685# For the IEEE aware tests, action is only taken if the result is false.#
16686# Each fbcc branches to an "rts" if the predicate is true; otherwise	#
16687# control falls through to the branch to the decrement routine.		#
16688# The BSUN exception will not be set for any of these tests.		#
16689#									#
16690#########################################################################
16691
16692#
16693# ordered greater than:
16694#	_______
16695#	NANvZvN
16696#
# True: return with nothing changed. False: handle the counter.
16697fdbcc_ogt:
16698	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
16699fdbcc_ogt_no:
16700	bra.w		fdbcc_false		# no; go handle counter
16701fdbcc_ogt_yes:
16702	rts					# yes; do nothing
16703
16704#
16705# unordered or less or equal:
16706#
16707#	NANvZvN
16708#
# True: return with nothing changed. False: handle the counter.
16709fdbcc_ule:
16710	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
16711fdbcc_ule_no:
16712	bra.w		fdbcc_false		# no; go handle counter
16713fdbcc_ule_yes:
16714	rts					# yes; do nothing
16715
16716#
16717# ordered greater than or equal:
16718#	   _____
16719#	Zv(NANvN)
16720#
# True: return with nothing changed. False: handle the counter.
16721fdbcc_oge:
16722	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
16723fdbcc_oge_no:
16724	bra.w		fdbcc_false		# no; go handle counter
16725fdbcc_oge_yes:
16726	rts					# yes; do nothing
16727
16728#
16729# unordered or less than:
16730#	       _
16731#	NANv(N^Z)
16732#
# True: return with nothing changed. False: handle the counter.
16733fdbcc_ult:
16734	fbult.w		fdbcc_ult_yes		# unordered or less than?
16735fdbcc_ult_no:
16736	bra.w		fdbcc_false		# no; go handle counter
16737fdbcc_ult_yes:
16738	rts					# yes; do nothing
16739
16740#
16741# ordered less than:
16742#	   _____
16743#	N^(NANvZ)
16744#
# True: return with nothing changed. False: handle the counter.
16745fdbcc_olt:
16746	fbolt.w		fdbcc_olt_yes		# ordered less than?
16747fdbcc_olt_no:
16748	bra.w		fdbcc_false		# no; go handle counter
16749fdbcc_olt_yes:
16750	rts					# yes; do nothing
16751
16752#
16753# unordered or greater or equal:
16754#	      _
16755#	NANvZvN
16756#
# True: return with nothing changed. False: handle the counter.
16757fdbcc_uge:
16758	fbuge.w		fdbcc_uge_yes		# unordered or greater or equal?
16759fdbcc_uge_no:
16760	bra.w		fdbcc_false		# no; go handle counter
16761fdbcc_uge_yes:
16762	rts					# yes; do nothing
16763
16764#
16765# ordered less than or equal:
16766#	     ___
16767#	Zv(N^NAN)
16768#
# True: return with nothing changed. False: handle the counter.
16769fdbcc_ole:
16770	fbole.w		fdbcc_ole_yes		# ordered less than or equal?
16771fdbcc_ole_no:
16772	bra.w		fdbcc_false		# no; go handle counter
16773fdbcc_ole_yes:
16774	rts					# yes; do nothing
16775
16776#
16777# unordered or greater than:
16778#	     ___
16779#	NANv(NvZ)
16780#
# True: return with nothing changed. False: handle the counter.
16781fdbcc_ugt:
16782	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
16783fdbcc_ugt_no:
16784	bra.w		fdbcc_false		# no; go handle counter
16785fdbcc_ugt_yes:
16786	rts					# yes; do nothing
16787
16788#
16789# ordered greater or less than:
16790#	_____
16791#	NANvZ
16792#
# True: return with nothing changed. False: handle the counter.
16793fdbcc_ogl:
16794	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
16795fdbcc_ogl_no:
16796	bra.w		fdbcc_false		# no; go handle counter
16797fdbcc_ogl_yes:
16798	rts					# yes; do nothing
16799
16800#
16801# unordered or equal:
16802#
16803#	NANvZ
16804#
# True: return with nothing changed. False: handle the counter.
16805fdbcc_ueq:
16806	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
16807fdbcc_ueq_no:
16808	bra.w		fdbcc_false		# no; go handle counter
16809fdbcc_ueq_yes:
16810	rts					# yes; do nothing
16811
16812#
16813# ordered:
16814#	___
16815#	NAN
16816#
# True: return with nothing changed. False: handle the counter.
16817fdbcc_or:
16818	fbor.w		fdbcc_or_yes		# ordered?
16819fdbcc_or_no:
16820	bra.w		fdbcc_false		# no; go handle counter
16821fdbcc_or_yes:
16822	rts					# yes; do nothing
16823
16824#
16825# unordered:
16826#
16827#	NAN
16828#
# True: return with nothing changed. False: handle the counter.
16829fdbcc_un:
16830	fbun.w		fdbcc_un_yes		# unordered?
16831fdbcc_un_no:
16832	bra.w		fdbcc_false		# no; go handle counter
16833fdbcc_un_yes:
16834	rts					# yes; do nothing
16835
16836#######################################################################
16837
16838#
16839# the bsun exception bit was not set.
16840#
16841# (1) subtract 1 from the count register
16842# (2) if (cr == -1) then
16843#	pc = pc of next instruction
16844#     else
16845#	pc += sign_ext(16-bit displacement)
16846#
# Predicate was false: decrement the low word of the count register and,
# unless that word became -1, point the stacked PC at the branch target.
16847fdbcc_false:
16848	mov.b		1+EXC_OPWORD(%a6), %d1	# fetch low byte of opword
16849	andi.w		&0x7, %d1		# extract count register
16850
16851	bsr.l		fetch_dreg		# fetch count value
16852# make sure that d0 isn't corrupted between calls...
# d0 is tested again after store_dreg_l returns, so that routine has to
# leave it intact.
# NOTE(review): store_dreg_l presumably takes the register number in d1,
# which would require fetch_dreg to preserve d1 -- confirm.
16853
16854	subq.w		&0x1, %d0		# Dn - 1 -> Dn (low word only)
16855
16856	bsr.l		store_dreg_l		# store new count value
16857
16858	cmpi.w		%d0, &-0x1		# is (Dn == -1)?
16859	bne.b		fdbcc_false_cont	# no;
16860	rts					# yes; leave EXC_PC alone
16861
16862fdbcc_false_cont:
16863	mov.l		L_SCR1(%a6),%d0		# fetch displacement
16864	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
# NOTE(review): the 4 added next is presumably the offset of the
# displacement word from the start of the instruction -- confirm.
16865	addq.l		&0x4,%d0		# add instruction length
16866	mov.l		%d0,EXC_PC(%a6)		# set new PC
16867	rts
16868
16869# the emulation routine set bsun and BSUN was enabled. have to
16870# fix stack and jump to the bsun handler.
16871# let the caller of this routine shift the stack frame up to
16872# eliminate the effective address field.
# Only marks the condition: fbsun_flg is written to SPCOND_FLG and control
# returns to the caller.
16873fdbcc_bsun:
16874	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # flag enabled BSUN for the caller
16875	rts
16876
16877#########################################################################
16878# ftrapcc(): routine to emulate the ftrapcc instruction			#
16879#									#
16880# XDEF ****************************************************************	#
16881#	_ftrapcc()							#
16882#									#
16883# XREF ****************************************************************	#
16884#	none								#
16885#									#
16886# INPUT *************************************************************** #
16887#	none								#
16888#									#
16889# OUTPUT ************************************************************** #
16890#	none								#
16891#									#
16892# ALGORITHM *********************************************************** #
16893#	This routine checks which conditional predicate is specified by	#
16894# the stacked ftrapcc instruction opcode and then branches to a routine	#
16895# for that predicate. The corresponding fbcc instruction is then used	#
16896# to see whether the condition (specified by the stacked FPSR) is true	#
16897# or false.								#
16898#	If a BSUN exception should be indicated, the BSUN and AIOP	#
16899# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
16900# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
16901# enabled BSUN should not be flagged and the predicate is true, then	#
16902# the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
16903# flags indicate to the calling routine to emulate the exceptional	#
16904# condition.								#
16905#									#
16906#########################################################################
16907
16908	global		_ftrapcc
# Dispatcher: copies the saved fp condition codes into the live FPSR so the
# handlers can test them with fbcc, then jumps through tbl_ftrapcc using the
# word fetched from EXC_CMDREG as the index.
16909_ftrapcc:
16910	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
# NOTE(review): d0.w is used unmasked as the index into the 32-entry table
# below; presumably the caller guarantees a value in 0..31 -- confirm.
16911
16912	clr.l		%d1			# clear scratch reg
16913	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
16914	ror.l		&0x8,%d1		# rotate to top byte
16915	fmov.l		%d1,%fpsr		# insert into FPSR
16916
16917	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
16918	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16919
# One 16-bit offset (handler - tbl_ftrapcc) per predicate value 0..31.
16920tbl_ftrapcc:
16921	short		ftrapcc_f	-	tbl_ftrapcc	# 00
16922	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
16923	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
16924	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
16925	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
16926	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
16927	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
16928	short		ftrapcc_or	-	tbl_ftrapcc	# 07
16929	short		ftrapcc_un	-	tbl_ftrapcc	# 08
16930	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
16931	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
16932	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
16933	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
16934	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
16935	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
16936	short		ftrapcc_t	-	tbl_ftrapcc	# 15
16937	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
16938	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
16939	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
16940	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
16941	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
16942	short		ftrapcc_le	-	tbl_ftrapcc	# 21
16943	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
16944	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
16945	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
16946	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
16947	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
16948	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
16949	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
16950	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
16951	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
16952	short		ftrapcc_st	-	tbl_ftrapcc	# 31
16953
16954#########################################################################
16955#									#
16956# IEEE Nonaware tests							#
16957#									#
16958# For the IEEE nonaware tests, we set the result based on the		#
16959# floating point condition codes. In addition, we check to see		#
16960# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16961#									#
16962# The cases EQ and NE are shared by the Aware and Nonaware groups	#
16963# and are incapable of setting the BSUN exception bit.			#
16964#									#
16965# Typically, only one of the two possible branch directions could	#
16966# have the NAN bit set.							#
16967#									#
16968#########################################################################
16969
16970#
16971# equal:
16972#
16973#	Z
16974#
# True: go to ftrapcc_trap. False: return with nothing changed.
16975ftrapcc_eq:
16976	fbeq.w		ftrapcc_trap		# equal?
16977ftrapcc_eq_no:
16978	rts					# do nothing
16979
16980#
16981# not equal:
16982#	_
16983#	Z
16984#
# True: go to ftrapcc_trap. False: return with nothing changed.
16985ftrapcc_neq:
16986	fbneq.w		ftrapcc_trap		# not equal?
16987ftrapcc_neq_no:
16988	rts					# do nothing
16989
16990#
16991# greater than:
16992#	_______
16993#	NANvZvN
16994#
# True: go to ftrapcc_trap. False: if NAN is set in the saved ccodes,
# record BSUN+AIOP and go to ftrapcc_bsun when BSUN is enabled; otherwise
# return.
16995ftrapcc_gt:
16996	fbgt.w		ftrapcc_trap		# greater than?
16997ftrapcc_gt_no:
16998	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16999	beq.b		ftrapcc_gt_done		# no
17000	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17001	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17002	bne.w		ftrapcc_bsun		# yes
17003ftrapcc_gt_done:
17004	rts					# no; do nothing
17005
17006#
17007# not greater than:
17008#
17009#	NANvZvN
17010#
# False: return. True: if NAN is set, record BSUN+AIOP and go to
# ftrapcc_bsun when BSUN is enabled; every other true case goes to
# ftrapcc_trap.
17011ftrapcc_ngt:
17012	fbngt.w		ftrapcc_ngt_yes		# not greater than?
17013ftrapcc_ngt_no:
17014	rts					# do nothing
17015ftrapcc_ngt_yes:
17016	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17017	beq.w		ftrapcc_trap		# no; go take trap
17018	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17019	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17020	bne.w		ftrapcc_bsun		# yes
17021	bra.w		ftrapcc_trap		# no; go take trap
17022
17023#
17024# greater than or equal:
17025#	   _____
17026#	Zv(NANvN)
17027#
# Both outcomes check NAN first: if set, BSUN+AIOP are recorded and an
# enabled BSUN goes to ftrapcc_bsun. Otherwise false returns and true goes
# to ftrapcc_trap.
17028ftrapcc_ge:
17029	fbge.w		ftrapcc_ge_yes		# greater than or equal?
17030ftrapcc_ge_no:
17031	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17032	beq.b		ftrapcc_ge_done		# no; go finish
17033	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17034	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17035	bne.w		ftrapcc_bsun		# yes
17036ftrapcc_ge_done:
17037	rts					# no; do nothing
17038ftrapcc_ge_yes:
17039	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17040	beq.w		ftrapcc_trap		# no; go take trap
17041	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17042	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17043	bne.w		ftrapcc_bsun		# yes
17044	bra.w		ftrapcc_trap		# no; go take trap
17045
17046#
17047# not (greater than or equal):
17048#	       _
17049#	NANv(N^Z)
17050#
# False: return. True: if NAN is set, record BSUN+AIOP and go to
# ftrapcc_bsun when BSUN is enabled; every other true case goes to
# ftrapcc_trap.
17051ftrapcc_nge:
17052	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
17053ftrapcc_nge_no:
17054	rts					# do nothing
17055ftrapcc_nge_yes:
17056	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17057	beq.w		ftrapcc_trap		# no; go take trap
17058	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17059	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17060	bne.w		ftrapcc_bsun		# yes
17061	bra.w		ftrapcc_trap		# no; go take trap
17062
17063#
17064# less than:
17065#	   _____
17066#	N^(NANvZ)
17067#
# True: go to ftrapcc_trap. False: if NAN is set in the saved ccodes,
# record BSUN+AIOP and go to ftrapcc_bsun when BSUN is enabled; otherwise
# return.
17068ftrapcc_lt:
17069	fblt.w		ftrapcc_trap		# less than?
17070ftrapcc_lt_no:
17071	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17072	beq.b		ftrapcc_lt_done		# no; go finish
17073	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17074	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17075	bne.w		ftrapcc_bsun		# yes
17076ftrapcc_lt_done:
17077	rts					# no; do nothing
17078
17079#
17080# not less than:
17081#	       _
17082#	NANv(ZvN)
17083#
# Predicate false: return without trapping. Predicate true: trap, but a set
# NAN bit first flags BSUN and AIOP, and an enabled BSUN is reported instead.
17084ftrapcc_nlt:
17085	fbnlt.w		ftrapcc_nlt_yes		# not less than?
17086ftrapcc_nlt_no:
17087	rts					# do nothing
17088ftrapcc_nlt_yes:
17089	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17090	beq.w		ftrapcc_trap		# no; go take trap
17091	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17092	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17093	bne.w		ftrapcc_bsun		# yes; report BSUN
17094	bra.w		ftrapcc_trap		# no; go take trap
17095
17096#
17097# less than or equal:
17098#	     ___
17099#	Zv(N^NAN)
17100#
# Trap when LE holds. On either outcome a set NAN bit flags BSUN and AIOP
# in USER_FPSR, and an enabled BSUN is reported (ftrapcc_bsun) instead.
17101ftrapcc_le:
17102	fble.w		ftrapcc_le_yes		# less than or equal?
17103ftrapcc_le_no:
17104	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17105	beq.b		ftrapcc_le_done		# no; go finish
17106	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17107	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17108	bne.w		ftrapcc_bsun		# yes; report BSUN
17109ftrapcc_le_done:
17110	rts					# no; do nothing
17111ftrapcc_le_yes:
17112	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17113	beq.w		ftrapcc_trap		# no; go take trap
17114	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17115	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17116	bne.w		ftrapcc_bsun		# yes; report BSUN
17117	bra.w		ftrapcc_trap		# no; go take trap
17118
17119#
17120# not (less than or equal):
17121#	     ___
17122#	NANv(NvZ)
17123#
# Predicate false: return without trapping. Predicate true: trap, but a set
# NAN bit first flags BSUN and AIOP, and an enabled BSUN is reported instead.
17124ftrapcc_nle:
17125	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
17126ftrapcc_nle_no:
17127	rts					# do nothing
17128ftrapcc_nle_yes:
17129	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17130	beq.w		ftrapcc_trap		# no; go take trap
17131	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17132	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17133	bne.w		ftrapcc_bsun		# yes; report BSUN
17134	bra.w		ftrapcc_trap		# no; go take trap
17135
17136#
17137# greater or less than:
17138#	_____
17139#	NANvZ
17140#
# Predicate true: trap at once. Predicate false: no trap, but a set NAN bit
# flags BSUN and AIOP, and an enabled BSUN is reported via ftrapcc_bsun.
17141ftrapcc_gl:
17142	fbgl.w		ftrapcc_trap		# greater or less than?
17143ftrapcc_gl_no:
17144	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17145	beq.b		ftrapcc_gl_done		# no; go finish
17146	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17147	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17148	bne.w		ftrapcc_bsun		# yes; report BSUN
17149ftrapcc_gl_done:
17150	rts					# no; do nothing
17151
17152#
17153# not (greater or less than):
17154#
17155#	NANvZ
17156#
# Predicate false: return without trapping. Predicate true: trap, but a set
# NAN bit first flags BSUN and AIOP, and an enabled BSUN is reported instead.
17157ftrapcc_ngl:
17158	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
17159ftrapcc_ngl_no:
17160	rts					# do nothing
17161ftrapcc_ngl_yes:
17162	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17163	beq.w		ftrapcc_trap		# no; go take trap
17164	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17165	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17166	bne.w		ftrapcc_bsun		# yes; report BSUN
17167	bra.w		ftrapcc_trap		# no; go take trap
17168
17169#
17170# greater, less, or equal:
17171#	___
17172#	NAN
17173#
# GLE true: trap at once. GLE false: BSUN and AIOP are flagged without a
# separate NAN test (this path does no btst on nan_bit), and an enabled
# BSUN is reported via ftrapcc_bsun; otherwise return without trapping.
17174ftrapcc_gle:
17175	fbgle.w		ftrapcc_trap		# greater, less, or equal?
17176ftrapcc_gle_no:
17177	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17178	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17179	bne.w		ftrapcc_bsun		# yes; report BSUN
17180	rts					# no; do nothing
17181
17182#
17183# not (greater, less, or equal):
17184#
17185#	NAN
17186#
# NGLE false: return without trapping. NGLE true: BSUN and AIOP are flagged
# without a separate NAN test; an enabled BSUN is reported via ftrapcc_bsun,
# otherwise the trap is taken.
17187ftrapcc_ngle:
17188	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
17189ftrapcc_ngle_no:
17190	rts					# do nothing
17191ftrapcc_ngle_yes:
17192	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17193	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17194	bne.w		ftrapcc_bsun		# yes; report BSUN
17195	bra.w		ftrapcc_trap		# no; go take trap
17196
17197#########################################################################
17198#									#
17199# Miscellaneous tests							#
17200#									#
17201# The false/true predicates never look at the NAN bit. The signalling	#
17202# predicates (SF, ST, SEQ, SNEQ) flag BSUN and AIOP whenever the NAN	#
17203# bit is set in the floating point condition codes.			#
17204#									#
17205#########################################################################
17206
17207#
17208# false:
17209#
17210#	False
17211#
# Predicate is constant false: never traps and never touches BSUN.
17212ftrapcc_f:
17213	rts					# do nothing
17214
17215#
17216# true:
17217#
17218#	True
17219#
# Predicate is constant true: always traps and never touches BSUN.
17220ftrapcc_t:
17221	bra.w		ftrapcc_trap		# go take trap
17222
17223#
17224# signalling false:
17225#
17226#	False
17227#
# Never traps. A set NAN bit flags BSUN and AIOP, and an enabled BSUN is
# reported via ftrapcc_bsun.
17228ftrapcc_sf:
17229	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17230	beq.b		ftrapcc_sf_done		# no; go finish
17231	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17232	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17233	bne.w		ftrapcc_bsun		# yes; report BSUN
17234ftrapcc_sf_done:
17235	rts					# no; do nothing
17236
17237#
17238# signalling true:
17239#
17240#	True
17241#
# Always traps, except that a set NAN bit flags BSUN and AIOP first and an
# enabled BSUN is then reported via ftrapcc_bsun instead of the trap.
17242ftrapcc_st:
17243	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17244	beq.w		ftrapcc_trap		# no; go take trap
17245	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17246	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17247	bne.w		ftrapcc_bsun		# yes; report BSUN
17248	bra.w		ftrapcc_trap		# no; go take trap
17249
17250#
17251# signalling equal:
17252#
17253#	Z
17254#
# Trap when SEQ holds. On either outcome a set NAN bit flags BSUN and AIOP
# in USER_FPSR, and an enabled BSUN is reported (ftrapcc_bsun) instead.
17255ftrapcc_seq:
17256	fbseq.w		ftrapcc_seq_yes		# signalling equal?
17257ftrapcc_seq_no:
17258	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17259	beq.w		ftrapcc_seq_done	# no; go finish
17260	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17261	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17262	bne.w		ftrapcc_bsun		# yes; report BSUN
17263ftrapcc_seq_done:
17264	rts					# no; do nothing
17265ftrapcc_seq_yes:
17266	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17267	beq.w		ftrapcc_trap		# no; go take trap
17268	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17269	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17270	bne.w		ftrapcc_bsun		# yes; report BSUN
17271	bra.w		ftrapcc_trap		# no; go take trap
17272
17273#
17274# signalling not equal:
17275#	_
17276#	Z
17277#
# Trap when SNEQ holds. On either outcome a set NAN bit flags BSUN and AIOP
# in USER_FPSR, and an enabled BSUN is reported (ftrapcc_bsun) instead.
17278ftrapcc_sneq:
17279	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
17280ftrapcc_sneq_no:
17281	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17282	beq.w		ftrapcc_sneq_no_done	# no; go finish
17283	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17284	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17285	bne.w		ftrapcc_bsun		# yes; report BSUN
17286ftrapcc_sneq_no_done:
17287	rts					# do nothing
17288ftrapcc_sneq_yes:
17289	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17290	beq.w		ftrapcc_trap		# no; go take trap
17291	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17292	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17293	bne.w		ftrapcc_bsun		# yes; report BSUN
17294	bra.w		ftrapcc_trap		# no; go take trap
17295
17296#########################################################################
17297#									#
17298# IEEE Aware tests							#
17299#									#
17300# For the IEEE aware tests, we only have to set the result based on the	#
17301# floating point condition codes. The BSUN exception will not be	#
17302# set for any of these tests.						#
17303#									#
17304#########################################################################
17305
17306#
17307# ordered greater than:
17308#	_______
17309#	NANvZvN
17310#
# Trap when the predicate holds; the NAN bit is not inspected here.
17311ftrapcc_ogt:
17312	fbogt.w		ftrapcc_trap		# ordered greater than?
17313ftrapcc_ogt_no:
17314	rts					# do nothing
17315
17316#
17317# unordered or less or equal:
17318#
17319#	NANvZvN
17320# (no overbar: ULE is the complement of OGT)
# Trap when the predicate holds; the NAN bit is not inspected here.
17321ftrapcc_ule:
17322	fbule.w		ftrapcc_trap		# unordered or less or equal?
17323ftrapcc_ule_no:
17324	rts					# do nothing
17325
17326#
17327# ordered greater than or equal:
17328#	   _____
17329#	Zv(NANvN)
17330#
# Trap when the predicate holds; the NAN bit is not inspected here.
17331ftrapcc_oge:
17332	fboge.w		ftrapcc_trap		# ordered greater than or equal?
17333ftrapcc_oge_no:
17334	rts					# do nothing
17335
17336#
17337# unordered or less than:
17338#	       _
17339#	NANv(N^Z)
17340#
# Trap when the predicate holds; the NAN bit is not inspected here.
17341ftrapcc_ult:
17342	fbult.w		ftrapcc_trap		# unordered or less than?
17343ftrapcc_ult_no:
17344	rts					# do nothing
17345
17346#
17347# ordered less than:
17348#	   _____
17349#	N^(NANvZ)
17350#
# Trap when the predicate holds; the NAN bit is not inspected here.
17351ftrapcc_olt:
17352	fbolt.w		ftrapcc_trap		# ordered less than?
17353ftrapcc_olt_no:
17354	rts					# do nothing
17355
17356#
17357# unordered or greater or equal:
17358#	      _
17359#	NANvZvN
17360# (overbar on N only: UGE is the complement of OLT)
# Trap when the predicate holds; the NAN bit is not inspected here.
17361ftrapcc_uge:
17362	fbuge.w		ftrapcc_trap		# unordered or greater or equal?
17363ftrapcc_uge_no:
17364	rts					# do nothing
17365
17366#
17367# ordered less than or equal:
17368#	     ___
17369#	Zv(N^NAN)
17370#
# Trap when the predicate holds; the NAN bit is not inspected here.
17371ftrapcc_ole:
17372	fbole.w		ftrapcc_trap		# ordered less than or equal?
17373ftrapcc_ole_no:
17374	rts					# do nothing
17375
17376#
17377# unordered or greater than:
17378#	     ___
17379#	NANv(NvZ)
17380#
# Trap when the predicate holds; the NAN bit is not inspected here.
17381ftrapcc_ugt:
17382	fbugt.w		ftrapcc_trap		# unordered or greater than?
17383ftrapcc_ugt_no:
17384	rts					# do nothing
17385
17386#
17387# ordered greater or less than:
17388#	_____
17389#	NANvZ
17390#
# Trap when the predicate holds; the NAN bit is not inspected here.
17391ftrapcc_ogl:
17392	fbogl.w		ftrapcc_trap		# ordered greater or less than?
17393ftrapcc_ogl_no:
17394	rts					# do nothing
17395
17396#
17397# unordered or equal:
17398#
17399#	NANvZ
17400#
# Trap when the predicate holds; the NAN bit is not inspected here.
17401ftrapcc_ueq:
17402	fbueq.w		ftrapcc_trap		# unordered or equal?
17403ftrapcc_ueq_no:
17404	rts					# do nothing
17405
17406#
17407# ordered:
17408#	___
17409#	NAN
17410#
# Trap when the predicate holds; the NAN bit is not inspected here.
17411ftrapcc_or:
17412	fbor.w		ftrapcc_trap		# ordered?
17413ftrapcc_or_no:
17414	rts					# do nothing
17415
17416#
17417# unordered:
17418#
17419#	NAN
17420#
# Trap when the predicate holds; the NAN bit is not inspected here.
17421ftrapcc_un:
17422	fbun.w		ftrapcc_trap		# unordered?
17423ftrapcc_un_no:
17424	rts					# do nothing
17425
17426#######################################################################
17427
17428# the predicate is true and no enabled bsun exception is pending.
17429# we will need to jump to the ftrapcc vector. the stack frame
17430# is the same size as that of the fp unimp instruction. the
17431# only difference is that the <ea> field should hold the PC
17432# of the ftrapcc instruction and the vector offset field
17433# should denote the ftrapcc trap.
# this routine only records the request: it stores ftrapcc_flg in
# SPCOND_FLG and returns; no frame is modified here.
17434ftrapcc_trap:
17435	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)	# flag "take ftrapcc trap"
17436	rts
17437
17438# the emulation routine set bsun and BSUN was enabled. have to
17439# fix stack and jump to the bsun handler.
17440# let the caller of this routine shift the stack frame up to
17441# eliminate the effective address field.
# this routine only records the request: it stores fbsun_flg in
# SPCOND_FLG and returns.
17442ftrapcc_bsun:
17443	mov.b		&fbsun_flg,SPCOND_FLG(%a6)	# flag "enabled BSUN pending"
17444	rts
17445
17446#########################################################################
17447# fscc(): routine to emulate the fscc instruction			#
17448#									#
17449# XDEF **************************************************************** #
17450#	_fscc()								#
17451#									#
17452# XREF **************************************************************** #
17453#	store_dreg_b() - store result to data register file		#
17454#	dec_areg() - decrement an areg for -(an) mode			#
17455#	inc_areg() - increment an areg for (an)+ mode			#
17456#	_dmem_write_byte() - store result to memory			#
17457#									#
17458# INPUT ***************************************************************	#
17459#	none (works on the exception frame addressed through a6)	#
17460#									#
17461# OUTPUT ************************************************************** #
17462#	none								#
17463#									#
17464# ALGORITHM ***********************************************************	#
17465#	This routine checks which conditional predicate is specified by	#
17466# the stacked fscc instruction opcode and then branches to a routine	#
17467# for that predicate. The corresponding fbcc instruction is then used	#
17468# to see whether the condition (specified by the stacked FPSR) is true	#
17469# or false.								#
17470#	If a BSUN exception should be indicated, the BSUN and AIOP	#
17471# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
17472# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
17473# enabled BSUN should not be flagged, then the true/false result byte	#
17474# is stored to the data register file or memory.			#
17475#									#
17476#########################################################################
17477
17478	global		_fscc
17479_fscc:
# NOTE(review): d0 is used as the table index exactly as fetched; this
# assumes EXC_CMDREG holds a predicate in the range 0-31 -- confirm
# against the caller.
17480	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
17481
# copy the saved condition-code byte into the top byte of the live FPSR so
# that the fbcc instructions in the handlers test the emulated codes.
17482	clr.l		%d1			# clear scratch reg
17483	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
17484	ror.l		&0x8,%d1		# rotate to top byte
17485	fmov.l		%d1,%fpsr		# insert into FPSR
17486
17487	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # load handler offset from table
17488	jmp		(tbl_fscc.b,%pc,%d1.w)	# jump to fscc routine
17489
# one 16-bit offset (relative to tbl_fscc) per predicate, in predicate order.
17490tbl_fscc:
17491	short		fscc_f		-	tbl_fscc	# 00
17492	short		fscc_eq		-	tbl_fscc	# 01
17493	short		fscc_ogt	-	tbl_fscc	# 02
17494	short		fscc_oge	-	tbl_fscc	# 03
17495	short		fscc_olt	-	tbl_fscc	# 04
17496	short		fscc_ole	-	tbl_fscc	# 05
17497	short		fscc_ogl	-	tbl_fscc	# 06
17498	short		fscc_or		-	tbl_fscc	# 07
17499	short		fscc_un		-	tbl_fscc	# 08
17500	short		fscc_ueq	-	tbl_fscc	# 09
17501	short		fscc_ugt	-	tbl_fscc	# 10
17502	short		fscc_uge	-	tbl_fscc	# 11
17503	short		fscc_ult	-	tbl_fscc	# 12
17504	short		fscc_ule	-	tbl_fscc	# 13
17505	short		fscc_neq	-	tbl_fscc	# 14
17506	short		fscc_t		-	tbl_fscc	# 15
17507	short		fscc_sf		-	tbl_fscc	# 16
17508	short		fscc_seq	-	tbl_fscc	# 17
17509	short		fscc_gt		-	tbl_fscc	# 18
17510	short		fscc_ge		-	tbl_fscc	# 19
17511	short		fscc_lt		-	tbl_fscc	# 20
17512	short		fscc_le		-	tbl_fscc	# 21
17513	short		fscc_gl		-	tbl_fscc	# 22
17514	short		fscc_gle	-	tbl_fscc	# 23
17515	short		fscc_ngle	-	tbl_fscc	# 24
17516	short		fscc_ngl	-	tbl_fscc	# 25
17517	short		fscc_nle	-	tbl_fscc	# 26
17518	short		fscc_nlt	-	tbl_fscc	# 27
17519	short		fscc_nge	-	tbl_fscc	# 28
17520	short		fscc_ngt	-	tbl_fscc	# 29
17521	short		fscc_sneq	-	tbl_fscc	# 30
17522	short		fscc_st		-	tbl_fscc	# 31
17523
17524#########################################################################
17525#									#
17526# IEEE Nonaware tests							#
17527#									#
17528# For the IEEE nonaware tests, we set the result based on the		#
17529# floating point condition codes. In addition, we check to see		#
17530# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
17531#									#
17532# The cases EQ and NE are shared by the Aware and Nonaware groups	#
17533# and are incapable of setting the BSUN exception bit.			#
17534#									#
17535# Typically, only one of the two possible branch directions could	#
17536# have the NAN bit set.							#
17537#									#
17538#########################################################################
17539
17540#
17541# equal:
17542#
17543#	Z
17544#
# Select the result byte in d0 (st = true, clr.b = false) and go store it.
# This predicate never flags BSUN.
17545fscc_eq:
17546	fbeq.w		fscc_eq_yes		# equal?
17547fscc_eq_no:
17548	clr.b		%d0			# set false
17549	bra.w		fscc_done		# go finish
17550fscc_eq_yes:
17551	st		%d0			# set true
17552	bra.w		fscc_done		# go finish
17553
17554#
17555# not equal:
17556#	_
17557#	Z
17558#
# Select the result byte in d0 (st = true, clr.b = false) and go store it.
# This predicate never flags BSUN.
17559fscc_neq:
17560	fbneq.w		fscc_neq_yes		# not equal?
17561fscc_neq_no:
17562	clr.b		%d0			# set false
17563	bra.w		fscc_done		# go finish
17564fscc_neq_yes:
17565	st		%d0			# set true
17566	bra.w		fscc_done		# go finish
17567
17568#
17569# greater than:
17570#	_______
17571#	NANvZvN
17572#
# Select the result byte in d0. Only the false outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17573fscc_gt:
17574	fbgt.w		fscc_gt_yes		# greater than?
17575fscc_gt_no:
17576	clr.b		%d0			# set false
17577	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17578	beq.w		fscc_done		# no;go finish
17579	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17580	bra.w		fscc_chk_bsun		# go check BSUN enable
17581fscc_gt_yes:
17582	st		%d0			# set true
17583	bra.w		fscc_done		# go finish
17584
17585#
17586# not greater than:
17587#
17588#	NANvZvN
17589#
# Select the result byte in d0. Only the true outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17590fscc_ngt:
17591	fbngt.w		fscc_ngt_yes		# not greater than?
17592fscc_ngt_no:
17593	clr.b		%d0			# set false
17594	bra.w		fscc_done		# go finish
17595fscc_ngt_yes:
17596	st		%d0			# set true
17597	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17598	beq.w		fscc_done		# no;go finish
17599	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17600	bra.w		fscc_chk_bsun		# go check BSUN enable
17601
17602#
17603# greater than or equal:
17604#	   _____
17605#	Zv(NANvN)
17606#
# Select the result byte in d0. Both outcomes test the NAN bit: if set,
# BSUN and AIOP are flagged and fscc_chk_bsun decides whether the result
# is stored or an enabled BSUN is reported.
17607fscc_ge:
17608	fbge.w		fscc_ge_yes		# greater than or equal?
17609fscc_ge_no:
17610	clr.b		%d0			# set false
17611	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17612	beq.w		fscc_done		# no;go finish
17613	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17614	bra.w		fscc_chk_bsun		# go check BSUN enable
17615fscc_ge_yes:
17616	st		%d0			# set true
17617	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17618	beq.w		fscc_done		# no;go finish
17619	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17620	bra.w		fscc_chk_bsun		# go check BSUN enable
17621
17622#
17623# not (greater than or equal):
17624#	       _
17625#	NANv(N^Z)
17626#
# Select the result byte in d0. Only the true outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17627fscc_nge:
17628	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
17629fscc_nge_no:
17630	clr.b		%d0			# set false
17631	bra.w		fscc_done		# go finish
17632fscc_nge_yes:
17633	st		%d0			# set true
17634	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17635	beq.w		fscc_done		# no;go finish
17636	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17637	bra.w		fscc_chk_bsun		# go check BSUN enable
17638
17639#
17640# less than:
17641#	   _____
17642#	N^(NANvZ)
17643#
# Select the result byte in d0. Only the false outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17644fscc_lt:
17645	fblt.w		fscc_lt_yes		# less than?
17646fscc_lt_no:
17647	clr.b		%d0			# set false
17648	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17649	beq.w		fscc_done		# no;go finish
17650	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17651	bra.w		fscc_chk_bsun		# go check BSUN enable
17652fscc_lt_yes:
17653	st		%d0			# set true
17654	bra.w		fscc_done		# go finish
17655
17656#
17657# not less than:
17658#	       _
17659#	NANv(ZvN)
17660#
# Select the result byte in d0. Only the true outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17661fscc_nlt:
17662	fbnlt.w		fscc_nlt_yes		# not less than?
17663fscc_nlt_no:
17664	clr.b		%d0			# set false
17665	bra.w		fscc_done		# go finish
17666fscc_nlt_yes:
17667	st		%d0			# set true
17668	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17669	beq.w		fscc_done		# no;go finish
17670	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17671	bra.w		fscc_chk_bsun		# go check BSUN enable
17672
17673#
17674# less than or equal:
17675#	     ___
17676#	Zv(N^NAN)
17677#
# Select the result byte in d0. Both outcomes test the NAN bit: if set,
# BSUN and AIOP are flagged and fscc_chk_bsun decides whether the result
# is stored or an enabled BSUN is reported.
17678fscc_le:
17679	fble.w		fscc_le_yes		# less than or equal?
17680fscc_le_no:
17681	clr.b		%d0			# set false
17682	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17683	beq.w		fscc_done		# no;go finish
17684	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17685	bra.w		fscc_chk_bsun		# go check BSUN enable
17686fscc_le_yes:
17687	st		%d0			# set true
17688	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17689	beq.w		fscc_done		# no;go finish
17690	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17691	bra.w		fscc_chk_bsun		# go check BSUN enable
17692
17693#
17694# not (less than or equal):
17695#	     ___
17696#	NANv(NvZ)
17697#
# Select the result byte in d0. Only the true outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17698fscc_nle:
17699	fbnle.w		fscc_nle_yes		# not (less than or equal)?
17700fscc_nle_no:
17701	clr.b		%d0			# set false
17702	bra.w		fscc_done		# go finish
17703fscc_nle_yes:
17704	st		%d0			# set true
17705	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17706	beq.w		fscc_done		# no;go finish
17707	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17708	bra.w		fscc_chk_bsun		# go check BSUN enable
17709
17710#
17711# greater or less than:
17712#	_____
17713#	NANvZ
17714#
# Select the result byte in d0. Only the false outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17715fscc_gl:
17716	fbgl.w		fscc_gl_yes		# greater or less than?
17717fscc_gl_no:
17718	clr.b		%d0			# set false
17719	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17720	beq.w		fscc_done		# no;go finish
17721	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17722	bra.w		fscc_chk_bsun		# go check BSUN enable
17723fscc_gl_yes:
17724	st		%d0			# set true
17725	bra.w		fscc_done		# go finish
17726
17727#
17728# not (greater or less than):
17729#
17730#	NANvZ
17731#
# Select the result byte in d0. Only the true outcome tests the NAN bit:
# if set, BSUN and AIOP are flagged and fscc_chk_bsun decides whether the
# result is stored or an enabled BSUN is reported.
17732fscc_ngl:
17733	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
17734fscc_ngl_no:
17735	clr.b		%d0			# set false
17736	bra.w		fscc_done		# go finish
17737fscc_ngl_yes:
17738	st		%d0			# set true
17739	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17740	beq.w		fscc_done		# no;go finish
17741	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17742	bra.w		fscc_chk_bsun		# go check BSUN enable
17743
17744#
17745# greater, less, or equal:
17746#	___
17747#	NAN
17748#
# Select the result byte in d0. The false outcome flags BSUN and AIOP
# without a separate NAN test and lets fscc_chk_bsun decide whether the
# result is stored or an enabled BSUN is reported.
17749fscc_gle:
17750	fbgle.w		fscc_gle_yes		# greater, less, or equal?
17751fscc_gle_no:
17752	clr.b		%d0			# set false
17753	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17754	bra.w		fscc_chk_bsun		# go check BSUN enable
17755fscc_gle_yes:
17756	st		%d0			# set true
17757	bra.w		fscc_done		# go finish
17758
17759#
17760# not (greater, less, or equal):
17761#
17762#	NAN
17763#
# Select the result byte in d0. The true outcome flags BSUN and AIOP
# without a separate NAN test and lets fscc_chk_bsun decide whether the
# result is stored or an enabled BSUN is reported.
17764fscc_ngle:
17765	fbngle.w		fscc_ngle_yes	# not (greater, less, or equal)?
17766fscc_ngle_no:
17767	clr.b		%d0			# set false
17768	bra.w		fscc_done		# go finish
17769fscc_ngle_yes:
17770	st		%d0			# set true
17771	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17772	bra.w		fscc_chk_bsun		# go check BSUN enable
17773
17774#########################################################################
17775#									#
17776# Miscellaneous tests							#
17777#									#
17778# The false/true predicates never look at the NAN bit. The signalling	#
17779# predicates (SF, ST, SEQ, SNEQ) flag BSUN and AIOP whenever the NAN	#
17780# bit is set in the floating point condition codes.			#
17781#									#
17782#########################################################################
17783
17784#
17785# false:
17786#
17787#	False
17788#
# Constant false: the result byte is always cleared; BSUN is never flagged.
17789fscc_f:
17790	clr.b		%d0			# set false
17791	bra.w		fscc_done		# go finish
17792
17793#
17794# true:
17795#
17796#	True
17797#
# Constant true: the result byte is always set; BSUN is never flagged.
17798fscc_t:
17799	st		%d0			# set true
17800	bra.w		fscc_done		# go finish
17801
17802#
17803# signalling false:
17804#
17805#	False
17806#
# Result is always false. A set NAN bit flags BSUN and AIOP and lets
# fscc_chk_bsun decide whether the result is stored or BSUN is reported.
17807fscc_sf:
17808	clr.b		%d0			# set false
17809	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17810	beq.w		fscc_done		# no;go finish
17811	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17812	bra.w		fscc_chk_bsun		# go check BSUN enable
17813
17814#
17815# signalling true:
17816#
17817#	True
17818#
# Result is always true. A set NAN bit flags BSUN and AIOP and lets
# fscc_chk_bsun decide whether the result is stored or BSUN is reported.
17819fscc_st:
17820	st		%d0			# set true
17821	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17822	beq.w		fscc_done		# no;go finish
17823	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17824	bra.w		fscc_chk_bsun		# go check BSUN enable
17825
17826#
17827# signalling equal:
17828#
17829#	Z
17830#
# Select the result byte in d0. Both outcomes test the NAN bit: if set,
# BSUN and AIOP are flagged and fscc_chk_bsun decides whether the result
# is stored or an enabled BSUN is reported.
17831fscc_seq:
17832	fbseq.w		fscc_seq_yes		# signalling equal?
17833fscc_seq_no:
17834	clr.b		%d0			# set false
17835	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17836	beq.w		fscc_done		# no;go finish
17837	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17838	bra.w		fscc_chk_bsun		# go check BSUN enable
17839fscc_seq_yes:
17840	st		%d0			# set true
17841	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17842	beq.w		fscc_done		# no;go finish
17843	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17844	bra.w		fscc_chk_bsun		# go check BSUN enable
17845
17846#
17847# signalling not equal:
17848#	_
17849#	Z
17850#
# Select the result byte in d0. Both outcomes test the NAN bit: if set,
# BSUN and AIOP are flagged and fscc_chk_bsun decides whether the result
# is stored or an enabled BSUN is reported.
17851fscc_sneq:
17852	fbsneq.w	fscc_sneq_yes		# signalling not equal?
17853fscc_sneq_no:
17854	clr.b		%d0			# set false
17855	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17856	beq.w		fscc_done		# no;go finish
17857	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17858	bra.w		fscc_chk_bsun		# go check BSUN enable
17859fscc_sneq_yes:
17860	st		%d0			# set true
17861	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17862	beq.w		fscc_done		# no;go finish
17863	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
17864	bra.w		fscc_chk_bsun		# go check BSUN enable
17865
17866#########################################################################
17867#									#
17868# IEEE Aware tests							#
17869#									#
17870# For the IEEE aware tests, we only have to set the result based on the	#
17871# floating point condition codes. The BSUN exception will not be	#
17872# set for any of these tests.						#
17873#									#
17874#########################################################################
17875
17876#
17877# ordered greater than:
17878#	_______
17879#	NANvZvN
17880#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17881fscc_ogt:
17882	fbogt.w		fscc_ogt_yes		# ordered greater than?
17883fscc_ogt_no:
17884	clr.b		%d0			# set false
17885	bra.w		fscc_done		# go finish
17886fscc_ogt_yes:
17887	st		%d0			# set true
17888	bra.w		fscc_done		# go finish
17889
17890#
17891# unordered or less or equal:
17892#
17893#	NANvZvN
17894# (no overbar: ULE is the complement of OGT)
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17895fscc_ule:
17896	fbule.w		fscc_ule_yes		# unordered or less or equal?
17897fscc_ule_no:
17898	clr.b		%d0			# set false
17899	bra.w		fscc_done		# go finish
17900fscc_ule_yes:
17901	st		%d0			# set true
17902	bra.w		fscc_done		# go finish
17903
17904#
17905# ordered greater than or equal:
17906#	   _____
17907#	Zv(NANvN)
17908#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17909fscc_oge:
17910	fboge.w		fscc_oge_yes		# ordered greater than or equal?
17911fscc_oge_no:
17912	clr.b		%d0			# set false
17913	bra.w		fscc_done		# go finish
17914fscc_oge_yes:
17915	st		%d0			# set true
17916	bra.w		fscc_done		# go finish
17917
17918#
17919# unordered or less than:
17920#	       _
17921#	NANv(N^Z)
17922#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17923fscc_ult:
17924	fbult.w		fscc_ult_yes		# unordered or less than?
17925fscc_ult_no:
17926	clr.b		%d0			# set false
17927	bra.w		fscc_done		# go finish
17928fscc_ult_yes:
17929	st		%d0			# set true
17930	bra.w		fscc_done		# go finish
17931
17932#
17933# ordered less than:
17934#	   _____
17935#	N^(NANvZ)
17936#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17937fscc_olt:
17938	fbolt.w		fscc_olt_yes		# ordered less than?
17939fscc_olt_no:
17940	clr.b		%d0			# set false
17941	bra.w		fscc_done		# go finish
17942fscc_olt_yes:
17943	st		%d0			# set true
17944	bra.w		fscc_done		# go finish
17945
17946#
17947# unordered or greater or equal:
17948#	      _
17949#	NANvZvN
17950# (overbar on N only: UGE is the complement of OLT)
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17951fscc_uge:
17952	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
17953fscc_uge_no:
17954	clr.b		%d0			# set false
17955	bra.w		fscc_done		# go finish
17956fscc_uge_yes:
17957	st		%d0			# set true
17958	bra.w		fscc_done		# go finish
17959
17960#
17961# ordered less than or equal:
17962#	     ___
17963#	Zv(N^NAN)
17964#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17965fscc_ole:
17966	fbole.w		fscc_ole_yes		# ordered less than or equal?
17967fscc_ole_no:
17968	clr.b		%d0			# set false
17969	bra.w		fscc_done		# go finish
17970fscc_ole_yes:
17971	st		%d0			# set true
17972	bra.w		fscc_done		# go finish
17973
17974#
17975# unordered or greater than:
17976#	     ___
17977#	NANv(NvZ)
17978#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17979fscc_ugt:
17980	fbugt.w		fscc_ugt_yes		# unordered or greater than?
17981fscc_ugt_no:
17982	clr.b		%d0			# set false
17983	bra.w		fscc_done		# go finish
17984fscc_ugt_yes:
17985	st		%d0			# set true
17986	bra.w		fscc_done		# go finish
17987
17988#
17989# ordered greater or less than:
17990#	_____
17991#	NANvZ
17992#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
17993fscc_ogl:
17994	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
17995fscc_ogl_no:
17996	clr.b		%d0			# set false
17997	bra.w		fscc_done		# go finish
17998fscc_ogl_yes:
17999	st		%d0			# set true
18000	bra.w		fscc_done		# go finish
18001
18002#
18003# unordered or equal:
18004#
18005#	NANvZ
18006#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
18007fscc_ueq:
18008	fbueq.w		fscc_ueq_yes		# unordered or equal?
18009fscc_ueq_no:
18010	clr.b		%d0			# set false
18011	bra.w		fscc_done		# go finish
18012fscc_ueq_yes:
18013	st		%d0			# set true
18014	bra.w		fscc_done		# go finish
18015
18016#
18017# ordered:
18018#	___
18019#	NAN
18020#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
18021fscc_or:
18022	fbor.w		fscc_or_yes		# ordered?
18023fscc_or_no:
18024	clr.b		%d0			# set false
18025	bra.w		fscc_done		# go finish
18026fscc_or_yes:
18027	st		%d0			# set true
18028	bra.w		fscc_done		# go finish
18029
18030#
18031# unordered:
18032#
18033#	NAN
18034#
# Select the result byte in d0 and go store it; the NAN bit is not
# inspected and BSUN is never flagged here.
18035fscc_un:
18036	fbun.w		fscc_un_yes		# unordered?
18037fscc_un_no:
18038	clr.b		%d0			# set false
18039	bra.w		fscc_done		# go finish
18040fscc_un_yes:
18041	st		%d0			# set true
18042	bra.w		fscc_done		# go finish
18043
18044#######################################################################
18045
18046#
18047# the bsun exception bit was set. now, check to see if BSUN
18048# is enabled. if so, don't store result and correct stack frame
18049# for a bsun exception.
# if BSUN is not enabled, execution falls through into fscc_done below
# and the result selected in d0 is stored as usual.
18050#
18051fscc_chk_bsun:
18052	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
18053	bne.w		fscc_bsun		# yes; report BSUN, skip the store
18054
18055#
18056# no enabled bsun exception is pending.
18057# the result has been selected (low byte of d0).
18058# now, check to see if the result is to be stored in the data register
18059# file or in memory.
# the <ea> mode field (bits 5:3 of the low opword byte) decides: zero
# means a data register destination, anything else goes to fscc_mem_op.
18060#
18061fscc_done:
18062	mov.l		%d0,%a0			# save result for a moment
18063
18064	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
18065	mov.l		%d1,%d0			# make a copy
18066	andi.b		&0x38,%d1		# extract <ea> mode field
18067
18068	bne.b		fscc_mem_op		# it's a memory operation
18069
18070	mov.l		%d0,%d1			# recover the unmasked opword byte
18071	andi.w		&0x7,%d1		# pass index in d1
18072	mov.l		%a0,%d0			# pass result in d0
18073	bsr.l		store_dreg_b		# save result in regfile
18074	rts
18075
18076#
18077# the stacked <ea> is correct with the exception of:
18078#	-> Dn : <ea> is garbage
18079#
18080# if the addressing mode is post-increment or pre-decrement,
18081# then the address registers have not been updated.
# on entry d1 (low byte) holds the masked <ea> mode field and a0 holds
# the result; all other memory modes write the byte straight to EXC_EA.
18082#
18083fscc_mem_op:
18084	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
18085	beq.b		fscc_mem_inc		# yes
18086	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
18087	beq.b		fscc_mem_dec		# yes
18088
18089	mov.l		%a0,%d0			# pass result in d0
18090	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18091	bsr.l		_dmem_write_byte	# write result byte
18092
18093	tst.l		%d1			# did dstore fail? (nonzero d1)
18094	bne.w		fscc_err		# yes
18095
18096	rts
18097
18098# addressing mode is post-increment. write the result byte. if the write
18099# fails then don't update the address register. if write passes then
18100# call inc_areg() to update the address register.
# on entry a0 holds the result byte selected by the predicate handler.
18101fscc_mem_inc:
18102	mov.l		%a0,%d0			# pass result in d0
18103	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18104	bsr.l		_dmem_write_byte	# write result byte
18105
18106	tst.l		%d1			# did dstore fail? (nonzero d1)
18107	bne.w		fscc_err		# yes
18108
18109	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
18110	andi.w		&0x7,%d1		# pass index in d1
18111	movq.l		&0x1,%d0		# pass amt to inc by
18112	bsr.l		inc_areg		# increment address register
18113
18114	rts
18115
18116# addressing mode is pre-decrement. write the result byte. if the write
18117# fails then don't update the address register. if the write passes then
18118# call dec_areg() to update the address register.
# on entry a0 holds the result byte selected by the predicate handler.
# NOTE(review): the byte is written to EXC_EA as stacked, so this relies
# on EXC_EA already holding the decremented address -- confirm.
18119fscc_mem_dec:
18120	mov.l		%a0,%d0			# pass result in d0
18121	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18122	bsr.l		_dmem_write_byte	# write result byte
18123
18124	tst.l		%d1			# did dstore fail? (nonzero d1)
18125	bne.w		fscc_err		# yes
18126
18127	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
18128	andi.w		&0x7,%d1		# pass index in d1
18129	movq.l		&0x1,%d0		# pass amt to dec by
18130	bsr.l		dec_areg		# decrement address register
18131
18132	rts
18133
18134# the emulation routine set bsun and BSUN was enabled. have to
18135# fix stack and jump to the bsun handler.
18136# let the caller of this routine shift the stack frame up to
18137# eliminate the effective address field.
# this routine only records the request: it stores fbsun_flg in
# SPCOND_FLG and returns without storing the fscc result.
18138fscc_bsun:
18139	mov.b		&fbsun_flg,SPCOND_FLG(%a6)	# flag "enabled BSUN pending"
18140	rts
18141
18142# the byte write to memory has failed. store 0x00a1 in the EXC_VOFF
18143# slot and exit through facc_finish.
# NOTE(review): 0x00a1 is presumably the FSLW for a byte write fault,
# parked in EXC_VOFF for facc_finish to pick up -- confirm.
18144fscc_err:
18145	mov.w		&0x00a1,EXC_VOFF(%a6)
18146	bra.l		facc_finish
18147
18148#########################################################################
18149# XDEF ****************************************************************	#
18150#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
18151#									#
18152# XREF ****************************************************************	#
18153#	fetch_dreg() - fetch data register				#
18154#	{i,d,}mem_read() - fetch data from memory			#
18155#	_mem_write() - write data to memory				#
18156#	iea_iacc() - instruction memory access error occurred		#
18157#	iea_dacc() - data memory access error occurred			#
18158#	restore() - restore An index regs if access error occurred	#
18159#									#
18160# INPUT ***************************************************************	#
18161#	None								#
18162#									#
18163# OUTPUT **************************************************************	#
18164#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
18165#		d0 = size of dump					#
18166#		d1 = reg select bit string (from Dn)			#
18167#	Else if instruction access error,				#
18168#		d0 = FSLW						#
18169#	Else if data access error,					#
18170#		d0 = FSLW						#
18171#		a0 = address of fault					#
18172#	Else								#
18173#		none.							#
18174#									#
18175# ALGORITHM ***********************************************************	#
18176#	The effective address must be calculated since this is entered	#
18177# from an "Unimplemented Effective Address" exception handler. So, we	#
18178# have our own fcalc_ea() routine here. If an access error is flagged	#
18179# by a _{i,d,}mem_read() call, we must exit through the special		#
18180# handler.								#
18181#	The data register is determined and its value loaded to get the	#
18182# string of FP registers affected. This value is used as an index into	#
18183# a lookup table such that we can determine the number of bytes		#
18184# involved.								#
18185#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
18186# to read in all FP values. Again, _mem_read() may fail and require a	#
18187# special exit.								#
18188#	If the instruction is "fmovm.x Dn,<ea>", a _mem_write() is used	#
18189# to write all FP values. _mem_write() may also fail.			#
18190#	If the instruction is "fmovm.x Dn,-(a7)" from supervisor mode,	#
18191# then we return the size of the dump and the string to the caller	#
18192# so that the move can occur outside of this routine. This special	#
18193# case is required so that moves to the system stack are handled	#
18194# correctly.								#
18195#									#
18196# DYNAMIC:								#
18197#	fmovm.x	dn, <ea>						#
18198#	fmovm.x	<ea>, dn						#
18199#									#
18200#	      <WORD 1>		      <WORD2>				#
18201#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
18202#									#
18203#	& = (0): predecrement addressing mode				#
18204#	    (1): postincrement or control addressing mode		#
18205#	@ = (0): move listed regs from memory to the FPU		#
18206#	    (1): move listed regs from the FPU to memory		#
18207#	$$$    : index of data register holding reg select mask		#
18208#									#
18209# NOTES:								#
18210#	If the data register holds a zero, then the			#
18211#	instruction is a nop.						#
18212#									#
18213#########################################################################
18214
18215	global		fmovm_dynamic
# entry point: fetch the register-select bit string from the data register
# named in the extension word, look up the number of bytes to transfer,
# compute <ea> with fmovm_calc_ea() and then dispatch on direction.
# after the setup below: d0 = size in bytes, d1 = bit string, a0 = <ea>.
18216fmovm_dynamic:
18217
18218# extract the data register in which the bit string resides...
18219	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch lo byte of extword
18220	andi.w		&0x70,%d1		# extract reg bits (6-4)
18221	lsr.b		&0x4,%d1		# shift into lo bits
18222
18223# fetch the bit string into d0...
18224	bsr.l		fetch_dreg		# fetch reg string
18225
18226	andi.l		&0x000000ff,%d0		# keep only lo byte
18227
18228	mov.l		%d0,-(%sp)		# save strg
18229	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0	# d0 = # of bytes for strg
18230	mov.l		%d0,-(%sp)		# save size
18231	bsr.l		fmovm_calc_ea		# calculate <ea>
18232	mov.l		(%sp)+,%d0		# restore size
18233	mov.l		(%sp)+,%d1		# restore strg (sets Z if zero)
18234
18235# if the bit string is a zero, then the operation is a no-op
18236# but, make sure that we've calculated ea and advanced the opword pointer
# (the beq below relies on the condition codes left by the pop of strg.)
18237	beq.w		fmovm_data_done
18238
18239# separate move ins from move outs...
# btst on memory tests the byte at EXC_EXTWORD, i.e. the hi byte of the
# extension word; bit 5 of that byte is the '@' direction bit.
18240	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
18241	beq.w		fmovm_data_in		# bit clear: it's a move in
18242
18243#############
18244# MOVE OUT: #
18245#############
# reached by fall-through from fmovm_dynamic with d0 = size, d1 = bit
# string and a0 = <ea>. bit 4 of the extension word's hi byte (the '&'
# bit) selects control/postincrement (1) or predecrement (0) ordering.
18246fmovm_data_out:
18247	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
18248	bne.w		fmovm_out_ctrl		# control
18249
18250############################
18251fmovm_out_predec:
18252# for predecrement mode, the bit string is the opposite of both control
18253# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254# here, we convert it to be just like the others...
18255	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1	# bit-reverse strg
18256
18257	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
18258	beq.b		fmovm_out_ctrl		# user
18259
18260fmovm_out_predec_s:
18261	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262	bne.b		fmovm_out_ctrl		# no; handle like the others
18263
18264# the operation was unfortunately an: fmovm.x dn,-(sp)
18265# called from supervisor mode.
18266# we're also passing "size" and "strg" back to the calling routine
# (d0 = size in bytes, d1 = converted bit string); nothing is written here.
18267	rts
18268
18269############################
# build an image of the selected FP registers on the supervisor stack and
# copy it to the destination with _dmem_write().
# in : a0 = destination <ea>, d0 = size in bytes,
#      d1 = bit string in control/postincrement order (bit7 = FP0).
# the lo byte of d1 is shifted left one bit per register so that the
# register under test is always in bit 7 (tested via the N flag / bpl).
# FP0 and FP1 are copied from their EXC_FP0/EXC_FP1 save slots in the
# exception frame; FP2-FP7 are stored straight from the FPU with fmovm.x.
18270fmovm_out_ctrl:
18271	mov.l		%a0,%a1			# move <ea> to a1
18272
18273	sub.l		%d0,%sp			# subtract size of dump
18274	lea		(%sp),%a0		# a0 = write ptr into dump area
18275
18276	tst.b		%d1			# should FP0 be moved?
18277	bpl.b		fmovm_out_ctrl_fp1	# no
18278
18279	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
18280	mov.l		0x4+EXC_FP0(%a6),(%a0)+
18281	mov.l		0x8+EXC_FP0(%a6),(%a0)+
18282
18283fmovm_out_ctrl_fp1:
18284	lsl.b		&0x1,%d1		# should FP1 be moved?
18285	bpl.b		fmovm_out_ctrl_fp2	# no
18286
18287	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
18288	mov.l		0x4+EXC_FP1(%a6),(%a0)+
18289	mov.l		0x8+EXC_FP1(%a6),(%a0)+
18290
18291fmovm_out_ctrl_fp2:
18292	lsl.b		&0x1,%d1		# should FP2 be moved?
18293	bpl.b		fmovm_out_ctrl_fp3	# no
18294
18295	fmovm.x		&0x20,(%a0)		# yes
18296	add.l		&0xc,%a0		# advance past 12-byte image
18297
18298fmovm_out_ctrl_fp3:
18299	lsl.b		&0x1,%d1		# should FP3 be moved?
18300	bpl.b		fmovm_out_ctrl_fp4	# no
18301
18302	fmovm.x		&0x10,(%a0)		# yes
18303	add.l		&0xc,%a0
18304
18305fmovm_out_ctrl_fp4:
18306	lsl.b		&0x1,%d1		# should FP4 be moved?
18307	bpl.b		fmovm_out_ctrl_fp5	# no
18308
18309	fmovm.x		&0x08,(%a0)		# yes
18310	add.l		&0xc,%a0
18311
18312fmovm_out_ctrl_fp5:
18313	lsl.b		&0x1,%d1		# should FP5 be moved?
18314	bpl.b		fmovm_out_ctrl_fp6	# no
18315
18316	fmovm.x		&0x04,(%a0)		# yes
18317	add.l		&0xc,%a0
18318
18319fmovm_out_ctrl_fp6:
18320	lsl.b		&0x1,%d1		# should FP6 be moved?
18321	bpl.b		fmovm_out_ctrl_fp7	# no
18322
18323	fmovm.x		&0x02,(%a0)		# yes
18324	add.l		&0xc,%a0
18325
18326fmovm_out_ctrl_fp7:
18327	lsl.b		&0x1,%d1		# should FP7 be moved?
18328	bpl.b		fmovm_out_ctrl_done	# no
18329
18330	fmovm.x		&0x01,(%a0)		# yes
18331	add.l		&0xc,%a0
18332
18333fmovm_out_ctrl_done:
18334	mov.l		%a1,L_SCR1(%a6)		# save dst <ea> for fmovm_err
18335
18336	lea		(%sp),%a0		# pass: supervisor src
18337	mov.l		%d0,-(%sp)		# save size
18338	bsr.l		_dmem_write		# copy data to user mem
18339
18340	mov.l		(%sp)+,%d0		# retrieve size
18341	add.l		%d0,%sp			# clear fpreg data from stack
18342
18343	tst.l		%d1			# did dstore err?
18344	bne.w		fmovm_out_err		# yes
18345
18346	rts
18347
18348############
18349# MOVE IN: #
18350############
# read the register images from <ea> into a buffer on the supervisor stack
# with _dmem_read(), then distribute them to the selected FP registers.
# in : a0 = source <ea>, d0 = size in bytes, d1 = bit string (bit7 = FP0).
# FP0 and FP1 are written to their EXC_FP0/EXC_FP1 save slots in the
# exception frame; FP2-FP7 are loaded straight into the FPU with fmovm.x.
# on a failed read the branch to fmovm_in_err is taken with the saved bit
# string and the buffer still on the stack.
18351fmovm_data_in:
18352	mov.l		%a0,L_SCR1(%a6)		# save src <ea> for fmovm_err
18353
18354	sub.l		%d0,%sp			# make room for fpregs
18355	lea		(%sp),%a1		# a1 = buffer (read destination)
18356
18357	mov.l		%d1,-(%sp)		# save bit string for later
18358	mov.l		%d0,-(%sp)		# save # of bytes
18359
18360	bsr.l		_dmem_read		# copy data from user mem
18361
18362	mov.l		(%sp)+,%d0		# retrieve # of bytes
18363
18364	tst.l		%d1			# did dfetch fail?
18365	bne.w		fmovm_in_err		# yes
18366
18367	mov.l		(%sp)+,%d1		# load bit string
18368
18369	lea		(%sp),%a0		# addr of stack
18370
18371	tst.b		%d1			# should FP0 be moved?
18372	bpl.b		fmovm_data_in_fp1	# no
18373
18374	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
18375	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
18376	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
18377
18378fmovm_data_in_fp1:
18379	lsl.b		&0x1,%d1		# should FP1 be moved?
18380	bpl.b		fmovm_data_in_fp2	# no
18381
18382	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
18383	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
18384	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
18385
18386fmovm_data_in_fp2:
18387	lsl.b		&0x1,%d1		# should FP2 be moved?
18388	bpl.b		fmovm_data_in_fp3	# no
18389
18390	fmovm.x		(%a0)+,&0x20		# yes
18391
18392fmovm_data_in_fp3:
18393	lsl.b		&0x1,%d1		# should FP3 be moved?
18394	bpl.b		fmovm_data_in_fp4	# no
18395
18396	fmovm.x		(%a0)+,&0x10		# yes
18397
18398fmovm_data_in_fp4:
18399	lsl.b		&0x1,%d1		# should FP4 be moved?
18400	bpl.b		fmovm_data_in_fp5	# no
18401
18402	fmovm.x		(%a0)+,&0x08		# yes
18403
18404fmovm_data_in_fp5:
18405	lsl.b		&0x1,%d1		# should FP5 be moved?
18406	bpl.b		fmovm_data_in_fp6	# no
18407
18408	fmovm.x		(%a0)+,&0x04		# yes
18409
18410fmovm_data_in_fp6:
18411	lsl.b		&0x1,%d1		# should FP6 be moved?
18412	bpl.b		fmovm_data_in_fp7	# no
18413
18414	fmovm.x		(%a0)+,&0x02		# yes
18415
18416fmovm_data_in_fp7:
18417	lsl.b		&0x1,%d1		# should FP7 be moved?
18418	bpl.b		fmovm_data_in_done	# no
18419
18420	fmovm.x		(%a0)+,&0x01		# yes
18421
18422fmovm_data_in_done:
18423	add.l		%d0,%sp			# remove fpregs from stack
18424	rts
18425
18426#####################################
18427
# common exit for the no-op case: fmovm_dynamic branches here when the bit
# string is zero, after <ea> has already been calculated.
18428fmovm_data_done:
18429	rts
18430
18431##############################################################################
18432
18433#
18434# table indexed by the operation's bit string that gives the number
18435# of bytes that will be moved.
18436#
18437# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18438#
# 256 one-byte entries, 8 per row; row k holds the entries for bit strings
# 8*k .. 8*k+7. smallest entry is 0x00 (no regs), largest is 0x60 (8 regs).
# fmovm_dynamic reads it with a pc-relative indexed byte load.
18439tbl_fmovm_size:
18440	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18441	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18442	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18443	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18444	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18445	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18446	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18447	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18448	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18449	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18450	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18451	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18452	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18454	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18455	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18456	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18457	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18458	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18460	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18461	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18462	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18464	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18465	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18466	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18467	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18468	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18470	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18471	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
18472
18473#
18474# table to convert a pre-decrement bit string into a post-increment
18475# or control bit string.
18476# ex:	0x00	==>	0x00
18477#	0x01	==>	0x80
18478#	0x02	==>	0x40
18479#		.
18480#		.
18481#	0xfd	==>	0xbf
18482#	0xfe	==>	0x7f
18483#	0xff	==>	0xff
18484#
# i.e. entry i is the byte i with its 8 bits in reverse order. 256 one-byte
# entries, 8 per row; used by fmovm_out_predec via a pc-relative indexed
# byte load.
18485tbl_fmovm_convert:
18486	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18487	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18488	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18489	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18490	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18491	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18492	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18493	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18494	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18495	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18496	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18497	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18498	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18499	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18500	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18501	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18502	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18503	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18504	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18505	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18506	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18507	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18508	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18509	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18510	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18511	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18512	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18513	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18514	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18515	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18516	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18517	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
18518
18519	global		fmovm_calc_ea
18520###############################################
18521# fmovm_calc_ea: calculate effective address  #
18522###############################################
# in : d0 = # of bytes the instruction will transfer (used as the
#           increment/decrement amount by the (An)+ and -(An) handlers)
# out: a0 = effective address (set by the per-mode handler jumped to)
# the handler is entered with a0 = # of bytes and d1 = <ea> register field;
# it returns directly to this routine's caller. handlers that must fetch
# extension words leave through iea_iacc if the fetch fails.
18523fmovm_calc_ea:
18524	mov.l		%d0,%a0			# move # bytes to a0
18525
18526# currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527# easily changed if they were inputs passed in registers.
18528	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
18529	mov.w		%d0,%d1			# make a copy
18530
18531	andi.w		&0x3f,%d0		# extract {mode,reg} (bits 5-0)
18532	andi.l		&0x7,%d1		# extract reg  field
18533
18534# jump to the corresponding function for each {MODE,REG} pair.
18535	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18537
# 64 word offsets relative to tbl_fea_mode, one per {mode,reg} pair.
# entries of "tbl_fea_mode - tbl_fea_mode" (offset 0) belong to pairs that
# have no handler here; presumably those modes never reach this routine.
18538	swbeg		&64
18539tbl_fea_mode:
# mode 000: Dn (no handler)
18540	short		tbl_fea_mode	-	tbl_fea_mode
18541	short		tbl_fea_mode	-	tbl_fea_mode
18542	short		tbl_fea_mode	-	tbl_fea_mode
18543	short		tbl_fea_mode	-	tbl_fea_mode
18544	short		tbl_fea_mode	-	tbl_fea_mode
18545	short		tbl_fea_mode	-	tbl_fea_mode
18546	short		tbl_fea_mode	-	tbl_fea_mode
18547	short		tbl_fea_mode	-	tbl_fea_mode
18548
# mode 001: An (no handler)
18549	short		tbl_fea_mode	-	tbl_fea_mode
18550	short		tbl_fea_mode	-	tbl_fea_mode
18551	short		tbl_fea_mode	-	tbl_fea_mode
18552	short		tbl_fea_mode	-	tbl_fea_mode
18553	short		tbl_fea_mode	-	tbl_fea_mode
18554	short		tbl_fea_mode	-	tbl_fea_mode
18555	short		tbl_fea_mode	-	tbl_fea_mode
18556	short		tbl_fea_mode	-	tbl_fea_mode
18557
# mode 010: (An)
18558	short		faddr_ind_a0	-	tbl_fea_mode
18559	short		faddr_ind_a1	-	tbl_fea_mode
18560	short		faddr_ind_a2	-	tbl_fea_mode
18561	short		faddr_ind_a3	-	tbl_fea_mode
18562	short		faddr_ind_a4	-	tbl_fea_mode
18563	short		faddr_ind_a5	-	tbl_fea_mode
18564	short		faddr_ind_a6	-	tbl_fea_mode
18565	short		faddr_ind_a7	-	tbl_fea_mode
18566
# mode 011: (An)+
18567	short		faddr_ind_p_a0	-	tbl_fea_mode
18568	short		faddr_ind_p_a1	-	tbl_fea_mode
18569	short		faddr_ind_p_a2	-	tbl_fea_mode
18570	short		faddr_ind_p_a3	-	tbl_fea_mode
18571	short		faddr_ind_p_a4	-	tbl_fea_mode
18572	short		faddr_ind_p_a5	-	tbl_fea_mode
18573	short		faddr_ind_p_a6	-	tbl_fea_mode
18574	short		faddr_ind_p_a7	-	tbl_fea_mode
18575
# mode 100: -(An)
18576	short		faddr_ind_m_a0	-	tbl_fea_mode
18577	short		faddr_ind_m_a1	-	tbl_fea_mode
18578	short		faddr_ind_m_a2	-	tbl_fea_mode
18579	short		faddr_ind_m_a3	-	tbl_fea_mode
18580	short		faddr_ind_m_a4	-	tbl_fea_mode
18581	short		faddr_ind_m_a5	-	tbl_fea_mode
18582	short		faddr_ind_m_a6	-	tbl_fea_mode
18583	short		faddr_ind_m_a7	-	tbl_fea_mode
18584
# mode 101: (d16,An)
18585	short		faddr_ind_disp_a0	-	tbl_fea_mode
18586	short		faddr_ind_disp_a1	-	tbl_fea_mode
18587	short		faddr_ind_disp_a2	-	tbl_fea_mode
18588	short		faddr_ind_disp_a3	-	tbl_fea_mode
18589	short		faddr_ind_disp_a4	-	tbl_fea_mode
18590	short		faddr_ind_disp_a5	-	tbl_fea_mode
18591	short		faddr_ind_disp_a6	-	tbl_fea_mode
18592	short		faddr_ind_disp_a7	-	tbl_fea_mode
18593
# mode 110: An-based indexed / memory indirect (one handler, reg in d1)
18594	short		faddr_ind_ext	-	tbl_fea_mode
18595	short		faddr_ind_ext	-	tbl_fea_mode
18596	short		faddr_ind_ext	-	tbl_fea_mode
18597	short		faddr_ind_ext	-	tbl_fea_mode
18598	short		faddr_ind_ext	-	tbl_fea_mode
18599	short		faddr_ind_ext	-	tbl_fea_mode
18600	short		faddr_ind_ext	-	tbl_fea_mode
18601	short		faddr_ind_ext	-	tbl_fea_mode
18602
# mode 111: reg 0 = (xxx).W, 1 = (xxx).L, 2 = (d16,PC), 3 = PC indexed,
#           reg 4-7 have no handler
18603	short		fabs_short	-	tbl_fea_mode
18604	short		fabs_long	-	tbl_fea_mode
18605	short		fpc_ind		-	tbl_fea_mode
18606	short		fpc_ind_ext	-	tbl_fea_mode
18607	short		tbl_fea_mode	-	tbl_fea_mode
18608	short		tbl_fea_mode	-	tbl_fea_mode
18609	short		tbl_fea_mode	-	tbl_fea_mode
18610	short		tbl_fea_mode	-	tbl_fea_mode
18611
18612###################################
18613# Address register indirect: (An) #
18614###################################
# one handler per address register; each returns the register's current
# value in a0 as the <ea>. a0/a1 are read from the EXC_DREGS+0x8/+0xc
# slots of the exception frame, a2-a5 from the live registers, a6 from the
# longword at (a6) and a7 from EXC_A7(a6).
# NOTE(review): presumably a0/a1/a6/a7 come from the frame because the
# handler uses those registers itself - confirm against the entry code.
18615faddr_ind_a0:
18616	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
18617	rts
18618
18619faddr_ind_a1:
18620	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
18621	rts
18622
18623faddr_ind_a2:
18624	mov.l		%a2,%a0			# Get current a2
18625	rts
18626
18627faddr_ind_a3:
18628	mov.l		%a3,%a0			# Get current a3
18629	rts
18630
18631faddr_ind_a4:
18632	mov.l		%a4,%a0			# Get current a4
18633	rts
18634
18635faddr_ind_a5:
18636	mov.l		%a5,%a0			# Get current a5
18637	rts
18638
18639faddr_ind_a6:
18640	mov.l		(%a6),%a0		# Get current a6
18641	rts
18642
18643faddr_ind_a7:
18644	mov.l		EXC_A7(%a6),%a0		# Get current a7
18645	rts
18646
18647#####################################################
18648# Address register indirect w/ postincrement: (An)+ #
18649#####################################################
# in : a0 = # of bytes to increment by (set up by fmovm_calc_ea)
# out: a0 = <ea> = value of An before the increment
#      An (frame slot or live register) = old value + # of bytes
# d0 and d1 are used as scratch. the a7 variant also stores mia7_flg in
# SPCOND_FLG(a6) to flag the special (a7)+ case.
18650faddr_ind_p_a0:
18651	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
18652	mov.l		%d0,%d1
18653	add.l		%a0,%d1			# Increment
18654	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
18655	mov.l		%d0,%a0			# return pre-increment <ea>
18656	rts
18657
18658faddr_ind_p_a1:
18659	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
18660	mov.l		%d0,%d1
18661	add.l		%a0,%d1			# Increment
18662	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
18663	mov.l		%d0,%a0
18664	rts
18665
18666faddr_ind_p_a2:
18667	mov.l		%a2,%d0			# Get current a2
18668	mov.l		%d0,%d1
18669	add.l		%a0,%d1			# Increment
18670	mov.l		%d1,%a2			# Save incr value
18671	mov.l		%d0,%a0
18672	rts
18673
18674faddr_ind_p_a3:
18675	mov.l		%a3,%d0			# Get current a3
18676	mov.l		%d0,%d1
18677	add.l		%a0,%d1			# Increment
18678	mov.l		%d1,%a3			# Save incr value
18679	mov.l		%d0,%a0
18680	rts
18681
18682faddr_ind_p_a4:
18683	mov.l		%a4,%d0			# Get current a4
18684	mov.l		%d0,%d1
18685	add.l		%a0,%d1			# Increment
18686	mov.l		%d1,%a4			# Save incr value
18687	mov.l		%d0,%a0
18688	rts
18689
18690faddr_ind_p_a5:
18691	mov.l		%a5,%d0			# Get current a5
18692	mov.l		%d0,%d1
18693	add.l		%a0,%d1			# Increment
18694	mov.l		%d1,%a5			# Save incr value
18695	mov.l		%d0,%a0
18696	rts
18697
18698faddr_ind_p_a6:
18699	mov.l		(%a6),%d0		# Get current a6
18700	mov.l		%d0,%d1
18701	add.l		%a0,%d1			# Increment
18702	mov.l		%d1,(%a6)		# Save incr value
18703	mov.l		%d0,%a0
18704	rts
18705
18706faddr_ind_p_a7:
18707	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18708
18709	mov.l		EXC_A7(%a6),%d0		# Get current a7
18710	mov.l		%d0,%d1
18711	add.l		%a0,%d1			# Increment
18712	mov.l		%d1,EXC_A7(%a6)		# Save incr value
18713	mov.l		%d0,%a0
18714	rts
18715
18716####################################################
18717# Address register indirect w/ predecrement: -(An) #
18718####################################################
# in : a0 = # of bytes to decrement by (set up by fmovm_calc_ea)
# out: a0 = <ea> = value of An after the decrement
#      An (frame slot or live register) = old value - # of bytes
# d0 is used as scratch. the a7 variant also stores mda7_flg in
# SPCOND_FLG(a6); fmovm_out_predec_s tests for that flag.
18719faddr_ind_m_a0:
18720	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
18721	sub.l		%a0,%d0			# Decrement
18722	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
18723	mov.l		%d0,%a0			# return decremented <ea>
18724	rts
18725
18726faddr_ind_m_a1:
18727	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
18728	sub.l		%a0,%d0			# Decrement
18729	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
18730	mov.l		%d0,%a0
18731	rts
18732
18733faddr_ind_m_a2:
18734	mov.l		%a2,%d0			# Get current a2
18735	sub.l		%a0,%d0			# Decrement
18736	mov.l		%d0,%a2			# Save decr value
18737	mov.l		%d0,%a0
18738	rts
18739
18740faddr_ind_m_a3:
18741	mov.l		%a3,%d0			# Get current a3
18742	sub.l		%a0,%d0			# Decrement
18743	mov.l		%d0,%a3			# Save decr value
18744	mov.l		%d0,%a0
18745	rts
18746
18747faddr_ind_m_a4:
18748	mov.l		%a4,%d0			# Get current a4
18749	sub.l		%a0,%d0			# Decrement
18750	mov.l		%d0,%a4			# Save decr value
18751	mov.l		%d0,%a0
18752	rts
18753
18754faddr_ind_m_a5:
18755	mov.l		%a5,%d0			# Get current a5
18756	sub.l		%a0,%d0			# Decrement
18757	mov.l		%d0,%a5			# Save decr value
18758	mov.l		%d0,%a0
18759	rts
18760
18761faddr_ind_m_a6:
18762	mov.l		(%a6),%d0		# Get current a6
18763	sub.l		%a0,%d0			# Decrement
18764	mov.l		%d0,(%a6)		# Save decr value
18765	mov.l		%d0,%a0
18766	rts
18767
18768faddr_ind_m_a7:
18769	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18770
18771	mov.l		EXC_A7(%a6),%d0		# Get current a7
18772	sub.l		%a0,%d0			# Decrement
18773	mov.l		%d0,EXC_A7(%a6)		# Save decr value
18774	mov.l		%d0,%a0
18775	rts
18776
18777########################################################
18778# Address register indirect w/ displacement: (d16, An) #
18779########################################################
# each handler fetches the 16-bit displacement word at EXC_EXTWPTR(a6)
# (advancing that pointer by 2), sign extends it by moving it into a0 as
# a word, and adds the current value of An.
# out: a0 = An + d16. the An value is not modified.
# if the instruction fetch fails (d1 != 0) control leaves through iea_iacc.
18780faddr_ind_disp_a0:
18781	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18782	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18783	bsr.l		_imem_read_word
18784
18785	tst.l		%d1			# did ifetch fail?
18786	bne.l		iea_iacc		# yes
18787
18788	mov.w		%d0,%a0			# sign extend displacement
18789
18790	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
18791	rts
18792
18793faddr_ind_disp_a1:
18794	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18795	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18796	bsr.l		_imem_read_word
18797
18798	tst.l		%d1			# did ifetch fail?
18799	bne.l		iea_iacc		# yes
18800
18801	mov.w		%d0,%a0			# sign extend displacement
18802
18803	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
18804	rts
18805
18806faddr_ind_disp_a2:
18807	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18808	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18809	bsr.l		_imem_read_word
18810
18811	tst.l		%d1			# did ifetch fail?
18812	bne.l		iea_iacc		# yes
18813
18814	mov.w		%d0,%a0			# sign extend displacement
18815
18816	add.l		%a2,%a0			# a2 + d16
18817	rts
18818
18819faddr_ind_disp_a3:
18820	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18821	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18822	bsr.l		_imem_read_word
18823
18824	tst.l		%d1			# did ifetch fail?
18825	bne.l		iea_iacc		# yes
18826
18827	mov.w		%d0,%a0			# sign extend displacement
18828
18829	add.l		%a3,%a0			# a3 + d16
18830	rts
18831
18832faddr_ind_disp_a4:
18833	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18834	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18835	bsr.l		_imem_read_word
18836
18837	tst.l		%d1			# did ifetch fail?
18838	bne.l		iea_iacc		# yes
18839
18840	mov.w		%d0,%a0			# sign extend displacement
18841
18842	add.l		%a4,%a0			# a4 + d16
18843	rts
18844
18845faddr_ind_disp_a5:
18846	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18847	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18848	bsr.l		_imem_read_word
18849
18850	tst.l		%d1			# did ifetch fail?
18851	bne.l		iea_iacc		# yes
18852
18853	mov.w		%d0,%a0			# sign extend displacement
18854
18855	add.l		%a5,%a0			# a5 + d16
18856	rts
18857
18858faddr_ind_disp_a6:
18859	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18860	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18861	bsr.l		_imem_read_word
18862
18863	tst.l		%d1			# did ifetch fail?
18864	bne.l		iea_iacc		# yes
18865
18866	mov.w		%d0,%a0			# sign extend displacement
18867
18868	add.l		(%a6),%a0		# a6 + d16
18869	rts
18870
18871faddr_ind_disp_a7:
18872	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18873	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18874	bsr.l		_imem_read_word
18875
18876	tst.l		%d1			# did ifetch fail?
18877	bne.l		iea_iacc		# yes
18878
18879	mov.w		%d0,%a0			# sign extend displacement
18880
18881	add.l		EXC_A7(%a6),%a0		# a7 + d16
18882	rts
18883
18884########################################################################
18885# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
18887# Memory indirect postindexed: ([bd, An], Xn, od)		       #
18888# Memory indirect preindexed: ([bd, An, Xn], od)		       #
18889########################################################################
# in : d1 = <ea> register field (0-7) from fmovm_calc_ea
# out: a0 = <ea>
# fetches the base An through fetch_dreg() (index = reg + 8), then the
# extension word. if bit 8 of the extension word is set the work is handed
# to fcalc_mem_ind with a0 = base and d0 = extword; otherwise the brief
# form is decoded here: <ea> = An + (Xn.{w,l} << scale) + d8.
# extension word fields used: bits 15-12 = index register number,
# bit 11 = index size (1 = long), bits 10-9 = scale, bits 7-0 = d8.
# NOTE(review): on a failed ifetch the saved base is still on the stack
# when iea_iacc is entered; presumably iea_iacc resets sp - confirm.
18890faddr_ind_ext:
18891	addq.l		&0x8,%d1		# select An in fetch_dreg's index
18892	bsr.l		fetch_dreg		# fetch base areg
18893	mov.l		%d0,-(%sp)		# save base across the ifetch
18894
18895	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18896	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18897	bsr.l		_imem_read_word		# fetch extword in d0
18898
18899	tst.l		%d1			# did ifetch fail?
18900	bne.l		iea_iacc		# yes
18901
18902	mov.l		(%sp)+,%a0		# a0 = base An
18903
18904	btst		&0x8,%d0		# full extension word format?
18905	bne.w		fcalc_mem_ind		# yes
18906
18907	mov.l		%d0,L_SCR1(%a6)		# hold extword
18908
18909	mov.l		%d0,%d1
18910	rol.w		&0x4,%d1		# rotate bits 15-12 into lo nibble
18911	andi.w		&0xf,%d1		# extract index regno
18912
18913# count on fetch_dreg() not to alter a0...
18914	bsr.l		fetch_dreg		# fetch index
18915
18916	mov.l		%d2,-(%sp)		# save d2
18917	mov.l		L_SCR1(%a6),%d2		# fetch extword
18918
18919	btst		&0xb,%d2		# is it word or long?
18920	bne.b		faii8_long
18921	ext.l		%d0			# sign extend word index
18922faii8_long:
18923	mov.l		%d2,%d1
18924	rol.w		&0x7,%d1		# rotate bits 10-9 into bits 1-0
18925	andi.l		&0x3,%d1		# extract scale value
18926
18927	lsl.l		%d1,%d0			# shift index by scale
18928
18929	extb.l		%d2			# sign extend displacement
18930	add.l		%d2,%d0			# index + disp
18931	add.l		%d0,%a0			# An + (index + disp)
18932
18933	mov.l		(%sp)+,%d2		# restore old d2
18934	rts
18935
18936###########################
18937# Absolute short: (XXX).W #
18938###########################
# fetch the 16-bit address word at EXC_EXTWPTR(a6) (advancing the pointer
# by 2) and return it in a0; the word move to an address register sign
# extends it to 32 bits. leaves through iea_iacc if the fetch fails.
18939fabs_short:
18940	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18941	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18942	bsr.l		_imem_read_word		# fetch short address
18943
18944	tst.l		%d1			# did ifetch fail?
18945	bne.l		iea_iacc		# yes
18946
18947	mov.w		%d0,%a0			# return <ea> in a0
18948	rts
18949
18950##########################
18951# Absolute long: (XXX).L #
18952##########################
# fetch the 32-bit address at EXC_EXTWPTR(a6) (advancing the pointer by 4)
# and return it in a0. leaves through iea_iacc if the fetch fails.
18953fabs_long:
18954	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18955	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
18956	bsr.l		_imem_read_long		# fetch long address
18957
18958	tst.l		%d1			# did ifetch fail?
18959	bne.l		iea_iacc		# yes
18960
18961	mov.l		%d0,%a0			# return <ea> in a0
18962	rts
18963
18964#######################################################
18965# Program counter indirect w/ displacement: (d16, PC) #
18966#######################################################
# out: a0 = (address of the displacement word) + sign-extended d16.
# leaves through iea_iacc if the fetch fails.
18967fpc_ind:
18968	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18969	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18970	bsr.l		_imem_read_word		# fetch word displacement
18971
18972	tst.l		%d1			# did ifetch fail?
18973	bne.l		iea_iacc		# yes
18974
18975	mov.w		%d0,%a0			# sign extend displacement
18976
18977	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
18978
18979# the extwptr was already advanced by 2 above (past the displacement
# word); back that out so the base is the displacement word's address.
18980	subq.l		&0x2,%a0		# adjust <ea>
18981	rts
18982
18983##########################################################
18984# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
18985# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
18986# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
18987# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
18988##########################################################
# out: a0 = <ea>
# the base is the address of the extension word (extwptr after the fetch,
# minus 2). if bit 8 of the extension word is set the work is handed to
# fcalc_mem_ind with a0 = base and d0 = extword; otherwise the brief form
# is decoded here: <ea> = base + (Xn.{w,l} << scale) + d8.
# leaves through iea_iacc if the fetch fails.
18989fpc_ind_ext:
18990	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18991	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18992	bsr.l		_imem_read_word		# fetch ext word
18993
18994	tst.l		%d1			# did ifetch fail?
18995	bne.l		iea_iacc		# yes
18996
18997	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
18998	subq.l		&0x2,%a0		# adjust base
18999
19000	btst		&0x8,%d0		# is disp only 8 bits?
19001	bne.w		fcalc_mem_ind		# no; calc memory indirect
19002
19003	mov.l		%d0,L_SCR1(%a6)		# store extword
19004
19005	mov.l		%d0,%d1			# make extword copy
19006	rol.w		&0x4,%d1		# rotate reg num into place
19007	andi.w		&0xf,%d1		# extract register number
19008
19009# count on fetch_dreg() not to alter a0...
19010	bsr.l		fetch_dreg		# fetch index
19011
19012	mov.l		%d2,-(%sp)		# save d2
19013	mov.l		L_SCR1(%a6),%d2		# fetch extword
19014
19015	btst		&0xb,%d2		# is index word or long?
19016	bne.b		fpii8_long		# long
19017	ext.l		%d0			# sign extend word index
19018fpii8_long:
19019	mov.l		%d2,%d1
19020	rol.w		&0x7,%d1		# rotate scale value into place
19021	andi.l		&0x3,%d1		# extract scale value
19022
19023	lsl.l		%d1,%d0			# shift index by scale
19024
19025	extb.l		%d2			# sign extend displacement
19026	add.l		%d2,%d0			# disp + index
19027	add.l		%d0,%a0			# PC base + (index + disp)
19028
19029	mov.l		(%sp)+,%d2		# restore temp register
19030	rts
19031
# full-format extension word decoding, shared by faddr_ind_ext and
# fpc_ind_ext.
# in : d0 = extension word, a0 = base (An value or PC base)
# out: a0 = <ea>; d2-d5 are saved on entry and restored on every exit.
# working registers:
19032# d2 = index
19033# d3 = base
19034# d4 = od
19035# d5 = extword
# extension word bits tested below: 6 = index suppress, 7 = base suppress,
# 5-4 = bd size, 2 = post/pre indexing, 1-0 = od size (0 = no memory
# indirection). instruction fetch failures leave through fcea_iacc and a
# failed read of the indirect longword through fcea_err.
19036fcalc_mem_ind:
19037	btst		&0x6,%d0		# is the index suppressed?
19038	beq.b		fcalc_index
19039
19040	movm.l		&0x3c00,-(%sp)		# save d2-d5
19041
19042	mov.l		%d0,%d5			# put extword in d5
19043	mov.l		%a0,%d3			# put base in d3
19044
19045	clr.l		%d2			# yes, so index = 0
19046	bra.b		fbase_supp_ck
19047
19048# index:
19049fcalc_index:
19050	mov.l		%d0,L_SCR1(%a6)		# save d0 (extword)
19051	bfextu		%d0{&16:&4},%d1		# fetch dreg index
19052	bsr.l		fetch_dreg
19053
19054	movm.l		&0x3c00,-(%sp)		# save d2-d5
19055	mov.l		%d0,%d2			# put index in d2
19056	mov.l		L_SCR1(%a6),%d5		# put extword in d5
19057	mov.l		%a0,%d3			# put base in d3
19058
19059	btst		&0xb,%d5		# is index word or long?
19060	bne.b		fno_ext
19061	ext.l		%d2			# sign extend word index
19062
19063fno_ext:
19064	bfextu		%d5{&21:&2},%d0		# extract scale (bits 10-9)
19065	lsl.l		%d0,%d2			# shift index by scale
19066
19067# base address (passed as parameter in d3):
19068# we clear the value here if it should actually be suppressed.
19069fbase_supp_ck:
19070	btst		&0x7,%d5		# is the base register suppressed?
19071	beq.b		fno_base_sup
19072	clr.l		%d3
19073
19074# base displacement:
19075fno_base_sup:
19076	bfextu		%d5{&26:&2},%d0		# get bd size
19077#	beq.l		fmovm_error		# if (size == 0) it's reserved
19078
19079	cmpi.b		%d0,&0x2
19080	blt.b		fno_bd			# size < 2: no bd to fetch
19081	beq.b		fget_word_bd		# size = 2: word bd
19082
# size = 3: long bd
19083	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19084	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19085	bsr.l		_imem_read_long
19086
19087	tst.l		%d1			# did ifetch fail?
19088	bne.l		fcea_iacc		# yes
19089
19090	bra.b		fchk_ind
19091
19092fget_word_bd:
19093	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19094	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
19095	bsr.l		_imem_read_word
19096
19097	tst.l		%d1			# did ifetch fail?
19098	bne.l		fcea_iacc		# yes
19099
19100	ext.l		%d0			# sign extend bd
19101
19102fchk_ind:
19103	add.l		%d0,%d3			# base += bd
19104
19105# outer displacement:
19106fno_bd:
19107	bfextu		%d5{&30:&2},%d0		# is od suppressed?
19108	beq.w		faii_bd			# field = 0: no memory indirect
19109
19110	cmpi.b		%d0,&0x2
19111	blt.b		fnull_od		# field = 1: null od
19112	beq.b		fword_od		# field = 2: word od
19113
# field = 3: long od
19114	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19115	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19116	bsr.l		_imem_read_long
19117
19118	tst.l		%d1			# did ifetch fail?
19119	bne.l		fcea_iacc		# yes
19120
19121	bra.b		fadd_them
19122
19123fword_od:
19124	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19125	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
19126	bsr.l		_imem_read_word
19127
19128	tst.l		%d1			# did ifetch fail?
19129	bne.l		fcea_iacc		# yes
19130
19131	ext.l		%d0			# sign extend od
19132	bra.b		fadd_them
19133
19134fnull_od:
19135	clr.l		%d0
19136
19137fadd_them:
19138	mov.l		%d0,%d4			# put od in d4
19139
19140	btst		&0x2,%d5		# pre or post indexing?
19141	beq.b		fpre_indexed
19142
# postindexed: read the pointer at (base + bd), then add index and od.
19143	mov.l		%d3,%a0
19144	bsr.l		_dmem_read_long
19145
19146	tst.l		%d1			# did dfetch fail?
19147	bne.w		fcea_err		# yes
19148
19149	add.l		%d2,%d0			# <ea> += index
19150	add.l		%d4,%d0			# <ea> += od
19151	bra.b		fdone_ea
19152
# preindexed: read the pointer at (base + bd + index), then add od.
19153fpre_indexed:
19154	add.l		%d2,%d3			# preindexing
19155	mov.l		%d3,%a0
19156	bsr.l		_dmem_read_long
19157
19158	tst.l		%d1			# did dfetch fail?
19159	bne.w		fcea_err		# yes
19160
19161	add.l		%d4,%d0			# ea += od
19162	bra.b		fdone_ea
19163
# no memory indirection:
19164faii_bd:
19165	add.l		%d2,%d3			# ea = (base + bd) + index
19166	mov.l		%d3,%d0
19167fdone_ea:
19168	mov.l		%d0,%a0			# return <ea> in a0
19169
19170	movm.l		(%sp)+,&0x003c		# restore d2-d5
19171	rts
19172
19173#########################################################
# error exits for fcalc_mem_ind. both restore d2-d5 first.
# fcea_err : the read of the indirect longword failed. a0 = the address
#            that was read (d3), d0 = 0x0101, then exit through iea_dacc.
#            NOTE(review): 0x0101 is presumably the FSLW - confirm.
# fcea_iacc: an extension word fetch failed; exit through iea_iacc.
19174fcea_err:
19175	mov.l		%d3,%a0			# pass failing address in a0
19176
19177	movm.l		(%sp)+,&0x003c		# restore d2-d5
19178	mov.w		&0x0101,%d0
19179	bra.l		iea_dacc
19180
19181fcea_iacc:
19182	movm.l		(%sp)+,&0x003c		# restore d2-d5
19183	bra.l		iea_iacc
19184
# Error exits for fmovm data accesses. Both entry points call restore(), load
# a direction-specific word into d0, and then share fmovm_err, which passes
# the address saved in L_SCR1(a6) to iea_dacc in a0.
# NOTE(review): the callers are outside this view; presumably L_SCR1 holds the
# <ea> of the failed transfer and 0x00e1/0x0161 distinguish write vs. read --
# confirm against the fmovm emulation code and iea_dacc.
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0		# fault info word for the "out" case
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0		# fault info word for the "in" case

fmovm_err:
	mov.l		L_SCR1(%a6),%a0		# a0 = address saved in L_SCR1
	bra.l		iea_dacc
19197
19198#########################################################################
19199# XDEF ****************************************************************	#
19200#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
19201#									#
19202# XREF ****************************************************************	#
19203#	_imem_read_long() - read longword from memory			#
19204#	iea_iacc() - _imem_read_long() failed; error recovery		#
19205#									#
19206# INPUT ***************************************************************	#
19207#	None								#
19208#									#
19209# OUTPUT **************************************************************	#
19210#	If _imem_read_long() doesn't fail:				#
19211#		USER_FPCR(a6)  = new FPCR value				#
19212#		USER_FPSR(a6)  = new FPSR value				#
19213#		USER_FPIAR(a6) = new FPIAR value			#
19214#									#
19215# ALGORITHM ***********************************************************	#
19216#	Decode the instruction type by looking at the extension word	#
19217# in order to see how many control registers to fetch from memory.	#
19218# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
19219# the special access error exit handler iea_iacc().			#
19220#									#
19221# Instruction word decoding:						#
19222#									#
19223#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
19224#									#
19225#		WORD1			WORD2				#
19226#	1111 0010 00 111100	100$ $$00 0000 0000			#
19227#									#
19228#	$$$ (100): FPCR							#
19229#	    (010): FPSR							#
19230#	    (001): FPIAR						#
19231#	    (000): FPIAR						#
19232#									#
19233#########################################################################
19234
	global		fmovm_ctrl
# Dispatch on the first byte of the extension word. Only three encodings are
# matched explicitly (0x9c, 0x98, 0x94); every other value falls through to
# fctrl_in_3 and is handled as FPSR+FPIAR.
# Every handler follows the same pattern per register: take the current
# EXC_EXTWPTR(a6) as the read address, advance the pointer by 4, read one
# longword with _imem_read_long(), leave through iea_iacc if d1 != 0, and
# otherwise save d0 in the matching USER_FPxx(a6) slot.
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes
						# anything else: fall into fctrl_in_3

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# save new FPSR in USER_FPSR(a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# save new FPIAR in USER_FPIAR(a6)
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# save new FPCR in USER_FPCR(a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# save new FPIAR in USER_FPIAR(a6)
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# save new FPCR in USER_FPCR(a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# save new FPSR in USER_FPSR(a6)
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# save new FPCR in USER_FPCR(a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# save new FPSR in USER_FPSR(a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# save new FPIAR in USER_FPIAR(a6)
	rts
19332
19333#########################################################################
19334# XDEF ****************************************************************	#
19335#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
19336#									#
19337# XREF ****************************************************************	#
19338#	inc_areg() - increment an address register			#
19339#	dec_areg() - decrement an address register			#
19340#									#
19341# INPUT ***************************************************************	#
19342#	d0 = number of bytes to adjust <ea> by				#
19343#									#
19344# OUTPUT **************************************************************	#
#	a0 = effective address of the source operand			#
19346#									#
19347# ALGORITHM ***********************************************************	#
19348# "Dummy" CALCulate Effective Address:					#
19349#	The stacked <ea> for FP unimplemented instructions and opclass	#
19350#	two packed instructions is correct with the exception of...	#
19351#									#
19352#	1) -(An)   : The register is not updated regardless of size.	#
19353#		     Also, for extended precision and packed, the	#
19354#		     stacked <ea> value is 8 bytes too big		#
19355#	2) (An)+   : The register is not updated.			#
19356#	3) #<data> : The upper longword of the immediate operand is	#
19357#		     stacked b,w,l and s sizes are completely stacked.	#
19358#		     d,x, and p are not.				#
19359#									#
19360#########################################################################
19361
	global		_dcalc_ea
# In:  d0 = operand size in bytes (amount to adjust An by)
# Out: a0 = effective address to read the operand from
#      SPCOND_FLG(a6) = immed_flg when the addressing mode is #<data>
# The byte count is parked in a0 while d0/d1 are used to decode the mode and
# register fields from the low byte of the opword.
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch low byte of opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# every other mode: stacked <ea> is used as-is
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
# The returned address is (longword stored at USER_FPIAR(a6)) + 4, computed
# with a memory-indirect lea.
# NOTE(review): presumably the saved FPIAR points at the faulting opword, so
# +4 skips the opword and the extension word -- confirm.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # a0 = saved FPIAR + 4 = addr of immediate data
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
# d1 still holds the register number extracted above.
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
# NOTE(review): the size test below reads d0 after dec_areg() returns, so it
# assumes dec_areg() leaves d0 (the byte count) intact -- confirm.
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts
19419
19420#########################################################################
19421# XDEF ****************************************************************	#
19422#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
19423#			 and packed data opclass 3 operations.		#
19424#									#
19425# XREF ****************************************************************	#
19426#	None								#
19427#									#
19428# INPUT ***************************************************************	#
19429#	None								#
19430#									#
19431# OUTPUT **************************************************************	#
19432#	a0 = return correct effective address				#
19433#									#
19434# ALGORITHM ***********************************************************	#
19435#	For opclass 3 extended and packed data operations, the <ea>	#
19436# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the address register (An) itself must	#
# get updated.								#
19439#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
19440# and return that value as the correct <ea> and store that value in An.	#
19441# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
19442#									#
19443#########################################################################
19444
19445# This calc_ea is currently used to retrieve the correct <ea>
19446# for fmove outs of type extended and packed.
	global		_calc_ea_fout
# Out: a0 = effective address to store to.
# Decodes mode/reg from the low byte of the opword. Only (An)+ and -(An) need
# work; each dispatches through an 8-entry table of 16-bit offsets indexed by
# the register number so the right copy of An can be updated:
#	a0/a1 : frame slots EXC_DREGS+0x8 / EXC_DREGS+0xc
#	a2-a5 : the live registers themselves
#	a6/a7 : frame slots EXC_A6 / EXC_A7 (a7 also sets a flag in SPCOND_FLG)
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# d1 = table offset for register n
	mov.l		EXC_EA(%a6),%a0			# a0 = stacked <ea> (returned as-is)
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)	# go add 12 to An

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# each handler bumps its An by 12 (0xc) bytes and returns with a0 = <ea>
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# a0 copy kept in the frame
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# a1 copy kept in the frame
	rts
ceaf_pi2:
	add.l		&0xc,%a2		# a2-a5 are updated in place
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)	# a6 copy kept in the frame
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
	addi.l		&0xc,EXC_A7(%a6)	# a7 copy kept in the frame
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# Both the returned a0 and the stacked EXC_EA are lowered by 8; the handler
# then writes the corrected address into An.
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# d1 = table offset for register n
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0			# a0 = corrected <ea>
	sub.l		&0x8,EXC_EA(%a6)		# fix the stacked copy too
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)	# go store a0 into An

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

# each handler sets its An to the corrected <ea> and returns with a0 = <ea>
ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)	# a0 copy kept in the frame
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)	# a1 copy kept in the frame
	rts
ceaf_pd2:
	mov.l		%a0,%a2			# a2-a5 are updated in place
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)		# a6 copy kept in the frame
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)		# a7 copy kept in the frame
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
	rts
19555
19556#########################################################################
19557# XDEF ****************************************************************	#
19558#	_load_fop(): load operand for unimplemented FP exception	#
19559#									#
19560# XREF ****************************************************************	#
19561#	set_tag_x() - determine ext prec optype tag			#
19562#	set_tag_s() - determine sgl prec optype tag			#
19563#	set_tag_d() - determine dbl prec optype tag			#
19564#	unnorm_fix() - convert normalized number to denorm or zero	#
19565#	norm() - normalize a denormalized number			#
19566#	get_packed() - fetch a packed operand from memory		#
19567#	_dcalc_ea() - calculate <ea>, fixing An in process		#
19568#									#
19569#	_imem_read_{word,long}() - read from instruction memory		#
19570#	_dmem_read() - read from data memory				#
19571#	_dmem_read_{byte,word,long}() - read from data memory		#
19572#									#
19573#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
19574#									#
19575# INPUT ***************************************************************	#
19576#	None								#
19577#									#
19578# OUTPUT **************************************************************	#
19579#	If memory access doesn't fail:					#
19580#		FP_SRC(a6) = source operand in extended precision	#
19581#		FP_DST(a6) = destination operand in extended precision	#
19582#									#
19583# ALGORITHM ***********************************************************	#
19584#	This is called from the Unimplemented FP exception handler in	#
19585# order to load the source and maybe destination operand into		#
19586# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
19587# the source and destination from the FP register file. Set the optype	#
19588# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
19589# convert it to a DENORM or a ZERO.					#
19590#	If the instruction is opclass two (memory->reg), then fetch	#
19591# the destination from the register file and the source operand from	#
19592# memory. Tag and fix both as above w/ opclass zero instructions.	#
19593#	If the source operand is byte,word,long, or single, it may be	#
19594# in the data register file. If it's actually out in memory, use one of	#
19595# the mem_read() routines to fetch it. If the mem_read() access returns	#
19596# a failing value, exit through the special facc_in() routine which	#
19597# will create an access error exception frame from the current exception #
19598# frame.								#
19599#	Immediate data and regular data accesses are separated because	#
19600# if an immediate data access fails, the resulting fault status		#
19601# longword stacked for the access error exception must have the		#
19602# instruction bit set.							#
19603#									#
19604#########################################################################
19605
	global		_load_fop
_load_fop:

#  15     13 12 10  9 7  6       0
# /        \ /   \ /  \ /         \
# ---------------------------------
# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
# ---------------------------------
#

#	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
#	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
#	beq.w		op010			# handle <ea> -> fpn
#	bgt.w		op011			# handle fpn -> <ea>

# we're not using op011 for now...
# bit 6 of the first (high) byte of the command word is bit 14 of the word,
# i.e. the middle opclass bit: set for '010, clear for '000.
	btst		&0x6,EXC_CMDREG(%a6)
	bne.b		op010

############################
# OPCLASS '000: reg -> reg #
############################
# Decide whether a destination operand must be loaded too. The destination is
# loaded when extension bit 5 is set and either bit 4 is clear or the 7-bit
# extension equals 0x38 (fcmp); in every other case only the source is loaded.
op000:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
	btst		&0x5,%d0		# testing extension bits
	beq.b		op000_src		# (bit 5 == 0) => monadic
	btst		&0x4,%d0		# (bit 5 == 1)
	beq.b		op000_dst		# (bit 4 == 0) => dyadic
	and.w		&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b		op000_src		# no: not an fcmp, load source only

op000_dst:
	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST

	bsr.l		set_tag_x		# get dst optype tag

	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b		op000_dst_unnorm	# yes
op000_dst_cont:
	mov.b		%d0, DTAG(%a6)		# store the dst optype tag

op000_src:
	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC

	bsr.l		set_tag_x		# get src optype tag

	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
	beq.b		op000_src_unnorm	# yes
op000_src_cont:
	mov.b		%d0, STAG(%a6)		# store the src optype tag
	rts

# out-of-line UNNORM fixups: the tag left in d0 by unnorm_fix() is the one
# that gets stored at the _cont label
op000_dst_unnorm:
	bsr.l		unnorm_fix		# fix the dst UNNORM
	bra.b		op000_dst_cont
op000_src_unnorm:
	bsr.l		unnorm_fix		# fix the src UNNORM
	bra.b		op000_src_cont
19667
19668#############################
19669# OPCLASS '010: <ea> -> reg #
19670#############################
# Same monadic/dyadic decision as op000, made on the full command word. After
# the optional destination load, the <ea> mode field of the opword selects
# between a data-register source (mode 0, handled by op010_dreg) and a memory
# source (any nonzero mode, handled by fetch_from_mem).
op010:
	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
	btst		&0x5,%d0		# testing extension bits
	beq.b		op010_src		# (bit 5 == 0) => monadic
	btst		&0x4,%d0		# (bit 5 == 1)
	beq.b		op010_dst		# (bit 4 == 0) => dyadic
	and.w		&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b		op010_src		# no: not an fcmp, load source only

op010_dst:
	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l		load_fpn2		# fetch dst fpreg ptr

	bsr.l		set_tag_x		# get dst type tag

	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b		op010_dst_unnorm	# yes
op010_dst_cont:
	mov.b		%d0, DTAG(%a6)		# store the dst optype tag

op010_src:
	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field

	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
	bne.w		fetch_from_mem		# src op is in memory (mode field != 0)

# source lives in a data register: d0 = source type, d1 = register number
op010_dreg:
	clr.b		STAG(%a6)		# either NORM or ZERO
	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field

	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg

op010_dst_unnorm:
	bsr.l		unnorm_fix		# fix the dst UNNORM
	bra.b		op010_dst_cont

# Indexed by the 3-bit source type field. Only long, single, word and byte
# have handlers; the remaining four slots hold offset 0 (the table itself).
# NOTE(review): those slots presumably can never be selected for a
# data-register source -- confirm, since jumping there would execute the table.
	swbeg		&0x8
tbl_op010_dreg:
	short		opd_long	- tbl_op010_dreg
	short		opd_sgl		- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		opd_word	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		opd_byte	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
19719
#
# opd_long: source operand is a longword held in a data register.
# The converted value can only be a NORM or a ZERO. STAG was cleared by
# op010_dreg before dispatching here, so only the ZERO case is recorded.
#
opd_long:
	bsr.l		fetch_dreg		# d0 = register contents (reg # in d1)
	fmov.l		%d0, %fp0		# convert the long to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand
	fbne.w		opd_long_ret		# nonzero: leave the cleared tag alone
opd_long_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
opd_long_ret:
	rts
19732
#
# opd_word: source operand is a word held in a data register.
# The converted value can only be a NORM or a ZERO. STAG was cleared by
# op010_dreg before dispatching here, so only the ZERO case is recorded.
#
opd_word:
	bsr.l		fetch_dreg		# d0 = register contents (reg # in d1)
	fmov.w		%d0, %fp0		# convert the low word to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand
	fbne.w		opd_word_ret		# nonzero: leave the cleared tag alone
opd_word_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
opd_word_ret:
	rts
19745
#
# opd_byte: source operand is a byte held in a data register.
# The converted value can only be a NORM or a ZERO. STAG was cleared by
# op010_dreg before dispatching here, so only the ZERO case is recorded.
#
opd_byte:
	bsr.l		fetch_dreg		# d0 = register contents (reg # in d1)
	fmov.b		%d0, %fp0		# convert the low byte to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand
	fbne.w		opd_byte_ret		# nonzero: leave the cleared tag alone
opd_byte_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
opd_byte_ret:
	rts
19758
#
# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
#
# separate SNANs and DENORMs so they can be loaded w/ special care.
# all others can simply be moved "in" using fmove.
#
# The register value is spilled to L_SCR1(a6) so it can be classified and
# loaded through a pointer (a0). The SNAN/DENORM branches are tail-jumps:
# get_sgl_snan/get_sgl_denorm build FP_SRC themselves and return to the
# caller of _load_fop.
# NOTE(review): the final fmov.s (%a0) assumes set_tag_s() leaves a0 pointing
# at L_SCR1 -- confirm.
#
opd_sgl:
	bsr.l		fetch_dreg		# fetch sgl in d0
	mov.l		%d0,L_SCR1(%a6)		# spill it so it can be addressed

	lea		L_SCR1(%a6), %a0	# pass: ptr to the sgl
	bsr.l		set_tag_s		# determine sgl type
	mov.b		%d0, STAG(%a6)		# save the src tag

	cmpi.b		%d0, &SNAN		# is it an SNAN?
	beq.w		get_sgl_snan		# yes

	cmpi.b		%d0, &DENORM		# is it a DENORM?
	beq.w		get_sgl_denorm		# yes

	fmov.s		(%a0), %fp0		# no, so can load it regular
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts
19782
19783##############################################################################
19784
19785#########################################################################
19786# fetch_from_mem():							#
19787# - src is out in memory. must:						#
19788#	(1) calc ea - must read AFTER you know the src type since	#
19789#		      if the ea is -() or ()+, need to know # of bytes.	#
19790#	(2) read it in from either user or supervisor space		#
19791#	(3) if (b || w || l) then simply read in			#
19792#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
#	    if (packed) then fetch it through load_packed		#
19794# INPUT:								#
19795#	%d0 : src type field						#
19796#########################################################################
# Dispatch to the per-format loader. d0 (source type field, 0-7) indexes a
# table of 16-bit offsets relative to tbl_fp_type; the chosen loader returns
# directly to the caller of _load_fop.
fetch_from_mem:
	clr.b		STAG(%a6)		# either NORM or ZERO

	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
	jmp		(tbl_fp_type.b,%pc,%d0.w*1)

# Type 7 has no loader: its slot holds offset 0 (the table itself).
# NOTE(review): presumably type 7 is never seen here -- confirm.
	swbeg		&0x8
tbl_fp_type:
	short		load_long	- tbl_fp_type
	short		load_sgl	- tbl_fp_type
	short		load_ext	- tbl_fp_type
	short		load_packed	- tbl_fp_type
	short		load_word	- tbl_fp_type
	short		load_dbl	- tbl_fp_type
	short		load_byte	- tbl_fp_type
	short		tbl_fp_type	- tbl_fp_type
19813
#########################################################################
# load_long: fetch a longword integer source operand from memory.
#	(1) _dcalc_ea(4) returns the operand address in a0
#	(2) the longword is read into d0: from the instruction stream
#	    when _dcalc_ea flagged #<data>, from data space otherwise
#	(3) fmov.l converts it and the result is stored in FP_SRC
#	(4) STAG is set to ZERO when the converted value is zero
# A failed read leaves through facc_in_l (data) or funimp_iacc (immediate).
#########################################################################
load_long:
	movq.l		&0x4, %d0		# operand size = 4 bytes
	bsr.l		_dcalc_ea		# a0 = <ea>

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # was the mode #<data>?
	beq.b		load_long_immed		# yes: read from instruction stream

	bsr.l		_dmem_read_long		# d0 = operand from data space

	tst.l		%d1			# read ok?
	bne.l		facc_in_l		# no: access error exit

load_long_cont:
	fmov.l		%d0, %fp0		# convert the long to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand

	fbne.w		load_long_ret		# nonzero: keep the tag set by caller
load_long_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
load_long_ret:
	rts

load_long_immed:
	bsr.l		_imem_read_long		# d0 = operand from instruction stream

	tst.l		%d1			# read ok?
	bne.l		funimp_iacc		# no: instruction access error exit
	bra.b		load_long_cont
19849
#########################################################################
# load_word: fetch a word integer source operand from memory.
#	(1) _dcalc_ea(2) returns the operand address in a0
#	(2) the word is read into d0: from the instruction stream
#	    when _dcalc_ea flagged #<data>, from data space otherwise
#	(3) fmov.w converts it and the result is stored in FP_SRC
#	(4) STAG is set to ZERO when the converted value is zero
# A failed read leaves through facc_in_w (data) or funimp_iacc (immediate).
#########################################################################
load_word:
	movq.l		&0x2, %d0		# operand size = 2 bytes
	bsr.l		_dcalc_ea		# a0 = <ea>

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # was the mode #<data>?
	beq.b		load_word_immed		# yes: read from instruction stream

	bsr.l		_dmem_read_word		# d0 = operand from data space

	tst.l		%d1			# read ok?
	bne.l		facc_in_w		# no: access error exit

load_word_cont:
	fmov.w		%d0, %fp0		# convert the word to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand

	fbne.w		load_word_ret		# nonzero: keep the tag set by caller
load_word_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
load_word_ret:
	rts

load_word_immed:
	bsr.l		_imem_read_word		# d0 = operand from instruction stream

	tst.l		%d1			# read ok?
	bne.l		funimp_iacc		# no: instruction access error exit
	bra.b		load_word_cont
19885
#########################################################################
# load_byte: fetch a byte integer source operand from memory.
#	(1) _dcalc_ea(1) returns the operand address in a0
#	(2) the operand is read into d0: a byte from data space, or a
#	    whole word from the instruction stream for #<data> (fmov.b
#	    then uses only the low byte of d0)
#	(3) fmov.b converts it and the result is stored in FP_SRC
#	(4) STAG is set to ZERO when the converted value is zero
# A failed read leaves through facc_in_b (data) or funimp_iacc (immediate).
#########################################################################
load_byte:
	movq.l		&0x1, %d0		# operand size = 1 byte
	bsr.l		_dcalc_ea		# a0 = <ea>

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # was the mode #<data>?
	beq.b		load_byte_immed		# yes: read from instruction stream

	bsr.l		_dmem_read_byte		# d0 = operand from data space

	tst.l		%d1			# read ok?
	bne.l		facc_in_b		# no: access error exit

load_byte_cont:
	fmov.b		%d0, %fp0		# convert the byte to extended
	fmovm.x		&0x80, FP_SRC(%a6)	# FP_SRC = converted source operand

	fbne.w		load_byte_ret		# nonzero: keep the tag set by caller
load_byte_zero:
	mov.b		&ZERO, STAG(%a6)	# zero: tag the source as ZERO
load_byte_ret:
	rts

load_byte_immed:
	bsr.l		_imem_read_word		# d0 = immediate word from instruction stream

	tst.l		%d1			# read ok?
	bne.l		funimp_iacc		# no: instruction access error exit
	bra.b		load_byte_cont
19921
#########################################################################
# load_sgl: fetch a single-precision source operand from memory.
#	(1) _dcalc_ea(4) returns the operand address in a0
#	(2) the longword is read into d0 (instruction stream for
#	    #<data>, data space otherwise) and saved in L_SCR1(a6)
#	(3) set_tag_s() classifies the value at L_SCR1; the tag is
#	    stored in STAG
#	(4) DENORMs and SNANs are converted by get_sgl_denorm /
#	    get_sgl_snan (tail-jumps); everything else is loaded with
#	    fmov.s and stored in FP_SRC
# A failed read leaves through facc_in_l (data) or funimp_iacc (immediate).
#
# Both read paths must leave the operand in L_SCR1 before reaching
# load_sgl_cont, because the classification and the fmov.s below work on
# L_SCR1, not on d0. The immediate path used to skip that store, so an
# fop.s #<data> operand was tagged and loaded from stale scratch contents.
#########################################################################
load_sgl:
	movq.l		&0x4, %d0		# pass: 4 (bytes)
	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b		SPCOND_FLG(%a6),&immed_flg
	beq.b		load_sgl_immed

	bsr.l		_dmem_read_long		# fetch src operand from memory
	mov.l		%d0, L_SCR1(%a6)	# store src op on stack

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_l		# yes

load_sgl_cont:
	lea		L_SCR1(%a6), %a0	# pass: ptr to sgl src op
	bsr.l		set_tag_s		# determine src type tag
	mov.b		%d0, STAG(%a6)		# save src optype tag on stack

	cmpi.b		%d0, &DENORM		# is it a sgl DENORM?
	beq.w		get_sgl_denorm		# yes

	cmpi.b		%d0, &SNAN		# is it a sgl SNAN?
	beq.w		get_sgl_snan		# yes

	fmov.s		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

load_sgl_immed:
	bsr.l		_imem_read_long		# fetch src operand immed data

	tst.l		%d1			# did ifetch fail?
	bne.l		funimp_iacc		# yes

	mov.l		%d0, L_SCR1(%a6)	# store src op on stack (success only)
	bra.b		load_sgl_cont
19963
# must convert sgl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : points to sgl denorm
#
# The 23 fraction bits are placed in bits 30:8 of FP_SRC_HI (explicit integer
# bit clear), the sign is copied into the top bit of FP_SRC_EX, and norm() is
# called on FP_SRC. norm() returns the shift count in d0; the final exponent
# is 0x3f81 minus that count, OR-ed in under the sign. The operand is then
# re-tagged as NORM.
get_sgl_denorm:
	clr.w		FP_SRC_EX(%a6)		# start with sign = 0, exp = 0
	bfextu		(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
	lsl.l		&0x8, %d0		# left-align below the integer bit
	mov.l		%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)

	clr.w		FP_SRC_EX(%a6)		# (repeats the clear done above)
	btst		&0x7, (%a0)		# is sgn bit set?
	beq.b		sgl_dnrm_norm
	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

sgl_dnrm_norm:
	lea		FP_SRC(%a6), %a0
	bsr.l		norm			# normalize number
	mov.w		&0x3f81, %d1		# xprec exp = 0x3f81
	sub.w		%d0, %d1		# exp = 0x3f81 - shft amt.
	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b		&NORM, STAG(%a6)	# fix src type tag
	rts
19988
# convert sgl to ext SNAN
# %a0 : points to sgl SNAN
#
# Builds FP_SRC by hand: exponent 0x7fff, the 23 fraction bits in bits 30:8
# of FP_SRC_HI, FP_SRC_LO = 0, and the sign copied from the top bit of the
# single. STAG is not touched here; the caller stored the SNAN tag already.
get_sgl_snan:
	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
	bfextu		(%a0){&9:&23}, %d0	# d0 = 23-bit fraction
	lsl.l		&0x8, %d0		# extract and insert hi(man)
	mov.l		%d0, FP_SRC_HI(%a6)
	clr.l		FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# see if sign of SNAN is set
	beq.b		no_sgl_snan_sgn
	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
no_sgl_snan_sgn:
	rts
20003
#########################################
# load a DBL into %fp0:			#
#	(1) calc ea			#
#	(2) read 8 bytes into L_SCR(1,2)#
#	    (8 bytes starting at L_SCR1)#
#	(3) tag it; DENORM/SNAN get	#
#	    special conversion, others	#
#	    are loaded with fmov.d	#
#	a failed read exits through	#
#	facc_in_d or funimp_iacc	#
#########################################
load_dbl:
	movq.l		&0x8, %d0		# pass: 8 (bytes)
	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b		SPCOND_FLG(%a6),&immed_flg
	beq.b		load_dbl_immed		# #<data>: read from instruction stream

	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l		&0x8, %d0		# pass: # bytes to read
	bsr.l		_dmem_read		# fetch src operand from memory

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_d		# yes

load_dbl_cont:
	lea		L_SCR1(%a6), %a0	# pass: ptr to input dbl
	bsr.l		set_tag_d		# determine src type tag
	mov.b		%d0, STAG(%a6)		# set src optype tag

	cmpi.b		%d0, &DENORM		# is it a dbl DENORM?
	beq.w		get_dbl_denorm		# yes

	cmpi.b		%d0, &SNAN		# is it a dbl SNAN?
	beq.w		get_dbl_snan		# yes

	fmov.d		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

load_dbl_immed:
	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l		&0x8, %d0		# pass: # bytes to read
	bsr.l		_imem_read		# fetch src operand from instruction stream

	tst.l		%d1			# did ifetch fail?
	bne.l		funimp_iacc		# yes
	bra.b		load_dbl_cont
20048
# must convert dbl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : loc. of dbl denorm
#
# The 52 fraction bits are split across the extended mantissa: the upper 31
# land in bits 30:0 of FP_SRC_HI (explicit integer bit clear) and the lower
# 21 are shifted up into bits 31:11 of FP_SRC_LO. After norm() the exponent
# is 0x3c01 minus the shift count it returns in d0, and the operand is
# re-tagged as NORM.
get_dbl_denorm:
	clr.w		FP_SRC_EX(%a6)		# start with sign = 0, exp = 0
	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l		%d0, FP_SRC_HI(%a6)
	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l		&0xb, %d1		# shift count = 11
	lsl.l		%d1, %d0		# left-align the low 21 bits
	mov.l		%d0, FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# is sgn bit set?
	beq.b		dbl_dnrm_norm
	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

dbl_dnrm_norm:
	lea		FP_SRC(%a6), %a0
	bsr.l		norm			# normalize number
	mov.w		&0x3c01, %d1		# xprec exp = 0x3c01
	sub.w		%d0, %d1		# exp = 0x3c01 - shft amt.
	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b		&NORM, STAG(%a6)	# fix src type tag
	rts
20074
# convert dbl to ext SNAN
# %a0 : points to dbl SNAN
#
# Builds FP_SRC by hand: exponent 0x7fff, the upper 31 fraction bits in bits
# 30:0 of FP_SRC_HI, the lower 21 in bits 31:11 of FP_SRC_LO, and the sign
# copied from the top bit of the double. STAG is not touched here; the caller
# stored the SNAN tag already.
get_dbl_snan:
	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN

	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l		%d0, FP_SRC_HI(%a6)
	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l		&0xb, %d1		# shift count = 11
	lsl.l		%d1, %d0		# left-align the low 21 bits
	mov.l		%d0, FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# see if sign of SNAN is set
	beq.b		no_dbl_snan_sgn
	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
no_dbl_snan_sgn:
	rts
20092
#########################################################################
# load_ext: fetch an extended-precision source operand from memory.
#	(1) _dcalc_ea(12) returns the operand address in a0
#	(2) 12 bytes are read straight into FP_SRC(a6)
#	(3) set_tag_x() classifies FP_SRC; an UNNORM is first passed
#	    through unnorm_fix(), whose resulting tag is the one stored
#	(4) the tag is written to STAG
# A failed read leaves through facc_in_x.
#########################################################################
load_ext:
	movq.l		&0xc, %d0		# operand size = 12 bytes
	bsr.l		_dcalc_ea		# a0 = <ea>

	lea		FP_SRC(%a6), %a1	# destination buffer for the read
	movq.l		&0xc, %d0		# byte count for the read
	bsr.l		_dmem_read		# copy operand into FP_SRC

	tst.l		%d1			# read ok?
	bne.l		facc_in_x		# no: access error exit

	lea		FP_SRC(%a6), %a0	# a0 -> operand to classify
	bsr.l		set_tag_x		# d0 = optype tag

	cmpi.b		%d0, &UNNORM		# UNNORM needs fixing first
	bne.b		load_ext_tag		# anything else: store tag as-is

load_ext_unnorm:
	bsr.l		unnorm_fix		# fix operand; d0 = new tag
load_ext_tag:
	mov.b		%d0, STAG(%a6)		# record the source optype tag
	rts
20124
20125#################################################
20126# load a packed into %fp0:			#
20127#	-number can't fault			#
20128#	(1) calc ea				#
20129#	(2) read 12 bytes into L_SCR(1,2,3)	#
20130#	(3) fmov.x into %fp0			#
20131#################################################
# NOTE(review): steps (1)-(3) above are not visible in this routine; they
# presumably describe get_packed(), which is defined elsewhere. What this
# routine itself does after get_packed() returns is classify the operand at
# FP_SRC(%a6) with set_tag_x(), pass an UNNORM through unnorm_fix(), and
# store the resulting tag (from %d0) in STAG(%a6).
20132load_packed:
20133	bsr.l		get_packed
20134
20135	lea		FP_SRC(%a6),%a0		# pass ptr to src op
20136	bsr.l		set_tag_x		# determine src type tag
20137	cmpi.b		%d0,&UNNORM		# is the src op an UNNORM ZERO?
20138	beq.b		load_packed_unnorm	# yes
20139
20140	mov.b		%d0,STAG(%a6)		# store the src optype tag
20141	rts
20142
20143load_packed_unnorm:
20144	bsr.l		unnorm_fix		# fix the UNNORM ZERO
20145	mov.b		%d0,STAG(%a6)		# store the src optype tag
20146	rts
20147
20148#########################################################################
20149# XDEF ****************************************************************	#
20150#	fout(): move from fp register to memory or data register	#
20151#									#
20152# XREF ****************************************************************	#
20153#	_round() - needed to create EXOP for sgl/dbl precision		#
20154#	norm() - needed to create EXOP for extended precision		#
20155#	ovf_res() - create default overflow result for sgl/dbl precision#
20156#	unf_res() - create default underflow result for sgl/dbl prec.	#
20157#	dst_dbl() - create rounded dbl precision result.		#
20158#	dst_sgl() - create rounded sgl precision result.		#
20159#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
20160#	bindec() - convert FP binary number to packed number.		#
20161#	_mem_write() - write data to memory.				#
20162#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163#	_dmem_write_{byte,word,long}() - write data to memory.		#
20164#	store_dreg_{b,w,l}() - store data to data register file.	#
20165#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
20166#									#
20167# INPUT ***************************************************************	#
20168#	a0 = pointer to extended precision source operand		#
20169#	d0 = round prec,mode						#
20170#									#
20171# OUTPUT **************************************************************	#
20172#	fp0 : intermediate underflow or overflow result if		#
20173#	      OVFL/UNFL occurred for a sgl or dbl operand		#
20174#									#
20175# ALGORITHM ***********************************************************	#
20176#	This routine is accessed by many handlers that need to do an	#
20177# opclass three move of an operand out to memory.			#
20178#	Decode an fmove out (opclass 3) instruction to determine if	#
20179# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
20180# register or memory. The algorithm uses a standard "fmove" to create	#
20181# the rounded result. Also, since exceptions are disabled, this also	#
20182# creates the correct OPERR default result if appropriate.		#
20183#	For sgl or dbl precision, overflow or underflow can occur. If	#
20184# either occurs and is enabled, the EXOP is returned in fp1.		#
20185#	For extended precision, the stacked <ea> must be fixed along	#
20186# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
20187# the source is a denorm and if underflow is enabled, an EXOP must be	#
20188# created.								#
20189#	For packed, the k-factor must be fetched from the instruction	#
20190# word or a data register. The <ea> must be fixed as w/ extended	#
20191# precision. Then, bindec() is called to create the appropriate		#
20192# packed result.							#
20193#	If at any time an access error is flagged by one of the move-	#
20194# to-memory routines, then a special exit must be made so that the	#
20195# access error can be handled properly.					#
20196#									#
20197#########################################################################
20198
# fout: dispatch on the 3-bit destination format field extracted from
# EXC_CMDREG(%a6) (bit-field offset 3, width 3) through a table of 16-bit
# offsets relative to tbl_fout. %d0 (rnd prec,mode) and %a0 (src ptr) are
# passed through untouched; %d1 and %a1 are clobbered by the dispatch.
20199	global		fout
20200fout:
20201	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
20204
20205	swbeg		&0x8
20206tbl_fout:
20207	short		fout_long	-	tbl_fout	# fmt 0
20208	short		fout_sgl	-	tbl_fout	# fmt 1
20209	short		fout_ext	-	tbl_fout	# fmt 2
20210	short		fout_pack	-	tbl_fout	# fmt 3
20211	short		fout_word	-	tbl_fout	# fmt 4
20212	short		fout_dbl	-	tbl_fout	# fmt 5
20213	short		fout_byte	-	tbl_fout	# fmt 6
20214	short		fout_pack	-	tbl_fout	# fmt 7
20215
20216#################################################################
20217# fmove.b out ###################################################
20218#################################################################
20219
20220# Only "Unimplemented Data Type" exceptions enter here. The operand
20221# is either a DENORM or a NORM.
#
# in:  %a0 = ptr to extended precision source, %d0 = rnd prec,mode
# The value is converted with a real "fmov.b" under the caller's rounding
# mode, the resulting FPSR bits are OR'ed into the low word of USER_FPSR,
# and the byte in %d0 is then stored either to the data register selected by
# the opword (mode field == 0) or to memory at EXC_EA(%a6). A failed memory
# write exits through facc_out_b.
20222fout_byte:
20223	tst.b		STAG(%a6)		# is operand normalized?
20224	bne.b		fout_byte_denorm	# no
20225
20226	fmovm.x		SRC(%a0),&0x80		# load value
20227
20228fout_byte_norm:
20229	fmov.l		%d0,%fpcr		# insert rnd prec,mode
20230
20231	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
20232
20233	fmov.l		&0x0,%fpcr		# clear FPCR
20234	fmov.l		%fpsr,%d1		# fetch FPSR
20235	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20236
20237	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20238	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20239	beq.b		fout_byte_dn		# must save to integer regfile
20240
20241	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20242	bsr.l		_dmem_write_byte	# write byte
20243
20244	tst.l		%d1			# did dstore fail?
20245	bne.l		facc_out_b		# yes
20246
20247	rts
20248
20249fout_byte_dn:
20250	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20251	andi.w		&0x7,%d1		# keep register number only
20252	bsr.l		store_dreg_b
20253	rts
20254
# A DENORM is replaced by a single precision value with the same sign and
# bit pattern 0x00800000 before the conversion is executed.
20255fout_byte_denorm:
20256	mov.l		SRC_EX(%a0),%d1
20257	andi.l		&0x80000000,%d1		# keep DENORM sign
20258	ori.l		&0x00800000,%d1		# make smallest sgl
20259	fmov.s		%d1,%fp0
20260	bra.b		fout_byte_norm
20261
20262#################################################################
20263# fmove.w out ###################################################
20264#################################################################
20265
20266# Only "Unimplemented Data Type" exceptions enter here. The operand
20267# is either a DENORM or a NORM.
#
# in:  %a0 = ptr to extended precision source, %d0 = rnd prec,mode
# The value is converted with a real "fmov.w" under the caller's rounding
# mode, the resulting FPSR bits are OR'ed into the low word of USER_FPSR,
# and the word in %d0 is then stored either to the data register selected by
# the opword (mode field == 0) or to memory at EXC_EA(%a6). A failed memory
# write exits through facc_out_w.
20268fout_word:
20269	tst.b		STAG(%a6)		# is operand normalized?
20270	bne.b		fout_word_denorm	# no
20271
20272	fmovm.x		SRC(%a0),&0x80		# load value
20273
20274fout_word_norm:
20275	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20276
20277	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
20278
20279	fmov.l		&0x0,%fpcr		# clear FPCR
20280	fmov.l		%fpsr,%d1		# fetch FPSR
20281	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20282
20283	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20284	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20285	beq.b		fout_word_dn		# must save to integer regfile
20286
20287	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20288	bsr.l		_dmem_write_word	# write word
20289
20290	tst.l		%d1			# did dstore fail?
20291	bne.l		facc_out_w		# yes
20292
20293	rts
20294
20295fout_word_dn:
20296	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20297	andi.w		&0x7,%d1		# keep register number only
20298	bsr.l		store_dreg_w
20299	rts
20300
# A DENORM is replaced by a single precision value with the same sign and
# bit pattern 0x00800000 before the conversion is executed.
20301fout_word_denorm:
20302	mov.l		SRC_EX(%a0),%d1
20303	andi.l		&0x80000000,%d1		# keep DENORM sign
20304	ori.l		&0x00800000,%d1		# make smallest sgl
20305	fmov.s		%d1,%fp0
20306	bra.b		fout_word_norm
20307
20308#################################################################
20309# fmove.l out ###################################################
20310#################################################################
20311
20312# Only "Unimplemented Data Type" exceptions enter here. The operand
20313# is either a DENORM or a NORM.
#
# in:  %a0 = ptr to extended precision source, %d0 = rnd prec,mode
# The value is converted with a real "fmov.l" under the caller's rounding
# mode, the resulting FPSR bits are OR'ed into the low word of USER_FPSR,
# and the longword in %d0 is then stored either to the data register
# selected by the opword (mode field == 0) or to memory at EXC_EA(%a6).
# A failed memory write exits through facc_out_l.
20314fout_long:
20315	tst.b		STAG(%a6)		# is operand normalized?
20316	bne.b		fout_long_denorm	# no
20317
20318	fmovm.x		SRC(%a0),&0x80		# load value
20319
20320fout_long_norm:
20321	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20322
20323	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
20324
20325	fmov.l		&0x0,%fpcr		# clear FPCR
20326	fmov.l		%fpsr,%d1		# fetch FPSR
20327	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20328
20329fout_long_write:
20330	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20331	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20332	beq.b		fout_long_dn		# must save to integer regfile
20333
20334	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20335	bsr.l		_dmem_write_long	# write long
20336
20337	tst.l		%d1			# did dstore fail?
20338	bne.l		facc_out_l		# yes
20339
20340	rts
20341
20342fout_long_dn:
20343	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20344	andi.w		&0x7,%d1		# keep register number only
20345	bsr.l		store_dreg_l
20346	rts
20347
# A DENORM is replaced by a single precision value with the same sign and
# bit pattern 0x00800000 before the conversion is executed.
20348fout_long_denorm:
20349	mov.l		SRC_EX(%a0),%d1
20350	andi.l		&0x80000000,%d1		# keep DENORM sign
20351	ori.l		&0x00800000,%d1		# make smallest sgl
20352	fmov.s		%d1,%fp0
20353	bra.b		fout_long_norm
20354
20355#################################################################
20356# fmove.x out ###################################################
20357#################################################################
20358
20359# Only "Unimplemented Data Type" exceptions enter here. The operand
20360# is either a DENORM or a NORM.
20361# The DENORM causes an Underflow exception.
#
# in:  %a0 = ptr to extended precision source
# out: %fp0 = source value; 12 bytes written to the fixed-up <ea>;
#      for a DENORM source the underflow bit is set in FPSR_EXCEPT and,
#      if UNFL or INEX2 is enabled, a normalized EXOP is returned in %fp1.
# A failed write exits through fout_ext_err -> facc_out_x.
20362fout_ext:
20363
20364# we copy the extended precision result to FP_SCR0 so that the reserved
20365# 16-bit field gets zeroed. we do this since we promise not to disturb
20366# what's at SRC(a0).
20367	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20368	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
20369	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20370	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20371
20372	fmovm.x		SRC(%a0),&0x80		# return result
20373
20374	bsr.l		_calc_ea_fout		# fix stacked <ea>
20375
20376	mov.l		%a0,%a1			# pass: dst addr
20377	lea		FP_SCR0(%a6),%a0	# pass: src addr
20378	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
20379
20380# we must not yet write the extended precision data to the stack
20381# in the pre-decrement case from supervisor mode or else we'll corrupt
20382# the stack frame. so, leave it in FP_SCR0 for now and deal with it later...
20383	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
20384	beq.b		fout_ext_a7
20385
20386	bsr.l		_dmem_write		# write ext prec number to memory
20387
20388	tst.l		%d1			# did dstore fail?
20389	bne.w		fout_ext_err		# yes
20390
20391	tst.b		STAG(%a6)		# is operand normalized?
20392	bne.b		fout_ext_denorm		# no
20393	rts
20394
20395# the number is a DENORM. must set the underflow exception bit
20396fout_ext_denorm:
20397	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20398
20399	mov.b		FPCR_ENABLE(%a6),%d0
20400	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
20401	bne.b		fout_ext_exc		# yes
20402	rts
20403
20404# we don't want to do the write if the exception occurred in supervisor mode
20405# so _mem_write2() handles this for us.
20406fout_ext_a7:
20407	bsr.l		_mem_write2		# write ext prec number to memory
20408
20409	tst.l		%d1			# did dstore fail?
20410	bne.w		fout_ext_err		# yes
20411
20412	tst.b		STAG(%a6)		# is operand normalized?
20413	bne.b		fout_ext_denorm		# no
20414	rts
20415
# build the EXOP for an enabled underflow: normalize the copy in FP_SCR0 and
# give it the exponent -(shift count), truncated to 15 bits, keeping the
# original sign bit.
20416fout_ext_exc:
20417	lea		FP_SCR0(%a6),%a0
20418	bsr.l		norm			# normalize the mantissa
20419	neg.w		%d0			# new exp = -(shft amt)
20420	andi.w		&0x7fff,%d0
20421	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
20422	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
20423	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
20424	rts
20425
# shared access-error exit (also reached from the fout_pack write paths)
20426fout_ext_err:
20427	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
20428	bra.l		facc_out_x
20429
20430#########################################################################
20431# fmove.s out ###########################################################
20432#########################################################################
# in:  %a0 = ptr to extended precision source, %d0 = rnd prec,mode
# The rounding precision field of %d0 is replaced by single precision and
# the result is saved in L_SCR3(%a6). The biased exponent is then compared
# against SGL_HI / SGL_LO to select the overflow, possible-overflow,
# underflow or in-range (fall through to fout_sgl_exg) handling.
20433fout_sgl:
20434	andi.b		&0x30,%d0		# clear rnd prec
20435	ori.b		&s_mode*0x10,%d0	# insert sgl prec
20436	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
20437
20438#
20439# operand is a normalized number. first, we check to see if the move out
20440# would cause either an underflow or overflow. these cases are handled
20441# separately. otherwise, set the FPCR to the proper rounding mode and
20442# execute the move.
20443#
20444	mov.w		SRC_EX(%a0),%d0		# extract exponent
20445	andi.w		&0x7fff,%d0		# strip sign
20446
20447	cmpi.w		%d0,&SGL_HI		# will operand overflow?
20448	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
20449	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
20450	cmpi.w		%d0,&SGL_LO		# will operand underflow?
20451	blt.w		fout_sgl_unfl		# yes; go handle underflow
20452
20453#
20454# NORMs(in range) can be stored out by a simple "fmov.s"
20455# Unnormalized inputs can come through this point.
20456#
# also entered from fout_sgl_may_ovfl when rounding did not overflow;
# %a0 must still point at the source operand.
20457fout_sgl_exg:
20458	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20459
20460	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20461	fmov.l		&0x0,%fpsr		# clear FPSR
20462
20463	fmov.s		%fp0,%d0		# store does convert and round
20464
20465	fmov.l		&0x0,%fpcr		# clear FPCR
20466	fmov.l		%fpsr,%d1		# save FPSR
20467
20468	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
20469
20470fout_sgl_exg_write:
20471	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20472	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20473	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
20474
20475	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20476	bsr.l		_dmem_write_long	# write long
20477
20478	tst.l		%d1			# did dstore fail?
20479	bne.l		facc_out_l		# yes
20480
20481	rts
20482
20483fout_sgl_exg_write_dn:
20484	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20485	andi.w		&0x7,%d1		# keep register number only
20486	bsr.l		store_dreg_l
20487	rts
20488
20489#
20490# here, we know that the operand would UNFL if moved out to single prec,
20491# so, denorm and round and then use generic store single routine to
20492# write the value to memory.
20493#
# The source pointer (%a0) is pushed on the stack so that fout_sd_exc_unfl
# can pop it when an EXOP is needed; otherwise it is discarded with
# "addq.l &0x4,%sp" before returning.
20494fout_sgl_unfl:
20495	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20496
20497	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20498	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20499	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20500	mov.l		%a0,-(%sp)		# save src ptr for EXOP creation
20501
20502	clr.l		%d0			# pass: S.F. = 0
20503
20504	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20505	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
20506
20507	lea		FP_SCR0(%a6),%a0
20508	bsr.l		norm			# normalize the DENORM
20509
20510fout_sgl_unfl_cont:
20511	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20512	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20513	bsr.l		unf_res			# calc default underflow result
20514
20515	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20516	bsr.l		dst_sgl			# convert to single prec
20517
20518	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20519	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20520	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
20521
20522	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20523	bsr.l		_dmem_write_long	# write long
20524
20525	tst.l		%d1			# did dstore fail?
20526	bne.l		facc_out_l		# yes
20527
20528	bra.b		fout_sgl_unfl_chkexc
20529
20530fout_sgl_unfl_dn:
20531	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20532	andi.w		&0x7,%d1		# keep register number only
20533	bsr.l		store_dreg_l
20534
20535fout_sgl_unfl_chkexc:
20536	mov.b		FPCR_ENABLE(%a6),%d1
20537	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20538	bne.w		fout_sd_exc_unfl	# yes
20539	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
20540	rts
20541
20542#
20543# it's definitely an overflow so call ovf_res to get the correct answer
20544#
# in:  %a0 = ptr to extended precision source, L_SCR3(%a6) = sgl prec,rnd mode
# Sets the overflow status bits in USER_FPSR (plus inex2 when any mantissa
# bit below single precision is set), stores the default overflow result
# produced by ovf_res(), and, if OVFL or INEX2 is enabled, continues at
# fout_sd_exc_ovfl to return the EXOP in %fp1. The source pointer is pushed
# on the stack for that purpose and dropped again when no EXOP is needed.
20545fout_sgl_ovfl:
20546	tst.b		3+SRC_HI(%a0)		# is result inexact?
20547	bne.b		fout_sgl_ovfl_inex2
20548	tst.l		SRC_LO(%a0)		# is result inexact?
20549	bne.b		fout_sgl_ovfl_inex2
20550	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20551	bra.b		fout_sgl_ovfl_cont
20552fout_sgl_ovfl_inex2:
20553	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20554
20555fout_sgl_ovfl_cont:
20556	mov.l		%a0,-(%sp)		# save src ptr for EXOP creation
20557
20558# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20559# overflow result. DON'T save the returned ccodes from ovf_res() since
20560# fmove out doesn't alter them.
20561	tst.b		SRC_EX(%a0)		# is operand negative?
20562	smi		%d1			# set if so
20563	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
20564	bsr.l		ovf_res			# calc OVFL result
20565	fmovm.x		(%a0),&0x80		# load default overflow result
20566	fmov.s		%fp0,%d0		# store to single
20567
20568	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20569	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20570	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
20571
20572	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20573	bsr.l		_dmem_write_long	# write long
20574
20575	tst.l		%d1			# did dstore fail?
20576	bne.l		facc_out_l		# yes
20577
20578	bra.b		fout_sgl_ovfl_chkexc
20579
20580fout_sgl_ovfl_dn:
20581	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20582	andi.w		&0x7,%d1		# keep register number only
20583	bsr.l		store_dreg_l
20584
# The EXOP must be built when the OVERFLOW (not underflow) or INEX2 trap is
# enabled: 0x10 = OVFL enable, 0x02 = INEX2 enable in the FPCR enable byte.
# (The underflow paths use 0x0a = UNFL|INEX2.)
20585fout_sgl_ovfl_chkexc:
20586	mov.b		FPCR_ENABLE(%a6),%d1
20587	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
20588	bne.w		fout_sd_exc_ovfl	# yes
20589	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
20590	rts
20591
20592#
20593# move out MAY overflow:
20594# (1) force the exp to 0x3fff
20595# (2) do a move w/ appropriate rnd mode
20596# (3) if the rounded magnitude is still < 2, the exponent did not
20597#	increase: store the original operand through fout_sgl_exg.
20598#     if it is >= 2, rounding overflowed: go to fout_sgl_ovfl.
20599#
# %a0 (src ptr) is left untouched so that either target can use it.
20600fout_sgl_may_ovfl:
20601	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20602	andi.w		&0x8000,%d1		# keep it,clear exp
20603	ori.w		&0x3fff,%d1		# insert exp = 0
20604	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20605	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20607
20608	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20609
20610	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20611	fmov.l		&0x0,%fpcr		# clear FPCR
20612
20613	fabs.x		%fp0			# need absolute value
20614	fcmp.b		%fp0,&0x2		# did exponent increase?
20615	fblt.w		fout_sgl_exg		# no; go finish NORM
20616	bra.w		fout_sgl_ovfl		# yes; go handle overflow
20617
20618################
20619
# fout_sd_exc_unfl / fout_sd_exc_ovfl: create the EXOP for an enabled
# sgl/dbl move-out exception and return it in %fp1.
#   in:  (%sp)       = saved ptr to the extended precision source (popped here)
#        L_SCR3(%a6) = rnd prec,mode used for the move out
#   The source is copied to FP_SCR0, a DENORM source is normalized first
#   (unfl entry only), the sign is moved aside into the byte at
#   2+FP_SCR0_EX, the magnitude is rounded by _round(), and the sign is
#   reinserted before the value is loaded into %fp1.
20620fout_sd_exc_unfl:
20621	mov.l		(%sp)+,%a0		# restore a0
20622
20623	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20624	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20625	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20626
20627	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
20628	bne.b		fout_sd_exc_cont	# no
20629
20630	lea		FP_SCR0(%a6),%a0
20631	bsr.l		norm			# normalize the DENORM
20632	neg.l		%d0			# new exp = -(shft amt)
20633	andi.w		&0x7fff,%d0
20634	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert exp, keep sign bit
20635	bra.b		fout_sd_exc_cont
20636
20637fout_sd_exc:
20638fout_sd_exc_ovfl:
20639	mov.l		(%sp)+,%a0		# restore a0
20640
20641	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20642	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20643	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20644
20645fout_sd_exc_cont:
20646	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
20647	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
20648	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
20649
# build %d1 for _round(): hi word = (prec,mode byte >> 4) & 0x0c,
#                         lo word = (prec,mode byte >> 4) & 0x03
20650	mov.b		3+L_SCR3(%a6),%d1
20651	lsr.b		&0x4,%d1
20652	andi.w		&0x0c,%d1
20653	swap		%d1
20654	mov.b		3+L_SCR3(%a6),%d1
20655	lsr.b		&0x4,%d1
20656	andi.w		&0x03,%d1
20657	clr.l		%d0			# pass: zero g,r,s
20658	bsr.l		_round			# round the DENORM
20659
20660	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
20661	beq.b		fout_sd_exc_done	# no
20662	bset		&0x7,FP_SCR0_EX(%a6)	# yes
20663
20664fout_sd_exc_done:
20665	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
20666	rts
20667
20668#################################################################
20669# fmove.d out ###################################################
20670#################################################################
# in:  %a0 = ptr to extended precision source, %d0 = rnd prec,mode
# The rounding precision field of %d0 is replaced by double precision and
# the result is saved in L_SCR3(%a6). The biased exponent is then compared
# against DBL_HI / DBL_LO to select the overflow, possible-overflow,
# underflow or in-range (fall through to fout_dbl_exg) handling.
20671fout_dbl:
20672	andi.b		&0x30,%d0		# clear rnd prec
20673	ori.b		&d_mode*0x10,%d0	# insert dbl prec
20674	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
20675
20676#
20677# operand is a normalized number. first, we check to see if the move out
20678# would cause either an underflow or overflow. these cases are handled
20679# separately. otherwise, set the FPCR to the proper rounding mode and
20680# execute the move.
20681#
20682	mov.w		SRC_EX(%a0),%d0		# extract exponent
20683	andi.w		&0x7fff,%d0		# strip sign
20684
20685	cmpi.w		%d0,&DBL_HI		# will operand overflow?
20686	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
20687	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
20688	cmpi.w		%d0,&DBL_LO		# will operand underflow?
20689	blt.w		fout_dbl_unfl		# yes; go handle underflow
20690
20691#
20692# NORMs(in range) can be stored out by a simple "fmov.d"
20693# Unnormalized inputs can come through this point.
20694#
# also entered from fout_dbl_may_ovfl when rounding did not overflow;
# %a0 must still point at the source operand. the 8-byte result is staged
# at L_SCR1(%a6) and then copied to EXC_EA(%a6) with _dmem_write().
20695fout_dbl_exg:
20696	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20697
20698	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20699	fmov.l		&0x0,%fpsr		# clear FPSR
20700
20701	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
20702
20703	fmov.l		&0x0,%fpcr		# clear FPCR
20704	fmov.l		%fpsr,%d0		# save FPSR
20705
20706	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
20707
20708	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20709	lea		L_SCR1(%a6),%a0		# pass: src addr
20710	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20711	bsr.l		_dmem_write		# store dbl fop to memory
20712
20713	tst.l		%d1			# did dstore fail?
20714	bne.l		facc_out_d		# yes
20715
20716	rts					# no; so we're finished
20717
20718#
20719# here, we know that the operand would UNFL if moved out to double prec,
20720# so, denorm and round and then use generic store double routine to
20721# write the value to memory.
20722#
# The source pointer (%a0) is pushed on the stack so that fout_sd_exc_unfl
# can pop it when an EXOP is needed; otherwise it is discarded with
# "addq.l &0x4,%sp" before returning.
20723fout_dbl_unfl:
20724	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20725
20726	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20727	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20728	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20729	mov.l		%a0,-(%sp)		# save src ptr for EXOP creation
20730
20731	clr.l		%d0			# pass: S.F. = 0
20732
20733	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20734	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
20735
20736	lea		FP_SCR0(%a6),%a0
20737	bsr.l		norm			# normalize the DENORM
20738
20739fout_dbl_unfl_cont:
20740	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20741	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20742	bsr.l		unf_res			# calc default underflow result
20743
20744	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20745	bsr.l		dst_dbl			# convert to double prec
20746	mov.l		%d0,L_SCR1(%a6)		# stage hi(dbl result)
20747	mov.l		%d1,L_SCR2(%a6)		# stage lo(dbl result)
20748
20749	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20750	lea		L_SCR1(%a6),%a0		# pass: src addr
20751	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20752	bsr.l		_dmem_write		# store dbl fop to memory
20753
20754	tst.l		%d1			# did dstore fail?
20755	bne.l		facc_out_d		# yes
20756
20757	mov.b		FPCR_ENABLE(%a6),%d1
20758	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20759	bne.w		fout_sd_exc_unfl	# yes
20760	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
20761	rts
20762
20763#
20764# it's definitely an overflow so call ovf_res to get the correct answer
20765#
# in:  %a0 = ptr to extended precision source, L_SCR3(%a6) = dbl prec,rnd mode
# Sets the overflow status bits in USER_FPSR (plus inex2 when any of the low
# 11 mantissa bits, which do not fit in a double, is set), stores the default
# overflow result produced by ovf_res(), and, if OVFL or INEX2 is enabled,
# continues at fout_sd_exc_ovfl to return the EXOP in %fp1. The source
# pointer is pushed on the stack for that purpose and dropped again when no
# EXOP is needed.
20766fout_dbl_ovfl:
20767	mov.w		2+SRC_LO(%a0),%d0
20768	andi.w		&0x7ff,%d0		# any bits below dbl precision?
20769	bne.b		fout_dbl_ovfl_inex2
20770
20771	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772	bra.b		fout_dbl_ovfl_cont
20773fout_dbl_ovfl_inex2:
20774	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20775
20776fout_dbl_ovfl_cont:
20777	mov.l		%a0,-(%sp)		# save src ptr for EXOP creation
20778
20779# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780# overflow result. DON'T save the returned ccodes from ovf_res() since
20781# fmove out doesn't alter them.
20782	tst.b		SRC_EX(%a0)		# is operand negative?
20783	smi		%d1			# set if so
20784	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
20785	bsr.l		ovf_res			# calc OVFL result
20786	fmovm.x		(%a0),&0x80		# load default overflow result
20787	fmov.d		%fp0,L_SCR1(%a6)	# store to double
20788
20789	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20790	lea		L_SCR1(%a6),%a0		# pass: src addr
20791	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20792	bsr.l		_dmem_write		# store dbl fop to memory
20793
20794	tst.l		%d1			# did dstore fail?
20795	bne.l		facc_out_d		# yes
20796
# The EXOP must be built when the OVERFLOW (not underflow) or INEX2 trap is
# enabled: 0x10 = OVFL enable, 0x02 = INEX2 enable in the FPCR enable byte.
# (The underflow paths use 0x0a = UNFL|INEX2.)
20797	mov.b		FPCR_ENABLE(%a6),%d1
20798	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
20799	bne.w		fout_sd_exc_ovfl	# yes
20800	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
20801	rts
20802
20803#
20804# move out MAY overflow:
20805# (1) force the exp to 0x3fff
20806# (2) do a move w/ appropriate rnd mode
20807# (3) if the rounded magnitude is still < 2, the exponent did not
20808#	increase: store the original operand through fout_dbl_exg.
20809#     if it is >= 2, rounding overflowed: go to fout_dbl_ovfl.
20810#
# %a0 (src ptr) is left untouched so that either target can use it.
20811fout_dbl_may_ovfl:
20812	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20813	andi.w		&0x8000,%d1		# keep it,clear exp
20814	ori.w		&0x3fff,%d1		# insert exp = 0
20815	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20816	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20818
20819	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20820
20821	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20822	fmov.l		&0x0,%fpcr		# clear FPCR
20823
20824	fabs.x		%fp0			# need absolute value
20825	fcmp.b		%fp0,&0x2		# did exponent increase?
20826	fblt.w		fout_dbl_exg		# no; go finish NORM
20827	bra.w		fout_dbl_ovfl		# yes; go handle overflow
20828
20829#########################################################################
20830# XDEF ****************************************************************	#
20831#	dst_dbl(): create double precision value from extended prec.	#
20832#									#
20833# XREF ****************************************************************	#
20834#	None								#
20835#									#
20836# INPUT ***************************************************************	#
20837#	a0 = pointer to source operand in extended precision		#
20838#									#
20839# OUTPUT **************************************************************	#
20840#	d0 = hi(double precision result)				#
20841#	d1 = lo(double precision result)				#
20842#									#
20843# ALGORITHM ***********************************************************	#
20844#									#
20845#  Changes extended precision to double precision.			#
20846#  Note: no attempt is made to round the extended value to double.	#
20847#	dbl_sign = ext_sign						#
20848#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
20849#	get rid of ext integer bit					#
20850#	dbl_mant = ext_mant{62:11}					#
20851#									#
20852#		---------------   ---------------    ---------------	#
20853#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20854#		---------------   ---------------    ---------------	#
20855#		 95	    64    63 62	      32      31     11	  0	#
20856#				     |			     |		#
20857#				     |			     |		#
20858#				     |			     |		#
20859#			             v			     v		#
20860#			      ---------------   ---------------		#
20861#  double   ->		      |s|exp| mant  |   |  mant       |		#
20862#			      ---------------   ---------------		#
20863#			      63     51   32   31	       0	#
20864#									#
20865#########################################################################
20866
# L_SCR1(%a6) and L_SCR2(%a6) are used as scratch while the two result
# longwords are assembled. when the integer (msb) bit of the mantissa is
# clear, one extra is subtracted from the rebiased exponent.
20867dst_dbl:
20868	clr.l		%d0			# clear d0
20869	mov.w		FTEMP_EX(%a0),%d0	# get exponent
20870	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
20871	addi.w		&DBL_BIAS,%d0		# add double precision bias
20872	tst.b		FTEMP_HI(%a0)		# is number a denorm?
20873	bmi.b		dst_get_dupper		# no
20874	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
20875dst_get_dupper:
20876	swap		%d0			# d0 now in upper word
20877	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
20878	tst.b		FTEMP_EX(%a0)		# test sign
20879	bpl.b		dst_get_dman		# if positive, go process mantissa
20880	bset		&0x1f,%d0		# if negative, set sign
20881dst_get_dman:
20882	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20883	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
20884	or.l		%d1,%d0			# put these bits in ms word of double
20885	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
20886	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20887	mov.l		&21,%d0			# load shift count
20888	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
20889	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
20890	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
20891	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
20892	mov.l		L_SCR2(%a6),%d1
20893	or.l		%d0,%d1			# put them in double result
20894	mov.l		L_SCR1(%a6),%d0		# d0 = hi(dbl), d1 = lo(dbl)
20895	rts
20896
20897#########################################################################
20898# XDEF ****************************************************************	#
20899#	dst_sgl(): create single precision value from extended prec	#
20900#									#
20901# XREF ****************************************************************	#
20902#									#
20903# INPUT ***************************************************************	#
20904#	a0 = pointer to source operand in extended precision		#
20905#									#
20906# OUTPUT **************************************************************	#
20907#	d0 = single precision result					#
20908#									#
20909# ALGORITHM ***********************************************************	#
20910#									#
20911# Changes extended precision to single precision.			#
20912#	sgl_sign = ext_sign						#
20913#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
20914#	get rid of ext integer bit					#
20915#	sgl_mant = ext_mant{62:40}					#
20916#									#
20917#		---------------   ---------------    ---------------	#
20918#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20919#		---------------   ---------------    ---------------	#
20920#		 95	    64    63 62	   40 32      31     12	  0	#
20921#				     |	   |				#
20922#				     |	   |				#
20923#				     |	   |				#
20924#			             v     v				#
20925#			      ---------------				#
20926#  single   ->		      |s|exp| mant  |				#
20927#			      ---------------				#
20928#			      31     22     0				#
20929#									#
20930#########################################################################
20931
# no rounding is done here: mantissa bits below bit 40 are simply dropped.
# when the integer (msb) bit of the mantissa is clear, one extra is
# subtracted from the rebiased exponent. %d1 is clobbered.
20932dst_sgl:
20933	clr.l		%d0
20934	mov.w		FTEMP_EX(%a0),%d0	# get exponent
20935	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
20936	addi.w		&SGL_BIAS,%d0		# add single precision bias
20937	tst.b		FTEMP_HI(%a0)		# is number a denorm?
20938	bmi.b		dst_get_supper		# no
20939	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
20940dst_get_supper:
20941	swap		%d0			# put exp in upper word of d0
20942	lsl.l		&0x7,%d0		# shift it into single exp bits
20943	tst.b		FTEMP_EX(%a0)		# test sign
20944	bpl.b		dst_get_sman		# if positive, continue
20945	bset		&0x1f,%d0		# if negative, put in sign first
20946dst_get_sman:
20947	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20948	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
20949	lsr.l		&0x8,%d1		# and put them flush right
20950	or.l		%d1,%d0			# put these bits in ms word of single
20951	rts
20952
20953##############################################################################
# fout_pack: fmove.p out (entered for both packed format codes in tbl_fout).
#   The fixed-up <ea> from _calc_ea_fout() is pushed on the stack and popped
#   into %a1 at fout_pack_write.
#   NORM and DENORM sources are converted by bindec() using the k-factor
#   taken either from the command word (static) or from a data register
#   (dynamic); the 12-byte result is written from FP_SCR0(%a6).
#   Any other source type is written from FP_SRC(%a6) with its reserved
#   16-bit field cleared; for an SNAN the SNAN/AIOP status is set and bit 6
#   of the first mantissa byte is set before the write.
#   A failed write exits through fout_ext_err.
20954fout_pack:
20955	bsr.l		_calc_ea_fout		# fetch the <ea>
20956	mov.l		%a0,-(%sp)		# save dst addr for fout_pack_write
20957
20958	mov.b		STAG(%a6),%d0		# fetch input type
20959	bne.w		fout_pack_not_norm	# input is not NORM
20960
20961fout_pack_norm:
20962	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
20963	beq.b		fout_pack_s		# static
20964
20965fout_pack_d:
20966	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
20967	lsr.b		&0x4,%d1
20968	andi.w		&0x7,%d1
20969
20970	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
20971
20972	bra.b		fout_pack_type
20973fout_pack_s:
20974	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
20975
20976fout_pack_type:
20977	bfexts		%d0{&25:&7},%d0		# extract k-factor
20978	mov.l	%d0,-(%sp)			# save k-factor across bindec()
20979
20980	lea		FP_SRC(%a6),%a0		# pass: ptr to input
20981
20982# bindec is currently scrambling FP_SRC for denorm inputs.
20983# we'll have to change this, but for now, tough luck!!!
20984	bsr.l		bindec			# convert xprec to packed
20985
20986#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
20987	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
20988
20989	mov.l	(%sp)+,%d0			# restore k-factor
20990
# is every remaining digit of the packed result zero?
20991	tst.b		3+FP_SCR0_EX(%a6)
20992	bne.b		fout_pack_set
20993	tst.l		FP_SCR0_HI(%a6)
20994	bne.b		fout_pack_set
20995	tst.l		FP_SCR0_LO(%a6)
20996	bne.b		fout_pack_set
20997
20998# add the extra condition that only if the k-factor was zero, too, should
20999# we zero the exponent
21000	tst.l		%d0
21001	bne.b		fout_pack_set
21002# "mantissa" is all zero which means that the answer is zero. but, the '040
21003# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
21004# if the mantissa is zero, I will zero the exponent, too.
21005# the question now is whether the exponents sign bit is allowed to be non-zero
21006# for a zero, also...
21007	andi.w		&0xf000,FP_SCR0(%a6)
21008
21009fout_pack_set:
21010
21011	lea		FP_SCR0(%a6),%a0	# pass: src addr
21012
21013fout_pack_write:
21014	mov.l		(%sp)+,%a1		# pass: dst addr
21015	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
21016
21017	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
21018	beq.b		fout_pack_a7
21019
21020	bsr.l		_dmem_write		# write ext prec number to memory
21021
21022	tst.l		%d1			# did dstore fail?
21023	bne.w		fout_ext_err		# yes
21024
21025	rts
21026
21027# we don't want to do the write if the exception occurred in supervisor mode
21028# so _mem_write2() handles this for us.
21029fout_pack_a7:
21030	bsr.l		_mem_write2		# write ext prec number to memory
21031
21032	tst.l		%d1			# did dstore fail?
21033	bne.w		fout_ext_err		# yes
21034
21035	rts
21036
21037fout_pack_not_norm:
21038	cmpi.b		%d0,&DENORM		# is it a DENORM?
21039	beq.w		fout_pack_norm		# yes
21040	lea		FP_SRC(%a6),%a0		# pass: src addr (written as is)
21041	clr.w		2+FP_SRC_EX(%a6)	# clear reserved field
21042	cmpi.b		%d0,&SNAN		# is it an SNAN?
21043	beq.b		fout_pack_snan		# yes
21044	bra.b		fout_pack_write		# no
21045
21046fout_pack_snan:
21047	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21048	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
21049	bra.b		fout_pack_write
21050
21051#########################################################################
21052# XDEF ****************************************************************	#
21053#	fetch_dreg(): fetch register according to index in d1		#
21054#									#
21055# XREF ****************************************************************	#
21056#	None								#
21057#									#
21058# INPUT ***************************************************************	#
21059#	d1 = index of register to fetch from				#
21060#									#
21061# OUTPUT **************************************************************	#
21062#	d0 = value of register fetched					#
21063#									#
21064# ALGORITHM ***********************************************************	#
21065#	According to the index value in d1 which can range from zero	#
21066# to fifteen, load the corresponding register file value (where		#
21067# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
21068# stack. The rest should still be in their original places.		#
21069#									#
21070#########################################################################
21071
21072# this routine leaves d1 intact for subsequent store_dreg calls.
# tbl_fdreg holds sixteen 16-bit offsets, each relative to tbl_fdreg itself.
# The offset selected by d1 is loaded into d0 and used for a pc-relative
# jump to the matching stub, which then loads the full register value
# into d0 and returns.
21073	global		fetch_dreg
21074fetch_dreg:
21075	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0.w = stub offset for index d1
21076	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to tbl_fdreg + offset
21077
21078tbl_fdreg:
21079	short		fdreg0 - tbl_fdreg
21080	short		fdreg1 - tbl_fdreg
21081	short		fdreg2 - tbl_fdreg
21082	short		fdreg3 - tbl_fdreg
21083	short		fdreg4 - tbl_fdreg
21084	short		fdreg5 - tbl_fdreg
21085	short		fdreg6 - tbl_fdreg
21086	short		fdreg7 - tbl_fdreg
21087	short		fdreg8 - tbl_fdreg
21088	short		fdreg9 - tbl_fdreg
21089	short		fdrega - tbl_fdreg
21090	short		fdregb - tbl_fdreg
21091	short		fdregc - tbl_fdreg
21092	short		fdregd - tbl_fdreg
21093	short		fdrege - tbl_fdreg
21094	short		fdregf - tbl_fdreg
21095
21096fdreg0:
21097	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0: read the saved copy
21098	rts
21099fdreg1:
21100	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1: read the saved copy
21101	rts
21102fdreg2:
21103	mov.l		%d2,%d0			# d2-d7 are read from the live registers
21104	rts
21105fdreg3:
21106	mov.l		%d3,%d0
21107	rts
21108fdreg4:
21109	mov.l		%d4,%d0
21110	rts
21111fdreg5:
21112	mov.l		%d5,%d0
21113	rts
21114fdreg6:
21115	mov.l		%d6,%d0
21116	rts
21117fdreg7:
21118	mov.l		%d7,%d0
21119	rts
21120fdreg8:
21121	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0 (index 8): read the saved copy
21122	rts
21123fdreg9:
21124	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1 (index 9): read the saved copy
21125	rts
21126fdrega:
21127	mov.l		%a2,%d0			# a2-a5 are read from the live registers
21128	rts
21129fdregb:
21130	mov.l		%a3,%d0
21131	rts
21132fdregc:
21133	mov.l		%a4,%d0
21134	rts
21135fdregd:
21136	mov.l		%a5,%d0
21137	rts
21138fdrege:
21139	mov.l		(%a6),%d0		# a6 (index 14): saved copy is at (a6)
21140	rts
21141fdregf:
21142	mov.l		EXC_A7(%a6),%d0		# a7 (index 15): read the saved copy
21143	rts
21144
21145#########################################################################
21146# XDEF ****************************************************************	#
21147#	store_dreg_l(): store longword to data register specified by d1	#
21148#									#
21149# XREF ****************************************************************	#
21150#	None								#
21151#									#
21152# INPUT ***************************************************************	#
21153#	d0 = longword value to store					#
21154#	d1 = index of data register to store to				#
21155#									#
21156# OUTPUT **************************************************************	#
21157#	(data register is updated)					#
21158#									#
21159# ALGORITHM ***********************************************************	#
21160#	According to the index value in d1, store the longword value	#
21161# in d0 to the corresponding data register. D0/D1 are on the stack	#
21162# while the rest are in their initial places.				#
21163#									#
21164#########################################################################
21165
# Dispatch through tbl_sdregl (16-bit offsets relative to the table).
# d1 is overwritten with the stub offset, so the index does not survive
# the call. Only indexes 0-7 have table entries.
21166	global		store_dreg_l
21167store_dreg_l:
21168	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1.w = stub offset for index d1
21169	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to tbl_sdregl + offset
21170
21171tbl_sdregl:
21172	short		sdregl0 - tbl_sdregl
21173	short		sdregl1 - tbl_sdregl
21174	short		sdregl2 - tbl_sdregl
21175	short		sdregl3 - tbl_sdregl
21176	short		sdregl4 - tbl_sdregl
21177	short		sdregl5 - tbl_sdregl
21178	short		sdregl6 - tbl_sdregl
21179	short		sdregl7 - tbl_sdregl
21180
21181sdregl0:
21182	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0: update the saved copy
21183	rts
21184sdregl1:
21185	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1: update the saved copy
21186	rts
21187sdregl2:
21188	mov.l		%d0,%d2			# d2-d7 are written directly
21189	rts
21190sdregl3:
21191	mov.l		%d0,%d3
21192	rts
21193sdregl4:
21194	mov.l		%d0,%d4
21195	rts
21196sdregl5:
21197	mov.l		%d0,%d5
21198	rts
21199sdregl6:
21200	mov.l		%d0,%d6
21201	rts
21202sdregl7:
21203	mov.l		%d0,%d7
21204	rts
21205
21206#########################################################################
21207# XDEF ****************************************************************	#
21208#	store_dreg_w(): store word to data register specified by d1	#
21209#									#
21210# XREF ****************************************************************	#
21211#	None								#
21212#									#
21213# INPUT ***************************************************************	#
21214#	d0 = word value to store					#
21215#	d1 = index of data register to store to				#
21216#									#
21217# OUTPUT **************************************************************	#
21218#	(data register is updated)					#
21219#									#
21220# ALGORITHM ***********************************************************	#
21221#	According to the index value in d1, store the word value	#
21222# in d0 to the corresponding data register. D0/D1 are on the stack	#
21223# while the rest are in their initial places.				#
21224#									#
21225#########################################################################
21226
# Dispatch through tbl_sdregw (16-bit offsets relative to the table).
# d1 is overwritten with the stub offset, so the index does not survive
# the call. Only the low-order word of the target register is changed.
21227	global		store_dreg_w
21228store_dreg_w:
21229	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1.w = stub offset for index d1
21230	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to tbl_sdregw + offset
21231
21232tbl_sdregw:
21233	short		sdregw0 - tbl_sdregw
21234	short		sdregw1 - tbl_sdregw
21235	short		sdregw2 - tbl_sdregw
21236	short		sdregw3 - tbl_sdregw
21237	short		sdregw4 - tbl_sdregw
21238	short		sdregw5 - tbl_sdregw
21239	short		sdregw6 - tbl_sdregw
21240	short		sdregw7 - tbl_sdregw
21241
21242sdregw0:
21243	mov.w		%d0,2+EXC_DREGS+0x0(%a6) # d0: low word of the saved copy (offset 2)
21244	rts
21245sdregw1:
21246	mov.w		%d0,2+EXC_DREGS+0x4(%a6) # d1: low word of the saved copy (offset 2)
21247	rts
21248sdregw2:
21249	mov.w		%d0,%d2			# d2-d7 are written directly
21250	rts
21251sdregw3:
21252	mov.w		%d0,%d3
21253	rts
21254sdregw4:
21255	mov.w		%d0,%d4
21256	rts
21257sdregw5:
21258	mov.w		%d0,%d5
21259	rts
21260sdregw6:
21261	mov.w		%d0,%d6
21262	rts
21263sdregw7:
21264	mov.w		%d0,%d7
21265	rts
21266
21267#########################################################################
21268# XDEF ****************************************************************	#
21269#	store_dreg_b(): store byte to data register specified by d1	#
21270#									#
21271# XREF ****************************************************************	#
21272#	None								#
21273#									#
21274# INPUT ***************************************************************	#
21275#	d0 = byte value to store					#
21276#	d1 = index of data register to store to				#
21277#									#
21278# OUTPUT **************************************************************	#
21279#	(data register is updated)					#
21280#									#
21281# ALGORITHM ***********************************************************	#
21282#	According to the index value in d1, store the byte value	#
21283# in d0 to the corresponding data register. D0/D1 are on the stack	#
21284# while the rest are in their initial places.				#
21285#									#
21286#########################################################################
21287
# Dispatch through tbl_sdregb (16-bit offsets relative to the table).
# d1 is overwritten with the stub offset, so the index does not survive
# the call. Only the low-order byte of the target register is changed.
21288	global		store_dreg_b
21289store_dreg_b:
21290	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1.w = stub offset for index d1
21291	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# jump to tbl_sdregb + offset
21292
21293tbl_sdregb:
21294	short		sdregb0 - tbl_sdregb
21295	short		sdregb1 - tbl_sdregb
21296	short		sdregb2 - tbl_sdregb
21297	short		sdregb3 - tbl_sdregb
21298	short		sdregb4 - tbl_sdregb
21299	short		sdregb5 - tbl_sdregb
21300	short		sdregb6 - tbl_sdregb
21301	short		sdregb7 - tbl_sdregb
21302
21303sdregb0:
21304	mov.b		%d0,3+EXC_DREGS+0x0(%a6) # d0: low byte of the saved copy (offset 3)
21305	rts
21306sdregb1:
21307	mov.b		%d0,3+EXC_DREGS+0x4(%a6) # d1: low byte of the saved copy (offset 3)
21308	rts
21309sdregb2:
21310	mov.b		%d0,%d2			# d2-d7 are written directly
21311	rts
21312sdregb3:
21313	mov.b		%d0,%d3
21314	rts
21315sdregb4:
21316	mov.b		%d0,%d4
21317	rts
21318sdregb5:
21319	mov.b		%d0,%d5
21320	rts
21321sdregb6:
21322	mov.b		%d0,%d6
21323	rts
21324sdregb7:
21325	mov.b		%d0,%d7
21326	rts
21327
21328#########################################################################
21329# XDEF ****************************************************************	#
21330#	inc_areg(): increment an address register by the value in d0	#
21331#									#
21332# XREF ****************************************************************	#
21333#	None								#
21334#									#
21335# INPUT ***************************************************************	#
21336#	d0 = amount to increment by					#
21337#	d1 = index of address register to increment			#
21338#									#
21339# OUTPUT **************************************************************	#
21340#	(address register is updated)					#
21341#									#
21342# ALGORITHM ***********************************************************	#
21343#	Typically used for an instruction w/ a post-increment <ea>,	#
21344# this routine adds the increment value in d0 to the address register	#
21345# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
21346# in their original places.						#
21347#	For a7, if the increment amount is one, then we have to		#
21348# increment by two. For any a7 update, set the mia7_flg so that if	#
21349# an access error exception occurs later in emulation, this address	#
21350# register update can be undone.					#
21351#									#
21352#########################################################################
21353
# Dispatch through tbl_iareg (16-bit offsets relative to the table).
# d1 is overwritten with the stub offset, so the index does not survive
# the call.
21354	global		inc_areg
21355inc_areg:
21356	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1.w = stub offset for index d1
21357	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to tbl_iareg + offset
21358
21359tbl_iareg:
21360	short		iareg0 - tbl_iareg
21361	short		iareg1 - tbl_iareg
21362	short		iareg2 - tbl_iareg
21363	short		iareg3 - tbl_iareg
21364	short		iareg4 - tbl_iareg
21365	short		iareg5 - tbl_iareg
21366	short		iareg6 - tbl_iareg
21367	short		iareg7 - tbl_iareg
21368
21369iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0: update the saved copy
21370	rts
21371iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1: update the saved copy
21372	rts
21373iareg2:	add.l		%d0,%a2			# a2-a5 are updated directly
21374	rts
21375iareg3:	add.l		%d0,%a3
21376	rts
21377iareg4:	add.l		%d0,%a4
21378	rts
21379iareg5:	add.l		%d0,%a5
21380	rts
21381iareg6:	add.l		%d0,(%a6)		# a6: saved copy is at (a6)
21382	rts
21383iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # record that a7 was incremented
21384	cmpi.b		%d0,&0x1		# is the increment amount one?
21385	beq.b		iareg7b			# yes; a7 moves by two instead
21386	add.l		%d0,EXC_A7(%a6)		# no; add the amount to the saved a7
21387	rts
21388iareg7b:
21389	addq.l		&0x2,EXC_A7(%a6)	# increment of one becomes two for a7
21390	rts
21391
21392#########################################################################
21393# XDEF ****************************************************************	#
21394#	dec_areg(): decrement an address register by the value in d0	#
21395#									#
21396# XREF ****************************************************************	#
21397#	None								#
21398#									#
21399# INPUT ***************************************************************	#
21400#	d0 = amount to decrement by					#
21401#	d1 = index of address register to decrement			#
21402#									#
21403# OUTPUT **************************************************************	#
21404#	(address register is updated)					#
21405#									#
21406# ALGORITHM ***********************************************************	#
21407#	Typically used for an instruction w/ a pre-decrement <ea>,	#
21408# this routine subtracts the decrement value in d0 from the address	#
21409# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
21410# reside in their original places.					#
21411#	For a7, if the decrement amount is one, then we have to		#
21412# decrement by two. For any a7 update, set the mda7_flg so that if	#
21413# an access error exception occurs later in emulation, this address	#
21414# register update can be undone.					#
21415#									#
21416#########################################################################
21417
# Dispatch through tbl_dareg (16-bit offsets relative to the table).
# d1 is overwritten with the stub offset, so the index does not survive
# the call. Each stub subtracts d0 from the selected address register.
21418	global		dec_areg
21419dec_areg:
21420	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1.w = stub offset for index d1
21421	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# jump to tbl_dareg + offset
21422
21423tbl_dareg:
21424	short		dareg0 - tbl_dareg
21425	short		dareg1 - tbl_dareg
21426	short		dareg2 - tbl_dareg
21427	short		dareg3 - tbl_dareg
21428	short		dareg4 - tbl_dareg
21429	short		dareg5 - tbl_dareg
21430	short		dareg6 - tbl_dareg
21431	short		dareg7 - tbl_dareg
21432
21433dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0: update the saved copy
21434	rts
21435dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1: update the saved copy
21436	rts
21437dareg2:	sub.l		%d0,%a2			# a2-a5 are updated directly
21438	rts
21439dareg3:	sub.l		%d0,%a3
21440	rts
21441dareg4:	sub.l		%d0,%a4
21442	rts
21443dareg5:	sub.l		%d0,%a5
21444	rts
21445dareg6:	sub.l		%d0,(%a6)		# a6: saved copy is at (a6)
21446	rts
21447dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6) # record that a7 was decremented
21448	cmpi.b		%d0,&0x1		# is the decrement amount one?
21449	beq.b		dareg7b			# yes; a7 moves by two instead
21450	sub.l		%d0,EXC_A7(%a6)		# no; subtract the amount from the saved a7
21451	rts
21452dareg7b:
21453	subq.l		&0x2,EXC_A7(%a6)	# decrement of one becomes two for a7
21454	rts
21455
21456##############################################################################
21457
21458#########################################################################
21459# XDEF ****************************************************************	#
21460#	load_fpn1(): load FP register value into FP_SRC(a6).		#
21461#									#
21462# XREF ****************************************************************	#
21463#	None								#
21464#									#
21465# INPUT ***************************************************************	#
21466#	d0 = index of FP register to load				#
21467#									#
21468# OUTPUT **************************************************************	#
21469#	FP_SRC(a6) = value loaded from FP register file			#
21470#									#
21471# ALGORITHM ***********************************************************	#
21472#	Using the index in d0, load FP_SRC(a6) with a number from the	#
21473# FP register file.							#
21474#									#
21475#########################################################################
21476
# Dispatch through tbl_load_fpn1 (16-bit offsets relative to the table).
# d0 is overwritten with the stub offset. Every stub also returns
# a0 = address of FP_SRC(a6).
21477	global		load_fpn1
21478load_fpn1:
21479	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0.w = stub offset for index d0
21480	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# jump to table + offset
21481
21482tbl_load_fpn1:
21483	short		load_fpn1_0 - tbl_load_fpn1
21484	short		load_fpn1_1 - tbl_load_fpn1
21485	short		load_fpn1_2 - tbl_load_fpn1
21486	short		load_fpn1_3 - tbl_load_fpn1
21487	short		load_fpn1_4 - tbl_load_fpn1
21488	short		load_fpn1_5 - tbl_load_fpn1
21489	short		load_fpn1_6 - tbl_load_fpn1
21490	short		load_fpn1_7 - tbl_load_fpn1
21491
# fp0 and fp1 are copied from their saved images (three longwords each).
21492load_fpn1_0:
21493	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21494	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21495	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21496	lea		FP_SRC(%a6), %a0
21497	rts
21498load_fpn1_1:
21499	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21500	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21501	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21502	lea		FP_SRC(%a6), %a0
21503	rts
# fp2-fp7 are stored straight from the FP register file with fmovm.x.
21504load_fpn1_2:
21505	fmovm.x		&0x20, FP_SRC(%a6)	# register mask 0x20 = fp2
21506	lea		FP_SRC(%a6), %a0
21507	rts
21508load_fpn1_3:
21509	fmovm.x		&0x10, FP_SRC(%a6)	# register mask 0x10 = fp3
21510	lea		FP_SRC(%a6), %a0
21511	rts
21512load_fpn1_4:
21513	fmovm.x		&0x08, FP_SRC(%a6)	# register mask 0x08 = fp4
21514	lea		FP_SRC(%a6), %a0
21515	rts
21516load_fpn1_5:
21517	fmovm.x		&0x04, FP_SRC(%a6)	# register mask 0x04 = fp5
21518	lea		FP_SRC(%a6), %a0
21519	rts
21520load_fpn1_6:
21521	fmovm.x		&0x02, FP_SRC(%a6)	# register mask 0x02 = fp6
21522	lea		FP_SRC(%a6), %a0
21523	rts
21524load_fpn1_7:
21525	fmovm.x		&0x01, FP_SRC(%a6)	# register mask 0x01 = fp7
21526	lea		FP_SRC(%a6), %a0
21527	rts
21528
21529#############################################################################
21530
21531#########################################################################
21532# XDEF ****************************************************************	#
21533#	load_fpn2(): load FP register value into FP_DST(a6).		#
21534#									#
21535# XREF ****************************************************************	#
21536#	None								#
21537#									#
21538# INPUT ***************************************************************	#
21539#	d0 = index of FP register to load				#
21540#									#
21541# OUTPUT **************************************************************	#
21542#	FP_DST(a6) = value loaded from FP register file			#
21543#									#
21544# ALGORITHM ***********************************************************	#
21545#	Using the index in d0, load FP_DST(a6) with a number from the	#
21546# FP register file.							#
21547#									#
21548#########################################################################
21549
# Dispatch through tbl_load_fpn2 (16-bit offsets relative to the table).
# d0 is overwritten with the stub offset. Every stub also returns
# a0 = address of FP_DST(a6).
21550	global		load_fpn2
21551load_fpn2:
21552	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0.w = stub offset for index d0
21553	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# jump to table + offset
21554
21555tbl_load_fpn2:
21556	short		load_fpn2_0 - tbl_load_fpn2
21557	short		load_fpn2_1 - tbl_load_fpn2
21558	short		load_fpn2_2 - tbl_load_fpn2
21559	short		load_fpn2_3 - tbl_load_fpn2
21560	short		load_fpn2_4 - tbl_load_fpn2
21561	short		load_fpn2_5 - tbl_load_fpn2
21562	short		load_fpn2_6 - tbl_load_fpn2
21563	short		load_fpn2_7 - tbl_load_fpn2
21564
# fp0 and fp1 are copied from their saved images (three longwords each).
21565load_fpn2_0:
21566	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
21567	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
21568	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
21569	lea		FP_DST(%a6), %a0
21570	rts
21571load_fpn2_1:
21572	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
21573	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
21574	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
21575	lea		FP_DST(%a6), %a0
21576	rts
# fp2-fp7 are stored straight from the FP register file with fmovm.x.
21577load_fpn2_2:
21578	fmovm.x		&0x20, FP_DST(%a6)	# register mask 0x20 = fp2
21579	lea		FP_DST(%a6), %a0
21580	rts
21581load_fpn2_3:
21582	fmovm.x		&0x10, FP_DST(%a6)	# register mask 0x10 = fp3
21583	lea		FP_DST(%a6), %a0
21584	rts
21585load_fpn2_4:
21586	fmovm.x		&0x08, FP_DST(%a6)	# register mask 0x08 = fp4
21587	lea		FP_DST(%a6), %a0
21588	rts
21589load_fpn2_5:
21590	fmovm.x		&0x04, FP_DST(%a6)	# register mask 0x04 = fp5
21591	lea		FP_DST(%a6), %a0
21592	rts
21593load_fpn2_6:
21594	fmovm.x		&0x02, FP_DST(%a6)	# register mask 0x02 = fp6
21595	lea		FP_DST(%a6), %a0
21596	rts
21597load_fpn2_7:
21598	fmovm.x		&0x01, FP_DST(%a6)	# register mask 0x01 = fp7
21599	lea		FP_DST(%a6), %a0
21600	rts
21601
21602#############################################################################
21603
21604#########################################################################
21605# XDEF ****************************************************************	#
21606#	store_fpreg(): store an fp value to the fpreg designated d0.	#
21607#									#
21608# XREF ****************************************************************	#
21609#	None								#
21610#									#
21611# INPUT ***************************************************************	#
21612#	fp0 = extended precision value to store				#
21613#	d0  = index of floating-point register				#
21614#									#
21615# OUTPUT **************************************************************	#
21616#	None								#
21617#									#
21618# ALGORITHM ***********************************************************	#
21619#	Store the value in fp0 to the FP register designated by the	#
21620# value in d0. The FP number can be DENORM or SNAN so we have to be	#
21621# careful that we don't take an exception here.				#
21622#									#
21623#########################################################################
21624
# Dispatch through tbl_store_fpreg (16-bit offsets relative to the table).
# d0 is overwritten with the stub offset. All transfers use fmovm.x, in
# line with the header's note about not taking an exception here.
21625	global		store_fpreg
21626store_fpreg:
21627	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0.w = stub offset for index d0
21628	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# jump to table + offset
21629
21630tbl_store_fpreg:
21631	short		store_fpreg_0 - tbl_store_fpreg
21632	short		store_fpreg_1 - tbl_store_fpreg
21633	short		store_fpreg_2 - tbl_store_fpreg
21634	short		store_fpreg_3 - tbl_store_fpreg
21635	short		store_fpreg_4 - tbl_store_fpreg
21636	short		store_fpreg_5 - tbl_store_fpreg
21637	short		store_fpreg_6 - tbl_store_fpreg
21638	short		store_fpreg_7 - tbl_store_fpreg
21639
# destinations fp0/fp1: write fp0 into the saved image of that register.
21640store_fpreg_0:
21641	fmovm.x		&0x80, EXC_FP0(%a6)	# register mask 0x80 = fp0
21642	rts
21643store_fpreg_1:
21644	fmovm.x		&0x80, EXC_FP1(%a6)	# register mask 0x80 = fp0
21645	rts
# destinations fp2-fp7: push fp0 and pop it into the live target register.
# With -(sp) the fmovm.x register mask is bit-reversed, so 0x01 selects fp0.
21646store_fpreg_2:
21647	fmovm.x		&0x01, -(%sp)		# push fp0
21648	fmovm.x		(%sp)+, &0x20		# pop into fp2
21649	rts
21650store_fpreg_3:
21651	fmovm.x		&0x01, -(%sp)		# push fp0
21652	fmovm.x		(%sp)+, &0x10		# pop into fp3
21653	rts
21654store_fpreg_4:
21655	fmovm.x		&0x01, -(%sp)		# push fp0
21656	fmovm.x		(%sp)+, &0x08		# pop into fp4
21657	rts
21658store_fpreg_5:
21659	fmovm.x		&0x01, -(%sp)		# push fp0
21660	fmovm.x		(%sp)+, &0x04		# pop into fp5
21661	rts
21662store_fpreg_6:
21663	fmovm.x		&0x01, -(%sp)		# push fp0
21664	fmovm.x		(%sp)+, &0x02		# pop into fp6
21665	rts
21666store_fpreg_7:
21667	fmovm.x		&0x01, -(%sp)		# push fp0
21668	fmovm.x		(%sp)+, &0x01		# pop into fp7
21669	rts
21670
21671#########################################################################
21672# XDEF ****************************************************************	#
21673#	_denorm(): denormalize an intermediate result			#
21674#									#
21675# XREF ****************************************************************	#
21676#	None								#
21677#									#
21678# INPUT *************************************************************** #
21679#	a0 = points to the operand to be denormalized			#
21680#		(in the internal extended format)			#
21681#									#
21682#	d0 = rounding precision						#
21683#									#
21684# OUTPUT **************************************************************	#
21685#	a0 = pointer to the denormalized result				#
21686#		(in the internal extended format)			#
21687#									#
21688#	d0 = guard,round,sticky						#
21689#									#
21690# ALGORITHM ***********************************************************	#
21691#	According to the exponent underflow threshold for the given	#
21692# precision, shift the mantissa bits to the right in order raise the	#
21693# exponent of the operand to the threshold value. While shifting the	#
21694# mantissa bits right, maintain the value of the guard, round, and	#
21695# sticky bits.								#
21696# other notes:								#
21697#	(1) _denorm() is called by the underflow routines		#
21698#	(2) _denorm() does NOT affect the status register		#
21699#									#
21700#########################################################################
21701
21702#
21703# table of exponent threshold values for each precision
21704#
# indexed by (d0 >> 2) in _denorm; entry 0 is a zero threshold
# (presumably the extended-precision case -- confirm against the mode equates)
21705tbl_thresh:
21706	short		0x0
21707	short		sgl_thresh
21708	short		dbl_thresh
21709
21710	global		_denorm
21711_denorm:
21712#
21713# Load the exponent threshold for the precision selected and check
21714# to see if (threshold - exponent) is > 65 in which case we can
21715# simply calculate the sticky bit and zero the mantissa. otherwise
21716# we have to call the denormalization routine.
21717#
21718	lsr.b		&0x2, %d0		# shift prec to lo bits
21719	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21720	mov.w		%d1, %d0		# copy d1 into d0
21721	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
21722	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
21723	bpl.b		denorm_set_stky		# yes; just calc sticky
21724
21725	clr.l		%d0			# clear g,r,s
21726	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
21727	beq.b		denorm_call		# no; pass g,r,s in as all zero
21728	bset		&29, %d0		# yes; set sticky bit
21729
21730denorm_call:
21731	bsr.l		dnrm_lp			# denormalize the number (d1 = threshold)
21732	rts
21733
21734#
21735# all bits would have been shifted off during the denorm so simply
21736# set the sticky bit in the return value and clear the entire mantissa.
21737#
21738denorm_set_stky:
21739	mov.l		&0x20000000, %d0	# set sticky bit in return value
21740	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
21741	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
21742	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
21743	rts
21744
21745#									#
21746# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
21747#									#
21748# INPUT:								#
21749#	%a0	   : points to the operand to be denormalized		#
21750#	%d0{31:29} : initial guard,round,sticky				#
21751#	%d1{15:0}  : denormalization threshold				#
21752# OUTPUT:								#
21753#	%a0	   : points to the denormalized operand			#
21754#	%d0{31:29} : final guard,round,sticky				#
21755#									#
21756
21757# *** Local Equates *** #
21758set	GRS,		L_SCR2			# g,r,s temp storage
21759set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
# NOTE(review): the bit-field extractions in case_1/case_2 read across
# FTEMP_LO2 into GRS, so L_SCR2 is assumed to sit directly after L_SCR1
# in the frame -- confirm against the frame layout equates.
21760
21761	global		dnrm_lp
21762dnrm_lp:
21763
21764#
21765# make a copy of FTEMP_LO and place the g,r,s bits directly after it
21766# in memory so as to make the bitfield extraction for denormalization easier.
21767#
21768	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21769	mov.l		%d0, GRS(%a6)		# place g,r,s after it
21770
21771#
21772# check to see how much less than the underflow threshold the operand
21773# exponent is.
21774#
21775	mov.l		%d1, %d0		# copy the denorm threshold
21776	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
21777	ble.b		dnrm_no_lp		# d1 <= 0
21778	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
21779	blt.b		case_1			# yes
21780	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
21781	blt.b		case_2			# yes
21782	bra.w		case_3			# (d1 >= 64)
21783
21784#
21785# No denormalization necessary
21786#
21787dnrm_no_lp:
21788	mov.l		GRS(%a6), %d0		# restore original g,r,s
21789	rts
21790
21791#
21792# case (0<d1<32)
21793#
21794# %d0 = denorm threshold
21795# %d1 = "n" = amt to shift
21796#
21797#	---------------------------------------------------------
21798#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21799#	---------------------------------------------------------
21800#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21801#	\	   \		      \			 \
21802#	 \	    \		       \		  \
21803#	  \	     \			\		   \
21804#	   \	      \			 \		    \
21805#	    \	       \		  \		     \
21806#	     \		\		   \		      \
21807#	      \		 \		    \		       \
21808#	       \	  \		     \			\
21809#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810#	---------------------------------------------------------
21811#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
21812#	---------------------------------------------------------
21813#
# case_1: shift the mantissa right by n = d1 bits (0 < n < 32) using three
# overlapping bit-field extractions; d2 is saved and restored around its
# use as a temporary.
21814case_1:
21815	mov.l		%d2, -(%sp)		# create temp storage
21816
21817	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
21818	mov.l		&32, %d0
21819	sub.w		%d1, %d0		# %d0 = 32 - %d1
21820
# for shift amounts >= 29, fold the old g,r,s byte into the low byte of the
# FTEMP_LO copy so those bits are still seen by the sticky test below.
21821	cmpi.w		%d1, &29		# is shft amt >= 29
21822	blt.b		case1_extract		# no; no fix needed
21823	mov.b		GRS(%a6), %d2
21824	or.b		%d2, 3+FTEMP_LO2(%a6)
21825
21826case1_extract:
21827	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21828	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21829	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21830
21831	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
21832	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
21833
21834	bftst		%d0{&2:&30}		# were bits shifted off? (anything below g,r)
21835	beq.b		case1_sticky_clear	# no; go finish
21836	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
21837
21838case1_sticky_clear:
21839	and.l		&0xe0000000, %d0	# clear all but G,R,S
21840	mov.l		(%sp)+, %d2		# restore temp register
21841	rts
21842
21843#
21844# case (32<=d1<64)
21845#
21846# %d0 = denorm threshold
21847# %d1 = "n" = amt to shift
21848#
21849#	---------------------------------------------------------
21850#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21851#	---------------------------------------------------------
21852#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21853#	\	   \		      \
21854#	 \	    \		       \
21855#	  \	     \			-------------------
21856#	   \	      --------------------		   \
21857#	    -------------------		  \		    \
21858#			       \	   \		     \
21859#				\	    \		      \
21860#				 \	     \		       \
21861#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862#	---------------------------------------------------------
21863#	|0...............0|0....0| NEW_LO     |grs		|
21864#	---------------------------------------------------------
21865#
# case_2: shift amount is 32..63. FTEMP_HI becomes zero, the new FTEMP_LO
# and g,r come from the old FTEMP_HI, and everything below feeds the
# sticky bit. d2 is saved and restored around its use as a temporary.
21866case_2:
21867	mov.l		%d2, -(%sp)		# create temp storage
21868
21869	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
21870	subi.w		&0x20, %d1		# %d1 now between 0 and 31
21871	mov.l		&0x20, %d0
21872	sub.w		%d1, %d0		# %d0 = 32 - %d1
21873
21874# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21875# the number of bits to check for the sticky detect.
21876# it only plays a role in shift amounts of 61-63.
21877	mov.b		GRS(%a6), %d2
21878	or.b		%d2, 3+FTEMP_LO2(%a6)
21879
21880	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21881	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21882
21883	bftst		%d1{&2:&30}		# were any bits shifted off?
21884	bne.b		case2_set_sticky	# yes; set sticky bit
21885	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
21886	bne.b		case2_set_sticky	# yes; set sticky bit
21887
21888	mov.l		%d1, %d0		# move new G,R,S to %d0
21889	bra.b		case2_end
21890
21891case2_set_sticky:
21892	mov.l		%d1, %d0		# move new G,R,S to %d0
21893	bset		&rnd_stky_bit, %d0	# set sticky bit
21894
21895case2_end:
21896	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
21897	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
21898	and.l		&0xe0000000, %d0	# clear all but G,R,S
21899
21900	mov.l		(%sp)+,%d2		# restore temp register
21901	rts
21902
21903#
21904# case (d1>=64)
21905#
21906# %d0 = denorm threshold
21907# %d1 = amt to shift
21908#
# case_3: shift amount is >= 64. Store the threshold as the new exponent,
# then split on the exact amount: 64 and 65 still leave g/r bits to
# recover; anything larger returns a zero mantissa with only sticky set.
21909case_3:
21910	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
21911
21912	cmpi.w		%d1, &65		# is shift amt > 65?
21913	blt.b		case3_64		# no; it's == 64
21914	beq.b		case3_65		# no; it's == 65
21915
21916#
21917# case (d1>65)
21918#
21919# Shift value is > 65 and out of range. All bits are shifted off.
21920# Return a zero mantissa with the sticky bit set
21921#
21922	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
21923	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
21924	mov.l		&0x20000000, %d0	# set sticky bit
21925	rts
21926
21927#
21928# case (d1 == 64)
21929#
21930#	---------------------------------------------------------
21931#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21932#	---------------------------------------------------------
21933#	<-------(32)------>
21934#	\		   \
21935#	 \		    \
21936#	  \		     \
21937#	   \		      ------------------------------
21938#	    -------------------------------		    \
21939#					   \		     \
21940#					    \		      \
21941#					     \		       \
21942#					      <-------(32)------>
21943#	---------------------------------------------------------
21944#	|0...............0|0................0|grs		|
21945#	---------------------------------------------------------
21946#
# case3_64: the two top bits of FTEMP_HI become G,R (left in d0); the other
# 30 bits go to d1, and the final and.l leaves the condition codes that
# case3_complete branches on.
21947case3_64:
21948	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
21949	mov.l		%d0, %d1		# make a copy
21950	and.l		&0xc0000000, %d0	# extract G,R
21951	and.l		&0x3fffffff, %d1	# extract other bits
21952
21953	bra.b		case3_complete
21954
21955#
21956# case (d1 == 65)
21957#
21958#	---------------------------------------------------------
21959#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21960#	---------------------------------------------------------
21961#	<-------(32)------>
21962#	\		   \
21963#	 \		    \
21964#	  \		     \
21965#	   \		      ------------------------------
21966#	    --------------------------------		    \
21967#					    \		     \
21968#					     \		      \
21969#					      \		       \
21970#					       <-------(31)----->
21971#	---------------------------------------------------------
21972#	|0...............0|0................0|0rs		|
21973#	---------------------------------------------------------
21974#
21975case3_65:
21976	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
21977	and.l		&0x80000000, %d0	# extract R bit
21978	lsr.l		&0x1, %d0		# shift high bit into R bit
21979	and.l		&0x7fffffff, %d1	# extract other bits
21980
21981case3_complete:
21982# last operation done was an "and" of the bits shifted off so the condition
21983# codes are already set so branch accordingly.
21984	bne.b		case3_set_sticky	# yes; go set new sticky
21985	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
21986	bne.b		case3_set_sticky	# yes; go set new sticky
21987	tst.b		GRS(%a6)		# were any bits shifted off?
21988	bne.b		case3_set_sticky	# yes; go set new sticky
21989
21990#
21991# no bits were shifted off so don't set the sticky bit.
21992# the guard and
21993# the entire mantissa is zero.
21994#
21995	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
21996	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
21997	rts
21998
21999#
22000# some bits were shifted off so set the sticky bit.
22001# the entire mantissa is zero.
22002#
22003case3_set_sticky:
22004	bset		&rnd_stky_bit,%d0	# set new sticky bit
22005	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
22006	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
22007	rts
22008
22009#########################################################################
22010# XDEF ****************************************************************	#
22011#	_round(): round result according to precision/mode		#
22012#									#
22013# XREF ****************************************************************	#
22014#	None								#
22015#									#
22016# INPUT ***************************************************************	#
22017#	a0	  = ptr to input operand in internal extended format	#
22018#	d1(hi)    = contains rounding precision:			#
22019#			ext = $0000xxxx					#
22020#			sgl = $0004xxxx					#
22021#			dbl = $0008xxxx					#
22022#	d1(lo)	  = contains rounding mode:				#
22023#			RN  = $xxxx0000					#
22024#			RZ  = $xxxx0001					#
22025#			RM  = $xxxx0002					#
22026#			RP  = $xxxx0003					#
22027#	d0{31:29} = contains the g,r,s bits (extended)			#
22028#									#
22029# OUTPUT **************************************************************	#
22030#	a0 = pointer to rounded result					#
22031#									#
22032# ALGORITHM ***********************************************************	#
22033#	On return the value pointed to by a0 is correctly rounded,	#
22034#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
22035#	The result is not typed - the tag field is invalid.  The	#
22036#	result is still in the internal extended format.		#
22037#									#
22038#	The INEX bit of USER_FPSR will be set if the rounded result was	#
22039#	inexact (i.e. if any of the g-r-s bits were set).		#
22040#									#
22041#########################################################################
22042
# _round entry point. Expects a0 = operand in internal extended format,
# d0{31:29} = extended-precision g,r,s and d1 = {precision (hi word),
# rounding mode (lo word)}. Dispatches to truncate or to one of the
# rnd_* handlers below; those handlers return directly to the caller.
22043	global		_round
22044_round:
22045#
22046# ext_grs() looks at the rounding precision and sets the appropriate
22047# G,R,S bits.
22048# If (G,R,S == 0) then result is exact and round is done, else set
22049# the inex flag in status reg and continue.
22050#
22051	bsr.l		ext_grs			# extract G,R,S
22052
22053	tst.l		%d0			# are G,R,S zero?
22054	beq.w		truncate		# yes; only truncation to prec is left
22055
22056	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22057
22058#
22059# Use rounding mode as an index into a jump table for these modes.
22060# All of the following assumes grs != 0.
22061#
# d1.w is the rounding mode (0..3); each table entry is a 16-bit offset
# relative to tbl_mode, hence the *2 scale and the second tbl_mode base.
22062	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
22063	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
22064
# Handler offsets indexed by rounding mode: RN, RZ, RM, RP.
22065tbl_mode:
22066	short		rnd_near - tbl_mode	# RN
22067	short		truncate - tbl_mode	# RZ always truncates
22068	short		rnd_mnus - tbl_mode	# RM
22069	short		rnd_plus - tbl_mode	# RP
22070
22071#################################################################
22072#	ROUND PLUS INFINITY					#
22073#								#
22074#	If sign of fp number = 0 (positive), then add 1 to l.	#
22075#################################################################
# Round-to-plus-infinity handler; reached through tbl_mode with g,r,s != 0.
# A negative operand is truncated. A positive operand has its l-bit
# incremented at the selected precision; d0 is forced non-zero first so that
# the add_* code never takes its "clear the l-bit" (tie) path.
22076rnd_plus:
22077	tst.b		FTEMP_SGN(%a0)		# check for sign
22078	bmi.w		truncate		# if negative then truncate
22079
22080	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
22081	swap		%d1			# set up d1 for round prec.
22082
22083	cmpi.b		%d1, &s_mode		# is prec = sgl?
22084	beq.w		add_sgl			# yes
22085	bgt.w		add_dbl			# no; it's dbl
22086	bra.w		add_ext			# no; it's ext
22087
22088#################################################################
22089#	ROUND MINUS INFINITY					#
22090#								#
22091#	If sign of fp number = 1 (negative), then add 1 to l.	#
22092#################################################################
# Round-to-minus-infinity handler; reached through tbl_mode with g,r,s != 0.
# A positive operand is truncated. A negative operand has its l-bit
# incremented (magnitude grows) at the selected precision; d0 is forced
# non-zero first so that the add_* code never clears the l-bit.
22093rnd_mnus:
22094	tst.b		FTEMP_SGN(%a0)		# check for sign
22095	bpl.w		truncate		# if positive then truncate
22096
22097	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
22098	swap		%d1			# set up d1 for round prec.
22099
22100	cmpi.b		%d1, &s_mode		# is prec = sgl?
22101	beq.w		add_sgl			# yes
22102	bgt.w		add_dbl			# no; it's dbl
22103	bra.w		add_ext			# no; it's ext
22104
22105#################################################################
22106#	ROUND NEAREST						#
22107#								#
22108#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
22109#	Note that this will round to even in case of a tie.	#
22110#################################################################
# Round-to-nearest handler; reached through tbl_mode with g,r,s != 0.
# The shift moves g into the carry and leaves only r,s in d0, so the add_*
# code can detect the tie case (r = s = 0) with a plain "tst.l %d0".
22111rnd_near:
22112	asl.l		&0x1, %d0		# shift g-bit to c-bit
22113	bcc.w		truncate		# g = 0: below half, just truncate
22114
22115	swap		%d1			# set up d1 for round prec.
22116
22117	cmpi.b		%d1, &s_mode		# is prec = sgl?
22118	beq.w		add_sgl			# yes
22119	bgt.w		add_dbl			# no; it's dbl
22120	bra.w		add_ext			# no; it's ext
22121
22122# *** LOCAL EQUATES ***
# ad_1_sgl is added to FTEMP_HI (bit 8), ad_1_dbl to FTEMP_LO (bit 11); these
# are the lowest bits kept by the sgl_done / dbl_done truncation masks.
22123set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
22124set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
22125
22126#########################
22127#	ADD SINGLE	#
22128#########################
# Increment the single-precision l-bit of the mantissa at (a0).
# In:  a0 = operand (internal format), d0 = r,s bits (zero only on a tie).
# On a carry out of the mantissa the carry is rotated back in as the new
# msb and the exponent is incremented. Falls through scc_clr into sgl_done,
# which is also the truncate-to-single exit used by truncate.
22129add_sgl:
22130	add.l		&ad_1_sgl, FTEMP_HI(%a0)
22131	bcc.b		scc_clr			# no mantissa overflow
# X holds the carry from the add; the two word rotates shift the 32-bit
# hi(mantissa) right by one with the carry entering at the top.
22132	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
22133	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
22134	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
22135scc_clr:
22136	tst.l		%d0			# test for rs = 0
22137	bne.b		sgl_done
# tie: force the result even. The mask also clears bits 7:0, which sgl_done
# clears anyway.
22138	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22139sgl_done:
22140	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22141	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
22142	rts
22143
22144#########################
22145#	ADD EXTENDED	#
22146#########################
# Increment the extended-precision l-bit (bit 0 of FTEMP_LO) at (a0).
# In:  a0 = operand (internal format), d0 = r,s bits (zero only on a tie).
# If the increment carries out of the whole 64-bit mantissa, the carry is
# rotated back in from the top and the exponent is incremented.
22147add_ext:
22148	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
22149	bcc.b		xcc_clr			# test for carry out
22150	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
22151	bcc.b		xcc_clr
# four chained word rotates = one 64-bit rotate right through X
22152	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
22153	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
22154	roxr.w		FTEMP_LO(%a0)
22155	roxr.w		FTEMP_LO+2(%a0)
22156	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
22157xcc_clr:
22158	tst.l		%d0			# test rs = 0
22159	bne.b		add_ext_done
22160	and.b		&0xfe,FTEMP_LO+3(%a0)	# tie: clear the l bit (round to even)
22161add_ext_done:
22162	rts
22163
22164#########################
22165#	ADD DOUBLE	#
22166#########################
# Increment the double-precision l-bit (bit 11 of FTEMP_LO) at (a0).
# In:  a0 = operand (internal format), d0 = r,s bits (zero only on a tie).
# A carry out of the 64-bit mantissa is rotated back in and the exponent is
# incremented. Falls through dcc_clr into dbl_done, which is also the
# truncate-to-double exit used by truncate.
22167add_dbl:
22168	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22169	bcc.b		dcc_clr			# no carry
22170	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
22171	bcc.b		dcc_clr			# no carry
22172
# four chained word rotates = one 64-bit rotate right through X
22173	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
22174	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
22175	roxr.w		FTEMP_LO(%a0)
22176	roxr.w		FTEMP_LO+2(%a0)
22177	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
22178dcc_clr:
22179	tst.l		%d0			# test for rs = 0
22180	bne.b		dbl_done
# tie: force the result even. The mask also clears bits 10:0, which dbl_done
# clears anyway.
22181	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22182
22183dbl_done:
22184	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22185	rts
22186
22187###########################
22188# Truncate all other bits #
22189###########################
# Discard every mantissa bit below the selected rounding precision without
# incrementing. Entered with d1 = {prec,mode}; d1 is left with its halves
# swapped on return. Extended precision needs no masking at all.
22190truncate:
22191	swap		%d1			# select rnd prec
22192
22193	cmpi.b		%d1, &s_mode		# is prec sgl?
22194	beq.w		sgl_done		# yes
22195	bgt.b		dbl_done		# no; it's dbl
22196	rts					# no; it's ext
22197
22198
22199#
22200# ext_grs(): extract guard, round and sticky bits according to
22201#	     rounding precision.
22202#
22203# INPUT
22204#	d0	   = extended precision g,r,s (in d0{31:29})
22205#	d1	   = {PREC,ROUND}
22206# OUTPUT
22207#	d0{31:29}  = guard, round, sticky
22208#
22209# The ext_grs extract the guard/round/sticky bits according to the
22210# selected rounding precision. It is called by the round subroutine
22211# only.  All registers except d0 are kept intact. d0 becomes an
22212# updated guard,round,sticky in d0{31:29}
22213#
22214# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22215#	 prior to usage, and needs to restore d1 to original. this
22216#	 routine is tightly tied to the round routine and not meant to
22217#	 uphold standard subroutine calling practices.
22218#
22219
# In:  a0 = operand, d0{31:29} = extended g,r,s, d1 = {PREC,ROUND}.
# Out: d0{31:29} = g,r,s for the selected precision; d1, d2, d3 preserved.
# For sgl/dbl the new g,r are the two mantissa bits just below that
# precision's l-bit and the new sticky is the OR of everything further right,
# including the incoming extended g,r,s.
22220ext_grs:
22221	swap		%d1			# have d1.w point to round precision
22222	tst.b		%d1			# is rnd prec = extended?
22223	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
22224
22225#
22226# %d0 actually already hold g,r,s since _round() had it before calling
22227# this function. so, as long as we don't disturb it, we are "returning" it.
22228#
22229ext_grs_ext:
22230	swap		%d1			# yes; return to correct positions
22231	rts
22232
22233ext_grs_not_ext:
22234	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
22235
22236	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
22237	bne.b		ext_grs_dbl		# no; go handle dbl
22238
22239#
22240# sgl:
22241#	96		64	  40	32		0
22242#	-----------------------------------------------------
22243#	| EXP	|XXXXXXX|	  |xx	|		|grs|
22244#	-----------------------------------------------------
22245#			<--(24)--->nn\			   /
22246#				   ee ---------------------
22247#				   ww		|
22248#						v
22249#				   gr	   new sticky
22250#
22251ext_grs_sgl:
# bit-field offset 24, width 2 = bits 7:6 of FTEMP_HI
22252	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22253	mov.l		&30, %d2		# of the sgl prec. limits
22254	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
22255	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
22256	and.l		&0x0000003f, %d2	# s bit is the or of all other
22257	bne.b		ext_grs_st_stky		# bits to the right of g-r
22258	tst.l		FTEMP_LO(%a0)		# test lower mantissa
22259	bne.b		ext_grs_st_stky		# if any are set, set sticky
22260	tst.l		%d0			# test original g,r,s
22261	bne.b		ext_grs_st_stky		# if any are set, set sticky
22262	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit
22263
22264#
22265# dbl:
22266#	96		64		32	 11	0
22267#	-----------------------------------------------------
22268#	| EXP	|XXXXXXX|		|	 |xx	|grs|
22269#	-----------------------------------------------------
22270#						  nn\	    /
22271#						  ee -------
22272#						  ww	|
22273#							v
22274#						  gr	new sticky
22275#
22276ext_grs_dbl:
# bit-field offset 21, width 2 = bits 10:9 of FTEMP_LO
22277	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22278	mov.l		&30, %d2		# of the dbl prec. limits
22279	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
22280	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
22281	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
22282	bne.b		ext_grs_st_stky		# other bits to the right of g-r
22283	tst.l		%d0			# test word original g,r,s
22284	bne.b		ext_grs_st_stky		# if any are set, set sticky
22285	bra.b		ext_grs_end_sd		# if clear, exit
22286
22287ext_grs_st_stky:
22288	bset		&rnd_stky_bit, %d3	# set sticky bit
22289ext_grs_end_sd:
22290	mov.l		%d3, %d0		# return grs to d0
22291
22292	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
22293
22294	swap		%d1			# restore d1 to original
22295	rts
22296
22297#########################################################################
22298# norm(): normalize the mantissa of an extended precision input. the	#
22299#	  input operand should not be normalized already.		#
22300#									#
22301# XDEF ****************************************************************	#
22302#	norm()								#
22303#									#
22304# XREF **************************************************************** #
22305#	none								#
22306#									#
22307# INPUT *************************************************************** #
22308#	a0 = pointer fp extended precision operand to normalize		#
22309#									#
22310# OUTPUT ************************************************************** #
22311#	d0 = number of bit positions the mantissa was shifted		#
22312#	a0 = the input operand's mantissa is normalized; the exponent	#
22313#	     is unchanged.						#
22314#									#
22315#########################################################################
# norm: shift the 64-bit mantissa at (a0) left until its top bit is set.
# Returns the shift count in d0; d1 is clobbered; d2/d3 are saved and
# restored on the stack. The exponent is not touched.
22316	global		norm
22317norm:
22318	mov.l		%d2, -(%sp)		# create some temp regs
22319	mov.l		%d3, -(%sp)
22320
22321	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
22322	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
22323
22324	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
22325	beq.b		norm_lo			# hi(man) is all zeroes!
22326
# hi(man) has a set bit: d2 = number of leading zeroes in hi(man).
# NOTE(review): a bit-field width operand of 0 encodes a width of 32, so
# d2 == 0 (already normalized) would not behave as a no-op here; this looks
# like the reason the header requires an unnormalized input - confirm.
22327norm_hi:
22328	lsl.l		%d2, %d0		# left shift hi(man)
22329	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
22330
22331	or.l		%d3, %d0		# create hi(man)
22332	lsl.l		%d2, %d1		# create lo(man)
22333
22334	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
22335	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
22336
22337	mov.l		%d2, %d0		# return shift amount
22338
22339	mov.l		(%sp)+, %d3		# restore temp regs
22340	mov.l		(%sp)+, %d2
22341
22342	rts
22343
# hi(man) is zero: normalize lo(man), move it into hi(man) and report the
# shift as 32 + leading zeroes of lo(man).
22344norm_lo:
22345	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
22346	lsl.l		%d2, %d1		# shift lo(man)
22347	add.l		&32, %d2		# add 32 to shft amount
22348
22349	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
22350	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
22351
22352	mov.l		%d2, %d0		# return shift amount
22353
22354	mov.l		(%sp)+, %d3		# restore temp regs
22355	mov.l		(%sp)+, %d2
22356
22357	rts
22358
22359#########################################################################
22360# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
22361#		- returns corresponding optype tag			#
22362#									#
22363# XDEF ****************************************************************	#
22364#	unnorm_fix()							#
22365#									#
22366# XREF **************************************************************** #
22367#	norm() - normalize the mantissa					#
22368#									#
22369# INPUT *************************************************************** #
22370#	a0 = pointer to unnormalized extended precision number		#
22371#									#
22372# OUTPUT ************************************************************** #
22373#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
22374#	a0 = input operand has been converted to a norm, denorm, or	#
22375#	     zero; both the exponent and mantissa are changed.		#
22376#									#
22377#########################################################################
22378
# unnorm_fix: turn the UNNORM at (a0) into a NORM, DENORM or ZERO and return
# the matching tag in d0.b. d1 is used as scratch. If the exponent is large
# enough the mantissa is fully normalized through norm(); otherwise it is
# shifted left only by the exponent value and the exponent becomes zero.
22379	global		unnorm_fix
22380unnorm_fix:
22381	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22382	bne.b		unnorm_shift		# hi(man) is not all zeroes
22383
22384#
22385# hi(man) is all zeroes so see if any bits in lo(man) are set
22386#
22387unnorm_chk_lo:
22388	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22389	beq.w		unnorm_zero		# yes
22390
22391	add.w		&32, %d0		# no; fix shift distance
22392
22393#
22394# d0 = # shifts needed for complete normalization
22395#
22396unnorm_shift:
22397	clr.l		%d1			# clear top word
22398	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
22399	and.w		&0x7fff, %d1		# strip off sgn
22400
22401	cmp.w		%d0, %d1		# will normalizing push exp < 0?
22402	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0
22403
22404#
22405# exponent would not go < 0. Therefore, number stays normalized
22406#
22407	sub.w		%d0, %d1		# shift exponent value
22408	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
22409	and.w		&0x8000, %d0		# save old sign
22410	or.w		%d0, %d1		# {sgn,new exp}
22411	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
22412
22413	bsr.l		norm			# normalize UNNORM
22414
22415	mov.b		&NORM, %d0		# return new optype tag
22416	rts
22417
22418#
22419# exponent would go < 0, so only shift left until exp = 0
22420# d1 = exponent = number of bit positions to shift the mantissa left
22421#
22421unnorm_nrm_zero:
22422	cmp.b		%d1, &32		# is exp <= 32?
22423	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
22424
# a 32-bit field starting d1 bits into FTEMP_HI runs on into the following
# longword, so this reads hi:lo shifted left by d1
22425	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22426	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
22427
22428	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
22429	lsl.l		%d1, %d0		# extract new lo(man)
22430	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
22431
22432	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
22433
22434	mov.b		&DENORM, %d0		# return new optype tag
22435	rts
22436
22437#
22438# only mantissa bits set are in lo(man)
22439#
22440unnorm_nrm_zero_lrg:
22441	sub.w		&32, %d1		# adjust shft amt by 32
22442
22443	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
22444	lsl.l		%d1, %d0		# left shift lo(man)
22445
22446	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
22447	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
22448
22449	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
22450
22451	mov.b		&DENORM, %d0		# return new optype tag
22452	rts
22453
22454#
22455# whole mantissa is zero so this UNNORM is actually a zero
22456#
22457unnorm_zero:
22458	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero
22459
22460	mov.b		&ZERO, %d0		# fix optype tag
22461	rts
22462
22463#########################################################################
22464# XDEF ****************************************************************	#
22465#	set_tag_x(): return the optype of the input ext fp number	#
22466#									#
22467# XREF ****************************************************************	#
22468#	None								#
22469#									#
22470# INPUT ***************************************************************	#
22471#	a0 = pointer to extended precision operand			#
22472#									#
22473# OUTPUT **************************************************************	#
22474#	d0 = value of type tag						#
22475#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
22476#									#
22477# ALGORITHM ***********************************************************	#
22478#	Simply test the exponent, j-bit, and mantissa values to		#
22479# determine the type of operand.					#
22480#	If it's an unnormalized zero, alter the operand and force it	#
22481# to be a normal zero.							#
22482#									#
22483#########################################################################
22484
# set_tag_x: classify the extended-precision operand at (a0) and return its
# tag in d0.b. The only write to the operand is clearing the exponent (sign
# kept) of an unnormalized zero.
22485	global		set_tag_x
22486set_tag_x:
22487	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
22488	andi.w		&0x7fff, %d0		# strip off sign
22489	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
22490	beq.b		inf_or_nan_x
22491not_inf_or_nan_x:
# byte-sized bit test on the first mantissa byte: bit 7 is the mantissa msb
22492	btst		&0x7,FTEMP_HI(%a0)	# is the j-bit set?
22493	beq.b		not_norm_x		# no
22494is_norm_x:
22495	mov.b		&NORM, %d0
22496	rts
22497not_norm_x:
22498	tst.w		%d0			# is exponent = 0?
22499	bne.b		is_unnorm_x		# no; j-bit clear with exp != 0
22500not_unnorm_x:
# exponent is zero: ZERO if the whole mantissa is zero, else DENORM
22501	tst.l		FTEMP_HI(%a0)
22502	bne.b		is_denorm_x
22503	tst.l		FTEMP_LO(%a0)
22504	bne.b		is_denorm_x
22505is_zero_x:
22506	mov.b		&ZERO, %d0
22507	rts
22508is_denorm_x:
22509	mov.b		&DENORM, %d0
22510	rts
22511# must distinguish now "Unnormalized zeroes" which we
22512# must convert to zero.
22513is_unnorm_x:
22514	tst.l		FTEMP_HI(%a0)
22515	bne.b		is_unnorm_reg_x
22516	tst.l		FTEMP_LO(%a0)
22517	bne.b		is_unnorm_reg_x
22518# it's an "unnormalized zero". let's convert it to an actual zero...
22519	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
22520	mov.b		&ZERO, %d0
22521	rts
22522is_unnorm_reg_x:
22523	mov.b		&UNNORM, %d0
22524	rts
# exponent is all ones: INF if the mantissa (ignoring its msb) is zero,
# otherwise a NAN
22525inf_or_nan_x:
22526	tst.l		FTEMP_LO(%a0)
22527	bne.b		is_nan_x
22528	mov.l		FTEMP_HI(%a0), %d0
22529	and.l		&0x7fffffff, %d0	# msb is a don't care!
22530	bne.b		is_nan_x
22531is_inf_x:
22532	mov.b		&INF, %d0
22533	rts
22534is_nan_x:
# bit 6 of the first mantissa byte (the bit below the msb) selects QNAN
22535	btst		&0x6, FTEMP_HI(%a0)
22536	beq.b		is_snan_x
22537	mov.b		&QNAN, %d0
22538	rts
22539is_snan_x:
22540	mov.b		&SNAN, %d0
22541	rts
22542
22543#########################################################################
22544# XDEF ****************************************************************	#
22545#	set_tag_d(): return the optype of the input dbl fp number	#
22546#									#
22547# XREF ****************************************************************	#
22548#	None								#
22549#									#
22550# INPUT ***************************************************************	#
22551#	a0 = points to double precision operand				#
22552#									#
22553# OUTPUT **************************************************************	#
22554#	d0 = value of type tag						#
22555#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22556#									#
22557# ALGORITHM ***********************************************************	#
22558#	Simply test the exponent, j-bit, and mantissa values to		#
22559# determine the type of operand.					#
22560#									#
22561#########################################################################
22562
# set_tag_d: classify the double-precision operand at (a0) and return its tag
# in d0.b. d1 is clobbered (copy of the first longword); the operand is only
# read.
22563	global		set_tag_d
22564set_tag_d:
22565	mov.l		FTEMP(%a0), %d0		# first longword: sign, exp, top of fraction
22566	mov.l		%d0, %d1		# keep a copy for the fraction tests
22567
22568	andi.l		&0x7ff00000, %d0	# isolate the 11-bit exponent
22569	beq.b		zero_or_denorm_d	# exp == 0
22570
22571	cmpi.l		%d0, &0x7ff00000	# exp all ones?
22572	beq.b		inf_or_nan_d
22573
22574is_norm_d:
22575	mov.b		&NORM, %d0
22576	rts
22577zero_or_denorm_d:
22578	and.l		&0x000fffff, %d1	# upper 20 fraction bits
22579	bne		is_denorm_d
22580	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
22581	bne		is_denorm_d
22582is_zero_d:
22583	mov.b		&ZERO, %d0
22584	rts
22585is_denorm_d:
22586	mov.b		&DENORM, %d0
22587	rts
22588inf_or_nan_d:
22589	and.l		&0x000fffff, %d1	# upper 20 fraction bits
22590	bne		is_nan_d
22591	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
22592	bne		is_nan_d
22593is_inf_d:
22594	mov.b		&INF, %d0
22595	rts
22596is_nan_d:
22597	btst		&19, %d1		# top fraction bit set => QNAN
22598	bne		is_qnan_d
22599is_snan_d:
22600	mov.b		&SNAN, %d0
22601	rts
22602is_qnan_d:
22603	mov.b		&QNAN, %d0
22604	rts
22605
22606#########################################################################
22607# XDEF ****************************************************************	#
22608#	set_tag_s(): return the optype of the input sgl fp number	#
22609#									#
22610# XREF ****************************************************************	#
22611#	None								#
22612#									#
22613# INPUT ***************************************************************	#
22614#	a0 = pointer to single precision operand			#
22615#									#
22616# OUTPUT **************************************************************	#
22617#	d0 = value of type tag						#
22618#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22619#									#
22620# ALGORITHM ***********************************************************	#
22621#	Simply test the exponent, j-bit, and mantissa values to		#
22622# determine the type of operand.					#
22623#									#
22624#########################################################################
22625
# set_tag_s: classify the single-precision operand at (a0) and return its tag
# in d0.b. d1 is clobbered (copy of the operand); the operand is only read.
22626	global		set_tag_s
22627set_tag_s:
22628	mov.l		FTEMP(%a0), %d0		# whole sgl operand
22629	mov.l		%d0, %d1		# keep a copy for the fraction tests
22630
22631	andi.l		&0x7f800000, %d0	# isolate the 8-bit exponent
22632	beq.b		zero_or_denorm_s	# exp == 0
22633
22634	cmpi.l		%d0, &0x7f800000	# exp all ones?
22635	beq.b		inf_or_nan_s
22636
22637is_norm_s:
22638	mov.b		&NORM, %d0
22639	rts
22640zero_or_denorm_s:
22641	and.l		&0x007fffff, %d1	# 23 fraction bits
22642	bne		is_denorm_s
22643is_zero_s:
22644	mov.b		&ZERO, %d0
22645	rts
22646is_denorm_s:
22647	mov.b		&DENORM, %d0
22648	rts
22649inf_or_nan_s:
22650	and.l		&0x007fffff, %d1	# 23 fraction bits
22651	bne		is_nan_s
22652is_inf_s:
22653	mov.b		&INF, %d0
22654	rts
22655is_nan_s:
22656	btst		&22, %d1		# top fraction bit set => QNAN
22657	bne		is_qnan_s
22658is_snan_s:
22659	mov.b		&SNAN, %d0
22660	rts
22661is_qnan_s:
22662	mov.b		&QNAN, %d0
22663	rts
22664
22665#########################################################################
22666# XDEF ****************************************************************	#
22667#	unf_res(): routine to produce default underflow result of a	#
22668#		   scaled extended precision number; this is used by	#
22669#		   fadd/fdiv/fmul/etc. emulation routines.		#
22670#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
22671#		    single round prec and extended prec mode.		#
22672#									#
22673# XREF ****************************************************************	#
22674#	_denorm() - denormalize according to scale factor		#
22675#	_round() - round denormalized number according to rnd prec	#
22676#									#
22677# INPUT ***************************************************************	#
22678#	a0 = pointer to extended precision operand			#
22679#	d0 = scale factor						#
22680#	d1 = rounding precision/mode					#
22681#									#
22682# OUTPUT **************************************************************	#
22683#	a0 = pointer to default underflow result in extended precision	#
22684#	d0.b = result FPSR_cc which caller may or may not want to save	#
22685#									#
22686# ALGORITHM ***********************************************************	#
22687#	Convert the input operand to "internal format" which means the	#
22688# exponent is extended to 16 bits and the sign is stored in the unused	#
22689# portion of the extended precision operand. Denormalize the number	#
22690# according to the scale factor passed in d0. Then, round the		#
22691# denormalized result.							#
22692#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
22693# d0 in case the caller doesn't want to save them (as is the case for	#
22694# fmove out).								#
22695#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
22696# precision and the rounding precision to single.			#
22697#									#
22698#########################################################################
# unf_res: build the default underflow result in place at (a0).
# In:  a0 = operand, d0 = scale factor, d1 = rounding precision/mode
#      (precision in bits 7:6, mode in bits 5:4, as masked below).
# Out: operand at (a0) denormalized and rounded; d0 = 0, or the z_bit set
#      if the result is zero. aunfl is set in FPSR_AEXCEPT(%a6) if INEX2 is
#      set.
# Stack while the helpers run: (sp) = saved a0, 4(sp) = saved d1 (its low
# word is at 6(sp)).
22699	global		unf_res
22700unf_res:
22701	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
22702
# byte-sized bit test: bit 7 of the first exponent byte is the sign
22703	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
22704	sne		FTEMP_SGN(%a0)		# sign byte = 0xff if negative, else 0
22705
22706	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
22707	and.w		&0x7fff, %d1		# strip off sign
22708	sub.w		%d0, %d1		# subtract scale factor
22709	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
22710
22711	mov.l		%a0, -(%sp)		# save operand ptr during calls
22712
22713	mov.l		0x4(%sp),%d0		# pass rnd prec.
22714	andi.w		&0x00c0,%d0		# keep prec bits 7:6
22715	lsr.w		&0x4,%d0		# -> 0 / 4 / 8 in the low word
22716	bsr.l		_denorm			# denorm result
22717
# rebuild d1 as _round() expects it: precision in the high word, mode in
# the low word. d0 still carries the g,r,s returned by _denorm().
22718	mov.l		(%sp),%a0
22719	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
22720	andi.w		&0xc0,%d1		# extract rnd prec
22721	lsr.w		&0x4,%d1
22722	swap		%d1
22723	mov.w		0x6(%sp),%d1
22724	andi.w		&0x30,%d1		# extract rnd mode
22725	lsr.w		&0x4,%d1
22726	bsr.l		_round			# round the denorm
22727
22728	mov.l		(%sp)+, %a0
22729
22730# result is now rounded properly. convert back to normal format
22731	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
22732	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
22733	beq.b		unf_res_chkifzero	# no; result is positive
22734	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
22735	clr.b		FTEMP_SGN(%a0)		# clear temp sign
22736
22737# the number may have become zero after rounding. set ccodes accordingly.
22738unf_res_chkifzero:
22739	clr.l		%d0
22740	tst.l		FTEMP_HI(%a0)		# is value now a zero?
22741	bne.b		unf_res_cont		# no
22742	tst.l		FTEMP_LO(%a0)
22743	bne.b		unf_res_cont		# no
22744#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
22745	bset		&z_bit, %d0		# yes; set zero ccode bit
22746
22747unf_res_cont:
22748
22749#
22750# can inex1 also be set along with unfl and inex2???
22751#
22752# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22753#
22754	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22755	beq.b		unf_res_end		# no
22756	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22757
22758unf_res_end:
22759	add.l		&0x4, %sp		# drop the saved prec,mode longword
22760	rts
22761
22762# unf_res() for fsglmul() and fsgldiv().
# Same flow and same inputs/outputs as unf_res, except that _denorm() is
# always called with extended precision (d0 = 0) and _round() is always
# called with single precision; only the rounding mode (bits 5:4 of the
# saved d1) comes from the caller.
22763	global		unf_res4
22764unf_res4:
22765	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
22766
# byte-sized bit test: bit 7 of the first exponent byte is the sign
22767	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
22768	sne		FTEMP_SGN(%a0)		# sign byte = 0xff if negative, else 0
22769
22770	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
22771	and.w		&0x7fff,%d1		# strip off sign
22772	sub.w		%d0,%d1			# subtract scale factor
22773	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
22774
22775	mov.l		%a0,-(%sp)		# save operand ptr during calls
22776
22777	clr.l		%d0			# force rnd prec = ext
22778	bsr.l		_denorm			# denorm result
22779
22780	mov.l		(%sp),%a0
22781	mov.w		&s_mode,%d1		# force rnd prec = sgl
22782	swap		%d1
22783	mov.w		0x6(%sp),%d1		# load rnd mode
22784	andi.w		&0x30,%d1		# extract rnd mode
22785	lsr.w		&0x4,%d1
22786	bsr.l		_round			# round the denorm
22787
22788	mov.l		(%sp)+,%a0
22789
22790# result is now rounded properly. convert back to normal format
22791	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
22792	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
22793	beq.b		unf_res4_chkifzero	# no; result is positive
22794	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
22795	clr.b		FTEMP_SGN(%a0)		# clear temp sign
22796
22797# the number may have become zero after rounding. set ccodes accordingly.
22798unf_res4_chkifzero:
22799	clr.l		%d0
22800	tst.l		FTEMP_HI(%a0)		# is value now a zero?
22801	bne.b		unf_res4_cont		# no
22802	tst.l		FTEMP_LO(%a0)
22803	bne.b		unf_res4_cont		# no
22804#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
22805	bset		&z_bit,%d0		# yes; set zero ccode bit
22806
22807unf_res4_cont:
22808
22809#
22810# can inex1 also be set along with unfl and inex2???
22811#
22812# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22813#
22814	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22815	beq.b		unf_res4_end		# no
22816	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22817
22818unf_res4_end:
22819	add.l		&0x4,%sp		# drop the saved prec,mode longword
22820	rts
22821
22822#########################################################################
22823# XDEF ****************************************************************	#
22824#	ovf_res(): routine to produce the default overflow result of	#
22825#		   an overflowing number.				#
22826#	ovf_res2(): same as above but the rnd mode/prec are passed	#
22827#		    differently.					#
22828#									#
22829# XREF ****************************************************************	#
22830#	none								#
22831#									#
22832# INPUT ***************************************************************	#
22833#	d1.b	= '-1' => (-); '0' => (+)				#
22834#   ovf_res():								#
22835#	d0	= rnd mode/prec						#
22836#   ovf_res2():								#
22837#	hi(d0)	= rnd prec						#
22838#	lo(d0)	= rnd mode						#
22839#									#
22840# OUTPUT **************************************************************	#
22841#	a0	= points to extended precision result			#
22842#	d0.b	= condition code bits					#
22843#									#
22844# ALGORITHM ***********************************************************	#
22845#	The default overflow result can be determined by the sign of	#
22846# the result and the rounding mode/prec in effect. These bits are	#
22847# concatenated together to create an index into the default result	#
22848# table. A pointer to the correct result is returned in a0. The		#
22849# resulting condition codes are returned in d0 in case the caller	#
22850# doesn't want FPSR_cc altered (as is the case for fmove out).		#
22851#									#
22852#########################################################################
22853
# ovf_res: d0.b = rnd prec/mode in bits 7:4, d1.b = -1 for a negative
# result, 0 for positive. Builds the table index
#	(sign << 4) | (prec << 2) | mode
# in d0.w, twice that value in d1.w, and joins ovf_res_load.
22854	global		ovf_res
22855ovf_res:
22856	andi.w		&0x10,%d1		# keep result sign
22857	lsr.b		&0x4,%d0		# shift prec/mode
22858	or.b		%d0,%d1			# concat the two
22859	mov.w		%d1,%d0			# make a copy
22860	lsl.b		&0x1,%d1		# multiply d1 by 2
22861	bra.b		ovf_res_load
22862
22863	global		ovf_res2
22864ovf_res2:
22865	and.w		&0x10, %d1		# keep result sign
22866	or.b		%d0, %d1		# insert rnd mode
22867	swap		%d0
22868	or.b		%d0, %d1		# insert rnd prec
22869	mov.w		%d1, %d0		# make a copy
22870	lsl.b		&0x1, %d1		# shift left by 1
22871
22872#
22873# use the rounding mode, precision, and result sign as in index into the
22874# two tables below to fetch the default result and the result ccodes.
22875#
22876ovf_res_load:
22877	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22878	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22879
22880	rts
22881
22882tbl_ovfl_cc:
22883	byte		0x2, 0x0, 0x0, 0x2	# +EXT: RN,RZ,RM,RP (0x2 where the result is INF)
22884	byte		0x2, 0x0, 0x0, 0x2	# +SGL: RN,RZ,RM,RP
22885	byte		0x2, 0x0, 0x0, 0x2	# +DBL: RN,RZ,RM,RP
22886	byte		0x0, 0x0, 0x0, 0x0	# filler; lines up with the zero-filled result slots
22887	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -EXT: RN,RZ,RM,RP (0x8 on every negative result)
22888	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -SGL: RN,RZ,RM,RP
22889	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -DBL: RN,RZ,RM,RP
22890
22891tbl_ovfl_result:
22892	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22893	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22894	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22895	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22896
22897	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22898	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22899	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22900	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22901
22902	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22903	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22904	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22905	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906
22907	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler: four zero entries so the
22908	long		0x00000000,0x00000000,0x00000000,0x00000000 # negative results start at index 16,
22909	long		0x00000000,0x00000000,0x00000000,0x00000000 # i.e. where the sign bit (0x10) of
22910	long		0x00000000,0x00000000,0x00000000,0x00000000 # the index is set
22911
22912	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22913	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22914	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22915	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22916
22917	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22918	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22919	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22920	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22921
22922	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22923	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22924	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22925	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22926
22927#########################################################################
22928# XDEF ****************************************************************	#
22929#	get_packed(): fetch a packed operand from memory and then	#
22930#		      convert it to a floating-point binary number.	#
22931#									#
22932# XREF ****************************************************************	#
22933#	_dcalc_ea() - calculate the correct <ea>			#
22934#	_dmem_read() - fetch the packed operand from memory		#
22935#	facc_in_x() - the fetch failed so jump to special exit code	#
22936#	decbin()    - convert packed to binary extended precision	#
22937#									#
22938# INPUT ***************************************************************	#
22939#	None								#
22940#									#
22941# OUTPUT **************************************************************	#
22942#	If no failure on _dmem_read():					#
22943#	FP_SRC(a6) = packed operand now as a binary FP number		#
22944#									#
22945# ALGORITHM ***********************************************************	#
22946#	Get the correct <ea> which is the value on the exception stack	#
22947# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
22948# Then, fetch the operand from memory. If the fetch fails, exit		#
22949# through facc_in_x().							#
22950#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
22951# its binary representation here. Else, call decbin() which will	#
22952# convert the packed value to an extended precision binary value.	#
22953#									#
22954#########################################################################
22955
22956# the stacked <ea> for packed is correct except for -(An).
22957# the base reg must be updated for both -(An) and (An)+.
22958	global		get_packed
22959get_packed:
22960	mov.l		&0xc,%d0		# packed is 12 bytes
22961	bsr.l		_dcalc_ea		# fetch <ea>; correct An
22962
22963	lea		FP_SRC(%a6),%a1		# pass: ptr to dst buffer (FP_SRC)
22964	mov.l		&0xc,%d0		# pass: 12 bytes
22965	bsr.l		_dmem_read		# read packed operand
22966
22967	tst.l		%d1			# did the read fail (d1 != 0)?
22968	bne.l		facc_in_x		# yes; leave through the access-error exit
22969
22970# The packed operand is an INF or a NAN if the exponent field is all ones.
22971	bfextu		FP_SRC(%a6){&1:&15},%d0	# d0 = the 15 bits following the sign bit
22972	cmpi.w		%d0,&0x7fff		# INF or NAN?
22973	bne.b		gp_try_zero		# no
22974	rts					# operand is an INF or NAN; FP_SRC left as read
22975
22976# The packed operand is a zero if the mantissa is all zero, else it's
22977# a normal packed op.
22978gp_try_zero:
22979	mov.b		3+FP_SRC(%a6),%d0	# get 4th byte (offset 3) of the operand
22980	andi.b		&0x0f,%d0		# keep low nybble = integer digit
22981	bne.b		gp_not_spec		# not a zero
22982	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
22983	bne.b		gp_not_spec		# not a zero
22984	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
22985	bne.b		gp_not_spec		# not a zero
22986	rts					# operand is a ZERO; FP_SRC left as read
22987gp_not_spec:
22988	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
22989	bsr.l		decbin			# convert to extended; result comes back in fp0
22990	fmovm.x		&0x80,FP_SRC(%a6)	# overwrite FP_SRC with the converted value
22991	rts
22992
22993#########################################################################
22994# decbin(): Converts normalized packed bcd value pointed to by register	#
22995#	    a0 to extended-precision value in fp0.			#
22996#									#
22997# INPUT ***************************************************************	#
22998#	a0 = pointer to normalized packed bcd value			#
22999#									#
23000# OUTPUT **************************************************************	#
23001#	fp0 = exact fp representation of the packed bcd value.		#
23002#									#
23003# ALGORITHM ***********************************************************	#
23004#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
23005#	and NaN operands are dispatched without entering this routine)	#
23006#	value in 68881/882 format at location (a0).			#
23007#									#
23008#	A1. Convert the bcd exponent to binary by successive adds and	#
23009#	muls. Set the sign according to SE. Subtract 16 to compensate	#
23010#	for the mantissa which is to be interpreted as 17 integer	#
23011#	digits, rather than 1 integer and 16 fraction digits.		#
23012#	Note: this operation can never overflow.			#
23013#									#
23014#	A2. Convert the bcd mantissa to binary by successive		#
23015#	adds and muls in FP0. Set the sign according to SM.		#
23016#	The mantissa digits will be converted with the decimal point	#
23017#	assumed following the least-significant digit.			#
23018#	Note: this operation can never overflow.			#
23019#									#
23020#	A3. Count the number of leading/trailing zeros in the		#
23021#	bcd string.  If SE is positive, count the leading zeros;	#
23022#	if negative, count the trailing zeros.  Set the adjusted	#
23023#	exponent equal to the exponent from A1 and the zero count	#
23024#	added if SE = 1 and subtracted if SE = 0.  Scale the		#
23025#	mantissa the equivalent of forcing in the bcd value:		#
23026#									#
23027#	SE = 0	a non-zero digit in the integer position		#
23028#	SE = 1	a non-zero digit in Mant0, lsd of the fraction		#
23029#									#
23030#	this will ensure that any value, regardless of its		#
23031#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
23032#	consistently.							#
23033#									#
23034#	A4. Calculate the factor 10^exp in FP1 using a table of		#
23035#	10^(2^n) values.  To reduce the error in forming factors	#
23036#	greater than 10^27, a directed rounding scheme is used with	#
23037#	tables rounded to RN, RM, and RP, according to the table	#
23038#	in the comments of the pwrten section.				#
23039#									#
23040#	A5. Form the final binary number by scaling the mantissa by	#
23041#	the exponent factor.  This is done by multiplying the		#
23042#	mantissa in FP0 by the factor in FP1 if the adjusted		#
23043#	exponent sign is positive, and dividing FP0 by FP1 if		#
23044#	it is negative.							#
23045#									#
23046#	Clean up and return. Check if the final mul or div was inexact.	#
23047#	If so, set INEX1 in USER_FPSR.					#
23048#									#
23049#########################################################################
23050
23051#
23052#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053#	to nearest, minus, and plus, respectively.  The tables include
23054#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
23055#	is required until the power is greater than 27, however, all
23056#	tables include the first 5 for ease of indexing.
23057#
23058RTABLE:
23059	byte		0,0,0,0			# rows are indexed by user rnd mode; columns by {SM,SE}
23060	byte		2,3,2,3			# entry = mode used for 10^exp: 0 => PTENRN,
23061	byte		2,3,3,2			# 2 => PTENRM, 3 => PTENRP (decoded in pwrten)
23062	byte		3,2,2,3
23063
23064	set		FNIBS,7			# dbf count for the 8 digits of one mantissa lword
23065	set		FSTRT,0			# bit offset of the first digit in a mantissa lword
23066
23067	set		ESTRT,4			# bit offset of the first exponent digit in lword 1
23068	set		EDIGITS,2		# dbf count for the 3 exponent digits
23069
23070	global		decbin
23071decbin:
23072	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
23075
23076	lea		FP_SCR0(%a6),%a0	# a0 = working copy from here on
23077
23078	movm.l		&0x3c00,-(%sp)		# save d2-d5
# The -(An) form of fmovm numbers its mask from bit 0 = fp0, so fp1 is
# bit 1 (0x2).  The old mask 0x1 pushed fp0, which the epilogue's
# "(%sp)+,&0x40" then popped into fp1, clobbering the caller's fp1.
23079	fmovm.x		&0x2,-(%sp)		# save fp1
23080#
23081# Calculate exponent:
23082#  1. Copy bcd value in memory for use as a working copy.
23083#  2. Calculate absolute value of exponent in d1 by mul and add.
23084#  3. Correct for exponent sign.
23085#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23086#     (i.e., all digits assumed left of the decimal point.)
23087#
23088# Register usage:
23089#
23090#  calc_e:
23091#	(*)  d0: temp digit storage
23092#	(*)  d1: accumulator for binary exponent
23093#	(*)  d2: digit count
23094#	(*)  d3: offset pointer
23095#	( )  d4: first word of bcd
23096#	( )  a0: pointer to working bcd value
23097#	( )  a6: pointer to original bcd value
23098#	(*)  FP_SCR0: working copy of original bcd value
23099#	(*)  (sp): adjusted exponent, pushed at e_save
23100#
23101calc_e:
23102	mov.l		&EDIGITS,%d2		# dbf count: 3 exponent digits
23103	mov.l		&ESTRT,%d3		# bit offset of first exponent digit in d4
23104	mov.l		(%a0),%d4		# get first lword of bcd
23105	clr.l		%d1			# zero d1 for accumulator
23106e_gd:
23107	mulu.l		&0xa,%d1		# mul partial sum by one digit place
23108	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
23109	add.l		%d0,%d1			# d1 = d1 + d0
23110	addq.b		&4,%d3			# advance d3 to the next digit
23111	dbf.w		%d2,e_gd		# loop until all 3 digits are used
23112	btst		&30,%d4			# get SE
23113	beq.b		e_pos			# don't negate if pos
23114	neg.l		%d1			# negate before subtracting
23115e_pos:
23116	sub.l		&16,%d1			# mantissa is read as 17 integer digits
23117	bge.b		e_save			# if still pos, do not neg
23118	neg.l		%d1			# now negative, make pos and set SE
23119	or.l		&0x40000000,%d4		# set SE in d4,
23120	or.l		&0x40000000,(%a0)	# and in working bcd
23121e_save:
23122	mov.l		%d1,-(%sp)		# save abs(adjusted exp) on stack
23123#
23124#
23125# Calculate mantissa:
23126#  1. Calculate absolute value of mantissa in fp0 by mul and add.
23127#  2. Correct for mantissa sign.
23128#     (i.e., all digits assumed left of the decimal point.)
23129#
23130# Register usage:
23131#
23132#  calc_m:
23133#	(*)  d0: temp digit storage
23134#	(*)  d1: lword counter
23135#	(*)  d2: digit count
23136#	(*)  d3: offset pointer
23137#	( )  d4: words 2 and 3 of bcd
23138#	( )  a0: pointer to working bcd value
23139#	( )  a6: pointer to original bcd value
23140#	(*) fp0: mantissa accumulator
23141#	( )  FP_SCR0: working copy of original bcd value
23142#	( )  (sp): adjusted exponent saved by calc_e
23143#
23144calc_m:
23145	mov.l		&1,%d1			# lword index, start at lword 2 (index 1)
23146	fmov.s		&0x00000000,%fp0	# accumulator = 0.0
23147#
23148#
23149#  The integer digit is the low nibble of the first long word, while the
23150#  remaining 16 digits fill the second and third long words.  Pick up the
23151#  integer digit here, ahead of the loop over the other two long words.
23152#
23153	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
23154	fadd.b		%d0,%fp0		# add digit to sum in fp0
23155#
23156#
23157#  Get the rest of the mantissa.
23158#
23159loadlw:
23160	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
23161	mov.l		&FSTRT,%d3		# bit offset of the first digit
23162	mov.l		&FNIBS,%d2		# dbf count: 8 digits per lword
23163md2b:
23164	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
23165	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
23166	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
23167#
23168#
23169#  If all the digits (8) in that long word have been converted (d2=0),
23170#  then inc d1 (=2) to point to the next long word and reset d3 to 0
23171#  to initialize the digit offset, and set d2 to 7 for the digit count;
23172#  else continue with this long word.
23173#
23174	addq.b		&4,%d3			# advance d3 to the next digit
23175	dbf.w		%d2,md2b		# check for last digit in this lw
23176nextlw:
23177	addq.l		&1,%d1			# inc lw pointer in mantissa
23178	cmp.l		%d1,&2			# test for last lw
23179	ble.b		loadlw			# index still <= 2: convert that lword too
23180#
23181#  Check the sign of the mant and make the value in fp0 the same sign.
23182#
23183m_sign:
23184	btst		&31,(%a0)		# test sign of the mantissa (SM)
23185	beq.b		ap_st_z			# if clear, go to append/strip zeros
23186	fneg.x		%fp0			# if set, negate fp0
23187#
23188# Append/strip zeros:
23189#
23190#  For adjusted exponents which have an absolute value greater than 27*,
23191#  this routine calculates the amount needed to normalize the mantissa
23192#  for the adjusted exponent.  That number is subtracted from the exp
23193#  if the exp was positive, and added if it was negative.  The purpose
23194#  of this is to reduce the value of the exponent and the possibility
23195#  of error in calculation of pwrten.
23196#
23197#  1. Branch on the sign of the adjusted exponent.
23198#  2p.(positive exp)
23199#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
23200#   3. Add one for each zero encountered until a non-zero digit.
23201#   4. Subtract the count from the exp.
23202#   5. Check if the exp has crossed zero in #3 above; make the exp abs
23203#	   and set SE.
23204#	6. Multiply the mantissa by 10**count.
23205#  2n.(negative exp)
23206#   2. Check the digits in lwords 3 and 2 in descending order.
23207#   3. Add one for each zero encountered until a non-zero digit.
23208#   4. Add the count to the exp.
23209#   5. Check if the exp has crossed zero in #3 above; clear SE.
23210#   6. Divide the mantissa by 10**count.
23211#
23212#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
23213#   any adjustment due to append/strip zeros will drive the resultant
23214#   exponent towards zero.  Since all pwrten constants with a power
23215#   of 27 or less are exact, there is no need to use this routine to
23216#   attempt to lessen the resultant exponent.
23217#
23218# Register usage:
23219#
23220#  ap_st_z:
23221#	(*)  d0: temp digit storage
23222#	(*)  d1: zero count
23223#	(*)  d2: digit count
23224#	(*)  d3: offset pointer
23225#	( )  d4: first word of bcd
23226#	(*)  d5: lword counter
23227#	( )  a0: pointer to working bcd value
23228#	( )  FP_SCR0: working copy of original bcd value
23229#	( )  (sp): adjusted exponent saved by calc_e
23230#
23231#
23232# First check the absolute value of the exponent to see if this
23233# routine is necessary.  If so, then check the sign of the exponent
23234# and do append (+) or strip (-) zeros accordingly.
23235# This section handles a positive adjusted exponent.
23236#
23237ap_st_z:
23238	mov.l		(%sp),%d1		# load expA for range test
23239	cmp.l		%d1,&27			# compare abs(expA) with 27
23240	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
23241	btst		&30,(%a0)		# check sign of exp
23242	bne.b		ap_st_n			# if neg, go to neg side
23243	clr.l		%d1			# zero count reg
23244	mov.l		(%a0),%d4		# load lword 1 to d4
23245	bfextu		%d4{&28:&4},%d0		# get M16 in d0
23246	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
23247	addq.l		&1,%d1			# inc zero count
23248	mov.l		&1,%d5			# init lword counter
23249	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
23250	bne.b		ap_p_cl			# lw 2 is non-zero: scan its digits
23251	addq.l		&8,%d1			# lw 2 is all zeros: count its 8 digits
23252	addq.l		&1,%d5			# inc lword counter
23253	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
23254ap_p_cl:
23255	clr.l		%d3			# init offset reg
23256	mov.l		&7,%d2			# init digit counter
23257ap_p_gd:
23258	bfextu		%d4{%d3:&4},%d0		# get digit
23259	bne.b		ap_p_fx			# if non-zero, go to fix exp
23260	addq.l		&4,%d3			# point to next digit
23261	addq.l		&1,%d1			# inc zero count
23262	dbf.w		%d2,ap_p_gd		# get next digit
23263ap_p_fx:
23264	mov.l		%d1,%d0			# copy zero count to d0
23265	mov.l		(%sp),%d1		# get adjusted exp from memory
23266	sub.l		%d0,%d1			# subtract count from exp
23267	bge.b		ap_p_fm			# if still pos, go build the multiplier
23268	neg.l		%d1			# now its neg; get abs
23269	mov.l		(%a0),%d4		# load lword 1 to d4
23270	or.l		&0x40000000,%d4		# and set SE in d4
23271	or.l		&0x40000000,(%a0)	# and in memory
23272#
23273# Calculate the mantissa multiplier to compensate for the stripping of
23274# zeros from the mantissa.
23275#
23276ap_p_fm:
23277	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
23278	clr.l		%d3			# init table index
23279	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23280	mov.l		&3,%d2			# d2 = 3; not read by the loop below
23281ap_p_el:
23282	asr.l		&1,%d0			# shift lsb into carry
23283	bcc.b		ap_p_en			# bit clear: skip the multiply
23284	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
23285ap_p_en:
23286	add.l		&12,%d3			# step to the next 12-byte table entry
23287	tst.l		%d0			# check if d0 is zero
23288	bne.b		ap_p_el			# if not, get next bit
23289	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
23290	bra.b		pwrten			# go calc pwrten
23291#
23292# This section handles a negative adjusted exponent.
23293#
23294ap_st_n:
23295	clr.l		%d1			# clr zero count
23296	mov.l		&2,%d5			# set up d5 to point to lword 3
23297	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
23298	bne.b		ap_n_cl			# if not zero, check digits
23299	sub.l		&1,%d5			# dec d5 to point to lword 2
23300	addq.l		&8,%d1			# lw 3 is all zeros: count its 8 digits
23301	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
23302ap_n_cl:
23303	mov.l		&28,%d3			# point to last digit
23304	mov.l		&7,%d2			# init digit counter
23305ap_n_gd:
23306	bfextu		%d4{%d3:&4},%d0		# get digit
23307	bne.b		ap_n_fx			# if non-zero, go to exp fix
23308	subq.l		&4,%d3			# point to previous digit
23309	addq.l		&1,%d1			# inc zero count
23310	dbf.w		%d2,ap_n_gd		# get next digit
23311ap_n_fx:
23312	mov.l		%d1,%d0			# copy zero count to d0
23313	mov.l		(%sp),%d1		# get adjusted exp (absolute value) from memory
23314	sub.l		%d0,%d1			# subtract count from abs(exp)
23315	bgt.b		ap_n_fm			# still > 0: exp stays negative, go fix mantissa
23316	neg.l		%d1			# take abs of exp and clr SE
23317	mov.l		(%a0),%d4		# load lword 1 to d4
23318	and.l		&0xbfffffff,%d4		# and clr SE in d4
23319	and.l		&0xbfffffff,(%a0)	# and in memory
23320#
23321# Calculate the mantissa multiplier to compensate for the appending of
23322# zeros to the mantissa.
23323#
23324ap_n_fm:
23325	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
23326	clr.l		%d3			# init table index
23327	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23328	mov.l		&3,%d2			# d2 = 3; not read by the loop below
23329ap_n_el:
23330	asr.l		&1,%d0			# shift lsb into carry
23331	bcc.b		ap_n_en			# bit clear: skip the multiply
23332	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
23333ap_n_en:
23334	add.l		&12,%d3			# step to the next 12-byte table entry
23335	tst.l		%d0			# check if d0 is zero
23336	bne.b		ap_n_el			# if not, get next bit
23337	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count)
23338#
23339#
23340# Calculate power-of-ten factor from adjusted and shifted exponent.
23341#
23342# Register usage:
23343#
23344#  pwrten:
23345#	(*)  d0: temp
23346#	( )  d1: exponent
23347#	(*)  d2: {FPCR[5:4],SM,SE} as index in RTABLE; temp
23348#	(*)  d3: FPCR work copy
23349#	( )  d4: first word of bcd
23350#	(*)  a1: RTABLE pointer
23351#  calc_p:
23352#	(*)  d0: temp
23353#	( )  d1: exponent
23354#	(*)  d3: PWRTxx table index
23355#	( )  a0: pointer to working copy of bcd
23356#	(*)  a1: PWRTxx pointer
23357#	(*) fp1: power-of-ten accumulator
23358#
23359# Pwrten calculates the exponent factor in the selected rounding mode
23360# according to the following table:
23361#
23362#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
23363#
23364#	ANY	  ANY	RN	RN
23365#
23366#	 +	   +	RP	RP
23367#	 -	   +	RP	RM
23368#	 +	   -	RP	RM
23369#	 -	   -	RP	RP
23370#
23371#	 +	   +	RM	RM
23372#	 -	   +	RM	RP
23373#	 +	   -	RM	RP
23374#	 -	   -	RM	RM
23375#
23376#	 +	   +	RZ	RM
23377#	 -	   +	RZ	RM
23378#	 +	   -	RZ	RP
23379#	 -	   -	RZ	RP
23380#
23381#
23382pwrten:
23383	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
23384	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits (bits 5:4)
23385	mov.l		(%a0),%d4		# reload 1st bcd word to d4
23386	asl.l		&2,%d2			# format d2 to be
23387	bfextu		%d4{&0:&2},%d0		# {rnd mode (2 bits),SM,SE}
23388	add.l		%d0,%d2			# in d2 as index into RTABLE
23389	lea.l		RTABLE(%pc),%a1		# load rtable base
23390	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
23391	clr.l		%d3			# clear d3 to force no exc and extended
23392	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
23393	fmov.l		%d3,%fpcr		# write new FPCR
23394	asr.l		&1,%d0			# bit 0 set means RP (3)
23395	bcc.b		not_rp			# select the matching PTENxx table in a1
23396	lea.l		PTENRP(%pc),%a1		# it is RP
23397	bra.b		calc_p			# go to init section
23398not_rp:
23399	asr.l		&1,%d0			# bit 1 set (bit 0 clear) means RM (2)
23400	bcc.b		not_rm
23401	lea.l		PTENRM(%pc),%a1		# it is RM
23402	bra.b		calc_p			# go to init section
23403not_rm:
23404	lea.l		PTENRN(%pc),%a1		# it is RN
23405calc_p:
23406	mov.l		%d1,%d0			# copy exp to d0;use d0
23407	bpl.b		no_neg			# if exp is negative,
23408	neg.l		%d0			# invert it
23409	or.l		&0x40000000,(%a0)	# and set SE bit
23410no_neg:
23411	clr.l		%d3			# table index
23412	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23413e_loop:
23414	asr.l		&1,%d0			# shift next bit into carry
23415	bcc.b		e_next			# if zero, skip the mul
23416	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
23417e_next:
23418	add.l		&12,%d3			# step to the next 12-byte table entry
23419	tst.l		%d0			# check if d0 is zero
23420	bne.b		e_loop			# not zero, continue shifting
23421#
23422#
23423#  Check the sign of the adjusted exp and make the value in fp0 the
23424#  same sign. If the exp was pos then multiply fp1*fp0;
23425#  else divide fp0/fp1.
23426#
23427# Register Usage:
23428#  pnorm:
23429#	( )  a0: pointer to working bcd value
23430#	(*) fp0: mantissa accumulator
23431#	( ) fp1: scaling factor - 10**(abs(exp))
23432#
23433pnorm:
23434	btst		&30,(%a0)		# test the sign of the exponent (SE)
23435	beq.b		mul			# if clear, go to multiply
23436div:
23437	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**abs(exp)
23438	bra.b		end_dec
23439mul:
23440	fmul.x		%fp1,%fp0		# exp is positive, so multiply mant by 10**exp
23441#
23442#
23443# Clean up and return with result in fp0.
23444#
23445# If the final mul/div in decbin incurred an inex exception,
23446# it will be inex2, but will be reported as inex1 by get_op.
23447#
23448end_dec:
23449	fmov.l		%fpsr,%d0		# get status register
23450	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
23451	beq.b		no_exc			# skip this if no exc
23452	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23453no_exc:
23454	add.l		&0x4,%sp		# drop the exponent lword pushed at e_save
23455	fmovm.x		(%sp)+,&0x40		# restore fp1
23456	movm.l		(%sp)+,&0x3c		# restore d2-d5
23457	fmov.l		&0x0,%fpcr		# leave with fpcr cleared
23458	fmov.l		&0x0,%fpsr		# and fpsr cleared
23459	rts
23460
23461#########################################################################
23462# bindec(): Converts an input in extended precision format to bcd format#
23463#									#
23464# INPUT ***************************************************************	#
23465#	a0 = pointer to the input extended precision value in memory.	#
23466#	     the input may be either normalized, unnormalized, or	#
23467#	     denormalized.						#
23468#	d0 = contains the k-factor sign-extended to 32-bits.		#
23469#									#
23470# OUTPUT **************************************************************	#
23471#	FP_SCR0(a6) = bcd format result on the stack.			#
23472#									#
23473# ALGORITHM ***********************************************************	#
23474#									#
23475#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
23476#		The k-factor is saved for use in d7. Clear the		#
23477#		BINDEC_FLG for separating normalized/denormalized	#
23478#		input.  If input is unnormalized or denormalized,	#
23479#		normalize it.						#
23480#									#
23481#	A2.	Set X = abs(input).					#
23482#									#
23483#	A3.	Compute ILOG.						#
23484#		ILOG is the log base 10 of the input value.  It is	#
23485#		approximated by adding e + 0.f when the original	#
23486#		value is viewed as 2^^e * 1.f in extended precision.	#
23487#		This value is stored in d6.				#
23488#									#
23489#	A4.	Clr INEX bit.						#
23490#		The operation in A3 above may have set INEX2.		#
23491#									#
23492#	A5.	Set ICTR = 0;						#
23493#		ICTR is a flag used in A13.  It must be set before the	#
23494#		loop entry A6.						#
23495#									#
23496#	A6.	Calculate LEN.						#
23497#		LEN is the number of digits to be displayed.  The	#
23498#		k-factor can dictate either the total number of digits,	#
23499#		if it is a positive number, or the number of digits	#
23500#		after the decimal point which are to be included as	#
23501#		significant.  See the 68882 manual for examples.	#
23502#		If LEN is computed to be greater than 17, set OPERR in	#
23503#		USER_FPSR.  LEN is stored in d4.			#
23504#									#
23505#	A7.	Calculate SCALE.					#
23506#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
23507#		of decimal places needed to insure LEN integer digits	#
23508#		in the output before conversion to bcd. LAMBDA is the	#
23509#		sign of ISCALE, used in A9. Fp1 contains		#
23510#		10^^(abs(ISCALE)) using a rounding mode which is a	#
23511#		function of the original rounding mode and the signs	#
23512#		of ISCALE and X.  A table is given in the code.		#
23513#									#
23514#	A8.	Clr INEX; Force RZ.					#
23515#		The operation in A3 above may have set INEX2.		#
23516#		RZ mode is forced for the scaling operation to insure	#
23517#		only one rounding error.  The grs bits are collected in #
23518#		the INEX flag for use in A10.				#
23519#									#
23520#	A9.	Scale X -> Y.						#
23521#		The mantissa is scaled to the desired number of		#
23522#		significant digits.  The excess digits are collected	#
23523#		in INEX2.						#
23524#									#
23525#	A10.	Or in INEX.						#
23526#		If INEX is set, round error occurred.  This is		#
23527#		compensated for by 'or-ing' in the INEX2 flag to	#
23528#		the lsb of Y.						#
23529#									#
23530#	A11.	Restore original FPCR; set size ext.			#
23531#		Perform FINT operation in the user's rounding mode.	#
23532#		Keep the size to extended.				#
23533#									#
23534#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
23535#		mode.  The FPSP routine sintd0 is used.  The output	#
23536#		is in fp0.						#
23537#									#
23538#	A13.	Check for LEN digits.					#
23539#		If the int operation results in more than LEN digits,	#
23540#		or less than LEN -1 digits, adjust ILOG and repeat from	#
23541#		A6.  This test occurs only on the first pass.  If the	#
23542#		result is exactly 10^LEN, decrement ILOG and divide	#
23543#		the mantissa by 10.					#
23544#									#
23545#	A14.	Convert the mantissa to bcd.				#
23546#		The binstr routine is used to convert the LEN digit	#
23547#		mantissa to bcd in memory.  The input to binstr is	#
23548#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
23549#		such that the decimal point is to the left of bit 63.	#
23550#		The bcd digits are stored in the correct position in	#
23551#		the final string area in memory.			#
23552#									#
23553#	A15.	Convert the exponent to bcd.				#
23554#		As in A14 above, the exp is converted to bcd and the	#
23555#		digits are stored in the final string.			#
23556#		Test the length of the final exponent string.  If the	#
23557#		length is 4, set operr.					#
23558#									#
23559#	A16.	Write sign bits to final string.			#
23560#									#
23561#########################################################################
23562
23563set	BINDEC_FLG,	EXC_TEMP	# DENORM flag
23564
23565# Constants in extended precision
23566PLOG2:
23567	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23568PLOG2UP1:
23569	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23570
23571# Constants in single precision (value is the first long of each 16-byte entry)
23572FONE:
23573	long		0x3F800000,0x00000000,0x00000000,0x00000000
23574FTWO:
23575	long		0x40000000,0x00000000,0x00000000,0x00000000
23576FTEN:
23577	long		0x41200000,0x00000000,0x00000000,0x00000000
23578F4933:
23579	long		0x459A2800,0x00000000,0x00000000,0x00000000
23580
23581# NOTE(review): RBDTBL has the same 4x4 shape as decbin's RTABLE; its use is
23582# outside this excerpt -- confirm the indexing against step A7 of bindec.
23581RBDTBL:
23582	byte		0,0,0,0
23583	byte		3,3,2,2
23584	byte		3,2,2,3
23585	byte		2,3,3,2
23586
23587#	Implementation Notes:
23588#
23589#	The registers are used as follows:
23590#
23591#		d0: scratch; LEN input to binstr
23592#		d1: scratch
23593#		d2: upper 32-bits of mantissa for binstr
23594#		d3: scratch;lower 32-bits of mantissa for binstr
23595#		d4: LEN
23596#		d5: LAMBDA/ICTR
23597#		d6: ILOG
23598#		d7: k-factor
23599#		a0: ptr for original operand/final result
23600#		a1: scratch pointer
23601#		a2: pointer to FP_X; abs(original value) in ext
23602#		fp0: scratch
23603#		fp1: scratch
23604#		fp2: scratch
23605#		F_SCR1:
23606#		F_SCR2:
23607#		L_SCR1:
23608#		L_SCR2:
23609
23610	global		bindec
23611bindec:
23612	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
23613	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
23614
23615# A1. Set RM and size ext. Set SIGMA = sign input;
23616#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
23617#     separating  normalized/denormalized input.  If the input
23618#     is a denormalized number, set the BINDEC_FLG memory word
23619#     to signal denorm.  If the input is unnormalized, normalize
23620#     the input and test for denormalized result.
23621#
23622	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
23623	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
23624	mov.l		%d0,%d7		# move k-factor to d7
23625
23626	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
23627	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
23628	bne.w		A2_str		# no; input is a NORM
23629
23630#
23631# Normalize the denorm
23632#
23633un_de_norm:
23634	mov.w		(%a0),%d0
23635	and.w		&0x7fff,%d0	# strip sign of normalized exp
23636	mov.l		4(%a0),%d1
23637	mov.l		8(%a0),%d2
23638norm_loop:
23639	sub.w		&1,%d0
23640	lsl.l		&1,%d2
23641	roxl.l		&1,%d1
23642	tst.l		%d1
23643	bge.b		norm_loop
23644#
23645# Test if the normalized input is denormalized
23646#
23647	tst.w		%d0
23648	bgt.b		pos_exp		# if greater than zero, it is a norm
23649	st		BINDEC_FLG(%a6)	# set flag for denorm
23650pos_exp:
23651	and.w		&0x7fff,%d0	# strip sign of normalized exp
23652	mov.w		%d0,(%a0)
23653	mov.l		%d1,4(%a0)
23654	mov.l		%d2,8(%a0)
23655
23656# A2. Set X = abs(input).
23657#
23658A2_str:
# Copy the 12-byte operand at (%a0) into FP_SCR1 and clear its sign bit;
# only the FP_SCR1 copy is modified in this step.
23659	mov.l		(%a0),FP_SCR1(%a6)	# copy sign/exponent lword of X
23660	mov.l		4(%a0),FP_SCR1+4(%a6)	# copy upper mantissa lword
23661	mov.l		8(%a0),FP_SCR1+8(%a6)	# copy lower mantissa lword
23662	and.l		&0x7fffffff,FP_SCR1(%a6)	# clear bit 31: FP_SCR1 = abs(X)
23663
23664# A3. Compute ILOG.
23665#     ILOG is the log base 10 of the input value.  It is approx-
23666#     imated by adding e + 0.f when the original value is viewed
23667#     as 2^^e * 1.f in extended precision.  This value is stored
23668#     in d6.
23669#
23670# Register usage:
23671#	Input/Output
23672#	d0: k-factor/exponent
23673#	d2: x/x
23674#	d3: x/x
23675#	d4: x/x
23676#	d5: x/x
23677#	d6: x/ILOG
23678#	d7: k-factor/Unchanged
23679#	a0: ptr for original operand/final result
23680#	a1: x/x
23681#	a2: x/x
23682#	fp0: x/float(ILOG)
23683#	fp1: x/x
23684#	fp2: x/x
23685#	F_SCR1:x/x
23686#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23687#	L_SCR1:x/x
23688#	L_SCR2:first word of X packed/Unchanged
23689
# ILOG estimate: a denorm (BINDEC_FLG set) gets the fixed value -4933;
# otherwise (e + 0.f) is scaled by PLOG2 or PLOG2UP1 depending on its sign
# and converted to a longword in d6.
23690	tst.b		BINDEC_FLG(%a6)	# check for denorm
23691	beq.b		A3_cont		# if clr, continue with norm
23692	mov.l		&-4933,%d6	# force ILOG = -4933
23693	bra.b		A4_str		# denorm: skip the estimate below
23694A3_cont:
23695	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
23696	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
23697	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
23698	sub.w		&0x3fff,%d0	# strip off bias
23699	fadd.w		%d0,%fp0	# add in exp: fp0 = e + 1.f
23700	fsub.s		FONE(%pc),%fp0	# subtract off 1.0: fp0 = e + 0.f
23701	fbge.w		pos_res		# if result >= 0, scale by PLOG2
23702	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
23703	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23704	bra.b		A4_str		# go move out ILOG
23705pos_res:
23706	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
23707	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23708
23709
23710# A4. Clr INEX bit.
23711#     The operation in A3 above may have set INEX2.
23712
23713A4_str:
23714	fmov.l		&0,%fpsr	# write 0 to FPSR; discards any INEX2 raised by A3
23715
23716
23717# A5. Set ICTR = 0;
23718#     ICTR is a flag used in A13.  It must be set before the
23719#     loop entry A6. The lower word of d5 is used for ICTR.
23720#     A13 sets ICTR to 1 before it branches back to A6_str.
23721	clr.w		%d5		# clear ICTR (low word of d5 only)
23722
23723# A6. Calculate LEN.
23724#     LEN is the number of digits to be displayed.  The k-factor
23725#     can dictate either the total number of digits, if it is
23726#     a positive number, or the number of digits after the
23727#     original decimal point which are to be included as
23728#     significant.  See the 68882 manual for examples.
23729#     If LEN is computed to be greater than 17, it is clamped to 17
23730#     and, only when k > 0, OPERR (and AIOP) is set in USER_FPSR.  LEN is stored in d4.
23731#
23732# Register usage:
23733#	Input/Output
23734#	d0: exponent/Unchanged
23735#	d2: x/x/scratch
23736#	d3: x/x
23737#	d4: exc picture/LEN
23738#	d5: ICTR/Unchanged
23739#	d6: ILOG/Unchanged
23740#	d7: k-factor/Unchanged
23741#	a0: ptr for original operand/final result
23742#	a1: x/x
23743#	a2: x/x
23744#	fp0: float(ILOG)/Unchanged
23745#	fp1: x/x
23746#	fp2: x/x
23747#	F_SCR1:x/x
23748#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23749#	L_SCR1:x/x
23750#	L_SCR2:first word of X packed/Unchanged
23751
23752A6_str:
# Compute LEN in d4 and clamp it to the range 1..17.  This label is also
# the re-entry point used by A13 after it adjusts ILOG.
23753	tst.l		%d7		# branch on sign of k
23754	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
23755	mov.l		%d7,%d4		# if k > 0, LEN = k
23756	bra.b		len_ck		# skip to LEN check
23757k_neg:
23758	mov.l		%d6,%d4		# first load ILOG to d4
23759	sub.l		%d7,%d4		# subtract off k
23760	addq.l		&1,%d4		# add in the 1
23761len_ck:
23762	tst.l		%d4		# LEN check: branch on sign of LEN
23763	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
23764	cmp.l		%d4,&17		# test if LEN > 17
23765	ble.b		A7_str		# if not, forget it
23766	mov.l		&17,%d4		# set max LEN = 17
23767	tst.l		%d7		# OPERR is raised only for k > 0
23768	ble.b		A7_str		# k <= 0: clamp silently
23769	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
23770	bra.b		A7_str		# finished here
23771LEN_ng:
23772	mov.l		&1,%d4		# min LEN is 1; falls through to A7_str
23773
23774
23775# A7. Calculate SCALE.
23776#     SCALE is equal to 10^ISCALE, where ISCALE is the number
23777#     of decimal places needed to insure LEN integer digits
23778#     in the output before conversion to bcd. LAMBDA is the sign
23779#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
23780#     the rounding mode as given in the following table (see
23781#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782#     of opposite sign in bindec.sa from Coonen).
23783#
23784#	Initial					USE
23785#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
23786#	----------------------------------------------
23787#	 RN	00	   0	   0		00/0	RN
23788#	 RN	00	   0	   1		00/0	RN
23789#	 RN	00	   1	   0		00/0	RN
23790#	 RN	00	   1	   1		00/0	RN
23791#	 RZ	01	   0	   0		11/3	RP
23792#	 RZ	01	   0	   1		11/3	RP
23793#	 RZ	01	   1	   0		10/2	RM
23794#	 RZ	01	   1	   1		10/2	RM
23795#	 RM	10	   0	   0		11/3	RP
23796#	 RM	10	   0	   1		10/2	RM
23797#	 RM	10	   1	   0		10/2	RM
23798#	 RM	10	   1	   1		11/3	RP
23799#	 RP	11	   0	   0		10/2	RM
23800#	 RP	11	   0	   1		11/3	RP
23801#	 RP	11	   1	   0		11/3	RP
23802#	 RP	11	   1	   1		10/2	RM
23803#
23804# Register usage:
23805#	Input/Output
23806#	d0: exponent/scratch - final is 0
23807#	d2: x/0 or 24 for A9
23808#	d3: x/scratch - offset ptr into PTENRM array
23809#	d4: LEN/Unchanged
23810#	d5: 0/ICTR:LAMBDA
23811#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812#	d7: k-factor/Unchanged
23813#	a0: ptr for original operand/final result
23814#	a1: x/ptr to PTENRM array
23815#	a2: x/x
23816#	fp0: float(ILOG)/Unchanged
23817#	fp1: x/10^ISCALE
23818#	fp2: x/x
23819#	F_SCR1:x/x
23820#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23821#	L_SCR1:x/x
23822#	L_SCR2:first word of X packed/Unchanged
23823
23824A7_str:
# Compute ISCALE = ILOG + 1 - LEN, record its sign as LAMBDA in the low
# word of d5, select a rounding mode and power-of-ten table through
# RBDTBL, then build 10^abs(ISCALE) in fp1 by multiplying together the
# table entries that correspond to the set bits of abs(ISCALE).
23825	tst.l		%d7		# test sign of k
23826	bgt.b		k_pos		# if pos and > 0, skip this
23827	cmp.l		%d7,%d6		# test k - ILOG
23828	blt.b		k_pos		# if ILOG >= k, skip this
23829	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
23830k_pos:
23831	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
23832	addq.l		&1,%d0		# add the 1
23833	sub.l		%d4,%d0		# sub off LEN
23834	swap		%d5		# move ICTR to upper word; lower word will hold LAMBDA
23835	clr.w		%d5		# LAMBDA = 0 initially
23836	clr.w		%d2		# set up d2 for very small case
23837	tst.l		%d0		# test sign of ISCALE
23838	bge.b		iscale		# if pos, skip next inst
23839	addq.w		&1,%d5		# if neg, set LAMBDA true
23840	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
23841	bgt.b		no_inf		# if false, skip rest
23842	add.l		&24,%d0		# add in 24 to iscale
23843	mov.l		&24,%d2		# put 24 in d2 for A9
23844no_inf:
23845	neg.l		%d0		# and take abs of ISCALE
23846iscale:
23847	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
# Build the RBDTBL index in d1: (rmode << 2) | (LAMBDA << 1) | sign(X).
23848	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
23849	lsl.w		&1,%d1		# put them in bits 2:1
23850	add.w		%d5,%d1		# add in LAMBDA
23851	lsl.w		&1,%d1		# put them in bits 3:1
23852	tst.l		L_SCR2(%a6)	# test sign of original x
23853	bge.b		x_pos		# if pos, don't set bit 0
23854	addq.l		&1,%d1		# if neg, set bit 0
23855x_pos:
23856	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
23857	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
23858	lsl.l		&4,%d3		# put bits in proper position
23859	fmov.l		%d3,%fpcr	# load bits into fpu
23860	lsr.l		&4,%d3		# shift rmode back down for the decode below
23861	tst.b		%d3		# decode new rmode for pten table
23862	bne.b		not_rn		# if zero, it is RN
23863	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
23864	bra.b		rmode		# exit decode
23865not_rn:
23866	lsr.b		&1,%d3		# get lsb in carry
23867	bcc.b		not_rp2		# if carry clear, it is RM
23868	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
23869	bra.b		rmode		# exit decode
23870not_rp2:
23871	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
23872rmode:
23873	clr.l		%d3		# clr table index
# d0 = abs(ISCALE), d3 = byte offset of the current 12-byte table entry.
23874e_loop2:
23875	lsr.l		&1,%d0		# shift next bit into carry
23876	bcc.b		e_next2		# if zero, skip the mul
23877	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
23878e_next2:
23879	add.l		&12,%d3		# inc d3 to next pwrten table entry
23880	tst.l		%d0		# test if ISCALE is zero
23881	bne.b		e_loop2		# if not, loop
23882
23883# A8. Clr INEX; Force RZ.
23884#     The operation in A3 above may have set INEX2.
23885#     RZ mode is forced for the scaling operation to insure
23886#     only one rounding error.  The grs bits are collected in
23887#     the INEX flag for use in A10.
23888#
23889# Register usage:
23890#	Input/Output
23891
# Start the scaling step with a zeroed FPSR so that the INEX2 bit read in
# A10 reflects only the A9 operations.
23892	fmov.l		&0,%fpsr	# clr INEX
23893	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
23894
23895# A9. Scale X -> Y.
23896#     The mantissa is scaled to the desired number of significant
23897#     digits.  The excess digits are collected in INEX2. If mul,
23898#     Check d2 for excess 10 exponential value.  If not zero,
23899#     the iscale value would have caused the pwrten calculation
23900#     to overflow.  Only a negative iscale can cause this, so
23901#     multiply by 10^(d2), which is now only allowed to be 24,
23902#     with a multiply by 10^8 and 10^16, which is exact since
23903#     10^24 is exact.  If the input was denormalized, the
23904#     exponents and mantissas are multiplied separately (see
23905#     sc_mul below) so that no intermediate product underflows.
23906#
23907# Register usage:
23908#	Input/Output
23909#	d0: FPCR with RZ mode/Unchanged
23910#	d2: 0 or 24/unchanged
23911#	d3: x/x
23912#	d4: LEN/Unchanged
23913#	d5: ICTR:LAMBDA
23914#	d6: ILOG/Unchanged
23915#	d7: k-factor/Unchanged
23916#	a0: ptr for original operand/final result
23917#	a1: ptr to PTENRM array/Unchanged
23918#	a2: x/x
23919#	fp0: float(ILOG)/X adjusted for SCALE (Y)
23920#	fp1: 10^ISCALE/Unchanged
23921#	fp2: x/x
23922#	F_SCR1:x/x
23923#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23924#	L_SCR1:x/x
23925#	L_SCR2:first word of X packed/Unchanged
23926
23927A9_str:
# Y = abs(X) / 10^ISCALE when LAMBDA = 0, or abs(X) * 10^ISCALE (times
# 10^24 when d2 is non-zero) when LAMBDA = 1.  The result is left in fp0.
23928	fmov.x		(%a0),%fp0	# load X from memory
23929	fabs.x		%fp0		# use abs(X)
23930	tst.w		%d5		# LAMBDA is in lower word of d5
23931	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
23932	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
23933	bra.w		A10_st		# branch to A10
23934
23935sc_mul:
23936	tst.b		BINDEC_FLG(%a6)	# check for denorm
23937	beq.w		A9_norm		# if norm, continue with mul
23938
23939# for DENORM, we must calculate:
23940#	fp0 = input_op * 10^ISCALE * 10^24
23941# since the input operand is a DENORM, we can't multiply it directly.
23942# so, we do the multiplication of the exponents and mantissas separately.
23943# in this way, we avoid underflow on intermediate stages of the
23944# multiplication and guarantee a result without exception.
23945	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
23946
# d3 accumulates the biased exponent of the full product; the sum is
# stored into the stacked copy of 10^ISCALE.
23947	mov.w		(%sp),%d3	# grab exponent
23948	andi.w		&0x7fff,%d3	# clear sign
23949	ori.w		&0x8000,(%a0)	# make DENORM exp negative
23950	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
23951	subi.w		&0x3fff,%d3	# subtract BIAS
23952	add.w		36(%a1),%d3	# add exponent word of 10^8
23953	subi.w		&0x3fff,%d3	# subtract BIAS
23954	add.w		48(%a1),%d3	# add exponent word of 10^16
23955	subi.w		&0x3fff,%d3	# subtract BIAS
23956
23957	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
23958
23959	andi.w		&0x8000,(%sp)	# keep sign
23960	or.w		%d3,(%sp)	# insert new exponent
23961	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
23962	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
23963	mov.l		0x4(%a0),-(%sp)
23964	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23965	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
23966	fmul.x		(%sp)+,%fp0	# multiply by stacked 10^ISCALE (combined exponent)
23967
23968#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
23969#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
# Only the mantissas of 10^8 and 10^16 are multiplied in here; their
# exponents were already added into d3 above.
23970	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
23971	mov.l		36+4(%a1),-(%sp)
23972	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23973	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
23974	mov.l		48+4(%a1),-(%sp)
23975	mov.l		&0x3fff0000,-(%sp)# force exp to zero
23976	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
23977	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
23978	bra.b		A10_st
23979
# NOTE(review): this handler branches to itself, so reaching it spins
# forever; there is no recovery path and the saved fp1 stays on the stack.
23980sc_mul_err:
23981	bra.b		sc_mul_err
23982
23983A9_norm:
23984	tst.w		%d2		# test for small exp case
23985	beq.b		A9_con		# if zero, continue as normal
23986	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
23987	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
23988A9_con:
23989	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
23990
23991# A10. Or in INEX.
23992#      If INEX is set, round error occurred.  This is compensated
23993#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
23994#
23995# Register usage:
23996#	Input/Output
23997#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
23998#	d2: x/x
23999#	d3: x/x
24000#	d4: LEN/Unchanged
24001#	d5: ICTR:LAMBDA
24002#	d6: ILOG/Unchanged
24003#	d7: k-factor/Unchanged
24004#	a0: ptr for original operand/final result
24005#	a1: ptr to PTENxx array/Unchanged
24006#	a2: x/ptr to FP_SCR1(a6)
24007#	fp0: Y/Y with lsb adjusted
24008#	fp1: 10^ISCALE/Unchanged
24009#	fp2: x/x
24010
24011A10_st:
# If the A9 scaling was inexact, force the lsb of Y's mantissa to 1.
# a2 is left pointing at FP_SCR1 and is used again in A15.
24012	fmov.l		%fpsr,%d0	# get FPSR
24013	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
24014	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
24015	btst		&9,%d0		# check if INEX2 set
24016	beq.b		A11_st		# if clear, skip rest
24017	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
24018	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
24019
24020
24021# A11. Restore original FPCR; set size ext.
24022#      Perform FINT operation in the user's rounding mode.  Keep
24023#      the size to extended.  The sintdo entry point in the sint
24024#      routine expects the FPCR value to be in USER_FPCR for
24025#      mode and precision.  The original FPCR is saved in L_SCR1.
24026
24027A11_st:
# Keep a copy of USER_FPCR in L_SCR1 (restored at the end of A12), then
# reduce USER_FPCR to its rounding-mode bits (mask 0x30).
24028	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
24029	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
24030#					;block exceptions
24031
24032
24033# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24034#      fint.x is issued inline (the sintdo call is commented out).  The output is in fp0.
24035#
24036# Register usage:
24037#	Input/Output
24038#	d0: FPSR with AINEX cleared/FPCR with size set to ext
24039#	d2: x/x/scratch
24040#	d3: x/x
24041#	d4: LEN/Unchanged
24042#	d5: ICTR:LAMBDA/Unchanged
24043#	d6: ILOG/Unchanged
24044#	d7: k-factor/Unchanged
24045#	a0: ptr for original operand/src ptr for sintdo
24046#	a1: ptr to PTENxx array/Unchanged
24047#	a2: ptr to FP_SCR1(a6)/Unchanged
24048#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24049#	fp0: Y/YINT
24050#	fp1: 10^ISCALE/Unchanged
24051#	fp2: x/x
24052#	F_SCR1:x/x
24053#	F_SCR2:Y adjusted for inex/Y with original exponent
24054#	L_SCR1:x/original USER_FPCR
24055#	L_SCR2:first word of X packed/Unchanged
24056
24057A12_st:
# YINT = fint(+/-Y) under the rounding mode left in USER_FPCR by A11.
# d0-d1/a0-a1, L_SCR1, L_SCR2 and USER_FPSR are pushed around the
# operation and popped in reverse order afterwards.
24058	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
24059	mov.l	L_SCR1(%a6),-(%sp)
24060	mov.l	L_SCR2(%a6),-(%sp)
24061
24062	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
24063	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
24064	tst.l		L_SCR2(%a6)	# test sign of original operand
24065	bge.b		do_fint12		# if pos, use Y
24066	or.l		&0x80000000,(%a0)	# if neg, use -Y
24067do_fint12:
24068	mov.l	USER_FPSR(%a6),-(%sp)
24069#	bsr	sintdo		# sint routine returns int in fp0
24070
24071	fmov.l	USER_FPCR(%a6),%fpcr
24072	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
24073##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
24074##	andi.l		&0x00000030,%d0
24075##	fmov.l		%d0,%fpcr
24076	fint.x		FP_SCR1(%a6),%fp0	# do fint()
24077	fmov.l	%fpsr,%d0
24078	or.w	%d0,FPSR_EXCEPT(%a6)	# merge low word of FPSR into FPSR_EXCEPT
24079##	fmov.l		&0x0,%fpcr
24080##	fmov.l		%fpsr,%d0		# don't keep ccodes
24081##	or.w		%d0,FPSR_EXCEPT(%a6)
24082
24083	mov.b	(%sp),USER_FPSR(%a6)	# restore only the first byte of the saved USER_FPSR (presumably the ccode byte)
24084	add.l	&4,%sp			# drop the saved USER_FPSR lword
24085
24086	mov.l	(%sp)+,L_SCR2(%a6)
24087	mov.l	(%sp)+,L_SCR1(%a6)
24088	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
24089
24090	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
24091	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
24092
24093# A13. Check for LEN digits.
24094#      If the int operation results in more than LEN digits,
24095#      or less than LEN -1 digits, adjust ILOG and repeat from
24096#      A6.  This test occurs only on the first pass.  If the
24097#      result is exactly 10^LEN, decrement ILOG and divide
24098#      the mantissa by 10.  The calculation of 10^LEN cannot
24099#      be inexact, since all powers of ten up to 10^27 are exact
24100#      in extended precision, so the use of a previous power-of-ten
24101#      table will introduce no error.
24102#
24103#
24104# Register usage:
24105#	Input/Output
24106#	d0: FPCR with size set to ext/scratch final = 0
24107#	d2: x/x
24108#	d3: x/scratch final = x
24109#	d4: LEN/LEN adjusted
24110#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24111#	d6: ILOG/ILOG adjusted
24112#	d7: k-factor/Unchanged
24113#	a0: pointer into memory for packed bcd string formation
24114#	a1: ptr to PTENxx array/Unchanged
24115#	a2: ptr to FP_SCR1(a6)/Unchanged
24116#	fp0: int portion of Y/abs(YINT) adjusted
24117#	fp1: 10^ISCALE/Unchanged
24118#	fp2: x/10^LEN
24119#	F_SCR1:x/x
24120#	F_SCR2:Y with original exponent/Unchanged
24121#	L_SCR1:original USER_FPCR/Unchanged
24122#	L_SCR2:first word of X packed/Unchanged
24123
24124A13_st:
# First pass (ICTR = 0): compare abs(YINT) with 10^(LEN-1) and 10^LEN and,
# if it is out of range, adjust ILOG, set ICTR and restart at A6_str.
# Later passes (not_zr) only handle the case abs(YINT) = 10^LEN.
24125	swap		%d5		# put ICTR in lower word of d5
24126	tst.w		%d5		# check if ICTR = 0
24127	bne		not_zr		# if non-zero, go to second test
24128#
24129# Compute 10^(LEN-1)
24130#
24131	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24132	mov.l		%d4,%d0		# put LEN in d0
24133	subq.l		&1,%d0		# d0 = LEN -1
24134	clr.l		%d3		# clr table index
24135l_loop:
24136	lsr.l		&1,%d0		# shift next bit into carry
24137	bcc.b		l_next		# if zero, skip the mul
24138	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24139l_next:
24140	add.l		&12,%d3		# inc d3 to next pwrten table entry
24141	tst.l		%d0		# test if LEN is zero
24142	bne.b		l_loop		# if not, loop
24143#
24144# 10^(LEN-1) is computed for this test and A14.  If the input was
24145# denormalized, check only the case in which YINT > 10^LEN.
24146#
24147	tst.b		BINDEC_FLG(%a6)	# check if input was norm
24148	beq.b		A13_con		# if norm, continue with checking
24149	fabs.x		%fp0		# take abs of YINT
24150	bra		test_2		# denorm: skip the lower-bound test
24151#
24152# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24153#
24154A13_con:
24155	fabs.x		%fp0		# take abs of YINT
24156	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
24157	fbge.w		test_2		# if greater, do next test
24158	subq.l		&1,%d6		# subtract 1 from ILOG
24159	mov.w		&1,%d5		# set ICTR
24160	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24161	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
24162	bra.w		A6_str		# return to A6 and recompute YINT
24163test_2:
24164	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
24165	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
24166	fblt.w		A14_st		# if less, all is ok, go to A14
24167	fbgt.w		fix_ex		# if greater, fix and redo
24168	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
24169	addq.l		&1,%d6		# and inc ILOG
24170	bra.b		A14_st		# and continue elsewhere
24171fix_ex:
24172	addq.l		&1,%d6		# increment ILOG by 1
24173	mov.w		&1,%d5		# set ICTR
24174	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24175	bra.w		A6_str		# return to A6 and recompute YINT
24176#
24177# Since ICTR <> 0, we have already been through one adjustment,
24178# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24179# 10^LEN is again computed using whatever table is in a1 since the
24180# value calculated cannot be inexact.
24181#
24182not_zr:
24183	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24184	mov.l		%d4,%d0		# put LEN in d0
24185	clr.l		%d3		# clr table index
24186z_loop:
24187	lsr.l		&1,%d0		# shift next bit into carry
24188	bcc.b		z_next		# if zero, skip the mul
24189	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24190z_next:
24191	add.l		&12,%d3		# inc d3 to next pwrten table entry
24192	tst.l		%d0		# test if LEN is zero
24193	bne.b		z_loop		# if not, loop
24194	fabs.x		%fp0		# get abs(YINT)
24195	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
24196	fbneq.w		A14_st		# if not, skip this
24197	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
24198	addq.l		&1,%d6		# and inc ILOG by 1
24199	addq.l		&1,%d4		# and inc LEN
24200	fmul.s		FTEN(%pc),%fp2	# LEN was incremented, so get the new 10^LEN
24201
24202# A14. Convert the mantissa to bcd.
24203#      The binstr routine is used to convert the LEN digit
24204#      mantissa to bcd in memory.  The input to binstr is
24205#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206#      such that the decimal point is to the left of bit 63.
24207#      The bcd digits are stored in the correct position in
24208#      the final string area in memory.
24209#
24210#
24211# Register usage:
24212#	Input/Output
24213#	d0: x/LEN call to binstr - final is 0
24214#	d1: x/0
24215#	d2: x/ms 32-bits of mant of abs(YINT)
24216#	d3: x/ls 32-bits of mant of abs(YINT)
24217#	d4: LEN/Unchanged
24218#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24219#	d6: ILOG
24220#	d7: k-factor/Unchanged
24221#	a0: pointer into memory for packed bcd string formation
24222#	    /ptr to first mantissa byte in result string
24223#	a1: ptr to PTENxx array/Unchanged
24224#	a2: ptr to FP_SCR1(a6)/Unchanged
24225#	fp0: int portion of Y/abs(YINT) adjusted
24226#	fp1: 10^ISCALE/Unchanged
24227#	fp2: 10^LEN/Unchanged
24228#	F_SCR1:x/Work area for final result
24229#	F_SCR2:Y with original exponent/Unchanged
24230#	L_SCR1:original USER_FPCR/Unchanged
24231#	L_SCR2:first word of X packed/Unchanged
24232
24233A14_st:
# Turn abs(YINT)/10^LEN into a 64-bit fraction in d2:d3, round it at
# bit 7, and call binstr to write LEN BCD digits starting at FP_SCR0+3.
24234	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
24235	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
24236	lea.l		FP_SCR0(%a6),%a0
24237	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
24238	mov.l		4(%a0),%d2	# d2 = upper mantissa lword of the quotient
24239	mov.l		8(%a0),%d3	# d3 = lower mantissa lword of the quotient
24240	clr.l		4(%a0)		# zero 2nd lword of FP_SCR0
24241	clr.l		8(%a0)		# zero 3rd lword of FP_SCR0
24242	mov.l		(%a0),%d0	# move exponent to d0
24243	swap		%d0		# put exponent in lower word
24244	beq.b		no_sft		# if zero, don't shift
24245	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
24246	tst.l		%d0		# check if > 1
24247	bgt.b		no_sft		# if so, don't shift
24248	neg.l		%d0		# make exp positive
24249m_loop:
24250	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
24251	roxr.l		&1,%d3		# the number of places
24252	dbf.w		%d0,m_loop	# given in d0
24253no_sft:
24254	tst.l		%d2		# check for mantissa of zero
24255	bne.b		no_zr		# if not, go on
24256	tst.l		%d3		# continue zero check
24257	beq.b		zer_m		# if zero, go directly to binstr
24258no_zr:
24259	clr.l		%d1		# put zero in d1 for addx
24260	add.l		&0x00000080,%d3	# inc at bit 7
24261	addx.l		%d1,%d2		# continue inc
24262	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24263zer_m:
24264	mov.l		%d4,%d0		# put LEN in d0 for binstr call
24265	addq.l		&3,%a0		# a0 points to M16 byte in result
24266	bsr		binstr		# call binstr to convert mant
24267
24268
24269# A15. Convert the exponent to bcd.
24270#      As in A14 above, the exp is converted to bcd and the
24271#      digits are stored in the final string.
24272#
24273#      Digits are stored in L_SCR1(a6) on return from binstr as:
24274#
24275#	 32               16 15                0
24276#	-----------------------------------------
24277#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
24278#	-----------------------------------------
24279#
24280# And are moved into their proper places in FP_SCR0.  If digit e4
24281# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
24282# written as specified in the 881/882 manual for packed decimal.
24283#
24284# Register usage:
24285#	Input/Output
24286#	d0: x/LEN call to binstr - final is 0
24287#	d1: x/scratch (0);shift count for final exponent packing
24288#	d2: x/ms 32-bits of exp fraction/scratch
24289#	d3: x/ls 32-bits of exp fraction
24290#	d4: LEN/Unchanged
24291#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24292#	d6: ILOG
24293#	d7: k-factor/Unchanged
24294#	a0: ptr to result string/ptr to L_SCR1(a6)
24295#	a1: ptr to PTENxx array/Unchanged
24296#	a2: ptr to FP_SCR1(a6)/Unchanged
24297#	fp0: abs(YINT) adjusted/float(ILOG)
24298#	fp1: 10^ISCALE/Unchanged
24299#	fp2: 10^LEN/Unchanged
24300#	F_SCR1:Work area for final result/BCD result
24301#	F_SCR2:Y with original exponent/ILOG/10^4
24302#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303#	L_SCR2:first word of X packed/Unchanged
24304
24305A15_st:
# Choose the exponent value to print (abs(ILOG), or a forced 1 or 4933
# when the mantissa in fp0 is zero), divide it by 10^4, convert the
# fraction to 4 BCD digits with binstr and insert them into FP_SCR0.
24306	tst.b		BINDEC_FLG(%a6)	# check for denorm
24307	beq.b		not_denorm
24308	ftest.x		%fp0		# test for zero
24309	fbeq.w		den_zero	# if zero, use k-factor or 4933
24310	fmov.l		%d6,%fp0	# float ILOG
24311	fabs.x		%fp0		# get abs of ILOG
24312	bra.b		convrt
24313den_zero:
24314	tst.l		%d7		# check sign of the k-factor
24315	blt.b		use_ilog	# if negative, use ILOG
24316	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
24317	bra.b		convrt		# do it
24318use_ilog:
24319	fmov.l		%d6,%fp0	# float ILOG
24320	fabs.x		%fp0		# get abs of ILOG
24321	bra.b		convrt
24322not_denorm:
24323	ftest.x		%fp0		# test for zero
24324	fbneq.w		not_zero	# non-zero mantissa: use ILOG
24325	fmov.s		FONE(%pc),%fp0	# force exponent to 1
24326	bra.b		convrt		# do it
24327not_zero:
24328	fmov.l		%d6,%fp0	# float ILOG
24329	fabs.x		%fp0		# get abs of ILOG
24330convrt:
24331	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
24332	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
24333	mov.l		4(%a2),%d2	# move word 2 to d2 (a2 -> FP_SCR1, set in A10)
24334	mov.l		8(%a2),%d3	# move word 3 to d3
24335	mov.w		(%a2),%d0	# move exp to d0
24336	beq.b		x_loop_fin	# if zero, skip the shift
24337	sub.w		&0x3ffd,%d0	# subtract off bias
24338	neg.w		%d0		# make exp positive
24339x_loop:
24340	lsr.l		&1,%d2		# shift d2:d3 right
24341	roxr.l		&1,%d3		# the number of places
24342	dbf.w		%d0,x_loop	# given in d0
24343x_loop_fin:
24344	clr.l		%d1		# put zero in d1 for addx
24345	add.l		&0x00000080,%d3	# inc at bit 7
24346	addx.l		%d1,%d2		# continue inc
24347	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24348	mov.l		&4,%d0		# put 4 in d0 for binstr call
24349	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
24350	bsr		binstr		# call binstr to convert exp
24351	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
24352	mov.l		&12,%d1		# use d1 for shift count
24353	lsr.l		%d1,%d0		# shift d0 right by 12
24354	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
24355	lsr.l		%d1,%d0		# shift d0 right by 12
24356	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
24357	tst.b		%d0		# check if e4 is zero
24358	beq.b		A16_st		# if zero, skip rest
24359	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
24360
24361
24362# A16. Write sign bits to final string.
24363#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24364#
24365# Register usage:
24366#	Input/Output
24367#	d0: x/scratch - final is x
24368#	d2: x/x
24369#	d3: x/x
24370#	d4: LEN/Unchanged
24371#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24372#	d6: ILOG/ILOG adjusted
24373#	d7: k-factor/Unchanged
24374#	a0: ptr to L_SCR1(a6)/Unchanged
24375#	a1: ptr to PTENxx array/Unchanged
24376#	a2: ptr to FP_SCR1(a6)/Unchanged
24377#	fp0: float(ILOG)/Unchanged
24378#	fp1: 10^ISCALE/Unchanged
24379#	fp2: 10^LEN/Unchanged
24380#	F_SCR1:BCD result with correct signs
24381#	F_SCR2:ILOG/10^4
24382#	L_SCR1:Exponent digits on return from binstr
24383#	L_SCR2:first word of X packed/Unchanged
24384
24385A16_st:
# Collect the two sign bits in d0 (bit 1 = SM from the sign saved in
# L_SCR2, bit 0 = SE from the sign of ILOG in d6) and insert them into
# the top two bits of FP_SCR0, then restore registers and return.
24386	clr.l		%d0		# clr d0 for collection of signs
24387	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
24388	tst.l		L_SCR2(%a6)	# check sign of original mantissa
24389	bge.b		mant_p		# if pos, don't set SM
24390	mov.l		&2,%d0		# move 2 in to d0 for SM
24391mant_p:
24392	tst.l		%d6		# check sign of ILOG
24393	bge.b		wr_sgn		# if pos, don't set SE
24394	addq.l		&1,%d0		# set bit 0 in d0 for SE
24395wr_sgn:
24396	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
24397
24398# Clean up and restore all registers used.
24399
24400	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
24401	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
24402	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
24403	rts
24404
24405	global		PTENRN
# PTENRN: powers of ten 10^(2^i), i = 0..12, one 12-byte (three-longword)
# entry each.  bindec loads this table base into a1 when its rounding
# decode yields RN, and indexes it in steps of 12 bytes (offset 24 is
# 10^4, 36 is 10^8, 48 is 10^16).
24406PTENRN:
24407	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24408	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24409	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24410	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24411	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24412	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
24413	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
24414	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
24415	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
24416	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
24417	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
24418	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
24419	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
24420
24421	global		PTENRP
# PTENRP: powers of ten 10^(2^i), i = 0..12, one 12-byte (three-longword)
# entry each.  bindec loads this table base into a1 when its rounding
# decode yields RP.  Every low mantissa longword here is the largest of
# the three PTENxx tables for the same power.
24422PTENRP:
24423	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24424	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24425	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24426	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24427	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24428	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
24429	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
24430	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
24431	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
24432	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
24433	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
24434	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
24435	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
24436
24437	global		PTENRM
# PTENRM: powers of ten 10^(2^i), i = 0..12, one 12-byte (three-longword)
# entry each.  bindec loads this table base into a1 when its rounding
# decode yields RM.  Every low mantissa longword here is the smallest of
# the three PTENxx tables for the same power.
24438PTENRM:
24439	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24440	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24441	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24442	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24443	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24444	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
24445	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
24446	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
24447	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
24448	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
24449	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
24450	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
24451	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
24452
24453#########################################################################
24454# binstr(): Converts a 64-bit binary integer to bcd.			#
24455#									#
24456# INPUT *************************************************************** #
24457#	d2:d3 = 64-bit binary integer					#
24458#	d0    = desired length (LEN)					#
24459#	a0    = pointer to start in memory for bcd characters		#
24460#		(This pointer must point to byte 4 of the first		#
24461#		 lword of the packed decimal memory string.)		#
24462#									#
24463# OUTPUT ************************************************************** #
24464#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
24465#									#
24466# ALGORITHM ***********************************************************	#
24467#	The 64-bit binary is assumed to have a decimal point before	#
24468#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
24469#	shift and a mul by 8 shift.  The bits shifted out of the	#
24470#	msb form a decimal digit.  This process is iterated until	#
24471#	LEN digits are formed.						#
24472#									#
24473# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
24474#     digit formed will be assumed the least significant.  This is	#
24475#     to force the first byte formed to have a 0 in the upper 4 bits.	#
24476#									#
24477# A2. Beginning of the loop:						#
24478#     Copy the fraction in d2:d3 to d4:d5.				#
24479#									#
24480# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
24481#     extracts and shifts.  The three msbs from d2 will go into d1.	#
24482#									#
24483# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
24484#     will be collected by the carry.					#
24485#									#
24486# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
24487#     into d2:d3.  D1 will contain the bcd digit formed.		#
24488#									#
24489# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
24490#     zero, it is the ls digit.  Put the digit in its place in the	#
24491#     upper word of d0.  If it is the ls digit, write the word		#
24492#     from d0 to memory.						#
24493#									#
24494# A7. Decrement d6 (LEN counter) and repeat the loop until zero.	#
24495#									#
24496#########################################################################
24497
24498#	Implementation Notes:
24499#
24500#	The registers are used as follows:
24501#
24502#		d0: LEN counter
24503#		d1: temp used to form the digit
24504#		d2: upper 32-bits of fraction for mul by 8
24505#		d3: lower 32-bits of fraction for mul by 8
24506#		d4: upper 32-bits of fraction for mul by 2
24507#		d5: lower 32-bits of fraction for mul by 2
24508#		d6: temp for bit-field extracts
24509#		d7: byte digit formation word;digit count {0,1}
24510#		a0: pointer into memory for packed bcd string formation
24511#
24512
24513	global		binstr
24514binstr:
24515	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
24516
24517#
24518# A1: Init d7
24519#
24520	mov.l		&1,%d7		# init d7 for second digit
24521	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
24522#
24523# A2. Copy d2:d3 to d4:d5.  Start loop.
24524#
24525loop:
24526	mov.l		%d2,%d4		# copy the fraction before muls
24527	mov.l		%d3,%d5		# to d4:d5
24528#
24529# A3. Multiply d2:d3 by 8; extract msbs into d1.
24530#
24531	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
24532	asl.l		&3,%d2		# shift d2 left by 3 places
24533	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
24534	asl.l		&3,%d3		# shift d3 left by 3 places
24535	or.l		%d6,%d2		# or in msbs from d3 into d2
24536#
24537# A4. Multiply d4:d5 by 2; add carry out to d1.
24538#
24539	asl.l		&1,%d5		# mul d5 by 2
24540	roxl.l		&1,%d4		# mul d4 by 2
24541	swap		%d6		# put 0 in d6 lower word
24542	addx.w		%d6,%d1		# add in extend from mul by 2
24543#
24544# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
24545#
24546	add.l		%d5,%d3		# add lower 32 bits
24547	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548	addx.l		%d4,%d2		# add with extend upper 32 bits
24549	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550	addx.w		%d6,%d1		# add in extend from add to d1
24551	swap		%d6		# with d6 = 0; put 0 in upper word
24552#
24553# A6. Test d7 and branch.
24554#
24555	tst.w		%d7		# if zero, store digit & to loop
24556	beq.b		first_d		# if non-zero, form byte & write
24557sec_d:
24558	swap		%d7		# bring first digit to word d7b
24559	asl.w		&4,%d7		# first digit in upper 4 bits d7b
24560	add.w		%d1,%d7		# add in ls digit to d7b
24561	mov.b		%d7,(%a0)+	# store d7b byte in memory
24562	swap		%d7		# put LEN counter in word d7a
24563	clr.w		%d7		# set d7a to signal no digits done
24564	dbf.w		%d0,loop	# do loop some more!
24565	bra.b		end_bstr	# finished, so exit
24566first_d:
24567	swap		%d7		# put digit word in d7b
24568	mov.w		%d1,%d7		# put new digit in d7b
24569	swap		%d7		# put LEN counter in word d7a
24570	addq.w		&1,%d7		# set d7a to signal first digit done
24571	dbf.w		%d0,loop	# do loop some more!
24572	swap		%d7		# put last digit in string
24573	lsl.w		&4,%d7		# move it to upper 4 bits
24574	mov.b		%d7,(%a0)+	# store it in memory string
24575#
24576# Clean up and return with result in fp0.
24577#
24578end_bstr:
24579	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
24580	rts
24581
24582#########################################################################
24583# XDEF ****************************************************************	#
24584#	facc_in_b(): dmem_read_byte failed				#
24585#	facc_in_w(): dmem_read_word failed				#
24586#	facc_in_l(): dmem_read_long failed				#
24587#	facc_in_d(): dmem_read of dbl prec failed			#
24588#	facc_in_x(): dmem_read of ext prec failed			#
24589#									#
24590#	facc_out_b(): dmem_write_byte failed				#
24591#	facc_out_w(): dmem_write_word failed				#
24592#	facc_out_l(): dmem_write_long failed				#
24593#	facc_out_d(): dmem_write of dbl prec failed			#
24594#	facc_out_x(): dmem_write of ext prec failed			#
24595#									#
24596# XREF ****************************************************************	#
24597#	_real_access() - exit through access error handler		#
24598#									#
24599# INPUT ***************************************************************	#
24600#	None								#
24601#									#
24602# OUTPUT **************************************************************	#
24603#	None								#
24604#									#
24605# ALGORITHM ***********************************************************	#
24606#	Flow jumps here when an FP data fetch call gets an error	#
24607# result. This means the operating system wants an access error frame	#
24608# made out of the current exception stack frame.			#
24609#	So, we first call restore() which makes sure that any updated	#
24610# -(an)+ register gets returned to its pre-exception value and then	#
24611# we change the stack to an access error stack frame.			#
24612#									#
24613#########################################################################
24614
24615facc_in_b:
24616	movq.l		&0x1,%d0			# one byte
24617	bsr.w		restore				# fix An
24618
24619	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
24620	bra.w		facc_finish
24621
24622facc_in_w:
24623	movq.l		&0x2,%d0			# two bytes
24624	bsr.w		restore				# fix An
24625
24626	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
24627	bra.b		facc_finish
24628
24629facc_in_l:
24630	movq.l		&0x4,%d0			# four bytes
24631	bsr.w		restore				# fix An
24632
24633	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
24634	bra.b		facc_finish
24635
24636facc_in_d:
24637	movq.l		&0x8,%d0			# eight bytes
24638	bsr.w		restore				# fix An
24639
24640	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
24641	bra.b		facc_finish
24642
24643facc_in_x:
24644	movq.l		&0xc,%d0			# twelve bytes
24645	bsr.w		restore				# fix An
24646
24647	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
24648	bra.b		facc_finish
24649
24650################################################################
24651
24652facc_out_b:
24653	movq.l		&0x1,%d0			# one byte
24654	bsr.w		restore				# restore An
24655
24656	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
24657	bra.b		facc_finish
24658
24659facc_out_w:
24660	movq.l		&0x2,%d0			# two bytes
24661	bsr.w		restore				# restore An
24662
24663	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
24664	bra.b		facc_finish
24665
24666facc_out_l:
24667	movq.l		&0x4,%d0			# four bytes
24668	bsr.w		restore				# restore An
24669
24670	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
24671	bra.b		facc_finish
24672
24673facc_out_d:
24674	movq.l		&0x8,%d0			# eight bytes
24675	bsr.w		restore				# restore An
24676
24677	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
24678	bra.b		facc_finish
24679
24680facc_out_x:
24681	mov.l		&0xc,%d0			# twelve bytes
24682	bsr.w		restore				# restore An
24683
24684	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
24685
24686# here's where we actually create the access error frame from the
24687# current exception stack frame.
24688facc_finish:
24689	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24690
24691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
24692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
24694
24695	unlk		%a6
24696
24697	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
24698	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
24699	mov.l		0xc(%sp),0x8(%sp)	# store EA
24700	mov.l		&0x00000001,0xc(%sp)	# store FSLW
24701	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
24702	mov.w		&0x4008,0x6(%sp)	# store voff
24703
24704	btst		&0x5,(%sp)		# supervisor or user mode?
24705	beq.b		facc_out2		# user
24706	bset		&0x2,0xd(%sp)		# set supervisor TM bit
24707
24708facc_out2:
24709	bra.l		_real_access
24710
24711##################################################################
24712
24713# if the effective addressing mode was predecrement or postincrement,
24714# the emulation has already changed its value to the correct post-
24715# instruction value. but since we're exiting to the access error
24716# handler, then AN must be returned to its pre-instruction value.
24717# we do that here.
24718restore:
24719	mov.b		EXC_OPWORD+0x1(%a6),%d1
24720	andi.b		&0x38,%d1		# extract opmode
24721	cmpi.b		%d1,&0x18		# postinc?
24722	beq.w		rest_inc
24723	cmpi.b		%d1,&0x20		# predec?
24724	beq.w		rest_dec
24725	rts
24726
24727rest_inc:
24728	mov.b		EXC_OPWORD+0x1(%a6),%d1
24729	andi.w		&0x0007,%d1		# fetch An
24730
24731	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
24732	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
24733
24734tbl_rest_inc:
24735	short		ri_a0 - tbl_rest_inc
24736	short		ri_a1 - tbl_rest_inc
24737	short		ri_a2 - tbl_rest_inc
24738	short		ri_a3 - tbl_rest_inc
24739	short		ri_a4 - tbl_rest_inc
24740	short		ri_a5 - tbl_rest_inc
24741	short		ri_a6 - tbl_rest_inc
24742	short		ri_a7 - tbl_rest_inc
24743
24744ri_a0:
24745	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
24746	rts
24747ri_a1:
24748	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
24749	rts
24750ri_a2:
24751	sub.l		%d0,%a2			# fix a2
24752	rts
24753ri_a3:
24754	sub.l		%d0,%a3			# fix a3
24755	rts
24756ri_a4:
24757	sub.l		%d0,%a4			# fix a4
24758	rts
24759ri_a5:
24760	sub.l		%d0,%a5			# fix a5
24761	rts
24762ri_a6:
24763	sub.l		%d0,(%a6)		# fix stacked a6
24764	rts
24765# if it's a fmove out instruction, we don't have to fix a7
24766# because we hadn't changed it yet. if it's an opclass two
24767# instruction (data moved in) and the exception was in supervisor
24768# mode, then also also wasn't updated. if it was user mode, then
24769# restore the correct a7 which is in the USP currently.
24770ri_a7:
24771	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
24772	bne.b		ri_a7_done		# out
24773
24774	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
24775	bne.b		ri_a7_done		# supervisor
24776	movc		%usp,%a0		# restore USP
24777	sub.l		%d0,%a0
24778	movc		%a0,%usp
24779ri_a7_done:
24780	rts
24781
24782# need to invert adjustment value if the <ea> was predec
24783rest_dec:
24784	neg.l		%d0
24785	bra.b		rest_inc
24786