xref: /openbmc/linux/arch/m68k/math-emu/fp_util.S (revision efdbd7345f8836f7495f3ac6ee237d86cb3bb6b0)
1/*
2 * fp_util.S
3 *
4 * Copyright Roman Zippel, 1997.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, and the entire permission notice in its entirety,
11 *    including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 *    products derived from this software without specific prior
17 *    written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions.  (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "fp_emu.h"
39
40/*
41 * Here are lots of conversion and normalization functions mainly
42 * used by fp_scan.S
43 * Note that these functions are optimized for "normal" numbers,
44 * these are handled first and exit as fast as possible, this is
45 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
46 * it's called very often.
47 * The register usage is optimized for fp_scan.S and for whichever
48 * registers are unused there at the time of the call, so be careful
49 * if you want to change something here. %d0 and %d1 are always usable,
50 * sometimes %d2 (or only its lower half). Most functions have to return
51 * %a0 unmodified, so that the caller can immediately reuse it.
52 */
53
54	.globl	fp_ill, fp_end
55
56	| exits from fp_scan:
57	| illegal instruction
/*
 * fp_ill: exit used for an illegal instruction.
 * Emits the "fp_illegal" message through the printf macro and
 * returns to the caller.
 */
58fp_ill:
59	printf	,"fp_illegal\n"
60	rts
61	| completed instruction
/*
 * fp_end: exit used for a completed instruction.
 *
 * Sanity check: the three longwords at TASK_MM-8, TASK_MM-4 and
 * TASK_MM (relative to %a2) are tested in turn; if any of them is
 * negative an "oops" line with all three values is printed.
 * NOTE(review): %a2 is presumably the current task pointer -- confirm
 * against the caller in fp_scan.S.
 *
 * returns:	%d0 = 0
 */
62fp_end:
63	tst.l	(TASK_MM-8,%a2)
64	jmi	1f
65	tst.l	(TASK_MM-4,%a2)
66	jmi	1f
67	tst.l	(TASK_MM,%a2)
68	jpl	2f
	/* at least one of the three values was negative: report it */
691:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
702:	clr.l	%d0
71	rts
72
73	.globl	fp_conv_long2ext, fp_conv_single2ext
74	.globl	fp_conv_double2ext, fp_conv_ext2ext
75	.globl	fp_normalize_ext, fp_normalize_double
76	.globl	fp_normalize_single, fp_normalize_single_fast
77	.globl	fp_conv_ext2double, fp_conv_ext2single
78	.globl	fp_conv_ext2long, fp_conv_ext2short
79	.globl	fp_conv_ext2byte
80	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
81	.globl	fp_finalrounding_double
82	.globl	fp_finalrounding, fp_finaltest, fp_final
83
84/*
85 * First several conversion functions from a source operand
86 * into the extended format. Note, that only fp_conv_ext2ext
87 * normalizes the number and is always called after the other
88 * conversion functions, which only move the information into
89 * fp_ext structure.
90 */
91
92	| fp_conv_long2ext:
93	|
94	| args:	%d0 = source (32-bit long)
95	|	%a0 = destination (ptr to struct fp_ext)
96
/*
 * fp_conv_long2ext: store the signed 32-bit integer in %d0 into the
 * fp_ext at %a0, without normalizing it.
 *
 *   zero:	all three longwords of the destination are cleared.
 *   non-zero:	the first longword receives the sign flag (1 if the
 *		source was negative) in its upper word and the fixed
 *		exponent 0x3fff+31 in its lower word; the magnitude is
 *		written to the high mantissa lword and the low
 *		mantissa lword is cleared.
 *
 * %a0 is walked through the structure with post-increments and then
 * moved back by 8, so it is returned unchanged.  %d0 and %d1 are
 * used as scratch.
 */
97fp_conv_long2ext:
98	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
99	clr.l	%d1			| sign defaults to zero
100	tst.l	%d0
101	jeq	fp_l2e_zero		| is source zero?
	/* jeq leaves the flags from tst.l intact, so jpl still sees them */
102	jpl	1f			| positive?
	/* negative source: remember the sign and keep only the magnitude */
103	moveq	#1,%d1
104	neg.l	%d0
	/* move the sign flag into the upper word, exponent goes below it */
1051:	swap	%d1
106	move.w	#0x3fff+31,%d1
107	move.l	%d1,(%a0)+		| set sign / exp
108	move.l	%d0,(%a0)+		| set mantissa
109	clr.l	(%a0)
110	subq.l	#8,%a0			| restore %a0
111	printx	PCONV,%a0@
112	printf	PCONV,")\n"
113	rts
114	| source is zero
115fp_l2e_zero:
116	clr.l	(%a0)+
117	clr.l	(%a0)+
118	clr.l	(%a0)
119	subq.l	#8,%a0
120	printx	PCONV,%a0@
121	printf	PCONV,")\n"
122	rts
123
124	| fp_conv_single2ext
125	| args:	%d0 = source (single-precision fp value)
126	|	%a0 = dest (struct fp_ext *)
127
/*
 * fp_conv_single2ext: unpack the single-precision value in %d0 into
 * the fp_ext at %a0, without normalizing it.
 *
 * %d0 is shifted left by 8 to become the high mantissa lword (its top
 * bit is then fixed up as the explicit integer bit).  %d1 is shifted
 * right by 15 so that the sign lands in bit 16, and its low word is
 * shifted a further 8 so that it holds just the 8-bit exponent.
 *
 *   exponent 0	   -> fp_s2e_small (zero or denormal)
 *   exponent 0xff -> fp_s2e_large (Inf or NaN, stored with 0x7fff)
 *   otherwise	   -> integer bit set, exponent re-biased
 *
 * The low mantissa lword is always cleared.  %a0 is returned
 * unchanged; %d0 and %d1 are used as scratch.
 */
128fp_conv_single2ext:
129	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
130	move.l	%d0,%d1
131	lsl.l	#8,%d0			| shift mantissa
132	lsr.l	#8,%d1			| exponent / sign
133	lsr.l	#7,%d1
	/* word-sized shift: the sign bit in the upper word is not touched */
134	lsr.w	#8,%d1
135	jeq	fp_s2e_small		| zero / denormal?
136	cmp.w	#0xff,%d1		| NaN / Inf?
137	jeq	fp_s2e_large
138	bset	#31,%d0			| set explizit bit
139	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
	/* common store sequence, also used by the two special cases below */
1409:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
141	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
142	clr.l	(%a0)			| low lword = 0
143	subq.l	#8,%a0
144	printx	PCONV,%a0@
145	printf	PCONV,")\n"
146	rts
147	| zeros and denormalized
148fp_s2e_small:
149	| exponent is zero, so explizit bit is already zero too
	/* mantissa zero as well: a true zero, stored with exponent 0 */
150	tst.l	%d0
151	jeq	9b
	/* denormal: integer bit stays clear, exponent is 0x4000-0x7f */
152	move.w	#0x4000-0x7f,%d1
153	jra	9b
154	| infinities and NAN
155fp_s2e_large:
156	bclr	#31,%d0			| clear explizit bit
157	move.w	#0x7fff,%d1
158	jra	9b
159
/*
 * fp_conv_double2ext: fetch a double through %a1 with two getuser.l
 * reads and unpack it into the fp_ext at %a0, without normalizing it.
 *
 * Each getuser.l uses (%a1)+, so %a1 is advanced by 8 in total.
 * fp_err_ua2 is handed to getuser, presumably as the access-fault
 * target -- confirm against the macro definition in fp_emu.h.
 *
 * First longword: shifted left by 11 to form the top of the mantissa;
 * a copy is shifted right so that the sign sits in bit 16 and the
 * 11-bit exponent in the low word.
 *   exponent 0	    -> fp_d2e_small (zero or denormal)
 *   exponent 0x7ff -> fp_d2e_large (Inf or NaN, stored with 0x7fff)
 *   otherwise	    -> integer bit set, exponent re-biased
 * Second longword: shifted left by 11 into the low mantissa lword;
 * its top 11 bits are OR-ed into the bottom of the high lword.
 *
 * %a0 is returned unchanged; %d0 and %d1 are used as scratch.
 *
 * NOTE(review): fp_d2e_small tells zero from denormal by testing only
 * the mantissa bits taken from the first longword; the second
 * longword has not been fetched at that point.  A denormal whose
 * non-zero bits all lie in the second longword therefore appears to
 * keep exponent 0 rather than 0x4000-0x3ff -- confirm whether this is
 * intended.
 */
160fp_conv_double2ext:
161#ifdef FPU_EMU_DEBUG
162	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
163	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
164	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
165#endif
166	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
167	move.l	%d0,%d1
168	lsl.l	#8,%d0			| shift high mantissa
169	lsl.l	#3,%d0
170	lsr.l	#8,%d1			| exponent / sign
171	lsr.l	#7,%d1
	/* word-sized shift: the sign bit in the upper word is not touched */
172	lsr.w	#5,%d1
173	jeq	fp_d2e_small		| zero / denormal?
174	cmp.w	#0x7ff,%d1		| NaN / Inf?
175	jeq	fp_d2e_large
176	bset	#31,%d0			| set explizit bit
177	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
1789:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
179	move.l	%d0,(%a0)+
	/* second longword: low 21 bits go to the low lword (shifted by 11) */
180	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
181	move.l	%d0,%d1
182	lsl.l	#8,%d0
183	lsl.l	#3,%d0
184	move.l	%d0,(%a0)
	/* ... and its top 11 bits fill the bottom of the high lword */
185	moveq	#21,%d0
186	lsr.l	%d0,%d1
187	or.l	%d1,-(%a0)
188	subq.l	#4,%a0
189	printx	PCONV,%a0@
190	printf	PCONV,")\n"
191	rts
192	| zeros and denormalized
193fp_d2e_small:
194	| exponent is zero, so explizit bit is already zero too
195	tst.l	%d0
196	jeq	9b
197	move.w	#0x4000-0x3ff,%d1
198	jra	9b
199	| infinities and NAN
200fp_d2e_large:
201	bclr	#31,%d0			| clear explizit bit
202	move.w	#0x7fff,%d1
203	jra	9b
204
205	| fp_conv_ext2ext:
206	| originally used to get longdouble from userspace, now it's
207	| called before arithmetic operations to make sure the number
208	| is normalized [maybe rename it?].
209	| args:	%a0 = dest (struct fp_ext *)
210	| returns 0 in %d0 for a NaN, otherwise 1
211
/*
 * fp_conv_ext2ext: normalize the fp_ext at %a0 in place.
 *
 * args:	%a0 = struct fp_ext *
 * returns:	%d0 = 0 for a NaN, 1 otherwise; %a0 is unchanged
 * scratch:	%d1, %d2
 *
 * Offsets used below, from the start of the structure:
 *	0 extra-precision byte	1 sign		2 exponent (word)
 *	4 high mantissa lword	8 low mantissa lword
 *
 *   exponent 0x7fff	-> fp_e2e_large (Inf / NaN)
 *   top mantissa bit set -> already normal, fp_e2e_checkround
 *   otherwise		-> fp_e2e_small*: shift the mantissa left and
 *			   lower the exponent by the same amount; the
 *			   shift is limited so that the exponent never
 *			   drops below zero.
 *
 * With CONFIG_M68KFPU_EMU_EXTRAPREC a non-zero extra-precision byte
 * is rounded into the mantissa (mode taken from FPD_RND) and
 * FPSR_EXC_INEX2 is set.
 */
212fp_conv_ext2ext:
213	printf	PCONV,"e2e: %p(",1,%a0
214	printx	PCONV,%a0@
215	printf	PCONV,"), "
216	move.l	(%a0)+,%d0
217	cmp.w	#0x7fff,%d0		| Inf / NaN?
218	jeq	fp_e2e_large
219	move.l	(%a0),%d0
220	jpl	fp_e2e_small		| zero / denorm?
221	| The high bit is set, so normalization is irrelevant.
	| Entered with %a0 pointing at the high mantissa lword.
222fp_e2e_checkround:
223	subq.l	#4,%a0
224#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
225	move.b	(%a0),%d0
226	jne	fp_e2e_round
227#endif
228	printf	PCONV,"%p(",1,%a0
229	printx	PCONV,%a0@
230	printf	PCONV,")\n"
231	moveq	#1,%d0
232	rts
233#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| Round the extra-precision byte (now in the low byte of %d0)
	| into the 64-bit mantissa.
234fp_e2e_round:
235	fp_set_sr FPSR_EXC_INEX2
236	clr.b	(%a0)
237	move.w	(FPD_RND,FPDATA),%d2
238	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
239	tst.b	%d0			| test guard bit
240	jpl	9f			| zero is closer
241	btst	#0,(11,%a0)		| test lsb bit
242	jne	fp_e2e_doroundup	| round to infinity
243	lsl.b	#1,%d0			| check low bits
244	jeq	9f			| round to zero
245fp_e2e_doroundup:
246	addq.l	#1,(8,%a0)
247	jcc	9f
248	addq.l	#1,(4,%a0)
249	jcc	9f
	| mantissa wrapped to zero: restart it at 1.0 and bump the exponent
250	move.w	#0x8000,(4,%a0)
251	addq.w	#1,(2,%a0)
2529:	printf	PNORM,"%p(",1,%a0
253	printx	PNORM,%a0@
254	printf	PNORM,")\n"
	| Only finite numbers are rounded here, so report "not a NaN".
	| %d0 still holds rounding scratch and may even be zero after
	| the lsl.b above, which callers would read as "NaN".
	moveq	#1,%d0
255	rts
256fp_e2e_roundother:
257	subq.w	#2,%d2
258	jcs	9b			| %d2 < 2, round to zero
259	jhi	1f			| %d2 > 2, round to +infinity
260	tst.b	(1,%a0)			| to -inf
261	jne	fp_e2e_doroundup	| negative, round to infinity
262	jra	9b			| positive, round to zero
2631:	tst.b	(1,%a0)			| to +inf
264	jeq	fp_e2e_doroundup	| positive, round to infinity
265	jra	9b			| negative, round to zero
266#endif
267	| zeros and subnormals:
268	| try to normalize these anyway.
	| The Z flag tested first is still the one from the
	| "move.l (%a0),%d0" before the jpl that brought us here.
269fp_e2e_small:
270	jne	fp_e2e_small1		| high lword zero?
271	move.l	(4,%a0),%d0
272	jne	fp_e2e_small2
273#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
274	clr.l	%d0
275	move.b	(-4,%a0),%d0
276	jne	fp_e2e_small3
277#endif
278	| Genuine zero.
279	clr.w	-(%a0)
280	subq.l	#2,%a0
281	printf	PNORM,"%p(",1,%a0
282	printx	PNORM,%a0@
283	printf	PNORM,")\n"
284	moveq	#1,%d0
285	rts
286	| definitely subnormal, need to shift all 64 bits
	| %d1 = leading zeros of the high lword = wanted shift (1..31)
287fp_e2e_small1:
288	bfffo	%d0{#0,#32},%d1
289	move.w	-(%a0),%d2
290	sub.w	%d1,%d2
291	jcc	1f
292	| Pathologically small, denormalize.
	| exponent < shift: shift only by the old exponent, new exponent 0
293	add.w	%d2,%d1
294	clr.w	%d2
2951:	move.w	%d2,(%a0)+
296	move.w	%d1,%d2
297	jeq	fp_e2e_checkround
298	| fancy 64-bit double-shift begins here
299	lsl.l	%d2,%d0
300	move.l	%d0,(%a0)+
301	move.l	(%a0),%d0
302	move.l	%d0,%d1
303	lsl.l	%d2,%d0
304	move.l	%d0,(%a0)
305	neg.w	%d2
306	and.w	#0x1f,%d2
307	lsr.l	%d2,%d1
308	or.l	%d1,-(%a0)
309#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| shift the extra-precision byte along with the mantissa;
	| entered with %d2 = 32 - shift
310fp_e2e_extra1:
311	clr.l	%d0
312	move.b	(-4,%a0),%d0
313	neg.w	%d2
314	add.w	#24,%d2
315	jcc	1f
316	clr.b	(-4,%a0)
317	lsl.l	%d2,%d0
318	or.l	%d0,(4,%a0)
319	jra	fp_e2e_checkround
3201:	addq.w	#8,%d2
321	lsl.l	%d2,%d0
322	move.b	%d0,(-4,%a0)
323	lsr.l	#8,%d0
324	or.l	%d0,(4,%a0)
325#endif
326	jra	fp_e2e_checkround
327	| pathologically small subnormal
	| high lword is zero, %d0 = low lword; wanted shift is 32..63
328fp_e2e_small2:
329	bfffo	%d0{#0,#32},%d1
330	add.w	#32,%d1
331	move.w	-(%a0),%d2
332	sub.w	%d1,%d2
333	jcc	1f
334	| Beyond pathologically small, denormalize.
335	add.w	%d2,%d1
336	clr.w	%d2
3371:	move.w	%d2,(%a0)+
338	ext.l	%d1
339	jeq	fp_e2e_checkround
340	clr.l	(4,%a0)
	| Decide on the shift count in %d1 (not on the exponent in %d2):
	| everything below expects %d1 = shift - 32, exactly as the
	| twin code in fp_ne_small2 does.  A borrow means shift < 32.
341	sub.w	#32,%d1
342	jcs	1f
343	lsl.l	%d1,%d0			| lower lword needs only to be shifted
344	move.l	%d0,(%a0)		| into the higher lword
345#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
346	clr.l	%d0
347	move.b	(-4,%a0),%d0
348	clr.b	(-4,%a0)
349	neg.w	%d1
350	add.w	#32,%d1
351	bfins	%d0,(%a0){%d1,#8}
352#endif
353	jra	fp_e2e_checkround
	| shift < 32: %d1 becomes 32 - shift, the bit offset (from the
	| high lword) at which the old low lword has to be inserted
3541:	neg.w	%d1			| lower lword is splitted between
355	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
356#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
357	jra	fp_e2e_checkround
358#else
359	move.w	%d1,%d2
360	jra	fp_e2e_extra1
361	| These are extremely small numbers, that will mostly end up as zero
362	| anyway, so this is only important for correct rounding.
	| Both mantissa lwords are zero; %d0 = extra-precision byte.
363fp_e2e_small3:
364	bfffo	%d0{#24,#8},%d1
365	add.w	#40,%d1
366	move.w	-(%a0),%d2
367	sub.w	%d1,%d2
368	jcc	1f
369	| Pathologically small, denormalize.
370	add.w	%d2,%d1
371	clr.w	%d2
3721:	move.w	%d2,(%a0)+
373	ext.l	%d1
374	jeq	fp_e2e_checkround
375	cmp.w	#8,%d1
376	jcs	2f
3771:	clr.b	(-4,%a0)
378	sub.w	#64,%d1
379	jcs	1f
380	add.w	#24,%d1
381	lsl.l	%d1,%d0
382	move.l	%d0,(%a0)
383	jra	fp_e2e_checkround
3841:	neg.w	%d1
385	bfins	%d0,(%a0){%d1,#8}
386	jra	fp_e2e_checkround
3872:	lsl.l	%d1,%d0
388	move.b	%d0,(-4,%a0)
389	lsr.l	#8,%d0
390	move.b	%d0,(7,%a0)
391	jra	fp_e2e_checkround
392#endif
401	| Infinities and NaNs
	| Entered with %a0 at the high mantissa lword.  Inf = mantissa
	| zero once the explicit integer bit is ignored; else NaN.
402fp_e2e_large:
403	move.l	(%a0)+,%d0
404	jne	3f
4051:	tst.l	(%a0)
406	jne	4f
407	moveq	#1,%d0
4082:	subq.l	#8,%a0
409	printf	PCONV,"%p(",1,%a0
410	printx	PCONV,%a0@
411	printf	PCONV,")\n"
412	rts
413	| we have maybe a NaN, shift off the highest bit
4143:	lsl.l	#1,%d0
415	jeq	1b
416	| we have a NaN, clear the return value
4174:	clrl	%d0
418	jra	2b
420
421
422/*
423 * Normalization functions.  Call these on the output of general
424 * FP operators, and before any conversion into the destination
425 * formats. fp_normalize_ext has always to be called first, the
426 * following conversion functions expect an already normalized
427 * number.
428 */
429
430	| fp_normalize_ext:
431	| normalize an extended in extended (unpacked) format, basically
432	| it does the same as fp_conv_ext2ext, additionally it also does
433	| the necessary postprocessing checks.
434	| args:	%a0 (struct fp_ext *)
435	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
436
/*
 * fp_normalize_ext: normalize the fp_ext at %a0 in place and record
 * the resulting exception bits.
 *
 * Offsets used below, from the start of the structure:
 *	0 extra-precision byte	1 sign		2 exponent (word)
 *	4 high mantissa lword	8 low mantissa lword
 *
 *   exponent 0x7fff	-> fp_ne_large (Inf / NaN; a NaN with bit 30 of
 *			   the high mantissa lword clear gets that bit
 *			   set and raises FPSR_EXC_SNAN)
 *   top mantissa bit set -> already normal, fp_ne_checkround
 *   otherwise		-> fp_ne_small*: shift the mantissa left and
 *			   lower the exponent by the same amount.  If
 *			   the exponent is smaller than the shift, the
 *			   shift is limited to the exponent and the
 *			   exponent stored as 0; fp_ne_small1 and
 *			   fp_ne_small2 then also set FPSR_EXC_UNFL.
 *
 * With CONFIG_M68KFPU_EMU_EXTRAPREC a non-zero extra-precision byte
 * is rounded into the mantissa (mode taken from FPD_RND) and
 * FPSR_EXC_INEX2 is set.
 *
 * Every exit leaves %a0 at the start of the structure.  %d0, %d1 and
 * the low word of %d2 are used as scratch.
 */
437fp_normalize_ext:
438	printf	PNORM,"ne: %p(",1,%a0
439	printx	PNORM,%a0@
440	printf	PNORM,"), "
441	move.l	(%a0)+,%d0
442	cmp.w	#0x7fff,%d0		| Inf / NaN?
443	jeq	fp_ne_large
444	move.l	(%a0),%d0
445	jpl	fp_ne_small		| zero / denorm?
446	| The high bit is set, so normalization is irrelevant.
	/* entered with %a0 pointing at the high mantissa lword */
447fp_ne_checkround:
448	subq.l	#4,%a0
449#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
450	move.b	(%a0),%d0
451	jne	fp_ne_round
452#endif
453	printf	PNORM,"%p(",1,%a0
454	printx	PNORM,%a0@
455	printf	PNORM,")\n"
456	rts
457#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	/* round the extra-precision byte (low byte of %d0) into the mantissa */
458fp_ne_round:
459	fp_set_sr FPSR_EXC_INEX2
460	clr.b	(%a0)
461	move.w	(FPD_RND,FPDATA),%d2
462	jne	fp_ne_roundother	| %d2 == 0, round to nearest
463	tst.b	%d0			| test guard bit
464	jpl	9f			| zero is closer
465	btst	#0,(11,%a0)		| test lsb bit
466	jne	fp_ne_doroundup		| round to infinity
467	lsl.b	#1,%d0			| check low bits
468	jeq	9f			| round to zero
469fp_ne_doroundup:
470	addq.l	#1,(8,%a0)
471	jcc	9f
472	addq.l	#1,(4,%a0)
473	jcc	9f
	/* mantissa wrapped to zero: bump the exponent, restart at 1.0 */
474	addq.w	#1,(2,%a0)
475	move.w	#0x8000,(4,%a0)
4769:	printf	PNORM,"%p(",1,%a0
477	printx	PNORM,%a0@
478	printf	PNORM,")\n"
479	rts
480fp_ne_roundother:
481	subq.w	#2,%d2
482	jcs	9b			| %d2 < 2, round to zero
483	jhi	1f			| %d2 > 2, round to +infinity
484	tst.b	(1,%a0)			| to -inf
485	jne	fp_ne_doroundup		| negative, round to infinity
486	jra	9b			| positive, round to zero
4871:	tst.b	(1,%a0)			| to +inf
488	jeq	fp_ne_doroundup		| positive, round to infinity
489	jra	9b			| negative, round to zero
490#endif
491	| Zeros and subnormal numbers
492	| These are probably merely subnormal, rather than "denormalized"
493	|  numbers, so we will try to make them normal again.
	/*
	 * The Z flag tested first is still the one produced by the
	 * "move.l (%a0),%d0" before the jpl that brought us here.
	 */
494fp_ne_small:
495	jne	fp_ne_small1		| high lword zero?
496	move.l	(4,%a0),%d0
497	jne	fp_ne_small2
498#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
499	clr.l	%d0
500	move.b	(-4,%a0),%d0
501	jne	fp_ne_small3
502#endif
503	| Genuine zero.
504	clr.w	-(%a0)
505	subq.l	#2,%a0
506	printf	PNORM,"%p(",1,%a0
507	printx	PNORM,%a0@
508	printf	PNORM,")\n"
509	rts
510	| Subnormal.
	/* %d1 = leading zeros of the high lword = wanted shift (1..31) */
511fp_ne_small1:
512	bfffo	%d0{#0,#32},%d1
513	move.w	-(%a0),%d2
514	sub.w	%d1,%d2
515	jcc	1f
516	| Pathologically small, denormalize.
	/* exponent < shift: shift only by the old exponent, new exponent 0 */
517	add.w	%d2,%d1
518	clr.w	%d2
519	fp_set_sr FPSR_EXC_UNFL
5201:	move.w	%d2,(%a0)+
521	move.w	%d1,%d2
522	jeq	fp_ne_checkround
523	| This is exactly the same 64-bit double shift as seen above.
524	lsl.l	%d2,%d0
525	move.l	%d0,(%a0)+
526	move.l	(%a0),%d0
527	move.l	%d0,%d1
528	lsl.l	%d2,%d0
529	move.l	%d0,(%a0)
	/* %d2 = 32 - shift: carry the top bits of the low lword upwards */
530	neg.w	%d2
531	and.w	#0x1f,%d2
532	lsr.l	%d2,%d1
533	or.l	%d1,-(%a0)
534#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	/* shift the extra-precision byte along; entered with %d2 = 32 - shift */
535fp_ne_extra1:
536	clr.l	%d0
537	move.b	(-4,%a0),%d0
538	neg.w	%d2
539	add.w	#24,%d2
540	jcc	1f
541	clr.b	(-4,%a0)
542	lsl.l	%d2,%d0
543	or.l	%d0,(4,%a0)
544	jra	fp_ne_checkround
5451:	addq.w	#8,%d2
546	lsl.l	%d2,%d0
547	move.b	%d0,(-4,%a0)
548	lsr.l	#8,%d0
549	or.l	%d0,(4,%a0)
550#endif
551	jra	fp_ne_checkround
552	| May or may not be subnormal, if so, only 32 bits to shift.
	/* high lword is zero, %d0 = low lword; wanted shift is 32..63 */
553fp_ne_small2:
554	bfffo	%d0{#0,#32},%d1
555	add.w	#32,%d1
556	move.w	-(%a0),%d2
557	sub.w	%d1,%d2
558	jcc	1f
559	| Beyond pathologically small, denormalize.
560	add.w	%d2,%d1
561	clr.w	%d2
562	fp_set_sr FPSR_EXC_UNFL
5631:	move.w	%d2,(%a0)+
564	ext.l	%d1
565	jeq	fp_ne_checkround
566	clr.l	(4,%a0)
	/* %d1 = shift - 32; a borrow means the final shift is below 32 */
567	sub.w	#32,%d1
568	jcs	1f
569	lsl.l	%d1,%d0			| lower lword needs only to be shifted
570	move.l	%d0,(%a0)		| into the higher lword
571#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
572	clr.l	%d0
573	move.b	(-4,%a0),%d0
574	clr.b	(-4,%a0)
575	neg.w	%d1
576	add.w	#32,%d1
577	bfins	%d0,(%a0){%d1,#8}
578#endif
579	jra	fp_ne_checkround
	/* %d1 becomes 32 - shift, the bit offset for the old low lword */
5801:	neg.w	%d1			| lower lword is splitted between
581	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
582#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
583	jra	fp_ne_checkround
584#else
585	move.w	%d1,%d2
586	jra	fp_ne_extra1
587	| These are extremely small numbers, that will mostly end up as zero
588	| anyway, so this is only important for correct rounding.
	/*
	 * Both mantissa lwords are zero; %d0 = extra-precision byte.
	 * NOTE(review): unlike fp_ne_small1/fp_ne_small2 the
	 * "pathologically small" case here does not set
	 * FPSR_EXC_UNFL -- confirm whether that is intended.
	 */
589fp_ne_small3:
590	bfffo	%d0{#24,#8},%d1
591	add.w	#40,%d1
592	move.w	-(%a0),%d2
593	sub.w	%d1,%d2
594	jcc	1f
595	| Pathologically small, denormalize.
596	add.w	%d2,%d1
597	clr.w	%d2
5981:	move.w	%d2,(%a0)+
599	ext.l	%d1
600	jeq	fp_ne_checkround
601	cmp.w	#8,%d1
602	jcs	2f
6031:	clr.b	(-4,%a0)
604	sub.w	#64,%d1
605	jcs	1f
606	add.w	#24,%d1
607	lsl.l	%d1,%d0
608	move.l	%d0,(%a0)
609	jra	fp_ne_checkround
6101:	neg.w	%d1
611	bfins	%d0,(%a0){%d1,#8}
612	jra	fp_ne_checkround
6132:	lsl.l	%d1,%d0
614	move.b	%d0,(-4,%a0)
615	lsr.l	#8,%d0
616	move.b	%d0,(7,%a0)
617	jra	fp_ne_checkround
618#endif
619	| Infinities and NaNs, again, same as above.
	/* entered with %a0 at the high mantissa lword */
620fp_ne_large:
621	move.l	(%a0)+,%d0
622	jne	3f
6231:	tst.l	(%a0)
624	jne	4f
6252:	subq.l	#8,%a0
626	printf	PNORM,"%p(",1,%a0
627	printx	PNORM,%a0@
628	printf	PNORM,")\n"
629	rts
630	| we have maybe a NaN, shift off the highest bit
6313:	move.l	%d0,%d1
632	lsl.l	#1,%d1
633	jne	4f
	/* only the top bit was set: clear it and go on to test the low lword */
634	clr.l	(-4,%a0)
635	jra	1b
636	| we have a NaN, test if it is signaling
	/* bset reports the old state of bit 30: already set -> nothing to do */
6374:	bset	#30,%d0
638	jne	2b
639	fp_set_sr FPSR_EXC_SNAN
640	move.l	%d0,(-4,%a0)
641	jra	2b
642
643	| these next two do rounding as per the IEEE standard.
644	| values for the rounding modes appear to be:
645	| 0:	Round to nearest
646	| 1:	Round to zero
647	| 2:	Round to -Infinity
648	| 3:	Round to +Infinity
649	| both functions expect that fp_normalize_ext was already
650	| called (and that the extended argument is already normalized
651	| as far as possible); they are used if a different
652	| rounding precision is selected and before converting
653	| into single/double
654
655	| fp_normalize_double:
656	| normalize an extended with double (52-bit) precision
657	| args:	 %a0 (struct fp_ext *)
658
/*
 * fp_normalize_double: round the fp_ext at %a0 to double precision
 * in place.
 *
 * The first longword is loaded into %d2 as a whole, so the upper word
 * of %d2 keeps the sign byte while its low word is reused for the
 * exponent and later for the rounding mode.
 *
 *   exponent 0		-> fp_nd_zero
 *   exponent 0x7fff	-> fp_nd_huge (left untouched)
 *   exponent below 0x4000-0x3ff -> fp_nd_small (denormalize, UNFL)
 *   (exponent - (0x4000-0x3ff)) >= 0x7fe -> fp_nd_large (OVFL)
 *   otherwise		-> fp_nd_round
 *
 * fp_nd_round expects %a0 at the low mantissa lword and that lword
 * (plus any sticky bit) in %d0.  If any of the low 11 bits is set,
 * FPSR_EXC_INEX2 is raised, the bits are cleared in memory and the
 * mantissa is rounded according to the mode read from FPD_RND.
 *
 * Every exit leaves %a0 at the start of the structure.  %d0, %d1 and
 * %d2 are used as scratch.
 */
659fp_normalize_double:
660	printf	PNORM,"nd: %p(",1,%a0
661	printx	PNORM,%a0@
662	printf	PNORM,"), "
663	move.l	(%a0)+,%d2
664	tst.w	%d2
665	jeq	fp_nd_zero		| zero / denormalized
666	cmp.w	#0x7fff,%d2
667	jeq	fp_nd_huge		| NaN / infinitive.
668	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
669	jcs	fp_nd_small		| too small.
670	cmp.w	#0x7fe,%d2
671	jcc	fp_nd_large		| too big.
672	addq.l	#4,%a0
673	move.l	(%a0),%d0		| low lword of mantissa
674	| now, round off the low 11 bits.
675fp_nd_round:
676	moveq	#21,%d1
677	lsl.l	%d1,%d0			| keep 11 low bits.
678	jne	fp_nd_checkround	| Are they non-zero?
679	| nothing to do here
6809:	subq.l	#8,%a0
681	printf	PNORM,"%p(",1,%a0
682	printx	PNORM,%a0@
683	printf	PNORM,")\n"
684	rts
685	| Be careful with the X bit! It contains the lsb
686	| from the shift above, it is needed for round to nearest.
687fp_nd_checkround:
688	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
689	and.w	#0xf800,(2,%a0)		| clear bits 0-10
690	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
691	jne	2f			| %d2 == 0, round to nearest
692	tst.l	%d0			| test guard bit
693	jpl	9b			| zero is closer
694	| here we test the X bit by adding it to %d2
695	clr.w	%d2			| first set z bit, addx only clears it
696	addx.w	%d2,%d2			| test lsb bit
697	| IEEE754-specified "round to even" behaviour.  If the guard
698	| bit is set, then the number is odd, so rounding works like
699	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
700	| Otherwise, an equal distance rounds towards zero, so as not
701	| to produce an odd number.  This is strange, but it is what
702	| the standard says.
703	jne	fp_nd_doroundup		| round to infinity
704	lsl.l	#1,%d0			| check low bits
705	jeq	9b			| round to zero
706fp_nd_doroundup:
707	| round (the mantissa, that is) towards infinity
	/* add one unit at bit 11, the lowest bit kept after rounding */
708	add.l	#0x800,(%a0)
709	jcc	9b			| no overflow, good.
710	addq.l	#1,-(%a0)		| extend to high lword
711	jcc	1f			| no overflow, good.
712	| Yow! we have managed to overflow the mantissa.  Since this
713	| only happens when %d1 was 0xfffff800, it is now zero, so
714	| reset the high bit, and increment the exponent.
715	move.w	#0x8000,(%a0)
716	addq.w	#1,-(%a0)
717	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
718	jeq	fp_nd_large		| yes, so make it infinity.
	/* here %a0 points at the high mantissa lword */
7191:	subq.l	#4,%a0
720	printf	PNORM,"%p(",1,%a0
721	printx	PNORM,%a0@
722	printf	PNORM,")\n"
723	rts
7242:	subq.w	#2,%d2
725	jcs	9b			| %d2 < 2, round to zero
726	jhi	3f			| %d2 > 2, round to +infinity
727	| Round to +Inf or -Inf.  High word of %d2 contains the
728	| sign of the number, by the way.
729	swap	%d2			| to -inf
730	tst.b	%d2
731	jne	fp_nd_doroundup		| negative, round to infinity
732	jra	9b			| positive, round to zero
7333:	swap	%d2			| to +inf
734	tst.b	%d2
735	jeq	fp_nd_doroundup		| positive, round to infinity
736	jra	9b			| negative, round to zero
737	| Exponent underflow.  Try to make a denormal, and set it to
738	| the smallest possible fraction if this fails.
	/* entered with %a0 at the high mantissa lword, %d2.w = exp - 0x3c01 */
739fp_nd_small:
740	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
741	move.w	#0x3c01,(-2,%a0)	| 2**-1022
742	neg.w	%d2			| degree of underflow
743	cmp.w	#32,%d2			| single or double shift?
744	jcc	1f
745	| Again, another 64-bit double shift.
746	move.l	(%a0),%d0
747	move.l	%d0,%d1
748	lsr.l	%d2,%d0
749	move.l	%d0,(%a0)+
750	move.l	(%a0),%d0
751	lsr.l	%d2,%d0
	/* %d2 = 32 - shift: bits leaving the high lword enter the low lword */
752	neg.w	%d2
753	add.w	#32,%d2
754	lsl.l	%d2,%d1
755	or.l	%d1,%d0
756	move.l	(%a0),%d1
757	move.l	%d0,(%a0)
758	| Check to see if we shifted off any significant bits
759	lsl.l	%d2,%d1
760	jeq	fp_nd_round		| Nope, round.
761	bset	#0,%d0			| Yes, so set the "sticky bit".
762	jra	fp_nd_round		| Now, round.
763	| Another 64-bit single shift and store
7641:	sub.w	#32,%d2
765	cmp.w	#32,%d2			| Do we really need to shift?
766	jcc	2f			| No, the number is too small.
767	move.l	(%a0),%d0
768	clr.l	(%a0)+
769	move.l	%d0,%d1
770	lsr.l	%d2,%d0
771	neg.w	%d2
772	add.w	#32,%d2
773	| Again, check to see if we shifted off any significant bits.
	/* the whole old low lword is dropped: any set bit there is sticky */
774	tst.l	(%a0)
775	jeq	1f
776	bset	#0,%d0			| Sticky bit.
7771:	move.l	%d0,(%a0)
778	lsl.l	%d2,%d1
779	jeq	fp_nd_round
780	bset	#0,%d0
781	jra	fp_nd_round
782	| Sorry, the number is just too small.
7832:	clr.l	(%a0)+
784	clr.l	(%a0)
785	moveq	#1,%d0			| Smallest possible fraction,
786	jra	fp_nd_round		| round as desired.
787	| zero and denormalized
788fp_nd_zero:
789	tst.l	(%a0)+
790	jne	1f
791	tst.l	(%a0)
792	jne	1f
793	subq.l	#8,%a0
794	printf	PNORM,"%p(",1,%a0
795	printx	PNORM,%a0@
796	printf	PNORM,")\n"
797	rts				| zero.  nothing to do.
798	| These are not merely subnormal numbers, but true denormals,
799	| i.e. pathologically small (exponent is 2**-16383) numbers.
800	| It is clearly impossible for even a normal extended number
801	| with that exponent to fit into double precision, so just
802	| write these ones off as "too darn small".
	/* %a0 is at the low mantissa lword on both ways into this label */
8031:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
804	clr.l	(%a0)
805	clr.l	-(%a0)
806	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
	/* back to the low mantissa lword, as fp_nd_round expects */
807	addq.l	#6,%a0
808	moveq	#1,%d0
809	jra	fp_nd_round		| round.
810	| Exponent overflow.  Just call it infinity.
	/*
	 * Entered with %a0 at the high mantissa lword.  INEX2 is set
	 * in addition when any of the low 11 mantissa bits is set.
	 */
811fp_nd_large:
812	move.w	#0x7ff,%d0
813	and.w	(6,%a0),%d0
814	jeq	1f
815	fp_set_sr FPSR_EXC_INEX2
8161:	fp_set_sr FPSR_EXC_OVFL
817	move.w	(FPD_RND,FPDATA),%d2
818	jne	3f			| %d2 = 0 round to nearest
	/* result is infinity: exponent 0x7fff, mantissa cleared */
8191:	move.w	#0x7fff,(-2,%a0)
820	clr.l	(%a0)+
821	clr.l	(%a0)
8222:	subq.l	#8,%a0
823	printf	PNORM,"%p(",1,%a0
824	printx	PNORM,%a0@
825	printf	PNORM,")\n"
826	rts
8273:	subq.w	#2,%d2
828	jcs	5f			| %d2 < 2, round to zero
829	jhi	4f			| %d2 > 2, round to +infinity
830	tst.b	(-3,%a0)		| to -inf
831	jne	1b
832	jra	5f
8334:	tst.b	(-3,%a0)		| to +inf
834	jeq	1b
	/* rounding away from infinity: exponent 0x43fe, mantissa ffffffff fffff800 */
8355:	move.w	#0x43fe,(-2,%a0)
836	moveq	#-1,%d0
837	move.l	%d0,(%a0)+
838	move.w	#0xf800,%d0
839	move.l	%d0,(%a0)
840	jra	2b
841	| Infinities or NaNs
842fp_nd_huge:
843	subq.l	#4,%a0
844	printf	PNORM,"%p(",1,%a0
845	printx	PNORM,%a0@
846	printf	PNORM,")\n"
847	rts
848
849	| fp_normalize_single:
850	| normalize an extended with single (23-bit) precision
851	| args:	 %a0 (struct fp_ext *)
852
/*
 * fp_normalize_single: round the fp_ext at %a0 to single precision
 * in place.
 *
 *   exponent 0		-> fp_ns_zero
 *   exponent 0x7fff	-> fp_ns_huge (left untouched)
 *   exponent below 0x4000-0x7f -> fp_ns_small (denormalize, UNFL)
 *   (exponent - (0x4000-0x7f)) >= 0xfe -> fp_ns_large (OVFL)
 *   otherwise		-> fp_ns_round
 *
 * fp_ns_round expects %a0 at the low mantissa lword and the high
 * mantissa lword in %d0.  A non-zero low lword is folded into bit 0
 * of %d0 as a sticky bit and cleared in memory.  If the low byte of
 * %d0 is then non-zero, FPSR_EXC_INEX2 is raised, the low byte of the
 * high mantissa lword is cleared in memory and the mantissa is
 * rounded according to the mode read from FPD_RND.
 *
 * Every exit leaves %a0 at the start of the structure.  %d0, %d1 and
 * the low word of %d2 are used as scratch; the sign is read from
 * memory here, not from %d2.
 */
853fp_normalize_single:
854	printf	PNORM,"ns: %p(",1,%a0
855	printx	PNORM,%a0@
856	printf	PNORM,") "
857	addq.l	#2,%a0
858	move.w	(%a0)+,%d2
859	jeq	fp_ns_zero		| zero / denormalized
860	cmp.w	#0x7fff,%d2
861	jeq	fp_ns_huge		| NaN / infinitive.
862	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
863	jcs	fp_ns_small		| too small.
864	cmp.w	#0xfe,%d2
865	jcc	fp_ns_large		| too big.
866	move.l	(%a0)+,%d0		| get high lword of mantissa
867fp_ns_round:
868	tst.l	(%a0)			| check the low lword
869	jeq	1f
870	| Set a sticky bit if it is non-zero.  This should only
871	| affect the rounding in what would otherwise be equal-
872	| distance situations, which is what we want it to do.
873	bset	#0,%d0
8741:	clr.l	(%a0)			| zap it from memory.
875	| now, round off the low 8 bits of the hi lword.
876	tst.b	%d0			| 8 low bits.
877	jne	fp_ns_checkround	| Are they non-zero?
878	| nothing to do here
879	subq.l	#8,%a0
880	printf	PNORM,"%p(",1,%a0
881	printx	PNORM,%a0@
882	printf	PNORM,")\n"
883	rts
884fp_ns_checkround:
885	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
886	clr.b	-(%a0)			| clear low byte of high lword
	/* step back to the start of the high mantissa lword */
887	subq.l	#3,%a0
888	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
889	jne	2f			| %d2 == 0, round to nearest
890	tst.b	%d0			| test guard bit
891	jpl	9f			| zero is closer
892	btst	#8,%d0			| test lsb bit
893	| round to even behaviour, see above.
894	jne	fp_ns_doroundup		| round to infinity
895	lsl.b	#1,%d0			| check low bits
896	jeq	9f			| round to zero
897fp_ns_doroundup:
898	| round (the mantissa, that is) towards infinity
	/* add one unit at bit 8, the lowest bit kept after rounding */
899	add.l	#0x100,(%a0)
900	jcc	9f			| no overflow, good.
901	| Overflow.  This means that the %d1 was 0xffffff00, so it
902	| is now zero.  We will set the mantissa to reflect this, and
903	| increment the exponent (checking for overflow there too)
904	move.w	#0x8000,(%a0)
905	addq.w	#1,-(%a0)
906	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
907	jeq	fp_ns_large		| yes, so make it infinity.
	/* here %a0 points at the high mantissa lword */
9089:	subq.l	#4,%a0
909	printf	PNORM,"%p(",1,%a0
910	printx	PNORM,%a0@
911	printf	PNORM,")\n"
912	rts
913	| check nondefault rounding modes
9142:	subq.w	#2,%d2
915	jcs	9b			| %d2 < 2, round to zero
916	jhi	3f			| %d2 > 2, round to +infinity
917	tst.b	(-3,%a0)		| to -inf
918	jne	fp_ns_doroundup		| negative, round to infinity
919	jra	9b			| positive, round to zero
9203:	tst.b	(-3,%a0)		| to +inf
921	jeq	fp_ns_doroundup		| positive, round to infinity
922	jra	9b			| negative, round to zero
923	| Exponent underflow.  Try to make a denormal, and set it to
924	| the smallest possible fraction if this fails.
	/* entered with %a0 at the high mantissa lword, %d2.w = exp - 0x3f81 */
925fp_ns_small:
926	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
927	move.w	#0x3f81,(-2,%a0)	| 2**-126
928	neg.w	%d2			| degree of underflow
929	cmp.w	#32,%d2			| single or double shift?
930	jcc	2f
931	| a 32-bit shift.
932	move.l	(%a0),%d0
933	move.l	%d0,%d1
934	lsr.l	%d2,%d0
935	move.l	%d0,(%a0)+
936	| Check to see if we shifted off any significant bits.
937	neg.w	%d2
938	add.w	#32,%d2
939	lsl.l	%d2,%d1
940	jeq	1f
941	bset	#0,%d0			| Sticky bit.
942	| Check the lower lword
9431:	tst.l	(%a0)
944	jeq	fp_ns_round
	/*
	 * NOTE(review): this clr has no size suffix, unlike the
	 * clr.l used elsewhere; fp_ns_round tests and clears the
	 * full lword again right after, and the sticky bit is set
	 * on the next line regardless -- confirm .l was meant.
	 */
945	clr	(%a0)
946	bset	#0,%d0			| Sticky bit.
947	jra	fp_ns_round
948	| Sorry, the number is just too small.
9492:	clr.l	(%a0)+
950	clr.l	(%a0)
951	moveq	#1,%d0			| Smallest possible fraction,
952	jra	fp_ns_round		| round as desired.
953	| Exponent overflow.  Just call it infinity.
	/*
	 * Entered with %a0 at the high mantissa lword.  INEX2 is set
	 * in addition when the low byte of that lword is non-zero.
	 */
954fp_ns_large:
955	tst.b	(3,%a0)
956	jeq	1f
957	fp_set_sr FPSR_EXC_INEX2
9581:	fp_set_sr FPSR_EXC_OVFL
959	move.w	(FPD_RND,FPDATA),%d2
960	jne	3f			| %d2 = 0 round to nearest
	/* result is infinity: exponent 0x7fff, mantissa cleared */
9611:	move.w	#0x7fff,(-2,%a0)
962	clr.l	(%a0)+
963	clr.l	(%a0)
9642:	subq.l	#8,%a0
965	printf	PNORM,"%p(",1,%a0
966	printx	PNORM,%a0@
967	printf	PNORM,")\n"
968	rts
9693:	subq.w	#2,%d2
970	jcs	5f			| %d2 < 2, round to zero
971	jhi	4f			| %d2 > 2, round to +infinity
972	tst.b	(-3,%a0)		| to -inf
973	jne	1b
974	jra	5f
9754:	tst.b	(-3,%a0)		| to +inf
976	jeq	1b
	/* rounding away from infinity: exponent 0x407e, mantissa ffffff00 00000000 */
9775:	move.w	#0x407e,(-2,%a0)
978	move.l	#0xffffff00,(%a0)+
979	clr.l	(%a0)
980	jra	2b
981	| zero and denormalized
982fp_ns_zero:
983	tst.l	(%a0)+
984	jne	1f
985	tst.l	(%a0)
986	jne	1f
987	subq.l	#8,%a0
988	printf	PNORM,"%p(",1,%a0
989	printx	PNORM,%a0@
990	printf	PNORM,")\n"
991	rts				| zero.  nothing to do.
992	| These are not merely subnormal numbers, but true denormals,
993	| i.e. pathologically small (exponent is 2**-16383) numbers.
994	| It is clearly impossible for even a normal extended number
995	| with that exponent to fit into single precision, so just
996	| write these ones off as "too darn small".
	/* %a0 is at the low mantissa lword on both ways into this label */
9971:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
998	clr.l	(%a0)
999	clr.l	-(%a0)
1000	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
	/* back to the low mantissa lword, as fp_ns_round expects */
1001	addq.l	#6,%a0
1002	moveq	#1,%d0
1003	jra	fp_ns_round		| round.
1004	| Infinities or NaNs
1005fp_ns_huge:
1006	subq.l	#4,%a0
1007	printf	PNORM,"%p(",1,%a0
1008	printx	PNORM,%a0@
1009	printf	PNORM,")\n"
1010	rts
1011
1012	| fp_normalize_single_fast:
1013	| normalize an extended with single (23-bit) precision
1014	| this is only used by fsgldiv/fsglmul, where the
1015	| operand is not completely normalized.
1016	| args:	 %a0 (struct fp_ext *)
1017
/*
 * fp_normalize_single_fast: round the mantissa of the fp_ext at %a0
 * to single precision in place.
 *
 * Only the exponent value 0x7fff is singled out on entry
 * (fp_nsf_huge, left untouched); there is no zero or underflow
 * handling here.  Exponent overflow is only checked after a
 * round-up has carried out of the mantissa (exponent reaching
 * 0x407f -> fp_nsf_large, FPSR_EXC_OVFL).
 *
 * fp_nsf_round: a non-zero low mantissa lword is folded into bit 0 of
 * %d0 as a sticky bit and cleared in memory.  If the low byte of %d0
 * is then non-zero, FPSR_EXC_INEX2 is raised, the low byte of the
 * high mantissa lword is cleared in memory and the mantissa is
 * rounded according to the mode read from FPD_RND.
 *
 * Every exit leaves %a0 at the start of the structure.  %d0 and the
 * low word of %d2 are used as scratch.
 */
1018fp_normalize_single_fast:
1019	printf	PNORM,"nsf: %p(",1,%a0
1020	printx	PNORM,%a0@
1021	printf	PNORM,") "
1022	addq.l	#2,%a0
1023	move.w	(%a0)+,%d2
1024	cmp.w	#0x7fff,%d2
1025	jeq	fp_nsf_huge		| NaN / infinitive.
1026	move.l	(%a0)+,%d0		| get high lword of mantissa
1027fp_nsf_round:
1028	tst.l	(%a0)			| check the low lword
1029	jeq	1f
1030	| Set a sticky bit if it is non-zero.  This should only
1031	| affect the rounding in what would otherwise be equal-
1032	| distance situations, which is what we want it to do.
1033	bset	#0,%d0
10341:	clr.l	(%a0)			| zap it from memory.
1035	| now, round off the low 8 bits of the hi lword.
1036	tst.b	%d0			| 8 low bits.
1037	jne	fp_nsf_checkround	| Are they non-zero?
1038	| nothing to do here
1039	subq.l	#8,%a0
1040	printf	PNORM,"%p(",1,%a0
1041	printx	PNORM,%a0@
1042	printf	PNORM,")\n"
1043	rts
1044fp_nsf_checkround:
1045	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1046	clr.b	-(%a0)			| clear low byte of high lword
	/* step back to the start of the high mantissa lword */
1047	subq.l	#3,%a0
1048	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1049	jne	2f			| %d2 == 0, round to nearest
1050	tst.b	%d0			| test guard bit
1051	jpl	9f			| zero is closer
1052	btst	#8,%d0			| test lsb bit
1053	| round to even behaviour, see above.
1054	jne	fp_nsf_doroundup		| round to infinity
1055	lsl.b	#1,%d0			| check low bits
1056	jeq	9f			| round to zero
1057fp_nsf_doroundup:
1058	| round (the mantissa, that is) towards infinity
	/* add one unit at bit 8, the lowest bit kept after rounding */
1059	add.l	#0x100,(%a0)
1060	jcc	9f			| no overflow, good.
1061	| Overflow.  This means that the %d1 was 0xffffff00, so it
1062	| is now zero.  We will set the mantissa to reflect this, and
1063	| increment the exponent (checking for overflow there too)
1064	move.w	#0x8000,(%a0)
1065	addq.w	#1,-(%a0)
1066	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
1067	jeq	fp_nsf_large		| yes, so make it infinity.
	/* here %a0 points at the high mantissa lword */
10689:	subq.l	#4,%a0
1069	printf	PNORM,"%p(",1,%a0
1070	printx	PNORM,%a0@
1071	printf	PNORM,")\n"
1072	rts
1073	| check nondefault rounding modes
10742:	subq.w	#2,%d2
1075	jcs	9b			| %d2 < 2, round to zero
1076	jhi	3f			| %d2 > 2, round to +infinity
1077	tst.b	(-3,%a0)		| to -inf
1078	jne	fp_nsf_doroundup	| negative, round to infinity
1079	jra	9b			| positive, round to zero
10803:	tst.b	(-3,%a0)		| to +inf
1081	jeq	fp_nsf_doroundup		| positive, round to infinity
1082	jra	9b			| negative, round to zero
1083	| Exponent overflow.  Just call it infinity.
	/*
	 * Entered with %a0 at the high mantissa lword.  INEX2 is set
	 * in addition when the low byte of that lword is non-zero.
	 */
1084fp_nsf_large:
1085	tst.b	(3,%a0)
1086	jeq	1f
1087	fp_set_sr FPSR_EXC_INEX2
10881:	fp_set_sr FPSR_EXC_OVFL
1089	move.w	(FPD_RND,FPDATA),%d2
1090	jne	3f			| %d2 = 0 round to nearest
	/* result is infinity: exponent 0x7fff, mantissa cleared */
10911:	move.w	#0x7fff,(-2,%a0)
1092	clr.l	(%a0)+
1093	clr.l	(%a0)
10942:	subq.l	#8,%a0
1095	printf	PNORM,"%p(",1,%a0
1096	printx	PNORM,%a0@
1097	printf	PNORM,")\n"
1098	rts
10993:	subq.w	#2,%d2
1100	jcs	5f			| %d2 < 2, round to zero
1101	jhi	4f			| %d2 > 2, round to +infinity
1102	tst.b	(-3,%a0)		| to -inf
1103	jne	1b
1104	jra	5f
11054:	tst.b	(-3,%a0)		| to +inf
1106	jeq	1b
	/* rounding away from infinity: exponent 0x407e, mantissa ffffff00 00000000 */
11075:	move.w	#0x407e,(-2,%a0)
1108	move.l	#0xffffff00,(%a0)+
1109	clr.l	(%a0)
1110	jra	2b
1111	| Infinities or NaNs
1112fp_nsf_huge:
1113	subq.l	#4,%a0
1114	printf	PNORM,"%p(",1,%a0
1115	printx	PNORM,%a0@
1116	printf	PNORM,")\n"
1117	rts
1118
1119	| conv_ext2int (macro):
1120	| Generates a subroutine that converts an extended value to an
1121	| integer of a given size, again, with the appropriate type of
1122	| rounding.
1123
1124	| Macro arguments:
1125	| s:	size, as given in an assembly instruction.
1126	| b:	number of bits in that size.
1127
1128	| Subroutine arguments:
1129	| %a0:	source (struct fp_ext *)
1130
1131	| Returns the integer in %d0 (like it should)
1132
1133.macro conv_ext2int s,b
1134	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
1135	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
1136	printx	PCONV,%a0@
1137	printf	PCONV,") "
1138	addq.l	#2,%a0
1139	move.w	(%a0)+,%d2		| exponent
1140	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
1141	cmp.w	#0x7fff,%d2
1142	jeq	fp_e2i_huge\b		| Inf / NaN
1143	sub.w	#0x3ffe,%d2
1144	jcs	fp_e2i_small\b
1145	cmp.w	#\b,%d2
1146	jhi	fp_e2i_large\b
1147	move.l	(%a0),%d0
1148	move.l	%d0,%d1
1149	lsl.l	%d2,%d1
1150	jne	fp_e2i_round\b
1151	tst.l	(4,%a0)
1152	jne	fp_e2i_round\b
1153	neg.w	%d2
1154	add.w	#32,%d2
1155	lsr.l	%d2,%d0
11569:	tst.w	(-4,%a0)
1157	jne	1f
1158	tst.\s	%d0
1159	jmi	fp_e2i_large\b
1160	printf	PCONV,"-> %p\n",1,%d0
1161	rts
11621:	neg.\s	%d0
1163	jeq	1f
1164	jpl	fp_e2i_large\b
11651:	printf	PCONV,"-> %p\n",1,%d0
1166	rts
1167fp_e2i_round\b:
1168	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1169	neg.w	%d2
1170	add.w	#32,%d2
1171	.if	\b>16
1172	jeq	5f
1173	.endif
1174	lsr.l	%d2,%d0
1175	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1176	jne	2f			| %d2 == 0, round to nearest
1177	tst.l	%d1			| test guard bit
1178	jpl	9b			| zero is closer
1179	btst	%d2,%d0			| test lsb bit (%d2 still 0)
1180	jne	fp_e2i_doroundup\b
1181	lsl.l	#1,%d1			| check low bits
1182	jne	fp_e2i_doroundup\b
1183	tst.l	(4,%a0)
1184	jeq	9b
1185fp_e2i_doroundup\b:
1186	addq.l	#1,%d0
1187	jra	9b
1188	| check nondefault rounding modes
11892:	subq.w	#2,%d2
1190	jcs	9b			| %d2 < 2, round to zero
1191	jhi	3f			| %d2 > 2, round to +infinity
1192	tst.w	(-4,%a0)		| to -inf
1193	jne	fp_e2i_doroundup\b	| negative, round to infinity
1194	jra	9b			| positive, round to zero
11953:	tst.w	(-4,%a0)		| to +inf
1196	jeq	fp_e2i_doroundup\b	| positive, round to infinity
1197	jra	9b	| negative, round to zero
1198	| we are only want -2**127 get correctly rounded here,
1199	| since the guard bit is in the lower lword.
1200	| everything else ends up anyway as overflow.
1201	.if	\b>16
12025:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1203	jne	2b			| %d2 == 0, round to nearest
1204	move.l	(4,%a0),%d1		| test guard bit
1205	jpl	9b			| zero is closer
1206	lsl.l	#1,%d1			| check low bits
1207	jne	fp_e2i_doroundup\b
1208	jra	9b
1209	.endif
1210fp_e2i_zero\b:
1211	clr.l	%d0
1212	tst.l	(%a0)+
1213	jne	1f
1214	tst.l	(%a0)
1215	jeq	3f
12161:	subq.l	#4,%a0
1217	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
1218fp_e2i_small\b:
1219	fp_set_sr FPSR_EXC_INEX2
1220	clr.l	%d0
1221	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1222	subq.w	#2,%d2
1223	jcs	3f			| %d2 < 2, round to nearest/zero
1224	jhi	2f			| %d2 > 2, round to +infinity
1225	tst.w	(-4,%a0)		| to -inf
1226	jeq	3f
1227	subq.\s	#1,%d0
1228	jra	3f
12292:	tst.w	(-4,%a0)		| to +inf
1230	jne	3f
1231	addq.\s	#1,%d0
12323:	printf	PCONV,"-> %p\n",1,%d0
1233	rts
1234fp_e2i_large\b:
1235	fp_set_sr FPSR_EXC_OPERR
1236	move.\s	#inf,%d0
1237	tst.w	(-4,%a0)
1238	jeq	1f
1239	addq.\s	#1,%d0
12401:	printf	PCONV,"-> %p\n",1,%d0
1241	rts
1242fp_e2i_huge\b:
1243	move.\s	(%a0),%d0
1244	tst.l	(%a0)
1245	jne	1f
1246	tst.l	(%a0)
1247	jeq	fp_e2i_large\b
1248	| fp_normalize_ext has set this bit already
1249	| and made the number nonsignaling
12501:	fp_tst_sr FPSR_EXC_SNAN
1251	jne	1f
1252	fp_set_sr FPSR_EXC_OPERR
12531:	printf	PCONV,"-> %p\n",1,%d0
1254	rts
1255.endm
1256
1257fp_conv_ext2long:
1258	conv_ext2int l,32
1259
1260fp_conv_ext2short:
1261	conv_ext2int w,16
1262
1263fp_conv_ext2byte:
1264	conv_ext2int b,8
1265
fp_conv_ext2double:
	| Round the extended value at %a0 with fp_normalize_double, pack
	| it into the 64-bit double layout and store both lwords through
	| %a1 with the putuser.l macro (defined elsewhere, fp_err_ua2 is
	| handed to it as the error label).
	|
	| In:	%a0 = source (struct fp_ext *), %a1 = destination
	| Out:	%a1 points at the second stored lword,
	|	%a0 at the low mantissa lword
	| Uses:	%d0, %d1, %d2
	jsr	fp_normalize_double
	printf	PCONV,"e2d: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d2		| sign byte and exponent word
	move.l	(%a0)+,%d0		| high mantissa lword
	cmp.w	#0x7fff,%d2
	jeq	1f			| Inf / NaN
	sub.w	#0x3fff-0x3ff,%d2	| extended bias -> double bias
	tst.l	%d0
	jmi	2f			| integer bit set, exponent stands
	clr.w	%d2			| integer bit clear, exponent field 0
	jra	2f
1:	move.w	#0x7ff,%d2		| all-ones double exponent
	| move sign and the 11 bit exponent to the top of %d2; immediate
	| shift counts stop at 8, so the 15 bit shift is done in two steps
2:	lsl.w	#5,%d2
	lsl.l	#8,%d2
	lsl.l	#7,%d2
	| first lword: sign, exponent, top 20 fraction bits
	move.l	%d0,%d1			| keep the mantissa for the second lword
	add.l	%d0,%d0			| drop the explicit integer bit
	lsr.l	#8,%d0
	lsr.l	#4,%d0
	or.l	%d2,%d0
	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
	| second lword: remaining 11 bits of the high mantissa lword
	| followed by the top 21 bits of the low one
	moveq	#21,%d0
	lsl.l	%d0,%d1
	move.l	(%a0),%d0
	lsr.l	#4,%d0
	lsr.l	#7,%d0
	or.l	%d1,%d0
	putuser.l %d0,(%a1),fp_err_ua2,%a1
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
#endif
	rts
1303
fp_conv_ext2single:
	| Round the extended value at %a0 with fp_normalize_single and
	| pack it into the 32-bit single layout.
	|
	| In:	%a0 = source (struct fp_ext *)
	| Out:	%d0 = packed single, %a0 points at the low mantissa lword
	| Uses:	%d1
	jsr	fp_normalize_single
	printf	PCONV,"e2s: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d1		| sign byte and exponent word
	move.l	(%a0)+,%d0		| high mantissa lword
	cmp.w	#0x7fff,%d1
	jeq	1f			| Inf / NaN
	sub.w	#0x3fff-0x7f,%d1	| extended bias -> single bias
	tst.l	%d0
	jmi	2f			| integer bit set, exponent stands
	clr.w	%d1			| integer bit clear, exponent field 0
	jra	2f
1:	move.w	#0xff,%d1		| all-ones single exponent
	| move sign and the 8 bit exponent to the top of %d1
2:	lsl.w	#8,%d1
	lsl.l	#8,%d1
	lsl.l	#7,%d1
	and.l	#0x7fffffff,%d0		| drop the explicit integer bit
	lsr.l	#8,%d0			| 23 fraction bits
	or.l	%d1,%d0
	printf	PCONV,"%08x\n",1,%d0
	rts
1327
1328	| special return addresses for instr that
1329	| encode the rounding precision in the opcode
1330	| (e.g. fsmove,fdmove)
1331
1332fp_finalrounding_single:
1333	addq.l	#8,%sp
1334	jsr	fp_normalize_ext
1335	jsr	fp_normalize_single
1336	jra	fp_finaltest
1337
1338fp_finalrounding_single_fast:
1339	addq.l	#8,%sp
1340	jsr	fp_normalize_ext
1341	jsr	fp_normalize_single_fast
1342	jra	fp_finaltest
1343
1344fp_finalrounding_double:
1345	addq.l	#8,%sp
1346	jsr	fp_normalize_ext
1347	jsr	fp_normalize_double
1348	jra	fp_finaltest
1349
1350	| fp_finaltest:
1351	| set the emulated status register based on the outcome of an
1352	| emulated instruction.
1353
1354fp_finalrounding:
1355	addq.l	#8,%sp
1356|	printf	,"f: %p\n",1,%a0
1357	jsr	fp_normalize_ext
1358	move.w	(FPD_PREC,FPDATA),%d0
1359	subq.w	#1,%d0
1360	jcs	fp_finaltest
1361	jne	1f
1362	jsr	fp_normalize_single
1363	jra	2f
13641:	jsr	fp_normalize_double
13652:|	printf	,"f: %p\n",1,%a0
1366fp_finaltest:
1367	| First, we do some of the obvious tests for the exception
1368	| status byte and condition code bytes of fp_sr here, so that
1369	| they do not have to be handled individually by every
1370	| emulated instruction.
1371	clr.l	%d0
1372	addq.l	#1,%a0
1373	tst.b	(%a0)+			| sign
1374	jeq	1f
1375	bset	#FPSR_CC_NEG-24,%d0	| N bit
13761:	cmp.w	#0x7fff,(%a0)+		| exponent
1377	jeq	2f
1378	| test for zero
1379	moveq	#FPSR_CC_Z-24,%d1
1380	tst.l	(%a0)+
1381	jne	9f
1382	tst.l	(%a0)
1383	jne	9f
1384	jra	8f
1385	| infinitiv and NAN
13862:	moveq	#FPSR_CC_NAN-24,%d1
1387	move.l	(%a0)+,%d2
1388	lsl.l	#1,%d2			| ignore high bit
1389	jne	8f
1390	tst.l	(%a0)
1391	jne	8f
1392	moveq	#FPSR_CC_INF-24,%d1
13938:	bset	%d1,%d0
13949:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
1395	| move instructions enter here
1396	| Here, we test things in the exception status byte, and set
1397	| other things in the accrued exception byte accordingly.
1398	| Emulated instructions can set various things in the former,
1399	| as defined in fp_emu.h.
1400fp_final:
1401	move.l	(FPD_FPSR,FPDATA),%d0
1402#if 0
1403	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
1404	jne	1f
1405	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
1406	jeq	2f
14071:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
14082:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1409	jeq	1f
1410	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
14111:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
1412	jeq	1f
1413	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1414	jeq	1f
1415	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
14161:	btst	#FPSR_EXC_DZ,%d0	| EXC_INEX1
1417	jeq	1f
1418	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
14191:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1420	jne	1f
1421	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1422	jne	1f
1423	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
1424	jeq	2f
14251:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
14262:	move.l	%d0,(FPD_FPSR,FPDATA)
1427#else
1428	| same as above, greatly optimized, but untested (yet)
1429	move.l	%d0,%d2
1430	lsr.l	#5,%d0
1431	move.l	%d0,%d1
1432	lsr.l	#4,%d1
1433	or.l	%d0,%d1
1434	and.b	#0x08,%d1
1435	move.l	%d2,%d0
1436	lsr.l	#6,%d0
1437	or.l	%d1,%d0
1438	move.l	%d2,%d1
1439	lsr.l	#4,%d1
1440	or.b	#0xdf,%d1
1441	and.b	%d1,%d0
1442	move.l	%d2,%d1
1443	lsr.l	#7,%d1
1444	and.b	#0x80,%d1
1445	or.b	%d1,%d0
1446	and.b	#0xf8,%d0
1447	or.b	%d0,%d2
1448	move.l	%d2,(FPD_FPSR,FPDATA)
1449#endif
1450	move.b	(FPD_FPSR+2,FPDATA),%d0
1451	and.b	(FPD_FPCR+2,FPDATA),%d0
1452	jeq	1f
1453	printf	,"send signal!!!\n"
14541:	jra	fp_end
1455