xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/softfloat.c (revision e5451c8f8330e03ad3cfa16048b4daf961af434f)
1c8c0a1abSStuart Menefy /*
2c8c0a1abSStuart Menefy  * Floating point emulation support for subnormalised numbers on SH4
3c8c0a1abSStuart Menefy  * architecture This file is derived from the SoftFloat IEC/IEEE
4c8c0a1abSStuart Menefy  * Floating-point Arithmetic Package, Release 2 the original license of
5c8c0a1abSStuart Menefy  * which is reproduced below.
6c8c0a1abSStuart Menefy  *
7c8c0a1abSStuart Menefy  * ========================================================================
8c8c0a1abSStuart Menefy  *
9c8c0a1abSStuart Menefy  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10c8c0a1abSStuart Menefy  * Arithmetic Package, Release 2.
11c8c0a1abSStuart Menefy  *
12c8c0a1abSStuart Menefy  * Written by John R. Hauser.  This work was made possible in part by the
13c8c0a1abSStuart Menefy  * International Computer Science Institute, located at Suite 600, 1947 Center
14c8c0a1abSStuart Menefy  * Street, Berkeley, California 94704.  Funding was partially provided by the
15c8c0a1abSStuart Menefy  * National Science Foundation under grant MIP-9311980.  The original version
16c8c0a1abSStuart Menefy  * of this code was written as part of a project to build a fixed-point vector
17c8c0a1abSStuart Menefy  * processor in collaboration with the University of California at Berkeley,
18c8c0a1abSStuart Menefy  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19c8c0a1abSStuart Menefy  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20c8c0a1abSStuart Menefy  * arithmetic/softfloat.html'.
21c8c0a1abSStuart Menefy  *
22c8c0a1abSStuart Menefy  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
23c8c0a1abSStuart Menefy  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24c8c0a1abSStuart Menefy  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
25c8c0a1abSStuart Menefy  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26c8c0a1abSStuart Menefy  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27c8c0a1abSStuart Menefy  *
28c8c0a1abSStuart Menefy  * Derivative works are acceptable, even for commercial purposes, so long as
29c8c0a1abSStuart Menefy  * (1) they include prominent notice that the work is derivative, and (2) they
30c8c0a1abSStuart Menefy  * include prominent notice akin to these three paragraphs for those parts of
31c8c0a1abSStuart Menefy  * this code that are retained.
32c8c0a1abSStuart Menefy  *
33c8c0a1abSStuart Menefy  * ========================================================================
34c8c0a1abSStuart Menefy  *
35c8c0a1abSStuart Menefy  * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36c8c0a1abSStuart Menefy  * and Kamel Khelifi <kamel.khelifi@st.com>
37c8c0a1abSStuart Menefy  */
38c8c0a1abSStuart Menefy #include <linux/kernel.h>
39f15cbe6fSPaul Mundt #include <cpu/fpu.h>
40*e9bf51e5SPaul Mundt #include <asm/div64.h>
41c8c0a1abSStuart Menefy 
42c8c0a1abSStuart Menefy #define LIT64( a ) a##LL
43c8c0a1abSStuart Menefy 
44c8c0a1abSStuart Menefy typedef char flag;
45c8c0a1abSStuart Menefy typedef unsigned char uint8;
46c8c0a1abSStuart Menefy typedef signed char int8;
47c8c0a1abSStuart Menefy typedef int uint16;
48c8c0a1abSStuart Menefy typedef int int16;
49c8c0a1abSStuart Menefy typedef unsigned int uint32;
50c8c0a1abSStuart Menefy typedef signed int int32;
51c8c0a1abSStuart Menefy 
52c8c0a1abSStuart Menefy typedef unsigned long long int bits64;
53c8c0a1abSStuart Menefy typedef signed long long int sbits64;
54c8c0a1abSStuart Menefy 
55c8c0a1abSStuart Menefy typedef unsigned char bits8;
56c8c0a1abSStuart Menefy typedef signed char sbits8;
57c8c0a1abSStuart Menefy typedef unsigned short int bits16;
58c8c0a1abSStuart Menefy typedef signed short int sbits16;
59c8c0a1abSStuart Menefy typedef unsigned int bits32;
60c8c0a1abSStuart Menefy typedef signed int sbits32;
61c8c0a1abSStuart Menefy 
62c8c0a1abSStuart Menefy typedef unsigned long long int uint64;
63c8c0a1abSStuart Menefy typedef signed long long int int64;
64c8c0a1abSStuart Menefy 
65c8c0a1abSStuart Menefy typedef unsigned long int float32;
66c8c0a1abSStuart Menefy typedef unsigned long long float64;
67c8c0a1abSStuart Menefy 
68c8c0a1abSStuart Menefy extern void float_raise(unsigned int flags);	/* in fpu.c */
69c8c0a1abSStuart Menefy extern int float_rounding_mode(void);	/* in fpu.c */
70c8c0a1abSStuart Menefy 
71*e9bf51e5SPaul Mundt bits64 extractFloat64Frac(float64 a);
72*e9bf51e5SPaul Mundt flag extractFloat64Sign(float64 a);
73*e9bf51e5SPaul Mundt int16 extractFloat64Exp(float64 a);
74*e9bf51e5SPaul Mundt int16 extractFloat32Exp(float32 a);
75*e9bf51e5SPaul Mundt flag extractFloat32Sign(float32 a);
76*e9bf51e5SPaul Mundt bits32 extractFloat32Frac(float32 a);
77*e9bf51e5SPaul Mundt float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
78*e9bf51e5SPaul Mundt void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
79*e9bf51e5SPaul Mundt float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
80*e9bf51e5SPaul Mundt void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
81c8c0a1abSStuart Menefy float64 float64_sub(float64 a, float64 b);
82c8c0a1abSStuart Menefy float32 float32_sub(float32 a, float32 b);
83c8c0a1abSStuart Menefy float32 float32_add(float32 a, float32 b);
84c8c0a1abSStuart Menefy float64 float64_add(float64 a, float64 b);
85c8c0a1abSStuart Menefy float64 float64_div(float64 a, float64 b);
86c8c0a1abSStuart Menefy float32 float32_div(float32 a, float32 b);
87c8c0a1abSStuart Menefy float32 float32_mul(float32 a, float32 b);
88c8c0a1abSStuart Menefy float64 float64_mul(float64 a, float64 b);
89b6ad1e8cSCarl Shaw float32 float64_to_float32(float64 a);
90*e9bf51e5SPaul Mundt void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91c8c0a1abSStuart Menefy 		   bits64 * z1Ptr);
92*e9bf51e5SPaul Mundt void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93c8c0a1abSStuart Menefy 		   bits64 * z1Ptr);
94*e9bf51e5SPaul Mundt void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
95c8c0a1abSStuart Menefy 
96c8c0a1abSStuart Menefy static int8 countLeadingZeros32(bits32 a);
97c8c0a1abSStuart Menefy static int8 countLeadingZeros64(bits64 a);
98c8c0a1abSStuart Menefy static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99c8c0a1abSStuart Menefy 					    bits64 zSig);
100c8c0a1abSStuart Menefy static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
101c8c0a1abSStuart Menefy static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
102c8c0a1abSStuart Menefy static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
103c8c0a1abSStuart Menefy static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104c8c0a1abSStuart Menefy 					    bits32 zSig);
105c8c0a1abSStuart Menefy static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
106c8c0a1abSStuart Menefy static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
107c8c0a1abSStuart Menefy static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
108c8c0a1abSStuart Menefy static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109c8c0a1abSStuart Menefy 				      bits64 * zSigPtr);
110c8c0a1abSStuart Menefy static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
111c8c0a1abSStuart Menefy static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
112c8c0a1abSStuart Menefy 				      bits32 * zSigPtr);
113c8c0a1abSStuart Menefy 
extractFloat64Frac(float64 a)114*e9bf51e5SPaul Mundt bits64 extractFloat64Frac(float64 a)
115c8c0a1abSStuart Menefy {
116c8c0a1abSStuart Menefy 	return a & LIT64(0x000FFFFFFFFFFFFF);
117c8c0a1abSStuart Menefy }
118c8c0a1abSStuart Menefy 
extractFloat64Sign(float64 a)119*e9bf51e5SPaul Mundt flag extractFloat64Sign(float64 a)
120c8c0a1abSStuart Menefy {
121c8c0a1abSStuart Menefy 	return a >> 63;
122c8c0a1abSStuart Menefy }
123c8c0a1abSStuart Menefy 
extractFloat64Exp(float64 a)124*e9bf51e5SPaul Mundt int16 extractFloat64Exp(float64 a)
125c8c0a1abSStuart Menefy {
126c8c0a1abSStuart Menefy 	return (a >> 52) & 0x7FF;
127c8c0a1abSStuart Menefy }
128c8c0a1abSStuart Menefy 
extractFloat32Exp(float32 a)129*e9bf51e5SPaul Mundt int16 extractFloat32Exp(float32 a)
130c8c0a1abSStuart Menefy {
131c8c0a1abSStuart Menefy 	return (a >> 23) & 0xFF;
132c8c0a1abSStuart Menefy }
133c8c0a1abSStuart Menefy 
extractFloat32Sign(float32 a)134*e9bf51e5SPaul Mundt flag extractFloat32Sign(float32 a)
135c8c0a1abSStuart Menefy {
136c8c0a1abSStuart Menefy 	return a >> 31;
137c8c0a1abSStuart Menefy }
138c8c0a1abSStuart Menefy 
extractFloat32Frac(float32 a)139*e9bf51e5SPaul Mundt bits32 extractFloat32Frac(float32 a)
140c8c0a1abSStuart Menefy {
141c8c0a1abSStuart Menefy 	return a & 0x007FFFFF;
142c8c0a1abSStuart Menefy }
143c8c0a1abSStuart Menefy 
packFloat64(flag zSign,int16 zExp,bits64 zSig)144*e9bf51e5SPaul Mundt float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
145c8c0a1abSStuart Menefy {
146c8c0a1abSStuart Menefy 	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147c8c0a1abSStuart Menefy }
148c8c0a1abSStuart Menefy 
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)149*e9bf51e5SPaul Mundt void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
150c8c0a1abSStuart Menefy {
151c8c0a1abSStuart Menefy 	bits64 z;
152c8c0a1abSStuart Menefy 
153c8c0a1abSStuart Menefy 	if (count == 0) {
154c8c0a1abSStuart Menefy 		z = a;
155c8c0a1abSStuart Menefy 	} else if (count < 64) {
156c8c0a1abSStuart Menefy 		z = (a >> count) | ((a << ((-count) & 63)) != 0);
157c8c0a1abSStuart Menefy 	} else {
158c8c0a1abSStuart Menefy 		z = (a != 0);
159c8c0a1abSStuart Menefy 	}
160c8c0a1abSStuart Menefy 	*zPtr = z;
161c8c0a1abSStuart Menefy }
162c8c0a1abSStuart Menefy 
countLeadingZeros32(bits32 a)163c8c0a1abSStuart Menefy static int8 countLeadingZeros32(bits32 a)
164c8c0a1abSStuart Menefy {
165c8c0a1abSStuart Menefy 	static const int8 countLeadingZerosHigh[] = {
166c8c0a1abSStuart Menefy 		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167c8c0a1abSStuart Menefy 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168c8c0a1abSStuart Menefy 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169c8c0a1abSStuart Menefy 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170c8c0a1abSStuart Menefy 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171c8c0a1abSStuart Menefy 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172c8c0a1abSStuart Menefy 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173c8c0a1abSStuart Menefy 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181c8c0a1abSStuart Menefy 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
182c8c0a1abSStuart Menefy 	};
183c8c0a1abSStuart Menefy 	int8 shiftCount;
184c8c0a1abSStuart Menefy 
185c8c0a1abSStuart Menefy 	shiftCount = 0;
186c8c0a1abSStuart Menefy 	if (a < 0x10000) {
187c8c0a1abSStuart Menefy 		shiftCount += 16;
188c8c0a1abSStuart Menefy 		a <<= 16;
189c8c0a1abSStuart Menefy 	}
190c8c0a1abSStuart Menefy 	if (a < 0x1000000) {
191c8c0a1abSStuart Menefy 		shiftCount += 8;
192c8c0a1abSStuart Menefy 		a <<= 8;
193c8c0a1abSStuart Menefy 	}
194c8c0a1abSStuart Menefy 	shiftCount += countLeadingZerosHigh[a >> 24];
195c8c0a1abSStuart Menefy 	return shiftCount;
196c8c0a1abSStuart Menefy 
197c8c0a1abSStuart Menefy }
198c8c0a1abSStuart Menefy 
countLeadingZeros64(bits64 a)199c8c0a1abSStuart Menefy static int8 countLeadingZeros64(bits64 a)
200c8c0a1abSStuart Menefy {
201c8c0a1abSStuart Menefy 	int8 shiftCount;
202c8c0a1abSStuart Menefy 
203c8c0a1abSStuart Menefy 	shiftCount = 0;
204c8c0a1abSStuart Menefy 	if (a < ((bits64) 1) << 32) {
205c8c0a1abSStuart Menefy 		shiftCount += 32;
206c8c0a1abSStuart Menefy 	} else {
207c8c0a1abSStuart Menefy 		a >>= 32;
208c8c0a1abSStuart Menefy 	}
209c8c0a1abSStuart Menefy 	shiftCount += countLeadingZeros32(a);
210c8c0a1abSStuart Menefy 	return shiftCount;
211c8c0a1abSStuart Menefy 
212c8c0a1abSStuart Menefy }
213c8c0a1abSStuart Menefy 
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)214c8c0a1abSStuart Menefy static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
215c8c0a1abSStuart Menefy {
216c8c0a1abSStuart Menefy 	int8 shiftCount;
217c8c0a1abSStuart Menefy 
218c8c0a1abSStuart Menefy 	shiftCount = countLeadingZeros64(zSig) - 1;
219c8c0a1abSStuart Menefy 	return roundAndPackFloat64(zSign, zExp - shiftCount,
220c8c0a1abSStuart Menefy 				   zSig << shiftCount);
221c8c0a1abSStuart Menefy 
222c8c0a1abSStuart Menefy }
223c8c0a1abSStuart Menefy 
subFloat64Sigs(float64 a,float64 b,flag zSign)224c8c0a1abSStuart Menefy static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
225c8c0a1abSStuart Menefy {
226c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
227c8c0a1abSStuart Menefy 	bits64 aSig, bSig, zSig;
228c8c0a1abSStuart Menefy 	int16 expDiff;
229c8c0a1abSStuart Menefy 
230c8c0a1abSStuart Menefy 	aSig = extractFloat64Frac(a);
231c8c0a1abSStuart Menefy 	aExp = extractFloat64Exp(a);
232c8c0a1abSStuart Menefy 	bSig = extractFloat64Frac(b);
233c8c0a1abSStuart Menefy 	bExp = extractFloat64Exp(b);
234c8c0a1abSStuart Menefy 	expDiff = aExp - bExp;
235c8c0a1abSStuart Menefy 	aSig <<= 10;
236c8c0a1abSStuart Menefy 	bSig <<= 10;
237c8c0a1abSStuart Menefy 	if (0 < expDiff)
238c8c0a1abSStuart Menefy 		goto aExpBigger;
239c8c0a1abSStuart Menefy 	if (expDiff < 0)
240c8c0a1abSStuart Menefy 		goto bExpBigger;
241c8c0a1abSStuart Menefy 	if (aExp == 0) {
242c8c0a1abSStuart Menefy 		aExp = 1;
243c8c0a1abSStuart Menefy 		bExp = 1;
244c8c0a1abSStuart Menefy 	}
245c8c0a1abSStuart Menefy 	if (bSig < aSig)
246c8c0a1abSStuart Menefy 		goto aBigger;
247c8c0a1abSStuart Menefy 	if (aSig < bSig)
248c8c0a1abSStuart Menefy 		goto bBigger;
249c8c0a1abSStuart Menefy 	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
250c8c0a1abSStuart Menefy       bExpBigger:
251c8c0a1abSStuart Menefy 	if (bExp == 0x7FF) {
252c8c0a1abSStuart Menefy 		return packFloat64(zSign ^ 1, 0x7FF, 0);
253c8c0a1abSStuart Menefy 	}
254c8c0a1abSStuart Menefy 	if (aExp == 0) {
255c8c0a1abSStuart Menefy 		++expDiff;
256c8c0a1abSStuart Menefy 	} else {
257c8c0a1abSStuart Menefy 		aSig |= LIT64(0x4000000000000000);
258c8c0a1abSStuart Menefy 	}
259c8c0a1abSStuart Menefy 	shift64RightJamming(aSig, -expDiff, &aSig);
260c8c0a1abSStuart Menefy 	bSig |= LIT64(0x4000000000000000);
261c8c0a1abSStuart Menefy       bBigger:
262c8c0a1abSStuart Menefy 	zSig = bSig - aSig;
263c8c0a1abSStuart Menefy 	zExp = bExp;
264c8c0a1abSStuart Menefy 	zSign ^= 1;
265c8c0a1abSStuart Menefy 	goto normalizeRoundAndPack;
266c8c0a1abSStuart Menefy       aExpBigger:
267c8c0a1abSStuart Menefy 	if (aExp == 0x7FF) {
268c8c0a1abSStuart Menefy 		return a;
269c8c0a1abSStuart Menefy 	}
270c8c0a1abSStuart Menefy 	if (bExp == 0) {
271c8c0a1abSStuart Menefy 		--expDiff;
272c8c0a1abSStuart Menefy 	} else {
273c8c0a1abSStuart Menefy 		bSig |= LIT64(0x4000000000000000);
274c8c0a1abSStuart Menefy 	}
275c8c0a1abSStuart Menefy 	shift64RightJamming(bSig, expDiff, &bSig);
276c8c0a1abSStuart Menefy 	aSig |= LIT64(0x4000000000000000);
277c8c0a1abSStuart Menefy       aBigger:
278c8c0a1abSStuart Menefy 	zSig = aSig - bSig;
279c8c0a1abSStuart Menefy 	zExp = aExp;
280c8c0a1abSStuart Menefy       normalizeRoundAndPack:
281c8c0a1abSStuart Menefy 	--zExp;
282c8c0a1abSStuart Menefy 	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
283c8c0a1abSStuart Menefy 
284c8c0a1abSStuart Menefy }
addFloat64Sigs(float64 a,float64 b,flag zSign)285c8c0a1abSStuart Menefy static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
286c8c0a1abSStuart Menefy {
287c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
288c8c0a1abSStuart Menefy 	bits64 aSig, bSig, zSig;
289c8c0a1abSStuart Menefy 	int16 expDiff;
290c8c0a1abSStuart Menefy 
291c8c0a1abSStuart Menefy 	aSig = extractFloat64Frac(a);
292c8c0a1abSStuart Menefy 	aExp = extractFloat64Exp(a);
293c8c0a1abSStuart Menefy 	bSig = extractFloat64Frac(b);
294c8c0a1abSStuart Menefy 	bExp = extractFloat64Exp(b);
295c8c0a1abSStuart Menefy 	expDiff = aExp - bExp;
296c8c0a1abSStuart Menefy 	aSig <<= 9;
297c8c0a1abSStuart Menefy 	bSig <<= 9;
298c8c0a1abSStuart Menefy 	if (0 < expDiff) {
299c8c0a1abSStuart Menefy 		if (aExp == 0x7FF) {
300c8c0a1abSStuart Menefy 			return a;
301c8c0a1abSStuart Menefy 		}
302c8c0a1abSStuart Menefy 		if (bExp == 0) {
303c8c0a1abSStuart Menefy 			--expDiff;
304c8c0a1abSStuart Menefy 		} else {
305c8c0a1abSStuart Menefy 			bSig |= LIT64(0x2000000000000000);
306c8c0a1abSStuart Menefy 		}
307c8c0a1abSStuart Menefy 		shift64RightJamming(bSig, expDiff, &bSig);
308c8c0a1abSStuart Menefy 		zExp = aExp;
309c8c0a1abSStuart Menefy 	} else if (expDiff < 0) {
310c8c0a1abSStuart Menefy 		if (bExp == 0x7FF) {
311c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0x7FF, 0);
312c8c0a1abSStuart Menefy 		}
313c8c0a1abSStuart Menefy 		if (aExp == 0) {
314c8c0a1abSStuart Menefy 			++expDiff;
315c8c0a1abSStuart Menefy 		} else {
316c8c0a1abSStuart Menefy 			aSig |= LIT64(0x2000000000000000);
317c8c0a1abSStuart Menefy 		}
318c8c0a1abSStuart Menefy 		shift64RightJamming(aSig, -expDiff, &aSig);
319c8c0a1abSStuart Menefy 		zExp = bExp;
320c8c0a1abSStuart Menefy 	} else {
321c8c0a1abSStuart Menefy 		if (aExp == 0x7FF) {
322c8c0a1abSStuart Menefy 			return a;
323c8c0a1abSStuart Menefy 		}
324c8c0a1abSStuart Menefy 		if (aExp == 0)
325c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
326c8c0a1abSStuart Menefy 		zSig = LIT64(0x4000000000000000) + aSig + bSig;
327c8c0a1abSStuart Menefy 		zExp = aExp;
328c8c0a1abSStuart Menefy 		goto roundAndPack;
329c8c0a1abSStuart Menefy 	}
330c8c0a1abSStuart Menefy 	aSig |= LIT64(0x2000000000000000);
331c8c0a1abSStuart Menefy 	zSig = (aSig + bSig) << 1;
332c8c0a1abSStuart Menefy 	--zExp;
333c8c0a1abSStuart Menefy 	if ((sbits64) zSig < 0) {
334c8c0a1abSStuart Menefy 		zSig = aSig + bSig;
335c8c0a1abSStuart Menefy 		++zExp;
336c8c0a1abSStuart Menefy 	}
337c8c0a1abSStuart Menefy       roundAndPack:
338c8c0a1abSStuart Menefy 	return roundAndPackFloat64(zSign, zExp, zSig);
339c8c0a1abSStuart Menefy 
340c8c0a1abSStuart Menefy }
341c8c0a1abSStuart Menefy 
packFloat32(flag zSign,int16 zExp,bits32 zSig)342*e9bf51e5SPaul Mundt float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
343c8c0a1abSStuart Menefy {
344c8c0a1abSStuart Menefy 	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
345c8c0a1abSStuart Menefy }
346c8c0a1abSStuart Menefy 
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)347*e9bf51e5SPaul Mundt void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
348c8c0a1abSStuart Menefy {
349c8c0a1abSStuart Menefy 	bits32 z;
350c8c0a1abSStuart Menefy 	if (count == 0) {
351c8c0a1abSStuart Menefy 		z = a;
352c8c0a1abSStuart Menefy 	} else if (count < 32) {
353c8c0a1abSStuart Menefy 		z = (a >> count) | ((a << ((-count) & 31)) != 0);
354c8c0a1abSStuart Menefy 	} else {
355c8c0a1abSStuart Menefy 		z = (a != 0);
356c8c0a1abSStuart Menefy 	}
357c8c0a1abSStuart Menefy 	*zPtr = z;
358c8c0a1abSStuart Menefy }
359c8c0a1abSStuart Menefy 
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)360c8c0a1abSStuart Menefy static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
361c8c0a1abSStuart Menefy {
362c8c0a1abSStuart Menefy 	flag roundNearestEven;
363c8c0a1abSStuart Menefy 	int8 roundIncrement, roundBits;
364c8c0a1abSStuart Menefy 	flag isTiny;
365c8c0a1abSStuart Menefy 
366c8c0a1abSStuart Menefy 	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
367c8c0a1abSStuart Menefy 	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
368c8c0a1abSStuart Menefy 	roundIncrement = 0x40;
369c8c0a1abSStuart Menefy 	if (!roundNearestEven) {
370c8c0a1abSStuart Menefy 		roundIncrement = 0;
371c8c0a1abSStuart Menefy 	}
372c8c0a1abSStuart Menefy 	roundBits = zSig & 0x7F;
373c8c0a1abSStuart Menefy 	if (0xFD <= (bits16) zExp) {
374c8c0a1abSStuart Menefy 		if ((0xFD < zExp)
375c8c0a1abSStuart Menefy 		    || ((zExp == 0xFD)
376c8c0a1abSStuart Menefy 			&& ((sbits32) (zSig + roundIncrement) < 0))
377c8c0a1abSStuart Menefy 		    ) {
378c8c0a1abSStuart Menefy 			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
379c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0xFF,
380c8c0a1abSStuart Menefy 					   0) - (roundIncrement == 0);
381c8c0a1abSStuart Menefy 		}
382c8c0a1abSStuart Menefy 		if (zExp < 0) {
383c8c0a1abSStuart Menefy 			isTiny = (zExp < -1)
384c8c0a1abSStuart Menefy 			    || (zSig + roundIncrement < 0x80000000);
385c8c0a1abSStuart Menefy 			shift32RightJamming(zSig, -zExp, &zSig);
386c8c0a1abSStuart Menefy 			zExp = 0;
387c8c0a1abSStuart Menefy 			roundBits = zSig & 0x7F;
388c8c0a1abSStuart Menefy 			if (isTiny && roundBits)
389c8c0a1abSStuart Menefy 				float_raise(FPSCR_CAUSE_UNDERFLOW);
390c8c0a1abSStuart Menefy 		}
391c8c0a1abSStuart Menefy 	}
392c8c0a1abSStuart Menefy 	if (roundBits)
393c8c0a1abSStuart Menefy 		float_raise(FPSCR_CAUSE_INEXACT);
394c8c0a1abSStuart Menefy 	zSig = (zSig + roundIncrement) >> 7;
395c8c0a1abSStuart Menefy 	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
396c8c0a1abSStuart Menefy 	if (zSig == 0)
397c8c0a1abSStuart Menefy 		zExp = 0;
398c8c0a1abSStuart Menefy 	return packFloat32(zSign, zExp, zSig);
399c8c0a1abSStuart Menefy 
400c8c0a1abSStuart Menefy }
401c8c0a1abSStuart Menefy 
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)402c8c0a1abSStuart Menefy static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
403c8c0a1abSStuart Menefy {
404c8c0a1abSStuart Menefy 	int8 shiftCount;
405c8c0a1abSStuart Menefy 
406c8c0a1abSStuart Menefy 	shiftCount = countLeadingZeros32(zSig) - 1;
407c8c0a1abSStuart Menefy 	return roundAndPackFloat32(zSign, zExp - shiftCount,
408c8c0a1abSStuart Menefy 				   zSig << shiftCount);
409c8c0a1abSStuart Menefy }
410c8c0a1abSStuart Menefy 
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)411c8c0a1abSStuart Menefy static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
412c8c0a1abSStuart Menefy {
413c8c0a1abSStuart Menefy 	flag roundNearestEven;
414c8c0a1abSStuart Menefy 	int16 roundIncrement, roundBits;
415c8c0a1abSStuart Menefy 	flag isTiny;
416c8c0a1abSStuart Menefy 
417c8c0a1abSStuart Menefy 	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
418c8c0a1abSStuart Menefy 	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
419c8c0a1abSStuart Menefy 	roundIncrement = 0x200;
420c8c0a1abSStuart Menefy 	if (!roundNearestEven) {
421c8c0a1abSStuart Menefy 		roundIncrement = 0;
422c8c0a1abSStuart Menefy 	}
423c8c0a1abSStuart Menefy 	roundBits = zSig & 0x3FF;
424c8c0a1abSStuart Menefy 	if (0x7FD <= (bits16) zExp) {
425c8c0a1abSStuart Menefy 		if ((0x7FD < zExp)
426c8c0a1abSStuart Menefy 		    || ((zExp == 0x7FD)
427c8c0a1abSStuart Menefy 			&& ((sbits64) (zSig + roundIncrement) < 0))
428c8c0a1abSStuart Menefy 		    ) {
429c8c0a1abSStuart Menefy 			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
430c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0x7FF,
431c8c0a1abSStuart Menefy 					   0) - (roundIncrement == 0);
432c8c0a1abSStuart Menefy 		}
433c8c0a1abSStuart Menefy 		if (zExp < 0) {
434c8c0a1abSStuart Menefy 			isTiny = (zExp < -1)
435c8c0a1abSStuart Menefy 			    || (zSig + roundIncrement <
436c8c0a1abSStuart Menefy 				LIT64(0x8000000000000000));
437c8c0a1abSStuart Menefy 			shift64RightJamming(zSig, -zExp, &zSig);
438c8c0a1abSStuart Menefy 			zExp = 0;
439c8c0a1abSStuart Menefy 			roundBits = zSig & 0x3FF;
440c8c0a1abSStuart Menefy 			if (isTiny && roundBits)
441c8c0a1abSStuart Menefy 				float_raise(FPSCR_CAUSE_UNDERFLOW);
442c8c0a1abSStuart Menefy 		}
443c8c0a1abSStuart Menefy 	}
444c8c0a1abSStuart Menefy 	if (roundBits)
445c8c0a1abSStuart Menefy 		float_raise(FPSCR_CAUSE_INEXACT);
446c8c0a1abSStuart Menefy 	zSig = (zSig + roundIncrement) >> 10;
447c8c0a1abSStuart Menefy 	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
448c8c0a1abSStuart Menefy 	if (zSig == 0)
449c8c0a1abSStuart Menefy 		zExp = 0;
450c8c0a1abSStuart Menefy 	return packFloat64(zSign, zExp, zSig);
451c8c0a1abSStuart Menefy 
452c8c0a1abSStuart Menefy }
453c8c0a1abSStuart Menefy 
subFloat32Sigs(float32 a,float32 b,flag zSign)454c8c0a1abSStuart Menefy static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
455c8c0a1abSStuart Menefy {
456c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
457c8c0a1abSStuart Menefy 	bits32 aSig, bSig, zSig;
458c8c0a1abSStuart Menefy 	int16 expDiff;
459c8c0a1abSStuart Menefy 
460c8c0a1abSStuart Menefy 	aSig = extractFloat32Frac(a);
461c8c0a1abSStuart Menefy 	aExp = extractFloat32Exp(a);
462c8c0a1abSStuart Menefy 	bSig = extractFloat32Frac(b);
463c8c0a1abSStuart Menefy 	bExp = extractFloat32Exp(b);
464c8c0a1abSStuart Menefy 	expDiff = aExp - bExp;
465c8c0a1abSStuart Menefy 	aSig <<= 7;
466c8c0a1abSStuart Menefy 	bSig <<= 7;
467c8c0a1abSStuart Menefy 	if (0 < expDiff)
468c8c0a1abSStuart Menefy 		goto aExpBigger;
469c8c0a1abSStuart Menefy 	if (expDiff < 0)
470c8c0a1abSStuart Menefy 		goto bExpBigger;
471c8c0a1abSStuart Menefy 	if (aExp == 0) {
472c8c0a1abSStuart Menefy 		aExp = 1;
473c8c0a1abSStuart Menefy 		bExp = 1;
474c8c0a1abSStuart Menefy 	}
475c8c0a1abSStuart Menefy 	if (bSig < aSig)
476c8c0a1abSStuart Menefy 		goto aBigger;
477c8c0a1abSStuart Menefy 	if (aSig < bSig)
478c8c0a1abSStuart Menefy 		goto bBigger;
479c8c0a1abSStuart Menefy 	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
480c8c0a1abSStuart Menefy       bExpBigger:
481c8c0a1abSStuart Menefy 	if (bExp == 0xFF) {
482c8c0a1abSStuart Menefy 		return packFloat32(zSign ^ 1, 0xFF, 0);
483c8c0a1abSStuart Menefy 	}
484c8c0a1abSStuart Menefy 	if (aExp == 0) {
485c8c0a1abSStuart Menefy 		++expDiff;
486c8c0a1abSStuart Menefy 	} else {
487c8c0a1abSStuart Menefy 		aSig |= 0x40000000;
488c8c0a1abSStuart Menefy 	}
489c8c0a1abSStuart Menefy 	shift32RightJamming(aSig, -expDiff, &aSig);
490c8c0a1abSStuart Menefy 	bSig |= 0x40000000;
491c8c0a1abSStuart Menefy       bBigger:
492c8c0a1abSStuart Menefy 	zSig = bSig - aSig;
493c8c0a1abSStuart Menefy 	zExp = bExp;
494c8c0a1abSStuart Menefy 	zSign ^= 1;
495c8c0a1abSStuart Menefy 	goto normalizeRoundAndPack;
496c8c0a1abSStuart Menefy       aExpBigger:
497c8c0a1abSStuart Menefy 	if (aExp == 0xFF) {
498c8c0a1abSStuart Menefy 		return a;
499c8c0a1abSStuart Menefy 	}
500c8c0a1abSStuart Menefy 	if (bExp == 0) {
501c8c0a1abSStuart Menefy 		--expDiff;
502c8c0a1abSStuart Menefy 	} else {
503c8c0a1abSStuart Menefy 		bSig |= 0x40000000;
504c8c0a1abSStuart Menefy 	}
505c8c0a1abSStuart Menefy 	shift32RightJamming(bSig, expDiff, &bSig);
506c8c0a1abSStuart Menefy 	aSig |= 0x40000000;
507c8c0a1abSStuart Menefy       aBigger:
508c8c0a1abSStuart Menefy 	zSig = aSig - bSig;
509c8c0a1abSStuart Menefy 	zExp = aExp;
510c8c0a1abSStuart Menefy       normalizeRoundAndPack:
511c8c0a1abSStuart Menefy 	--zExp;
512c8c0a1abSStuart Menefy 	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
513c8c0a1abSStuart Menefy 
514c8c0a1abSStuart Menefy }
515c8c0a1abSStuart Menefy 
addFloat32Sigs(float32 a,float32 b,flag zSign)516c8c0a1abSStuart Menefy static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
517c8c0a1abSStuart Menefy {
518c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
519c8c0a1abSStuart Menefy 	bits32 aSig, bSig, zSig;
520c8c0a1abSStuart Menefy 	int16 expDiff;
521c8c0a1abSStuart Menefy 
522c8c0a1abSStuart Menefy 	aSig = extractFloat32Frac(a);
523c8c0a1abSStuart Menefy 	aExp = extractFloat32Exp(a);
524c8c0a1abSStuart Menefy 	bSig = extractFloat32Frac(b);
525c8c0a1abSStuart Menefy 	bExp = extractFloat32Exp(b);
526c8c0a1abSStuart Menefy 	expDiff = aExp - bExp;
527c8c0a1abSStuart Menefy 	aSig <<= 6;
528c8c0a1abSStuart Menefy 	bSig <<= 6;
529c8c0a1abSStuart Menefy 	if (0 < expDiff) {
530c8c0a1abSStuart Menefy 		if (aExp == 0xFF) {
531c8c0a1abSStuart Menefy 			return a;
532c8c0a1abSStuart Menefy 		}
533c8c0a1abSStuart Menefy 		if (bExp == 0) {
534c8c0a1abSStuart Menefy 			--expDiff;
535c8c0a1abSStuart Menefy 		} else {
536c8c0a1abSStuart Menefy 			bSig |= 0x20000000;
537c8c0a1abSStuart Menefy 		}
538c8c0a1abSStuart Menefy 		shift32RightJamming(bSig, expDiff, &bSig);
539c8c0a1abSStuart Menefy 		zExp = aExp;
540c8c0a1abSStuart Menefy 	} else if (expDiff < 0) {
541c8c0a1abSStuart Menefy 		if (bExp == 0xFF) {
542c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0xFF, 0);
543c8c0a1abSStuart Menefy 		}
544c8c0a1abSStuart Menefy 		if (aExp == 0) {
545c8c0a1abSStuart Menefy 			++expDiff;
546c8c0a1abSStuart Menefy 		} else {
547c8c0a1abSStuart Menefy 			aSig |= 0x20000000;
548c8c0a1abSStuart Menefy 		}
549c8c0a1abSStuart Menefy 		shift32RightJamming(aSig, -expDiff, &aSig);
550c8c0a1abSStuart Menefy 		zExp = bExp;
551c8c0a1abSStuart Menefy 	} else {
552c8c0a1abSStuart Menefy 		if (aExp == 0xFF) {
553c8c0a1abSStuart Menefy 			return a;
554c8c0a1abSStuart Menefy 		}
555c8c0a1abSStuart Menefy 		if (aExp == 0)
556c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
557c8c0a1abSStuart Menefy 		zSig = 0x40000000 + aSig + bSig;
558c8c0a1abSStuart Menefy 		zExp = aExp;
559c8c0a1abSStuart Menefy 		goto roundAndPack;
560c8c0a1abSStuart Menefy 	}
561c8c0a1abSStuart Menefy 	aSig |= 0x20000000;
562c8c0a1abSStuart Menefy 	zSig = (aSig + bSig) << 1;
563c8c0a1abSStuart Menefy 	--zExp;
564c8c0a1abSStuart Menefy 	if ((sbits32) zSig < 0) {
565c8c0a1abSStuart Menefy 		zSig = aSig + bSig;
566c8c0a1abSStuart Menefy 		++zExp;
567c8c0a1abSStuart Menefy 	}
568c8c0a1abSStuart Menefy       roundAndPack:
569c8c0a1abSStuart Menefy 	return roundAndPackFloat32(zSign, zExp, zSig);
570c8c0a1abSStuart Menefy 
571c8c0a1abSStuart Menefy }
572c8c0a1abSStuart Menefy 
float64_sub(float64 a,float64 b)573c8c0a1abSStuart Menefy float64 float64_sub(float64 a, float64 b)
574c8c0a1abSStuart Menefy {
575c8c0a1abSStuart Menefy 	flag aSign, bSign;
576c8c0a1abSStuart Menefy 
577c8c0a1abSStuart Menefy 	aSign = extractFloat64Sign(a);
578c8c0a1abSStuart Menefy 	bSign = extractFloat64Sign(b);
579c8c0a1abSStuart Menefy 	if (aSign == bSign) {
580c8c0a1abSStuart Menefy 		return subFloat64Sigs(a, b, aSign);
581c8c0a1abSStuart Menefy 	} else {
582c8c0a1abSStuart Menefy 		return addFloat64Sigs(a, b, aSign);
583c8c0a1abSStuart Menefy 	}
584c8c0a1abSStuart Menefy 
585c8c0a1abSStuart Menefy }
586c8c0a1abSStuart Menefy 
float32_sub(float32 a,float32 b)587c8c0a1abSStuart Menefy float32 float32_sub(float32 a, float32 b)
588c8c0a1abSStuart Menefy {
589c8c0a1abSStuart Menefy 	flag aSign, bSign;
590c8c0a1abSStuart Menefy 
591c8c0a1abSStuart Menefy 	aSign = extractFloat32Sign(a);
592c8c0a1abSStuart Menefy 	bSign = extractFloat32Sign(b);
593c8c0a1abSStuart Menefy 	if (aSign == bSign) {
594c8c0a1abSStuart Menefy 		return subFloat32Sigs(a, b, aSign);
595c8c0a1abSStuart Menefy 	} else {
596c8c0a1abSStuart Menefy 		return addFloat32Sigs(a, b, aSign);
597c8c0a1abSStuart Menefy 	}
598c8c0a1abSStuart Menefy 
599c8c0a1abSStuart Menefy }
600c8c0a1abSStuart Menefy 
float32_add(float32 a,float32 b)601c8c0a1abSStuart Menefy float32 float32_add(float32 a, float32 b)
602c8c0a1abSStuart Menefy {
603c8c0a1abSStuart Menefy 	flag aSign, bSign;
604c8c0a1abSStuart Menefy 
605c8c0a1abSStuart Menefy 	aSign = extractFloat32Sign(a);
606c8c0a1abSStuart Menefy 	bSign = extractFloat32Sign(b);
607c8c0a1abSStuart Menefy 	if (aSign == bSign) {
608c8c0a1abSStuart Menefy 		return addFloat32Sigs(a, b, aSign);
609c8c0a1abSStuart Menefy 	} else {
610c8c0a1abSStuart Menefy 		return subFloat32Sigs(a, b, aSign);
611c8c0a1abSStuart Menefy 	}
612c8c0a1abSStuart Menefy 
613c8c0a1abSStuart Menefy }
614c8c0a1abSStuart Menefy 
float64_add(float64 a,float64 b)615c8c0a1abSStuart Menefy float64 float64_add(float64 a, float64 b)
616c8c0a1abSStuart Menefy {
617c8c0a1abSStuart Menefy 	flag aSign, bSign;
618c8c0a1abSStuart Menefy 
619c8c0a1abSStuart Menefy 	aSign = extractFloat64Sign(a);
620c8c0a1abSStuart Menefy 	bSign = extractFloat64Sign(b);
621c8c0a1abSStuart Menefy 	if (aSign == bSign) {
622c8c0a1abSStuart Menefy 		return addFloat64Sigs(a, b, aSign);
623c8c0a1abSStuart Menefy 	} else {
624c8c0a1abSStuart Menefy 		return subFloat64Sigs(a, b, aSign);
625c8c0a1abSStuart Menefy 	}
626c8c0a1abSStuart Menefy }
627c8c0a1abSStuart Menefy 
628c8c0a1abSStuart Menefy static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)629c8c0a1abSStuart Menefy normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
630c8c0a1abSStuart Menefy {
631c8c0a1abSStuart Menefy 	int8 shiftCount;
632c8c0a1abSStuart Menefy 
633c8c0a1abSStuart Menefy 	shiftCount = countLeadingZeros64(aSig) - 11;
634c8c0a1abSStuart Menefy 	*zSigPtr = aSig << shiftCount;
635c8c0a1abSStuart Menefy 	*zExpPtr = 1 - shiftCount;
636c8c0a1abSStuart Menefy }
637c8c0a1abSStuart Menefy 
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)638*e9bf51e5SPaul Mundt void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
639c8c0a1abSStuart Menefy 		   bits64 * z1Ptr)
640c8c0a1abSStuart Menefy {
641c8c0a1abSStuart Menefy 	bits64 z1;
642c8c0a1abSStuart Menefy 
643c8c0a1abSStuart Menefy 	z1 = a1 + b1;
644c8c0a1abSStuart Menefy 	*z1Ptr = z1;
645c8c0a1abSStuart Menefy 	*z0Ptr = a0 + b0 + (z1 < a1);
646c8c0a1abSStuart Menefy }
647c8c0a1abSStuart Menefy 
648*e9bf51e5SPaul Mundt void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)649c8c0a1abSStuart Menefy sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
650c8c0a1abSStuart Menefy        bits64 * z1Ptr)
651c8c0a1abSStuart Menefy {
652c8c0a1abSStuart Menefy 	*z1Ptr = a1 - b1;
653c8c0a1abSStuart Menefy 	*z0Ptr = a0 - b0 - (a1 < b1);
654c8c0a1abSStuart Menefy }
655c8c0a1abSStuart Menefy 
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)656c8c0a1abSStuart Menefy static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
657c8c0a1abSStuart Menefy {
658c8c0a1abSStuart Menefy 	bits64 b0, b1;
659c8c0a1abSStuart Menefy 	bits64 rem0, rem1, term0, term1;
660*e9bf51e5SPaul Mundt 	bits64 z, tmp;
661c8c0a1abSStuart Menefy 	if (b <= a0)
662c8c0a1abSStuart Menefy 		return LIT64(0xFFFFFFFFFFFFFFFF);
663c8c0a1abSStuart Menefy 	b0 = b >> 32;
664*e9bf51e5SPaul Mundt 	tmp = a0;
665*e9bf51e5SPaul Mundt 	do_div(tmp, b0);
666*e9bf51e5SPaul Mundt 
667*e9bf51e5SPaul Mundt 	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
668c8c0a1abSStuart Menefy 	mul64To128(b, z, &term0, &term1);
669c8c0a1abSStuart Menefy 	sub128(a0, a1, term0, term1, &rem0, &rem1);
670c8c0a1abSStuart Menefy 	while (((sbits64) rem0) < 0) {
671c8c0a1abSStuart Menefy 		z -= LIT64(0x100000000);
672c8c0a1abSStuart Menefy 		b1 = b << 32;
673c8c0a1abSStuart Menefy 		add128(rem0, rem1, b0, b1, &rem0, &rem1);
674c8c0a1abSStuart Menefy 	}
675c8c0a1abSStuart Menefy 	rem0 = (rem0 << 32) | (rem1 >> 32);
676*e9bf51e5SPaul Mundt 	tmp = rem0;
677*e9bf51e5SPaul Mundt 	do_div(tmp, b0);
678*e9bf51e5SPaul Mundt 	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
679c8c0a1abSStuart Menefy 	return z;
680c8c0a1abSStuart Menefy }
681c8c0a1abSStuart Menefy 
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)682*e9bf51e5SPaul Mundt void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
683c8c0a1abSStuart Menefy {
684c8c0a1abSStuart Menefy 	bits32 aHigh, aLow, bHigh, bLow;
685c8c0a1abSStuart Menefy 	bits64 z0, zMiddleA, zMiddleB, z1;
686c8c0a1abSStuart Menefy 
687c8c0a1abSStuart Menefy 	aLow = a;
688c8c0a1abSStuart Menefy 	aHigh = a >> 32;
689c8c0a1abSStuart Menefy 	bLow = b;
690c8c0a1abSStuart Menefy 	bHigh = b >> 32;
691c8c0a1abSStuart Menefy 	z1 = ((bits64) aLow) * bLow;
692c8c0a1abSStuart Menefy 	zMiddleA = ((bits64) aLow) * bHigh;
693c8c0a1abSStuart Menefy 	zMiddleB = ((bits64) aHigh) * bLow;
694c8c0a1abSStuart Menefy 	z0 = ((bits64) aHigh) * bHigh;
695c8c0a1abSStuart Menefy 	zMiddleA += zMiddleB;
696c8c0a1abSStuart Menefy 	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
697c8c0a1abSStuart Menefy 	zMiddleA <<= 32;
698c8c0a1abSStuart Menefy 	z1 += zMiddleA;
699c8c0a1abSStuart Menefy 	z0 += (z1 < zMiddleA);
700c8c0a1abSStuart Menefy 	*z1Ptr = z1;
701c8c0a1abSStuart Menefy 	*z0Ptr = z0;
702c8c0a1abSStuart Menefy 
703c8c0a1abSStuart Menefy }
704c8c0a1abSStuart Menefy 
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)705c8c0a1abSStuart Menefy static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
706c8c0a1abSStuart Menefy 				      bits32 * zSigPtr)
707c8c0a1abSStuart Menefy {
708c8c0a1abSStuart Menefy 	int8 shiftCount;
709c8c0a1abSStuart Menefy 
710c8c0a1abSStuart Menefy 	shiftCount = countLeadingZeros32(aSig) - 8;
711c8c0a1abSStuart Menefy 	*zSigPtr = aSig << shiftCount;
712c8c0a1abSStuart Menefy 	*zExpPtr = 1 - shiftCount;
713c8c0a1abSStuart Menefy 
714c8c0a1abSStuart Menefy }
715c8c0a1abSStuart Menefy 
float64_div(float64 a,float64 b)716c8c0a1abSStuart Menefy float64 float64_div(float64 a, float64 b)
717c8c0a1abSStuart Menefy {
718c8c0a1abSStuart Menefy 	flag aSign, bSign, zSign;
719c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
720c8c0a1abSStuart Menefy 	bits64 aSig, bSig, zSig;
721c8c0a1abSStuart Menefy 	bits64 rem0, rem1;
722c8c0a1abSStuart Menefy 	bits64 term0, term1;
723c8c0a1abSStuart Menefy 
724c8c0a1abSStuart Menefy 	aSig = extractFloat64Frac(a);
725c8c0a1abSStuart Menefy 	aExp = extractFloat64Exp(a);
726c8c0a1abSStuart Menefy 	aSign = extractFloat64Sign(a);
727c8c0a1abSStuart Menefy 	bSig = extractFloat64Frac(b);
728c8c0a1abSStuart Menefy 	bExp = extractFloat64Exp(b);
729c8c0a1abSStuart Menefy 	bSign = extractFloat64Sign(b);
730c8c0a1abSStuart Menefy 	zSign = aSign ^ bSign;
731c8c0a1abSStuart Menefy 	if (aExp == 0x7FF) {
732c8c0a1abSStuart Menefy 		if (bExp == 0x7FF) {
733c8c0a1abSStuart Menefy 		}
734c8c0a1abSStuart Menefy 		return packFloat64(zSign, 0x7FF, 0);
735c8c0a1abSStuart Menefy 	}
736c8c0a1abSStuart Menefy 	if (bExp == 0x7FF) {
737c8c0a1abSStuart Menefy 		return packFloat64(zSign, 0, 0);
738c8c0a1abSStuart Menefy 	}
739c8c0a1abSStuart Menefy 	if (bExp == 0) {
740c8c0a1abSStuart Menefy 		if (bSig == 0) {
741c8c0a1abSStuart Menefy 			if ((aExp | aSig) == 0) {
742c8c0a1abSStuart Menefy 				float_raise(FPSCR_CAUSE_INVALID);
743c8c0a1abSStuart Menefy 			}
744c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0x7FF, 0);
745c8c0a1abSStuart Menefy 		}
746c8c0a1abSStuart Menefy 		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
747c8c0a1abSStuart Menefy 	}
748c8c0a1abSStuart Menefy 	if (aExp == 0) {
749c8c0a1abSStuart Menefy 		if (aSig == 0)
750c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0, 0);
751c8c0a1abSStuart Menefy 		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
752c8c0a1abSStuart Menefy 	}
753c8c0a1abSStuart Menefy 	zExp = aExp - bExp + 0x3FD;
754c8c0a1abSStuart Menefy 	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
755c8c0a1abSStuart Menefy 	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
756c8c0a1abSStuart Menefy 	if (bSig <= (aSig + aSig)) {
757c8c0a1abSStuart Menefy 		aSig >>= 1;
758c8c0a1abSStuart Menefy 		++zExp;
759c8c0a1abSStuart Menefy 	}
760c8c0a1abSStuart Menefy 	zSig = estimateDiv128To64(aSig, 0, bSig);
761c8c0a1abSStuart Menefy 	if ((zSig & 0x1FF) <= 2) {
762c8c0a1abSStuart Menefy 		mul64To128(bSig, zSig, &term0, &term1);
763c8c0a1abSStuart Menefy 		sub128(aSig, 0, term0, term1, &rem0, &rem1);
764c8c0a1abSStuart Menefy 		while ((sbits64) rem0 < 0) {
765c8c0a1abSStuart Menefy 			--zSig;
766c8c0a1abSStuart Menefy 			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
767c8c0a1abSStuart Menefy 		}
768c8c0a1abSStuart Menefy 		zSig |= (rem1 != 0);
769c8c0a1abSStuart Menefy 	}
770c8c0a1abSStuart Menefy 	return roundAndPackFloat64(zSign, zExp, zSig);
771c8c0a1abSStuart Menefy 
772c8c0a1abSStuart Menefy }
773c8c0a1abSStuart Menefy 
float32_div(float32 a,float32 b)774c8c0a1abSStuart Menefy float32 float32_div(float32 a, float32 b)
775c8c0a1abSStuart Menefy {
776c8c0a1abSStuart Menefy 	flag aSign, bSign, zSign;
777c8c0a1abSStuart Menefy 	int16 aExp, bExp, zExp;
778*e9bf51e5SPaul Mundt 	bits32 aSig, bSig;
779*e9bf51e5SPaul Mundt 	uint64_t zSig;
780c8c0a1abSStuart Menefy 
781c8c0a1abSStuart Menefy 	aSig = extractFloat32Frac(a);
782c8c0a1abSStuart Menefy 	aExp = extractFloat32Exp(a);
783c8c0a1abSStuart Menefy 	aSign = extractFloat32Sign(a);
784c8c0a1abSStuart Menefy 	bSig = extractFloat32Frac(b);
785c8c0a1abSStuart Menefy 	bExp = extractFloat32Exp(b);
786c8c0a1abSStuart Menefy 	bSign = extractFloat32Sign(b);
787c8c0a1abSStuart Menefy 	zSign = aSign ^ bSign;
788c8c0a1abSStuart Menefy 	if (aExp == 0xFF) {
789c8c0a1abSStuart Menefy 		if (bExp == 0xFF) {
790c8c0a1abSStuart Menefy 		}
791c8c0a1abSStuart Menefy 		return packFloat32(zSign, 0xFF, 0);
792c8c0a1abSStuart Menefy 	}
793c8c0a1abSStuart Menefy 	if (bExp == 0xFF) {
794c8c0a1abSStuart Menefy 		return packFloat32(zSign, 0, 0);
795c8c0a1abSStuart Menefy 	}
796c8c0a1abSStuart Menefy 	if (bExp == 0) {
797c8c0a1abSStuart Menefy 		if (bSig == 0) {
798c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0xFF, 0);
799c8c0a1abSStuart Menefy 		}
800c8c0a1abSStuart Menefy 		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
801c8c0a1abSStuart Menefy 	}
802c8c0a1abSStuart Menefy 	if (aExp == 0) {
803c8c0a1abSStuart Menefy 		if (aSig == 0)
804c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0, 0);
805c8c0a1abSStuart Menefy 		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
806c8c0a1abSStuart Menefy 	}
807c8c0a1abSStuart Menefy 	zExp = aExp - bExp + 0x7D;
808c8c0a1abSStuart Menefy 	aSig = (aSig | 0x00800000) << 7;
809c8c0a1abSStuart Menefy 	bSig = (bSig | 0x00800000) << 8;
810c8c0a1abSStuart Menefy 	if (bSig <= (aSig + aSig)) {
811c8c0a1abSStuart Menefy 		aSig >>= 1;
812c8c0a1abSStuart Menefy 		++zExp;
813c8c0a1abSStuart Menefy 	}
814*e9bf51e5SPaul Mundt 	zSig = (((bits64) aSig) << 32);
815*e9bf51e5SPaul Mundt 	do_div(zSig, bSig);
816*e9bf51e5SPaul Mundt 
817c8c0a1abSStuart Menefy 	if ((zSig & 0x3F) == 0) {
818c8c0a1abSStuart Menefy 		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
819c8c0a1abSStuart Menefy 	}
820*e9bf51e5SPaul Mundt 	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
821c8c0a1abSStuart Menefy 
822c8c0a1abSStuart Menefy }
823c8c0a1abSStuart Menefy 
float32_mul(float32 a,float32 b)824c8c0a1abSStuart Menefy float32 float32_mul(float32 a, float32 b)
825c8c0a1abSStuart Menefy {
826c8c0a1abSStuart Menefy 	char aSign, bSign, zSign;
827c8c0a1abSStuart Menefy 	int aExp, bExp, zExp;
828c8c0a1abSStuart Menefy 	unsigned int aSig, bSig;
829c8c0a1abSStuart Menefy 	unsigned long long zSig64;
830c8c0a1abSStuart Menefy 	unsigned int zSig;
831c8c0a1abSStuart Menefy 
832c8c0a1abSStuart Menefy 	aSig = extractFloat32Frac(a);
833c8c0a1abSStuart Menefy 	aExp = extractFloat32Exp(a);
834c8c0a1abSStuart Menefy 	aSign = extractFloat32Sign(a);
835c8c0a1abSStuart Menefy 	bSig = extractFloat32Frac(b);
836c8c0a1abSStuart Menefy 	bExp = extractFloat32Exp(b);
837c8c0a1abSStuart Menefy 	bSign = extractFloat32Sign(b);
838c8c0a1abSStuart Menefy 	zSign = aSign ^ bSign;
839c8c0a1abSStuart Menefy 	if (aExp == 0) {
840c8c0a1abSStuart Menefy 		if (aSig == 0)
841c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0, 0);
842c8c0a1abSStuart Menefy 		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
843c8c0a1abSStuart Menefy 	}
844c8c0a1abSStuart Menefy 	if (bExp == 0) {
845c8c0a1abSStuart Menefy 		if (bSig == 0)
846c8c0a1abSStuart Menefy 			return packFloat32(zSign, 0, 0);
847c8c0a1abSStuart Menefy 		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
848c8c0a1abSStuart Menefy 	}
849c8c0a1abSStuart Menefy 	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
850c8c0a1abSStuart Menefy 		return roundAndPackFloat32(zSign, 0xff, 0);
851c8c0a1abSStuart Menefy 
852c8c0a1abSStuart Menefy 	zExp = aExp + bExp - 0x7F;
853c8c0a1abSStuart Menefy 	aSig = (aSig | 0x00800000) << 7;
854c8c0a1abSStuart Menefy 	bSig = (bSig | 0x00800000) << 8;
855c8c0a1abSStuart Menefy 	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
856c8c0a1abSStuart Menefy 	zSig = zSig64;
857c8c0a1abSStuart Menefy 	if (0 <= (signed int)(zSig << 1)) {
858c8c0a1abSStuart Menefy 		zSig <<= 1;
859c8c0a1abSStuart Menefy 		--zExp;
860c8c0a1abSStuart Menefy 	}
861c8c0a1abSStuart Menefy 	return roundAndPackFloat32(zSign, zExp, zSig);
862c8c0a1abSStuart Menefy 
863c8c0a1abSStuart Menefy }
864c8c0a1abSStuart Menefy 
float64_mul(float64 a,float64 b)865c8c0a1abSStuart Menefy float64 float64_mul(float64 a, float64 b)
866c8c0a1abSStuart Menefy {
867c8c0a1abSStuart Menefy 	char aSign, bSign, zSign;
868c8c0a1abSStuart Menefy 	int aExp, bExp, zExp;
869c8c0a1abSStuart Menefy 	unsigned long long int aSig, bSig, zSig0, zSig1;
870c8c0a1abSStuart Menefy 
871c8c0a1abSStuart Menefy 	aSig = extractFloat64Frac(a);
872c8c0a1abSStuart Menefy 	aExp = extractFloat64Exp(a);
873c8c0a1abSStuart Menefy 	aSign = extractFloat64Sign(a);
874c8c0a1abSStuart Menefy 	bSig = extractFloat64Frac(b);
875c8c0a1abSStuart Menefy 	bExp = extractFloat64Exp(b);
876c8c0a1abSStuart Menefy 	bSign = extractFloat64Sign(b);
877c8c0a1abSStuart Menefy 	zSign = aSign ^ bSign;
878c8c0a1abSStuart Menefy 
879c8c0a1abSStuart Menefy 	if (aExp == 0) {
880c8c0a1abSStuart Menefy 		if (aSig == 0)
881c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0, 0);
882c8c0a1abSStuart Menefy 		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
883c8c0a1abSStuart Menefy 	}
884c8c0a1abSStuart Menefy 	if (bExp == 0) {
885c8c0a1abSStuart Menefy 		if (bSig == 0)
886c8c0a1abSStuart Menefy 			return packFloat64(zSign, 0, 0);
887c8c0a1abSStuart Menefy 		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
888c8c0a1abSStuart Menefy 	}
889c8c0a1abSStuart Menefy 	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
890c8c0a1abSStuart Menefy 		return roundAndPackFloat64(zSign, 0x7ff, 0);
891c8c0a1abSStuart Menefy 
892c8c0a1abSStuart Menefy 	zExp = aExp + bExp - 0x3FF;
893c8c0a1abSStuart Menefy 	aSig = (aSig | 0x0010000000000000LL) << 10;
894c8c0a1abSStuart Menefy 	bSig = (bSig | 0x0010000000000000LL) << 11;
895c8c0a1abSStuart Menefy 	mul64To128(aSig, bSig, &zSig0, &zSig1);
896c8c0a1abSStuart Menefy 	zSig0 |= (zSig1 != 0);
897c8c0a1abSStuart Menefy 	if (0 <= (signed long long int)(zSig0 << 1)) {
898c8c0a1abSStuart Menefy 		zSig0 <<= 1;
899c8c0a1abSStuart Menefy 		--zExp;
900c8c0a1abSStuart Menefy 	}
901c8c0a1abSStuart Menefy 	return roundAndPackFloat64(zSign, zExp, zSig0);
902c8c0a1abSStuart Menefy }
903b6ad1e8cSCarl Shaw 
904b6ad1e8cSCarl Shaw /*
905b6ad1e8cSCarl Shaw  * -------------------------------------------------------------------------------
906b6ad1e8cSCarl Shaw  *  Returns the result of converting the double-precision floating-point value
907b6ad1e8cSCarl Shaw  *  `a' to the single-precision floating-point format.  The conversion is
908b6ad1e8cSCarl Shaw  *  performed according to the IEC/IEEE Standard for Binary Floating-point
909b6ad1e8cSCarl Shaw  *  Arithmetic.
910b6ad1e8cSCarl Shaw  *  -------------------------------------------------------------------------------
911b6ad1e8cSCarl Shaw  *  */
float64_to_float32(float64 a)912b6ad1e8cSCarl Shaw float32 float64_to_float32(float64 a)
913b6ad1e8cSCarl Shaw {
914b6ad1e8cSCarl Shaw     flag aSign;
915b6ad1e8cSCarl Shaw     int16 aExp;
916b6ad1e8cSCarl Shaw     bits64 aSig;
917b6ad1e8cSCarl Shaw     bits32 zSig;
918b6ad1e8cSCarl Shaw 
919b6ad1e8cSCarl Shaw     aSig = extractFloat64Frac( a );
920b6ad1e8cSCarl Shaw     aExp = extractFloat64Exp( a );
921b6ad1e8cSCarl Shaw     aSign = extractFloat64Sign( a );
922b6ad1e8cSCarl Shaw 
923b6ad1e8cSCarl Shaw     shift64RightJamming( aSig, 22, &aSig );
924b6ad1e8cSCarl Shaw     zSig = aSig;
925b6ad1e8cSCarl Shaw     if ( aExp || zSig ) {
926b6ad1e8cSCarl Shaw         zSig |= 0x40000000;
927b6ad1e8cSCarl Shaw         aExp -= 0x381;
928b6ad1e8cSCarl Shaw     }
929b6ad1e8cSCarl Shaw     return roundAndPackFloat32(aSign, aExp, zSig);
930b6ad1e8cSCarl Shaw }
931