1c8c0a1abSStuart Menefy /*
2c8c0a1abSStuart Menefy * Floating point emulation support for subnormalised numbers on SH4
3c8c0a1abSStuart Menefy * architecture This file is derived from the SoftFloat IEC/IEEE
4c8c0a1abSStuart Menefy * Floating-point Arithmetic Package, Release 2 the original license of
5c8c0a1abSStuart Menefy * which is reproduced below.
6c8c0a1abSStuart Menefy *
7c8c0a1abSStuart Menefy * ========================================================================
8c8c0a1abSStuart Menefy *
9c8c0a1abSStuart Menefy * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10c8c0a1abSStuart Menefy * Arithmetic Package, Release 2.
11c8c0a1abSStuart Menefy *
12c8c0a1abSStuart Menefy * Written by John R. Hauser. This work was made possible in part by the
13c8c0a1abSStuart Menefy * International Computer Science Institute, located at Suite 600, 1947 Center
14c8c0a1abSStuart Menefy * Street, Berkeley, California 94704. Funding was partially provided by the
15c8c0a1abSStuart Menefy * National Science Foundation under grant MIP-9311980. The original version
16c8c0a1abSStuart Menefy * of this code was written as part of a project to build a fixed-point vector
17c8c0a1abSStuart Menefy * processor in collaboration with the University of California at Berkeley,
18c8c0a1abSStuart Menefy * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19c8c0a1abSStuart Menefy * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20c8c0a1abSStuart Menefy * arithmetic/softfloat.html'.
21c8c0a1abSStuart Menefy *
22c8c0a1abSStuart Menefy * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23c8c0a1abSStuart Menefy * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24c8c0a1abSStuart Menefy * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25c8c0a1abSStuart Menefy * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26c8c0a1abSStuart Menefy * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27c8c0a1abSStuart Menefy *
28c8c0a1abSStuart Menefy * Derivative works are acceptable, even for commercial purposes, so long as
29c8c0a1abSStuart Menefy * (1) they include prominent notice that the work is derivative, and (2) they
30c8c0a1abSStuart Menefy * include prominent notice akin to these three paragraphs for those parts of
31c8c0a1abSStuart Menefy * this code that are retained.
32c8c0a1abSStuart Menefy *
33c8c0a1abSStuart Menefy * ========================================================================
34c8c0a1abSStuart Menefy *
35c8c0a1abSStuart Menefy * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36c8c0a1abSStuart Menefy * and Kamel Khelifi <kamel.khelifi@st.com>
37c8c0a1abSStuart Menefy */
38c8c0a1abSStuart Menefy #include <linux/kernel.h>
39f15cbe6fSPaul Mundt #include <cpu/fpu.h>
40*e9bf51e5SPaul Mundt #include <asm/div64.h>
41c8c0a1abSStuart Menefy
42c8c0a1abSStuart Menefy #define LIT64( a ) a##LL
43c8c0a1abSStuart Menefy
44c8c0a1abSStuart Menefy typedef char flag;
45c8c0a1abSStuart Menefy typedef unsigned char uint8;
46c8c0a1abSStuart Menefy typedef signed char int8;
47c8c0a1abSStuart Menefy typedef int uint16;
48c8c0a1abSStuart Menefy typedef int int16;
49c8c0a1abSStuart Menefy typedef unsigned int uint32;
50c8c0a1abSStuart Menefy typedef signed int int32;
51c8c0a1abSStuart Menefy
52c8c0a1abSStuart Menefy typedef unsigned long long int bits64;
53c8c0a1abSStuart Menefy typedef signed long long int sbits64;
54c8c0a1abSStuart Menefy
55c8c0a1abSStuart Menefy typedef unsigned char bits8;
56c8c0a1abSStuart Menefy typedef signed char sbits8;
57c8c0a1abSStuart Menefy typedef unsigned short int bits16;
58c8c0a1abSStuart Menefy typedef signed short int sbits16;
59c8c0a1abSStuart Menefy typedef unsigned int bits32;
60c8c0a1abSStuart Menefy typedef signed int sbits32;
61c8c0a1abSStuart Menefy
62c8c0a1abSStuart Menefy typedef unsigned long long int uint64;
63c8c0a1abSStuart Menefy typedef signed long long int int64;
64c8c0a1abSStuart Menefy
65c8c0a1abSStuart Menefy typedef unsigned long int float32;
66c8c0a1abSStuart Menefy typedef unsigned long long float64;
67c8c0a1abSStuart Menefy
68c8c0a1abSStuart Menefy extern void float_raise(unsigned int flags); /* in fpu.c */
69c8c0a1abSStuart Menefy extern int float_rounding_mode(void); /* in fpu.c */
70c8c0a1abSStuart Menefy
71*e9bf51e5SPaul Mundt bits64 extractFloat64Frac(float64 a);
72*e9bf51e5SPaul Mundt flag extractFloat64Sign(float64 a);
73*e9bf51e5SPaul Mundt int16 extractFloat64Exp(float64 a);
74*e9bf51e5SPaul Mundt int16 extractFloat32Exp(float32 a);
75*e9bf51e5SPaul Mundt flag extractFloat32Sign(float32 a);
76*e9bf51e5SPaul Mundt bits32 extractFloat32Frac(float32 a);
77*e9bf51e5SPaul Mundt float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
78*e9bf51e5SPaul Mundt void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
79*e9bf51e5SPaul Mundt float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
80*e9bf51e5SPaul Mundt void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
81c8c0a1abSStuart Menefy float64 float64_sub(float64 a, float64 b);
82c8c0a1abSStuart Menefy float32 float32_sub(float32 a, float32 b);
83c8c0a1abSStuart Menefy float32 float32_add(float32 a, float32 b);
84c8c0a1abSStuart Menefy float64 float64_add(float64 a, float64 b);
85c8c0a1abSStuart Menefy float64 float64_div(float64 a, float64 b);
86c8c0a1abSStuart Menefy float32 float32_div(float32 a, float32 b);
87c8c0a1abSStuart Menefy float32 float32_mul(float32 a, float32 b);
88c8c0a1abSStuart Menefy float64 float64_mul(float64 a, float64 b);
89b6ad1e8cSCarl Shaw float32 float64_to_float32(float64 a);
90*e9bf51e5SPaul Mundt void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91c8c0a1abSStuart Menefy bits64 * z1Ptr);
92*e9bf51e5SPaul Mundt void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93c8c0a1abSStuart Menefy bits64 * z1Ptr);
94*e9bf51e5SPaul Mundt void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
95c8c0a1abSStuart Menefy
96c8c0a1abSStuart Menefy static int8 countLeadingZeros32(bits32 a);
97c8c0a1abSStuart Menefy static int8 countLeadingZeros64(bits64 a);
98c8c0a1abSStuart Menefy static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99c8c0a1abSStuart Menefy bits64 zSig);
100c8c0a1abSStuart Menefy static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
101c8c0a1abSStuart Menefy static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
102c8c0a1abSStuart Menefy static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
103c8c0a1abSStuart Menefy static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104c8c0a1abSStuart Menefy bits32 zSig);
105c8c0a1abSStuart Menefy static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
106c8c0a1abSStuart Menefy static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
107c8c0a1abSStuart Menefy static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
108c8c0a1abSStuart Menefy static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109c8c0a1abSStuart Menefy bits64 * zSigPtr);
110c8c0a1abSStuart Menefy static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
111c8c0a1abSStuart Menefy static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
112c8c0a1abSStuart Menefy bits32 * zSigPtr);
113c8c0a1abSStuart Menefy
extractFloat64Frac(float64 a)114*e9bf51e5SPaul Mundt bits64 extractFloat64Frac(float64 a)
115c8c0a1abSStuart Menefy {
116c8c0a1abSStuart Menefy return a & LIT64(0x000FFFFFFFFFFFFF);
117c8c0a1abSStuart Menefy }
118c8c0a1abSStuart Menefy
extractFloat64Sign(float64 a)119*e9bf51e5SPaul Mundt flag extractFloat64Sign(float64 a)
120c8c0a1abSStuart Menefy {
121c8c0a1abSStuart Menefy return a >> 63;
122c8c0a1abSStuart Menefy }
123c8c0a1abSStuart Menefy
extractFloat64Exp(float64 a)124*e9bf51e5SPaul Mundt int16 extractFloat64Exp(float64 a)
125c8c0a1abSStuart Menefy {
126c8c0a1abSStuart Menefy return (a >> 52) & 0x7FF;
127c8c0a1abSStuart Menefy }
128c8c0a1abSStuart Menefy
extractFloat32Exp(float32 a)129*e9bf51e5SPaul Mundt int16 extractFloat32Exp(float32 a)
130c8c0a1abSStuart Menefy {
131c8c0a1abSStuart Menefy return (a >> 23) & 0xFF;
132c8c0a1abSStuart Menefy }
133c8c0a1abSStuart Menefy
extractFloat32Sign(float32 a)134*e9bf51e5SPaul Mundt flag extractFloat32Sign(float32 a)
135c8c0a1abSStuart Menefy {
136c8c0a1abSStuart Menefy return a >> 31;
137c8c0a1abSStuart Menefy }
138c8c0a1abSStuart Menefy
extractFloat32Frac(float32 a)139*e9bf51e5SPaul Mundt bits32 extractFloat32Frac(float32 a)
140c8c0a1abSStuart Menefy {
141c8c0a1abSStuart Menefy return a & 0x007FFFFF;
142c8c0a1abSStuart Menefy }
143c8c0a1abSStuart Menefy
packFloat64(flag zSign,int16 zExp,bits64 zSig)144*e9bf51e5SPaul Mundt float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
145c8c0a1abSStuart Menefy {
146c8c0a1abSStuart Menefy return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147c8c0a1abSStuart Menefy }
148c8c0a1abSStuart Menefy
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)149*e9bf51e5SPaul Mundt void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
150c8c0a1abSStuart Menefy {
151c8c0a1abSStuart Menefy bits64 z;
152c8c0a1abSStuart Menefy
153c8c0a1abSStuart Menefy if (count == 0) {
154c8c0a1abSStuart Menefy z = a;
155c8c0a1abSStuart Menefy } else if (count < 64) {
156c8c0a1abSStuart Menefy z = (a >> count) | ((a << ((-count) & 63)) != 0);
157c8c0a1abSStuart Menefy } else {
158c8c0a1abSStuart Menefy z = (a != 0);
159c8c0a1abSStuart Menefy }
160c8c0a1abSStuart Menefy *zPtr = z;
161c8c0a1abSStuart Menefy }
162c8c0a1abSStuart Menefy
countLeadingZeros32(bits32 a)163c8c0a1abSStuart Menefy static int8 countLeadingZeros32(bits32 a)
164c8c0a1abSStuart Menefy {
165c8c0a1abSStuart Menefy static const int8 countLeadingZerosHigh[] = {
166c8c0a1abSStuart Menefy 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167c8c0a1abSStuart Menefy 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168c8c0a1abSStuart Menefy 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169c8c0a1abSStuart Menefy 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170c8c0a1abSStuart Menefy 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171c8c0a1abSStuart Menefy 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172c8c0a1abSStuart Menefy 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173c8c0a1abSStuart Menefy 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181c8c0a1abSStuart Menefy 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
182c8c0a1abSStuart Menefy };
183c8c0a1abSStuart Menefy int8 shiftCount;
184c8c0a1abSStuart Menefy
185c8c0a1abSStuart Menefy shiftCount = 0;
186c8c0a1abSStuart Menefy if (a < 0x10000) {
187c8c0a1abSStuart Menefy shiftCount += 16;
188c8c0a1abSStuart Menefy a <<= 16;
189c8c0a1abSStuart Menefy }
190c8c0a1abSStuart Menefy if (a < 0x1000000) {
191c8c0a1abSStuart Menefy shiftCount += 8;
192c8c0a1abSStuart Menefy a <<= 8;
193c8c0a1abSStuart Menefy }
194c8c0a1abSStuart Menefy shiftCount += countLeadingZerosHigh[a >> 24];
195c8c0a1abSStuart Menefy return shiftCount;
196c8c0a1abSStuart Menefy
197c8c0a1abSStuart Menefy }
198c8c0a1abSStuart Menefy
countLeadingZeros64(bits64 a)199c8c0a1abSStuart Menefy static int8 countLeadingZeros64(bits64 a)
200c8c0a1abSStuart Menefy {
201c8c0a1abSStuart Menefy int8 shiftCount;
202c8c0a1abSStuart Menefy
203c8c0a1abSStuart Menefy shiftCount = 0;
204c8c0a1abSStuart Menefy if (a < ((bits64) 1) << 32) {
205c8c0a1abSStuart Menefy shiftCount += 32;
206c8c0a1abSStuart Menefy } else {
207c8c0a1abSStuart Menefy a >>= 32;
208c8c0a1abSStuart Menefy }
209c8c0a1abSStuart Menefy shiftCount += countLeadingZeros32(a);
210c8c0a1abSStuart Menefy return shiftCount;
211c8c0a1abSStuart Menefy
212c8c0a1abSStuart Menefy }
213c8c0a1abSStuart Menefy
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)214c8c0a1abSStuart Menefy static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
215c8c0a1abSStuart Menefy {
216c8c0a1abSStuart Menefy int8 shiftCount;
217c8c0a1abSStuart Menefy
218c8c0a1abSStuart Menefy shiftCount = countLeadingZeros64(zSig) - 1;
219c8c0a1abSStuart Menefy return roundAndPackFloat64(zSign, zExp - shiftCount,
220c8c0a1abSStuart Menefy zSig << shiftCount);
221c8c0a1abSStuart Menefy
222c8c0a1abSStuart Menefy }
223c8c0a1abSStuart Menefy
subFloat64Sigs(float64 a,float64 b,flag zSign)224c8c0a1abSStuart Menefy static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
225c8c0a1abSStuart Menefy {
226c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
227c8c0a1abSStuart Menefy bits64 aSig, bSig, zSig;
228c8c0a1abSStuart Menefy int16 expDiff;
229c8c0a1abSStuart Menefy
230c8c0a1abSStuart Menefy aSig = extractFloat64Frac(a);
231c8c0a1abSStuart Menefy aExp = extractFloat64Exp(a);
232c8c0a1abSStuart Menefy bSig = extractFloat64Frac(b);
233c8c0a1abSStuart Menefy bExp = extractFloat64Exp(b);
234c8c0a1abSStuart Menefy expDiff = aExp - bExp;
235c8c0a1abSStuart Menefy aSig <<= 10;
236c8c0a1abSStuart Menefy bSig <<= 10;
237c8c0a1abSStuart Menefy if (0 < expDiff)
238c8c0a1abSStuart Menefy goto aExpBigger;
239c8c0a1abSStuart Menefy if (expDiff < 0)
240c8c0a1abSStuart Menefy goto bExpBigger;
241c8c0a1abSStuart Menefy if (aExp == 0) {
242c8c0a1abSStuart Menefy aExp = 1;
243c8c0a1abSStuart Menefy bExp = 1;
244c8c0a1abSStuart Menefy }
245c8c0a1abSStuart Menefy if (bSig < aSig)
246c8c0a1abSStuart Menefy goto aBigger;
247c8c0a1abSStuart Menefy if (aSig < bSig)
248c8c0a1abSStuart Menefy goto bBigger;
249c8c0a1abSStuart Menefy return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
250c8c0a1abSStuart Menefy bExpBigger:
251c8c0a1abSStuart Menefy if (bExp == 0x7FF) {
252c8c0a1abSStuart Menefy return packFloat64(zSign ^ 1, 0x7FF, 0);
253c8c0a1abSStuart Menefy }
254c8c0a1abSStuart Menefy if (aExp == 0) {
255c8c0a1abSStuart Menefy ++expDiff;
256c8c0a1abSStuart Menefy } else {
257c8c0a1abSStuart Menefy aSig |= LIT64(0x4000000000000000);
258c8c0a1abSStuart Menefy }
259c8c0a1abSStuart Menefy shift64RightJamming(aSig, -expDiff, &aSig);
260c8c0a1abSStuart Menefy bSig |= LIT64(0x4000000000000000);
261c8c0a1abSStuart Menefy bBigger:
262c8c0a1abSStuart Menefy zSig = bSig - aSig;
263c8c0a1abSStuart Menefy zExp = bExp;
264c8c0a1abSStuart Menefy zSign ^= 1;
265c8c0a1abSStuart Menefy goto normalizeRoundAndPack;
266c8c0a1abSStuart Menefy aExpBigger:
267c8c0a1abSStuart Menefy if (aExp == 0x7FF) {
268c8c0a1abSStuart Menefy return a;
269c8c0a1abSStuart Menefy }
270c8c0a1abSStuart Menefy if (bExp == 0) {
271c8c0a1abSStuart Menefy --expDiff;
272c8c0a1abSStuart Menefy } else {
273c8c0a1abSStuart Menefy bSig |= LIT64(0x4000000000000000);
274c8c0a1abSStuart Menefy }
275c8c0a1abSStuart Menefy shift64RightJamming(bSig, expDiff, &bSig);
276c8c0a1abSStuart Menefy aSig |= LIT64(0x4000000000000000);
277c8c0a1abSStuart Menefy aBigger:
278c8c0a1abSStuart Menefy zSig = aSig - bSig;
279c8c0a1abSStuart Menefy zExp = aExp;
280c8c0a1abSStuart Menefy normalizeRoundAndPack:
281c8c0a1abSStuart Menefy --zExp;
282c8c0a1abSStuart Menefy return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
283c8c0a1abSStuart Menefy
284c8c0a1abSStuart Menefy }
addFloat64Sigs(float64 a,float64 b,flag zSign)285c8c0a1abSStuart Menefy static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
286c8c0a1abSStuart Menefy {
287c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
288c8c0a1abSStuart Menefy bits64 aSig, bSig, zSig;
289c8c0a1abSStuart Menefy int16 expDiff;
290c8c0a1abSStuart Menefy
291c8c0a1abSStuart Menefy aSig = extractFloat64Frac(a);
292c8c0a1abSStuart Menefy aExp = extractFloat64Exp(a);
293c8c0a1abSStuart Menefy bSig = extractFloat64Frac(b);
294c8c0a1abSStuart Menefy bExp = extractFloat64Exp(b);
295c8c0a1abSStuart Menefy expDiff = aExp - bExp;
296c8c0a1abSStuart Menefy aSig <<= 9;
297c8c0a1abSStuart Menefy bSig <<= 9;
298c8c0a1abSStuart Menefy if (0 < expDiff) {
299c8c0a1abSStuart Menefy if (aExp == 0x7FF) {
300c8c0a1abSStuart Menefy return a;
301c8c0a1abSStuart Menefy }
302c8c0a1abSStuart Menefy if (bExp == 0) {
303c8c0a1abSStuart Menefy --expDiff;
304c8c0a1abSStuart Menefy } else {
305c8c0a1abSStuart Menefy bSig |= LIT64(0x2000000000000000);
306c8c0a1abSStuart Menefy }
307c8c0a1abSStuart Menefy shift64RightJamming(bSig, expDiff, &bSig);
308c8c0a1abSStuart Menefy zExp = aExp;
309c8c0a1abSStuart Menefy } else if (expDiff < 0) {
310c8c0a1abSStuart Menefy if (bExp == 0x7FF) {
311c8c0a1abSStuart Menefy return packFloat64(zSign, 0x7FF, 0);
312c8c0a1abSStuart Menefy }
313c8c0a1abSStuart Menefy if (aExp == 0) {
314c8c0a1abSStuart Menefy ++expDiff;
315c8c0a1abSStuart Menefy } else {
316c8c0a1abSStuart Menefy aSig |= LIT64(0x2000000000000000);
317c8c0a1abSStuart Menefy }
318c8c0a1abSStuart Menefy shift64RightJamming(aSig, -expDiff, &aSig);
319c8c0a1abSStuart Menefy zExp = bExp;
320c8c0a1abSStuart Menefy } else {
321c8c0a1abSStuart Menefy if (aExp == 0x7FF) {
322c8c0a1abSStuart Menefy return a;
323c8c0a1abSStuart Menefy }
324c8c0a1abSStuart Menefy if (aExp == 0)
325c8c0a1abSStuart Menefy return packFloat64(zSign, 0, (aSig + bSig) >> 9);
326c8c0a1abSStuart Menefy zSig = LIT64(0x4000000000000000) + aSig + bSig;
327c8c0a1abSStuart Menefy zExp = aExp;
328c8c0a1abSStuart Menefy goto roundAndPack;
329c8c0a1abSStuart Menefy }
330c8c0a1abSStuart Menefy aSig |= LIT64(0x2000000000000000);
331c8c0a1abSStuart Menefy zSig = (aSig + bSig) << 1;
332c8c0a1abSStuart Menefy --zExp;
333c8c0a1abSStuart Menefy if ((sbits64) zSig < 0) {
334c8c0a1abSStuart Menefy zSig = aSig + bSig;
335c8c0a1abSStuart Menefy ++zExp;
336c8c0a1abSStuart Menefy }
337c8c0a1abSStuart Menefy roundAndPack:
338c8c0a1abSStuart Menefy return roundAndPackFloat64(zSign, zExp, zSig);
339c8c0a1abSStuart Menefy
340c8c0a1abSStuart Menefy }
341c8c0a1abSStuart Menefy
packFloat32(flag zSign,int16 zExp,bits32 zSig)342*e9bf51e5SPaul Mundt float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
343c8c0a1abSStuart Menefy {
344c8c0a1abSStuart Menefy return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
345c8c0a1abSStuart Menefy }
346c8c0a1abSStuart Menefy
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)347*e9bf51e5SPaul Mundt void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
348c8c0a1abSStuart Menefy {
349c8c0a1abSStuart Menefy bits32 z;
350c8c0a1abSStuart Menefy if (count == 0) {
351c8c0a1abSStuart Menefy z = a;
352c8c0a1abSStuart Menefy } else if (count < 32) {
353c8c0a1abSStuart Menefy z = (a >> count) | ((a << ((-count) & 31)) != 0);
354c8c0a1abSStuart Menefy } else {
355c8c0a1abSStuart Menefy z = (a != 0);
356c8c0a1abSStuart Menefy }
357c8c0a1abSStuart Menefy *zPtr = z;
358c8c0a1abSStuart Menefy }
359c8c0a1abSStuart Menefy
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)360c8c0a1abSStuart Menefy static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
361c8c0a1abSStuart Menefy {
362c8c0a1abSStuart Menefy flag roundNearestEven;
363c8c0a1abSStuart Menefy int8 roundIncrement, roundBits;
364c8c0a1abSStuart Menefy flag isTiny;
365c8c0a1abSStuart Menefy
366c8c0a1abSStuart Menefy /* SH4 has only 2 rounding modes - round to nearest and round to zero */
367c8c0a1abSStuart Menefy roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
368c8c0a1abSStuart Menefy roundIncrement = 0x40;
369c8c0a1abSStuart Menefy if (!roundNearestEven) {
370c8c0a1abSStuart Menefy roundIncrement = 0;
371c8c0a1abSStuart Menefy }
372c8c0a1abSStuart Menefy roundBits = zSig & 0x7F;
373c8c0a1abSStuart Menefy if (0xFD <= (bits16) zExp) {
374c8c0a1abSStuart Menefy if ((0xFD < zExp)
375c8c0a1abSStuart Menefy || ((zExp == 0xFD)
376c8c0a1abSStuart Menefy && ((sbits32) (zSig + roundIncrement) < 0))
377c8c0a1abSStuart Menefy ) {
378c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
379c8c0a1abSStuart Menefy return packFloat32(zSign, 0xFF,
380c8c0a1abSStuart Menefy 0) - (roundIncrement == 0);
381c8c0a1abSStuart Menefy }
382c8c0a1abSStuart Menefy if (zExp < 0) {
383c8c0a1abSStuart Menefy isTiny = (zExp < -1)
384c8c0a1abSStuart Menefy || (zSig + roundIncrement < 0x80000000);
385c8c0a1abSStuart Menefy shift32RightJamming(zSig, -zExp, &zSig);
386c8c0a1abSStuart Menefy zExp = 0;
387c8c0a1abSStuart Menefy roundBits = zSig & 0x7F;
388c8c0a1abSStuart Menefy if (isTiny && roundBits)
389c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_UNDERFLOW);
390c8c0a1abSStuart Menefy }
391c8c0a1abSStuart Menefy }
392c8c0a1abSStuart Menefy if (roundBits)
393c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_INEXACT);
394c8c0a1abSStuart Menefy zSig = (zSig + roundIncrement) >> 7;
395c8c0a1abSStuart Menefy zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
396c8c0a1abSStuart Menefy if (zSig == 0)
397c8c0a1abSStuart Menefy zExp = 0;
398c8c0a1abSStuart Menefy return packFloat32(zSign, zExp, zSig);
399c8c0a1abSStuart Menefy
400c8c0a1abSStuart Menefy }
401c8c0a1abSStuart Menefy
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)402c8c0a1abSStuart Menefy static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
403c8c0a1abSStuart Menefy {
404c8c0a1abSStuart Menefy int8 shiftCount;
405c8c0a1abSStuart Menefy
406c8c0a1abSStuart Menefy shiftCount = countLeadingZeros32(zSig) - 1;
407c8c0a1abSStuart Menefy return roundAndPackFloat32(zSign, zExp - shiftCount,
408c8c0a1abSStuart Menefy zSig << shiftCount);
409c8c0a1abSStuart Menefy }
410c8c0a1abSStuart Menefy
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)411c8c0a1abSStuart Menefy static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
412c8c0a1abSStuart Menefy {
413c8c0a1abSStuart Menefy flag roundNearestEven;
414c8c0a1abSStuart Menefy int16 roundIncrement, roundBits;
415c8c0a1abSStuart Menefy flag isTiny;
416c8c0a1abSStuart Menefy
417c8c0a1abSStuart Menefy /* SH4 has only 2 rounding modes - round to nearest and round to zero */
418c8c0a1abSStuart Menefy roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
419c8c0a1abSStuart Menefy roundIncrement = 0x200;
420c8c0a1abSStuart Menefy if (!roundNearestEven) {
421c8c0a1abSStuart Menefy roundIncrement = 0;
422c8c0a1abSStuart Menefy }
423c8c0a1abSStuart Menefy roundBits = zSig & 0x3FF;
424c8c0a1abSStuart Menefy if (0x7FD <= (bits16) zExp) {
425c8c0a1abSStuart Menefy if ((0x7FD < zExp)
426c8c0a1abSStuart Menefy || ((zExp == 0x7FD)
427c8c0a1abSStuart Menefy && ((sbits64) (zSig + roundIncrement) < 0))
428c8c0a1abSStuart Menefy ) {
429c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
430c8c0a1abSStuart Menefy return packFloat64(zSign, 0x7FF,
431c8c0a1abSStuart Menefy 0) - (roundIncrement == 0);
432c8c0a1abSStuart Menefy }
433c8c0a1abSStuart Menefy if (zExp < 0) {
434c8c0a1abSStuart Menefy isTiny = (zExp < -1)
435c8c0a1abSStuart Menefy || (zSig + roundIncrement <
436c8c0a1abSStuart Menefy LIT64(0x8000000000000000));
437c8c0a1abSStuart Menefy shift64RightJamming(zSig, -zExp, &zSig);
438c8c0a1abSStuart Menefy zExp = 0;
439c8c0a1abSStuart Menefy roundBits = zSig & 0x3FF;
440c8c0a1abSStuart Menefy if (isTiny && roundBits)
441c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_UNDERFLOW);
442c8c0a1abSStuart Menefy }
443c8c0a1abSStuart Menefy }
444c8c0a1abSStuart Menefy if (roundBits)
445c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_INEXACT);
446c8c0a1abSStuart Menefy zSig = (zSig + roundIncrement) >> 10;
447c8c0a1abSStuart Menefy zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
448c8c0a1abSStuart Menefy if (zSig == 0)
449c8c0a1abSStuart Menefy zExp = 0;
450c8c0a1abSStuart Menefy return packFloat64(zSign, zExp, zSig);
451c8c0a1abSStuart Menefy
452c8c0a1abSStuart Menefy }
453c8c0a1abSStuart Menefy
subFloat32Sigs(float32 a,float32 b,flag zSign)454c8c0a1abSStuart Menefy static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
455c8c0a1abSStuart Menefy {
456c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
457c8c0a1abSStuart Menefy bits32 aSig, bSig, zSig;
458c8c0a1abSStuart Menefy int16 expDiff;
459c8c0a1abSStuart Menefy
460c8c0a1abSStuart Menefy aSig = extractFloat32Frac(a);
461c8c0a1abSStuart Menefy aExp = extractFloat32Exp(a);
462c8c0a1abSStuart Menefy bSig = extractFloat32Frac(b);
463c8c0a1abSStuart Menefy bExp = extractFloat32Exp(b);
464c8c0a1abSStuart Menefy expDiff = aExp - bExp;
465c8c0a1abSStuart Menefy aSig <<= 7;
466c8c0a1abSStuart Menefy bSig <<= 7;
467c8c0a1abSStuart Menefy if (0 < expDiff)
468c8c0a1abSStuart Menefy goto aExpBigger;
469c8c0a1abSStuart Menefy if (expDiff < 0)
470c8c0a1abSStuart Menefy goto bExpBigger;
471c8c0a1abSStuart Menefy if (aExp == 0) {
472c8c0a1abSStuart Menefy aExp = 1;
473c8c0a1abSStuart Menefy bExp = 1;
474c8c0a1abSStuart Menefy }
475c8c0a1abSStuart Menefy if (bSig < aSig)
476c8c0a1abSStuart Menefy goto aBigger;
477c8c0a1abSStuart Menefy if (aSig < bSig)
478c8c0a1abSStuart Menefy goto bBigger;
479c8c0a1abSStuart Menefy return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
480c8c0a1abSStuart Menefy bExpBigger:
481c8c0a1abSStuart Menefy if (bExp == 0xFF) {
482c8c0a1abSStuart Menefy return packFloat32(zSign ^ 1, 0xFF, 0);
483c8c0a1abSStuart Menefy }
484c8c0a1abSStuart Menefy if (aExp == 0) {
485c8c0a1abSStuart Menefy ++expDiff;
486c8c0a1abSStuart Menefy } else {
487c8c0a1abSStuart Menefy aSig |= 0x40000000;
488c8c0a1abSStuart Menefy }
489c8c0a1abSStuart Menefy shift32RightJamming(aSig, -expDiff, &aSig);
490c8c0a1abSStuart Menefy bSig |= 0x40000000;
491c8c0a1abSStuart Menefy bBigger:
492c8c0a1abSStuart Menefy zSig = bSig - aSig;
493c8c0a1abSStuart Menefy zExp = bExp;
494c8c0a1abSStuart Menefy zSign ^= 1;
495c8c0a1abSStuart Menefy goto normalizeRoundAndPack;
496c8c0a1abSStuart Menefy aExpBigger:
497c8c0a1abSStuart Menefy if (aExp == 0xFF) {
498c8c0a1abSStuart Menefy return a;
499c8c0a1abSStuart Menefy }
500c8c0a1abSStuart Menefy if (bExp == 0) {
501c8c0a1abSStuart Menefy --expDiff;
502c8c0a1abSStuart Menefy } else {
503c8c0a1abSStuart Menefy bSig |= 0x40000000;
504c8c0a1abSStuart Menefy }
505c8c0a1abSStuart Menefy shift32RightJamming(bSig, expDiff, &bSig);
506c8c0a1abSStuart Menefy aSig |= 0x40000000;
507c8c0a1abSStuart Menefy aBigger:
508c8c0a1abSStuart Menefy zSig = aSig - bSig;
509c8c0a1abSStuart Menefy zExp = aExp;
510c8c0a1abSStuart Menefy normalizeRoundAndPack:
511c8c0a1abSStuart Menefy --zExp;
512c8c0a1abSStuart Menefy return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
513c8c0a1abSStuart Menefy
514c8c0a1abSStuart Menefy }
515c8c0a1abSStuart Menefy
addFloat32Sigs(float32 a,float32 b,flag zSign)516c8c0a1abSStuart Menefy static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
517c8c0a1abSStuart Menefy {
518c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
519c8c0a1abSStuart Menefy bits32 aSig, bSig, zSig;
520c8c0a1abSStuart Menefy int16 expDiff;
521c8c0a1abSStuart Menefy
522c8c0a1abSStuart Menefy aSig = extractFloat32Frac(a);
523c8c0a1abSStuart Menefy aExp = extractFloat32Exp(a);
524c8c0a1abSStuart Menefy bSig = extractFloat32Frac(b);
525c8c0a1abSStuart Menefy bExp = extractFloat32Exp(b);
526c8c0a1abSStuart Menefy expDiff = aExp - bExp;
527c8c0a1abSStuart Menefy aSig <<= 6;
528c8c0a1abSStuart Menefy bSig <<= 6;
529c8c0a1abSStuart Menefy if (0 < expDiff) {
530c8c0a1abSStuart Menefy if (aExp == 0xFF) {
531c8c0a1abSStuart Menefy return a;
532c8c0a1abSStuart Menefy }
533c8c0a1abSStuart Menefy if (bExp == 0) {
534c8c0a1abSStuart Menefy --expDiff;
535c8c0a1abSStuart Menefy } else {
536c8c0a1abSStuart Menefy bSig |= 0x20000000;
537c8c0a1abSStuart Menefy }
538c8c0a1abSStuart Menefy shift32RightJamming(bSig, expDiff, &bSig);
539c8c0a1abSStuart Menefy zExp = aExp;
540c8c0a1abSStuart Menefy } else if (expDiff < 0) {
541c8c0a1abSStuart Menefy if (bExp == 0xFF) {
542c8c0a1abSStuart Menefy return packFloat32(zSign, 0xFF, 0);
543c8c0a1abSStuart Menefy }
544c8c0a1abSStuart Menefy if (aExp == 0) {
545c8c0a1abSStuart Menefy ++expDiff;
546c8c0a1abSStuart Menefy } else {
547c8c0a1abSStuart Menefy aSig |= 0x20000000;
548c8c0a1abSStuart Menefy }
549c8c0a1abSStuart Menefy shift32RightJamming(aSig, -expDiff, &aSig);
550c8c0a1abSStuart Menefy zExp = bExp;
551c8c0a1abSStuart Menefy } else {
552c8c0a1abSStuart Menefy if (aExp == 0xFF) {
553c8c0a1abSStuart Menefy return a;
554c8c0a1abSStuart Menefy }
555c8c0a1abSStuart Menefy if (aExp == 0)
556c8c0a1abSStuart Menefy return packFloat32(zSign, 0, (aSig + bSig) >> 6);
557c8c0a1abSStuart Menefy zSig = 0x40000000 + aSig + bSig;
558c8c0a1abSStuart Menefy zExp = aExp;
559c8c0a1abSStuart Menefy goto roundAndPack;
560c8c0a1abSStuart Menefy }
561c8c0a1abSStuart Menefy aSig |= 0x20000000;
562c8c0a1abSStuart Menefy zSig = (aSig + bSig) << 1;
563c8c0a1abSStuart Menefy --zExp;
564c8c0a1abSStuart Menefy if ((sbits32) zSig < 0) {
565c8c0a1abSStuart Menefy zSig = aSig + bSig;
566c8c0a1abSStuart Menefy ++zExp;
567c8c0a1abSStuart Menefy }
568c8c0a1abSStuart Menefy roundAndPack:
569c8c0a1abSStuart Menefy return roundAndPackFloat32(zSign, zExp, zSig);
570c8c0a1abSStuart Menefy
571c8c0a1abSStuart Menefy }
572c8c0a1abSStuart Menefy
float64_sub(float64 a,float64 b)573c8c0a1abSStuart Menefy float64 float64_sub(float64 a, float64 b)
574c8c0a1abSStuart Menefy {
575c8c0a1abSStuart Menefy flag aSign, bSign;
576c8c0a1abSStuart Menefy
577c8c0a1abSStuart Menefy aSign = extractFloat64Sign(a);
578c8c0a1abSStuart Menefy bSign = extractFloat64Sign(b);
579c8c0a1abSStuart Menefy if (aSign == bSign) {
580c8c0a1abSStuart Menefy return subFloat64Sigs(a, b, aSign);
581c8c0a1abSStuart Menefy } else {
582c8c0a1abSStuart Menefy return addFloat64Sigs(a, b, aSign);
583c8c0a1abSStuart Menefy }
584c8c0a1abSStuart Menefy
585c8c0a1abSStuart Menefy }
586c8c0a1abSStuart Menefy
float32_sub(float32 a,float32 b)587c8c0a1abSStuart Menefy float32 float32_sub(float32 a, float32 b)
588c8c0a1abSStuart Menefy {
589c8c0a1abSStuart Menefy flag aSign, bSign;
590c8c0a1abSStuart Menefy
591c8c0a1abSStuart Menefy aSign = extractFloat32Sign(a);
592c8c0a1abSStuart Menefy bSign = extractFloat32Sign(b);
593c8c0a1abSStuart Menefy if (aSign == bSign) {
594c8c0a1abSStuart Menefy return subFloat32Sigs(a, b, aSign);
595c8c0a1abSStuart Menefy } else {
596c8c0a1abSStuart Menefy return addFloat32Sigs(a, b, aSign);
597c8c0a1abSStuart Menefy }
598c8c0a1abSStuart Menefy
599c8c0a1abSStuart Menefy }
600c8c0a1abSStuart Menefy
float32_add(float32 a,float32 b)601c8c0a1abSStuart Menefy float32 float32_add(float32 a, float32 b)
602c8c0a1abSStuart Menefy {
603c8c0a1abSStuart Menefy flag aSign, bSign;
604c8c0a1abSStuart Menefy
605c8c0a1abSStuart Menefy aSign = extractFloat32Sign(a);
606c8c0a1abSStuart Menefy bSign = extractFloat32Sign(b);
607c8c0a1abSStuart Menefy if (aSign == bSign) {
608c8c0a1abSStuart Menefy return addFloat32Sigs(a, b, aSign);
609c8c0a1abSStuart Menefy } else {
610c8c0a1abSStuart Menefy return subFloat32Sigs(a, b, aSign);
611c8c0a1abSStuart Menefy }
612c8c0a1abSStuart Menefy
613c8c0a1abSStuart Menefy }
614c8c0a1abSStuart Menefy
float64_add(float64 a,float64 b)615c8c0a1abSStuart Menefy float64 float64_add(float64 a, float64 b)
616c8c0a1abSStuart Menefy {
617c8c0a1abSStuart Menefy flag aSign, bSign;
618c8c0a1abSStuart Menefy
619c8c0a1abSStuart Menefy aSign = extractFloat64Sign(a);
620c8c0a1abSStuart Menefy bSign = extractFloat64Sign(b);
621c8c0a1abSStuart Menefy if (aSign == bSign) {
622c8c0a1abSStuart Menefy return addFloat64Sigs(a, b, aSign);
623c8c0a1abSStuart Menefy } else {
624c8c0a1abSStuart Menefy return subFloat64Sigs(a, b, aSign);
625c8c0a1abSStuart Menefy }
626c8c0a1abSStuart Menefy }
627c8c0a1abSStuart Menefy
628c8c0a1abSStuart Menefy static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)629c8c0a1abSStuart Menefy normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
630c8c0a1abSStuart Menefy {
631c8c0a1abSStuart Menefy int8 shiftCount;
632c8c0a1abSStuart Menefy
633c8c0a1abSStuart Menefy shiftCount = countLeadingZeros64(aSig) - 11;
634c8c0a1abSStuart Menefy *zSigPtr = aSig << shiftCount;
635c8c0a1abSStuart Menefy *zExpPtr = 1 - shiftCount;
636c8c0a1abSStuart Menefy }
637c8c0a1abSStuart Menefy
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)638*e9bf51e5SPaul Mundt void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
639c8c0a1abSStuart Menefy bits64 * z1Ptr)
640c8c0a1abSStuart Menefy {
641c8c0a1abSStuart Menefy bits64 z1;
642c8c0a1abSStuart Menefy
643c8c0a1abSStuart Menefy z1 = a1 + b1;
644c8c0a1abSStuart Menefy *z1Ptr = z1;
645c8c0a1abSStuart Menefy *z0Ptr = a0 + b0 + (z1 < a1);
646c8c0a1abSStuart Menefy }
647c8c0a1abSStuart Menefy
648*e9bf51e5SPaul Mundt void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)649c8c0a1abSStuart Menefy sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
650c8c0a1abSStuart Menefy bits64 * z1Ptr)
651c8c0a1abSStuart Menefy {
652c8c0a1abSStuart Menefy *z1Ptr = a1 - b1;
653c8c0a1abSStuart Menefy *z0Ptr = a0 - b0 - (a1 < b1);
654c8c0a1abSStuart Menefy }
655c8c0a1abSStuart Menefy
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)656c8c0a1abSStuart Menefy static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
657c8c0a1abSStuart Menefy {
658c8c0a1abSStuart Menefy bits64 b0, b1;
659c8c0a1abSStuart Menefy bits64 rem0, rem1, term0, term1;
660*e9bf51e5SPaul Mundt bits64 z, tmp;
661c8c0a1abSStuart Menefy if (b <= a0)
662c8c0a1abSStuart Menefy return LIT64(0xFFFFFFFFFFFFFFFF);
663c8c0a1abSStuart Menefy b0 = b >> 32;
664*e9bf51e5SPaul Mundt tmp = a0;
665*e9bf51e5SPaul Mundt do_div(tmp, b0);
666*e9bf51e5SPaul Mundt
667*e9bf51e5SPaul Mundt z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
668c8c0a1abSStuart Menefy mul64To128(b, z, &term0, &term1);
669c8c0a1abSStuart Menefy sub128(a0, a1, term0, term1, &rem0, &rem1);
670c8c0a1abSStuart Menefy while (((sbits64) rem0) < 0) {
671c8c0a1abSStuart Menefy z -= LIT64(0x100000000);
672c8c0a1abSStuart Menefy b1 = b << 32;
673c8c0a1abSStuart Menefy add128(rem0, rem1, b0, b1, &rem0, &rem1);
674c8c0a1abSStuart Menefy }
675c8c0a1abSStuart Menefy rem0 = (rem0 << 32) | (rem1 >> 32);
676*e9bf51e5SPaul Mundt tmp = rem0;
677*e9bf51e5SPaul Mundt do_div(tmp, b0);
678*e9bf51e5SPaul Mundt z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
679c8c0a1abSStuart Menefy return z;
680c8c0a1abSStuart Menefy }
681c8c0a1abSStuart Menefy
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)682*e9bf51e5SPaul Mundt void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
683c8c0a1abSStuart Menefy {
684c8c0a1abSStuart Menefy bits32 aHigh, aLow, bHigh, bLow;
685c8c0a1abSStuart Menefy bits64 z0, zMiddleA, zMiddleB, z1;
686c8c0a1abSStuart Menefy
687c8c0a1abSStuart Menefy aLow = a;
688c8c0a1abSStuart Menefy aHigh = a >> 32;
689c8c0a1abSStuart Menefy bLow = b;
690c8c0a1abSStuart Menefy bHigh = b >> 32;
691c8c0a1abSStuart Menefy z1 = ((bits64) aLow) * bLow;
692c8c0a1abSStuart Menefy zMiddleA = ((bits64) aLow) * bHigh;
693c8c0a1abSStuart Menefy zMiddleB = ((bits64) aHigh) * bLow;
694c8c0a1abSStuart Menefy z0 = ((bits64) aHigh) * bHigh;
695c8c0a1abSStuart Menefy zMiddleA += zMiddleB;
696c8c0a1abSStuart Menefy z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
697c8c0a1abSStuart Menefy zMiddleA <<= 32;
698c8c0a1abSStuart Menefy z1 += zMiddleA;
699c8c0a1abSStuart Menefy z0 += (z1 < zMiddleA);
700c8c0a1abSStuart Menefy *z1Ptr = z1;
701c8c0a1abSStuart Menefy *z0Ptr = z0;
702c8c0a1abSStuart Menefy
703c8c0a1abSStuart Menefy }
704c8c0a1abSStuart Menefy
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)705c8c0a1abSStuart Menefy static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
706c8c0a1abSStuart Menefy bits32 * zSigPtr)
707c8c0a1abSStuart Menefy {
708c8c0a1abSStuart Menefy int8 shiftCount;
709c8c0a1abSStuart Menefy
710c8c0a1abSStuart Menefy shiftCount = countLeadingZeros32(aSig) - 8;
711c8c0a1abSStuart Menefy *zSigPtr = aSig << shiftCount;
712c8c0a1abSStuart Menefy *zExpPtr = 1 - shiftCount;
713c8c0a1abSStuart Menefy
714c8c0a1abSStuart Menefy }
715c8c0a1abSStuart Menefy
float64_div(float64 a,float64 b)716c8c0a1abSStuart Menefy float64 float64_div(float64 a, float64 b)
717c8c0a1abSStuart Menefy {
718c8c0a1abSStuart Menefy flag aSign, bSign, zSign;
719c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
720c8c0a1abSStuart Menefy bits64 aSig, bSig, zSig;
721c8c0a1abSStuart Menefy bits64 rem0, rem1;
722c8c0a1abSStuart Menefy bits64 term0, term1;
723c8c0a1abSStuart Menefy
724c8c0a1abSStuart Menefy aSig = extractFloat64Frac(a);
725c8c0a1abSStuart Menefy aExp = extractFloat64Exp(a);
726c8c0a1abSStuart Menefy aSign = extractFloat64Sign(a);
727c8c0a1abSStuart Menefy bSig = extractFloat64Frac(b);
728c8c0a1abSStuart Menefy bExp = extractFloat64Exp(b);
729c8c0a1abSStuart Menefy bSign = extractFloat64Sign(b);
730c8c0a1abSStuart Menefy zSign = aSign ^ bSign;
731c8c0a1abSStuart Menefy if (aExp == 0x7FF) {
732c8c0a1abSStuart Menefy if (bExp == 0x7FF) {
733c8c0a1abSStuart Menefy }
734c8c0a1abSStuart Menefy return packFloat64(zSign, 0x7FF, 0);
735c8c0a1abSStuart Menefy }
736c8c0a1abSStuart Menefy if (bExp == 0x7FF) {
737c8c0a1abSStuart Menefy return packFloat64(zSign, 0, 0);
738c8c0a1abSStuart Menefy }
739c8c0a1abSStuart Menefy if (bExp == 0) {
740c8c0a1abSStuart Menefy if (bSig == 0) {
741c8c0a1abSStuart Menefy if ((aExp | aSig) == 0) {
742c8c0a1abSStuart Menefy float_raise(FPSCR_CAUSE_INVALID);
743c8c0a1abSStuart Menefy }
744c8c0a1abSStuart Menefy return packFloat64(zSign, 0x7FF, 0);
745c8c0a1abSStuart Menefy }
746c8c0a1abSStuart Menefy normalizeFloat64Subnormal(bSig, &bExp, &bSig);
747c8c0a1abSStuart Menefy }
748c8c0a1abSStuart Menefy if (aExp == 0) {
749c8c0a1abSStuart Menefy if (aSig == 0)
750c8c0a1abSStuart Menefy return packFloat64(zSign, 0, 0);
751c8c0a1abSStuart Menefy normalizeFloat64Subnormal(aSig, &aExp, &aSig);
752c8c0a1abSStuart Menefy }
753c8c0a1abSStuart Menefy zExp = aExp - bExp + 0x3FD;
754c8c0a1abSStuart Menefy aSig = (aSig | LIT64(0x0010000000000000)) << 10;
755c8c0a1abSStuart Menefy bSig = (bSig | LIT64(0x0010000000000000)) << 11;
756c8c0a1abSStuart Menefy if (bSig <= (aSig + aSig)) {
757c8c0a1abSStuart Menefy aSig >>= 1;
758c8c0a1abSStuart Menefy ++zExp;
759c8c0a1abSStuart Menefy }
760c8c0a1abSStuart Menefy zSig = estimateDiv128To64(aSig, 0, bSig);
761c8c0a1abSStuart Menefy if ((zSig & 0x1FF) <= 2) {
762c8c0a1abSStuart Menefy mul64To128(bSig, zSig, &term0, &term1);
763c8c0a1abSStuart Menefy sub128(aSig, 0, term0, term1, &rem0, &rem1);
764c8c0a1abSStuart Menefy while ((sbits64) rem0 < 0) {
765c8c0a1abSStuart Menefy --zSig;
766c8c0a1abSStuart Menefy add128(rem0, rem1, 0, bSig, &rem0, &rem1);
767c8c0a1abSStuart Menefy }
768c8c0a1abSStuart Menefy zSig |= (rem1 != 0);
769c8c0a1abSStuart Menefy }
770c8c0a1abSStuart Menefy return roundAndPackFloat64(zSign, zExp, zSig);
771c8c0a1abSStuart Menefy
772c8c0a1abSStuart Menefy }
773c8c0a1abSStuart Menefy
float32_div(float32 a,float32 b)774c8c0a1abSStuart Menefy float32 float32_div(float32 a, float32 b)
775c8c0a1abSStuart Menefy {
776c8c0a1abSStuart Menefy flag aSign, bSign, zSign;
777c8c0a1abSStuart Menefy int16 aExp, bExp, zExp;
778*e9bf51e5SPaul Mundt bits32 aSig, bSig;
779*e9bf51e5SPaul Mundt uint64_t zSig;
780c8c0a1abSStuart Menefy
781c8c0a1abSStuart Menefy aSig = extractFloat32Frac(a);
782c8c0a1abSStuart Menefy aExp = extractFloat32Exp(a);
783c8c0a1abSStuart Menefy aSign = extractFloat32Sign(a);
784c8c0a1abSStuart Menefy bSig = extractFloat32Frac(b);
785c8c0a1abSStuart Menefy bExp = extractFloat32Exp(b);
786c8c0a1abSStuart Menefy bSign = extractFloat32Sign(b);
787c8c0a1abSStuart Menefy zSign = aSign ^ bSign;
788c8c0a1abSStuart Menefy if (aExp == 0xFF) {
789c8c0a1abSStuart Menefy if (bExp == 0xFF) {
790c8c0a1abSStuart Menefy }
791c8c0a1abSStuart Menefy return packFloat32(zSign, 0xFF, 0);
792c8c0a1abSStuart Menefy }
793c8c0a1abSStuart Menefy if (bExp == 0xFF) {
794c8c0a1abSStuart Menefy return packFloat32(zSign, 0, 0);
795c8c0a1abSStuart Menefy }
796c8c0a1abSStuart Menefy if (bExp == 0) {
797c8c0a1abSStuart Menefy if (bSig == 0) {
798c8c0a1abSStuart Menefy return packFloat32(zSign, 0xFF, 0);
799c8c0a1abSStuart Menefy }
800c8c0a1abSStuart Menefy normalizeFloat32Subnormal(bSig, &bExp, &bSig);
801c8c0a1abSStuart Menefy }
802c8c0a1abSStuart Menefy if (aExp == 0) {
803c8c0a1abSStuart Menefy if (aSig == 0)
804c8c0a1abSStuart Menefy return packFloat32(zSign, 0, 0);
805c8c0a1abSStuart Menefy normalizeFloat32Subnormal(aSig, &aExp, &aSig);
806c8c0a1abSStuart Menefy }
807c8c0a1abSStuart Menefy zExp = aExp - bExp + 0x7D;
808c8c0a1abSStuart Menefy aSig = (aSig | 0x00800000) << 7;
809c8c0a1abSStuart Menefy bSig = (bSig | 0x00800000) << 8;
810c8c0a1abSStuart Menefy if (bSig <= (aSig + aSig)) {
811c8c0a1abSStuart Menefy aSig >>= 1;
812c8c0a1abSStuart Menefy ++zExp;
813c8c0a1abSStuart Menefy }
814*e9bf51e5SPaul Mundt zSig = (((bits64) aSig) << 32);
815*e9bf51e5SPaul Mundt do_div(zSig, bSig);
816*e9bf51e5SPaul Mundt
817c8c0a1abSStuart Menefy if ((zSig & 0x3F) == 0) {
818c8c0a1abSStuart Menefy zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
819c8c0a1abSStuart Menefy }
820*e9bf51e5SPaul Mundt return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
821c8c0a1abSStuart Menefy
822c8c0a1abSStuart Menefy }
823c8c0a1abSStuart Menefy
float32_mul(float32 a,float32 b)824c8c0a1abSStuart Menefy float32 float32_mul(float32 a, float32 b)
825c8c0a1abSStuart Menefy {
826c8c0a1abSStuart Menefy char aSign, bSign, zSign;
827c8c0a1abSStuart Menefy int aExp, bExp, zExp;
828c8c0a1abSStuart Menefy unsigned int aSig, bSig;
829c8c0a1abSStuart Menefy unsigned long long zSig64;
830c8c0a1abSStuart Menefy unsigned int zSig;
831c8c0a1abSStuart Menefy
832c8c0a1abSStuart Menefy aSig = extractFloat32Frac(a);
833c8c0a1abSStuart Menefy aExp = extractFloat32Exp(a);
834c8c0a1abSStuart Menefy aSign = extractFloat32Sign(a);
835c8c0a1abSStuart Menefy bSig = extractFloat32Frac(b);
836c8c0a1abSStuart Menefy bExp = extractFloat32Exp(b);
837c8c0a1abSStuart Menefy bSign = extractFloat32Sign(b);
838c8c0a1abSStuart Menefy zSign = aSign ^ bSign;
839c8c0a1abSStuart Menefy if (aExp == 0) {
840c8c0a1abSStuart Menefy if (aSig == 0)
841c8c0a1abSStuart Menefy return packFloat32(zSign, 0, 0);
842c8c0a1abSStuart Menefy normalizeFloat32Subnormal(aSig, &aExp, &aSig);
843c8c0a1abSStuart Menefy }
844c8c0a1abSStuart Menefy if (bExp == 0) {
845c8c0a1abSStuart Menefy if (bSig == 0)
846c8c0a1abSStuart Menefy return packFloat32(zSign, 0, 0);
847c8c0a1abSStuart Menefy normalizeFloat32Subnormal(bSig, &bExp, &bSig);
848c8c0a1abSStuart Menefy }
849c8c0a1abSStuart Menefy if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
850c8c0a1abSStuart Menefy return roundAndPackFloat32(zSign, 0xff, 0);
851c8c0a1abSStuart Menefy
852c8c0a1abSStuart Menefy zExp = aExp + bExp - 0x7F;
853c8c0a1abSStuart Menefy aSig = (aSig | 0x00800000) << 7;
854c8c0a1abSStuart Menefy bSig = (bSig | 0x00800000) << 8;
855c8c0a1abSStuart Menefy shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
856c8c0a1abSStuart Menefy zSig = zSig64;
857c8c0a1abSStuart Menefy if (0 <= (signed int)(zSig << 1)) {
858c8c0a1abSStuart Menefy zSig <<= 1;
859c8c0a1abSStuart Menefy --zExp;
860c8c0a1abSStuart Menefy }
861c8c0a1abSStuart Menefy return roundAndPackFloat32(zSign, zExp, zSig);
862c8c0a1abSStuart Menefy
863c8c0a1abSStuart Menefy }
864c8c0a1abSStuart Menefy
float64_mul(float64 a,float64 b)865c8c0a1abSStuart Menefy float64 float64_mul(float64 a, float64 b)
866c8c0a1abSStuart Menefy {
867c8c0a1abSStuart Menefy char aSign, bSign, zSign;
868c8c0a1abSStuart Menefy int aExp, bExp, zExp;
869c8c0a1abSStuart Menefy unsigned long long int aSig, bSig, zSig0, zSig1;
870c8c0a1abSStuart Menefy
871c8c0a1abSStuart Menefy aSig = extractFloat64Frac(a);
872c8c0a1abSStuart Menefy aExp = extractFloat64Exp(a);
873c8c0a1abSStuart Menefy aSign = extractFloat64Sign(a);
874c8c0a1abSStuart Menefy bSig = extractFloat64Frac(b);
875c8c0a1abSStuart Menefy bExp = extractFloat64Exp(b);
876c8c0a1abSStuart Menefy bSign = extractFloat64Sign(b);
877c8c0a1abSStuart Menefy zSign = aSign ^ bSign;
878c8c0a1abSStuart Menefy
879c8c0a1abSStuart Menefy if (aExp == 0) {
880c8c0a1abSStuart Menefy if (aSig == 0)
881c8c0a1abSStuart Menefy return packFloat64(zSign, 0, 0);
882c8c0a1abSStuart Menefy normalizeFloat64Subnormal(aSig, &aExp, &aSig);
883c8c0a1abSStuart Menefy }
884c8c0a1abSStuart Menefy if (bExp == 0) {
885c8c0a1abSStuart Menefy if (bSig == 0)
886c8c0a1abSStuart Menefy return packFloat64(zSign, 0, 0);
887c8c0a1abSStuart Menefy normalizeFloat64Subnormal(bSig, &bExp, &bSig);
888c8c0a1abSStuart Menefy }
889c8c0a1abSStuart Menefy if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
890c8c0a1abSStuart Menefy return roundAndPackFloat64(zSign, 0x7ff, 0);
891c8c0a1abSStuart Menefy
892c8c0a1abSStuart Menefy zExp = aExp + bExp - 0x3FF;
893c8c0a1abSStuart Menefy aSig = (aSig | 0x0010000000000000LL) << 10;
894c8c0a1abSStuart Menefy bSig = (bSig | 0x0010000000000000LL) << 11;
895c8c0a1abSStuart Menefy mul64To128(aSig, bSig, &zSig0, &zSig1);
896c8c0a1abSStuart Menefy zSig0 |= (zSig1 != 0);
897c8c0a1abSStuart Menefy if (0 <= (signed long long int)(zSig0 << 1)) {
898c8c0a1abSStuart Menefy zSig0 <<= 1;
899c8c0a1abSStuart Menefy --zExp;
900c8c0a1abSStuart Menefy }
901c8c0a1abSStuart Menefy return roundAndPackFloat64(zSign, zExp, zSig0);
902c8c0a1abSStuart Menefy }
903b6ad1e8cSCarl Shaw
904b6ad1e8cSCarl Shaw /*
905b6ad1e8cSCarl Shaw * -------------------------------------------------------------------------------
906b6ad1e8cSCarl Shaw * Returns the result of converting the double-precision floating-point value
907b6ad1e8cSCarl Shaw * `a' to the single-precision floating-point format. The conversion is
908b6ad1e8cSCarl Shaw * performed according to the IEC/IEEE Standard for Binary Floating-point
909b6ad1e8cSCarl Shaw * Arithmetic.
910b6ad1e8cSCarl Shaw * -------------------------------------------------------------------------------
911b6ad1e8cSCarl Shaw * */
float64_to_float32(float64 a)912b6ad1e8cSCarl Shaw float32 float64_to_float32(float64 a)
913b6ad1e8cSCarl Shaw {
914b6ad1e8cSCarl Shaw flag aSign;
915b6ad1e8cSCarl Shaw int16 aExp;
916b6ad1e8cSCarl Shaw bits64 aSig;
917b6ad1e8cSCarl Shaw bits32 zSig;
918b6ad1e8cSCarl Shaw
919b6ad1e8cSCarl Shaw aSig = extractFloat64Frac( a );
920b6ad1e8cSCarl Shaw aExp = extractFloat64Exp( a );
921b6ad1e8cSCarl Shaw aSign = extractFloat64Sign( a );
922b6ad1e8cSCarl Shaw
923b6ad1e8cSCarl Shaw shift64RightJamming( aSig, 22, &aSig );
924b6ad1e8cSCarl Shaw zSig = aSig;
925b6ad1e8cSCarl Shaw if ( aExp || zSig ) {
926b6ad1e8cSCarl Shaw zSig |= 0x40000000;
927b6ad1e8cSCarl Shaw aExp -= 0x381;
928b6ad1e8cSCarl Shaw }
929b6ad1e8cSCarl Shaw return roundAndPackFloat32(aSign, aExp, zSig);
930b6ad1e8cSCarl Shaw }
931