xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/softfloat.c (revision e9bf51e5)
1 /*
2  * Floating point emulation support for subnormalised numbers on SH4
3  * architecture This file is derived from the SoftFloat IEC/IEEE
4  * Floating-point Arithmetic Package, Release 2 the original license of
5  * which is reproduced below.
6  *
7  * ========================================================================
8  *
9  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10  * Arithmetic Package, Release 2.
11  *
12  * Written by John R. Hauser.  This work was made possible in part by the
13  * International Computer Science Institute, located at Suite 600, 1947 Center
14  * Street, Berkeley, California 94704.  Funding was partially provided by the
15  * National Science Foundation under grant MIP-9311980.  The original version
16  * of this code was written as part of a project to build a fixed-point vector
17  * processor in collaboration with the University of California at Berkeley,
18  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20  * arithmetic/softfloat.html'.
21  *
22  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
23  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
25  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27  *
28  * Derivative works are acceptable, even for commercial purposes, so long as
29  * (1) they include prominent notice that the work is derivative, and (2) they
30  * include prominent notice akin to these three paragraphs for those parts of
31  * this code that are retained.
32  *
33  * ========================================================================
34  *
35  * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36  * and Kamel Khelifi <kamel.khelifi@st.com>
37  */
38 #include <linux/kernel.h>
39 #include <cpu/fpu.h>
40 #include <asm/div64.h>
41 
42 #define LIT64( a ) a##LL
43 
44 typedef char flag;
45 typedef unsigned char uint8;
46 typedef signed char int8;
47 typedef int uint16;
48 typedef int int16;
49 typedef unsigned int uint32;
50 typedef signed int int32;
51 
52 typedef unsigned long long int bits64;
53 typedef signed long long int sbits64;
54 
55 typedef unsigned char bits8;
56 typedef signed char sbits8;
57 typedef unsigned short int bits16;
58 typedef signed short int sbits16;
59 typedef unsigned int bits32;
60 typedef signed int sbits32;
61 
62 typedef unsigned long long int uint64;
63 typedef signed long long int int64;
64 
65 typedef unsigned long int float32;
66 typedef unsigned long long float64;
67 
68 extern void float_raise(unsigned int flags);	/* in fpu.c */
69 extern int float_rounding_mode(void);	/* in fpu.c */
70 
71 bits64 extractFloat64Frac(float64 a);
72 flag extractFloat64Sign(float64 a);
73 int16 extractFloat64Exp(float64 a);
74 int16 extractFloat32Exp(float32 a);
75 flag extractFloat32Sign(float32 a);
76 bits32 extractFloat32Frac(float32 a);
77 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
78 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
79 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
80 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
81 float64 float64_sub(float64 a, float64 b);
82 float32 float32_sub(float32 a, float32 b);
83 float32 float32_add(float32 a, float32 b);
84 float64 float64_add(float64 a, float64 b);
85 float64 float64_div(float64 a, float64 b);
86 float32 float32_div(float32 a, float32 b);
87 float32 float32_mul(float32 a, float32 b);
88 float64 float64_mul(float64 a, float64 b);
89 float32 float64_to_float32(float64 a);
90 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 		   bits64 * z1Ptr);
92 void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93 		   bits64 * z1Ptr);
94 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
95 
96 static int8 countLeadingZeros32(bits32 a);
97 static int8 countLeadingZeros64(bits64 a);
98 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99 					    bits64 zSig);
100 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
101 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
102 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
103 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104 					    bits32 zSig);
105 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
106 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
107 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
108 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109 				      bits64 * zSigPtr);
110 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
111 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
112 				      bits32 * zSigPtr);
113 
extractFloat64Frac(float64 a)114 bits64 extractFloat64Frac(float64 a)
115 {
116 	return a & LIT64(0x000FFFFFFFFFFFFF);
117 }
118 
extractFloat64Sign(float64 a)119 flag extractFloat64Sign(float64 a)
120 {
121 	return a >> 63;
122 }
123 
extractFloat64Exp(float64 a)124 int16 extractFloat64Exp(float64 a)
125 {
126 	return (a >> 52) & 0x7FF;
127 }
128 
extractFloat32Exp(float32 a)129 int16 extractFloat32Exp(float32 a)
130 {
131 	return (a >> 23) & 0xFF;
132 }
133 
extractFloat32Sign(float32 a)134 flag extractFloat32Sign(float32 a)
135 {
136 	return a >> 31;
137 }
138 
extractFloat32Frac(float32 a)139 bits32 extractFloat32Frac(float32 a)
140 {
141 	return a & 0x007FFFFF;
142 }
143 
packFloat64(flag zSign,int16 zExp,bits64 zSig)144 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
145 {
146 	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 }
148 
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)149 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
150 {
151 	bits64 z;
152 
153 	if (count == 0) {
154 		z = a;
155 	} else if (count < 64) {
156 		z = (a >> count) | ((a << ((-count) & 63)) != 0);
157 	} else {
158 		z = (a != 0);
159 	}
160 	*zPtr = z;
161 }
162 
countLeadingZeros32(bits32 a)163 static int8 countLeadingZeros32(bits32 a)
164 {
165 	static const int8 countLeadingZerosHigh[] = {
166 		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
182 	};
183 	int8 shiftCount;
184 
185 	shiftCount = 0;
186 	if (a < 0x10000) {
187 		shiftCount += 16;
188 		a <<= 16;
189 	}
190 	if (a < 0x1000000) {
191 		shiftCount += 8;
192 		a <<= 8;
193 	}
194 	shiftCount += countLeadingZerosHigh[a >> 24];
195 	return shiftCount;
196 
197 }
198 
countLeadingZeros64(bits64 a)199 static int8 countLeadingZeros64(bits64 a)
200 {
201 	int8 shiftCount;
202 
203 	shiftCount = 0;
204 	if (a < ((bits64) 1) << 32) {
205 		shiftCount += 32;
206 	} else {
207 		a >>= 32;
208 	}
209 	shiftCount += countLeadingZeros32(a);
210 	return shiftCount;
211 
212 }
213 
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)214 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
215 {
216 	int8 shiftCount;
217 
218 	shiftCount = countLeadingZeros64(zSig) - 1;
219 	return roundAndPackFloat64(zSign, zExp - shiftCount,
220 				   zSig << shiftCount);
221 
222 }
223 
subFloat64Sigs(float64 a,float64 b,flag zSign)224 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
225 {
226 	int16 aExp, bExp, zExp;
227 	bits64 aSig, bSig, zSig;
228 	int16 expDiff;
229 
230 	aSig = extractFloat64Frac(a);
231 	aExp = extractFloat64Exp(a);
232 	bSig = extractFloat64Frac(b);
233 	bExp = extractFloat64Exp(b);
234 	expDiff = aExp - bExp;
235 	aSig <<= 10;
236 	bSig <<= 10;
237 	if (0 < expDiff)
238 		goto aExpBigger;
239 	if (expDiff < 0)
240 		goto bExpBigger;
241 	if (aExp == 0) {
242 		aExp = 1;
243 		bExp = 1;
244 	}
245 	if (bSig < aSig)
246 		goto aBigger;
247 	if (aSig < bSig)
248 		goto bBigger;
249 	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
250       bExpBigger:
251 	if (bExp == 0x7FF) {
252 		return packFloat64(zSign ^ 1, 0x7FF, 0);
253 	}
254 	if (aExp == 0) {
255 		++expDiff;
256 	} else {
257 		aSig |= LIT64(0x4000000000000000);
258 	}
259 	shift64RightJamming(aSig, -expDiff, &aSig);
260 	bSig |= LIT64(0x4000000000000000);
261       bBigger:
262 	zSig = bSig - aSig;
263 	zExp = bExp;
264 	zSign ^= 1;
265 	goto normalizeRoundAndPack;
266       aExpBigger:
267 	if (aExp == 0x7FF) {
268 		return a;
269 	}
270 	if (bExp == 0) {
271 		--expDiff;
272 	} else {
273 		bSig |= LIT64(0x4000000000000000);
274 	}
275 	shift64RightJamming(bSig, expDiff, &bSig);
276 	aSig |= LIT64(0x4000000000000000);
277       aBigger:
278 	zSig = aSig - bSig;
279 	zExp = aExp;
280       normalizeRoundAndPack:
281 	--zExp;
282 	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
283 
284 }
addFloat64Sigs(float64 a,float64 b,flag zSign)285 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
286 {
287 	int16 aExp, bExp, zExp;
288 	bits64 aSig, bSig, zSig;
289 	int16 expDiff;
290 
291 	aSig = extractFloat64Frac(a);
292 	aExp = extractFloat64Exp(a);
293 	bSig = extractFloat64Frac(b);
294 	bExp = extractFloat64Exp(b);
295 	expDiff = aExp - bExp;
296 	aSig <<= 9;
297 	bSig <<= 9;
298 	if (0 < expDiff) {
299 		if (aExp == 0x7FF) {
300 			return a;
301 		}
302 		if (bExp == 0) {
303 			--expDiff;
304 		} else {
305 			bSig |= LIT64(0x2000000000000000);
306 		}
307 		shift64RightJamming(bSig, expDiff, &bSig);
308 		zExp = aExp;
309 	} else if (expDiff < 0) {
310 		if (bExp == 0x7FF) {
311 			return packFloat64(zSign, 0x7FF, 0);
312 		}
313 		if (aExp == 0) {
314 			++expDiff;
315 		} else {
316 			aSig |= LIT64(0x2000000000000000);
317 		}
318 		shift64RightJamming(aSig, -expDiff, &aSig);
319 		zExp = bExp;
320 	} else {
321 		if (aExp == 0x7FF) {
322 			return a;
323 		}
324 		if (aExp == 0)
325 			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
326 		zSig = LIT64(0x4000000000000000) + aSig + bSig;
327 		zExp = aExp;
328 		goto roundAndPack;
329 	}
330 	aSig |= LIT64(0x2000000000000000);
331 	zSig = (aSig + bSig) << 1;
332 	--zExp;
333 	if ((sbits64) zSig < 0) {
334 		zSig = aSig + bSig;
335 		++zExp;
336 	}
337       roundAndPack:
338 	return roundAndPackFloat64(zSign, zExp, zSig);
339 
340 }
341 
packFloat32(flag zSign,int16 zExp,bits32 zSig)342 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
343 {
344 	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
345 }
346 
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)347 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
348 {
349 	bits32 z;
350 	if (count == 0) {
351 		z = a;
352 	} else if (count < 32) {
353 		z = (a >> count) | ((a << ((-count) & 31)) != 0);
354 	} else {
355 		z = (a != 0);
356 	}
357 	*zPtr = z;
358 }
359 
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)360 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
361 {
362 	flag roundNearestEven;
363 	int8 roundIncrement, roundBits;
364 	flag isTiny;
365 
366 	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
367 	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
368 	roundIncrement = 0x40;
369 	if (!roundNearestEven) {
370 		roundIncrement = 0;
371 	}
372 	roundBits = zSig & 0x7F;
373 	if (0xFD <= (bits16) zExp) {
374 		if ((0xFD < zExp)
375 		    || ((zExp == 0xFD)
376 			&& ((sbits32) (zSig + roundIncrement) < 0))
377 		    ) {
378 			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
379 			return packFloat32(zSign, 0xFF,
380 					   0) - (roundIncrement == 0);
381 		}
382 		if (zExp < 0) {
383 			isTiny = (zExp < -1)
384 			    || (zSig + roundIncrement < 0x80000000);
385 			shift32RightJamming(zSig, -zExp, &zSig);
386 			zExp = 0;
387 			roundBits = zSig & 0x7F;
388 			if (isTiny && roundBits)
389 				float_raise(FPSCR_CAUSE_UNDERFLOW);
390 		}
391 	}
392 	if (roundBits)
393 		float_raise(FPSCR_CAUSE_INEXACT);
394 	zSig = (zSig + roundIncrement) >> 7;
395 	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
396 	if (zSig == 0)
397 		zExp = 0;
398 	return packFloat32(zSign, zExp, zSig);
399 
400 }
401 
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)402 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
403 {
404 	int8 shiftCount;
405 
406 	shiftCount = countLeadingZeros32(zSig) - 1;
407 	return roundAndPackFloat32(zSign, zExp - shiftCount,
408 				   zSig << shiftCount);
409 }
410 
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)411 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
412 {
413 	flag roundNearestEven;
414 	int16 roundIncrement, roundBits;
415 	flag isTiny;
416 
417 	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
418 	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
419 	roundIncrement = 0x200;
420 	if (!roundNearestEven) {
421 		roundIncrement = 0;
422 	}
423 	roundBits = zSig & 0x3FF;
424 	if (0x7FD <= (bits16) zExp) {
425 		if ((0x7FD < zExp)
426 		    || ((zExp == 0x7FD)
427 			&& ((sbits64) (zSig + roundIncrement) < 0))
428 		    ) {
429 			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
430 			return packFloat64(zSign, 0x7FF,
431 					   0) - (roundIncrement == 0);
432 		}
433 		if (zExp < 0) {
434 			isTiny = (zExp < -1)
435 			    || (zSig + roundIncrement <
436 				LIT64(0x8000000000000000));
437 			shift64RightJamming(zSig, -zExp, &zSig);
438 			zExp = 0;
439 			roundBits = zSig & 0x3FF;
440 			if (isTiny && roundBits)
441 				float_raise(FPSCR_CAUSE_UNDERFLOW);
442 		}
443 	}
444 	if (roundBits)
445 		float_raise(FPSCR_CAUSE_INEXACT);
446 	zSig = (zSig + roundIncrement) >> 10;
447 	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
448 	if (zSig == 0)
449 		zExp = 0;
450 	return packFloat64(zSign, zExp, zSig);
451 
452 }
453 
subFloat32Sigs(float32 a,float32 b,flag zSign)454 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
455 {
456 	int16 aExp, bExp, zExp;
457 	bits32 aSig, bSig, zSig;
458 	int16 expDiff;
459 
460 	aSig = extractFloat32Frac(a);
461 	aExp = extractFloat32Exp(a);
462 	bSig = extractFloat32Frac(b);
463 	bExp = extractFloat32Exp(b);
464 	expDiff = aExp - bExp;
465 	aSig <<= 7;
466 	bSig <<= 7;
467 	if (0 < expDiff)
468 		goto aExpBigger;
469 	if (expDiff < 0)
470 		goto bExpBigger;
471 	if (aExp == 0) {
472 		aExp = 1;
473 		bExp = 1;
474 	}
475 	if (bSig < aSig)
476 		goto aBigger;
477 	if (aSig < bSig)
478 		goto bBigger;
479 	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
480       bExpBigger:
481 	if (bExp == 0xFF) {
482 		return packFloat32(zSign ^ 1, 0xFF, 0);
483 	}
484 	if (aExp == 0) {
485 		++expDiff;
486 	} else {
487 		aSig |= 0x40000000;
488 	}
489 	shift32RightJamming(aSig, -expDiff, &aSig);
490 	bSig |= 0x40000000;
491       bBigger:
492 	zSig = bSig - aSig;
493 	zExp = bExp;
494 	zSign ^= 1;
495 	goto normalizeRoundAndPack;
496       aExpBigger:
497 	if (aExp == 0xFF) {
498 		return a;
499 	}
500 	if (bExp == 0) {
501 		--expDiff;
502 	} else {
503 		bSig |= 0x40000000;
504 	}
505 	shift32RightJamming(bSig, expDiff, &bSig);
506 	aSig |= 0x40000000;
507       aBigger:
508 	zSig = aSig - bSig;
509 	zExp = aExp;
510       normalizeRoundAndPack:
511 	--zExp;
512 	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
513 
514 }
515 
addFloat32Sigs(float32 a,float32 b,flag zSign)516 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
517 {
518 	int16 aExp, bExp, zExp;
519 	bits32 aSig, bSig, zSig;
520 	int16 expDiff;
521 
522 	aSig = extractFloat32Frac(a);
523 	aExp = extractFloat32Exp(a);
524 	bSig = extractFloat32Frac(b);
525 	bExp = extractFloat32Exp(b);
526 	expDiff = aExp - bExp;
527 	aSig <<= 6;
528 	bSig <<= 6;
529 	if (0 < expDiff) {
530 		if (aExp == 0xFF) {
531 			return a;
532 		}
533 		if (bExp == 0) {
534 			--expDiff;
535 		} else {
536 			bSig |= 0x20000000;
537 		}
538 		shift32RightJamming(bSig, expDiff, &bSig);
539 		zExp = aExp;
540 	} else if (expDiff < 0) {
541 		if (bExp == 0xFF) {
542 			return packFloat32(zSign, 0xFF, 0);
543 		}
544 		if (aExp == 0) {
545 			++expDiff;
546 		} else {
547 			aSig |= 0x20000000;
548 		}
549 		shift32RightJamming(aSig, -expDiff, &aSig);
550 		zExp = bExp;
551 	} else {
552 		if (aExp == 0xFF) {
553 			return a;
554 		}
555 		if (aExp == 0)
556 			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
557 		zSig = 0x40000000 + aSig + bSig;
558 		zExp = aExp;
559 		goto roundAndPack;
560 	}
561 	aSig |= 0x20000000;
562 	zSig = (aSig + bSig) << 1;
563 	--zExp;
564 	if ((sbits32) zSig < 0) {
565 		zSig = aSig + bSig;
566 		++zExp;
567 	}
568       roundAndPack:
569 	return roundAndPackFloat32(zSign, zExp, zSig);
570 
571 }
572 
float64_sub(float64 a,float64 b)573 float64 float64_sub(float64 a, float64 b)
574 {
575 	flag aSign, bSign;
576 
577 	aSign = extractFloat64Sign(a);
578 	bSign = extractFloat64Sign(b);
579 	if (aSign == bSign) {
580 		return subFloat64Sigs(a, b, aSign);
581 	} else {
582 		return addFloat64Sigs(a, b, aSign);
583 	}
584 
585 }
586 
float32_sub(float32 a,float32 b)587 float32 float32_sub(float32 a, float32 b)
588 {
589 	flag aSign, bSign;
590 
591 	aSign = extractFloat32Sign(a);
592 	bSign = extractFloat32Sign(b);
593 	if (aSign == bSign) {
594 		return subFloat32Sigs(a, b, aSign);
595 	} else {
596 		return addFloat32Sigs(a, b, aSign);
597 	}
598 
599 }
600 
float32_add(float32 a,float32 b)601 float32 float32_add(float32 a, float32 b)
602 {
603 	flag aSign, bSign;
604 
605 	aSign = extractFloat32Sign(a);
606 	bSign = extractFloat32Sign(b);
607 	if (aSign == bSign) {
608 		return addFloat32Sigs(a, b, aSign);
609 	} else {
610 		return subFloat32Sigs(a, b, aSign);
611 	}
612 
613 }
614 
float64_add(float64 a,float64 b)615 float64 float64_add(float64 a, float64 b)
616 {
617 	flag aSign, bSign;
618 
619 	aSign = extractFloat64Sign(a);
620 	bSign = extractFloat64Sign(b);
621 	if (aSign == bSign) {
622 		return addFloat64Sigs(a, b, aSign);
623 	} else {
624 		return subFloat64Sigs(a, b, aSign);
625 	}
626 }
627 
628 static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)629 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
630 {
631 	int8 shiftCount;
632 
633 	shiftCount = countLeadingZeros64(aSig) - 11;
634 	*zSigPtr = aSig << shiftCount;
635 	*zExpPtr = 1 - shiftCount;
636 }
637 
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)638 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
639 		   bits64 * z1Ptr)
640 {
641 	bits64 z1;
642 
643 	z1 = a1 + b1;
644 	*z1Ptr = z1;
645 	*z0Ptr = a0 + b0 + (z1 < a1);
646 }
647 
648 void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)649 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
650        bits64 * z1Ptr)
651 {
652 	*z1Ptr = a1 - b1;
653 	*z0Ptr = a0 - b0 - (a1 < b1);
654 }
655 
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)656 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
657 {
658 	bits64 b0, b1;
659 	bits64 rem0, rem1, term0, term1;
660 	bits64 z, tmp;
661 	if (b <= a0)
662 		return LIT64(0xFFFFFFFFFFFFFFFF);
663 	b0 = b >> 32;
664 	tmp = a0;
665 	do_div(tmp, b0);
666 
667 	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
668 	mul64To128(b, z, &term0, &term1);
669 	sub128(a0, a1, term0, term1, &rem0, &rem1);
670 	while (((sbits64) rem0) < 0) {
671 		z -= LIT64(0x100000000);
672 		b1 = b << 32;
673 		add128(rem0, rem1, b0, b1, &rem0, &rem1);
674 	}
675 	rem0 = (rem0 << 32) | (rem1 >> 32);
676 	tmp = rem0;
677 	do_div(tmp, b0);
678 	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
679 	return z;
680 }
681 
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)682 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
683 {
684 	bits32 aHigh, aLow, bHigh, bLow;
685 	bits64 z0, zMiddleA, zMiddleB, z1;
686 
687 	aLow = a;
688 	aHigh = a >> 32;
689 	bLow = b;
690 	bHigh = b >> 32;
691 	z1 = ((bits64) aLow) * bLow;
692 	zMiddleA = ((bits64) aLow) * bHigh;
693 	zMiddleB = ((bits64) aHigh) * bLow;
694 	z0 = ((bits64) aHigh) * bHigh;
695 	zMiddleA += zMiddleB;
696 	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
697 	zMiddleA <<= 32;
698 	z1 += zMiddleA;
699 	z0 += (z1 < zMiddleA);
700 	*z1Ptr = z1;
701 	*z0Ptr = z0;
702 
703 }
704 
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)705 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
706 				      bits32 * zSigPtr)
707 {
708 	int8 shiftCount;
709 
710 	shiftCount = countLeadingZeros32(aSig) - 8;
711 	*zSigPtr = aSig << shiftCount;
712 	*zExpPtr = 1 - shiftCount;
713 
714 }
715 
float64_div(float64 a,float64 b)716 float64 float64_div(float64 a, float64 b)
717 {
718 	flag aSign, bSign, zSign;
719 	int16 aExp, bExp, zExp;
720 	bits64 aSig, bSig, zSig;
721 	bits64 rem0, rem1;
722 	bits64 term0, term1;
723 
724 	aSig = extractFloat64Frac(a);
725 	aExp = extractFloat64Exp(a);
726 	aSign = extractFloat64Sign(a);
727 	bSig = extractFloat64Frac(b);
728 	bExp = extractFloat64Exp(b);
729 	bSign = extractFloat64Sign(b);
730 	zSign = aSign ^ bSign;
731 	if (aExp == 0x7FF) {
732 		if (bExp == 0x7FF) {
733 		}
734 		return packFloat64(zSign, 0x7FF, 0);
735 	}
736 	if (bExp == 0x7FF) {
737 		return packFloat64(zSign, 0, 0);
738 	}
739 	if (bExp == 0) {
740 		if (bSig == 0) {
741 			if ((aExp | aSig) == 0) {
742 				float_raise(FPSCR_CAUSE_INVALID);
743 			}
744 			return packFloat64(zSign, 0x7FF, 0);
745 		}
746 		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
747 	}
748 	if (aExp == 0) {
749 		if (aSig == 0)
750 			return packFloat64(zSign, 0, 0);
751 		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
752 	}
753 	zExp = aExp - bExp + 0x3FD;
754 	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
755 	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
756 	if (bSig <= (aSig + aSig)) {
757 		aSig >>= 1;
758 		++zExp;
759 	}
760 	zSig = estimateDiv128To64(aSig, 0, bSig);
761 	if ((zSig & 0x1FF) <= 2) {
762 		mul64To128(bSig, zSig, &term0, &term1);
763 		sub128(aSig, 0, term0, term1, &rem0, &rem1);
764 		while ((sbits64) rem0 < 0) {
765 			--zSig;
766 			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
767 		}
768 		zSig |= (rem1 != 0);
769 	}
770 	return roundAndPackFloat64(zSign, zExp, zSig);
771 
772 }
773 
float32_div(float32 a,float32 b)774 float32 float32_div(float32 a, float32 b)
775 {
776 	flag aSign, bSign, zSign;
777 	int16 aExp, bExp, zExp;
778 	bits32 aSig, bSig;
779 	uint64_t zSig;
780 
781 	aSig = extractFloat32Frac(a);
782 	aExp = extractFloat32Exp(a);
783 	aSign = extractFloat32Sign(a);
784 	bSig = extractFloat32Frac(b);
785 	bExp = extractFloat32Exp(b);
786 	bSign = extractFloat32Sign(b);
787 	zSign = aSign ^ bSign;
788 	if (aExp == 0xFF) {
789 		if (bExp == 0xFF) {
790 		}
791 		return packFloat32(zSign, 0xFF, 0);
792 	}
793 	if (bExp == 0xFF) {
794 		return packFloat32(zSign, 0, 0);
795 	}
796 	if (bExp == 0) {
797 		if (bSig == 0) {
798 			return packFloat32(zSign, 0xFF, 0);
799 		}
800 		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
801 	}
802 	if (aExp == 0) {
803 		if (aSig == 0)
804 			return packFloat32(zSign, 0, 0);
805 		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
806 	}
807 	zExp = aExp - bExp + 0x7D;
808 	aSig = (aSig | 0x00800000) << 7;
809 	bSig = (bSig | 0x00800000) << 8;
810 	if (bSig <= (aSig + aSig)) {
811 		aSig >>= 1;
812 		++zExp;
813 	}
814 	zSig = (((bits64) aSig) << 32);
815 	do_div(zSig, bSig);
816 
817 	if ((zSig & 0x3F) == 0) {
818 		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
819 	}
820 	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
821 
822 }
823 
float32_mul(float32 a,float32 b)824 float32 float32_mul(float32 a, float32 b)
825 {
826 	char aSign, bSign, zSign;
827 	int aExp, bExp, zExp;
828 	unsigned int aSig, bSig;
829 	unsigned long long zSig64;
830 	unsigned int zSig;
831 
832 	aSig = extractFloat32Frac(a);
833 	aExp = extractFloat32Exp(a);
834 	aSign = extractFloat32Sign(a);
835 	bSig = extractFloat32Frac(b);
836 	bExp = extractFloat32Exp(b);
837 	bSign = extractFloat32Sign(b);
838 	zSign = aSign ^ bSign;
839 	if (aExp == 0) {
840 		if (aSig == 0)
841 			return packFloat32(zSign, 0, 0);
842 		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
843 	}
844 	if (bExp == 0) {
845 		if (bSig == 0)
846 			return packFloat32(zSign, 0, 0);
847 		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
848 	}
849 	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
850 		return roundAndPackFloat32(zSign, 0xff, 0);
851 
852 	zExp = aExp + bExp - 0x7F;
853 	aSig = (aSig | 0x00800000) << 7;
854 	bSig = (bSig | 0x00800000) << 8;
855 	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
856 	zSig = zSig64;
857 	if (0 <= (signed int)(zSig << 1)) {
858 		zSig <<= 1;
859 		--zExp;
860 	}
861 	return roundAndPackFloat32(zSign, zExp, zSig);
862 
863 }
864 
float64_mul(float64 a,float64 b)865 float64 float64_mul(float64 a, float64 b)
866 {
867 	char aSign, bSign, zSign;
868 	int aExp, bExp, zExp;
869 	unsigned long long int aSig, bSig, zSig0, zSig1;
870 
871 	aSig = extractFloat64Frac(a);
872 	aExp = extractFloat64Exp(a);
873 	aSign = extractFloat64Sign(a);
874 	bSig = extractFloat64Frac(b);
875 	bExp = extractFloat64Exp(b);
876 	bSign = extractFloat64Sign(b);
877 	zSign = aSign ^ bSign;
878 
879 	if (aExp == 0) {
880 		if (aSig == 0)
881 			return packFloat64(zSign, 0, 0);
882 		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
883 	}
884 	if (bExp == 0) {
885 		if (bSig == 0)
886 			return packFloat64(zSign, 0, 0);
887 		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
888 	}
889 	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
890 		return roundAndPackFloat64(zSign, 0x7ff, 0);
891 
892 	zExp = aExp + bExp - 0x3FF;
893 	aSig = (aSig | 0x0010000000000000LL) << 10;
894 	bSig = (bSig | 0x0010000000000000LL) << 11;
895 	mul64To128(aSig, bSig, &zSig0, &zSig1);
896 	zSig0 |= (zSig1 != 0);
897 	if (0 <= (signed long long int)(zSig0 << 1)) {
898 		zSig0 <<= 1;
899 		--zExp;
900 	}
901 	return roundAndPackFloat64(zSign, zExp, zSig0);
902 }
903 
904 /*
905  * -------------------------------------------------------------------------------
906  *  Returns the result of converting the double-precision floating-point value
907  *  `a' to the single-precision floating-point format.  The conversion is
908  *  performed according to the IEC/IEEE Standard for Binary Floating-point
909  *  Arithmetic.
910  *  -------------------------------------------------------------------------------
911  *  */
float64_to_float32(float64 a)912 float32 float64_to_float32(float64 a)
913 {
914     flag aSign;
915     int16 aExp;
916     bits64 aSig;
917     bits32 zSig;
918 
919     aSig = extractFloat64Frac( a );
920     aExp = extractFloat64Exp( a );
921     aSign = extractFloat64Sign( a );
922 
923     shift64RightJamming( aSig, 22, &aSig );
924     zSig = aSig;
925     if ( aExp || zSig ) {
926         zSig |= 0x40000000;
927         aExp -= 0x381;
928     }
929     return roundAndPackFloat32(aSign, aExp, zSig);
930 }
931