xref: /openbmc/linux/arch/sh/kernel/cpu/sh4/softfloat.c (revision 8fa5723aa7e053d498336b48448b292fc2e0458b)
1 /*
2  * Floating point emulation support for subnormalised numbers on SH4
3  * architecture This file is derived from the SoftFloat IEC/IEEE
4  * Floating-point Arithmetic Package, Release 2 the original license of
5  * which is reproduced below.
6  *
7  * ========================================================================
8  *
9  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10  * Arithmetic Package, Release 2.
11  *
12  * Written by John R. Hauser.  This work was made possible in part by the
13  * International Computer Science Institute, located at Suite 600, 1947 Center
14  * Street, Berkeley, California 94704.  Funding was partially provided by the
15  * National Science Foundation under grant MIP-9311980.  The original version
16  * of this code was written as part of a project to build a fixed-point vector
17  * processor in collaboration with the University of California at Berkeley,
18  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20  * arithmetic/softfloat.html'.
21  *
22  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
23  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
25  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27  *
28  * Derivative works are acceptable, even for commercial purposes, so long as
29  * (1) they include prominent notice that the work is derivative, and (2) they
30  * include prominent notice akin to these three paragraphs for those parts of
31  * this code that are retained.
32  *
33  * ========================================================================
34  *
35  * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36  * and Kamel Khelifi <kamel.khelifi@st.com>
37  */
38 #include <linux/kernel.h>
39 #include <cpu/fpu.h>
40 
/* Build a 64-bit literal from a bare integer constant. */
#define LIT64( a ) a##LL

/*
 * SoftFloat-style type names.  `bitsN'/`sbitsN' are exactly N bits
 * wide; `intN'/`uintN' only promise *at least* N bits, following the
 * original SoftFloat convention -- which is why int16/uint16 are
 * plain `int' here.
 * NOTE(review): `uint16' is defined as a *signed* int; it appears to
 * be unused in this file, but confirm no caller relies on unsigned
 * 16-bit wraparound semantics.
 */
typedef char flag;
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned long long int bits64;
typedef signed long long int sbits64;

typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;

typedef unsigned long long int uint64;
typedef signed long long int int64;

/*
 * IEEE 754 single/double values handled as raw integer bit patterns.
 * (`unsigned long' is 32 bits on SH4.)
 */
typedef unsigned long int float32;
typedef unsigned long long float64;

extern void float_raise(unsigned int flags);	/* in fpu.c */
extern int float_rounding_mode(void);	/* in fpu.c */

/* Forward declarations for the helpers and entry points below. */
inline bits64 extractFloat64Frac(float64 a);
inline flag extractFloat64Sign(float64 a);
inline int16 extractFloat64Exp(float64 a);
inline int16 extractFloat32Exp(float32 a);
inline flag extractFloat32Sign(float32 a);
inline bits32 extractFloat32Frac(float32 a);
inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
float64 float64_sub(float64 a, float64 b);
float32 float32_sub(float32 a, float32 b);
float32 float32_add(float32 a, float32 b);
float64 float64_add(float64 a, float64 b);
float64 float64_div(float64 a, float64 b);
float32 float32_div(float32 a, float32 b);
float32 float32_mul(float32 a, float32 b);
float64 float64_mul(float64 a, float64 b);
float32 float64_to_float32(float64 a);
inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
		   bits64 * z1Ptr);
inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
		   bits64 * z1Ptr);
inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);

static int8 countLeadingZeros32(bits32 a);
static int8 countLeadingZeros64(bits64 a);
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
					    bits64 zSig);
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
					    bits32 zSig);
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
				      bits64 * zSigPtr);
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
				      bits32 * zSigPtr);
113 inline bits64 extractFloat64Frac(float64 a)
114 {
115 	return a & LIT64(0x000FFFFFFFFFFFFF);
116 }
117 
118 inline flag extractFloat64Sign(float64 a)
119 {
120 	return a >> 63;
121 }
122 
123 inline int16 extractFloat64Exp(float64 a)
124 {
125 	return (a >> 52) & 0x7FF;
126 }
127 
128 inline int16 extractFloat32Exp(float32 a)
129 {
130 	return (a >> 23) & 0xFF;
131 }
132 
133 inline flag extractFloat32Sign(float32 a)
134 {
135 	return a >> 31;
136 }
137 
138 inline bits32 extractFloat32Frac(float32 a)
139 {
140 	return a & 0x007FFFFF;
141 }
142 
143 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
144 {
145 	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
146 }
147 
148 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
149 {
150 	bits64 z;
151 
152 	if (count == 0) {
153 		z = a;
154 	} else if (count < 64) {
155 		z = (a >> count) | ((a << ((-count) & 63)) != 0);
156 	} else {
157 		z = (a != 0);
158 	}
159 	*zPtr = z;
160 }
161 
162 static int8 countLeadingZeros32(bits32 a)
163 {
164 	static const int8 countLeadingZerosHigh[] = {
165 		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
166 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
167 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
181 	};
182 	int8 shiftCount;
183 
184 	shiftCount = 0;
185 	if (a < 0x10000) {
186 		shiftCount += 16;
187 		a <<= 16;
188 	}
189 	if (a < 0x1000000) {
190 		shiftCount += 8;
191 		a <<= 8;
192 	}
193 	shiftCount += countLeadingZerosHigh[a >> 24];
194 	return shiftCount;
195 
196 }
197 
198 static int8 countLeadingZeros64(bits64 a)
199 {
200 	int8 shiftCount;
201 
202 	shiftCount = 0;
203 	if (a < ((bits64) 1) << 32) {
204 		shiftCount += 32;
205 	} else {
206 		a >>= 32;
207 	}
208 	shiftCount += countLeadingZeros32(a);
209 	return shiftCount;
210 
211 }
212 
213 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
214 {
215 	int8 shiftCount;
216 
217 	shiftCount = countLeadingZeros64(zSig) - 1;
218 	return roundAndPackFloat64(zSign, zExp - shiftCount,
219 				   zSig << shiftCount);
220 
221 }
222 
/*
 * Returns the result of subtracting the absolute values of `a' and
 * `b' (the operands' own signs are ignored); `zSign' gives the sign
 * of the result when `a' has the larger magnitude, and is flipped
 * otherwise.  Significands are pre-shifted left by 10 bits so that
 * guard/round bits are retained through the subtraction.
 */
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 10;
	bSig <<= 10;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	/* Equal exponents from here on. */
	if (aExp == 0) {
		/* Both subnormal: treat as exponent 1, no implicit bit. */
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	/* Exact cancellation.  NOTE(review): the zero is negative under
	 * round-to-zero mode (standing in for SoftFloat's round-down
	 * case on SH4, which lacks that mode) -- confirm intended. */
	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
      bExpBigger:
	if (bExp == 0x7FF) {
		/* b is infinite: result is Inf with the flipped sign. */
		return packFloat64(zSign ^ 1, 0x7FF, 0);
	}
	if (aExp == 0) {
		++expDiff;	/* subnormal a: no implicit bit */
	} else {
		aSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(aSig, -expDiff, &aSig);
	bSig |= LIT64(0x4000000000000000);
      bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
      aExpBigger:
	if (aExp == 0x7FF) {
		/* a is infinite (or NaN): return it unchanged. */
		return a;
	}
	if (bExp == 0) {
		--expDiff;	/* subnormal b: no implicit bit */
	} else {
		bSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(bSig, expDiff, &bSig);
	aSig |= LIT64(0x4000000000000000);
      aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
      normalizeRoundAndPack:
	/* Compensate for the implicit-bit position before normalising. */
	--zExp;
	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);

}
/*
 * Returns the sum of the absolute values of `a' and `b' (the
 * operands' own signs are ignored); the result takes the sign
 * `zSign'.  Significands are pre-shifted left by 9 bits so that a
 * carry out of the addition can be detected in bit 63.
 */
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 9;
	bSig <<= 9;
	if (0 < expDiff) {
		/* a has the larger exponent: align b's significand. */
		if (aExp == 0x7FF) {
			return a;	/* Inf (or NaN) returned unchanged */
		}
		if (bExp == 0) {
			--expDiff;	/* subnormal b: no implicit bit */
		} else {
			bSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		/* b has the larger exponent: align a's significand. */
		if (bExp == 0x7FF) {
			return packFloat64(zSign, 0x7FF, 0);
		}
		if (aExp == 0) {
			++expDiff;	/* subnormal a: no implicit bit */
		} else {
			aSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		/* Equal exponents: the sum is exact. */
		if (aExp == 0x7FF) {
			return a;
		}
		if (aExp == 0)
			/* Both subnormal: result may stay subnormal. */
			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
		zSig = LIT64(0x4000000000000000) + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= LIT64(0x2000000000000000);
	/* Assume a carry occurred; undo the shift if it did not. */
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits64) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
      roundAndPack:
	return roundAndPackFloat64(zSign, zExp, zSig);

}
340 
341 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
342 {
343 	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
344 }
345 
346 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
347 {
348 	bits32 z;
349 	if (count == 0) {
350 		z = a;
351 	} else if (count < 32) {
352 		z = (a >> count) | ((a << ((-count) & 31)) != 0);
353 	} else {
354 		z = (a != 0);
355 	}
356 	*zPtr = z;
357 }
358 
/*
 * Round the significand `zSig' (which carries 7 extra guard bits in
 * its low end) and pack it with `zSign'/`zExp' into a
 * single-precision value, raising FPSCR overflow/underflow/inexact
 * cause bits as needed.
 */
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	flag roundNearestEven;
	int8 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x40;
	if (!roundNearestEven) {
		roundIncrement = 0;	/* round to zero: truncate */
	}
	roundBits = zSig & 0x7F;
	/* The unsigned cast makes negative exponents compare large, so
	 * one test catches both overflow and underflow candidates. */
	if (0xFD <= (bits16) zExp) {
		if ((0xFD < zExp)
		    || ((zExp == 0xFD)
			&& ((sbits32) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			/* Inf when rounding up, largest finite when
			 * truncating. */
			return packFloat32(zSign, 0xFF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			/* Subnormal result: detect tininess before the
			 * denormalising shift. */
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement < 0x80000000);
			shift32RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x7F;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 7;
	/* Ties-to-even: clear the LSB on an exact halfway case. */
	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat32(zSign, zExp, zSig);

}
400 
401 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
402 {
403 	int8 shiftCount;
404 
405 	shiftCount = countLeadingZeros32(zSig) - 1;
406 	return roundAndPackFloat32(zSign, zExp - shiftCount,
407 				   zSig << shiftCount);
408 }
409 
/*
 * Round the significand `zSig' (which carries 10 extra guard bits in
 * its low end) and pack it with `zSign'/`zExp' into a
 * double-precision value, raising FPSCR overflow/underflow/inexact
 * cause bits as needed.
 */
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	flag roundNearestEven;
	int16 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x200;
	if (!roundNearestEven) {
		roundIncrement = 0;	/* round to zero: truncate */
	}
	roundBits = zSig & 0x3FF;
	/* The unsigned cast makes negative exponents compare large, so
	 * one test catches both overflow and underflow candidates. */
	if (0x7FD <= (bits16) zExp) {
		if ((0x7FD < zExp)
		    || ((zExp == 0x7FD)
			&& ((sbits64) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			/* Inf when rounding up, largest finite when
			 * truncating. */
			return packFloat64(zSign, 0x7FF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			/* Subnormal result: detect tininess before the
			 * denormalising shift. */
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement <
				LIT64(0x8000000000000000));
			shift64RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x3FF;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 10;
	/* Ties-to-even: clear the LSB on an exact halfway case. */
	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat64(zSign, zExp, zSig);

}
452 
/*
 * Returns the result of subtracting the absolute values of `a' and
 * `b' (the operands' own signs are ignored); `zSign' gives the sign
 * of the result when `a' has the larger magnitude, and is flipped
 * otherwise.  Significands are pre-shifted left by 7 bits so that
 * guard/round bits are retained through the subtraction.
 */
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 7;
	bSig <<= 7;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	/* Equal exponents from here on. */
	if (aExp == 0) {
		/* Both subnormal: treat as exponent 1, no implicit bit. */
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	/* Exact cancellation.  NOTE(review): the zero is negative under
	 * round-to-zero mode (standing in for SoftFloat's round-down
	 * case on SH4, which lacks that mode) -- confirm intended. */
	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
      bExpBigger:
	if (bExp == 0xFF) {
		/* b is infinite: result is Inf with the flipped sign. */
		return packFloat32(zSign ^ 1, 0xFF, 0);
	}
	if (aExp == 0) {
		++expDiff;	/* subnormal a: no implicit bit */
	} else {
		aSig |= 0x40000000;
	}
	shift32RightJamming(aSig, -expDiff, &aSig);
	bSig |= 0x40000000;
      bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
      aExpBigger:
	if (aExp == 0xFF) {
		/* a is infinite (or NaN): return it unchanged. */
		return a;
	}
	if (bExp == 0) {
		--expDiff;	/* subnormal b: no implicit bit */
	} else {
		bSig |= 0x40000000;
	}
	shift32RightJamming(bSig, expDiff, &bSig);
	aSig |= 0x40000000;
      aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
      normalizeRoundAndPack:
	/* Compensate for the implicit-bit position before normalising. */
	--zExp;
	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);

}
514 
/*
 * Returns the sum of the absolute values of `a' and `b' (the
 * operands' own signs are ignored); the result takes the sign
 * `zSign'.  Significands are pre-shifted left by 6 bits so that a
 * carry out of the addition can be detected in bit 31.
 */
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 6;
	bSig <<= 6;
	if (0 < expDiff) {
		/* a has the larger exponent: align b's significand. */
		if (aExp == 0xFF) {
			return a;	/* Inf (or NaN) returned unchanged */
		}
		if (bExp == 0) {
			--expDiff;	/* subnormal b: no implicit bit */
		} else {
			bSig |= 0x20000000;
		}
		shift32RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		/* b has the larger exponent: align a's significand. */
		if (bExp == 0xFF) {
			return packFloat32(zSign, 0xFF, 0);
		}
		if (aExp == 0) {
			++expDiff;	/* subnormal a: no implicit bit */
		} else {
			aSig |= 0x20000000;
		}
		shift32RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		/* Equal exponents: the sum is exact. */
		if (aExp == 0xFF) {
			return a;
		}
		if (aExp == 0)
			/* Both subnormal: result may stay subnormal. */
			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
		zSig = 0x40000000 + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= 0x20000000;
	/* Assume a carry occurred; undo the shift if it did not. */
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits32) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
      roundAndPack:
	return roundAndPackFloat32(zSign, zExp, zSig);

}
571 
572 float64 float64_sub(float64 a, float64 b)
573 {
574 	flag aSign, bSign;
575 
576 	aSign = extractFloat64Sign(a);
577 	bSign = extractFloat64Sign(b);
578 	if (aSign == bSign) {
579 		return subFloat64Sigs(a, b, aSign);
580 	} else {
581 		return addFloat64Sigs(a, b, aSign);
582 	}
583 
584 }
585 
586 float32 float32_sub(float32 a, float32 b)
587 {
588 	flag aSign, bSign;
589 
590 	aSign = extractFloat32Sign(a);
591 	bSign = extractFloat32Sign(b);
592 	if (aSign == bSign) {
593 		return subFloat32Sigs(a, b, aSign);
594 	} else {
595 		return addFloat32Sigs(a, b, aSign);
596 	}
597 
598 }
599 
600 float32 float32_add(float32 a, float32 b)
601 {
602 	flag aSign, bSign;
603 
604 	aSign = extractFloat32Sign(a);
605 	bSign = extractFloat32Sign(b);
606 	if (aSign == bSign) {
607 		return addFloat32Sigs(a, b, aSign);
608 	} else {
609 		return subFloat32Sigs(a, b, aSign);
610 	}
611 
612 }
613 
614 float64 float64_add(float64 a, float64 b)
615 {
616 	flag aSign, bSign;
617 
618 	aSign = extractFloat64Sign(a);
619 	bSign = extractFloat64Sign(b);
620 	if (aSign == bSign) {
621 		return addFloat64Sigs(a, b, aSign);
622 	} else {
623 		return subFloat64Sigs(a, b, aSign);
624 	}
625 }
626 
627 static void
628 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
629 {
630 	int8 shiftCount;
631 
632 	shiftCount = countLeadingZeros64(aSig) - 11;
633 	*zSigPtr = aSig << shiftCount;
634 	*zExpPtr = 1 - shiftCount;
635 }
636 
637 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
638 		   bits64 * z1Ptr)
639 {
640 	bits64 z1;
641 
642 	z1 = a1 + b1;
643 	*z1Ptr = z1;
644 	*z0Ptr = a0 + b0 + (z1 < a1);
645 }
646 
647 inline void
648 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
649        bits64 * z1Ptr)
650 {
651 	*z1Ptr = a1 - b1;
652 	*z0Ptr = a0 - b0 - (a1 < b1);
653 }
654 
/*
 * Returns an approximation of the 64-bit quotient of the 128-bit
 * value a0:a1 divided by `b'.  Per the SoftFloat specification the
 * estimate is never less than the true quotient and exceeds it by at
 * most 2; the caller (float64_div) corrects the estimate afterwards.
 * If the quotient would overflow 64 bits (b <= a0), all-ones is
 * returned.
 */
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
{
	bits64 b0, b1;
	bits64 rem0, rem1, term0, term1;
	bits64 z;
	if (b <= a0)
		return LIT64(0xFFFFFFFFFFFFFFFF);	/* quotient overflow */
	b0 = b >> 32;
	/* Initial guess at the top 32 quotient bits. */
	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
	mul64To128(b, z, &term0, &term1);
	sub128(a0, a1, term0, term1, &rem0, &rem1);
	/* Reduce the guess while the remainder is negative. */
	while (((sbits64) rem0) < 0) {
		z -= LIT64(0x100000000);
		b1 = b << 32;
		add128(rem0, rem1, b0, b1, &rem0, &rem1);
	}
	rem0 = (rem0 << 32) | (rem1 >> 32);
	/* Estimate the low 32 quotient bits from the shifted remainder. */
	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
	return z;
}
675 
/*
 * Full 64x64 -> 128-bit unsigned multiply: the product of `a' and
 * `b' is stored as high word *z0Ptr and low word *z1Ptr.  Built from
 * four 32x32-bit partial products with explicit carry propagation.
 */
inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
{
	bits32 aHigh, aLow, bHigh, bLow;
	bits64 z0, zMiddleA, zMiddleB, z1;

	aLow = a;	/* truncation keeps the low 32 bits */
	aHigh = a >> 32;
	bLow = b;
	bHigh = b >> 32;
	z1 = ((bits64) aLow) * bLow;
	zMiddleA = ((bits64) aLow) * bHigh;
	zMiddleB = ((bits64) aHigh) * bLow;
	z0 = ((bits64) aHigh) * bHigh;
	zMiddleA += zMiddleB;
	/* Carry from summing the middle products feeds the high word. */
	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
	zMiddleA <<= 32;
	z1 += zMiddleA;
	z0 += (z1 < zMiddleA);	/* carry from the low word */
	*z1Ptr = z1;
	*z0Ptr = z0;

}
698 
699 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
700 				      bits32 * zSigPtr)
701 {
702 	int8 shiftCount;
703 
704 	shiftCount = countLeadingZeros32(aSig) - 8;
705 	*zSigPtr = aSig << shiftCount;
706 	*zExpPtr = 1 - shiftCount;
707 
708 }
709 
/*
 * Double-precision division a/b for the SH4 subnormal-emulation path.
 * Raises FPSCR cause flags via float_raise().
 */
float64 float64_div(float64 a, float64 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	bits64 rem0, rem1;
	bits64 term0, term1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0x7FF) {
		/* NOTE(review): empty branch -- Inf/Inf (and NaN operands)
		 * get no special treatment here and fall through to return
		 * Inf, although IEEE 754 calls for an invalid exception and
		 * a NaN result.  Presumably hardware filters those cases
		 * before this emulation runs -- confirm. */
		if (bExp == 0x7FF) {
		}
		return packFloat64(zSign, 0x7FF, 0);
	}
	if (bExp == 0x7FF) {
		/* finite / Inf -> signed zero */
		return packFloat64(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			/* Division by zero; 0/0 additionally raises
			 * invalid. */
			if ((aExp | aSig) == 0) {
				float_raise(FPSCR_CAUSE_INVALID);
			}
			return packFloat64(zSign, 0x7FF, 0);
		}
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x3FD;
	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
	if (bSig <= (aSig + aSig)) {
		/* Halve the dividend so the quotient stays below 2. */
		aSig >>= 1;
		++zExp;
	}
	zSig = estimateDiv128To64(aSig, 0, bSig);
	if ((zSig & 0x1FF) <= 2) {
		/* The estimate may be up to 2 too large (see
		 * estimateDiv128To64): correct it, and fold any nonzero
		 * remainder into the sticky bit. */
		mul64To128(bSig, zSig, &term0, &term1);
		sub128(aSig, 0, term0, term1, &rem0, &rem1);
		while ((sbits64) rem0 < 0) {
			--zSig;
			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
		}
		zSig |= (rem1 != 0);
	}
	return roundAndPackFloat64(zSign, zExp, zSig);

}
767 
768 float32 float32_div(float32 a, float32 b)
769 {
770 	flag aSign, bSign, zSign;
771 	int16 aExp, bExp, zExp;
772 	bits32 aSig, bSig, zSig;
773 
774 	aSig = extractFloat32Frac(a);
775 	aExp = extractFloat32Exp(a);
776 	aSign = extractFloat32Sign(a);
777 	bSig = extractFloat32Frac(b);
778 	bExp = extractFloat32Exp(b);
779 	bSign = extractFloat32Sign(b);
780 	zSign = aSign ^ bSign;
781 	if (aExp == 0xFF) {
782 		if (bExp == 0xFF) {
783 		}
784 		return packFloat32(zSign, 0xFF, 0);
785 	}
786 	if (bExp == 0xFF) {
787 		return packFloat32(zSign, 0, 0);
788 	}
789 	if (bExp == 0) {
790 		if (bSig == 0) {
791 			return packFloat32(zSign, 0xFF, 0);
792 		}
793 		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
794 	}
795 	if (aExp == 0) {
796 		if (aSig == 0)
797 			return packFloat32(zSign, 0, 0);
798 		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
799 	}
800 	zExp = aExp - bExp + 0x7D;
801 	aSig = (aSig | 0x00800000) << 7;
802 	bSig = (bSig | 0x00800000) << 8;
803 	if (bSig <= (aSig + aSig)) {
804 		aSig >>= 1;
805 		++zExp;
806 	}
807 	zSig = (((bits64) aSig) << 32) / bSig;
808 	if ((zSig & 0x3F) == 0) {
809 		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
810 	}
811 	return roundAndPackFloat32(zSign, zExp, zSig);
812 
813 }
814 
/*
 * Single-precision multiplication a*b for the SH4 subnormal-emulation
 * path.  (This routine uses plain C types rather than the SoftFloat
 * typedefs used elsewhere in this file.)
 */
float32 float32_mul(float32 a, float32 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned int aSig, bSig;
	unsigned long long zSig64;
	unsigned int zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0) {
		/* NOTE(review): the zero checks precede the Inf check, so
		 * 0 * Inf silently returns zero rather than raising
		 * invalid -- confirm intended. */
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	/* NOTE(review): an Inf operand is routed through
	 * roundAndPackFloat32() with zExp == 0xff, which raises
	 * overflow/inexact and, in round-to-zero mode, returns the
	 * largest finite value instead of Inf -- confirm intended. */
	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
		return roundAndPackFloat32(zSign, 0xff, 0);

	zExp = aExp + bExp - 0x7F;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	/* 24x24-bit product, keeping the high bits plus a sticky bit. */
	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
	zSig = zSig64;
	if (0 <= (signed int)(zSig << 1)) {
		/* Product was below 1.0: renormalise. */
		zSig <<= 1;
		--zExp;
	}
	return roundAndPackFloat32(zSign, zExp, zSig);

}
855 
/*
 * Double-precision multiplication a*b for the SH4 subnormal-emulation
 * path.  (This routine uses plain C types rather than the SoftFloat
 * typedefs used elsewhere in this file.)
 */
float64 float64_mul(float64 a, float64 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned long long int aSig, bSig, zSig0, zSig1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;

	if (aExp == 0) {
		/* NOTE(review): the zero checks precede the Inf check, so
		 * 0 * Inf silently returns zero rather than raising
		 * invalid -- confirm intended. */
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	/* NOTE(review): an Inf operand is routed through
	 * roundAndPackFloat64() with zExp == 0x7ff, which raises
	 * overflow/inexact and, in round-to-zero mode, returns the
	 * largest finite value instead of Inf -- confirm intended. */
	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
		return roundAndPackFloat64(zSign, 0x7ff, 0);

	zExp = aExp + bExp - 0x3FF;
	aSig = (aSig | 0x0010000000000000LL) << 10;
	bSig = (bSig | 0x0010000000000000LL) << 11;
	/* 53x53-bit product; low-half bits collapse into a sticky bit. */
	mul64To128(aSig, bSig, &zSig0, &zSig1);
	zSig0 |= (zSig1 != 0);
	if (0 <= (signed long long int)(zSig0 << 1)) {
		/* Product was below 1.0: renormalise. */
		zSig0 <<= 1;
		--zExp;
	}
	return roundAndPackFloat64(zSign, zExp, zSig0);
}
894 
/*
 * Returns the result of converting the double-precision floating-point
 * value `a' to the single-precision floating-point format.  The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 */
903 float32 float64_to_float32(float64 a)
904 {
905     flag aSign;
906     int16 aExp;
907     bits64 aSig;
908     bits32 zSig;
909 
910     aSig = extractFloat64Frac( a );
911     aExp = extractFloat64Exp( a );
912     aSign = extractFloat64Sign( a );
913 
914     shift64RightJamming( aSig, 22, &aSig );
915     zSig = aSig;
916     if ( aExp || zSig ) {
917         zSig |= 0x40000000;
918         aExp -= 0x381;
919     }
920     return roundAndPackFloat32(aSign, aExp, zSig);
921 }
922