xref: /openbmc/linux/arch/parisc/math-emu/fmpyfadd.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
1 /*
2  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3  *
4  * Floating-point emulation code
5  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2, or (at your option)
10  *    any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 /*
22  * BEGIN_DESC
23  *
24  *  File:
25  *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
26  *
27  *  Purpose:
28  *	Double Floating-point Multiply Fused Add
29  *	Double Floating-point Multiply Negate Fused Add
30  *	Single Floating-point Multiply Fused Add
31  *	Single Floating-point Multiply Negate Fused Add
32  *
33  *  External Interfaces:
34  *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35  *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36  *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37  *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38  *
39  *  Internal Interfaces:
40  *
41  *  Theory:
42  *	<<please update with a overview of the operation of this file>>
43  *
44  * END_DESC
45 */
46 
47 
48 #include "float.h"
49 #include "sgl_float.h"
50 #include "dbl_float.h"
51 
52 
53 /*
54  *  Double Floating-point Multiply Fused Add
55  */
56 
57 int
58 dbl_fmpyfadd(
59 	    dbl_floating_point *src1ptr,
60 	    dbl_floating_point *src2ptr,
61 	    dbl_floating_point *src3ptr,
62 	    unsigned int *status,
63 	    dbl_floating_point *dstptr)
64 {
65 	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66 	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67 	unsigned int rightp1, rightp2, rightp3, rightp4;
68 	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69 	register int mpy_exponent, add_exponent, count;
70 	boolean inexact = FALSE, is_tiny = FALSE;
71 
72 	unsigned int signlessleft1, signlessright1, save;
73 	register int result_exponent, diff_exponent;
74 	int sign_save, jumpsize;
75 
76 	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77 	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78 	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79 
80 	/*
81 	 * set sign bit of result of multiply
82 	 */
83 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84 		Dbl_setnegativezerop1(resultp1);
85 	else Dbl_setzerop1(resultp1);
86 
87 	/*
88 	 * Generate multiply exponent
89 	 */
90 	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91 
92 	/*
93 	 * check first operand for NaN's or infinity
94 	 */
95 	if (Dbl_isinfinity_exponent(opnd1p1)) {
96 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97 			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98 			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
99 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100 					/*
101 					 * invalid since operands are infinity
102 					 * and zero
103 					 */
104 					if (Is_invalidtrap_enabled())
105 						return(OPC_2E_INVALIDEXCEPTION);
106 					Set_invalidflag();
107 					Dbl_makequietnan(resultp1,resultp2);
108 					Dbl_copytoptr(resultp1,resultp2,dstptr);
109 					return(NOEXCEPTION);
110 				}
111 				/*
112 				 * Check third operand for infinity with a
113 				 *  sign opposite of the multiply result
114 				 */
115 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117 					/*
118 					 * invalid since attempting a magnitude
119 					 * subtraction of infinities
120 					 */
121 					if (Is_invalidtrap_enabled())
122 						return(OPC_2E_INVALIDEXCEPTION);
123 					Set_invalidflag();
124 					Dbl_makequietnan(resultp1,resultp2);
125 					Dbl_copytoptr(resultp1,resultp2,dstptr);
126 					return(NOEXCEPTION);
127 				}
128 
129 				/*
130 			 	 * return infinity
131 			 	 */
132 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133 				Dbl_copytoptr(resultp1,resultp2,dstptr);
134 				return(NOEXCEPTION);
135 			}
136 		}
137 		else {
138 			/*
139 		 	 * is NaN; signaling or quiet?
140 		 	 */
141 			if (Dbl_isone_signaling(opnd1p1)) {
142 				/* trap if INVALIDTRAP enabled */
143 				if (Is_invalidtrap_enabled())
144 			    		return(OPC_2E_INVALIDEXCEPTION);
145 				/* make NaN quiet */
146 				Set_invalidflag();
147 				Dbl_set_quiet(opnd1p1);
148 			}
149 			/*
150 			 * is second operand a signaling NaN?
151 			 */
152 			else if (Dbl_is_signalingnan(opnd2p1)) {
153 				/* trap if INVALIDTRAP enabled */
154 				if (Is_invalidtrap_enabled())
155 			    		return(OPC_2E_INVALIDEXCEPTION);
156 				/* make NaN quiet */
157 				Set_invalidflag();
158 				Dbl_set_quiet(opnd2p1);
159 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160 				return(NOEXCEPTION);
161 			}
162 			/*
163 			 * is third operand a signaling NaN?
164 			 */
165 			else if (Dbl_is_signalingnan(opnd3p1)) {
166 				/* trap if INVALIDTRAP enabled */
167 				if (Is_invalidtrap_enabled())
168 			    		return(OPC_2E_INVALIDEXCEPTION);
169 				/* make NaN quiet */
170 				Set_invalidflag();
171 				Dbl_set_quiet(opnd3p1);
172 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173 				return(NOEXCEPTION);
174 			}
175 			/*
176 		 	 * return quiet NaN
177 		 	 */
178 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179 			return(NOEXCEPTION);
180 		}
181 	}
182 
183 	/*
184 	 * check second operand for NaN's or infinity
185 	 */
186 	if (Dbl_isinfinity_exponent(opnd2p1)) {
187 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188 			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189 				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190 					/*
191 					 * invalid since multiply operands are
192 					 * zero & infinity
193 					 */
194 					if (Is_invalidtrap_enabled())
195 						return(OPC_2E_INVALIDEXCEPTION);
196 					Set_invalidflag();
197 					Dbl_makequietnan(opnd2p1,opnd2p2);
198 					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199 					return(NOEXCEPTION);
200 				}
201 
202 				/*
203 				 * Check third operand for infinity with a
204 				 *  sign opposite of the multiply result
205 				 */
206 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208 					/*
209 					 * invalid since attempting a magnitude
210 					 * subtraction of infinities
211 					 */
212 					if (Is_invalidtrap_enabled())
213 				       		return(OPC_2E_INVALIDEXCEPTION);
214 				       	Set_invalidflag();
215 				       	Dbl_makequietnan(resultp1,resultp2);
216 					Dbl_copytoptr(resultp1,resultp2,dstptr);
217 					return(NOEXCEPTION);
218 				}
219 
220 				/*
221 				 * return infinity
222 				 */
223 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224 				Dbl_copytoptr(resultp1,resultp2,dstptr);
225 				return(NOEXCEPTION);
226 			}
227 		}
228 		else {
229 			/*
230 			 * is NaN; signaling or quiet?
231 			 */
232 			if (Dbl_isone_signaling(opnd2p1)) {
233 				/* trap if INVALIDTRAP enabled */
234 				if (Is_invalidtrap_enabled())
235 					return(OPC_2E_INVALIDEXCEPTION);
236 				/* make NaN quiet */
237 				Set_invalidflag();
238 				Dbl_set_quiet(opnd2p1);
239 			}
240 			/*
241 			 * is third operand a signaling NaN?
242 			 */
243 			else if (Dbl_is_signalingnan(opnd3p1)) {
244 			       	/* trap if INVALIDTRAP enabled */
245 			       	if (Is_invalidtrap_enabled())
246 				   		return(OPC_2E_INVALIDEXCEPTION);
247 			       	/* make NaN quiet */
248 			       	Set_invalidflag();
249 			       	Dbl_set_quiet(opnd3p1);
250 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251 		       		return(NOEXCEPTION);
252 			}
253 			/*
254 			 * return quiet NaN
255 			 */
256 			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257 			return(NOEXCEPTION);
258 		}
259 	}
260 
261 	/*
262 	 * check third operand for NaN's or infinity
263 	 */
264 	if (Dbl_isinfinity_exponent(opnd3p1)) {
265 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266 			/* return infinity */
267 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268 			return(NOEXCEPTION);
269 		} else {
270 			/*
271 			 * is NaN; signaling or quiet?
272 			 */
273 			if (Dbl_isone_signaling(opnd3p1)) {
274 				/* trap if INVALIDTRAP enabled */
275 				if (Is_invalidtrap_enabled())
276 					return(OPC_2E_INVALIDEXCEPTION);
277 				/* make NaN quiet */
278 				Set_invalidflag();
279 				Dbl_set_quiet(opnd3p1);
280 			}
281 			/*
282 			 * return quiet NaN
283  			 */
284 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285 			return(NOEXCEPTION);
286 		}
287     	}
288 
289 	/*
290 	 * Generate multiply mantissa
291 	 */
292 	if (Dbl_isnotzero_exponent(opnd1p1)) {
293 		/* set hidden bit */
294 		Dbl_clear_signexponent_set_hidden(opnd1p1);
295 	}
296 	else {
297 		/* check for zero */
298 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299 			/*
300 			 * Perform the add opnd3 with zero here.
301 			 */
302 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303 				if (Is_rounding_mode(ROUNDMINUS)) {
304 					Dbl_or_signs(opnd3p1,resultp1);
305 				} else {
306 					Dbl_and_signs(opnd3p1,resultp1);
307 				}
308 			}
309 			/*
310 			 * Now let's check for trapped underflow case.
311 			 */
312 			else if (Dbl_iszero_exponent(opnd3p1) &&
313 			         Is_underflowtrap_enabled()) {
314                     		/* need to normalize results mantissa */
315                     		sign_save = Dbl_signextendedsign(opnd3p1);
316 				result_exponent = 0;
317                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
318                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
320                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321 							unfl);
322                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323                     		/* inexact = FALSE */
324                     		return(OPC_2E_UNDERFLOWEXCEPTION);
325 			}
326 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327 			return(NOEXCEPTION);
328 		}
329 		/* is denormalized, adjust exponent */
330 		Dbl_clear_signexponent(opnd1p1);
331 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
332 		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333 	}
334 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
335 	if (Dbl_isnotzero_exponent(opnd2p1)) {
336 		Dbl_clear_signexponent_set_hidden(opnd2p1);
337 	}
338 	else {
339 		/* check for zero */
340 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341 			/*
342 			 * Perform the add opnd3 with zero here.
343 			 */
344 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345 				if (Is_rounding_mode(ROUNDMINUS)) {
346 					Dbl_or_signs(opnd3p1,resultp1);
347 				} else {
348 					Dbl_and_signs(opnd3p1,resultp1);
349 				}
350 			}
351 			/*
352 			 * Now let's check for trapped underflow case.
353 			 */
354 			else if (Dbl_iszero_exponent(opnd3p1) &&
355 			    Is_underflowtrap_enabled()) {
356                     		/* need to normalize results mantissa */
357                     		sign_save = Dbl_signextendedsign(opnd3p1);
358 				result_exponent = 0;
359                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
360                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
362                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363 							unfl);
364                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365                     		/* inexact = FALSE */
366 				return(OPC_2E_UNDERFLOWEXCEPTION);
367 			}
368 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369 			return(NOEXCEPTION);
370 		}
371 		/* is denormalized; want to normalize */
372 		Dbl_clear_signexponent(opnd2p1);
373 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
374 		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375 	}
376 
377 	/* Multiply the first two source mantissas together */
378 
379 	/*
380 	 * The intermediate result will be kept in tmpres,
381 	 * which needs enough room for 106 bits of mantissa,
382 	 * so lets call it a Double extended.
383 	 */
384 	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385 
386 	/*
387 	 * Four bits at a time are inspected in each loop, and a
388 	 * simple shift and add multiply algorithm is used.
389 	 */
390 	for (count = DBL_P-1; count >= 0; count -= 4) {
391 		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392 		if (Dbit28p2(opnd1p2)) {
393 	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
394 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395 			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396 		}
397 		if (Dbit29p2(opnd1p2)) {
398 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399 			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400 		}
401 		if (Dbit30p2(opnd1p2)) {
402 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403 			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404 		}
405 		if (Dbit31p2(opnd1p2)) {
406 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407 			 opnd2p1, opnd2p2, 0, 0);
408 		}
409 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
410 	}
411 	if (Is_dexthiddenoverflow(tmpresp1)) {
412 		/* result mantissa >= 2 (mantissa overflow) */
413 		mpy_exponent++;
414 		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415 	}
416 
417 	/*
418 	 * Restore the sign of the mpy result which was saved in resultp1.
419 	 * The exponent will continue to be kept in mpy_exponent.
420 	 */
421 	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422 
423 	/*
424 	 * No rounding is required, since the result of the multiply
425 	 * is exact in the extended format.
426 	 */
427 
428 	/*
429 	 * Now we are ready to perform the add portion of the operation.
430 	 *
431 	 * The exponents need to be kept as integers for now, since the
432 	 * multiply result might not fit into the exponent field.  We
433 	 * can't overflow or underflow because of this yet, since the
434 	 * add could bring the final result back into range.
435 	 */
436 	add_exponent = Dbl_exponent(opnd3p1);
437 
438 	/*
439 	 * Check for denormalized or zero add operand.
440 	 */
441 	if (add_exponent == 0) {
442 		/* check for zero */
443 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444 			/* right is zero */
445 			/* Left can't be zero and must be result.
446 			 *
447 			 * The final result is now in tmpres and mpy_exponent,
448 			 * and needs to be rounded and squeezed back into
449 			 * double precision format from double extended.
450 			 */
451 			result_exponent = mpy_exponent;
452 			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453 				resultp1,resultp2,resultp3,resultp4);
454 			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455 			goto round;
456 		}
457 
458 		/*
459 		 * Neither are zeroes.
460 		 * Adjust exponent and normalize add operand.
461 		 */
462 		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
463 		Dbl_clear_signexponent(opnd3p1);
464 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
465 		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466 		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
467 	} else {
468 		Dbl_clear_exponent_set_hidden(opnd3p1);
469 	}
470 	/*
471 	 * Copy opnd3 to the double extended variable called right.
472 	 */
473 	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474 
475 	/*
476 	 * A zero "save" helps discover equal operands (for later),
477 	 * and is used in swapping operands (if needed).
478 	 */
479 	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480 
481 	/*
482 	 * Compare magnitude of operands.
483 	 */
484 	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485 	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487 	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488 		/*
489 		 * Set the left operand to the larger one by XOR swap.
490 		 * First finish the first word "save".
491 		 */
492 		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493 		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494 		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495 			rightp2,rightp3,rightp4);
496 		/* also setup exponents used in rest of routine */
497 		diff_exponent = add_exponent - mpy_exponent;
498 		result_exponent = add_exponent;
499 	} else {
500 		/* also setup exponents used in rest of routine */
501 		diff_exponent = mpy_exponent - add_exponent;
502 		result_exponent = mpy_exponent;
503 	}
504 	/* Invariant: left is not smaller than right. */
505 
506 	/*
507 	 * Special case alignment of operands that would force alignment
508 	 * beyond the extent of the extension.  A further optimization
509 	 * could special case this but only reduces the path length for
510 	 * this infrequent case.
511 	 */
512 	if (diff_exponent > DBLEXT_THRESHOLD) {
513 		diff_exponent = DBLEXT_THRESHOLD;
514 	}
515 
516 	/* Align right operand by shifting it to the right */
517 	Dblext_clear_sign(rightp1);
518 	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519 		/*shifted by*/diff_exponent);
520 
521 	/* Treat sum and difference of the operands separately. */
522 	if ((int)save < 0) {
523 		/*
524 		 * Difference of the two operands.  Overflow can occur if the
525 		 * multiply overflowed.  A borrow can occur out of the hidden
526 		 * bit and force a post normalization phase.
527 		 */
528 		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529 			rightp1,rightp2,rightp3,rightp4,
530 			resultp1,resultp2,resultp3,resultp4);
531 		sign_save = Dbl_signextendedsign(resultp1);
532 		if (Dbl_iszero_hidden(resultp1)) {
533 			/* Handle normalization */
534 		/* A straightforward algorithm would now shift the
535 		 * result and extension left until the hidden bit
536 		 * becomes one.  Not all of the extension bits need
537 		 * participate in the shift.  Only the two most
538 		 * significant bits (round and guard) are needed.
539 		 * If only a single shift is needed then the guard
540 		 * bit becomes a significant low order bit and the
541 		 * extension must participate in the rounding.
542 		 * If more than a single shift is needed, then all
543 		 * bits to the right of the guard bit are zeros,
544 		 * and the guard bit may or may not be zero. */
545 			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546 				resultp4);
547 
548 			/* Need to check for a zero result.  The sign and
549 			 * exponent fields have already been zeroed.  The more
550 			 * efficient test of the full object can be used.
551 			 */
552 			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553 				/* Must have been "x-x" or "x+(-x)". */
554 				if (Is_rounding_mode(ROUNDMINUS))
555 					Dbl_setone_sign(resultp1);
556 				Dbl_copytoptr(resultp1,resultp2,dstptr);
557 				return(NOEXCEPTION);
558 			}
559 			result_exponent--;
560 
561 			/* Look to see if normalization is finished. */
562 			if (Dbl_isone_hidden(resultp1)) {
563 				/* No further normalization is needed */
564 				goto round;
565 			}
566 
567 			/* Discover first one bit to determine shift amount.
568 			 * Use a modified binary search.  We have already
569 			 * shifted the result one position right and still
570 			 * not found a one so the remainder of the extension
571 			 * must be zero and simplifies rounding. */
572 			/* Scan bytes */
573 			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574 				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575 				result_exponent -= 8;
576 			}
577 			/* Now narrow it down to the nibble */
578 			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579 				/* The lower nibble contains the
580 				 * normalizing one */
581 				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582 				result_exponent -= 4;
583 			}
584 			/* Select case where first bit is set (already
585 			 * normalized) otherwise select the proper shift. */
586 			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587 			if (jumpsize <= 7) switch(jumpsize) {
588 			case 1:
589 				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590 					resultp4);
591 				result_exponent -= 3;
592 				break;
593 			case 2:
594 			case 3:
595 				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596 					resultp4);
597 				result_exponent -= 2;
598 				break;
599 			case 4:
600 			case 5:
601 			case 6:
602 			case 7:
603 				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604 					resultp4);
605 				result_exponent -= 1;
606 				break;
607 			}
608 		} /* end if (hidden...)... */
609 	/* Fall through and round */
610 	} /* end if (save < 0)... */
611 	else {
612 		/* Add magnitudes */
613 		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614 			rightp1,rightp2,rightp3,rightp4,
615 			/*to*/resultp1,resultp2,resultp3,resultp4);
616 		sign_save = Dbl_signextendedsign(resultp1);
617 		if (Dbl_isone_hiddenoverflow(resultp1)) {
618 	    		/* Prenormalization required. */
619 	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620 				resultp4);
621 	    		result_exponent++;
622 		} /* end if hiddenoverflow... */
623 	} /* end else ...add magnitudes... */
624 
625 	/* Round the result.  If the extension and lower two words are
626 	 * all zeros, then the result is exact.  Otherwise round in the
627 	 * correct direction.  Underflow is possible. If a postnormalization
628 	 * is necessary, then the mantissa is all zeros so no shift is needed.
629 	 */
630   round:
631 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632 		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633 			result_exponent,is_tiny);
634 	}
635 	Dbl_set_sign(resultp1,/*using*/sign_save);
636 	if (Dblext_isnotzero_mantissap3(resultp3) ||
637 	    Dblext_isnotzero_mantissap4(resultp4)) {
638 		inexact = TRUE;
639 		switch(Rounding_mode()) {
640 		case ROUNDNEAREST: /* The default. */
641 			if (Dblext_isone_highp3(resultp3)) {
642 				/* at least 1/2 ulp */
643 				if (Dblext_isnotzero_low31p3(resultp3) ||
644 				    Dblext_isnotzero_mantissap4(resultp4) ||
645 				    Dblext_isone_lowp2(resultp2)) {
646 					/* either exactly half way and odd or
647 					 * more than 1/2ulp */
648 					Dbl_increment(resultp1,resultp2);
649 				}
650 			}
651 	    		break;
652 
653 		case ROUNDPLUS:
654 	    		if (Dbl_iszero_sign(resultp1)) {
655 				/* Round up positive results */
656 				Dbl_increment(resultp1,resultp2);
657 			}
658 			break;
659 
660 		case ROUNDMINUS:
661 	    		if (Dbl_isone_sign(resultp1)) {
662 				/* Round down negative results */
663 				Dbl_increment(resultp1,resultp2);
664 			}
665 
666 		case ROUNDZERO:;
667 			/* truncate is simple */
668 		} /* end switch... */
669 		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670 	}
671 	if (result_exponent >= DBL_INFINITY_EXPONENT) {
672                 /* trap if OVERFLOWTRAP enabled */
673                 if (Is_overflowtrap_enabled()) {
674                         /*
675                          * Adjust bias of result
676                          */
677                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678                         Dbl_copytoptr(resultp1,resultp2,dstptr);
679                         if (inexact)
680                             if (Is_inexacttrap_enabled())
681                                 return (OPC_2E_OVERFLOWEXCEPTION |
682 					OPC_2E_INEXACTEXCEPTION);
683                             else Set_inexactflag();
684                         return (OPC_2E_OVERFLOWEXCEPTION);
685                 }
686                 inexact = TRUE;
687                 Set_overflowflag();
688                 /* set result to infinity or largest number */
689                 Dbl_setoverflow(resultp1,resultp2);
690 
691 	} else if (result_exponent <= 0) {	/* underflow case */
692 		if (Is_underflowtrap_enabled()) {
693                         /*
694                          * Adjust bias of result
695                          */
696                 	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697 			Dbl_copytoptr(resultp1,resultp2,dstptr);
698                         if (inexact)
699                             if (Is_inexacttrap_enabled())
700                                 return (OPC_2E_UNDERFLOWEXCEPTION |
701 					OPC_2E_INEXACTEXCEPTION);
702                             else Set_inexactflag();
703 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
704 		}
705 		else if (inexact && is_tiny) Set_underflowflag();
706 	}
707 	else Dbl_set_exponent(resultp1,result_exponent);
708 	Dbl_copytoptr(resultp1,resultp2,dstptr);
709 	if (inexact)
710 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711 		else Set_inexactflag();
712     	return(NOEXCEPTION);
713 }
714 
715 /*
716  *  Double Floating-point Multiply Negate Fused Add
717  */
718 
719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720 
721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722 unsigned int *status;
723 {
724 	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725 	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726 	unsigned int rightp1, rightp2, rightp3, rightp4;
727 	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728 	register int mpy_exponent, add_exponent, count;
729 	boolean inexact = FALSE, is_tiny = FALSE;
730 
731 	unsigned int signlessleft1, signlessright1, save;
732 	register int result_exponent, diff_exponent;
733 	int sign_save, jumpsize;
734 
735 	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736 	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737 	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738 
739 	/*
740 	 * set sign bit of result of multiply
741 	 */
742 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743 		Dbl_setzerop1(resultp1);
744 	else
745 		Dbl_setnegativezerop1(resultp1);
746 
747 	/*
748 	 * Generate multiply exponent
749 	 */
750 	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751 
752 	/*
753 	 * check first operand for NaN's or infinity
754 	 */
755 	if (Dbl_isinfinity_exponent(opnd1p1)) {
756 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757 			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758 			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
759 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760 					/*
761 					 * invalid since operands are infinity
762 					 * and zero
763 					 */
764 					if (Is_invalidtrap_enabled())
765 						return(OPC_2E_INVALIDEXCEPTION);
766 					Set_invalidflag();
767 					Dbl_makequietnan(resultp1,resultp2);
768 					Dbl_copytoptr(resultp1,resultp2,dstptr);
769 					return(NOEXCEPTION);
770 				}
771 				/*
772 				 * Check third operand for infinity with a
773 				 *  sign opposite of the multiply result
774 				 */
775 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777 					/*
778 					 * invalid since attempting a magnitude
779 					 * subtraction of infinities
780 					 */
781 					if (Is_invalidtrap_enabled())
782 						return(OPC_2E_INVALIDEXCEPTION);
783 					Set_invalidflag();
784 					Dbl_makequietnan(resultp1,resultp2);
785 					Dbl_copytoptr(resultp1,resultp2,dstptr);
786 					return(NOEXCEPTION);
787 				}
788 
789 				/*
790 			 	 * return infinity
791 			 	 */
792 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793 				Dbl_copytoptr(resultp1,resultp2,dstptr);
794 				return(NOEXCEPTION);
795 			}
796 		}
797 		else {
798 			/*
799 		 	 * is NaN; signaling or quiet?
800 		 	 */
801 			if (Dbl_isone_signaling(opnd1p1)) {
802 				/* trap if INVALIDTRAP enabled */
803 				if (Is_invalidtrap_enabled())
804 			    		return(OPC_2E_INVALIDEXCEPTION);
805 				/* make NaN quiet */
806 				Set_invalidflag();
807 				Dbl_set_quiet(opnd1p1);
808 			}
809 			/*
810 			 * is second operand a signaling NaN?
811 			 */
812 			else if (Dbl_is_signalingnan(opnd2p1)) {
813 				/* trap if INVALIDTRAP enabled */
814 				if (Is_invalidtrap_enabled())
815 			    		return(OPC_2E_INVALIDEXCEPTION);
816 				/* make NaN quiet */
817 				Set_invalidflag();
818 				Dbl_set_quiet(opnd2p1);
819 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820 				return(NOEXCEPTION);
821 			}
822 			/*
823 			 * is third operand a signaling NaN?
824 			 */
825 			else if (Dbl_is_signalingnan(opnd3p1)) {
826 				/* trap if INVALIDTRAP enabled */
827 				if (Is_invalidtrap_enabled())
828 			    		return(OPC_2E_INVALIDEXCEPTION);
829 				/* make NaN quiet */
830 				Set_invalidflag();
831 				Dbl_set_quiet(opnd3p1);
832 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833 				return(NOEXCEPTION);
834 			}
835 			/*
836 		 	 * return quiet NaN
837 		 	 */
838 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839 			return(NOEXCEPTION);
840 		}
841 	}
842 
843 	/*
844 	 * check second operand for NaN's or infinity
845 	 */
846 	if (Dbl_isinfinity_exponent(opnd2p1)) {
847 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848 			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849 				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850 					/*
851 					 * invalid since multiply operands are
852 					 * zero & infinity
853 					 */
854 					if (Is_invalidtrap_enabled())
855 						return(OPC_2E_INVALIDEXCEPTION);
856 					Set_invalidflag();
857 					Dbl_makequietnan(opnd2p1,opnd2p2);
858 					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859 					return(NOEXCEPTION);
860 				}
861 
862 				/*
863 				 * Check third operand for infinity with a
864 				 *  sign opposite of the multiply result
865 				 */
866 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868 					/*
869 					 * invalid since attempting a magnitude
870 					 * subtraction of infinities
871 					 */
872 					if (Is_invalidtrap_enabled())
873 				       		return(OPC_2E_INVALIDEXCEPTION);
874 				       	Set_invalidflag();
875 				       	Dbl_makequietnan(resultp1,resultp2);
876 					Dbl_copytoptr(resultp1,resultp2,dstptr);
877 					return(NOEXCEPTION);
878 				}
879 
880 				/*
881 				 * return infinity
882 				 */
883 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884 				Dbl_copytoptr(resultp1,resultp2,dstptr);
885 				return(NOEXCEPTION);
886 			}
887 		}
888 		else {
889 			/*
890 			 * is NaN; signaling or quiet?
891 			 */
892 			if (Dbl_isone_signaling(opnd2p1)) {
893 				/* trap if INVALIDTRAP enabled */
894 				if (Is_invalidtrap_enabled())
895 					return(OPC_2E_INVALIDEXCEPTION);
896 				/* make NaN quiet */
897 				Set_invalidflag();
898 				Dbl_set_quiet(opnd2p1);
899 			}
900 			/*
901 			 * is third operand a signaling NaN?
902 			 */
903 			else if (Dbl_is_signalingnan(opnd3p1)) {
904 			       	/* trap if INVALIDTRAP enabled */
905 			       	if (Is_invalidtrap_enabled())
906 				   		return(OPC_2E_INVALIDEXCEPTION);
907 			       	/* make NaN quiet */
908 			       	Set_invalidflag();
909 			       	Dbl_set_quiet(opnd3p1);
910 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911 		       		return(NOEXCEPTION);
912 			}
913 			/*
914 			 * return quiet NaN
915 			 */
916 			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917 			return(NOEXCEPTION);
918 		}
919 	}
920 
921 	/*
922 	 * check third operand for NaN's or infinity
923 	 */
924 	if (Dbl_isinfinity_exponent(opnd3p1)) {
925 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926 			/* return infinity */
927 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928 			return(NOEXCEPTION);
929 		} else {
930 			/*
931 			 * is NaN; signaling or quiet?
932 			 */
933 			if (Dbl_isone_signaling(opnd3p1)) {
934 				/* trap if INVALIDTRAP enabled */
935 				if (Is_invalidtrap_enabled())
936 					return(OPC_2E_INVALIDEXCEPTION);
937 				/* make NaN quiet */
938 				Set_invalidflag();
939 				Dbl_set_quiet(opnd3p1);
940 			}
941 			/*
942 			 * return quiet NaN
943  			 */
944 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945 			return(NOEXCEPTION);
946 		}
947     	}
948 
949 	/*
950 	 * Generate multiply mantissa
951 	 */
952 	if (Dbl_isnotzero_exponent(opnd1p1)) {
953 		/* set hidden bit */
954 		Dbl_clear_signexponent_set_hidden(opnd1p1);
955 	}
956 	else {
957 		/* check for zero */
958 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959 			/*
960 			 * Perform the add opnd3 with zero here.
961 			 */
962 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963 				if (Is_rounding_mode(ROUNDMINUS)) {
964 					Dbl_or_signs(opnd3p1,resultp1);
965 				} else {
966 					Dbl_and_signs(opnd3p1,resultp1);
967 				}
968 			}
969 			/*
970 			 * Now let's check for trapped underflow case.
971 			 */
972 			else if (Dbl_iszero_exponent(opnd3p1) &&
973 			         Is_underflowtrap_enabled()) {
974                     		/* need to normalize results mantissa */
975                     		sign_save = Dbl_signextendedsign(opnd3p1);
976 				result_exponent = 0;
977                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
978                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
980                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981 							unfl);
982                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983                     		/* inexact = FALSE */
984                     		return(OPC_2E_UNDERFLOWEXCEPTION);
985 			}
986 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987 			return(NOEXCEPTION);
988 		}
989 		/* is denormalized, adjust exponent */
990 		Dbl_clear_signexponent(opnd1p1);
991 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
992 		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993 	}
994 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
995 	if (Dbl_isnotzero_exponent(opnd2p1)) {
996 		Dbl_clear_signexponent_set_hidden(opnd2p1);
997 	}
998 	else {
999 		/* check for zero */
1000 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001 			/*
1002 			 * Perform the add opnd3 with zero here.
1003 			 */
1004 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005 				if (Is_rounding_mode(ROUNDMINUS)) {
1006 					Dbl_or_signs(opnd3p1,resultp1);
1007 				} else {
1008 					Dbl_and_signs(opnd3p1,resultp1);
1009 				}
1010 			}
1011 			/*
1012 			 * Now let's check for trapped underflow case.
1013 			 */
1014 			else if (Dbl_iszero_exponent(opnd3p1) &&
1015 			    Is_underflowtrap_enabled()) {
1016                     		/* need to normalize results mantissa */
1017                     		sign_save = Dbl_signextendedsign(opnd3p1);
1018 				result_exponent = 0;
1019                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023 							unfl);
1024                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025                     		/* inexact = FALSE */
1026                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1027 			}
1028 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029 			return(NOEXCEPTION);
1030 		}
1031 		/* is denormalized; want to normalize */
1032 		Dbl_clear_signexponent(opnd2p1);
1033 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034 		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035 	}
1036 
1037 	/* Multiply the first two source mantissas together */
1038 
1039 	/*
1040 	 * The intermediate result will be kept in tmpres,
1041 	 * which needs enough room for 106 bits of mantissa,
1042 	 * so lets call it a Double extended.
1043 	 */
1044 	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045 
1046 	/*
1047 	 * Four bits at a time are inspected in each loop, and a
1048 	 * simple shift and add multiply algorithm is used.
1049 	 */
1050 	for (count = DBL_P-1; count >= 0; count -= 4) {
1051 		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052 		if (Dbit28p2(opnd1p2)) {
1053 	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1054 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055 			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056 		}
1057 		if (Dbit29p2(opnd1p2)) {
1058 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059 			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060 		}
1061 		if (Dbit30p2(opnd1p2)) {
1062 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063 			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064 		}
1065 		if (Dbit31p2(opnd1p2)) {
1066 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067 			 opnd2p1, opnd2p2, 0, 0);
1068 		}
1069 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070 	}
1071 	if (Is_dexthiddenoverflow(tmpresp1)) {
1072 		/* result mantissa >= 2 (mantissa overflow) */
1073 		mpy_exponent++;
1074 		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075 	}
1076 
1077 	/*
1078 	 * Restore the sign of the mpy result which was saved in resultp1.
1079 	 * The exponent will continue to be kept in mpy_exponent.
1080 	 */
1081 	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082 
1083 	/*
1084 	 * No rounding is required, since the result of the multiply
1085 	 * is exact in the extended format.
1086 	 */
1087 
1088 	/*
1089 	 * Now we are ready to perform the add portion of the operation.
1090 	 *
1091 	 * The exponents need to be kept as integers for now, since the
1092 	 * multiply result might not fit into the exponent field.  We
1093 	 * can't overflow or underflow because of this yet, since the
1094 	 * add could bring the final result back into range.
1095 	 */
1096 	add_exponent = Dbl_exponent(opnd3p1);
1097 
1098 	/*
1099 	 * Check for denormalized or zero add operand.
1100 	 */
1101 	if (add_exponent == 0) {
1102 		/* check for zero */
1103 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104 			/* right is zero */
1105 			/* Left can't be zero and must be result.
1106 			 *
1107 			 * The final result is now in tmpres and mpy_exponent,
1108 			 * and needs to be rounded and squeezed back into
1109 			 * double precision format from double extended.
1110 			 */
1111 			result_exponent = mpy_exponent;
1112 			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113 				resultp1,resultp2,resultp3,resultp4);
1114 			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115 			goto round;
1116 		}
1117 
1118 		/*
1119 		 * Neither are zeroes.
1120 		 * Adjust exponent and normalize add operand.
1121 		 */
1122 		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1123 		Dbl_clear_signexponent(opnd3p1);
1124 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125 		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126 		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1127 	} else {
1128 		Dbl_clear_exponent_set_hidden(opnd3p1);
1129 	}
1130 	/*
1131 	 * Copy opnd3 to the double extended variable called right.
1132 	 */
1133 	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134 
1135 	/*
1136 	 * A zero "save" helps discover equal operands (for later),
1137 	 * and is used in swapping operands (if needed).
1138 	 */
1139 	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140 
1141 	/*
1142 	 * Compare magnitude of operands.
1143 	 */
1144 	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145 	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147 	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148 		/*
1149 		 * Set the left operand to the larger one by XOR swap.
1150 		 * First finish the first word "save".
1151 		 */
1152 		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153 		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154 		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155 			rightp2,rightp3,rightp4);
1156 		/* also setup exponents used in rest of routine */
1157 		diff_exponent = add_exponent - mpy_exponent;
1158 		result_exponent = add_exponent;
1159 	} else {
1160 		/* also setup exponents used in rest of routine */
1161 		diff_exponent = mpy_exponent - add_exponent;
1162 		result_exponent = mpy_exponent;
1163 	}
1164 	/* Invariant: left is not smaller than right. */
1165 
1166 	/*
1167 	 * Special case alignment of operands that would force alignment
1168 	 * beyond the extent of the extension.  A further optimization
1169 	 * could special case this but only reduces the path length for
1170 	 * this infrequent case.
1171 	 */
1172 	if (diff_exponent > DBLEXT_THRESHOLD) {
1173 		diff_exponent = DBLEXT_THRESHOLD;
1174 	}
1175 
1176 	/* Align right operand by shifting it to the right */
1177 	Dblext_clear_sign(rightp1);
1178 	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179 		/*shifted by*/diff_exponent);
1180 
1181 	/* Treat sum and difference of the operands separately. */
1182 	if ((int)save < 0) {
1183 		/*
1184 		 * Difference of the two operands.  Overflow can occur if the
1185 		 * multiply overflowed.  A borrow can occur out of the hidden
1186 		 * bit and force a post normalization phase.
1187 		 */
1188 		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189 			rightp1,rightp2,rightp3,rightp4,
1190 			resultp1,resultp2,resultp3,resultp4);
1191 		sign_save = Dbl_signextendedsign(resultp1);
1192 		if (Dbl_iszero_hidden(resultp1)) {
1193 			/* Handle normalization */
1194 		/* A straightforward algorithm would now shift the
1195 		 * result and extension left until the hidden bit
1196 		 * becomes one.  Not all of the extension bits need
1197 		 * participate in the shift.  Only the two most
1198 		 * significant bits (round and guard) are needed.
1199 		 * If only a single shift is needed then the guard
1200 		 * bit becomes a significant low order bit and the
1201 		 * extension must participate in the rounding.
1202 		 * If more than a single shift is needed, then all
1203 		 * bits to the right of the guard bit are zeros,
1204 		 * and the guard bit may or may not be zero. */
1205 			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206 				resultp4);
1207 
1208 			/* Need to check for a zero result.  The sign and
1209 			 * exponent fields have already been zeroed.  The more
1210 			 * efficient test of the full object can be used.
1211 			 */
1212 			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213 				/* Must have been "x-x" or "x+(-x)". */
1214 				if (Is_rounding_mode(ROUNDMINUS))
1215 					Dbl_setone_sign(resultp1);
1216 				Dbl_copytoptr(resultp1,resultp2,dstptr);
1217 				return(NOEXCEPTION);
1218 			}
1219 			result_exponent--;
1220 
1221 			/* Look to see if normalization is finished. */
1222 			if (Dbl_isone_hidden(resultp1)) {
1223 				/* No further normalization is needed */
1224 				goto round;
1225 			}
1226 
1227 			/* Discover first one bit to determine shift amount.
1228 			 * Use a modified binary search.  We have already
1229 			 * shifted the result one position right and still
1230 			 * not found a one so the remainder of the extension
1231 			 * must be zero and simplifies rounding. */
1232 			/* Scan bytes */
1233 			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234 				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235 				result_exponent -= 8;
1236 			}
1237 			/* Now narrow it down to the nibble */
1238 			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239 				/* The lower nibble contains the
1240 				 * normalizing one */
1241 				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242 				result_exponent -= 4;
1243 			}
1244 			/* Select case where first bit is set (already
1245 			 * normalized) otherwise select the proper shift. */
1246 			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247 			if (jumpsize <= 7) switch(jumpsize) {
1248 			case 1:
1249 				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250 					resultp4);
1251 				result_exponent -= 3;
1252 				break;
1253 			case 2:
1254 			case 3:
1255 				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256 					resultp4);
1257 				result_exponent -= 2;
1258 				break;
1259 			case 4:
1260 			case 5:
1261 			case 6:
1262 			case 7:
1263 				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264 					resultp4);
1265 				result_exponent -= 1;
1266 				break;
1267 			}
1268 		} /* end if (hidden...)... */
1269 	/* Fall through and round */
1270 	} /* end if (save < 0)... */
1271 	else {
1272 		/* Add magnitudes */
1273 		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274 			rightp1,rightp2,rightp3,rightp4,
1275 			/*to*/resultp1,resultp2,resultp3,resultp4);
1276 		sign_save = Dbl_signextendedsign(resultp1);
1277 		if (Dbl_isone_hiddenoverflow(resultp1)) {
1278 	    		/* Prenormalization required. */
1279 	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280 				resultp4);
1281 	    		result_exponent++;
1282 		} /* end if hiddenoverflow... */
1283 	} /* end else ...add magnitudes... */
1284 
1285 	/* Round the result.  If the extension and lower two words are
1286 	 * all zeros, then the result is exact.  Otherwise round in the
1287 	 * correct direction.  Underflow is possible. If a postnormalization
1288 	 * is necessary, then the mantissa is all zeros so no shift is needed.
1289 	 */
1290   round:
1291 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292 		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293 			result_exponent,is_tiny);
1294 	}
1295 	Dbl_set_sign(resultp1,/*using*/sign_save);
1296 	if (Dblext_isnotzero_mantissap3(resultp3) ||
1297 	    Dblext_isnotzero_mantissap4(resultp4)) {
1298 		inexact = TRUE;
1299 		switch(Rounding_mode()) {
1300 		case ROUNDNEAREST: /* The default. */
1301 			if (Dblext_isone_highp3(resultp3)) {
1302 				/* at least 1/2 ulp */
1303 				if (Dblext_isnotzero_low31p3(resultp3) ||
1304 				    Dblext_isnotzero_mantissap4(resultp4) ||
1305 				    Dblext_isone_lowp2(resultp2)) {
1306 					/* either exactly half way and odd or
1307 					 * more than 1/2ulp */
1308 					Dbl_increment(resultp1,resultp2);
1309 				}
1310 			}
1311 	    		break;
1312 
1313 		case ROUNDPLUS:
1314 	    		if (Dbl_iszero_sign(resultp1)) {
1315 				/* Round up positive results */
1316 				Dbl_increment(resultp1,resultp2);
1317 			}
1318 			break;
1319 
1320 		case ROUNDMINUS:
1321 	    		if (Dbl_isone_sign(resultp1)) {
1322 				/* Round down negative results */
1323 				Dbl_increment(resultp1,resultp2);
1324 			}
1325 
1326 		case ROUNDZERO:;
1327 			/* truncate is simple */
1328 		} /* end switch... */
1329 		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330 	}
1331 	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332 		/* Overflow */
1333 		if (Is_overflowtrap_enabled()) {
1334                         /*
1335                          * Adjust bias of result
1336                          */
1337                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1339                         if (inexact)
1340                             if (Is_inexacttrap_enabled())
1341                                 return (OPC_2E_OVERFLOWEXCEPTION |
1342 					OPC_2E_INEXACTEXCEPTION);
1343                             else Set_inexactflag();
1344                         return (OPC_2E_OVERFLOWEXCEPTION);
1345 		}
1346 		inexact = TRUE;
1347 		Set_overflowflag();
1348 		Dbl_setoverflow(resultp1,resultp2);
1349 	} else if (result_exponent <= 0) {	/* underflow case */
1350 		if (Is_underflowtrap_enabled()) {
1351                         /*
1352                          * Adjust bias of result
1353                          */
1354                 	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355 			Dbl_copytoptr(resultp1,resultp2,dstptr);
1356                         if (inexact)
1357                             if (Is_inexacttrap_enabled())
1358                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1359 					OPC_2E_INEXACTEXCEPTION);
1360                             else Set_inexactflag();
1361 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1362 		}
1363 		else if (inexact && is_tiny) Set_underflowflag();
1364 	}
1365 	else Dbl_set_exponent(resultp1,result_exponent);
1366 	Dbl_copytoptr(resultp1,resultp2,dstptr);
1367 	if (inexact)
1368 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369 		else Set_inexactflag();
1370     	return(NOEXCEPTION);
1371 }
1372 
1373 /*
1374  *  Single Floating-point Multiply Fused Add
1375  */
1376 
1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378 
1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380 unsigned int *status;
1381 {
1382 	unsigned int opnd1, opnd2, opnd3;
1383 	register unsigned int tmpresp1, tmpresp2;
1384 	unsigned int rightp1, rightp2;
1385 	unsigned int resultp1, resultp2 = 0;
1386 	register int mpy_exponent, add_exponent, count;
1387 	boolean inexact = FALSE, is_tiny = FALSE;
1388 
1389 	unsigned int signlessleft1, signlessright1, save;
1390 	register int result_exponent, diff_exponent;
1391 	int sign_save, jumpsize;
1392 
1393 	Sgl_copyfromptr(src1ptr,opnd1);
1394 	Sgl_copyfromptr(src2ptr,opnd2);
1395 	Sgl_copyfromptr(src3ptr,opnd3);
1396 
1397 	/*
1398 	 * set sign bit of result of multiply
1399 	 */
1400 	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401 		Sgl_setnegativezero(resultp1);
1402 	else Sgl_setzero(resultp1);
1403 
1404 	/*
1405 	 * Generate multiply exponent
1406 	 */
1407 	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408 
1409 	/*
1410 	 * check first operand for NaN's or infinity
1411 	 */
1412 	if (Sgl_isinfinity_exponent(opnd1)) {
1413 		if (Sgl_iszero_mantissa(opnd1)) {
1414 			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415 				if (Sgl_iszero_exponentmantissa(opnd2)) {
1416 					/*
1417 					 * invalid since operands are infinity
1418 					 * and zero
1419 					 */
1420 					if (Is_invalidtrap_enabled())
1421 						return(OPC_2E_INVALIDEXCEPTION);
1422 					Set_invalidflag();
1423 					Sgl_makequietnan(resultp1);
1424 					Sgl_copytoptr(resultp1,dstptr);
1425 					return(NOEXCEPTION);
1426 				}
1427 				/*
1428 				 * Check third operand for infinity with a
1429 				 *  sign opposite of the multiply result
1430 				 */
1431 				if (Sgl_isinfinity(opnd3) &&
1432 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433 					/*
1434 					 * invalid since attempting a magnitude
1435 					 * subtraction of infinities
1436 					 */
1437 					if (Is_invalidtrap_enabled())
1438 						return(OPC_2E_INVALIDEXCEPTION);
1439 					Set_invalidflag();
1440 					Sgl_makequietnan(resultp1);
1441 					Sgl_copytoptr(resultp1,dstptr);
1442 					return(NOEXCEPTION);
1443 				}
1444 
1445 				/*
1446 			 	 * return infinity
1447 			 	 */
1448 				Sgl_setinfinity_exponentmantissa(resultp1);
1449 				Sgl_copytoptr(resultp1,dstptr);
1450 				return(NOEXCEPTION);
1451 			}
1452 		}
1453 		else {
1454 			/*
1455 		 	 * is NaN; signaling or quiet?
1456 		 	 */
1457 			if (Sgl_isone_signaling(opnd1)) {
1458 				/* trap if INVALIDTRAP enabled */
1459 				if (Is_invalidtrap_enabled())
1460 			    		return(OPC_2E_INVALIDEXCEPTION);
1461 				/* make NaN quiet */
1462 				Set_invalidflag();
1463 				Sgl_set_quiet(opnd1);
1464 			}
1465 			/*
1466 			 * is second operand a signaling NaN?
1467 			 */
1468 			else if (Sgl_is_signalingnan(opnd2)) {
1469 				/* trap if INVALIDTRAP enabled */
1470 				if (Is_invalidtrap_enabled())
1471 			    		return(OPC_2E_INVALIDEXCEPTION);
1472 				/* make NaN quiet */
1473 				Set_invalidflag();
1474 				Sgl_set_quiet(opnd2);
1475 				Sgl_copytoptr(opnd2,dstptr);
1476 				return(NOEXCEPTION);
1477 			}
1478 			/*
1479 			 * is third operand a signaling NaN?
1480 			 */
1481 			else if (Sgl_is_signalingnan(opnd3)) {
1482 				/* trap if INVALIDTRAP enabled */
1483 				if (Is_invalidtrap_enabled())
1484 			    		return(OPC_2E_INVALIDEXCEPTION);
1485 				/* make NaN quiet */
1486 				Set_invalidflag();
1487 				Sgl_set_quiet(opnd3);
1488 				Sgl_copytoptr(opnd3,dstptr);
1489 				return(NOEXCEPTION);
1490 			}
1491 			/*
1492 		 	 * return quiet NaN
1493 		 	 */
1494 			Sgl_copytoptr(opnd1,dstptr);
1495 			return(NOEXCEPTION);
1496 		}
1497 	}
1498 
1499 	/*
1500 	 * check second operand for NaN's or infinity
1501 	 */
1502 	if (Sgl_isinfinity_exponent(opnd2)) {
1503 		if (Sgl_iszero_mantissa(opnd2)) {
1504 			if (Sgl_isnotnan(opnd3)) {
1505 				if (Sgl_iszero_exponentmantissa(opnd1)) {
1506 					/*
1507 					 * invalid since multiply operands are
1508 					 * zero & infinity
1509 					 */
1510 					if (Is_invalidtrap_enabled())
1511 						return(OPC_2E_INVALIDEXCEPTION);
1512 					Set_invalidflag();
1513 					Sgl_makequietnan(opnd2);
1514 					Sgl_copytoptr(opnd2,dstptr);
1515 					return(NOEXCEPTION);
1516 				}
1517 
1518 				/*
1519 				 * Check third operand for infinity with a
1520 				 *  sign opposite of the multiply result
1521 				 */
1522 				if (Sgl_isinfinity(opnd3) &&
1523 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524 					/*
1525 					 * invalid since attempting a magnitude
1526 					 * subtraction of infinities
1527 					 */
1528 					if (Is_invalidtrap_enabled())
1529 				       		return(OPC_2E_INVALIDEXCEPTION);
1530 				       	Set_invalidflag();
1531 				       	Sgl_makequietnan(resultp1);
1532 					Sgl_copytoptr(resultp1,dstptr);
1533 					return(NOEXCEPTION);
1534 				}
1535 
1536 				/*
1537 				 * return infinity
1538 				 */
1539 				Sgl_setinfinity_exponentmantissa(resultp1);
1540 				Sgl_copytoptr(resultp1,dstptr);
1541 				return(NOEXCEPTION);
1542 			}
1543 		}
1544 		else {
1545 			/*
1546 			 * is NaN; signaling or quiet?
1547 			 */
1548 			if (Sgl_isone_signaling(opnd2)) {
1549 				/* trap if INVALIDTRAP enabled */
1550 				if (Is_invalidtrap_enabled())
1551 					return(OPC_2E_INVALIDEXCEPTION);
1552 				/* make NaN quiet */
1553 				Set_invalidflag();
1554 				Sgl_set_quiet(opnd2);
1555 			}
1556 			/*
1557 			 * is third operand a signaling NaN?
1558 			 */
1559 			else if (Sgl_is_signalingnan(opnd3)) {
1560 			       	/* trap if INVALIDTRAP enabled */
1561 			       	if (Is_invalidtrap_enabled())
1562 				   		return(OPC_2E_INVALIDEXCEPTION);
1563 			       	/* make NaN quiet */
1564 			       	Set_invalidflag();
1565 			       	Sgl_set_quiet(opnd3);
1566 				Sgl_copytoptr(opnd3,dstptr);
1567 		       		return(NOEXCEPTION);
1568 			}
1569 			/*
1570 			 * return quiet NaN
1571 			 */
1572 			Sgl_copytoptr(opnd2,dstptr);
1573 			return(NOEXCEPTION);
1574 		}
1575 	}
1576 
1577 	/*
1578 	 * check third operand for NaN's or infinity
1579 	 */
1580 	if (Sgl_isinfinity_exponent(opnd3)) {
1581 		if (Sgl_iszero_mantissa(opnd3)) {
1582 			/* return infinity */
1583 			Sgl_copytoptr(opnd3,dstptr);
1584 			return(NOEXCEPTION);
1585 		} else {
1586 			/*
1587 			 * is NaN; signaling or quiet?
1588 			 */
1589 			if (Sgl_isone_signaling(opnd3)) {
1590 				/* trap if INVALIDTRAP enabled */
1591 				if (Is_invalidtrap_enabled())
1592 					return(OPC_2E_INVALIDEXCEPTION);
1593 				/* make NaN quiet */
1594 				Set_invalidflag();
1595 				Sgl_set_quiet(opnd3);
1596 			}
1597 			/*
1598 			 * return quiet NaN
1599  			 */
1600 			Sgl_copytoptr(opnd3,dstptr);
1601 			return(NOEXCEPTION);
1602 		}
1603     	}
1604 
1605 	/*
1606 	 * Generate multiply mantissa
1607 	 */
1608 	if (Sgl_isnotzero_exponent(opnd1)) {
1609 		/* set hidden bit */
1610 		Sgl_clear_signexponent_set_hidden(opnd1);
1611 	}
1612 	else {
1613 		/* check for zero */
1614 		if (Sgl_iszero_mantissa(opnd1)) {
1615 			/*
1616 			 * Perform the add opnd3 with zero here.
1617 			 */
1618 			if (Sgl_iszero_exponentmantissa(opnd3)) {
1619 				if (Is_rounding_mode(ROUNDMINUS)) {
1620 					Sgl_or_signs(opnd3,resultp1);
1621 				} else {
1622 					Sgl_and_signs(opnd3,resultp1);
1623 				}
1624 			}
1625 			/*
1626 			 * Now let's check for trapped underflow case.
1627 			 */
1628 			else if (Sgl_iszero_exponent(opnd3) &&
1629 			         Is_underflowtrap_enabled()) {
1630                     		/* need to normalize results mantissa */
1631                     		sign_save = Sgl_signextendedsign(opnd3);
1632 				result_exponent = 0;
1633                     		Sgl_leftshiftby1(opnd3);
1634                     		Sgl_normalize(opnd3,result_exponent);
1635                     		Sgl_set_sign(opnd3,/*using*/sign_save);
1636                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
1637 							unfl);
1638                     		Sgl_copytoptr(opnd3,dstptr);
1639                     		/* inexact = FALSE */
1640                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1641 			}
1642 			Sgl_copytoptr(opnd3,dstptr);
1643 			return(NOEXCEPTION);
1644 		}
1645 		/* is denormalized, adjust exponent */
1646 		Sgl_clear_signexponent(opnd1);
1647 		Sgl_leftshiftby1(opnd1);
1648 		Sgl_normalize(opnd1,mpy_exponent);
1649 	}
1650 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1651 	if (Sgl_isnotzero_exponent(opnd2)) {
1652 		Sgl_clear_signexponent_set_hidden(opnd2);
1653 	}
1654 	else {
1655 		/* check for zero */
1656 		if (Sgl_iszero_mantissa(opnd2)) {
1657 			/*
1658 			 * Perform the add opnd3 with zero here.
1659 			 */
1660 			if (Sgl_iszero_exponentmantissa(opnd3)) {
1661 				if (Is_rounding_mode(ROUNDMINUS)) {
1662 					Sgl_or_signs(opnd3,resultp1);
1663 				} else {
1664 					Sgl_and_signs(opnd3,resultp1);
1665 				}
1666 			}
1667 			/*
1668 			 * Now let's check for trapped underflow case.
1669 			 */
1670 			else if (Sgl_iszero_exponent(opnd3) &&
1671 			    Is_underflowtrap_enabled()) {
1672                     		/* need to normalize results mantissa */
1673                     		sign_save = Sgl_signextendedsign(opnd3);
1674 				result_exponent = 0;
1675                     		Sgl_leftshiftby1(opnd3);
1676                     		Sgl_normalize(opnd3,result_exponent);
1677                     		Sgl_set_sign(opnd3,/*using*/sign_save);
1678                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
1679 							unfl);
1680                     		Sgl_copytoptr(opnd3,dstptr);
1681                     		/* inexact = FALSE */
1682                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1683 			}
1684 			Sgl_copytoptr(opnd3,dstptr);
1685 			return(NOEXCEPTION);
1686 		}
1687 		/* is denormalized; want to normalize */
1688 		Sgl_clear_signexponent(opnd2);
1689 		Sgl_leftshiftby1(opnd2);
1690 		Sgl_normalize(opnd2,mpy_exponent);
1691 	}
1692 
1693 	/* Multiply the first two source mantissas together */
1694 
1695 	/*
1696 	 * The intermediate result will be kept in tmpres,
1697 	 * which needs enough room for 106 bits of mantissa,
1698 	 * so lets call it a Double extended.
1699 	 */
1700 	Sglext_setzero(tmpresp1,tmpresp2);
1701 
1702 	/*
1703 	 * Four bits at a time are inspected in each loop, and a
1704 	 * simple shift and add multiply algorithm is used.
1705 	 */
1706 	for (count = SGL_P-1; count >= 0; count -= 4) {
1707 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708 		if (Sbit28(opnd1)) {
1709 	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
1710 			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711 		}
1712 		if (Sbit29(opnd1)) {
1713 			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714 		}
1715 		if (Sbit30(opnd1)) {
1716 			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717 		}
1718 		if (Sbit31(opnd1)) {
1719 			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720 		}
1721 		Sgl_rightshiftby4(opnd1);
1722 	}
1723 	if (Is_sexthiddenoverflow(tmpresp1)) {
1724 		/* result mantissa >= 2 (mantissa overflow) */
1725 		mpy_exponent++;
1726 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727 	} else {
1728 		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729 	}
1730 
1731 	/*
1732 	 * Restore the sign of the mpy result which was saved in resultp1.
1733 	 * The exponent will continue to be kept in mpy_exponent.
1734 	 */
1735 	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736 
1737 	/*
1738 	 * No rounding is required, since the result of the multiply
1739 	 * is exact in the extended format.
1740 	 */
1741 
1742 	/*
1743 	 * Now we are ready to perform the add portion of the operation.
1744 	 *
1745 	 * The exponents need to be kept as integers for now, since the
1746 	 * multiply result might not fit into the exponent field.  We
1747 	 * can't overflow or underflow because of this yet, since the
1748 	 * add could bring the final result back into range.
1749 	 */
1750 	add_exponent = Sgl_exponent(opnd3);
1751 
1752 	/*
1753 	 * Check for denormalized or zero add operand.
1754 	 */
1755 	if (add_exponent == 0) {
1756 		/* check for zero */
1757 		if (Sgl_iszero_mantissa(opnd3)) {
1758 			/* right is zero */
1759 			/* Left can't be zero and must be result.
1760 			 *
1761 			 * The final result is now in tmpres and mpy_exponent,
1762 			 * and needs to be rounded and squeezed back into
1763 			 * double precision format from double extended.
1764 			 */
1765 			result_exponent = mpy_exponent;
1766 			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767 			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768 			goto round;
1769 		}
1770 
1771 		/*
1772 		 * Neither are zeroes.
1773 		 * Adjust exponent and normalize add operand.
1774 		 */
1775 		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1776 		Sgl_clear_signexponent(opnd3);
1777 		Sgl_leftshiftby1(opnd3);
1778 		Sgl_normalize(opnd3,add_exponent);
1779 		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1780 	} else {
1781 		Sgl_clear_exponent_set_hidden(opnd3);
1782 	}
1783 	/*
1784 	 * Copy opnd3 to the double extended variable called right.
1785 	 */
1786 	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787 
1788 	/*
1789 	 * A zero "save" helps discover equal operands (for later),
1790 	 * and is used in swapping operands (if needed).
1791 	 */
1792 	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793 
1794 	/*
1795 	 * Compare magnitude of operands.
1796 	 */
1797 	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798 	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800 	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801 		/*
1802 		 * Set the left operand to the larger one by XOR swap.
1803 		 * First finish the first word "save".
1804 		 */
1805 		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806 		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807 		Sglext_swap_lower(tmpresp2,rightp2);
1808 		/* also setup exponents used in rest of routine */
1809 		diff_exponent = add_exponent - mpy_exponent;
1810 		result_exponent = add_exponent;
1811 	} else {
1812 		/* also setup exponents used in rest of routine */
1813 		diff_exponent = mpy_exponent - add_exponent;
1814 		result_exponent = mpy_exponent;
1815 	}
1816 	/* Invariant: left is not smaller than right. */
1817 
1818 	/*
1819 	 * Special case alignment of operands that would force alignment
1820 	 * beyond the extent of the extension.  A further optimization
1821 	 * could special case this but only reduces the path length for
1822 	 * this infrequent case.
1823 	 */
1824 	if (diff_exponent > SGLEXT_THRESHOLD) {
1825 		diff_exponent = SGLEXT_THRESHOLD;
1826 	}
1827 
1828 	/* Align right operand by shifting it to the right */
1829 	Sglext_clear_sign(rightp1);
1830 	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831 
1832 	/* Treat sum and difference of the operands separately. */
1833 	if ((int)save < 0) {
1834 		/*
1835 		 * Difference of the two operands.  Overflow can occur if the
1836 		 * multiply overflowed.  A borrow can occur out of the hidden
1837 		 * bit and force a post normalization phase.
1838 		 */
1839 		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840 			resultp1,resultp2);
1841 		sign_save = Sgl_signextendedsign(resultp1);
1842 		if (Sgl_iszero_hidden(resultp1)) {
1843 			/* Handle normalization */
1844 		/* A straightforward algorithm would now shift the
1845 		 * result and extension left until the hidden bit
1846 		 * becomes one.  Not all of the extension bits need
1847 		 * participate in the shift.  Only the two most
1848 		 * significant bits (round and guard) are needed.
1849 		 * If only a single shift is needed then the guard
1850 		 * bit becomes a significant low order bit and the
1851 		 * extension must participate in the rounding.
1852 		 * If more than a single shift is needed, then all
1853 		 * bits to the right of the guard bit are zeros,
1854 		 * and the guard bit may or may not be zero. */
1855 			Sglext_leftshiftby1(resultp1,resultp2);
1856 
1857 			/* Need to check for a zero result.  The sign and
1858 			 * exponent fields have already been zeroed.  The more
1859 			 * efficient test of the full object can be used.
1860 			 */
1861 			 if (Sglext_iszero(resultp1,resultp2)) {
1862 				/* Must have been "x-x" or "x+(-x)". */
1863 				if (Is_rounding_mode(ROUNDMINUS))
1864 					Sgl_setone_sign(resultp1);
1865 				Sgl_copytoptr(resultp1,dstptr);
1866 				return(NOEXCEPTION);
1867 			}
1868 			result_exponent--;
1869 
1870 			/* Look to see if normalization is finished. */
1871 			if (Sgl_isone_hidden(resultp1)) {
1872 				/* No further normalization is needed */
1873 				goto round;
1874 			}
1875 
1876 			/* Discover first one bit to determine shift amount.
1877 			 * Use a modified binary search.  We have already
1878 			 * shifted the result one position right and still
1879 			 * not found a one so the remainder of the extension
1880 			 * must be zero and simplifies rounding. */
1881 			/* Scan bytes */
1882 			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883 				Sglext_leftshiftby8(resultp1,resultp2);
1884 				result_exponent -= 8;
1885 			}
1886 			/* Now narrow it down to the nibble */
1887 			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888 				/* The lower nibble contains the
1889 				 * normalizing one */
1890 				Sglext_leftshiftby4(resultp1,resultp2);
1891 				result_exponent -= 4;
1892 			}
1893 			/* Select case where first bit is set (already
1894 			 * normalized) otherwise select the proper shift. */
1895 			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896 			if (jumpsize <= 7) switch(jumpsize) {
1897 			case 1:
1898 				Sglext_leftshiftby3(resultp1,resultp2);
1899 				result_exponent -= 3;
1900 				break;
1901 			case 2:
1902 			case 3:
1903 				Sglext_leftshiftby2(resultp1,resultp2);
1904 				result_exponent -= 2;
1905 				break;
1906 			case 4:
1907 			case 5:
1908 			case 6:
1909 			case 7:
1910 				Sglext_leftshiftby1(resultp1,resultp2);
1911 				result_exponent -= 1;
1912 				break;
1913 			}
1914 		} /* end if (hidden...)... */
1915 	/* Fall through and round */
1916 	} /* end if (save < 0)... */
1917 	else {
1918 		/* Add magnitudes */
1919 		Sglext_addition(tmpresp1,tmpresp2,
1920 			rightp1,rightp2, /*to*/resultp1,resultp2);
1921 		sign_save = Sgl_signextendedsign(resultp1);
1922 		if (Sgl_isone_hiddenoverflow(resultp1)) {
1923 	    		/* Prenormalization required. */
1924 	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1925 	    		result_exponent++;
1926 		} /* end if hiddenoverflow... */
1927 	} /* end else ...add magnitudes... */
1928 
1929 	/* Round the result.  If the extension and lower two words are
1930 	 * all zeros, then the result is exact.  Otherwise round in the
1931 	 * correct direction.  Underflow is possible. If a postnormalization
1932 	 * is necessary, then the mantissa is all zeros so no shift is needed.
1933 	 */
1934   round:
1935 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936 		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937 	}
1938 	Sgl_set_sign(resultp1,/*using*/sign_save);
1939 	if (Sglext_isnotzero_mantissap2(resultp2)) {
1940 		inexact = TRUE;
1941 		switch(Rounding_mode()) {
1942 		case ROUNDNEAREST: /* The default. */
1943 			if (Sglext_isone_highp2(resultp2)) {
1944 				/* at least 1/2 ulp */
1945 				if (Sglext_isnotzero_low31p2(resultp2) ||
1946 				    Sglext_isone_lowp1(resultp1)) {
1947 					/* either exactly half way and odd or
1948 					 * more than 1/2ulp */
1949 					Sgl_increment(resultp1);
1950 				}
1951 			}
1952 	    		break;
1953 
1954 		case ROUNDPLUS:
1955 	    		if (Sgl_iszero_sign(resultp1)) {
1956 				/* Round up positive results */
1957 				Sgl_increment(resultp1);
1958 			}
1959 			break;
1960 
1961 		case ROUNDMINUS:
1962 	    		if (Sgl_isone_sign(resultp1)) {
1963 				/* Round down negative results */
1964 				Sgl_increment(resultp1);
1965 			}
1966 
1967 		case ROUNDZERO:;
1968 			/* truncate is simple */
1969 		} /* end switch... */
1970 		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971 	}
1972 	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973 		/* Overflow */
1974 		if (Is_overflowtrap_enabled()) {
1975                         /*
1976                          * Adjust bias of result
1977                          */
1978                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979                         Sgl_copytoptr(resultp1,dstptr);
1980                         if (inexact)
1981                             if (Is_inexacttrap_enabled())
1982                                 return (OPC_2E_OVERFLOWEXCEPTION |
1983 					OPC_2E_INEXACTEXCEPTION);
1984                             else Set_inexactflag();
1985                         return (OPC_2E_OVERFLOWEXCEPTION);
1986 		}
1987 		inexact = TRUE;
1988 		Set_overflowflag();
1989 		Sgl_setoverflow(resultp1);
1990 	} else if (result_exponent <= 0) {	/* underflow case */
1991 		if (Is_underflowtrap_enabled()) {
1992                         /*
1993                          * Adjust bias of result
1994                          */
1995                 	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996 			Sgl_copytoptr(resultp1,dstptr);
1997                         if (inexact)
1998                             if (Is_inexacttrap_enabled())
1999                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2000 					OPC_2E_INEXACTEXCEPTION);
2001                             else Set_inexactflag();
2002 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2003 		}
2004 		else if (inexact && is_tiny) Set_underflowflag();
2005 	}
2006 	else Sgl_set_exponent(resultp1,result_exponent);
2007 	Sgl_copytoptr(resultp1,dstptr);
2008 	if (inexact)
2009 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010 		else Set_inexactflag();
2011     	return(NOEXCEPTION);
2012 }
2013 
2014 /*
2015  *  Single Floating-point Multiply Negate Fused Add
2016  */
2017 
2018 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019 
2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021 unsigned int *status;
2022 {
2023 	unsigned int opnd1, opnd2, opnd3;
2024 	register unsigned int tmpresp1, tmpresp2;
2025 	unsigned int rightp1, rightp2;
2026 	unsigned int resultp1, resultp2 = 0;
2027 	register int mpy_exponent, add_exponent, count;
2028 	boolean inexact = FALSE, is_tiny = FALSE;
2029 
2030 	unsigned int signlessleft1, signlessright1, save;
2031 	register int result_exponent, diff_exponent;
2032 	int sign_save, jumpsize;
2033 
2034 	Sgl_copyfromptr(src1ptr,opnd1);
2035 	Sgl_copyfromptr(src2ptr,opnd2);
2036 	Sgl_copyfromptr(src3ptr,opnd3);
2037 
2038 	/*
2039 	 * set sign bit of result of multiply
2040 	 */
2041 	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042 		Sgl_setzero(resultp1);
2043 	else
2044 		Sgl_setnegativezero(resultp1);
2045 
2046 	/*
2047 	 * Generate multiply exponent
2048 	 */
2049 	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050 
2051 	/*
2052 	 * check first operand for NaN's or infinity
2053 	 */
2054 	if (Sgl_isinfinity_exponent(opnd1)) {
2055 		if (Sgl_iszero_mantissa(opnd1)) {
2056 			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057 				if (Sgl_iszero_exponentmantissa(opnd2)) {
2058 					/*
2059 					 * invalid since operands are infinity
2060 					 * and zero
2061 					 */
2062 					if (Is_invalidtrap_enabled())
2063 						return(OPC_2E_INVALIDEXCEPTION);
2064 					Set_invalidflag();
2065 					Sgl_makequietnan(resultp1);
2066 					Sgl_copytoptr(resultp1,dstptr);
2067 					return(NOEXCEPTION);
2068 				}
2069 				/*
2070 				 * Check third operand for infinity with a
2071 				 *  sign opposite of the multiply result
2072 				 */
2073 				if (Sgl_isinfinity(opnd3) &&
2074 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075 					/*
2076 					 * invalid since attempting a magnitude
2077 					 * subtraction of infinities
2078 					 */
2079 					if (Is_invalidtrap_enabled())
2080 						return(OPC_2E_INVALIDEXCEPTION);
2081 					Set_invalidflag();
2082 					Sgl_makequietnan(resultp1);
2083 					Sgl_copytoptr(resultp1,dstptr);
2084 					return(NOEXCEPTION);
2085 				}
2086 
2087 				/*
2088 			 	 * return infinity
2089 			 	 */
2090 				Sgl_setinfinity_exponentmantissa(resultp1);
2091 				Sgl_copytoptr(resultp1,dstptr);
2092 				return(NOEXCEPTION);
2093 			}
2094 		}
2095 		else {
2096 			/*
2097 		 	 * is NaN; signaling or quiet?
2098 		 	 */
2099 			if (Sgl_isone_signaling(opnd1)) {
2100 				/* trap if INVALIDTRAP enabled */
2101 				if (Is_invalidtrap_enabled())
2102 			    		return(OPC_2E_INVALIDEXCEPTION);
2103 				/* make NaN quiet */
2104 				Set_invalidflag();
2105 				Sgl_set_quiet(opnd1);
2106 			}
2107 			/*
2108 			 * is second operand a signaling NaN?
2109 			 */
2110 			else if (Sgl_is_signalingnan(opnd2)) {
2111 				/* trap if INVALIDTRAP enabled */
2112 				if (Is_invalidtrap_enabled())
2113 			    		return(OPC_2E_INVALIDEXCEPTION);
2114 				/* make NaN quiet */
2115 				Set_invalidflag();
2116 				Sgl_set_quiet(opnd2);
2117 				Sgl_copytoptr(opnd2,dstptr);
2118 				return(NOEXCEPTION);
2119 			}
2120 			/*
2121 			 * is third operand a signaling NaN?
2122 			 */
2123 			else if (Sgl_is_signalingnan(opnd3)) {
2124 				/* trap if INVALIDTRAP enabled */
2125 				if (Is_invalidtrap_enabled())
2126 			    		return(OPC_2E_INVALIDEXCEPTION);
2127 				/* make NaN quiet */
2128 				Set_invalidflag();
2129 				Sgl_set_quiet(opnd3);
2130 				Sgl_copytoptr(opnd3,dstptr);
2131 				return(NOEXCEPTION);
2132 			}
2133 			/*
2134 		 	 * return quiet NaN
2135 		 	 */
2136 			Sgl_copytoptr(opnd1,dstptr);
2137 			return(NOEXCEPTION);
2138 		}
2139 	}
2140 
2141 	/*
2142 	 * check second operand for NaN's or infinity
2143 	 */
2144 	if (Sgl_isinfinity_exponent(opnd2)) {
2145 		if (Sgl_iszero_mantissa(opnd2)) {
2146 			if (Sgl_isnotnan(opnd3)) {
2147 				if (Sgl_iszero_exponentmantissa(opnd1)) {
2148 					/*
2149 					 * invalid since multiply operands are
2150 					 * zero & infinity
2151 					 */
2152 					if (Is_invalidtrap_enabled())
2153 						return(OPC_2E_INVALIDEXCEPTION);
2154 					Set_invalidflag();
2155 					Sgl_makequietnan(opnd2);
2156 					Sgl_copytoptr(opnd2,dstptr);
2157 					return(NOEXCEPTION);
2158 				}
2159 
2160 				/*
2161 				 * Check third operand for infinity with a
2162 				 *  sign opposite of the multiply result
2163 				 */
2164 				if (Sgl_isinfinity(opnd3) &&
2165 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166 					/*
2167 					 * invalid since attempting a magnitude
2168 					 * subtraction of infinities
2169 					 */
2170 					if (Is_invalidtrap_enabled())
2171 				       		return(OPC_2E_INVALIDEXCEPTION);
2172 				       	Set_invalidflag();
2173 				       	Sgl_makequietnan(resultp1);
2174 					Sgl_copytoptr(resultp1,dstptr);
2175 					return(NOEXCEPTION);
2176 				}
2177 
2178 				/*
2179 				 * return infinity
2180 				 */
2181 				Sgl_setinfinity_exponentmantissa(resultp1);
2182 				Sgl_copytoptr(resultp1,dstptr);
2183 				return(NOEXCEPTION);
2184 			}
2185 		}
2186 		else {
2187 			/*
2188 			 * is NaN; signaling or quiet?
2189 			 */
2190 			if (Sgl_isone_signaling(opnd2)) {
2191 				/* trap if INVALIDTRAP enabled */
2192 				if (Is_invalidtrap_enabled())
2193 					return(OPC_2E_INVALIDEXCEPTION);
2194 				/* make NaN quiet */
2195 				Set_invalidflag();
2196 				Sgl_set_quiet(opnd2);
2197 			}
2198 			/*
2199 			 * is third operand a signaling NaN?
2200 			 */
2201 			else if (Sgl_is_signalingnan(opnd3)) {
2202 			       	/* trap if INVALIDTRAP enabled */
2203 			       	if (Is_invalidtrap_enabled())
2204 				   		return(OPC_2E_INVALIDEXCEPTION);
2205 			       	/* make NaN quiet */
2206 			       	Set_invalidflag();
2207 			       	Sgl_set_quiet(opnd3);
2208 				Sgl_copytoptr(opnd3,dstptr);
2209 		       		return(NOEXCEPTION);
2210 			}
2211 			/*
2212 			 * return quiet NaN
2213 			 */
2214 			Sgl_copytoptr(opnd2,dstptr);
2215 			return(NOEXCEPTION);
2216 		}
2217 	}
2218 
2219 	/*
2220 	 * check third operand for NaN's or infinity
2221 	 */
2222 	if (Sgl_isinfinity_exponent(opnd3)) {
2223 		if (Sgl_iszero_mantissa(opnd3)) {
2224 			/* return infinity */
2225 			Sgl_copytoptr(opnd3,dstptr);
2226 			return(NOEXCEPTION);
2227 		} else {
2228 			/*
2229 			 * is NaN; signaling or quiet?
2230 			 */
2231 			if (Sgl_isone_signaling(opnd3)) {
2232 				/* trap if INVALIDTRAP enabled */
2233 				if (Is_invalidtrap_enabled())
2234 					return(OPC_2E_INVALIDEXCEPTION);
2235 				/* make NaN quiet */
2236 				Set_invalidflag();
2237 				Sgl_set_quiet(opnd3);
2238 			}
2239 			/*
2240 			 * return quiet NaN
2241  			 */
2242 			Sgl_copytoptr(opnd3,dstptr);
2243 			return(NOEXCEPTION);
2244 		}
2245     	}
2246 
2247 	/*
2248 	 * Generate multiply mantissa
2249 	 */
2250 	if (Sgl_isnotzero_exponent(opnd1)) {
2251 		/* set hidden bit */
2252 		Sgl_clear_signexponent_set_hidden(opnd1);
2253 	}
2254 	else {
2255 		/* check for zero */
2256 		if (Sgl_iszero_mantissa(opnd1)) {
2257 			/*
2258 			 * Perform the add opnd3 with zero here.
2259 			 */
2260 			if (Sgl_iszero_exponentmantissa(opnd3)) {
2261 				if (Is_rounding_mode(ROUNDMINUS)) {
2262 					Sgl_or_signs(opnd3,resultp1);
2263 				} else {
2264 					Sgl_and_signs(opnd3,resultp1);
2265 				}
2266 			}
2267 			/*
2268 			 * Now let's check for trapped underflow case.
2269 			 */
2270 			else if (Sgl_iszero_exponent(opnd3) &&
2271 			         Is_underflowtrap_enabled()) {
2272                     		/* need to normalize results mantissa */
2273                     		sign_save = Sgl_signextendedsign(opnd3);
2274 				result_exponent = 0;
2275                     		Sgl_leftshiftby1(opnd3);
2276                     		Sgl_normalize(opnd3,result_exponent);
2277                     		Sgl_set_sign(opnd3,/*using*/sign_save);
2278                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
2279 							unfl);
2280                     		Sgl_copytoptr(opnd3,dstptr);
2281                     		/* inexact = FALSE */
2282                     		return(OPC_2E_UNDERFLOWEXCEPTION);
2283 			}
2284 			Sgl_copytoptr(opnd3,dstptr);
2285 			return(NOEXCEPTION);
2286 		}
2287 		/* is denormalized, adjust exponent */
2288 		Sgl_clear_signexponent(opnd1);
2289 		Sgl_leftshiftby1(opnd1);
2290 		Sgl_normalize(opnd1,mpy_exponent);
2291 	}
2292 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2293 	if (Sgl_isnotzero_exponent(opnd2)) {
2294 		Sgl_clear_signexponent_set_hidden(opnd2);
2295 	}
2296 	else {
2297 		/* check for zero */
2298 		if (Sgl_iszero_mantissa(opnd2)) {
2299 			/*
2300 			 * Perform the add opnd3 with zero here.
2301 			 */
2302 			if (Sgl_iszero_exponentmantissa(opnd3)) {
2303 				if (Is_rounding_mode(ROUNDMINUS)) {
2304 					Sgl_or_signs(opnd3,resultp1);
2305 				} else {
2306 					Sgl_and_signs(opnd3,resultp1);
2307 				}
2308 			}
2309 			/*
2310 			 * Now let's check for trapped underflow case.
2311 			 */
2312 			else if (Sgl_iszero_exponent(opnd3) &&
2313 			    Is_underflowtrap_enabled()) {
2314                     		/* need to normalize results mantissa */
2315                     		sign_save = Sgl_signextendedsign(opnd3);
2316 				result_exponent = 0;
2317                     		Sgl_leftshiftby1(opnd3);
2318                     		Sgl_normalize(opnd3,result_exponent);
2319                     		Sgl_set_sign(opnd3,/*using*/sign_save);
2320                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
2321 							unfl);
2322                     		Sgl_copytoptr(opnd3,dstptr);
2323                     		/* inexact = FALSE */
2324                     		return(OPC_2E_UNDERFLOWEXCEPTION);
2325 			}
2326 			Sgl_copytoptr(opnd3,dstptr);
2327 			return(NOEXCEPTION);
2328 		}
2329 		/* is denormalized; want to normalize */
2330 		Sgl_clear_signexponent(opnd2);
2331 		Sgl_leftshiftby1(opnd2);
2332 		Sgl_normalize(opnd2,mpy_exponent);
2333 	}
2334 
2335 	/* Multiply the first two source mantissas together */
2336 
2337 	/*
2338 	 * The intermediate result will be kept in tmpres,
2339 	 * which needs enough room for 106 bits of mantissa,
2340 	 * so lets call it a Double extended.
2341 	 */
2342 	Sglext_setzero(tmpresp1,tmpresp2);
2343 
2344 	/*
2345 	 * Four bits at a time are inspected in each loop, and a
2346 	 * simple shift and add multiply algorithm is used.
2347 	 */
2348 	for (count = SGL_P-1; count >= 0; count -= 4) {
2349 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350 		if (Sbit28(opnd1)) {
2351 	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2352 			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353 		}
2354 		if (Sbit29(opnd1)) {
2355 			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356 		}
2357 		if (Sbit30(opnd1)) {
2358 			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359 		}
2360 		if (Sbit31(opnd1)) {
2361 			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362 		}
2363 		Sgl_rightshiftby4(opnd1);
2364 	}
2365 	if (Is_sexthiddenoverflow(tmpresp1)) {
2366 		/* result mantissa >= 2 (mantissa overflow) */
2367 		mpy_exponent++;
2368 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369 	} else {
2370 		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371 	}
2372 
2373 	/*
2374 	 * Restore the sign of the mpy result which was saved in resultp1.
2375 	 * The exponent will continue to be kept in mpy_exponent.
2376 	 */
2377 	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378 
2379 	/*
2380 	 * No rounding is required, since the result of the multiply
2381 	 * is exact in the extended format.
2382 	 */
2383 
2384 	/*
2385 	 * Now we are ready to perform the add portion of the operation.
2386 	 *
2387 	 * The exponents need to be kept as integers for now, since the
2388 	 * multiply result might not fit into the exponent field.  We
2389 	 * can't overflow or underflow because of this yet, since the
2390 	 * add could bring the final result back into range.
2391 	 */
2392 	add_exponent = Sgl_exponent(opnd3);
2393 
2394 	/*
2395 	 * Check for denormalized or zero add operand.
2396 	 */
2397 	if (add_exponent == 0) {
2398 		/* check for zero */
2399 		if (Sgl_iszero_mantissa(opnd3)) {
2400 			/* right is zero */
2401 			/* Left can't be zero and must be result.
2402 			 *
2403 			 * The final result is now in tmpres and mpy_exponent,
2404 			 * and needs to be rounded and squeezed back into
2405 			 * double precision format from double extended.
2406 			 */
2407 			result_exponent = mpy_exponent;
2408 			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409 			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410 			goto round;
2411 		}
2412 
2413 		/*
2414 		 * Neither are zeroes.
2415 		 * Adjust exponent and normalize add operand.
2416 		 */
2417 		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2418 		Sgl_clear_signexponent(opnd3);
2419 		Sgl_leftshiftby1(opnd3);
2420 		Sgl_normalize(opnd3,add_exponent);
2421 		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2422 	} else {
2423 		Sgl_clear_exponent_set_hidden(opnd3);
2424 	}
2425 	/*
2426 	 * Copy opnd3 to the double extended variable called right.
2427 	 */
2428 	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429 
2430 	/*
2431 	 * A zero "save" helps discover equal operands (for later),
2432 	 * and is used in swapping operands (if needed).
2433 	 */
2434 	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435 
2436 	/*
2437 	 * Compare magnitude of operands.
2438 	 */
2439 	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440 	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442 	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443 		/*
2444 		 * Set the left operand to the larger one by XOR swap.
2445 		 * First finish the first word "save".
2446 		 */
2447 		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448 		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449 		Sglext_swap_lower(tmpresp2,rightp2);
2450 		/* also setup exponents used in rest of routine */
2451 		diff_exponent = add_exponent - mpy_exponent;
2452 		result_exponent = add_exponent;
2453 	} else {
2454 		/* also setup exponents used in rest of routine */
2455 		diff_exponent = mpy_exponent - add_exponent;
2456 		result_exponent = mpy_exponent;
2457 	}
2458 	/* Invariant: left is not smaller than right. */
2459 
2460 	/*
2461 	 * Special case alignment of operands that would force alignment
2462 	 * beyond the extent of the extension.  A further optimization
2463 	 * could special case this but only reduces the path length for
2464 	 * this infrequent case.
2465 	 */
2466 	if (diff_exponent > SGLEXT_THRESHOLD) {
2467 		diff_exponent = SGLEXT_THRESHOLD;
2468 	}
2469 
2470 	/* Align right operand by shifting it to the right */
2471 	Sglext_clear_sign(rightp1);
2472 	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473 
2474 	/* Treat sum and difference of the operands separately. */
2475 	if ((int)save < 0) {
2476 		/*
2477 		 * Difference of the two operands.  Overflow can occur if the
2478 		 * multiply overflowed.  A borrow can occur out of the hidden
2479 		 * bit and force a post normalization phase.
2480 		 */
2481 		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482 			resultp1,resultp2);
2483 		sign_save = Sgl_signextendedsign(resultp1);
2484 		if (Sgl_iszero_hidden(resultp1)) {
2485 			/* Handle normalization */
2486 		/* A straightforward algorithm would now shift the
2487 		 * result and extension left until the hidden bit
2488 		 * becomes one.  Not all of the extension bits need
2489 		 * participate in the shift.  Only the two most
2490 		 * significant bits (round and guard) are needed.
2491 		 * If only a single shift is needed then the guard
2492 		 * bit becomes a significant low order bit and the
2493 		 * extension must participate in the rounding.
2494 		 * If more than a single shift is needed, then all
2495 		 * bits to the right of the guard bit are zeros,
2496 		 * and the guard bit may or may not be zero. */
2497 			Sglext_leftshiftby1(resultp1,resultp2);
2498 
2499 			/* Need to check for a zero result.  The sign and
2500 			 * exponent fields have already been zeroed.  The more
2501 			 * efficient test of the full object can be used.
2502 			 */
2503 			 if (Sglext_iszero(resultp1,resultp2)) {
2504 				/* Must have been "x-x" or "x+(-x)". */
2505 				if (Is_rounding_mode(ROUNDMINUS))
2506 					Sgl_setone_sign(resultp1);
2507 				Sgl_copytoptr(resultp1,dstptr);
2508 				return(NOEXCEPTION);
2509 			}
2510 			result_exponent--;
2511 
2512 			/* Look to see if normalization is finished. */
2513 			if (Sgl_isone_hidden(resultp1)) {
2514 				/* No further normalization is needed */
2515 				goto round;
2516 			}
2517 
2518 			/* Discover first one bit to determine shift amount.
2519 			 * Use a modified binary search.  We have already
2520 			 * shifted the result one position right and still
2521 			 * not found a one so the remainder of the extension
2522 			 * must be zero and simplifies rounding. */
2523 			/* Scan bytes */
2524 			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525 				Sglext_leftshiftby8(resultp1,resultp2);
2526 				result_exponent -= 8;
2527 			}
2528 			/* Now narrow it down to the nibble */
2529 			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530 				/* The lower nibble contains the
2531 				 * normalizing one */
2532 				Sglext_leftshiftby4(resultp1,resultp2);
2533 				result_exponent -= 4;
2534 			}
2535 			/* Select case where first bit is set (already
2536 			 * normalized) otherwise select the proper shift. */
2537 			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538 			if (jumpsize <= 7) switch(jumpsize) {
2539 			case 1:
2540 				Sglext_leftshiftby3(resultp1,resultp2);
2541 				result_exponent -= 3;
2542 				break;
2543 			case 2:
2544 			case 3:
2545 				Sglext_leftshiftby2(resultp1,resultp2);
2546 				result_exponent -= 2;
2547 				break;
2548 			case 4:
2549 			case 5:
2550 			case 6:
2551 			case 7:
2552 				Sglext_leftshiftby1(resultp1,resultp2);
2553 				result_exponent -= 1;
2554 				break;
2555 			}
2556 		} /* end if (hidden...)... */
2557 	/* Fall through and round */
2558 	} /* end if (save < 0)... */
2559 	else {
2560 		/* Add magnitudes */
2561 		Sglext_addition(tmpresp1,tmpresp2,
2562 			rightp1,rightp2, /*to*/resultp1,resultp2);
2563 		sign_save = Sgl_signextendedsign(resultp1);
2564 		if (Sgl_isone_hiddenoverflow(resultp1)) {
2565 	    		/* Prenormalization required. */
2566 	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2567 	    		result_exponent++;
2568 		} /* end if hiddenoverflow... */
2569 	} /* end else ...add magnitudes... */
2570 
2571 	/* Round the result.  If the extension and lower two words are
2572 	 * all zeros, then the result is exact.  Otherwise round in the
2573 	 * correct direction.  Underflow is possible. If a postnormalization
2574 	 * is necessary, then the mantissa is all zeros so no shift is needed.
2575 	 */
2576   round:
2577 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578 		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579 	}
2580 	Sgl_set_sign(resultp1,/*using*/sign_save);
2581 	if (Sglext_isnotzero_mantissap2(resultp2)) {
2582 		inexact = TRUE;
2583 		switch(Rounding_mode()) {
2584 		case ROUNDNEAREST: /* The default. */
2585 			if (Sglext_isone_highp2(resultp2)) {
2586 				/* at least 1/2 ulp */
2587 				if (Sglext_isnotzero_low31p2(resultp2) ||
2588 				    Sglext_isone_lowp1(resultp1)) {
2589 					/* either exactly half way and odd or
2590 					 * more than 1/2ulp */
2591 					Sgl_increment(resultp1);
2592 				}
2593 			}
2594 	    		break;
2595 
2596 		case ROUNDPLUS:
2597 	    		if (Sgl_iszero_sign(resultp1)) {
2598 				/* Round up positive results */
2599 				Sgl_increment(resultp1);
2600 			}
2601 			break;
2602 
2603 		case ROUNDMINUS:
2604 	    		if (Sgl_isone_sign(resultp1)) {
2605 				/* Round down negative results */
2606 				Sgl_increment(resultp1);
2607 			}
2608 
2609 		case ROUNDZERO:;
2610 			/* truncate is simple */
2611 		} /* end switch... */
2612 		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613 	}
2614 	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615 		/* Overflow */
2616 		if (Is_overflowtrap_enabled()) {
2617                         /*
2618                          * Adjust bias of result
2619                          */
2620                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621                         Sgl_copytoptr(resultp1,dstptr);
2622                         if (inexact)
2623                             if (Is_inexacttrap_enabled())
2624                                 return (OPC_2E_OVERFLOWEXCEPTION |
2625 					OPC_2E_INEXACTEXCEPTION);
2626                             else Set_inexactflag();
2627                         return (OPC_2E_OVERFLOWEXCEPTION);
2628 		}
2629 		inexact = TRUE;
2630 		Set_overflowflag();
2631 		Sgl_setoverflow(resultp1);
2632 	} else if (result_exponent <= 0) {	/* underflow case */
2633 		if (Is_underflowtrap_enabled()) {
2634                         /*
2635                          * Adjust bias of result
2636                          */
2637                 	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638 			Sgl_copytoptr(resultp1,dstptr);
2639                         if (inexact)
2640                             if (Is_inexacttrap_enabled())
2641                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2642 					OPC_2E_INEXACTEXCEPTION);
2643                             else Set_inexactflag();
2644 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2645 		}
2646 		else if (inexact && is_tiny) Set_underflowflag();
2647 	}
2648 	else Sgl_set_exponent(resultp1,result_exponent);
2649 	Sgl_copytoptr(resultp1,dstptr);
2650 	if (inexact)
2651 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652 		else Set_inexactflag();
2653     	return(NOEXCEPTION);
2654 }
2655 
2656