2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4 * Floating-point emulation code
5 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
28 * Double Floating-point Multiply Fused Add
29 * Double Floating-point Multiply Negate Fused Add
30 * Single Floating-point Multiply Fused Add
31 * Single Floating-point Multiply Negate Fused Add
33 * External Interfaces:
34 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
39 * Internal Interfaces:
42 * <<please update with a overview of the operation of this file>>
49 #include "sgl_float.h"
50 #include "dbl_float.h"
54 * Double Floating-point Multiply Fused Add
59 dbl_floating_point
*src1ptr
,
60 dbl_floating_point
*src2ptr
,
61 dbl_floating_point
*src3ptr
,
63 dbl_floating_point
*dstptr
)
65 unsigned int opnd1p1
, opnd1p2
, opnd2p1
, opnd2p2
, opnd3p1
, opnd3p2
;
66 register unsigned int tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
;
67 unsigned int rightp1
, rightp2
, rightp3
, rightp4
;
68 unsigned int resultp1
, resultp2
= 0, resultp3
= 0, resultp4
= 0;
69 register int mpy_exponent
, add_exponent
, count
;
70 boolean inexact
= FALSE
, is_tiny
= FALSE
;
72 unsigned int signlessleft1
, signlessright1
, save
;
73 register int result_exponent
, diff_exponent
;
74 int sign_save
, jumpsize
;
76 Dbl_copyfromptr(src1ptr
,opnd1p1
,opnd1p2
);
77 Dbl_copyfromptr(src2ptr
,opnd2p1
,opnd2p2
);
78 Dbl_copyfromptr(src3ptr
,opnd3p1
,opnd3p2
);
81 * set sign bit of result of multiply
83 if (Dbl_sign(opnd1p1
) ^ Dbl_sign(opnd2p1
))
84 Dbl_setnegativezerop1(resultp1
);
85 else Dbl_setzerop1(resultp1
);
88 * Generate multiply exponent
90 mpy_exponent
= Dbl_exponent(opnd1p1
) + Dbl_exponent(opnd2p1
) - DBL_BIAS
;
93 * check first operand for NaN's or infinity
95 if (Dbl_isinfinity_exponent(opnd1p1
)) {
96 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
97 if (Dbl_isnotnan(opnd2p1
,opnd2p2
) &&
98 Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
99 if (Dbl_iszero_exponentmantissa(opnd2p1
,opnd2p2
)) {
101 * invalid since operands are infinity
104 if (Is_invalidtrap_enabled())
105 return(OPC_2E_INVALIDEXCEPTION
);
107 Dbl_makequietnan(resultp1
,resultp2
);
108 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
112 * Check third operand for infinity with a
113 * sign opposite of the multiply result
115 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
116 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
118 * invalid since attempting a magnitude
119 * subtraction of infinities
121 if (Is_invalidtrap_enabled())
122 return(OPC_2E_INVALIDEXCEPTION
);
124 Dbl_makequietnan(resultp1
,resultp2
);
125 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
132 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
133 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
139 * is NaN; signaling or quiet?
141 if (Dbl_isone_signaling(opnd1p1
)) {
142 /* trap if INVALIDTRAP enabled */
143 if (Is_invalidtrap_enabled())
144 return(OPC_2E_INVALIDEXCEPTION
);
147 Dbl_set_quiet(opnd1p1
);
150 * is second operand a signaling NaN?
152 else if (Dbl_is_signalingnan(opnd2p1
)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION
);
158 Dbl_set_quiet(opnd2p1
);
159 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
163 * is third operand a signaling NaN?
165 else if (Dbl_is_signalingnan(opnd3p1
)) {
166 /* trap if INVALIDTRAP enabled */
167 if (Is_invalidtrap_enabled())
168 return(OPC_2E_INVALIDEXCEPTION
);
171 Dbl_set_quiet(opnd3p1
);
172 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
178 Dbl_copytoptr(opnd1p1
,opnd1p2
,dstptr
);
184 * check second operand for NaN's or infinity
186 if (Dbl_isinfinity_exponent(opnd2p1
)) {
187 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
188 if (Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
189 if (Dbl_iszero_exponentmantissa(opnd1p1
,opnd1p2
)) {
191 * invalid since multiply operands are
194 if (Is_invalidtrap_enabled())
195 return(OPC_2E_INVALIDEXCEPTION
);
197 Dbl_makequietnan(opnd2p1
,opnd2p2
);
198 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
203 * Check third operand for infinity with a
204 * sign opposite of the multiply result
206 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
207 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
209 * invalid since attempting a magnitude
210 * subtraction of infinities
212 if (Is_invalidtrap_enabled())
213 return(OPC_2E_INVALIDEXCEPTION
);
215 Dbl_makequietnan(resultp1
,resultp2
);
216 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
223 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
224 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
230 * is NaN; signaling or quiet?
232 if (Dbl_isone_signaling(opnd2p1
)) {
233 /* trap if INVALIDTRAP enabled */
234 if (Is_invalidtrap_enabled())
235 return(OPC_2E_INVALIDEXCEPTION
);
238 Dbl_set_quiet(opnd2p1
);
241 * is third operand a signaling NaN?
243 else if (Dbl_is_signalingnan(opnd3p1
)) {
244 /* trap if INVALIDTRAP enabled */
245 if (Is_invalidtrap_enabled())
246 return(OPC_2E_INVALIDEXCEPTION
);
249 Dbl_set_quiet(opnd3p1
);
250 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
256 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
262 * check third operand for NaN's or infinity
264 if (Dbl_isinfinity_exponent(opnd3p1
)) {
265 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
266 /* return infinity */
267 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
271 * is NaN; signaling or quiet?
273 if (Dbl_isone_signaling(opnd3p1
)) {
274 /* trap if INVALIDTRAP enabled */
275 if (Is_invalidtrap_enabled())
276 return(OPC_2E_INVALIDEXCEPTION
);
279 Dbl_set_quiet(opnd3p1
);
284 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
290 * Generate multiply mantissa
292 if (Dbl_isnotzero_exponent(opnd1p1
)) {
294 Dbl_clear_signexponent_set_hidden(opnd1p1
);
298 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
300 * Perform the add opnd3 with zero here.
302 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
303 if (Is_rounding_mode(ROUNDMINUS
)) {
304 Dbl_or_signs(opnd3p1
,resultp1
);
306 Dbl_and_signs(opnd3p1
,resultp1
);
310 * Now let's check for trapped underflow case.
312 else if (Dbl_iszero_exponent(opnd3p1
) &&
313 Is_underflowtrap_enabled()) {
314 /* need to normalize results mantissa */
315 sign_save
= Dbl_signextendedsign(opnd3p1
);
317 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
318 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
319 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
320 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
322 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
323 /* inexact = FALSE */
324 return(OPC_2E_UNDERFLOWEXCEPTION
);
326 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
329 /* is denormalized, adjust exponent */
330 Dbl_clear_signexponent(opnd1p1
);
331 Dbl_leftshiftby1(opnd1p1
,opnd1p2
);
332 Dbl_normalize(opnd1p1
,opnd1p2
,mpy_exponent
);
334 /* opnd2 needs to have hidden bit set with msb in hidden bit */
335 if (Dbl_isnotzero_exponent(opnd2p1
)) {
336 Dbl_clear_signexponent_set_hidden(opnd2p1
);
340 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
342 * Perform the add opnd3 with zero here.
344 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
345 if (Is_rounding_mode(ROUNDMINUS
)) {
346 Dbl_or_signs(opnd3p1
,resultp1
);
348 Dbl_and_signs(opnd3p1
,resultp1
);
352 * Now let's check for trapped underflow case.
354 else if (Dbl_iszero_exponent(opnd3p1
) &&
355 Is_underflowtrap_enabled()) {
356 /* need to normalize results mantissa */
357 sign_save
= Dbl_signextendedsign(opnd3p1
);
359 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
360 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
361 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
362 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
364 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
365 /* inexact = FALSE */
366 return(OPC_2E_UNDERFLOWEXCEPTION
);
368 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
371 /* is denormalized; want to normalize */
372 Dbl_clear_signexponent(opnd2p1
);
373 Dbl_leftshiftby1(opnd2p1
,opnd2p2
);
374 Dbl_normalize(opnd2p1
,opnd2p2
,mpy_exponent
);
377 /* Multiply the first two source mantissas together */
380 * The intermediate result will be kept in tmpres,
381 * which needs enough room for 106 bits of mantissa,
382 * so lets call it a Double extended.
384 Dblext_setzero(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
387 * Four bits at a time are inspected in each loop, and a
388 * simple shift and add multiply algorithm is used.
390 for (count
= DBL_P
-1; count
>= 0; count
-= 4) {
391 Dblext_rightshiftby4(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
392 if (Dbit28p2(opnd1p2
)) {
393 /* Fourword_add should be an ADD followed by 3 ADDC's */
394 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
395 opnd2p1
<<3 | opnd2p2
>>29, opnd2p2
<<3, 0, 0);
397 if (Dbit29p2(opnd1p2
)) {
398 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
399 opnd2p1
<<2 | opnd2p2
>>30, opnd2p2
<<2, 0, 0);
401 if (Dbit30p2(opnd1p2
)) {
402 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
403 opnd2p1
<<1 | opnd2p2
>>31, opnd2p2
<<1, 0, 0);
405 if (Dbit31p2(opnd1p2
)) {
406 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
407 opnd2p1
, opnd2p2
, 0, 0);
409 Dbl_rightshiftby4(opnd1p1
,opnd1p2
);
411 if (Is_dexthiddenoverflow(tmpresp1
)) {
412 /* result mantissa >= 2 (mantissa overflow) */
414 Dblext_rightshiftby1(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
418 * Restore the sign of the mpy result which was saved in resultp1.
419 * The exponent will continue to be kept in mpy_exponent.
421 Dblext_set_sign(tmpresp1
,Dbl_sign(resultp1
));
424 * No rounding is required, since the result of the multiply
425 * is exact in the extended format.
429 * Now we are ready to perform the add portion of the operation.
431 * The exponents need to be kept as integers for now, since the
432 * multiply result might not fit into the exponent field. We
433 * can't overflow or underflow because of this yet, since the
434 * add could bring the final result back into range.
436 add_exponent
= Dbl_exponent(opnd3p1
);
439 * Check for denormalized or zero add operand.
441 if (add_exponent
== 0) {
443 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
445 /* Left can't be zero and must be result.
447 * The final result is now in tmpres and mpy_exponent,
448 * and needs to be rounded and squeezed back into
449 * double precision format from double extended.
451 result_exponent
= mpy_exponent
;
452 Dblext_copy(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
453 resultp1
,resultp2
,resultp3
,resultp4
);
454 sign_save
= Dbl_signextendedsign(resultp1
);/*save sign*/
459 * Neither are zeroes.
460 * Adjust exponent and normalize add operand.
462 sign_save
= Dbl_signextendedsign(opnd3p1
); /* save sign */
463 Dbl_clear_signexponent(opnd3p1
);
464 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
465 Dbl_normalize(opnd3p1
,opnd3p2
,add_exponent
);
466 Dbl_set_sign(opnd3p1
,sign_save
); /* restore sign */
468 Dbl_clear_exponent_set_hidden(opnd3p1
);
471 * Copy opnd3 to the double extended variable called right.
473 Dbl_copyto_dblext(opnd3p1
,opnd3p2
,rightp1
,rightp2
,rightp3
,rightp4
);
476 * A zero "save" helps discover equal operands (for later),
477 * and is used in swapping operands (if needed).
479 Dblext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
482 * Compare magnitude of operands.
484 Dblext_copytoint_exponentmantissap1(tmpresp1
,signlessleft1
);
485 Dblext_copytoint_exponentmantissap1(rightp1
,signlessright1
);
486 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
487 Dblext_ismagnitudeless(tmpresp2
,rightp2
,signlessleft1
,signlessright1
)){
489 * Set the left operand to the larger one by XOR swap.
490 * First finish the first word "save".
492 Dblext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
493 Dblext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
494 Dblext_swap_lower(tmpresp2
,tmpresp3
,tmpresp4
,
495 rightp2
,rightp3
,rightp4
);
496 /* also setup exponents used in rest of routine */
497 diff_exponent
= add_exponent
- mpy_exponent
;
498 result_exponent
= add_exponent
;
500 /* also setup exponents used in rest of routine */
501 diff_exponent
= mpy_exponent
- add_exponent
;
502 result_exponent
= mpy_exponent
;
504 /* Invariant: left is not smaller than right. */
507 * Special case alignment of operands that would force alignment
508 * beyond the extent of the extension. A further optimization
509 * could special case this but only reduces the path length for
510 * this infrequent case.
512 if (diff_exponent
> DBLEXT_THRESHOLD
) {
513 diff_exponent
= DBLEXT_THRESHOLD
;
516 /* Align right operand by shifting it to the right */
517 Dblext_clear_sign(rightp1
);
518 Dblext_right_align(rightp1
,rightp2
,rightp3
,rightp4
,
519 /*shifted by*/diff_exponent
);
521 /* Treat sum and difference of the operands separately. */
524 * Difference of the two operands. Overflow can occur if the
525 * multiply overflowed. A borrow can occur out of the hidden
526 * bit and force a post normalization phase.
528 Dblext_subtract(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
529 rightp1
,rightp2
,rightp3
,rightp4
,
530 resultp1
,resultp2
,resultp3
,resultp4
);
531 sign_save
= Dbl_signextendedsign(resultp1
);
532 if (Dbl_iszero_hidden(resultp1
)) {
533 /* Handle normalization */
534 /* A straightforward algorithm would now shift the
535 * result and extension left until the hidden bit
536 * becomes one. Not all of the extension bits need
537 * participate in the shift. Only the two most
538 * significant bits (round and guard) are needed.
539 * If only a single shift is needed then the guard
540 * bit becomes a significant low order bit and the
541 * extension must participate in the rounding.
542 * If more than a single shift is needed, then all
543 * bits to the right of the guard bit are zeros,
544 * and the guard bit may or may not be zero. */
545 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
548 /* Need to check for a zero result. The sign and
549 * exponent fields have already been zeroed. The more
550 * efficient test of the full object can be used.
552 if(Dblext_iszero(resultp1
,resultp2
,resultp3
,resultp4
)){
553 /* Must have been "x-x" or "x+(-x)". */
554 if (Is_rounding_mode(ROUNDMINUS
))
555 Dbl_setone_sign(resultp1
);
556 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
561 /* Look to see if normalization is finished. */
562 if (Dbl_isone_hidden(resultp1
)) {
563 /* No further normalization is needed */
567 /* Discover first one bit to determine shift amount.
568 * Use a modified binary search. We have already
569 * shifted the result one position right and still
570 * not found a one so the remainder of the extension
571 * must be zero and simplifies rounding. */
573 while (Dbl_iszero_hiddenhigh7mantissa(resultp1
)) {
574 Dblext_leftshiftby8(resultp1
,resultp2
,resultp3
,resultp4
);
575 result_exponent
-= 8;
577 /* Now narrow it down to the nibble */
578 if (Dbl_iszero_hiddenhigh3mantissa(resultp1
)) {
579 /* The lower nibble contains the
581 Dblext_leftshiftby4(resultp1
,resultp2
,resultp3
,resultp4
);
582 result_exponent
-= 4;
584 /* Select case where first bit is set (already
585 * normalized) otherwise select the proper shift. */
586 jumpsize
= Dbl_hiddenhigh3mantissa(resultp1
);
587 if (jumpsize
<= 7) switch(jumpsize
) {
589 Dblext_leftshiftby3(resultp1
,resultp2
,resultp3
,
591 result_exponent
-= 3;
595 Dblext_leftshiftby2(resultp1
,resultp2
,resultp3
,
597 result_exponent
-= 2;
603 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
605 result_exponent
-= 1;
608 } /* end if (hidden...)... */
609 /* Fall through and round */
610 } /* end if (save < 0)... */
613 Dblext_addition(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
614 rightp1
,rightp2
,rightp3
,rightp4
,
615 /*to*/resultp1
,resultp2
,resultp3
,resultp4
);
616 sign_save
= Dbl_signextendedsign(resultp1
);
617 if (Dbl_isone_hiddenoverflow(resultp1
)) {
618 /* Prenormalization required. */
619 Dblext_arithrightshiftby1(resultp1
,resultp2
,resultp3
,
622 } /* end if hiddenoverflow... */
623 } /* end else ...add magnitudes... */
625 /* Round the result. If the extension and lower two words are
626 * all zeros, then the result is exact. Otherwise round in the
627 * correct direction. Underflow is possible. If a postnormalization
628 * is necessary, then the mantissa is all zeros so no shift is needed.
631 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
632 Dblext_denormalize(resultp1
,resultp2
,resultp3
,resultp4
,
633 result_exponent
,is_tiny
);
635 Dbl_set_sign(resultp1
,/*using*/sign_save
);
636 if (Dblext_isnotzero_mantissap3(resultp3
) ||
637 Dblext_isnotzero_mantissap4(resultp4
)) {
639 switch(Rounding_mode()) {
640 case ROUNDNEAREST
: /* The default. */
641 if (Dblext_isone_highp3(resultp3
)) {
642 /* at least 1/2 ulp */
643 if (Dblext_isnotzero_low31p3(resultp3
) ||
644 Dblext_isnotzero_mantissap4(resultp4
) ||
645 Dblext_isone_lowp2(resultp2
)) {
646 /* either exactly half way and odd or
647 * more than 1/2ulp */
648 Dbl_increment(resultp1
,resultp2
);
654 if (Dbl_iszero_sign(resultp1
)) {
655 /* Round up positive results */
656 Dbl_increment(resultp1
,resultp2
);
661 if (Dbl_isone_sign(resultp1
)) {
662 /* Round down negative results */
663 Dbl_increment(resultp1
,resultp2
);
667 /* truncate is simple */
668 } /* end switch... */
669 if (Dbl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
671 if (result_exponent
>= DBL_INFINITY_EXPONENT
) {
672 /* trap if OVERFLOWTRAP enabled */
673 if (Is_overflowtrap_enabled()) {
675 * Adjust bias of result
677 Dbl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
678 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
680 if (Is_inexacttrap_enabled())
681 return (OPC_2E_OVERFLOWEXCEPTION
|
682 OPC_2E_INEXACTEXCEPTION
);
683 else Set_inexactflag();
684 return (OPC_2E_OVERFLOWEXCEPTION
);
688 /* set result to infinity or largest number */
689 Dbl_setoverflow(resultp1
,resultp2
);
691 } else if (result_exponent
<= 0) { /* underflow case */
692 if (Is_underflowtrap_enabled()) {
694 * Adjust bias of result
696 Dbl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
697 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
699 if (Is_inexacttrap_enabled())
700 return (OPC_2E_UNDERFLOWEXCEPTION
|
701 OPC_2E_INEXACTEXCEPTION
);
702 else Set_inexactflag();
703 return(OPC_2E_UNDERFLOWEXCEPTION
);
705 else if (inexact
&& is_tiny
) Set_underflowflag();
707 else Dbl_set_exponent(resultp1
,result_exponent
);
708 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
710 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
711 else Set_inexactflag();
716 * Double Floating-point Multiply Negate Fused Add
719 dbl_fmpynfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
721 dbl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
722 unsigned int *status
;
724 unsigned int opnd1p1
, opnd1p2
, opnd2p1
, opnd2p2
, opnd3p1
, opnd3p2
;
725 register unsigned int tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
;
726 unsigned int rightp1
, rightp2
, rightp3
, rightp4
;
727 unsigned int resultp1
, resultp2
= 0, resultp3
= 0, resultp4
= 0;
728 register int mpy_exponent
, add_exponent
, count
;
729 boolean inexact
= FALSE
, is_tiny
= FALSE
;
731 unsigned int signlessleft1
, signlessright1
, save
;
732 register int result_exponent
, diff_exponent
;
733 int sign_save
, jumpsize
;
735 Dbl_copyfromptr(src1ptr
,opnd1p1
,opnd1p2
);
736 Dbl_copyfromptr(src2ptr
,opnd2p1
,opnd2p2
);
737 Dbl_copyfromptr(src3ptr
,opnd3p1
,opnd3p2
);
740 * set sign bit of result of multiply
742 if (Dbl_sign(opnd1p1
) ^ Dbl_sign(opnd2p1
))
743 Dbl_setzerop1(resultp1
);
745 Dbl_setnegativezerop1(resultp1
);
748 * Generate multiply exponent
750 mpy_exponent
= Dbl_exponent(opnd1p1
) + Dbl_exponent(opnd2p1
) - DBL_BIAS
;
753 * check first operand for NaN's or infinity
755 if (Dbl_isinfinity_exponent(opnd1p1
)) {
756 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
757 if (Dbl_isnotnan(opnd2p1
,opnd2p2
) &&
758 Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
759 if (Dbl_iszero_exponentmantissa(opnd2p1
,opnd2p2
)) {
761 * invalid since operands are infinity
764 if (Is_invalidtrap_enabled())
765 return(OPC_2E_INVALIDEXCEPTION
);
767 Dbl_makequietnan(resultp1
,resultp2
);
768 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
772 * Check third operand for infinity with a
773 * sign opposite of the multiply result
775 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
776 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
778 * invalid since attempting a magnitude
779 * subtraction of infinities
781 if (Is_invalidtrap_enabled())
782 return(OPC_2E_INVALIDEXCEPTION
);
784 Dbl_makequietnan(resultp1
,resultp2
);
785 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
792 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
793 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
799 * is NaN; signaling or quiet?
801 if (Dbl_isone_signaling(opnd1p1
)) {
802 /* trap if INVALIDTRAP enabled */
803 if (Is_invalidtrap_enabled())
804 return(OPC_2E_INVALIDEXCEPTION
);
807 Dbl_set_quiet(opnd1p1
);
810 * is second operand a signaling NaN?
812 else if (Dbl_is_signalingnan(opnd2p1
)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION
);
818 Dbl_set_quiet(opnd2p1
);
819 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
823 * is third operand a signaling NaN?
825 else if (Dbl_is_signalingnan(opnd3p1
)) {
826 /* trap if INVALIDTRAP enabled */
827 if (Is_invalidtrap_enabled())
828 return(OPC_2E_INVALIDEXCEPTION
);
831 Dbl_set_quiet(opnd3p1
);
832 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
838 Dbl_copytoptr(opnd1p1
,opnd1p2
,dstptr
);
844 * check second operand for NaN's or infinity
846 if (Dbl_isinfinity_exponent(opnd2p1
)) {
847 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
848 if (Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
849 if (Dbl_iszero_exponentmantissa(opnd1p1
,opnd1p2
)) {
851 * invalid since multiply operands are
854 if (Is_invalidtrap_enabled())
855 return(OPC_2E_INVALIDEXCEPTION
);
857 Dbl_makequietnan(opnd2p1
,opnd2p2
);
858 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
863 * Check third operand for infinity with a
864 * sign opposite of the multiply result
866 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
867 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
869 * invalid since attempting a magnitude
870 * subtraction of infinities
872 if (Is_invalidtrap_enabled())
873 return(OPC_2E_INVALIDEXCEPTION
);
875 Dbl_makequietnan(resultp1
,resultp2
);
876 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
883 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
884 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
890 * is NaN; signaling or quiet?
892 if (Dbl_isone_signaling(opnd2p1
)) {
893 /* trap if INVALIDTRAP enabled */
894 if (Is_invalidtrap_enabled())
895 return(OPC_2E_INVALIDEXCEPTION
);
898 Dbl_set_quiet(opnd2p1
);
901 * is third operand a signaling NaN?
903 else if (Dbl_is_signalingnan(opnd3p1
)) {
904 /* trap if INVALIDTRAP enabled */
905 if (Is_invalidtrap_enabled())
906 return(OPC_2E_INVALIDEXCEPTION
);
909 Dbl_set_quiet(opnd3p1
);
910 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
916 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
922 * check third operand for NaN's or infinity
924 if (Dbl_isinfinity_exponent(opnd3p1
)) {
925 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
926 /* return infinity */
927 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
931 * is NaN; signaling or quiet?
933 if (Dbl_isone_signaling(opnd3p1
)) {
934 /* trap if INVALIDTRAP enabled */
935 if (Is_invalidtrap_enabled())
936 return(OPC_2E_INVALIDEXCEPTION
);
939 Dbl_set_quiet(opnd3p1
);
944 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
950 * Generate multiply mantissa
952 if (Dbl_isnotzero_exponent(opnd1p1
)) {
954 Dbl_clear_signexponent_set_hidden(opnd1p1
);
958 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
960 * Perform the add opnd3 with zero here.
962 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
963 if (Is_rounding_mode(ROUNDMINUS
)) {
964 Dbl_or_signs(opnd3p1
,resultp1
);
966 Dbl_and_signs(opnd3p1
,resultp1
);
970 * Now let's check for trapped underflow case.
972 else if (Dbl_iszero_exponent(opnd3p1
) &&
973 Is_underflowtrap_enabled()) {
974 /* need to normalize results mantissa */
975 sign_save
= Dbl_signextendedsign(opnd3p1
);
977 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
978 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
979 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
980 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
982 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
983 /* inexact = FALSE */
984 return(OPC_2E_UNDERFLOWEXCEPTION
);
986 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
989 /* is denormalized, adjust exponent */
990 Dbl_clear_signexponent(opnd1p1
);
991 Dbl_leftshiftby1(opnd1p1
,opnd1p2
);
992 Dbl_normalize(opnd1p1
,opnd1p2
,mpy_exponent
);
994 /* opnd2 needs to have hidden bit set with msb in hidden bit */
995 if (Dbl_isnotzero_exponent(opnd2p1
)) {
996 Dbl_clear_signexponent_set_hidden(opnd2p1
);
1000 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
1002 * Perform the add opnd3 with zero here.
1004 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
1005 if (Is_rounding_mode(ROUNDMINUS
)) {
1006 Dbl_or_signs(opnd3p1
,resultp1
);
1008 Dbl_and_signs(opnd3p1
,resultp1
);
1012 * Now let's check for trapped underflow case.
1014 else if (Dbl_iszero_exponent(opnd3p1
) &&
1015 Is_underflowtrap_enabled()) {
1016 /* need to normalize results mantissa */
1017 sign_save
= Dbl_signextendedsign(opnd3p1
);
1018 result_exponent
= 0;
1019 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
1020 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
1021 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
1022 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
1024 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
1025 /* inexact = FALSE */
1026 return(OPC_2E_UNDERFLOWEXCEPTION
);
1028 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
1029 return(NOEXCEPTION
);
1031 /* is denormalized; want to normalize */
1032 Dbl_clear_signexponent(opnd2p1
);
1033 Dbl_leftshiftby1(opnd2p1
,opnd2p2
);
1034 Dbl_normalize(opnd2p1
,opnd2p2
,mpy_exponent
);
1037 /* Multiply the first two source mantissas together */
1040 * The intermediate result will be kept in tmpres,
1041 * which needs enough room for 106 bits of mantissa,
1042 * so lets call it a Double extended.
1044 Dblext_setzero(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1047 * Four bits at a time are inspected in each loop, and a
1048 * simple shift and add multiply algorithm is used.
1050 for (count
= DBL_P
-1; count
>= 0; count
-= 4) {
1051 Dblext_rightshiftby4(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1052 if (Dbit28p2(opnd1p2
)) {
1053 /* Fourword_add should be an ADD followed by 3 ADDC's */
1054 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1055 opnd2p1
<<3 | opnd2p2
>>29, opnd2p2
<<3, 0, 0);
1057 if (Dbit29p2(opnd1p2
)) {
1058 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1059 opnd2p1
<<2 | opnd2p2
>>30, opnd2p2
<<2, 0, 0);
1061 if (Dbit30p2(opnd1p2
)) {
1062 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1063 opnd2p1
<<1 | opnd2p2
>>31, opnd2p2
<<1, 0, 0);
1065 if (Dbit31p2(opnd1p2
)) {
1066 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1067 opnd2p1
, opnd2p2
, 0, 0);
1069 Dbl_rightshiftby4(opnd1p1
,opnd1p2
);
1071 if (Is_dexthiddenoverflow(tmpresp1
)) {
1072 /* result mantissa >= 2 (mantissa overflow) */
1074 Dblext_rightshiftby1(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1078 * Restore the sign of the mpy result which was saved in resultp1.
1079 * The exponent will continue to be kept in mpy_exponent.
1081 Dblext_set_sign(tmpresp1
,Dbl_sign(resultp1
));
1084 * No rounding is required, since the result of the multiply
1085 * is exact in the extended format.
1089 * Now we are ready to perform the add portion of the operation.
1091 * The exponents need to be kept as integers for now, since the
1092 * multiply result might not fit into the exponent field. We
1093 * can't overflow or underflow because of this yet, since the
1094 * add could bring the final result back into range.
1096 add_exponent
= Dbl_exponent(opnd3p1
);
1099 * Check for denormalized or zero add operand.
1101 if (add_exponent
== 0) {
1102 /* check for zero */
1103 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
1105 /* Left can't be zero and must be result.
1107 * The final result is now in tmpres and mpy_exponent,
1108 * and needs to be rounded and squeezed back into
1109 * double precision format from double extended.
1111 result_exponent
= mpy_exponent
;
1112 Dblext_copy(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1113 resultp1
,resultp2
,resultp3
,resultp4
);
1114 sign_save
= Dbl_signextendedsign(resultp1
);/*save sign*/
1119 * Neither are zeroes.
1120 * Adjust exponent and normalize add operand.
1122 sign_save
= Dbl_signextendedsign(opnd3p1
); /* save sign */
1123 Dbl_clear_signexponent(opnd3p1
);
1124 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
1125 Dbl_normalize(opnd3p1
,opnd3p2
,add_exponent
);
1126 Dbl_set_sign(opnd3p1
,sign_save
); /* restore sign */
1128 Dbl_clear_exponent_set_hidden(opnd3p1
);
1131 * Copy opnd3 to the double extended variable called right.
1133 Dbl_copyto_dblext(opnd3p1
,opnd3p2
,rightp1
,rightp2
,rightp3
,rightp4
);
1136 * A zero "save" helps discover equal operands (for later),
1137 * and is used in swapping operands (if needed).
1139 Dblext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
1142 * Compare magnitude of operands.
1144 Dblext_copytoint_exponentmantissap1(tmpresp1
,signlessleft1
);
1145 Dblext_copytoint_exponentmantissap1(rightp1
,signlessright1
);
1146 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
1147 Dblext_ismagnitudeless(tmpresp2
,rightp2
,signlessleft1
,signlessright1
)){
1149 * Set the left operand to the larger one by XOR swap.
1150 * First finish the first word "save".
1152 Dblext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
1153 Dblext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
1154 Dblext_swap_lower(tmpresp2
,tmpresp3
,tmpresp4
,
1155 rightp2
,rightp3
,rightp4
);
1156 /* also setup exponents used in rest of routine */
1157 diff_exponent
= add_exponent
- mpy_exponent
;
1158 result_exponent
= add_exponent
;
1160 /* also setup exponents used in rest of routine */
1161 diff_exponent
= mpy_exponent
- add_exponent
;
1162 result_exponent
= mpy_exponent
;
1164 /* Invariant: left is not smaller than right. */
1167 * Special case alignment of operands that would force alignment
1168 * beyond the extent of the extension. A further optimization
1169 * could special case this but only reduces the path length for
1170 * this infrequent case.
1172 if (diff_exponent
> DBLEXT_THRESHOLD
) {
1173 diff_exponent
= DBLEXT_THRESHOLD
;
1176 /* Align right operand by shifting it to the right */
1177 Dblext_clear_sign(rightp1
);
1178 Dblext_right_align(rightp1
,rightp2
,rightp3
,rightp4
,
1179 /*shifted by*/diff_exponent
);
1181 /* Treat sum and difference of the operands separately. */
1182 if ((int)save
< 0) {
1184 * Difference of the two operands. Overflow can occur if the
1185 * multiply overflowed. A borrow can occur out of the hidden
1186 * bit and force a post normalization phase.
1188 Dblext_subtract(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1189 rightp1
,rightp2
,rightp3
,rightp4
,
1190 resultp1
,resultp2
,resultp3
,resultp4
);
1191 sign_save
= Dbl_signextendedsign(resultp1
);
1192 if (Dbl_iszero_hidden(resultp1
)) {
1193 /* Handle normalization */
1194 /* A straightforward algorithm would now shift the
1195 * result and extension left until the hidden bit
1196 * becomes one. Not all of the extension bits need
1197 * participate in the shift. Only the two most
1198 * significant bits (round and guard) are needed.
1199 * If only a single shift is needed then the guard
1200 * bit becomes a significant low order bit and the
1201 * extension must participate in the rounding.
1202 * If more than a single shift is needed, then all
1203 * bits to the right of the guard bit are zeros,
1204 * and the guard bit may or may not be zero. */
1205 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
1208 /* Need to check for a zero result. The sign and
1209 * exponent fields have already been zeroed. The more
1210 * efficient test of the full object can be used.
1212 if (Dblext_iszero(resultp1
,resultp2
,resultp3
,resultp4
)) {
1213 /* Must have been "x-x" or "x+(-x)". */
1214 if (Is_rounding_mode(ROUNDMINUS
))
1215 Dbl_setone_sign(resultp1
);
1216 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1217 return(NOEXCEPTION
);
1221 /* Look to see if normalization is finished. */
1222 if (Dbl_isone_hidden(resultp1
)) {
1223 /* No further normalization is needed */
1227 /* Discover first one bit to determine shift amount.
1228 * Use a modified binary search. We have already
1229 * shifted the result one position right and still
1230 * not found a one so the remainder of the extension
1231 * must be zero and simplifies rounding. */
1233 while (Dbl_iszero_hiddenhigh7mantissa(resultp1
)) {
1234 Dblext_leftshiftby8(resultp1
,resultp2
,resultp3
,resultp4
);
1235 result_exponent
-= 8;
1237 /* Now narrow it down to the nibble */
1238 if (Dbl_iszero_hiddenhigh3mantissa(resultp1
)) {
1239 /* The lower nibble contains the
1240 * normalizing one */
1241 Dblext_leftshiftby4(resultp1
,resultp2
,resultp3
,resultp4
);
1242 result_exponent
-= 4;
1244 /* Select case where first bit is set (already
1245 * normalized) otherwise select the proper shift. */
1246 jumpsize
= Dbl_hiddenhigh3mantissa(resultp1
);
1247 if (jumpsize
<= 7) switch(jumpsize
) {
1249 Dblext_leftshiftby3(resultp1
,resultp2
,resultp3
,
1251 result_exponent
-= 3;
1255 Dblext_leftshiftby2(resultp1
,resultp2
,resultp3
,
1257 result_exponent
-= 2;
1263 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
1265 result_exponent
-= 1;
1268 } /* end if (hidden...)... */
1269 /* Fall through and round */
1270 } /* end if (save < 0)... */
1272 /* Add magnitudes */
1273 Dblext_addition(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1274 rightp1
,rightp2
,rightp3
,rightp4
,
1275 /*to*/resultp1
,resultp2
,resultp3
,resultp4
);
1276 sign_save
= Dbl_signextendedsign(resultp1
);
1277 if (Dbl_isone_hiddenoverflow(resultp1
)) {
1278 /* Prenormalization required. */
1279 Dblext_arithrightshiftby1(resultp1
,resultp2
,resultp3
,
1282 } /* end if hiddenoverflow... */
1283 } /* end else ...add magnitudes... */
1285 /* Round the result. If the extension and lower two words are
1286 * all zeros, then the result is exact. Otherwise round in the
1287 * correct direction. Underflow is possible. If a postnormalization
1288 * is necessary, then the mantissa is all zeros so no shift is needed.
1291 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
1292 Dblext_denormalize(resultp1
,resultp2
,resultp3
,resultp4
,
1293 result_exponent
,is_tiny
);
1295 Dbl_set_sign(resultp1
,/*using*/sign_save
);
1296 if (Dblext_isnotzero_mantissap3(resultp3
) ||
1297 Dblext_isnotzero_mantissap4(resultp4
)) {
1299 switch(Rounding_mode()) {
1300 case ROUNDNEAREST
: /* The default. */
1301 if (Dblext_isone_highp3(resultp3
)) {
1302 /* at least 1/2 ulp */
1303 if (Dblext_isnotzero_low31p3(resultp3
) ||
1304 Dblext_isnotzero_mantissap4(resultp4
) ||
1305 Dblext_isone_lowp2(resultp2
)) {
1306 /* either exactly half way and odd or
1307 * more than 1/2ulp */
1308 Dbl_increment(resultp1
,resultp2
);
1314 if (Dbl_iszero_sign(resultp1
)) {
1315 /* Round up positive results */
1316 Dbl_increment(resultp1
,resultp2
);
1321 if (Dbl_isone_sign(resultp1
)) {
1322 /* Round down negative results */
1323 Dbl_increment(resultp1
,resultp2
);
1327 /* truncate is simple */
1328 } /* end switch... */
1329 if (Dbl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
1331 if (result_exponent
>= DBL_INFINITY_EXPONENT
) {
1333 if (Is_overflowtrap_enabled()) {
1335 * Adjust bias of result
1337 Dbl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
1338 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1340 if (Is_inexacttrap_enabled())
1341 return (OPC_2E_OVERFLOWEXCEPTION
|
1342 OPC_2E_INEXACTEXCEPTION
);
1343 else Set_inexactflag();
1344 return (OPC_2E_OVERFLOWEXCEPTION
);
1348 Dbl_setoverflow(resultp1
,resultp2
);
1349 } else if (result_exponent
<= 0) { /* underflow case */
1350 if (Is_underflowtrap_enabled()) {
1352 * Adjust bias of result
1354 Dbl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
1355 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1357 if (Is_inexacttrap_enabled())
1358 return (OPC_2E_UNDERFLOWEXCEPTION
|
1359 OPC_2E_INEXACTEXCEPTION
);
1360 else Set_inexactflag();
1361 return(OPC_2E_UNDERFLOWEXCEPTION
);
1363 else if (inexact
&& is_tiny
) Set_underflowflag();
1365 else Dbl_set_exponent(resultp1
,result_exponent
);
1366 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1368 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
1369 else Set_inexactflag();
1370 return(NOEXCEPTION
);
1374 * Single Floating-point Multiply Fused Add
1377 sgl_fmpyfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
1379 sgl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
1380 unsigned int *status
;
1382 unsigned int opnd1
, opnd2
, opnd3
;
1383 register unsigned int tmpresp1
, tmpresp2
;
1384 unsigned int rightp1
, rightp2
;
1385 unsigned int resultp1
, resultp2
= 0;
1386 register int mpy_exponent
, add_exponent
, count
;
1387 boolean inexact
= FALSE
, is_tiny
= FALSE
;
1389 unsigned int signlessleft1
, signlessright1
, save
;
1390 register int result_exponent
, diff_exponent
;
1391 int sign_save
, jumpsize
;
1393 Sgl_copyfromptr(src1ptr
,opnd1
);
1394 Sgl_copyfromptr(src2ptr
,opnd2
);
1395 Sgl_copyfromptr(src3ptr
,opnd3
);
1398 * set sign bit of result of multiply
1400 if (Sgl_sign(opnd1
) ^ Sgl_sign(opnd2
))
1401 Sgl_setnegativezero(resultp1
);
1402 else Sgl_setzero(resultp1
);
1405 * Generate multiply exponent
1407 mpy_exponent
= Sgl_exponent(opnd1
) + Sgl_exponent(opnd2
) - SGL_BIAS
;
1410 * check first operand for NaN's or infinity
1412 if (Sgl_isinfinity_exponent(opnd1
)) {
1413 if (Sgl_iszero_mantissa(opnd1
)) {
1414 if (Sgl_isnotnan(opnd2
) && Sgl_isnotnan(opnd3
)) {
1415 if (Sgl_iszero_exponentmantissa(opnd2
)) {
1417 * invalid since operands are infinity
1420 if (Is_invalidtrap_enabled())
1421 return(OPC_2E_INVALIDEXCEPTION
);
1423 Sgl_makequietnan(resultp1
);
1424 Sgl_copytoptr(resultp1
,dstptr
);
1425 return(NOEXCEPTION
);
1428 * Check third operand for infinity with a
1429 * sign opposite of the multiply result
1431 if (Sgl_isinfinity(opnd3
) &&
1432 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
1434 * invalid since attempting a magnitude
1435 * subtraction of infinities
1437 if (Is_invalidtrap_enabled())
1438 return(OPC_2E_INVALIDEXCEPTION
);
1440 Sgl_makequietnan(resultp1
);
1441 Sgl_copytoptr(resultp1
,dstptr
);
1442 return(NOEXCEPTION
);
1448 Sgl_setinfinity_exponentmantissa(resultp1
);
1449 Sgl_copytoptr(resultp1
,dstptr
);
1450 return(NOEXCEPTION
);
1455 * is NaN; signaling or quiet?
1457 if (Sgl_isone_signaling(opnd1
)) {
1458 /* trap if INVALIDTRAP enabled */
1459 if (Is_invalidtrap_enabled())
1460 return(OPC_2E_INVALIDEXCEPTION
);
1461 /* make NaN quiet */
1463 Sgl_set_quiet(opnd1
);
1466 * is second operand a signaling NaN?
1468 else if (Sgl_is_signalingnan(opnd2
)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION
);
1472 /* make NaN quiet */
1474 Sgl_set_quiet(opnd2
);
1475 Sgl_copytoptr(opnd2
,dstptr
);
1476 return(NOEXCEPTION
);
1479 * is third operand a signaling NaN?
1481 else if (Sgl_is_signalingnan(opnd3
)) {
1482 /* trap if INVALIDTRAP enabled */
1483 if (Is_invalidtrap_enabled())
1484 return(OPC_2E_INVALIDEXCEPTION
);
1485 /* make NaN quiet */
1487 Sgl_set_quiet(opnd3
);
1488 Sgl_copytoptr(opnd3
,dstptr
);
1489 return(NOEXCEPTION
);
1494 Sgl_copytoptr(opnd1
,dstptr
);
1495 return(NOEXCEPTION
);
1500 * check second operand for NaN's or infinity
1502 if (Sgl_isinfinity_exponent(opnd2
)) {
1503 if (Sgl_iszero_mantissa(opnd2
)) {
1504 if (Sgl_isnotnan(opnd3
)) {
1505 if (Sgl_iszero_exponentmantissa(opnd1
)) {
1507 * invalid since multiply operands are
1510 if (Is_invalidtrap_enabled())
1511 return(OPC_2E_INVALIDEXCEPTION
);
1513 Sgl_makequietnan(opnd2
);
1514 Sgl_copytoptr(opnd2
,dstptr
);
1515 return(NOEXCEPTION
);
1519 * Check third operand for infinity with a
1520 * sign opposite of the multiply result
1522 if (Sgl_isinfinity(opnd3
) &&
1523 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
1525 * invalid since attempting a magnitude
1526 * subtraction of infinities
1528 if (Is_invalidtrap_enabled())
1529 return(OPC_2E_INVALIDEXCEPTION
);
1531 Sgl_makequietnan(resultp1
);
1532 Sgl_copytoptr(resultp1
,dstptr
);
1533 return(NOEXCEPTION
);
1539 Sgl_setinfinity_exponentmantissa(resultp1
);
1540 Sgl_copytoptr(resultp1
,dstptr
);
1541 return(NOEXCEPTION
);
1546 * is NaN; signaling or quiet?
1548 if (Sgl_isone_signaling(opnd2
)) {
1549 /* trap if INVALIDTRAP enabled */
1550 if (Is_invalidtrap_enabled())
1551 return(OPC_2E_INVALIDEXCEPTION
);
1552 /* make NaN quiet */
1554 Sgl_set_quiet(opnd2
);
1557 * is third operand a signaling NaN?
1559 else if (Sgl_is_signalingnan(opnd3
)) {
1560 /* trap if INVALIDTRAP enabled */
1561 if (Is_invalidtrap_enabled())
1562 return(OPC_2E_INVALIDEXCEPTION
);
1563 /* make NaN quiet */
1565 Sgl_set_quiet(opnd3
);
1566 Sgl_copytoptr(opnd3
,dstptr
);
1567 return(NOEXCEPTION
);
1572 Sgl_copytoptr(opnd2
,dstptr
);
1573 return(NOEXCEPTION
);
1578 * check third operand for NaN's or infinity
1580 if (Sgl_isinfinity_exponent(opnd3
)) {
1581 if (Sgl_iszero_mantissa(opnd3
)) {
1582 /* return infinity */
1583 Sgl_copytoptr(opnd3
,dstptr
);
1584 return(NOEXCEPTION
);
1587 * is NaN; signaling or quiet?
1589 if (Sgl_isone_signaling(opnd3
)) {
1590 /* trap if INVALIDTRAP enabled */
1591 if (Is_invalidtrap_enabled())
1592 return(OPC_2E_INVALIDEXCEPTION
);
1593 /* make NaN quiet */
1595 Sgl_set_quiet(opnd3
);
1600 Sgl_copytoptr(opnd3
,dstptr
);
1601 return(NOEXCEPTION
);
1606 * Generate multiply mantissa
1608 if (Sgl_isnotzero_exponent(opnd1
)) {
1609 /* set hidden bit */
1610 Sgl_clear_signexponent_set_hidden(opnd1
);
1613 /* check for zero */
1614 if (Sgl_iszero_mantissa(opnd1
)) {
1616 * Perform the add opnd3 with zero here.
1618 if (Sgl_iszero_exponentmantissa(opnd3
)) {
1619 if (Is_rounding_mode(ROUNDMINUS
)) {
1620 Sgl_or_signs(opnd3
,resultp1
);
1622 Sgl_and_signs(opnd3
,resultp1
);
1626 * Now let's check for trapped underflow case.
1628 else if (Sgl_iszero_exponent(opnd3
) &&
1629 Is_underflowtrap_enabled()) {
1630 /* need to normalize results mantissa */
1631 sign_save
= Sgl_signextendedsign(opnd3
);
1632 result_exponent
= 0;
1633 Sgl_leftshiftby1(opnd3
);
1634 Sgl_normalize(opnd3
,result_exponent
);
1635 Sgl_set_sign(opnd3
,/*using*/sign_save
);
1636 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
1638 Sgl_copytoptr(opnd3
,dstptr
);
1639 /* inexact = FALSE */
1640 return(OPC_2E_UNDERFLOWEXCEPTION
);
1642 Sgl_copytoptr(opnd3
,dstptr
);
1643 return(NOEXCEPTION
);
1645 /* is denormalized, adjust exponent */
1646 Sgl_clear_signexponent(opnd1
);
1647 Sgl_leftshiftby1(opnd1
);
1648 Sgl_normalize(opnd1
,mpy_exponent
);
1650 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651 if (Sgl_isnotzero_exponent(opnd2
)) {
1652 Sgl_clear_signexponent_set_hidden(opnd2
);
1655 /* check for zero */
1656 if (Sgl_iszero_mantissa(opnd2
)) {
1658 * Perform the add opnd3 with zero here.
1660 if (Sgl_iszero_exponentmantissa(opnd3
)) {
1661 if (Is_rounding_mode(ROUNDMINUS
)) {
1662 Sgl_or_signs(opnd3
,resultp1
);
1664 Sgl_and_signs(opnd3
,resultp1
);
1668 * Now let's check for trapped underflow case.
1670 else if (Sgl_iszero_exponent(opnd3
) &&
1671 Is_underflowtrap_enabled()) {
1672 /* need to normalize results mantissa */
1673 sign_save
= Sgl_signextendedsign(opnd3
);
1674 result_exponent
= 0;
1675 Sgl_leftshiftby1(opnd3
);
1676 Sgl_normalize(opnd3
,result_exponent
);
1677 Sgl_set_sign(opnd3
,/*using*/sign_save
);
1678 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
1680 Sgl_copytoptr(opnd3
,dstptr
);
1681 /* inexact = FALSE */
1682 return(OPC_2E_UNDERFLOWEXCEPTION
);
1684 Sgl_copytoptr(opnd3
,dstptr
);
1685 return(NOEXCEPTION
);
1687 /* is denormalized; want to normalize */
1688 Sgl_clear_signexponent(opnd2
);
1689 Sgl_leftshiftby1(opnd2
);
1690 Sgl_normalize(opnd2
,mpy_exponent
);
1693 /* Multiply the first two source mantissas together */
1696 * The intermediate result will be kept in tmpres,
1697 * which needs enough room for 106 bits of mantissa,
1698 * so lets call it a Double extended.
1700 Sglext_setzero(tmpresp1
,tmpresp2
);
1703 * Four bits at a time are inspected in each loop, and a
1704 * simple shift and add multiply algorithm is used.
1706 for (count
= SGL_P
-1; count
>= 0; count
-= 4) {
1707 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
1708 if (Sbit28(opnd1
)) {
1709 /* Twoword_add should be an ADD followed by 2 ADDC's */
1710 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<3, 0);
1712 if (Sbit29(opnd1
)) {
1713 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<2, 0);
1715 if (Sbit30(opnd1
)) {
1716 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<1, 0);
1718 if (Sbit31(opnd1
)) {
1719 Twoword_add(tmpresp1
, tmpresp2
, opnd2
, 0);
1721 Sgl_rightshiftby4(opnd1
);
1723 if (Is_sexthiddenoverflow(tmpresp1
)) {
1724 /* result mantissa >= 2 (mantissa overflow) */
1726 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
1728 Sglext_rightshiftby3(tmpresp1
,tmpresp2
);
1732 * Restore the sign of the mpy result which was saved in resultp1.
1733 * The exponent will continue to be kept in mpy_exponent.
1735 Sglext_set_sign(tmpresp1
,Sgl_sign(resultp1
));
1738 * No rounding is required, since the result of the multiply
1739 * is exact in the extended format.
1743 * Now we are ready to perform the add portion of the operation.
1745 * The exponents need to be kept as integers for now, since the
1746 * multiply result might not fit into the exponent field. We
1747 * can't overflow or underflow because of this yet, since the
1748 * add could bring the final result back into range.
1750 add_exponent
= Sgl_exponent(opnd3
);
1753 * Check for denormalized or zero add operand.
1755 if (add_exponent
== 0) {
1756 /* check for zero */
1757 if (Sgl_iszero_mantissa(opnd3
)) {
1759 /* Left can't be zero and must be result.
1761 * The final result is now in tmpres and mpy_exponent,
1762 * and needs to be rounded and squeezed back into
1763 * double precision format from double extended.
1765 result_exponent
= mpy_exponent
;
1766 Sglext_copy(tmpresp1
,tmpresp2
,resultp1
,resultp2
);
1767 sign_save
= Sgl_signextendedsign(resultp1
);/*save sign*/
1772 * Neither are zeroes.
1773 * Adjust exponent and normalize add operand.
1775 sign_save
= Sgl_signextendedsign(opnd3
); /* save sign */
1776 Sgl_clear_signexponent(opnd3
);
1777 Sgl_leftshiftby1(opnd3
);
1778 Sgl_normalize(opnd3
,add_exponent
);
1779 Sgl_set_sign(opnd3
,sign_save
); /* restore sign */
1781 Sgl_clear_exponent_set_hidden(opnd3
);
1784 * Copy opnd3 to the double extended variable called right.
1786 Sgl_copyto_sglext(opnd3
,rightp1
,rightp2
);
1789 * A zero "save" helps discover equal operands (for later),
1790 * and is used in swapping operands (if needed).
1792 Sglext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
1795 * Compare magnitude of operands.
1797 Sglext_copytoint_exponentmantissa(tmpresp1
,signlessleft1
);
1798 Sglext_copytoint_exponentmantissa(rightp1
,signlessright1
);
1799 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
1800 Sglext_ismagnitudeless(signlessleft1
,signlessright1
)) {
1802 * Set the left operand to the larger one by XOR swap.
1803 * First finish the first word "save".
1805 Sglext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
1806 Sglext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
1807 Sglext_swap_lower(tmpresp2
,rightp2
);
1808 /* also setup exponents used in rest of routine */
1809 diff_exponent
= add_exponent
- mpy_exponent
;
1810 result_exponent
= add_exponent
;
1812 /* also setup exponents used in rest of routine */
1813 diff_exponent
= mpy_exponent
- add_exponent
;
1814 result_exponent
= mpy_exponent
;
1816 /* Invariant: left is not smaller than right. */
1819 * Special case alignment of operands that would force alignment
1820 * beyond the extent of the extension. A further optimization
1821 * could special case this but only reduces the path length for
1822 * this infrequent case.
1824 if (diff_exponent
> SGLEXT_THRESHOLD
) {
1825 diff_exponent
= SGLEXT_THRESHOLD
;
1828 /* Align right operand by shifting it to the right */
1829 Sglext_clear_sign(rightp1
);
1830 Sglext_right_align(rightp1
,rightp2
,/*shifted by*/diff_exponent
);
1832 /* Treat sum and difference of the operands separately. */
1833 if ((int)save
< 0) {
1835 * Difference of the two operands. Overflow can occur if the
1836 * multiply overflowed. A borrow can occur out of the hidden
1837 * bit and force a post normalization phase.
1839 Sglext_subtract(tmpresp1
,tmpresp2
, rightp1
,rightp2
,
1841 sign_save
= Sgl_signextendedsign(resultp1
);
1842 if (Sgl_iszero_hidden(resultp1
)) {
1843 /* Handle normalization */
1844 /* A straightforward algorithm would now shift the
1845 * result and extension left until the hidden bit
1846 * becomes one. Not all of the extension bits need
1847 * participate in the shift. Only the two most
1848 * significant bits (round and guard) are needed.
1849 * If only a single shift is needed then the guard
1850 * bit becomes a significant low order bit and the
1851 * extension must participate in the rounding.
1852 * If more than a single shift is needed, then all
1853 * bits to the right of the guard bit are zeros,
1854 * and the guard bit may or may not be zero. */
1855 Sglext_leftshiftby1(resultp1
,resultp2
);
1857 /* Need to check for a zero result. The sign and
1858 * exponent fields have already been zeroed. The more
1859 * efficient test of the full object can be used.
1861 if (Sglext_iszero(resultp1
,resultp2
)) {
1862 /* Must have been "x-x" or "x+(-x)". */
1863 if (Is_rounding_mode(ROUNDMINUS
))
1864 Sgl_setone_sign(resultp1
);
1865 Sgl_copytoptr(resultp1
,dstptr
);
1866 return(NOEXCEPTION
);
1870 /* Look to see if normalization is finished. */
1871 if (Sgl_isone_hidden(resultp1
)) {
1872 /* No further normalization is needed */
1876 /* Discover first one bit to determine shift amount.
1877 * Use a modified binary search. We have already
1878 * shifted the result one position right and still
1879 * not found a one so the remainder of the extension
1880 * must be zero and simplifies rounding. */
1882 while (Sgl_iszero_hiddenhigh7mantissa(resultp1
)) {
1883 Sglext_leftshiftby8(resultp1
,resultp2
);
1884 result_exponent
-= 8;
1886 /* Now narrow it down to the nibble */
1887 if (Sgl_iszero_hiddenhigh3mantissa(resultp1
)) {
1888 /* The lower nibble contains the
1889 * normalizing one */
1890 Sglext_leftshiftby4(resultp1
,resultp2
);
1891 result_exponent
-= 4;
1893 /* Select case where first bit is set (already
1894 * normalized) otherwise select the proper shift. */
1895 jumpsize
= Sgl_hiddenhigh3mantissa(resultp1
);
1896 if (jumpsize
<= 7) switch(jumpsize
) {
1898 Sglext_leftshiftby3(resultp1
,resultp2
);
1899 result_exponent
-= 3;
1903 Sglext_leftshiftby2(resultp1
,resultp2
);
1904 result_exponent
-= 2;
1910 Sglext_leftshiftby1(resultp1
,resultp2
);
1911 result_exponent
-= 1;
1914 } /* end if (hidden...)... */
1915 /* Fall through and round */
1916 } /* end if (save < 0)... */
1918 /* Add magnitudes */
1919 Sglext_addition(tmpresp1
,tmpresp2
,
1920 rightp1
,rightp2
, /*to*/resultp1
,resultp2
);
1921 sign_save
= Sgl_signextendedsign(resultp1
);
1922 if (Sgl_isone_hiddenoverflow(resultp1
)) {
1923 /* Prenormalization required. */
1924 Sglext_arithrightshiftby1(resultp1
,resultp2
);
1926 } /* end if hiddenoverflow... */
1927 } /* end else ...add magnitudes... */
1929 /* Round the result. If the extension and lower two words are
1930 * all zeros, then the result is exact. Otherwise round in the
1931 * correct direction. Underflow is possible. If a postnormalization
1932 * is necessary, then the mantissa is all zeros so no shift is needed.
1935 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
1936 Sglext_denormalize(resultp1
,resultp2
,result_exponent
,is_tiny
);
1938 Sgl_set_sign(resultp1
,/*using*/sign_save
);
1939 if (Sglext_isnotzero_mantissap2(resultp2
)) {
1941 switch(Rounding_mode()) {
1942 case ROUNDNEAREST
: /* The default. */
1943 if (Sglext_isone_highp2(resultp2
)) {
1944 /* at least 1/2 ulp */
1945 if (Sglext_isnotzero_low31p2(resultp2
) ||
1946 Sglext_isone_lowp1(resultp1
)) {
1947 /* either exactly half way and odd or
1948 * more than 1/2ulp */
1949 Sgl_increment(resultp1
);
1955 if (Sgl_iszero_sign(resultp1
)) {
1956 /* Round up positive results */
1957 Sgl_increment(resultp1
);
1962 if (Sgl_isone_sign(resultp1
)) {
1963 /* Round down negative results */
1964 Sgl_increment(resultp1
);
1968 /* truncate is simple */
1969 } /* end switch... */
1970 if (Sgl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
1972 if (result_exponent
>= SGL_INFINITY_EXPONENT
) {
1974 if (Is_overflowtrap_enabled()) {
1976 * Adjust bias of result
1978 Sgl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
1979 Sgl_copytoptr(resultp1
,dstptr
);
1981 if (Is_inexacttrap_enabled())
1982 return (OPC_2E_OVERFLOWEXCEPTION
|
1983 OPC_2E_INEXACTEXCEPTION
);
1984 else Set_inexactflag();
1985 return (OPC_2E_OVERFLOWEXCEPTION
);
1989 Sgl_setoverflow(resultp1
);
1990 } else if (result_exponent
<= 0) { /* underflow case */
1991 if (Is_underflowtrap_enabled()) {
1993 * Adjust bias of result
1995 Sgl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
1996 Sgl_copytoptr(resultp1
,dstptr
);
1998 if (Is_inexacttrap_enabled())
1999 return (OPC_2E_UNDERFLOWEXCEPTION
|
2000 OPC_2E_INEXACTEXCEPTION
);
2001 else Set_inexactflag();
2002 return(OPC_2E_UNDERFLOWEXCEPTION
);
2004 else if (inexact
&& is_tiny
) Set_underflowflag();
2006 else Sgl_set_exponent(resultp1
,result_exponent
);
2007 Sgl_copytoptr(resultp1
,dstptr
);
2009 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
2010 else Set_inexactflag();
2011 return(NOEXCEPTION
);
2015 * Single Floating-point Multiply Negate Fused Add
2018 sgl_fmpynfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
2020 sgl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
2021 unsigned int *status
;
2023 unsigned int opnd1
, opnd2
, opnd3
;
2024 register unsigned int tmpresp1
, tmpresp2
;
2025 unsigned int rightp1
, rightp2
;
2026 unsigned int resultp1
, resultp2
= 0;
2027 register int mpy_exponent
, add_exponent
, count
;
2028 boolean inexact
= FALSE
, is_tiny
= FALSE
;
2030 unsigned int signlessleft1
, signlessright1
, save
;
2031 register int result_exponent
, diff_exponent
;
2032 int sign_save
, jumpsize
;
2034 Sgl_copyfromptr(src1ptr
,opnd1
);
2035 Sgl_copyfromptr(src2ptr
,opnd2
);
2036 Sgl_copyfromptr(src3ptr
,opnd3
);
2039 * set sign bit of result of multiply
2041 if (Sgl_sign(opnd1
) ^ Sgl_sign(opnd2
))
2042 Sgl_setzero(resultp1
);
2044 Sgl_setnegativezero(resultp1
);
2047 * Generate multiply exponent
2049 mpy_exponent
= Sgl_exponent(opnd1
) + Sgl_exponent(opnd2
) - SGL_BIAS
;
2052 * check first operand for NaN's or infinity
2054 if (Sgl_isinfinity_exponent(opnd1
)) {
2055 if (Sgl_iszero_mantissa(opnd1
)) {
2056 if (Sgl_isnotnan(opnd2
) && Sgl_isnotnan(opnd3
)) {
2057 if (Sgl_iszero_exponentmantissa(opnd2
)) {
2059 * invalid since operands are infinity
2062 if (Is_invalidtrap_enabled())
2063 return(OPC_2E_INVALIDEXCEPTION
);
2065 Sgl_makequietnan(resultp1
);
2066 Sgl_copytoptr(resultp1
,dstptr
);
2067 return(NOEXCEPTION
);
2070 * Check third operand for infinity with a
2071 * sign opposite of the multiply result
2073 if (Sgl_isinfinity(opnd3
) &&
2074 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
2076 * invalid since attempting a magnitude
2077 * subtraction of infinities
2079 if (Is_invalidtrap_enabled())
2080 return(OPC_2E_INVALIDEXCEPTION
);
2082 Sgl_makequietnan(resultp1
);
2083 Sgl_copytoptr(resultp1
,dstptr
);
2084 return(NOEXCEPTION
);
2090 Sgl_setinfinity_exponentmantissa(resultp1
);
2091 Sgl_copytoptr(resultp1
,dstptr
);
2092 return(NOEXCEPTION
);
2097 * is NaN; signaling or quiet?
2099 if (Sgl_isone_signaling(opnd1
)) {
2100 /* trap if INVALIDTRAP enabled */
2101 if (Is_invalidtrap_enabled())
2102 return(OPC_2E_INVALIDEXCEPTION
);
2103 /* make NaN quiet */
2105 Sgl_set_quiet(opnd1
);
2108 * is second operand a signaling NaN?
2110 else if (Sgl_is_signalingnan(opnd2
)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION
);
2114 /* make NaN quiet */
2116 Sgl_set_quiet(opnd2
);
2117 Sgl_copytoptr(opnd2
,dstptr
);
2118 return(NOEXCEPTION
);
2121 * is third operand a signaling NaN?
2123 else if (Sgl_is_signalingnan(opnd3
)) {
2124 /* trap if INVALIDTRAP enabled */
2125 if (Is_invalidtrap_enabled())
2126 return(OPC_2E_INVALIDEXCEPTION
);
2127 /* make NaN quiet */
2129 Sgl_set_quiet(opnd3
);
2130 Sgl_copytoptr(opnd3
,dstptr
);
2131 return(NOEXCEPTION
);
2136 Sgl_copytoptr(opnd1
,dstptr
);
2137 return(NOEXCEPTION
);
2142 * check second operand for NaN's or infinity
2144 if (Sgl_isinfinity_exponent(opnd2
)) {
2145 if (Sgl_iszero_mantissa(opnd2
)) {
2146 if (Sgl_isnotnan(opnd3
)) {
2147 if (Sgl_iszero_exponentmantissa(opnd1
)) {
2149 * invalid since multiply operands are
2152 if (Is_invalidtrap_enabled())
2153 return(OPC_2E_INVALIDEXCEPTION
);
2155 Sgl_makequietnan(opnd2
);
2156 Sgl_copytoptr(opnd2
,dstptr
);
2157 return(NOEXCEPTION
);
2161 * Check third operand for infinity with a
2162 * sign opposite of the multiply result
2164 if (Sgl_isinfinity(opnd3
) &&
2165 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
2167 * invalid since attempting a magnitude
2168 * subtraction of infinities
2170 if (Is_invalidtrap_enabled())
2171 return(OPC_2E_INVALIDEXCEPTION
);
2173 Sgl_makequietnan(resultp1
);
2174 Sgl_copytoptr(resultp1
,dstptr
);
2175 return(NOEXCEPTION
);
2181 Sgl_setinfinity_exponentmantissa(resultp1
);
2182 Sgl_copytoptr(resultp1
,dstptr
);
2183 return(NOEXCEPTION
);
2188 * is NaN; signaling or quiet?
2190 if (Sgl_isone_signaling(opnd2
)) {
2191 /* trap if INVALIDTRAP enabled */
2192 if (Is_invalidtrap_enabled())
2193 return(OPC_2E_INVALIDEXCEPTION
);
2194 /* make NaN quiet */
2196 Sgl_set_quiet(opnd2
);
2199 * is third operand a signaling NaN?
2201 else if (Sgl_is_signalingnan(opnd3
)) {
2202 /* trap if INVALIDTRAP enabled */
2203 if (Is_invalidtrap_enabled())
2204 return(OPC_2E_INVALIDEXCEPTION
);
2205 /* make NaN quiet */
2207 Sgl_set_quiet(opnd3
);
2208 Sgl_copytoptr(opnd3
,dstptr
);
2209 return(NOEXCEPTION
);
2214 Sgl_copytoptr(opnd2
,dstptr
);
2215 return(NOEXCEPTION
);
2220 * check third operand for NaN's or infinity
2222 if (Sgl_isinfinity_exponent(opnd3
)) {
2223 if (Sgl_iszero_mantissa(opnd3
)) {
2224 /* return infinity */
2225 Sgl_copytoptr(opnd3
,dstptr
);
2226 return(NOEXCEPTION
);
2229 * is NaN; signaling or quiet?
2231 if (Sgl_isone_signaling(opnd3
)) {
2232 /* trap if INVALIDTRAP enabled */
2233 if (Is_invalidtrap_enabled())
2234 return(OPC_2E_INVALIDEXCEPTION
);
2235 /* make NaN quiet */
2237 Sgl_set_quiet(opnd3
);
2242 Sgl_copytoptr(opnd3
,dstptr
);
2243 return(NOEXCEPTION
);
2248 * Generate multiply mantissa
2250 if (Sgl_isnotzero_exponent(opnd1
)) {
2251 /* set hidden bit */
2252 Sgl_clear_signexponent_set_hidden(opnd1
);
2255 /* check for zero */
2256 if (Sgl_iszero_mantissa(opnd1
)) {
2258 * Perform the add opnd3 with zero here.
2260 if (Sgl_iszero_exponentmantissa(opnd3
)) {
2261 if (Is_rounding_mode(ROUNDMINUS
)) {
2262 Sgl_or_signs(opnd3
,resultp1
);
2264 Sgl_and_signs(opnd3
,resultp1
);
2268 * Now let's check for trapped underflow case.
2270 else if (Sgl_iszero_exponent(opnd3
) &&
2271 Is_underflowtrap_enabled()) {
2272 /* need to normalize results mantissa */
2273 sign_save
= Sgl_signextendedsign(opnd3
);
2274 result_exponent
= 0;
2275 Sgl_leftshiftby1(opnd3
);
2276 Sgl_normalize(opnd3
,result_exponent
);
2277 Sgl_set_sign(opnd3
,/*using*/sign_save
);
2278 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
2280 Sgl_copytoptr(opnd3
,dstptr
);
2281 /* inexact = FALSE */
2282 return(OPC_2E_UNDERFLOWEXCEPTION
);
2284 Sgl_copytoptr(opnd3
,dstptr
);
2285 return(NOEXCEPTION
);
2287 /* is denormalized, adjust exponent */
2288 Sgl_clear_signexponent(opnd1
);
2289 Sgl_leftshiftby1(opnd1
);
2290 Sgl_normalize(opnd1
,mpy_exponent
);
2292 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293 if (Sgl_isnotzero_exponent(opnd2
)) {
2294 Sgl_clear_signexponent_set_hidden(opnd2
);
2297 /* check for zero */
2298 if (Sgl_iszero_mantissa(opnd2
)) {
2300 * Perform the add opnd3 with zero here.
2302 if (Sgl_iszero_exponentmantissa(opnd3
)) {
2303 if (Is_rounding_mode(ROUNDMINUS
)) {
2304 Sgl_or_signs(opnd3
,resultp1
);
2306 Sgl_and_signs(opnd3
,resultp1
);
2310 * Now let's check for trapped underflow case.
2312 else if (Sgl_iszero_exponent(opnd3
) &&
2313 Is_underflowtrap_enabled()) {
2314 /* need to normalize results mantissa */
2315 sign_save
= Sgl_signextendedsign(opnd3
);
2316 result_exponent
= 0;
2317 Sgl_leftshiftby1(opnd3
);
2318 Sgl_normalize(opnd3
,result_exponent
);
2319 Sgl_set_sign(opnd3
,/*using*/sign_save
);
2320 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
2322 Sgl_copytoptr(opnd3
,dstptr
);
2323 /* inexact = FALSE */
2324 return(OPC_2E_UNDERFLOWEXCEPTION
);
2326 Sgl_copytoptr(opnd3
,dstptr
);
2327 return(NOEXCEPTION
);
2329 /* is denormalized; want to normalize */
2330 Sgl_clear_signexponent(opnd2
);
2331 Sgl_leftshiftby1(opnd2
);
2332 Sgl_normalize(opnd2
,mpy_exponent
);
2335 /* Multiply the first two source mantissas together */
2338 * The intermediate result will be kept in tmpres,
2339 * which needs enough room for 106 bits of mantissa,
2340 * so lets call it a Double extended.
2342 Sglext_setzero(tmpresp1
,tmpresp2
);
2345 * Four bits at a time are inspected in each loop, and a
2346 * simple shift and add multiply algorithm is used.
2348 for (count
= SGL_P
-1; count
>= 0; count
-= 4) {
2349 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
2350 if (Sbit28(opnd1
)) {
2351 /* Twoword_add should be an ADD followed by 2 ADDC's */
2352 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<3, 0);
2354 if (Sbit29(opnd1
)) {
2355 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<2, 0);
2357 if (Sbit30(opnd1
)) {
2358 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<1, 0);
2360 if (Sbit31(opnd1
)) {
2361 Twoword_add(tmpresp1
, tmpresp2
, opnd2
, 0);
2363 Sgl_rightshiftby4(opnd1
);
2365 if (Is_sexthiddenoverflow(tmpresp1
)) {
2366 /* result mantissa >= 2 (mantissa overflow) */
2368 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
2370 Sglext_rightshiftby3(tmpresp1
,tmpresp2
);
2374 * Restore the sign of the mpy result which was saved in resultp1.
2375 * The exponent will continue to be kept in mpy_exponent.
2377 Sglext_set_sign(tmpresp1
,Sgl_sign(resultp1
));
2380 * No rounding is required, since the result of the multiply
2381 * is exact in the extended format.
2385 * Now we are ready to perform the add portion of the operation.
2387 * The exponents need to be kept as integers for now, since the
2388 * multiply result might not fit into the exponent field. We
2389 * can't overflow or underflow because of this yet, since the
2390 * add could bring the final result back into range.
2392 add_exponent
= Sgl_exponent(opnd3
);
2395 * Check for denormalized or zero add operand.
2397 if (add_exponent
== 0) {
2398 /* check for zero */
2399 if (Sgl_iszero_mantissa(opnd3
)) {
2401 /* Left can't be zero and must be result.
2403 * The final result is now in tmpres and mpy_exponent,
2404 * and needs to be rounded and squeezed back into
2405 * double precision format from double extended.
2407 result_exponent
= mpy_exponent
;
2408 Sglext_copy(tmpresp1
,tmpresp2
,resultp1
,resultp2
);
2409 sign_save
= Sgl_signextendedsign(resultp1
);/*save sign*/
2414 * Neither are zeroes.
2415 * Adjust exponent and normalize add operand.
2417 sign_save
= Sgl_signextendedsign(opnd3
); /* save sign */
2418 Sgl_clear_signexponent(opnd3
);
2419 Sgl_leftshiftby1(opnd3
);
2420 Sgl_normalize(opnd3
,add_exponent
);
2421 Sgl_set_sign(opnd3
,sign_save
); /* restore sign */
2423 Sgl_clear_exponent_set_hidden(opnd3
);
2426 * Copy opnd3 to the double extended variable called right.
2428 Sgl_copyto_sglext(opnd3
,rightp1
,rightp2
);
2431 * A zero "save" helps discover equal operands (for later),
2432 * and is used in swapping operands (if needed).
2434 Sglext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
2437 * Compare magnitude of operands.
2439 Sglext_copytoint_exponentmantissa(tmpresp1
,signlessleft1
);
2440 Sglext_copytoint_exponentmantissa(rightp1
,signlessright1
);
2441 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
2442 Sglext_ismagnitudeless(signlessleft1
,signlessright1
)) {
2444 * Set the left operand to the larger one by XOR swap.
2445 * First finish the first word "save".
2447 Sglext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
2448 Sglext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
2449 Sglext_swap_lower(tmpresp2
,rightp2
);
2450 /* also setup exponents used in rest of routine */
2451 diff_exponent
= add_exponent
- mpy_exponent
;
2452 result_exponent
= add_exponent
;
2454 /* also setup exponents used in rest of routine */
2455 diff_exponent
= mpy_exponent
- add_exponent
;
2456 result_exponent
= mpy_exponent
;
2458 /* Invariant: left is not smaller than right. */
2461 * Special case alignment of operands that would force alignment
2462 * beyond the extent of the extension. A further optimization
2463 * could special case this but only reduces the path length for
2464 * this infrequent case.
2466 if (diff_exponent
> SGLEXT_THRESHOLD
) {
2467 diff_exponent
= SGLEXT_THRESHOLD
;
2470 /* Align right operand by shifting it to the right */
2471 Sglext_clear_sign(rightp1
);
2472 Sglext_right_align(rightp1
,rightp2
,/*shifted by*/diff_exponent
);
2474 /* Treat sum and difference of the operands separately. */
2475 if ((int)save
< 0) {
2477 * Difference of the two operands. Overflow can occur if the
2478 * multiply overflowed. A borrow can occur out of the hidden
2479 * bit and force a post normalization phase.
2481 Sglext_subtract(tmpresp1
,tmpresp2
, rightp1
,rightp2
,
2483 sign_save
= Sgl_signextendedsign(resultp1
);
2484 if (Sgl_iszero_hidden(resultp1
)) {
2485 /* Handle normalization */
2486 /* A straightforward algorithm would now shift the
2487 * result and extension left until the hidden bit
2488 * becomes one. Not all of the extension bits need
2489 * participate in the shift. Only the two most
2490 * significant bits (round and guard) are needed.
2491 * If only a single shift is needed then the guard
2492 * bit becomes a significant low order bit and the
2493 * extension must participate in the rounding.
2494 * If more than a single shift is needed, then all
2495 * bits to the right of the guard bit are zeros,
2496 * and the guard bit may or may not be zero. */
2497 Sglext_leftshiftby1(resultp1
,resultp2
);
2499 /* Need to check for a zero result. The sign and
2500 * exponent fields have already been zeroed. The more
2501 * efficient test of the full object can be used.
2503 if (Sglext_iszero(resultp1
,resultp2
)) {
2504 /* Must have been "x-x" or "x+(-x)". */
2505 if (Is_rounding_mode(ROUNDMINUS
))
2506 Sgl_setone_sign(resultp1
);
2507 Sgl_copytoptr(resultp1
,dstptr
);
2508 return(NOEXCEPTION
);
2512 /* Look to see if normalization is finished. */
2513 if (Sgl_isone_hidden(resultp1
)) {
2514 /* No further normalization is needed */
2518 /* Discover first one bit to determine shift amount.
2519 * Use a modified binary search. We have already
2520 * shifted the result one position right and still
2521 * not found a one so the remainder of the extension
2522 * must be zero and simplifies rounding. */
2524 while (Sgl_iszero_hiddenhigh7mantissa(resultp1
)) {
2525 Sglext_leftshiftby8(resultp1
,resultp2
);
2526 result_exponent
-= 8;
2528 /* Now narrow it down to the nibble */
2529 if (Sgl_iszero_hiddenhigh3mantissa(resultp1
)) {
2530 /* The lower nibble contains the
2531 * normalizing one */
2532 Sglext_leftshiftby4(resultp1
,resultp2
);
2533 result_exponent
-= 4;
2535 /* Select case where first bit is set (already
2536 * normalized) otherwise select the proper shift. */
2537 jumpsize
= Sgl_hiddenhigh3mantissa(resultp1
);
2538 if (jumpsize
<= 7) switch(jumpsize
) {
2540 Sglext_leftshiftby3(resultp1
,resultp2
);
2541 result_exponent
-= 3;
2545 Sglext_leftshiftby2(resultp1
,resultp2
);
2546 result_exponent
-= 2;
2552 Sglext_leftshiftby1(resultp1
,resultp2
);
2553 result_exponent
-= 1;
2556 } /* end if (hidden...)... */
2557 /* Fall through and round */
2558 } /* end if (save < 0)... */
2560 /* Add magnitudes */
2561 Sglext_addition(tmpresp1
,tmpresp2
,
2562 rightp1
,rightp2
, /*to*/resultp1
,resultp2
);
2563 sign_save
= Sgl_signextendedsign(resultp1
);
2564 if (Sgl_isone_hiddenoverflow(resultp1
)) {
2565 /* Prenormalization required. */
2566 Sglext_arithrightshiftby1(resultp1
,resultp2
);
2568 } /* end if hiddenoverflow... */
2569 } /* end else ...add magnitudes... */
2571 /* Round the result. If the extension and lower two words are
2572 * all zeros, then the result is exact. Otherwise round in the
2573 * correct direction. Underflow is possible. If a postnormalization
2574 * is necessary, then the mantissa is all zeros so no shift is needed.
2577 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
2578 Sglext_denormalize(resultp1
,resultp2
,result_exponent
,is_tiny
);
2580 Sgl_set_sign(resultp1
,/*using*/sign_save
);
2581 if (Sglext_isnotzero_mantissap2(resultp2
)) {
2583 switch(Rounding_mode()) {
2584 case ROUNDNEAREST
: /* The default. */
2585 if (Sglext_isone_highp2(resultp2
)) {
2586 /* at least 1/2 ulp */
2587 if (Sglext_isnotzero_low31p2(resultp2
) ||
2588 Sglext_isone_lowp1(resultp1
)) {
2589 /* either exactly half way and odd or
2590 * more than 1/2ulp */
2591 Sgl_increment(resultp1
);
2597 if (Sgl_iszero_sign(resultp1
)) {
2598 /* Round up positive results */
2599 Sgl_increment(resultp1
);
2604 if (Sgl_isone_sign(resultp1
)) {
2605 /* Round down negative results */
2606 Sgl_increment(resultp1
);
2610 /* truncate is simple */
2611 } /* end switch... */
2612 if (Sgl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
2614 if (result_exponent
>= SGL_INFINITY_EXPONENT
) {
2616 if (Is_overflowtrap_enabled()) {
2618 * Adjust bias of result
2620 Sgl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
2621 Sgl_copytoptr(resultp1
,dstptr
);
2623 if (Is_inexacttrap_enabled())
2624 return (OPC_2E_OVERFLOWEXCEPTION
|
2625 OPC_2E_INEXACTEXCEPTION
);
2626 else Set_inexactflag();
2627 return (OPC_2E_OVERFLOWEXCEPTION
);
2631 Sgl_setoverflow(resultp1
);
2632 } else if (result_exponent
<= 0) { /* underflow case */
2633 if (Is_underflowtrap_enabled()) {
2635 * Adjust bias of result
2637 Sgl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
2638 Sgl_copytoptr(resultp1
,dstptr
);
2640 if (Is_inexacttrap_enabled())
2641 return (OPC_2E_UNDERFLOWEXCEPTION
|
2642 OPC_2E_INEXACTEXCEPTION
);
2643 else Set_inexactflag();
2644 return(OPC_2E_UNDERFLOWEXCEPTION
);
2646 else if (inexact
&& is_tiny
) Set_underflowflag();
2648 else Sgl_set_exponent(resultp1
,result_exponent
);
2649 Sgl_copytoptr(resultp1
,dstptr
);
2651 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
2652 else Set_inexactflag();
2653 return(NOEXCEPTION
);