2 /*============================================================================
4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
5 Arithmetic Package, Release 2b.
7 Written by John R. Hauser. This work was made possible in part by the
8 International Computer Science Institute, located at Suite 600, 1947 Center
9 Street, Berkeley, California 94704. Funding was partially provided by the
10 National Science Foundation under grant MIP-9311980. The original version
11 of this code was written as part of a project to build a fixed-point vector
12 processor in collaboration with the University of California at Berkeley,
13 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 arithmetic/SoftFloat.html'.
17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
26 Derivative works are acceptable, even for commercial purposes, so long as
27 (1) the source code for the derivative work includes prominent notice that
28 the work is derivative, and (2) the source code includes prominent notice with
29 these four paragraphs for those parts of this code that are retained.
31 =============================================================================*/
33 /*----------------------------------------------------------------------------
34 | Shifts `a' right by the number of bits given in `count'. If any nonzero
35 | bits are shifted off, they are ``jammed'' into the least significant bit of
36 | the result by setting the least significant bit to 1. The value of `count'
37 | can be arbitrarily large; in particular, if `count' is greater than 32, the
38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 | The result is stored in the location pointed to by `zPtr'.
40 *----------------------------------------------------------------------------*/
42 INLINE
void shift32RightJamming( bits32 a
, int16 count
, bits32
*zPtr
)
49 else if ( count
< 32 ) {
50 z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 31 ) ) != 0 );
59 /*----------------------------------------------------------------------------
60 | Shifts `a' right by the number of bits given in `count'. If any nonzero
61 | bits are shifted off, they are ``jammed'' into the least significant bit of
62 | the result by setting the least significant bit to 1. The value of `count'
63 | can be arbitrarily large; in particular, if `count' is greater than 64, the
64 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
65 | The result is stored in the location pointed to by `zPtr'.
66 *----------------------------------------------------------------------------*/
68 INLINE
void shift64RightJamming( bits64 a
, int16 count
, bits64
*zPtr
)
75 else if ( count
< 64 ) {
76 z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 63 ) ) != 0 );
85 /*----------------------------------------------------------------------------
86 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
87 | _plus_ the number of bits given in `count'. The shifted result is at most
88 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
89 | bits shifted off form a second 64-bit result as follows: The _last_ bit
90 | shifted off is the most-significant bit of the extra result, and the other
91 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
92 | bits shifted off were all zero. This extra result is stored in the location
93 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
94 | (This routine makes more sense if `a0' and `a1' are considered to form
95 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
96 | point value is shifted right by the number of bits given in `count', and
97 | the integer part of the result is returned at the location pointed to by
98 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
99 | described above, and is returned at the location pointed to by `z1Ptr'.)
100 *----------------------------------------------------------------------------*/
103 shift64ExtraRightJamming(
104 bits64 a0
, bits64 a1
, int16 count
, bits64
*z0Ptr
, bits64
*z1Ptr
)
107 int8 negCount
= ( - count
) & 63;
113 else if ( count
< 64 ) {
114 z1
= ( a0
<<negCount
) | ( a1
!= 0 );
119 z1
= a0
| ( a1
!= 0 );
122 z1
= ( ( a0
| a1
) != 0 );
131 /*----------------------------------------------------------------------------
132 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
133 | number of bits given in `count'. Any bits shifted off are lost. The value
134 | of `count' can be arbitrarily large; in particular, if `count' is greater
135 | than 128, the result will be 0. The result is broken into two 64-bit pieces
136 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
137 *----------------------------------------------------------------------------*/
141 bits64 a0
, bits64 a1
, int16 count
, bits64
*z0Ptr
, bits64
*z1Ptr
)
144 int8 negCount
= ( - count
) & 63;
150 else if ( count
< 64 ) {
151 z1
= ( a0
<<negCount
) | ( a1
>>count
);
155 z1
= ( count
< 64 ) ? ( a0
>>( count
& 63 ) ) : 0;
163 /*----------------------------------------------------------------------------
164 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
165 | number of bits given in `count'. If any nonzero bits are shifted off, they
166 | are ``jammed'' into the least significant bit of the result by setting the
167 | least significant bit to 1. The value of `count' can be arbitrarily large;
168 | in particular, if `count' is greater than 128, the result will be either
169 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
170 | nonzero. The result is broken into two 64-bit pieces which are stored at
171 | the locations pointed to by `z0Ptr' and `z1Ptr'.
172 *----------------------------------------------------------------------------*/
175 shift128RightJamming(
176 bits64 a0
, bits64 a1
, int16 count
, bits64
*z0Ptr
, bits64
*z1Ptr
)
179 int8 negCount
= ( - count
) & 63;
185 else if ( count
< 64 ) {
186 z1
= ( a0
<<negCount
) | ( a1
>>count
) | ( ( a1
<<negCount
) != 0 );
191 z1
= a0
| ( a1
!= 0 );
193 else if ( count
< 128 ) {
194 z1
= ( a0
>>( count
& 63 ) ) | ( ( ( a0
<<negCount
) | a1
) != 0 );
197 z1
= ( ( a0
| a1
) != 0 );
206 /*----------------------------------------------------------------------------
207 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
208 | by 64 _plus_ the number of bits given in `count'. The shifted result is
209 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
210 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
211 | off form a third 64-bit result as follows: The _last_ bit shifted off is
212 | the most-significant bit of the extra result, and the other 63 bits of the
213 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
214 | were all zero. This extra result is stored in the location pointed to by
215 | `z2Ptr'. The value of `count' can be arbitrarily large.
216 | (This routine makes more sense if `a0', `a1', and `a2' are considered
217 | to form a fixed-point value with binary point between `a1' and `a2'. This
218 | fixed-point value is shifted right by the number of bits given in `count',
219 | and the integer part of the result is returned at the locations pointed to
220 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
221 | corrupted as described above, and is returned at the location pointed to by
223 *----------------------------------------------------------------------------*/
226 shift128ExtraRightJamming(
237 int8 negCount
= ( - count
) & 63;
247 z1
= ( a0
<<negCount
) | ( a1
>>count
);
259 z1
= a0
>>( count
& 63 );
262 z2
= ( count
== 128 ) ? a0
: ( a0
!= 0 );
276 /*----------------------------------------------------------------------------
277 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
278 | number of bits given in `count'. Any bits shifted off are lost. The value
279 | of `count' must be less than 64. The result is broken into two 64-bit
280 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
281 *----------------------------------------------------------------------------*/
285 bits64 a0
, bits64 a1
, int16 count
, bits64
*z0Ptr
, bits64
*z1Ptr
)
290 ( count
== 0 ) ? a0
: ( a0
<<count
) | ( a1
>>( ( - count
) & 63 ) );
294 /*----------------------------------------------------------------------------
295 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
296 | by the number of bits given in `count'. Any bits shifted off are lost.
297 | The value of `count' must be less than 64. The result is broken into three
298 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
299 | `z1Ptr', and `z2Ptr'.
300 *----------------------------------------------------------------------------*/
320 negCount
= ( ( - count
) & 63 );
330 /*----------------------------------------------------------------------------
331 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
332 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
333 | any carry out is lost. The result is broken into two 64-bit pieces which
334 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
335 *----------------------------------------------------------------------------*/
339 bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
*z0Ptr
, bits64
*z1Ptr
)
345 *z0Ptr
= a0
+ b0
+ ( z1
< a1
);
349 /*----------------------------------------------------------------------------
350 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
351 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
352 | modulo 2^192, so any carry out is lost. The result is broken into three
353 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
354 | `z1Ptr', and `z2Ptr'.
355 *----------------------------------------------------------------------------*/
374 carry1
= ( z2
< a2
);
376 carry0
= ( z1
< a1
);
379 z0
+= ( z1
< carry1
);
387 /*----------------------------------------------------------------------------
388 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
389 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
390 | 2^128, so any borrow out (carry out) is lost. The result is broken into two
391 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
393 *----------------------------------------------------------------------------*/
397 bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
*z0Ptr
, bits64
*z1Ptr
)
401 *z0Ptr
= a0
- b0
- ( a1
< b1
);
405 /*----------------------------------------------------------------------------
406 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
407 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
408 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
409 | result is broken into three 64-bit pieces which are stored at the locations
410 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
411 *----------------------------------------------------------------------------*/
427 int8 borrow0
, borrow1
;
430 borrow1
= ( a2
< b2
);
432 borrow0
= ( a1
< b1
);
434 z0
-= ( z1
< borrow1
);
443 /*----------------------------------------------------------------------------
444 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
445 | into two 64-bit pieces which are stored at the locations pointed to by
446 | `z0Ptr' and `z1Ptr'.
447 *----------------------------------------------------------------------------*/
449 INLINE
void mul64To128( bits64 a
, bits64 b
, bits64
*z0Ptr
, bits64
*z1Ptr
)
451 bits32 aHigh
, aLow
, bHigh
, bLow
;
452 bits64 z0
, zMiddleA
, zMiddleB
, z1
;
458 z1
= ( (bits64
) aLow
) * bLow
;
459 zMiddleA
= ( (bits64
) aLow
) * bHigh
;
460 zMiddleB
= ( (bits64
) aHigh
) * bLow
;
461 z0
= ( (bits64
) aHigh
) * bHigh
;
462 zMiddleA
+= zMiddleB
;
463 z0
+= ( ( (bits64
) ( zMiddleA
< zMiddleB
) )<<32 ) + ( zMiddleA
>>32 );
466 z0
+= ( z1
< zMiddleA
);
472 /*----------------------------------------------------------------------------
473 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
474 | `b' to obtain a 192-bit product. The product is broken into three 64-bit
475 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
477 *----------------------------------------------------------------------------*/
489 bits64 z0
, z1
, z2
, more1
;
491 mul64To128( a1
, b
, &z1
, &z2
);
492 mul64To128( a0
, b
, &z0
, &more1
);
493 add128( z0
, more1
, 0, z1
, &z0
, &z1
);
500 /*----------------------------------------------------------------------------
501 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
502 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
503 | product. The product is broken into four 64-bit pieces which are stored at
504 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
505 *----------------------------------------------------------------------------*/
519 bits64 z0
, z1
, z2
, z3
;
522 mul64To128( a1
, b1
, &z2
, &z3
);
523 mul64To128( a1
, b0
, &z1
, &more2
);
524 add128( z1
, more2
, 0, z2
, &z1
, &z2
);
525 mul64To128( a0
, b0
, &z0
, &more1
);
526 add128( z0
, more1
, 0, z1
, &z0
, &z1
);
527 mul64To128( a0
, b1
, &more1
, &more2
);
528 add128( more1
, more2
, 0, z2
, &more1
, &z2
);
529 add128( z0
, z1
, 0, more1
, &z0
, &z1
);
537 /*----------------------------------------------------------------------------
538 | Returns an approximation to the 64-bit integer quotient obtained by dividing
539 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
540 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
541 | toward zero, the approximation returned lies between q and q + 2 inclusive.
542 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
543 | unsigned integer is returned.
544 *----------------------------------------------------------------------------*/
546 static bits64
estimateDiv128To64( bits64 a0
, bits64 a1
, bits64 b
)
549 bits64 rem0
, rem1
, term0
, term1
;
552 if ( b
<= a0
) return LIT64( 0xFFFFFFFFFFFFFFFF );
554 z
= ( b0
<<32 <= a0
) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0
/ b0
)<<32;
555 mul64To128( b
, z
, &term0
, &term1
);
556 sub128( a0
, a1
, term0
, term1
, &rem0
, &rem1
);
557 while ( ( (sbits64
) rem0
) < 0 ) {
558 z
-= LIT64( 0x100000000 );
560 add128( rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
562 rem0
= ( rem0
<<32 ) | ( rem1
>>32 );
563 z
|= ( b0
<<32 <= rem0
) ? 0xFFFFFFFF : rem0
/ b0
;
568 /*----------------------------------------------------------------------------
569 | Returns an approximation to the square root of the 32-bit significand given
570 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
571 | `aExp' (the least significant bit) is 1, the integer returned approximates
572 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
573 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
574 | case, the approximation returned lies strictly within +/-2 of the exact
576 *----------------------------------------------------------------------------*/
578 static bits32
estimateSqrt32( int16 aExp
, bits32 a
)
580 static const bits16 sqrtOddAdjustments
[] = {
581 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
582 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
584 static const bits16 sqrtEvenAdjustments
[] = {
585 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
586 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
591 index
= ( a
>>27 ) & 15;
593 z
= 0x4000 + ( a
>>17 ) - sqrtOddAdjustments
[ (int)index
];
594 z
= ( ( a
/ z
)<<14 ) + ( z
<<15 );
598 z
= 0x8000 + ( a
>>17 ) - sqrtEvenAdjustments
[ (int)index
];
600 z
= ( 0x20000 <= z
) ? 0xFFFF8000 : ( z
<<15 );
601 if ( z
<= a
) return (bits32
) ( ( (sbits32
) a
)>>1 );
603 return ( (bits32
) ( ( ( (bits64
) a
)<<31 ) / z
) ) + ( z
>>1 );
607 /*----------------------------------------------------------------------------
608 | Returns the number of leading 0 bits before the most-significant 1 bit of
609 | `a'. If `a' is zero, 32 is returned.
610 *----------------------------------------------------------------------------*/
612 static int8
countLeadingZeros32( bits32 a
)
614 static const int8 countLeadingZerosHigh
[] = {
615 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
616 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
617 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
618 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
619 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
620 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
621 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
639 if ( a
< 0x1000000 ) {
643 shiftCount
+= countLeadingZerosHigh
[ a
>>24 ];
648 /*----------------------------------------------------------------------------
649 | Returns the number of leading 0 bits before the most-significant 1 bit of
650 | `a'. If `a' is zero, 64 is returned.
651 *----------------------------------------------------------------------------*/
653 static int8
countLeadingZeros64( bits64 a
)
658 if ( a
< ( (bits64
) 1 )<<32 ) {
664 shiftCount
+= countLeadingZeros32( a
);
669 /*----------------------------------------------------------------------------
670 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
671 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
672 | Otherwise, returns 0.
673 *----------------------------------------------------------------------------*/
675 INLINE flag
eq128( bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
)
678 return ( a0
== b0
) && ( a1
== b1
);
682 /*----------------------------------------------------------------------------
683 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
684 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
685 | Otherwise, returns 0.
686 *----------------------------------------------------------------------------*/
688 INLINE flag
le128( bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
)
691 return ( a0
< b0
) || ( ( a0
== b0
) && ( a1
<= b1
) );
695 /*----------------------------------------------------------------------------
696 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
697 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
699 *----------------------------------------------------------------------------*/
701 INLINE flag
lt128( bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
)
704 return ( a0
< b0
) || ( ( a0
== b0
) && ( a1
< b1
) );
708 /*----------------------------------------------------------------------------
709 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
710 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
711 | Otherwise, returns 0.
712 *----------------------------------------------------------------------------*/
714 INLINE flag
ne128( bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
)
717 return ( a0
!= b0
) || ( a1
!= b1
);