2 * QEMU float support macros
4 * Derived from SoftFloat.
7 /*============================================================================
9 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
10 Arithmetic Package, Release 2b.
12 Written by John R. Hauser. This work was made possible in part by the
13 International Computer Science Institute, located at Suite 600, 1947 Center
14 Street, Berkeley, California 94704. Funding was partially provided by the
15 National Science Foundation under grant MIP-9311980. The original version
16 of this code was written as part of a project to build a fixed-point vector
17 processor in collaboration with the University of California at Berkeley,
18 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 arithmetic/SoftFloat.html'.
22 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
29 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
31 Derivative works are acceptable, even for commercial purposes, so long as
32 (1) the source code for the derivative work includes prominent notice that
33 the work is derivative, and (2) the source code includes prominent notice with
34 these four paragraphs for those parts of this code that are retained.
36 =============================================================================*/
38 /*----------------------------------------------------------------------------
39 | Shifts `a' right by the number of bits given in `count'. If any nonzero
40 | bits are shifted off, they are ``jammed'' into the least significant bit of
41 | the result by setting the least significant bit to 1. The value of `count'
42 | can be arbitrarily large; in particular, if `count' is greater than 32, the
43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44 | The result is stored in the location pointed to by `zPtr'.
45 *----------------------------------------------------------------------------*/
47 INLINE
void shift32RightJamming( uint32_t a
, int16 count
, uint32_t *zPtr
)
54 else if ( count
< 32 ) {
55 z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 31 ) ) != 0 );
64 /*----------------------------------------------------------------------------
65 | Shifts `a' right by the number of bits given in `count'. If any nonzero
66 | bits are shifted off, they are ``jammed'' into the least significant bit of
67 | the result by setting the least significant bit to 1. The value of `count'
68 | can be arbitrarily large; in particular, if `count' is greater than 64, the
69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70 | The result is stored in the location pointed to by `zPtr'.
71 *----------------------------------------------------------------------------*/
73 INLINE
void shift64RightJamming( uint64_t a
, int16 count
, uint64_t *zPtr
)
80 else if ( count
< 64 ) {
81 z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 63 ) ) != 0 );
90 /*----------------------------------------------------------------------------
91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92 | _plus_ the number of bits given in `count'. The shifted result is at most
93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
94 | bits shifted off form a second 64-bit result as follows: The _last_ bit
95 | shifted off is the most-significant bit of the extra result, and the other
96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
97 | bits shifted off were all zero. This extra result is stored in the location
98 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
99 | (This routine makes more sense if `a0' and `a1' are considered to form
100 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
101 | point value is shifted right by the number of bits given in `count', and
102 | the integer part of the result is returned at the location pointed to by
103 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
104 | described above, and is returned at the location pointed to by `z1Ptr'.)
105 *----------------------------------------------------------------------------*/
108 shift64ExtraRightJamming(
109 uint64_t a0
, uint64_t a1
, int16 count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
112 int8 negCount
= ( - count
) & 63;
118 else if ( count
< 64 ) {
119 z1
= ( a0
<<negCount
) | ( a1
!= 0 );
124 z1
= a0
| ( a1
!= 0 );
127 z1
= ( ( a0
| a1
) != 0 );
136 /*----------------------------------------------------------------------------
137 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138 | number of bits given in `count'. Any bits shifted off are lost. The value
139 | of `count' can be arbitrarily large; in particular, if `count' is greater
140 | than 128, the result will be 0. The result is broken into two 64-bit pieces
141 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142 *----------------------------------------------------------------------------*/
146 uint64_t a0
, uint64_t a1
, int16 count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
149 int8 negCount
= ( - count
) & 63;
155 else if ( count
< 64 ) {
156 z1
= ( a0
<<negCount
) | ( a1
>>count
);
160 z1
= ( count
< 64 ) ? ( a0
>>( count
& 63 ) ) : 0;
168 /*----------------------------------------------------------------------------
169 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170 | number of bits given in `count'. If any nonzero bits are shifted off, they
171 | are ``jammed'' into the least significant bit of the result by setting the
172 | least significant bit to 1. The value of `count' can be arbitrarily large;
173 | in particular, if `count' is greater than 128, the result will be either
174 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175 | nonzero. The result is broken into two 64-bit pieces which are stored at
176 | the locations pointed to by `z0Ptr' and `z1Ptr'.
177 *----------------------------------------------------------------------------*/
180 shift128RightJamming(
181 uint64_t a0
, uint64_t a1
, int16 count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
184 int8 negCount
= ( - count
) & 63;
190 else if ( count
< 64 ) {
191 z1
= ( a0
<<negCount
) | ( a1
>>count
) | ( ( a1
<<negCount
) != 0 );
196 z1
= a0
| ( a1
!= 0 );
198 else if ( count
< 128 ) {
199 z1
= ( a0
>>( count
& 63 ) ) | ( ( ( a0
<<negCount
) | a1
) != 0 );
202 z1
= ( ( a0
| a1
) != 0 );
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero.  This extra result is stored in the location pointed to by
| `z2Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'.  This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/
231 shift128ExtraRightJamming(
242 int8 negCount
= ( - count
) & 63;
252 z1
= ( a0
<<negCount
) | ( a1
>>count
);
264 z1
= a0
>>( count
& 63 );
267 z2
= ( count
== 128 ) ? a0
: ( a0
!= 0 );
281 /*----------------------------------------------------------------------------
282 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283 | number of bits given in `count'. Any bits shifted off are lost. The value
284 | of `count' must be less than 64. The result is broken into two 64-bit
285 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286 *----------------------------------------------------------------------------*/
290 uint64_t a0
, uint64_t a1
, int16 count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
295 ( count
== 0 ) ? a0
: ( a0
<<count
) | ( a1
>>( ( - count
) & 63 ) );
299 /*----------------------------------------------------------------------------
300 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
301 | by the number of bits given in `count'. Any bits shifted off are lost.
302 | The value of `count' must be less than 64. The result is broken into three
303 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
304 | `z1Ptr', and `z2Ptr'.
305 *----------------------------------------------------------------------------*/
325 negCount
= ( ( - count
) & 63 );
335 /*----------------------------------------------------------------------------
336 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
337 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
338 | any carry out is lost. The result is broken into two 64-bit pieces which
339 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
340 *----------------------------------------------------------------------------*/
344 uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
350 *z0Ptr
= a0
+ b0
+ ( z1
< a1
);
354 /*----------------------------------------------------------------------------
355 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
356 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
357 | modulo 2^192, so any carry out is lost. The result is broken into three
358 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
359 | `z1Ptr', and `z2Ptr'.
360 *----------------------------------------------------------------------------*/
379 carry1
= ( z2
< a2
);
381 carry0
= ( z1
< a1
);
384 z0
+= ( z1
< carry1
);
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
| `z1Ptr'.
*----------------------------------------------------------------------------*/
402 uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
406 *z0Ptr
= a0
- b0
- ( a1
< b1
);
410 /*----------------------------------------------------------------------------
411 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
412 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
413 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
414 | result is broken into three 64-bit pieces which are stored at the locations
415 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
416 *----------------------------------------------------------------------------*/
432 int8 borrow0
, borrow1
;
435 borrow1
= ( a2
< b2
);
437 borrow0
= ( a1
< b1
);
439 z0
-= ( z1
< borrow1
);
448 /*----------------------------------------------------------------------------
449 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
450 | into two 64-bit pieces which are stored at the locations pointed to by
451 | `z0Ptr' and `z1Ptr'.
452 *----------------------------------------------------------------------------*/
454 INLINE
void mul64To128( uint64_t a
, uint64_t b
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
456 uint32_t aHigh
, aLow
, bHigh
, bLow
;
457 uint64_t z0
, zMiddleA
, zMiddleB
, z1
;
463 z1
= ( (uint64_t) aLow
) * bLow
;
464 zMiddleA
= ( (uint64_t) aLow
) * bHigh
;
465 zMiddleB
= ( (uint64_t) aHigh
) * bLow
;
466 z0
= ( (uint64_t) aHigh
) * bHigh
;
467 zMiddleA
+= zMiddleB
;
468 z0
+= ( ( (uint64_t) ( zMiddleA
< zMiddleB
) )<<32 ) + ( zMiddleA
>>32 );
471 z0
+= ( z1
< zMiddleA
);
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/
494 uint64_t z0
, z1
, z2
, more1
;
496 mul64To128( a1
, b
, &z1
, &z2
);
497 mul64To128( a0
, b
, &z0
, &more1
);
498 add128( z0
, more1
, 0, z1
, &z0
, &z1
);
505 /*----------------------------------------------------------------------------
506 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
507 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
508 | product. The product is broken into four 64-bit pieces which are stored at
509 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
510 *----------------------------------------------------------------------------*/
524 uint64_t z0
, z1
, z2
, z3
;
525 uint64_t more1
, more2
;
527 mul64To128( a1
, b1
, &z2
, &z3
);
528 mul64To128( a1
, b0
, &z1
, &more2
);
529 add128( z1
, more2
, 0, z2
, &z1
, &z2
);
530 mul64To128( a0
, b0
, &z0
, &more1
);
531 add128( z0
, more1
, 0, z1
, &z0
, &z1
);
532 mul64To128( a0
, b1
, &more1
, &more2
);
533 add128( more1
, more2
, 0, z2
, &more1
, &z2
);
534 add128( z0
, z1
, 0, more1
, &z0
, &z1
);
542 /*----------------------------------------------------------------------------
543 | Returns an approximation to the 64-bit integer quotient obtained by dividing
544 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
545 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
546 | toward zero, the approximation returned lies between q and q + 2 inclusive.
547 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
548 | unsigned integer is returned.
549 *----------------------------------------------------------------------------*/
551 static uint64_t estimateDiv128To64( uint64_t a0
, uint64_t a1
, uint64_t b
)
554 uint64_t rem0
, rem1
, term0
, term1
;
557 if ( b
<= a0
) return LIT64( 0xFFFFFFFFFFFFFFFF );
559 z
= ( b0
<<32 <= a0
) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0
/ b0
)<<32;
560 mul64To128( b
, z
, &term0
, &term1
);
561 sub128( a0
, a1
, term0
, term1
, &rem0
, &rem1
);
562 while ( ( (int64_t) rem0
) < 0 ) {
563 z
-= LIT64( 0x100000000 );
565 add128( rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
567 rem0
= ( rem0
<<32 ) | ( rem1
>>32 );
568 z
|= ( b0
<<32 <= rem0
) ? 0xFFFFFFFF : rem0
/ b0
;
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/
583 static uint32_t estimateSqrt32( int16 aExp
, uint32_t a
)
585 static const uint16_t sqrtOddAdjustments
[] = {
586 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
587 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
589 static const uint16_t sqrtEvenAdjustments
[] = {
590 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
591 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
596 index
= ( a
>>27 ) & 15;
598 z
= 0x4000 + ( a
>>17 ) - sqrtOddAdjustments
[ (int)index
];
599 z
= ( ( a
/ z
)<<14 ) + ( z
<<15 );
603 z
= 0x8000 + ( a
>>17 ) - sqrtEvenAdjustments
[ (int)index
];
605 z
= ( 0x20000 <= z
) ? 0xFFFF8000 : ( z
<<15 );
606 if ( z
<= a
) return (uint32_t) ( ( (int32_t) a
)>>1 );
608 return ( (uint32_t) ( ( ( (uint64_t) a
)<<31 ) / z
) ) + ( z
>>1 );
612 /*----------------------------------------------------------------------------
613 | Returns the number of leading 0 bits before the most-significant 1 bit of
614 | `a'. If `a' is zero, 32 is returned.
615 *----------------------------------------------------------------------------*/
617 static int8
countLeadingZeros32( uint32_t a
)
619 static const int8 countLeadingZerosHigh
[] = {
620 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
621 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
622 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
623 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
624 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
625 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
627 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
644 if ( a
< 0x1000000 ) {
648 shiftCount
+= countLeadingZerosHigh
[ a
>>24 ];
653 /*----------------------------------------------------------------------------
654 | Returns the number of leading 0 bits before the most-significant 1 bit of
655 | `a'. If `a' is zero, 64 is returned.
656 *----------------------------------------------------------------------------*/
658 static int8
countLeadingZeros64( uint64_t a
)
663 if ( a
< ( (uint64_t) 1 )<<32 ) {
669 shiftCount
+= countLeadingZeros32( a
);
674 /*----------------------------------------------------------------------------
675 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
676 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
677 | Otherwise, returns 0.
678 *----------------------------------------------------------------------------*/
680 INLINE flag
eq128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
)
683 return ( a0
== b0
) && ( a1
== b1
);
687 /*----------------------------------------------------------------------------
688 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
689 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
690 | Otherwise, returns 0.
691 *----------------------------------------------------------------------------*/
693 INLINE flag
le128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
)
696 return ( a0
< b0
) || ( ( a0
== b0
) && ( a1
<= b1
) );
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
| returns 0.
*----------------------------------------------------------------------------*/
706 INLINE flag
lt128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
)
709 return ( a0
< b0
) || ( ( a0
== b0
) && ( a1
< b1
) );
713 /*----------------------------------------------------------------------------
714 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
715 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
716 | Otherwise, returns 0.
717 *----------------------------------------------------------------------------*/
719 INLINE flag
ne128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
)
722 return ( a0
!= b0
) || ( a1
!= b1
);