2 * QEMU float support macros
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 #ifndef FPU_SOFTFLOAT_MACROS_H
83 #define FPU_SOFTFLOAT_MACROS_H
85 #include "fpu/softfloat-types.h"
86 #include "qemu/host-utils.h"
/**
 * shl_double: double-word merging left shift
 * @l: left or most-significant word
 * @r: right or least-significant word
 * @c: shift count; the portable path shifts by @c and by 64 - @c, so it
 *     must lie in the range [0, 63]
 *
 * Shift @l left by @c bits, shifting in bits from @r.
 *
 * Returns the shifted value of @l.
 */
static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
{
#if defined(__x86_64__)
    /*
     * SHLD performs the merge in one instruction.  The count must be an
     * immediate or live in %cl, hence the "ci" constraint and %b2 modifier.
     * __asm__ is used so the header also builds in strict ISO C modes.
     */
    __asm__("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
    return l;
#else
    /* c == 0 is special-cased: a shift by 64 - 0 would be undefined. */
    return c ? (l << c) | (r >> (64 - c)) : l;
#endif
}
/**
 * shr_double: double-word merging right shift
 * @l: left or most-significant word
 * @r: right or least-significant word
 * @c: shift count; the portable path shifts by @c and by 64 - @c, so it
 *     must lie in the range [0, 63]
 *
 * Shift @r right by @c bits, shifting in bits from @l.
 *
 * Returns the shifted value of @r.
 */
static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
{
#if defined(__x86_64__)
    /*
     * SHRD performs the merge in one instruction.  The count must be an
     * immediate or live in %cl, hence the "ci" constraint and %b2 modifier.
     * __asm__ is used so the header also builds in strict ISO C modes.
     */
    __asm__("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
    return r;
#else
    /* c == 0 is special-cased: a shift by 64 - 0 would be undefined. */
    return c ? (r >> c) | (l << (64 - c)) : r;
#endif
}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is 32 or more, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    uint32_t z;

    if (count == 0) {
        /* Nothing is shifted off, so there is nothing to jam. */
        z = a;
    } else if (count < 32) {
        /*
         * (-count) & 31 equals 32 - count here, so the left shift isolates
         * exactly the bits that the right shift discards.
         */
        z = (a >> count) | ((a << ((-count) & 31)) != 0);
    } else {
        /* Every bit is shifted off; only the sticky bit can remain. */
        z = (a != 0);
    }
    *zPtr = z;
}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is 64 or more, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
{
    uint64_t z;

    if (count == 0) {
        /* Nothing is shifted off, so there is nothing to jam. */
        z = a;
    } else if (count < 64) {
        /*
         * (-count) & 63 equals 64 - count here, so the left shift isolates
         * exactly the bits that the right shift discards.
         */
        z = (a >> count) | ((a << ((-count) & 63)) != 0);
    } else {
        /* Every bit is shifted off; only the sticky bit can remain. */
        z = (a != 0);
    }
    *zPtr = z;
}
176 /*----------------------------------------------------------------------------
177 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
178 | _plus_ the number of bits given in `count'. The shifted result is at most
179 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
180 | bits shifted off form a second 64-bit result as follows: The _last_ bit
181 | shifted off is the most-significant bit of the extra result, and the other
182 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
183 | bits shifted off were all zero. This extra result is stored in the location
184 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
185 | (This routine makes more sense if `a0' and `a1' are considered to form a
186 | fixed-point value with binary point between `a0' and `a1'. This fixed-point
187 | value is shifted right by the number of bits given in `count', and the
188 | integer part of the result is returned at the location pointed to by
189 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
190 | described above, and is returned at the location pointed to by `z1Ptr'.)
191 *----------------------------------------------------------------------------*/
194 shift64ExtraRightJamming(
195 uint64_t a0
, uint64_t a1
, int count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
198 int8_t negCount
= ( - count
) & 63;
204 else if ( count
< 64 ) {
205 z1
= ( a0
<<negCount
) | ( a1
!= 0 );
210 z1
= a0
| ( a1
!= 0 );
213 z1
= ( ( a0
| a1
) != 0 );
222 /*----------------------------------------------------------------------------
223 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
224 | number of bits given in `count'. Any bits shifted off are lost. The value
225 | of `count' can be arbitrarily large; in particular, if `count' is greater
226 | than 128, the result will be 0. The result is broken into two 64-bit pieces
227 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
228 *----------------------------------------------------------------------------*/
232 uint64_t a0
, uint64_t a1
, int count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
235 int8_t negCount
= ( - count
) & 63;
241 else if ( count
< 64 ) {
242 z1
= ( a0
<<negCount
) | ( a1
>>count
);
246 z1
= (count
< 128) ? (a0
>> (count
& 63)) : 0;
254 /*----------------------------------------------------------------------------
255 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
256 | number of bits given in `count'. If any nonzero bits are shifted off, they
257 | are ``jammed'' into the least significant bit of the result by setting the
258 | least significant bit to 1. The value of `count' can be arbitrarily large;
259 | in particular, if `count' is greater than 128, the result will be either
260 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
261 | nonzero. The result is broken into two 64-bit pieces which are stored at
262 | the locations pointed to by `z0Ptr' and `z1Ptr'.
263 *----------------------------------------------------------------------------*/
266 shift128RightJamming(
267 uint64_t a0
, uint64_t a1
, int count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
270 int8_t negCount
= ( - count
) & 63;
276 else if ( count
< 64 ) {
277 z1
= ( a0
<<negCount
) | ( a1
>>count
) | ( ( a1
<<negCount
) != 0 );
282 z1
= a0
| ( a1
!= 0 );
284 else if ( count
< 128 ) {
285 z1
= ( a0
>>( count
& 63 ) ) | ( ( ( a0
<<negCount
) | a1
) != 0 );
288 z1
= ( ( a0
| a1
) != 0 );
297 /*----------------------------------------------------------------------------
298 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
299 | by 64 _plus_ the number of bits given in `count'. The shifted result is
300 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
301 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
302 | off form a third 64-bit result as follows: The _last_ bit shifted off is
303 | the most-significant bit of the extra result, and the other 63 bits of the
304 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
305 | were all zero. This extra result is stored in the location pointed to by
306 | `z2Ptr'. The value of `count' can be arbitrarily large.
307 | (This routine makes more sense if `a0', `a1', and `a2' are considered
308 | to form a fixed-point value with binary point between `a1' and `a2'. This
309 | fixed-point value is shifted right by the number of bits given in `count',
310 | and the integer part of the result is returned at the locations pointed to
311 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
312 | corrupted as described above, and is returned at the location pointed to by
314 *----------------------------------------------------------------------------*/
317 shift128ExtraRightJamming(
328 int8_t negCount
= ( - count
) & 63;
338 z1
= ( a0
<<negCount
) | ( a1
>>count
);
350 z1
= a0
>>( count
& 63 );
353 z2
= ( count
== 128 ) ? a0
: ( a0
!= 0 );
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' must be less than 64.  The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    *z1Ptr = a1 << count;
    /*
     * With count == 0 the masked shift (-count & 63) is also 0, which would
     * OR all of a1 into the high word; hence the explicit special case.
     */
    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' may be greater than 64.  The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
                                uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    if (count < 64) {
        *z1Ptr = a1 << count;
        /*
         * With count == 0 the masked shift (-count & 63) is also 0, which
         * would OR all of a1 into the high word; hence the special case.
         */
        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
    } else {
        /*
         * The whole low word moves into (or past) the high word, leaving
         * zeros behind.  NOTE(review): count - 64 is used directly as a
         * shift amount, so callers presumably keep count below 128 --
         * confirm against callers.
         */
        *z1Ptr = 0;
        *z0Ptr = a1 << (count - 64);
    }
}
400 /*----------------------------------------------------------------------------
401 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
402 | by the number of bits given in `count'. Any bits shifted off are lost.
403 | The value of `count' must be less than 64. The result is broken into three
404 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
405 | `z1Ptr', and `z2Ptr'.
406 *----------------------------------------------------------------------------*/
426 negCount
= ( ( - count
) & 63 );
/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
| any carry out is lost.  The result is broken into two 64-bit pieces which
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
                          uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* Carry chained from the low word into the high word; starts clear. */
    bool c = 0;

    *z1Ptr = uadd64_carry(a1, b1, &c);
    *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
| modulo 2^192, so any carry out is lost.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
                          uint64_t b0, uint64_t b1, uint64_t b2,
                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    /* Carry chained from the lowest word upwards; starts clear. */
    bool c = 0;

    *z2Ptr = uadd64_carry(a2, b2, &c);
    *z1Ptr = uadd64_carry(a1, b1, &c);
    *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
| `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
                          uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* Borrow chained from the low word into the high word; starts clear. */
    bool c = 0;

    *z1Ptr = usub64_borrow(a1, b1, &c);
    *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
                          uint64_t b0, uint64_t b1, uint64_t b2,
                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    /* Borrow chained from the lowest word upwards; starts clear. */
    bool c = 0;

    *z2Ptr = usub64_borrow(a2, b2, &c);
    *z1Ptr = usub64_borrow(a1, b1, &c);
    *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* mulu64() takes the low-half pointer first, then the high-half one. */
    mulu64(z1Ptr, z0Ptr, a, b);
}
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void
mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    uint64_t z0, z1, m1;

    /* Low partial product: its low word is already the final low word. */
    mul64To128(a1, b, &m1, z2Ptr);
    /* High partial product. */
    mul64To128(a0, b, &z0, &z1);
    /* Fold the high word of the low product into the high product. */
    add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
}
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
| product.  The product is broken into four 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/

static inline void mul128To256(uint64_t a0, uint64_t a1,
                               uint64_t b0, uint64_t b1,
                               uint64_t *z0Ptr, uint64_t *z1Ptr,
                               uint64_t *z2Ptr, uint64_t *z3Ptr)
{
    uint64_t z0, z1, z2;
    uint64_t m0, m1, m2, n1, n2;

    /* The two cross products, each weighted by 2^64. */
    mul64To128(a1, b0, &m1, &m2);
    mul64To128(a0, b1, &n1, &n2);
    /* Low product: its low word is already the final lowest word. */
    mul64To128(a1, b1, &z2, z3Ptr);
    /* High product, weighted by 2^128. */
    mul64To128(a0, b0, &z0, &z1);

    /* Sum the cross products; m0 receives the carry out of that sum. */
    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
    /* Add the summed cross products to the upper three words. */
    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
}
557 /*----------------------------------------------------------------------------
558 | Returns an approximation to the 64-bit integer quotient obtained by dividing
559 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
560 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
561 | toward zero, the approximation returned lies between q and q + 2 inclusive.
562 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
563 | unsigned integer is returned.
564 *----------------------------------------------------------------------------*/
566 static inline uint64_t estimateDiv128To64(uint64_t a0
, uint64_t a1
, uint64_t b
)
569 uint64_t rem0
, rem1
, term0
, term1
;
572 if ( b
<= a0
) return UINT64_C(0xFFFFFFFFFFFFFFFF);
574 z
= ( b0
<<32 <= a0
) ? UINT64_C(0xFFFFFFFF00000000) : ( a0
/ b0
)<<32;
575 mul64To128( b
, z
, &term0
, &term1
);
576 sub128( a0
, a1
, term0
, term1
, &rem0
, &rem1
);
577 while ( ( (int64_t) rem0
) < 0 ) {
578 z
-= UINT64_C(0x100000000);
580 add128( rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
582 rem0
= ( rem0
<<32 ) | ( rem1
>>32 );
583 z
|= ( b0
<<32 <= rem0
) ? 0xFFFFFFFF : rem0
/ b0
;
588 /* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
589 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
591 * Licensed under the GPLv2/LGPLv3
593 static inline uint64_t udiv_qrnnd(uint64_t *r
, uint64_t n1
,
594 uint64_t n0
, uint64_t d
)
596 #if defined(__x86_64__)
598 asm("divq %4" : "=a"(q
), "=d"(*r
) : "0"(n0
), "1"(n1
), "rm"(d
));
600 #elif defined(__s390x__) && !defined(__clang__)
601 /* Need to use a TImode type to get an even register pair for DLGR. */
602 unsigned __int128 n
= (unsigned __int128
)n1
<< 64 | n0
;
603 asm("dlgr %0, %1" : "+r"(n
) : "r"(d
));
606 #elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
607 /* From Power ISA 2.06, programming note for divdeu. */
608 uint64_t q1
, q2
, Q
, r1
, r2
, R
;
609 asm("divdeu %0,%2,%4; divdu %1,%3,%4"
610 : "=&r"(q1
), "=r"(q2
)
611 : "r"(n1
), "r"(n0
), "r"(d
));
612 r1
= -(q1
* d
); /* low part of (n1<<64) - (q1 * d) */
616 if (R
>= d
|| R
< r2
) { /* overflow implies R > d */
623 uint64_t d0
, d1
, q0
, q1
, r1
, r0
, m
;
631 r1
= (r1
<< 32) | (n0
>> 32);
647 r0
= (r0
<< 32) | (uint32_t)n0
;
661 return (q1
<< 32) | q0
;
665 /*----------------------------------------------------------------------------
666 | Returns an approximation to the square root of the 32-bit significand given
667 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
668 | `aExp' (the least significant bit) is 1, the integer returned approximates
669 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
670 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
671 | case, the approximation returned lies strictly within +/-2 of the exact
673 *----------------------------------------------------------------------------*/
675 static inline uint32_t estimateSqrt32(int aExp
, uint32_t a
)
677 static const uint16_t sqrtOddAdjustments
[] = {
678 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
679 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
681 static const uint16_t sqrtEvenAdjustments
[] = {
682 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
683 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
688 index
= ( a
>>27 ) & 15;
690 z
= 0x4000 + ( a
>>17 ) - sqrtOddAdjustments
[ (int)index
];
691 z
= ( ( a
/ z
)<<14 ) + ( z
<<15 );
695 z
= 0x8000 + ( a
>>17 ) - sqrtEvenAdjustments
[ (int)index
];
697 z
= ( 0x20000 <= z
) ? 0xFFFF8000 : ( z
<<15 );
698 if ( z
<= a
) return (uint32_t) ( ( (int32_t) a
)>>1 );
700 return ( (uint32_t) ( ( ( (uint64_t) a
)<<31 ) / z
) ) + ( z
>>1 );
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 == b0 && a1 == b1;
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    /* The high words decide unless they tie; then the low words decide. */
    return a0 < b0 || (a0 == b0 && a1 <= b1);
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
| returns 0.
*----------------------------------------------------------------------------*/

static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    /* The high words decide unless they tie; then the low words decide. */
    return a0 < b0 || (a0 == b0 && a1 < b1);
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 != b0 || a1 != b1;
}