libcacard: initial commit
[qemu/ar7.git] / fpu / softfloat-macros.h
blob3128e60cbf5a5901470fcf38e34f9f97fd658709
1 /*
2 * QEMU float support macros
4 * Derived from SoftFloat.
5 */
7 /*============================================================================
9 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
10 Arithmetic Package, Release 2b.
12 Written by John R. Hauser. This work was made possible in part by the
13 International Computer Science Institute, located at Suite 600, 1947 Center
14 Street, Berkeley, California 94704. Funding was partially provided by the
15 National Science Foundation under grant MIP-9311980. The original version
16 of this code was written as part of a project to build a fixed-point vector
17 processor in collaboration with the University of California at Berkeley,
18 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 arithmetic/SoftFloat.html'.
22 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
29 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
31 Derivative works are acceptable, even for commercial purposes, so long as
32 (1) the source code for the derivative work includes prominent notice that
33 the work is derivative, and (2) the source code includes prominent notice with
34 these four paragraphs for those parts of this code that are retained.
36 =============================================================================*/
/*----------------------------------------------------------------------------
| Right-shifts the 32-bit value `a' by `count' bits with ``jamming'': when any
| of the discarded bits is nonzero, bit 0 of the result is forced to 1 so the
| fact that the shift was inexact is not lost.  A `count' of 0 returns `a'
| unchanged; a `count' of 32 or more yields 1 if `a' is nonzero and 0
| otherwise.  The result is written through `zPtr'.
*----------------------------------------------------------------------------*/

INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr )
{
    uint32_t sticky;

    if ( count == 0 ) {
        *zPtr = a;
        return;
    }
    if ( 32 <= count ) {
        /* Every bit of `a' is shifted out; only the sticky bit survives. */
        *zPtr = ( a != 0 );
        return;
    }
    /* The low `count' bits of `a' are exactly the bits being discarded. */
    sticky = ( ( a<<( ( - count ) & 31 ) ) != 0 );
    *zPtr = ( a>>count ) | sticky;

}
/*----------------------------------------------------------------------------
| Right-shifts the 64-bit value `a' by `count' bits with ``jamming'': when any
| of the discarded bits is nonzero, bit 0 of the result is forced to 1 so the
| fact that the shift was inexact is not lost.  A `count' of 0 returns `a'
| unchanged; a `count' of 64 or more yields 1 if `a' is nonzero and 0
| otherwise.  The result is written through `zPtr'.
*----------------------------------------------------------------------------*/

INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr )
{
    uint64_t sticky;

    if ( count == 0 ) {
        *zPtr = a;
        return;
    }
    if ( 64 <= count ) {
        /* Every bit of `a' is shifted out; only the sticky bit survives. */
        *zPtr = ( a != 0 );
        return;
    }
    /* The low `count' bits of `a' are exactly the bits being discarded. */
    sticky = ( ( a<<( ( - count ) & 63 ) ) != 0 );
    *zPtr = ( a>>count ) | sticky;

}
/*----------------------------------------------------------------------------
| Treats `a0' as the integer part and `a1' as the fraction part of a 128-bit
| fixed-point value and shifts that value right by `count' bits (that is,
| the concatenation of `a0' and `a1' is shifted right by 64 + `count').  The
| integer part of the result is stored through `z0Ptr'.  A 64-bit ``extra''
| word is stored through `z1Ptr': its most-significant bit is the last bit
| shifted out of the integer part, and its remaining bits are nonzero if and
| only if some earlier discarded bit was nonzero.  `count' may be arbitrarily
| large.
*----------------------------------------------------------------------------*/

INLINE void
 shift64ExtraRightJamming(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t intPart, extra;

    if ( count == 0 ) {
        intPart = a0;
        extra = a1;
    }
    else if ( count < 64 ) {
        int8 leftShift = ( - count ) & 63;

        /* Bits leaving `a0' become the top of the extra word; all of the */
        /* old fraction collapses into the sticky bit.                    */
        extra = ( a0<<leftShift ) | ( a1 != 0 );
        intPart = a0>>count;
    }
    else {
        intPart = 0;
        if ( count == 64 ) {
            extra = a0 | ( a1 != 0 );
        }
        else {
            extra = ( ( a0 | a1 ) != 0 );
        }
    }
    *z1Ptr = extra;
    *z0Ptr = intPart;

}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) right by the number of bits given in `count'.  Any bits shifted
| off are lost.  The value of `count' can be arbitrarily large; in particular,
| if `count' is 128 or more, the result will be 0.  The high word of the
| result is stored through `z0Ptr' and the low word through `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shift128Right(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        z1 = ( a0<<negCount ) | ( a1>>count );
        z0 = a0>>count;
    }
    else {
        /* 64 <= count here.  For counts 64..127 the low word receives the  */
        /* high input word shifted by the remaining ( count - 64 ) bits.    */
        /* The bound must be 128: testing against 64 is always false in     */
        /* this branch and would zero the result for every such count.      */
        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}
/*----------------------------------------------------------------------------
| Right-shifts the 128-bit value `a0':`a1' (`a0' is the high word) by `count'
| bits with ``jamming'': when any discarded bit is nonzero, bit 0 of the
| result is forced to 1.  `count' may be arbitrarily large; for `count' of
| 128 or more the result is 1 if the input was nonzero and 0 otherwise.  The
| high word of the result is stored through `z0Ptr' and the low word through
| `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shift128RightJamming(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t hi = 0, lo;
    int8 leftShift = ( - count ) & 63;

    if ( count == 0 ) {
        hi = a0;
        lo = a1;
    }
    else if ( count < 64 ) {
        hi = a0>>count;
        lo = ( a0<<leftShift ) | ( a1>>count ) | ( ( a1<<leftShift ) != 0 );
    }
    else if ( count == 64 ) {
        /* The whole low word is discarded and folds into the sticky bit. */
        lo = a0 | ( a1 != 0 );
    }
    else if ( count < 128 ) {
        /* Discarded: all of `a1' plus the low ( count - 64 ) bits of `a0'. */
        lo = ( a0>>( count & 63 ) ) | ( ( ( a0<<leftShift ) | a1 ) != 0 );
    }
    else {
        lo = ( ( a0 | a1 ) != 0 );
    }
    *z1Ptr = lo;
    *z0Ptr = hi;

}
/*----------------------------------------------------------------------------
| Treats `a0':`a1' as the 128-bit integer part and `a2' as the 64-bit
| fraction part of a fixed-point value and shifts that value right by `count'
| bits (that is, the 192-bit concatenation is shifted right by 64 + `count').
| The integer part of the result is stored through `z0Ptr' (high word) and
| `z1Ptr' (low word).  A 64-bit ``extra'' word is stored through `z2Ptr':
| its most-significant bit is the last bit shifted out of the integer part,
| and its remaining bits are nonzero if and only if some earlier discarded
| bit was nonzero.  `count' may be arbitrarily large.
*----------------------------------------------------------------------------*/

INLINE void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int16 count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t hi, mid, extra;
    uint64_t lostBits = a2;
    int8 leftShift = ( - count ) & 63;

    if ( count == 0 ) {
        /* Nothing moves; the fraction word is passed through untouched. */
        *z2Ptr = a2;
        *z1Ptr = a1;
        *z0Ptr = a0;
        return;
    }
    if ( count < 64 ) {
        extra = a1<<leftShift;
        mid = ( a0<<leftShift ) | ( a1>>count );
        hi = a0>>count;
    }
    else if ( count == 64 ) {
        extra = a1;
        mid = a0;
        hi = 0;
    }
    else {
        /* More than one whole word is shifted: `a1' now lies entirely */
        /* below the extra word and only contributes to stickiness.    */
        lostBits |= a1;
        if ( count < 128 ) {
            extra = a0<<leftShift;
            mid = a0>>( count & 63 );
        }
        else {
            extra = ( count == 128 ) ? a0 : ( a0 != 0 );
            mid = 0;
        }
        hi = 0;
    }
    extra |= ( lostBits != 0 );
    *z2Ptr = extra;
    *z1Ptr = mid;
    *z0Ptr = hi;

}
/*----------------------------------------------------------------------------
| Left-shifts the 128-bit value `a0':`a1' (`a0' is the high word) by `count'
| bits, discarding any bits shifted out of the top.  `count' must be less
| than 64.  The high word of the result is stored through `z0Ptr' and the
| low word through `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t hi = a0;

    if ( count != 0 ) {
        /* Bring the top `count' bits of the low word into the high word. */
        hi = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    }
    *z1Ptr = a1<<count;
    *z0Ptr = hi;

}
/*----------------------------------------------------------------------------
| Left-shifts the 192-bit value `a0':`a1':`a2' (`a0' is the most-significant
| word) by `count' bits, discarding any bits shifted out of the top.  `count'
| must be less than 64.  The three 64-bit words of the result are stored,
| most-significant first, through `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shortShift192Left(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int16 count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t hi = a0<<count;
    uint64_t mid = a1<<count;
    uint64_t lo = a2<<count;

    if ( 0 < count ) {
        /* Carry the top `count' bits of each word into the word above it. */
        int8 rightShift = ( ( - count ) & 63 );

        mid |= a2>>rightShift;
        hi |= a1>>rightShift;
    }
    *z2Ptr = lo;
    *z1Ptr = mid;
    *z0Ptr = hi;

}
/*----------------------------------------------------------------------------
| Computes the 128-bit sum of `a0':`a1' and `b0':`b1' (the `0' words are the
| high halves).  The addition is modulo 2^128, so a carry out of the top is
| discarded.  The high word of the sum is stored through `z0Ptr' and the low
| word through `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 add128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t lowSum = a1 + b1;
    /* An unsigned sum smaller than an addend means the low half wrapped. */
    uint64_t carry = ( lowSum < a1 );

    *z1Ptr = lowSum;
    *z0Ptr = a0 + b0 + carry;

}
/*----------------------------------------------------------------------------
| Computes the 192-bit sum of `a0':`a1':`a2' and `b0':`b1':`b2' (the `0'
| words are the most significant).  The addition is modulo 2^192, so a carry
| out of the top is discarded.  The three 64-bit words of the sum are stored,
| most-significant first, through `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 add192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t lo, mid, hi;
    uint64_t carryFromLo, carryFromMid;

    lo = a2 + b2;
    carryFromLo = ( lo < a2 );
    mid = a1 + b1;
    carryFromMid = ( mid < a1 );
    /* Propagating the low carry can itself wrap the middle word. */
    mid += carryFromLo;
    carryFromMid += ( mid < carryFromLo );
    hi = a0 + b0 + carryFromMid;
    *z2Ptr = lo;
    *z1Ptr = mid;
    *z0Ptr = hi;

}
/*----------------------------------------------------------------------------
| Computes the 128-bit difference `a0':`a1' minus `b0':`b1' (the `0' words
| are the high halves).  The subtraction is modulo 2^128, so a borrow out of
| the top is discarded.  The high word of the difference is stored through
| `z0Ptr' and the low word through `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 sub128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    /* The low half borrows from the high half exactly when a1 < b1. */
    uint64_t borrow = ( a1 < b1 );

    *z1Ptr = a1 - b1;
    *z0Ptr = a0 - b0 - borrow;

}
410 /*----------------------------------------------------------------------------
411 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
412 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
413 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
414 | result is broken into three 64-bit pieces which are stored at the locations
415 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
416 *----------------------------------------------------------------------------*/
418 INLINE void
419 sub192(
420 uint64_t a0,
421 uint64_t a1,
422 uint64_t a2,
423 uint64_t b0,
424 uint64_t b1,
425 uint64_t b2,
426 uint64_t *z0Ptr,
427 uint64_t *z1Ptr,
428 uint64_t *z2Ptr
431 uint64_t z0, z1, z2;
432 int8 borrow0, borrow1;
434 z2 = a2 - b2;
435 borrow1 = ( a2 < b2 );
436 z1 = a1 - b1;
437 borrow0 = ( a1 < b1 );
438 z0 = a0 - b0;
439 z0 -= ( z1 < borrow1 );
440 z1 -= borrow1;
441 z0 -= borrow0;
442 *z2Ptr = z2;
443 *z1Ptr = z1;
444 *z0Ptr = z0;
/*----------------------------------------------------------------------------
| Computes the full 128-bit product of the 64-bit values `a' and `b' by
| schoolbook multiplication of their 32-bit halves.  The high 64 bits of the
| product are stored through `z0Ptr' and the low 64 bits through `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint64_t aLo = (uint32_t) a;
    const uint64_t aHi = a>>32;
    const uint64_t bLo = (uint32_t) b;
    const uint64_t bHi = b>>32;
    uint64_t lowProd = aLo * bLo;
    uint64_t highProd = aHi * bHi;
    uint64_t cross = aLo * bHi;
    uint64_t crossOther = aHi * bLo;

    /* The two cross terms are each weighted by 2^32; their sum may wrap, */
    /* in which case the lost carry is worth 2^32 in the high word.       */
    cross += crossOther;
    if ( cross < crossOther ) {
        highProd += ( (uint64_t) 1 )<<32;
    }
    highProd += cross>>32;
    cross <<= 32;
    lowProd += cross;
    if ( lowProd < cross ) {
        ++highProd;
    }
    *z1Ptr = lowProd;
    *z0Ptr = highProd;

}
/*----------------------------------------------------------------------------
| Computes the 192-bit product of the 128-bit value `a0':`a1' (`a0' is the
| high word) and the 64-bit value `b'.  The three 64-bit words of the
| product are stored, most-significant first, through `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 mul128By64To192(
     uint64_t a0,
     uint64_t a1,
     uint64_t b,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t lowProdHi, lowProdLo;
    uint64_t highProdHi, highProdLo;
    uint64_t top, middle;

    /* a1 * b supplies the bottom word and part of the middle word. */
    mul64To128( a1, b, &lowProdHi, &lowProdLo );
    /* a0 * b is weighted by 2^64; fold in the carry word from below. */
    mul64To128( a0, b, &highProdHi, &highProdLo );
    add128( highProdHi, highProdLo, 0, lowProdHi, &top, &middle );
    *z2Ptr = lowProdLo;
    *z1Ptr = middle;
    *z0Ptr = top;

}
/*----------------------------------------------------------------------------
| Computes the 256-bit product of the 128-bit values `a0':`a1' and `b0':`b1'
| (the `0' words are the high halves) from four 64x64-bit partial products.
| The four 64-bit words of the product are stored, most-significant first,
| through `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
     uint64_t b0,
     uint64_t b1,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr,
     uint64_t *z3Ptr
 )
{
    uint64_t w0, w1, w2, w3;
    uint64_t p11Hi;
    uint64_t p10Hi, p10Lo;
    uint64_t p00Hi, p00Lo;
    uint64_t p01Hi, p01Lo;
    uint64_t carryWord;

    /* Partial products; pXY is aX * bY, split into high and low words. */
    mul64To128( a1, b1, &p11Hi, &w3 );
    mul64To128( a1, b0, &p10Hi, &p10Lo );
    mul64To128( a0, b0, &p00Hi, &p00Lo );
    mul64To128( a0, b1, &p01Hi, &p01Lo );

    /* Accumulate from the least-significant end, carrying upward. */
    add128( p10Hi, p10Lo, 0, p11Hi, &w1, &w2 );
    add128( p00Hi, p00Lo, 0, w1, &w0, &w1 );
    add128( p01Hi, p01Lo, 0, w2, &carryWord, &w2 );
    add128( w0, w1, 0, carryWord, &w0, &w1 );
    *z3Ptr = w3;
    *z2Ptr = w2;
    *z1Ptr = w1;
    *z0Ptr = w0;

}
/*----------------------------------------------------------------------------
| Returns an estimate of the 64-bit quotient of the 128-bit value `a0':`a1'
| (`a0' is the high word) divided by `b'.  The divisor `b' must be at least
| 2^63.  If q is the exact quotient truncated toward zero, the estimate lies
| between q and q + 2 inclusive.  When `b' <= `a0', so that the quotient does
| not fit in 64 bits, the largest 64-bit unsigned value is returned instead.
*----------------------------------------------------------------------------*/

static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
    uint64_t bHigh, bShifted;
    uint64_t rem0, rem1, term0, term1;
    uint64_t quotient;

    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    bHigh = b>>32;
    /* First guess at the upper 32 quotient bits, using only the upper */
    /* half of the divisor.                                            */
    if ( bHigh<<32 <= a0 ) {
        quotient = LIT64( 0xFFFFFFFF00000000 );
    }
    else {
        quotient = ( a0 / bHigh )<<32;
    }
    mul64To128( b, quotient, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    /* While the remainder is negative the guess is too large: step the */
    /* upper quotient half down by one and add b * 2^32 back.           */
    bShifted = b<<32;
    while ( ( (int64_t) rem0 ) < 0 ) {
        quotient -= LIT64( 0x100000000 );
        add128( rem0, rem1, bHigh, bShifted, &rem0, &rem1 );
    }
    /* Estimate the lower 32 quotient bits from the shifted remainder. */
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    quotient |= ( bHigh<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / bHigh;
    return quotient;

}
/*----------------------------------------------------------------------------
| Returns an estimate of the square root of the 32-bit significand `a',
| which, taken as an integer, must be at least 2^31.  When bit 0 of `aExp' is
| 1 the value estimated is 2^31*sqrt(`a'/2^31); when it is 0 the value
| estimated is 2^31*sqrt(`a'/2^30).  In both cases the estimate lies strictly
| within +/-2 of the exact value.
*----------------------------------------------------------------------------*/

static uint32_t estimateSqrt32( int16 aExp, uint32_t a )
{
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    /* The four bits just below the leading 1 select the table entry. */
    int8 slot = ( a>>27 ) & 15;
    uint32_t est;

    if ( ( aExp & 1 ) == 0 ) {
        est = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)slot ];
        est += a / est;
        if ( 0x20000 <= est ) {
            est = 0xFFFF8000;
        }
        else {
            est <<= 15;
        }
        if ( est <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
    }
    else {
        est = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)slot ];
        est = ( ( a / est )<<14 ) + ( est<<15 );
        a >>= 1;
    }
    /* Final refinement step using a 64-bit dividend. */
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / est ) ) + ( est>>1 );

}
/*----------------------------------------------------------------------------
| Returns the count of 0 bits above the most-significant 1 bit of the 32-bit
| value `a'.  Returns 32 when `a' is zero.
*----------------------------------------------------------------------------*/

static int8 countLeadingZeros32( uint32_t a )
{
    /* Leading-zero count of every possible 8-bit value. */
    static const int8 leadingZerosInByte[] = {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    int8 zeros = 0;

    /* Normalize in 16- and 8-bit steps so the top byte holds the first */
    /* significant bits, then finish with the table.                    */
    if ( a < 0x10000 ) {
        zeros = 16;
        a <<= 16;
    }
    if ( a < 0x1000000 ) {
        zeros += 8;
        a <<= 8;
    }
    zeros += leadingZerosInByte[ a>>24 ];
    return zeros;

}
/*----------------------------------------------------------------------------
| Returns the count of 0 bits above the most-significant 1 bit of the 64-bit
| value `a'.  Returns 64 when `a' is zero.
*----------------------------------------------------------------------------*/

static int8 countLeadingZeros64( uint64_t a )
{
    uint64_t upperHalf = a>>32;

    if ( upperHalf != 0 ) {
        /* The answer is decided entirely by the upper 32 bits. */
        return countLeadingZeros32( upperHalf );
    }
    /* Upper half is all zeros: 32 plus the count within the lower half. */
    return 32 + countLeadingZeros32( a );

}
/*----------------------------------------------------------------------------
| Tests two 128-bit values for equality.  Returns 1 if `a0':`a1' equals
| `b0':`b1' (the `0' words are the high halves), and 0 otherwise.
*----------------------------------------------------------------------------*/

INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    if ( a0 != b0 ) return 0;
    return ( a1 == b1 );

}
/*----------------------------------------------------------------------------
| Unsigned 128-bit ``less than or equal'' comparison.  Returns 1 if
| `a0':`a1' is less than or equal to `b0':`b1' (the `0' words are the high
| halves), and 0 otherwise.
*----------------------------------------------------------------------------*/

INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    /* The high words decide unless they are equal. */
    if ( a0 != b0 ) return ( a0 < b0 );
    return ( a1 <= b1 );

}
/*----------------------------------------------------------------------------
| Unsigned 128-bit ``less than'' comparison.  Returns 1 if `a0':`a1' is
| strictly less than `b0':`b1' (the `0' words are the high halves), and 0
| otherwise.
*----------------------------------------------------------------------------*/

INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    /* The high words decide unless they are equal. */
    if ( a0 != b0 ) return ( a0 < b0 );
    return ( a1 < b1 );

}
/*----------------------------------------------------------------------------
| Tests two 128-bit values for inequality.  Returns 1 if `a0':`a1' differs
| from `b0':`b1' (the `0' words are the high halves), and 0 otherwise.
*----------------------------------------------------------------------------*/

INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    if ( a0 != b0 ) return 1;
    return ( a1 != b1 );

}