1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System
.Diagnostics
;
9 internal unsafe partial class Number
11 public readonly struct FloatingPointInfo
13 public static readonly FloatingPointInfo Double
= new FloatingPointInfo(
14 denormalMantissaBits: 52,
16 maxBinaryExponent: 1023,
18 infinityBits: 0x7FF00000_
00000000
21 public static readonly FloatingPointInfo Single
= new FloatingPointInfo(
22 denormalMantissaBits: 23,
24 maxBinaryExponent: 127,
26 infinityBits: 0x7F800000
29 public ulong ZeroBits { get; }
30 public ulong InfinityBits { get; }
32 public ulong NormalMantissaMask { get; }
33 public ulong DenormalMantissaMask { get; }
35 public int MinBinaryExponent { get; }
36 public int MaxBinaryExponent { get; }
38 public int ExponentBias { get; }
39 public int OverflowDecimalExponent { get; }
41 public ushort NormalMantissaBits { get; }
42 public ushort DenormalMantissaBits { get; }
44 public ushort ExponentBits { get; }
46 public FloatingPointInfo(ushort denormalMantissaBits
, ushort exponentBits
, int maxBinaryExponent
, int exponentBias
, ulong infinityBits
)
48 ExponentBits
= exponentBits
;
50 DenormalMantissaBits
= denormalMantissaBits
;
51 NormalMantissaBits
= (ushort)(denormalMantissaBits
+ 1); // we get an extra (hidden) bit for normal mantissas
53 OverflowDecimalExponent
= (maxBinaryExponent
+ 2 * NormalMantissaBits
) / 3;
54 ExponentBias
= exponentBias
;
56 MaxBinaryExponent
= maxBinaryExponent
;
57 MinBinaryExponent
= 1 - maxBinaryExponent
;
59 DenormalMantissaMask
= (1UL << denormalMantissaBits
) - 1;
60 NormalMantissaMask
= (1UL << NormalMantissaBits
) - 1;
62 InfinityBits
= infinityBits
;
67 private static readonly float[] s_Pow10SingleTable
= new float[]
82 private static readonly double[] s_Pow10DoubleTable
= new double[]
109 private static void AccumulateDecimalDigitsIntoBigInteger(ref NumberBuffer number
, uint firstIndex
, uint lastIndex
, out BigInteger result
)
111 result
= new BigInteger(0);
113 byte* src
= number
.GetDigitsPointer() + firstIndex
;
114 uint remaining
= lastIndex
- firstIndex
;
116 while (remaining
!= 0)
118 uint count
= Math
.Min(remaining
, 9);
119 uint value = DigitsToUInt32(src
, (int)(count
));
121 result
.MultiplyPow10(count
);
129 private static ulong AssembleFloatingPointBits(in FloatingPointInfo info
, ulong initialMantissa
, int initialExponent
, bool hasZeroTail
)
131 // number of bits by which we must adjust the mantissa to shift it into the
132 // correct position, and compute the resulting base two exponent for the
133 // normalized mantissa:
134 uint initialMantissaBits
= BigInteger
.CountSignificantBits(initialMantissa
);
135 int normalMantissaShift
= info
.NormalMantissaBits
- (int)(initialMantissaBits
);
136 int normalExponent
= initialExponent
- normalMantissaShift
;
138 ulong mantissa
= initialMantissa
;
139 int exponent
= normalExponent
;
141 if (normalExponent
> info
.MaxBinaryExponent
)
143 // The exponent is too large to be represented by the floating point
144 // type; report the overflow condition:
145 return info
.InfinityBits
;
147 else if (normalExponent
< info
.MinBinaryExponent
)
149 // The exponent is too small to be represented by the floating point
150 // type as a normal value, but it may be representable as a denormal
151 // value. Compute the number of bits by which we need to shift the
152 // mantissa in order to form a denormal number. (The subtraction of
153 // an extra 1 is to account for the hidden bit of the mantissa that
154 // is not available for use when representing a denormal.)
155 int denormalMantissaShift
= normalMantissaShift
+ normalExponent
+ info
.ExponentBias
- 1;
157 // Denormal values have an exponent of zero, so the debiased exponent is
158 // the negation of the exponent bias:
159 exponent
= -info
.ExponentBias
;
161 if (denormalMantissaShift
< 0)
163 // Use two steps for right shifts: for a shift of N bits, we first
164 // shift by N-1 bits, then shift the last bit and use its value to
165 // round the mantissa.
166 mantissa
= RightShiftWithRounding(mantissa
, -denormalMantissaShift
, hasZeroTail
);
168 // If the mantissa is now zero, we have underflowed:
171 return info
.ZeroBits
;
174 // When we round the mantissa, the result may be so large that the
175 // number becomes a normal value. For example, consider the single
176 // precision case where the mantissa is 0x01ffffff and a right shift
177 // of 2 is required to shift the value into position. We perform the
178 // shift in two steps: we shift by one bit, then we shift again and
179 // round using the dropped bit. The initial shift yields 0x00ffffff.
180 // The rounding shift then yields 0x007fffff and because the least
181 // significant bit was 1, we add 1 to this number to round it. The
182 // final result is 0x00800000.
184 // 0x00800000 is 24 bits, which is more than the 23 bits available
185 // in the mantissa. Thus, we have rounded our denormal number into
188 // We detect this case here and re-adjust the mantissa and exponent
189 // appropriately, to form a normal number:
190 if (mantissa
> info
.DenormalMantissaMask
)
192 // We add one to the denormal_mantissa_shift to account for the
193 // hidden mantissa bit (we subtracted one to account for this bit
194 // when we computed the denormal_mantissa_shift above).
195 exponent
= initialExponent
- (denormalMantissaShift
+ 1) - normalMantissaShift
;
200 mantissa
<<= denormalMantissaShift
;
205 if (normalMantissaShift
< 0)
207 // Use two steps for right shifts: for a shift of N bits, we first
208 // shift by N-1 bits, then shift the last bit and use its value to
209 // round the mantissa.
210 mantissa
= RightShiftWithRounding(mantissa
, -normalMantissaShift
, hasZeroTail
);
212 // When we round the mantissa, it may produce a result that is too
213 // large. In this case, we divide the mantissa by two and increment
214 // the exponent (this does not change the value).
215 if (mantissa
> info
.NormalMantissaMask
)
220 // The increment of the exponent may have generated a value too
221 // large to be represented. In this case, report the overflow:
222 if (exponent
> info
.MaxBinaryExponent
)
224 return info
.InfinityBits
;
228 else if (normalMantissaShift
> 0)
230 mantissa
<<= normalMantissaShift
;
234 // Unset the hidden bit in the mantissa and assemble the floating point value
235 // from the computed components:
236 mantissa
&= info
.DenormalMantissaMask
;
238 Debug
.Assert((info
.DenormalMantissaMask
& (1UL << info
.DenormalMantissaBits
)) == 0);
239 ulong shiftedExponent
= ((ulong)(exponent
+ info
.ExponentBias
)) << info
.DenormalMantissaBits
;
240 Debug
.Assert((shiftedExponent
& info
.DenormalMantissaMask
) == 0);
241 Debug
.Assert((mantissa
& ~info
.DenormalMantissaMask
) == 0);
242 Debug
.Assert((shiftedExponent
& ~
(((1UL << info
.ExponentBits
) - 1) << info
.DenormalMantissaBits
)) == 0); // exponent fits in its place
244 return shiftedExponent
| mantissa
;
247 private static ulong ConvertBigIntegerToFloatingPointBits(ref BigInteger
value, in FloatingPointInfo info
, uint integerBitsOfPrecision
, bool hasNonZeroFractionalPart
)
249 int baseExponent
= info
.DenormalMantissaBits
;
251 // When we have 64-bits or less of precision, we can just get the mantissa directly
252 if (integerBitsOfPrecision
<= 64)
254 return AssembleFloatingPointBits(in info
, value.ToUInt64(), baseExponent
, !hasNonZeroFractionalPart
);
257 uint topBlockIndex
= Math
.DivRem(integerBitsOfPrecision
, 32, out uint topBlockBits
);
258 uint middleBlockIndex
= topBlockIndex
- 1;
259 uint bottomBlockIndex
= middleBlockIndex
- 1;
262 int exponent
= baseExponent
+ ((int)(bottomBlockIndex
) * 32);
263 bool hasZeroTail
= !hasNonZeroFractionalPart
;
265 // When the top 64-bits perfectly span two blocks, we can get those blocks directly
266 if (topBlockBits
== 0)
268 mantissa
= ((ulong)(value.GetBlock(middleBlockIndex
)) << 32) + value.GetBlock(bottomBlockIndex
);
272 // Otherwise, we need to read three blocks and combine them into a 64-bit mantissa
274 int bottomBlockShift
= (int)(topBlockBits
);
275 int topBlockShift
= 64 - bottomBlockShift
;
276 int middleBlockShift
= topBlockShift
- 32;
278 exponent
+= (int)(topBlockBits
);
280 uint bottomBlock
= value.GetBlock(bottomBlockIndex
);
281 uint bottomBits
= bottomBlock
>> bottomBlockShift
;
283 ulong middleBits
= (ulong)(value.GetBlock(middleBlockIndex
)) << middleBlockShift
;
284 ulong topBits
= (ulong)(value.GetBlock(topBlockIndex
)) << topBlockShift
;
286 mantissa
= topBits
+ middleBits
+ bottomBits
;
288 uint unusedBottomBlockBitsMask
= (1u << (int)(topBlockBits
)) - 1;
289 hasZeroTail
&= (bottomBlock
& unusedBottomBlockBitsMask
) == 0;
292 for (uint i
= 0; i
!= bottomBlockIndex
; i
++)
294 hasZeroTail
&= (value.GetBlock(i
) == 0);
297 return AssembleFloatingPointBits(in info
, mantissa
, exponent
, hasZeroTail
);
300 // get 32-bit integer from at most 9 digits
301 private static uint DigitsToUInt32(byte* p
, int count
)
303 Debug
.Assert((1 <= count
) && (count
<= 9));
305 byte* end
= (p
+ count
);
306 uint res
= (uint)(p
[0] - '0');
308 for (p
++; p
< end
; p
++)
310 res
= (10 * res
) + p
[0] - '0';
316 // get 64-bit integer from at most 19 digits
317 private static ulong DigitsToUInt64(byte* p
, int count
)
319 Debug
.Assert((1 <= count
) && (count
<= 19));
321 byte* end
= (p
+ count
);
322 ulong res
= (ulong)(p
[0] - '0');
324 for (p
++; p
< end
; p
++)
326 res
= (10 * res
) + p
[0] - '0';
332 private static ulong NumberToFloatingPointBits(ref NumberBuffer number
, in FloatingPointInfo info
)
334 Debug
.Assert(number
.GetDigitsPointer()[0] != '0');
336 Debug
.Assert(number
.Scale
<= FloatingPointMaxExponent
);
337 Debug
.Assert(number
.Scale
>= FloatingPointMinExponent
);
339 Debug
.Assert(number
.DigitsCount
!= 0);
341 // The input is of the form 0.Mantissa x 10^Exponent, where 'Mantissa' are
342 // the decimal digits of the mantissa and 'Exponent' is the decimal exponent.
343 // We decompose the mantissa into two parts: an integer part and a fractional
344 // part. If the exponent is positive, then the integer part consists of the
345 // first 'exponent' digits, or all present digits if there are fewer digits.
346 // If the exponent is zero or negative, then the integer part is empty. In
347 // either case, the remaining digits form the fractional part of the mantissa.
349 uint totalDigits
= (uint)(number
.DigitsCount
);
350 uint positiveExponent
= (uint)(Math
.Max(0, number
.Scale
));
352 uint integerDigitsPresent
= Math
.Min(positiveExponent
, totalDigits
);
353 uint fractionalDigitsPresent
= totalDigits
- integerDigitsPresent
;
355 uint fastExponent
= (uint)(Math
.Abs(number
.Scale
- integerDigitsPresent
- fractionalDigitsPresent
));
357 // When the number of significant digits is less than or equal to 15 and the
358 // scale is less than or equal to 22, we can take some shortcuts and just rely
359 // on floating-point arithmetic to compute the correct result. This is
360 // because each floating-point precision values allows us to exactly represent
361 // different whole integers and certain powers of 10, depending on the underlying
362 // formats exact range. Additionally, IEEE operations dictate that the result is
363 // computed to the infinitely precise result and then rounded, which means that
364 // we can rely on it to produce the correct result when both inputs are exact.
366 byte* src
= number
.GetDigitsPointer();
368 if ((info
.DenormalMantissaBits
== 23) && (totalDigits
<= 7) && (fastExponent
<= 10))
370 // It is only valid to do this optimization for single-precision floating-point
371 // values since we can lose some of the mantissa bits and would return the
372 // wrong value when upcasting to double.
374 float result
= DigitsToUInt32(src
, (int)(totalDigits
));
375 float scale
= s_Pow10SingleTable
[fastExponent
];
377 if (fractionalDigitsPresent
!= 0)
386 return (uint)(BitConverter
.SingleToInt32Bits(result
));
389 if ((totalDigits
<= 15) && (fastExponent
<= 22))
391 double result
= DigitsToUInt64(src
, (int)(totalDigits
));
392 double scale
= s_Pow10DoubleTable
[fastExponent
];
394 if (fractionalDigitsPresent
!= 0)
403 if (info
.DenormalMantissaBits
== 52)
405 return (ulong)(BitConverter
.DoubleToInt64Bits(result
));
409 Debug
.Assert(info
.DenormalMantissaBits
== 23);
410 return (uint)(BitConverter
.SingleToInt32Bits((float)(result
)));
414 return NumberToFloatingPointBitsSlow(ref number
, in info
, positiveExponent
, integerDigitsPresent
, fractionalDigitsPresent
);
417 private static ulong NumberToFloatingPointBitsSlow(ref NumberBuffer number
, in FloatingPointInfo info
, uint positiveExponent
, uint integerDigitsPresent
, uint fractionalDigitsPresent
)
419 // To generate an N bit mantissa we require N + 1 bits of precision. The
420 // extra bit is used to correctly round the mantissa (if there are fewer bits
421 // than this available, then that's totally okay; in that case we use what we
422 // have and we don't need to round).
423 uint requiredBitsOfPrecision
= (uint)(info
.NormalMantissaBits
+ 1);
425 uint totalDigits
= (uint)(number
.DigitsCount
);
426 uint integerDigitsMissing
= positiveExponent
- integerDigitsPresent
;
428 const uint IntegerFirstIndex
= 0;
429 uint integerLastIndex
= integerDigitsPresent
;
431 uint fractionalFirstIndex
= integerLastIndex
;
432 uint fractionalLastIndex
= totalDigits
;
434 // First, we accumulate the integer part of the mantissa into a big_integer:
435 AccumulateDecimalDigitsIntoBigInteger(ref number
, IntegerFirstIndex
, integerLastIndex
, out BigInteger integerValue
);
437 if (integerDigitsMissing
> 0)
439 if (integerDigitsMissing
> info
.OverflowDecimalExponent
)
441 return info
.InfinityBits
;
444 integerValue
.MultiplyPow10(integerDigitsMissing
);
447 // At this point, the integer_value contains the value of the integer part
448 // of the mantissa. If either [1] this number has more than the required
449 // number of bits of precision or [2] the mantissa has no fractional part,
450 // then we can assemble the result immediately:
451 uint integerBitsOfPrecision
= BigInteger
.CountSignificantBits(ref integerValue
);
453 if ((integerBitsOfPrecision
>= requiredBitsOfPrecision
) || (fractionalDigitsPresent
== 0))
455 return ConvertBigIntegerToFloatingPointBits(
458 integerBitsOfPrecision
,
459 fractionalDigitsPresent
!= 0
463 // Otherwise, we did not get enough bits of precision from the integer part,
464 // and the mantissa has a fractional part. We parse the fractional part of
465 // the mantissa to obtain more bits of precision. To do this, we convert
466 // the fractional part into an actual fraction N/M, where the numerator N is
467 // computed from the digits of the fractional part, and the denominator M is
468 // computed as the power of 10 such that N/M is equal to the value of the
469 // fractional part of the mantissa.
471 uint fractionalDenominatorExponent
= fractionalDigitsPresent
;
473 if (number
.Scale
< 0)
475 fractionalDenominatorExponent
+= (uint)(-number
.Scale
);
478 if ((integerBitsOfPrecision
== 0) && (fractionalDenominatorExponent
- (int)(totalDigits
)) > info
.OverflowDecimalExponent
)
480 // If there were any digits in the integer part, it is impossible to
481 // underflow (because the exponent cannot possibly be small enough),
482 // so if we underflow here it is a true underflow and we return zero.
483 return info
.ZeroBits
;
486 AccumulateDecimalDigitsIntoBigInteger(ref number
, fractionalFirstIndex
, fractionalLastIndex
, out BigInteger fractionalNumerator
);
487 Debug
.Assert(!fractionalNumerator
.IsZero());
489 BigInteger
.Pow10(fractionalDenominatorExponent
, out BigInteger fractionalDenominator
);
491 // Because we are using only the fractional part of the mantissa here, the
492 // numerator is guaranteed to be smaller than the denominator. We normalize
493 // the fraction such that the most significant bit of the numerator is in
494 // the same position as the most significant bit in the denominator. This
495 // ensures that when we later shift the numerator N bits to the left, we
496 // will produce N bits of precision.
497 uint fractionalNumeratorBits
= BigInteger
.CountSignificantBits(ref fractionalNumerator
);
498 uint fractionalDenominatorBits
= BigInteger
.CountSignificantBits(ref fractionalDenominator
);
500 uint fractionalShift
= 0;
502 if (fractionalDenominatorBits
> fractionalNumeratorBits
)
504 fractionalShift
= fractionalDenominatorBits
- fractionalNumeratorBits
;
507 if (fractionalShift
> 0)
509 fractionalNumerator
.ShiftLeft(fractionalShift
);
512 uint requiredFractionalBitsOfPrecision
= requiredBitsOfPrecision
- integerBitsOfPrecision
;
513 uint remainingBitsOfPrecisionRequired
= requiredFractionalBitsOfPrecision
;
515 if (integerBitsOfPrecision
> 0)
517 // If the fractional part of the mantissa provides no bits of precision
518 // and cannot affect rounding, we can just take whatever bits we got from
519 // the integer part of the mantissa. This is the case for numbers like
520 // 5.0000000000000000000001, where the significant digits of the fractional
521 // part start so far to the right that they do not affect the floating
522 // point representation.
524 // If the fractional shift is exactly equal to the number of bits of
525 // precision that we require, then no fractional bits will be part of the
526 // result, but the result may affect rounding. This is e.g. the case for
527 // large, odd integers with a fractional part greater than or equal to .5.
528 // Thus, we need to do the division to correctly round the result.
529 if (fractionalShift
> remainingBitsOfPrecisionRequired
)
531 return ConvertBigIntegerToFloatingPointBits(
534 integerBitsOfPrecision
,
535 fractionalDigitsPresent
!= 0
539 remainingBitsOfPrecisionRequired
-= fractionalShift
;
542 // If there was no integer part of the mantissa, we will need to compute the
543 // exponent from the fractional part. The fractional exponent is the power
544 // of two by which we must multiply the fractional part to move it into the
545 // range [1.0, 2.0). This will either be the same as the shift we computed
546 // earlier, or one greater than that shift:
547 uint fractionalExponent
= fractionalShift
;
549 if (BigInteger
.Compare(ref fractionalNumerator
, ref fractionalDenominator
) < 0)
551 fractionalExponent
++;
554 fractionalNumerator
.ShiftLeft(remainingBitsOfPrecisionRequired
);
556 BigInteger
.DivRem(ref fractionalNumerator
, ref fractionalDenominator
, out BigInteger bigFractionalMantissa
, out BigInteger fractionalRemainder
);
557 ulong fractionalMantissa
= bigFractionalMantissa
.ToUInt64();
558 bool hasZeroTail
= !number
.HasNonZeroTail
&& fractionalRemainder
.IsZero();
560 // We may have produced more bits of precision than were required. Check,
561 // and remove any "extra" bits:
562 uint fractionalMantissaBits
= BigInteger
.CountSignificantBits(fractionalMantissa
);
564 if (fractionalMantissaBits
> requiredFractionalBitsOfPrecision
)
566 int shift
= (int)(fractionalMantissaBits
- requiredFractionalBitsOfPrecision
);
567 hasZeroTail
= hasZeroTail
&& (fractionalMantissa
& ((1UL << shift
) - 1)) == 0;
568 fractionalMantissa
>>= shift
;
571 // Compose the mantissa from the integer and fractional parts:
572 ulong integerMantissa
= integerValue
.ToUInt64();
573 ulong completeMantissa
= (integerMantissa
<< (int)(requiredFractionalBitsOfPrecision
)) + fractionalMantissa
;
575 // Compute the final exponent:
576 // * If the mantissa had an integer part, then the exponent is one less than
577 // the number of bits we obtained from the integer part. (It's one less
578 // because we are converting to the form 1.11111, with one 1 to the left
579 // of the decimal point.)
580 // * If the mantissa had no integer part, then the exponent is the fractional
581 // exponent that we computed.
582 // Then, in both cases, we subtract an additional one from the exponent, to
583 // account for the fact that we've generated an extra bit of precision, for
585 int finalExponent
= (integerBitsOfPrecision
> 0) ? (int)(integerBitsOfPrecision
) - 2 : -(int)(fractionalExponent
) - 1;
587 return AssembleFloatingPointBits(in info
, completeMantissa
, finalExponent
, hasZeroTail
);
590 private static ulong RightShiftWithRounding(ulong value, int shift
, bool hasZeroTail
)
592 // If we'd need to shift further than it is possible to shift, the answer
599 ulong extraBitsMask
= (1UL << (shift
- 1)) - 1;
600 ulong roundBitMask
= (1UL << (shift
- 1));
601 ulong lsbBitMask
= 1UL << shift
;
603 bool lsbBit
= (value & lsbBitMask
) != 0;
604 bool roundBit
= (value & roundBitMask
) != 0;
605 bool hasTailBits
= !hasZeroTail
|| (value & extraBitsMask
) != 0;
607 return (value >> shift
) + (ShouldRoundUp(lsbBit
, roundBit
, hasTailBits
) ? 1UL : 0);
610 private static bool ShouldRoundUp(bool lsbBit
, bool roundBit
, bool hasTailBits
)
612 // If there are insignificant set bits, we need to round to the
613 // nearest; there are two cases:
614 // we round up if either [1] the value is slightly greater than the midpoint
615 // between two exactly representable values or [2] the value is exactly the
616 // midpoint between two exactly representable values and the greater of the
617 // two is even (this is "round-to-even").
618 return roundBit
&& (hasTailBits
|| lsbBit
);