1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// An extFloat represents an extended floating-point number, with more
// precision than a float64. It does not try to save bits: the
// number represented by the structure is mant*(2^exp), with a negative
// sign if neg is true.
type extFloat struct {
	mant uint64 // mantissa; the value is mant * 2^exp
	exp  int    // binary exponent applied to mant
	neg  bool   // true when the value carries a negative sign
}
// Powers of ten taken from double-conversion library.
// http://code.google.com/p/double-conversion/
const (
	// firstPowerOfTen is the decimal exponent of the first entry of
	// the powersOfTen table.
	firstPowerOfTen = -348
	// stepPowerOfTen is the distance, in decimal exponents, between two
	// consecutive entries of the powersOfTen table (10^-348, 10^-340, ...).
	stepPowerOfTen = 8
)
24 var smallPowersOfTen
= [...]extFloat
{
25 {1 << 63, -63, false}, // 1
26 {0xa << 60, -60, false}, // 1e1
27 {0x64 << 57, -57, false}, // 1e2
28 {0x3e8 << 54, -54, false}, // 1e3
29 {0x2710 << 50, -50, false}, // 1e4
30 {0x186a0 << 47, -47, false}, // 1e5
31 {0xf4240 << 44, -44, false}, // 1e6
32 {0x989680 << 40, -40, false}, // 1e7
35 var powersOfTen
= [...]extFloat
{
36 {0xfa8fd5a0081c0288, -1220, false}, // 10^-348
37 {0xbaaee17fa23ebf76, -1193, false}, // 10^-340
38 {0x8b16fb203055ac76, -1166, false}, // 10^-332
39 {0xcf42894a5dce35ea, -1140, false}, // 10^-324
40 {0x9a6bb0aa55653b2d, -1113, false}, // 10^-316
41 {0xe61acf033d1a45df, -1087, false}, // 10^-308
42 {0xab70fe17c79ac6ca, -1060, false}, // 10^-300
43 {0xff77b1fcbebcdc4f, -1034, false}, // 10^-292
44 {0xbe5691ef416bd60c, -1007, false}, // 10^-284
45 {0x8dd01fad907ffc3c, -980, false}, // 10^-276
46 {0xd3515c2831559a83, -954, false}, // 10^-268
47 {0x9d71ac8fada6c9b5, -927, false}, // 10^-260
48 {0xea9c227723ee8bcb, -901, false}, // 10^-252
49 {0xaecc49914078536d, -874, false}, // 10^-244
50 {0x823c12795db6ce57, -847, false}, // 10^-236
51 {0xc21094364dfb5637, -821, false}, // 10^-228
52 {0x9096ea6f3848984f, -794, false}, // 10^-220
53 {0xd77485cb25823ac7, -768, false}, // 10^-212
54 {0xa086cfcd97bf97f4, -741, false}, // 10^-204
55 {0xef340a98172aace5, -715, false}, // 10^-196
56 {0xb23867fb2a35b28e, -688, false}, // 10^-188
57 {0x84c8d4dfd2c63f3b, -661, false}, // 10^-180
58 {0xc5dd44271ad3cdba, -635, false}, // 10^-172
59 {0x936b9fcebb25c996, -608, false}, // 10^-164
60 {0xdbac6c247d62a584, -582, false}, // 10^-156
61 {0xa3ab66580d5fdaf6, -555, false}, // 10^-148
62 {0xf3e2f893dec3f126, -529, false}, // 10^-140
63 {0xb5b5ada8aaff80b8, -502, false}, // 10^-132
64 {0x87625f056c7c4a8b, -475, false}, // 10^-124
65 {0xc9bcff6034c13053, -449, false}, // 10^-116
66 {0x964e858c91ba2655, -422, false}, // 10^-108
67 {0xdff9772470297ebd, -396, false}, // 10^-100
68 {0xa6dfbd9fb8e5b88f, -369, false}, // 10^-92
69 {0xf8a95fcf88747d94, -343, false}, // 10^-84
70 {0xb94470938fa89bcf, -316, false}, // 10^-76
71 {0x8a08f0f8bf0f156b, -289, false}, // 10^-68
72 {0xcdb02555653131b6, -263, false}, // 10^-60
73 {0x993fe2c6d07b7fac, -236, false}, // 10^-52
74 {0xe45c10c42a2b3b06, -210, false}, // 10^-44
75 {0xaa242499697392d3, -183, false}, // 10^-36
76 {0xfd87b5f28300ca0e, -157, false}, // 10^-28
77 {0xbce5086492111aeb, -130, false}, // 10^-20
78 {0x8cbccc096f5088cc, -103, false}, // 10^-12
79 {0xd1b71758e219652c, -77, false}, // 10^-4
80 {0x9c40000000000000, -50, false}, // 10^4
81 {0xe8d4a51000000000, -24, false}, // 10^12
82 {0xad78ebc5ac620000, 3, false}, // 10^20
83 {0x813f3978f8940984, 30, false}, // 10^28
84 {0xc097ce7bc90715b3, 56, false}, // 10^36
85 {0x8f7e32ce7bea5c70, 83, false}, // 10^44
86 {0xd5d238a4abe98068, 109, false}, // 10^52
87 {0x9f4f2726179a2245, 136, false}, // 10^60
88 {0xed63a231d4c4fb27, 162, false}, // 10^68
89 {0xb0de65388cc8ada8, 189, false}, // 10^76
90 {0x83c7088e1aab65db, 216, false}, // 10^84
91 {0xc45d1df942711d9a, 242, false}, // 10^92
92 {0x924d692ca61be758, 269, false}, // 10^100
93 {0xda01ee641a708dea, 295, false}, // 10^108
94 {0xa26da3999aef774a, 322, false}, // 10^116
95 {0xf209787bb47d6b85, 348, false}, // 10^124
96 {0xb454e4a179dd1877, 375, false}, // 10^132
97 {0x865b86925b9bc5c2, 402, false}, // 10^140
98 {0xc83553c5c8965d3d, 428, false}, // 10^148
99 {0x952ab45cfa97a0b3, 455, false}, // 10^156
100 {0xde469fbd99a05fe3, 481, false}, // 10^164
101 {0xa59bc234db398c25, 508, false}, // 10^172
102 {0xf6c69a72a3989f5c, 534, false}, // 10^180
103 {0xb7dcbf5354e9bece, 561, false}, // 10^188
104 {0x88fcf317f22241e2, 588, false}, // 10^196
105 {0xcc20ce9bd35c78a5, 614, false}, // 10^204
106 {0x98165af37b2153df, 641, false}, // 10^212
107 {0xe2a0b5dc971f303a, 667, false}, // 10^220
108 {0xa8d9d1535ce3b396, 694, false}, // 10^228
109 {0xfb9b7cd9a4a7443c, 720, false}, // 10^236
110 {0xbb764c4ca7a44410, 747, false}, // 10^244
111 {0x8bab8eefb6409c1a, 774, false}, // 10^252
112 {0xd01fef10a657842c, 800, false}, // 10^260
113 {0x9b10a4e5e9913129, 827, false}, // 10^268
114 {0xe7109bfba19c0c9d, 853, false}, // 10^276
115 {0xac2820d9623bf429, 880, false}, // 10^284
116 {0x80444b5e7aa7cf85, 907, false}, // 10^292
117 {0xbf21e44003acdd2d, 933, false}, // 10^300
118 {0x8e679c2f5e44ff8f, 960, false}, // 10^308
119 {0xd433179d9c8cb841, 986, false}, // 10^316
120 {0x9e19db92b4e31ba9, 1013, false}, // 10^324
121 {0xeb96bf6ebadf77d9, 1039, false}, // 10^332
122 {0xaf87023b9bf0ee6b, 1066, false}, // 10^340
125 // floatBits returns the bits of the float64 that best approximates
126 // the extFloat passed as receiver. Overflow is set to true if
127 // the resulting float64 is ±Inf.
128 func (f
*extFloat
) floatBits(flt
*floatInfo
) (bits
uint64, overflow
bool) {
133 // Exponent too small.
134 if exp
< flt
.bias
+1 {
135 n
:= flt
.bias
+ 1 - exp
140 // Extract 1+flt.mantbits bits from the 64-bit mantissa.
141 mant
:= f
.mant
>> (63 - flt
.mantbits
)
142 if f
.mant
&(1<<(62-flt
.mantbits
)) != 0 {
147 // Rounding might have added a bit; shift down.
148 if mant
== 2<<flt
.mantbits
{
154 if exp
-flt
.bias
>= 1<<flt
.expbits
-1 {
157 exp
= 1<<flt
.expbits
- 1 + flt
.bias
159 } else if mant
&(1<<flt
.mantbits
) == 0 {
164 bits
= mant
& (uint64(1)<<flt
.mantbits
- 1)
165 bits |
= uint64((exp
-flt
.bias
)&(1<<flt
.expbits
-1)) << flt
.mantbits
167 bits |
= 1 << (flt
.mantbits
+ flt
.expbits
)
172 // AssignComputeBounds sets f to the floating point value
173 // defined by mant, exp and precision given by flt. It returns
174 // lower, upper such that any number in the closed interval
175 // [lower, upper] is converted back to the same floating point number.
176 func (f
*extFloat
) AssignComputeBounds(mant
uint64, exp
int, neg
bool, flt
*floatInfo
) (lower
, upper extFloat
) {
178 f
.exp
= exp
- int(flt
.mantbits
)
180 if f
.exp
<= 0 && mant
== (mant
>>uint(-f
.exp
))<<uint(-f
.exp
) {
182 f
.mant
>>= uint(-f
.exp
)
186 expBiased
:= exp
- flt
.bias
188 upper
= extFloat
{mant
: 2*f
.mant
+ 1, exp
: f
.exp
- 1, neg
: f
.neg
}
189 if mant
!= 1<<flt
.mantbits || expBiased
== 1 {
190 lower
= extFloat
{mant
: 2*f
.mant
- 1, exp
: f
.exp
- 1, neg
: f
.neg
}
192 lower
= extFloat
{mant
: 4*f
.mant
- 1, exp
: f
.exp
- 2, neg
: f
.neg
}
197 // Normalize normalizes f so that the highest bit of the mantissa is
198 // set, and returns the number by which the mantissa was left-shifted.
199 func (f
*extFloat
) Normalize() (shift
uint) {
200 mant
, exp
:= f
.mant
, f
.exp
204 if mant
>>(64-32) == 0 {
208 if mant
>>(64-16) == 0 {
212 if mant
>>(64-8) == 0 {
216 if mant
>>(64-4) == 0 {
220 if mant
>>(64-2) == 0 {
224 if mant
>>(64-1) == 0 {
228 shift
= uint(f
.exp
- exp
)
229 f
.mant
, f
.exp
= mant
, exp
233 // Multiply sets f to the product f*g: the result is correctly rounded,
234 // but not normalized.
235 func (f
*extFloat
) Multiply(g extFloat
) {
236 fhi
, flo
:= f
.mant
>>32, uint64(uint32(f
.mant
))
237 ghi
, glo
:= g
.mant
>>32, uint64(uint32(g
.mant
))
243 // f.mant*g.mant is fhi*ghi << 64 + (cross1+cross2) << 32 + flo*glo
244 f
.mant
= fhi
*ghi
+ (cross1
>> 32) + (cross2
>> 32)
245 rem
:= uint64(uint32(cross1
)) + uint64(uint32(cross2
)) + ((flo
* glo
) >> 32)
249 f
.mant
+= (rem
>> 32)
250 f
.exp
= f
.exp
+ g
.exp
+ 64
// uint64pow10 lists the powers of ten 10^0 through 10^19, the ones that
// fit in a uint64; uint64pow10[i] is 10^i.
var uint64pow10 = [...]uint64{
	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
}
258 // AssignDecimal sets f to an approximate value mantissa*10^exp. It
259 // returns true if the value represented by f is guaranteed to be the
260 // best approximation of d after being rounded to a float64 or
261 // float32 depending on flt.
262 func (f
*extFloat
) AssignDecimal(mantissa
uint64, exp10
int, neg
bool, trunc
bool, flt
*floatInfo
) (ok
bool) {
263 const uint64digits
= 19
265 errors
:= 0 // An upper bound for error, computed in errorscale*ulp.
267 // the decimal number was truncated.
268 errors
+= errorscale
/ 2
275 // Multiply by powers of ten.
276 i
:= (exp10
- firstPowerOfTen
) / stepPowerOfTen
277 if exp10
< firstPowerOfTen || i
>= len(powersOfTen
) {
280 adjExp
:= (exp10
- firstPowerOfTen
) % stepPowerOfTen
282 // We multiply by exp%step
283 if adjExp
< uint64digits
&& mantissa
< uint64pow10
[uint64digits
-adjExp
] {
284 // We can multiply the mantissa exactly.
285 f
.mant
*= uint64pow10
[adjExp
]
289 f
.Multiply(smallPowersOfTen
[adjExp
])
290 errors
+= errorscale
/ 2
293 // We multiply by 10 to the exp - exp%step.
294 f
.Multiply(powersOfTen
[i
])
298 errors
+= errorscale
/ 2
301 shift
:= f
.Normalize()
304 // Now f is a good approximation of the decimal.
305 // Check whether the error is too large: that is, if the mantissa
306 // is perturbated by the error, the resulting float64 will change.
307 // The 64 bits mantissa is 1 + 52 bits for float64 + 11 extra bits.
309 // In many cases the approximation will be good enough.
310 denormalExp
:= flt
.bias
- 63
312 if f
.exp
<= denormalExp
{
313 // f.mant * 2^f.exp is smaller than 2^(flt.bias+1).
314 extrabits
= uint(63 - flt
.mantbits
+ 1 + uint(denormalExp
-f
.exp
))
316 extrabits
= uint(63 - flt
.mantbits
)
319 halfway
:= uint64(1) << (extrabits
- 1)
320 mant_extra
:= f
.mant
& (1<<extrabits
- 1)
322 // Do a signed comparison here! If the error estimate could make
323 // the mantissa round differently for the conversion to double,
324 // then we can't give a definite answer.
325 if int64(halfway
)-int64(errors
) < int64(mant_extra
) &&
326 int64(mant_extra
) < int64(halfway
)+int64(errors
) {
332 // Frexp10 is an analogue of math.Frexp for decimal powers. It scales
333 // f by an approximate power of ten 10^-exp, and returns exp10, so
334 // that f*10^exp10 has the same value as the old f, up to an ulp,
335 // as well as the index of 10^-exp in the powersOfTen table.
336 func (f
*extFloat
) frexp10() (exp10
, index
int) {
337 // The constants expMin and expMax constrain the final value of the
338 // binary exponent of f. We want a small integral part in the result
339 // because finding digits of an integer requires divisions, whereas
340 // digits of the fractional part can be found by repeatedly multiplying
344 // Find power of ten such that x * 10^n has a binary exponent
345 // between expMin and expMax.
346 approxExp10
:= ((expMin
+expMax
)/2 - f
.exp
) * 28 / 93 // log(10)/log(2) is close to 93/28.
347 i
:= (approxExp10
- firstPowerOfTen
) / stepPowerOfTen
350 exp
:= f
.exp
+ powersOfTen
[i
].exp
+ 64
360 // Apply the desired decimal shift on f. It will have exponent
361 // in the desired range. This is multiplication by 10^-exp10.
362 f
.Multiply(powersOfTen
[i
])
364 return -(firstPowerOfTen
+ i
*stepPowerOfTen
), i
367 // frexp10Many applies a common shift by a power of ten to a, b, c.
368 func frexp10Many(a
, b
, c
*extFloat
) (exp10
int) {
369 exp10
, i
:= c
.frexp10()
370 a
.Multiply(powersOfTen
[i
])
371 b
.Multiply(powersOfTen
[i
])
375 // FixedDecimal stores in d the first n significant digits
376 // of the decimal representation of f. It returns false
377 // if it cannot be sure of the answer.
378 func (f
*extFloat
) FixedDecimal(d
*decimalSlice
, n
int) bool {
386 panic("strconv: internal error: extFloat.FixedDecimal called with n == 0")
388 // Multiply by an appropriate power of ten to have a reasonable
389 // number to process.
391 exp10
, _
:= f
.frexp10()
393 shift
:= uint(-f
.exp
)
394 integer
:= uint32(f
.mant
>> shift
)
395 fraction
:= f
.mant
- (uint64(integer
) << shift
)
396 ε
:= uint64(1) // ε is the uncertainty we have on the mantissa of f.
398 // Write exactly n digits to d.
399 needed
:= n
// how many digits are left to write.
400 integerDigits
:= 0 // the number of decimal digits of integer.
401 pow10
:= uint64(1) // the power of ten by which f was scaled.
402 for i
, pow
:= 0, uint64(1); i
< 20; i
++ {
403 if pow
> uint64(integer
) {
410 if integerDigits
> needed
{
411 // the integral part is already large, trim the last digits.
412 pow10
= uint64pow10
[integerDigits
-needed
]
413 integer
/= uint32(pow10
)
414 rest
-= integer
* uint32(pow10
)
419 // Write the digits of integer: the digits of rest are omitted.
422 for v
:= integer
; v
> 0; {
426 buf
[pos
] = byte(v
+ '0')
429 for i
:= pos
; i
< len(buf
); i
++ {
434 d
.dp
= integerDigits
+ exp10
438 if rest
!= 0 || pow10
!= 1 {
439 panic("strconv: internal error, rest != 0 but needed > 0")
441 // Emit digits for the fractional part. Each time, 10*fraction
442 // fits in a uint64 without overflow.
445 ε
*= 10 // the uncertainty scales as we multiply by ten.
447 // the error is so large it could modify which digit to write, abort.
450 digit
:= fraction
>> shift
451 d
.d
[nd
] = byte(digit
+ '0')
452 fraction
-= digit
<< shift
459 // We have written a truncation of f (a numerator / 10^d.dp). The remaining part
460 // can be interpreted as a small number (< 1) to be added to the last digit of the
463 // If rest > 0, the amount is:
464 // (rest<<shift | fraction) / (pow10 << shift)
465 // fraction being known with a ±ε uncertainty.
466 // The fact that n > 0 guarantees that pow10 << shift does not overflow a uint64.
468 // If rest = 0, pow10 == 1 and the amount is
469 // fraction / (1 << shift)
470 // fraction being known with a ±ε uncertainty.
472 // We pass this information to the rounding routine for adjustment.
474 ok
:= adjustLastDigitFixed(d
, uint64(rest
)<<shift|fraction
, pow10
, shift
, ε
)
478 // Trim trailing zeros.
479 for i
:= d
.nd
- 1; i
>= 0; i
-- {
488 // adjustLastDigitFixed assumes d contains the representation of the integral part
489 // of some number, whose fractional part is num / (den << shift). The numerator
490 // num is only known up to an uncertainty of size ε, assumed to be less than
493 // It will increase the last digit by one to account for correct rounding, typically
494 // when the fractional part is greater than 1/2, and will return false if ε is such
495 // that no correct answer can be given.
496 func adjustLastDigitFixed(d
*decimalSlice
, num
, den
uint64, shift
uint, ε
uint64) bool {
497 if num
> den
<<shift
{
498 panic("strconv: num > den<<shift in adjustLastDigitFixed")
500 if 2*ε
> den
<<shift
{
501 panic("strconv: ε > (den<<shift)/2")
503 if 2*(num
+ε
) < den
<<shift
{
506 if 2*(num
-ε
) > den
<<shift
{
528 // ShortestDecimal stores in d the shortest decimal representation of f
529 // which belongs to the open interval (lower, upper), where f is supposed
530 // to lie. It returns false whenever the result is unsure. The implementation
531 // uses the Grisu3 algorithm.
532 func (f
*extFloat
) ShortestDecimal(d
*decimalSlice
, lower
, upper
*extFloat
) bool {
539 if f
.exp
== 0 && *lower
== *f
&& *lower
== *upper
{
543 for v
:= f
.mant
; v
> 0; {
546 buf
[n
] = byte(v
+ '0')
550 nd
:= len(buf
) - n
- 1
551 for i
:= 0; i
< nd
; i
++ {
555 for d
.nd
> 0 && d
.d
[d
.nd
-1] == '0' {
565 // Uniformize exponents.
566 if f
.exp
> upper
.exp
{
567 f
.mant
<<= uint(f
.exp
- upper
.exp
)
570 if lower
.exp
> upper
.exp
{
571 lower
.mant
<<= uint(lower
.exp
- upper
.exp
)
572 lower
.exp
= upper
.exp
575 exp10
:= frexp10Many(lower
, f
, upper
)
576 // Take a safety margin due to rounding in frexp10Many, but we lose precision.
580 // The shortest representation of f is either rounded up or down, but
581 // in any case, it is a truncation of upper.
582 shift
:= uint(-upper
.exp
)
583 integer
:= uint32(upper
.mant
>> shift
)
584 fraction
:= upper
.mant
- (uint64(integer
) << shift
)
586 // How far we can go down from upper until the result is wrong.
587 allowance
:= upper
.mant
- lower
.mant
588 // How far we should go to get a very precise result.
589 targetDiff
:= upper
.mant
- f
.mant
591 // Count integral digits: there are at most 10.
592 var integerDigits
int
593 for i
, pow
:= 0, uint64(1); i
< 20; i
++ {
594 if pow
> uint64(integer
) {
600 for i
:= 0; i
< integerDigits
; i
++ {
601 pow
:= uint64pow10
[integerDigits
-i
-1]
602 digit
:= integer
/ uint32(pow
)
603 d
.d
[i
] = byte(digit
+ '0')
604 integer
-= digit
* uint32(pow
)
605 // evaluate whether we should stop.
606 if currentDiff
:= uint64(integer
)<<shift
+ fraction
; currentDiff
< allowance
{
608 d
.dp
= integerDigits
+ exp10
610 // Sometimes allowance is so large the last digit might need to be
611 // decremented to get closer to f.
612 return adjustLastDigit(d
, currentDiff
, targetDiff
, allowance
, pow
<<shift
, 2)
619 // Compute digits of the fractional part. At each step fraction does not
620 // overflow. The choice of minExp implies that fraction is less than 2^60.
622 multiplier
:= uint64(1)
626 digit
= int(fraction
>> shift
)
627 d
.d
[d
.nd
] = byte(digit
+ '0')
629 fraction
-= uint64(digit
) << shift
630 if fraction
< allowance
*multiplier
{
631 // We are in the admissible range. Note that if allowance is about to
632 // overflow, that is, allowance > 2^64/10, the condition is automatically
633 // true due to the limited range of fraction.
634 return adjustLastDigit(d
,
635 fraction
, targetDiff
*multiplier
, allowance
*multiplier
,
636 1<<shift
, multiplier
*2)
641 // adjustLastDigit modifies d = x-currentDiff*ε, to get closest to
642 // d = x-targetDiff*ε, without becoming smaller than x-maxDiff*ε.
643 // It assumes that a decimal digit is worth ulpDecimal*ε, and that
644 // all data is known with a error estimate of ulpBinary*ε.
645 func adjustLastDigit(d
*decimalSlice
, currentDiff
, targetDiff
, maxDiff
, ulpDecimal
, ulpBinary
uint64) bool {
646 if ulpDecimal
< 2*ulpBinary
{
647 // Approximation is too wide.
650 for currentDiff
+ulpDecimal
/2+ulpBinary
< targetDiff
{
652 currentDiff
+= ulpDecimal
654 if currentDiff
+ulpDecimal
<= targetDiff
+ulpDecimal
/2+ulpBinary
{
655 // we have two choices, and don't know what to do.
658 if currentDiff
< ulpBinary || currentDiff
> maxDiff
-ulpBinary
{
662 if d
.nd
== 1 && d
.d
[0] == '0' {
663 // the number has actually reached zero.