From d44d7d6bebdb7fa52090b744854d49f34099e044 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Sun, 8 May 2016 22:49:36 +0200 Subject: [PATCH] Fix precision loss in tabulated normal distribution The tabulated normal distribution was generated by simply integrating a gaussian function, which led to severe loss-of-precision for double precision tables. Fixed by implementing proper inverse error functions in the math module, and using this to fill the table directly. Fixes #1930. Change-Id: I74b72b4a6d36f1eb382c6456501ce5644c92725e --- src/gromacs/math/functions.cpp | 93 ++++++++++++++++++++++ src/gromacs/math/functions.h | 20 +++++ src/gromacs/math/tests/functions.cpp | 56 ++++++++++++- .../tests/refdata/FunctionTest_ErfInvDouble.xml | 17 ++++ .../tests/refdata/FunctionTest_ErfInvFloat.xml | 17 ++++ src/gromacs/random/tabulatednormaldistribution.cpp | 2 +- src/gromacs/random/tabulatednormaldistribution.h | 90 +++++++++++++-------- src/gromacs/random/tests/CMakeLists.txt | 2 +- .../TabulatedNormalDistributionTest_Output16.xml | 2 +- ...ulatedNormalDistributionTest_OutputDouble14.xml | 34 ++++---- .../random/tests/tabulatednormaldistribution.cpp | 23 +++++- 11 files changed, 299 insertions(+), 57 deletions(-) create mode 100644 src/gromacs/math/tests/refdata/FunctionTest_ErfInvDouble.xml create mode 100644 src/gromacs/math/tests/refdata/FunctionTest_ErfInvFloat.xml rewrite src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_OutputDouble14.xml (62%) diff --git a/src/gromacs/math/functions.cpp b/src/gromacs/math/functions.cpp index 9c6f52ec1e..c99718ecfb 100644 --- a/src/gromacs/math/functions.cpp +++ b/src/gromacs/math/functions.cpp @@ -51,11 +51,13 @@ #include #include +#include #if GMX_NATIVE_WINDOWS # include // _BitScanReverse, _BitScanReverse64 #endif +#include "gromacs/math/utilities.h" #include "gromacs/utility/gmxassert.h" namespace gmx @@ -178,4 +180,95 @@ greatestCommonDivisor(std::int64_t p, return p; } +double +erfinv(double x) +{ + double xabs = std::abs(x); + + if (xabs > 1.0) + { + return std::nan(""); + } + + if (x == 1.0) + { + return std::numeric_limits::infinity(); + } + + if (x == -1.0) + { + return -std::numeric_limits::infinity(); + } + + double res; + + if (xabs <= 0.7) + { + // Rational approximation in range [0,0.7] + double z = x*x; + double P = (((-0.140543331 * z + 0.914624893) * z - 1.645349621) * z + 0.886226899); + double Q = ((((0.012229801 * z - 0.329097515) * z + 1.442710462) * z - 2.118377725) * z + 1.0); + res = x * P/Q; + } + else + { + // Rational approximation in range [0.7,1) + double z = std::sqrt(-std::log((1.0 - std::abs(x))/2.0)); + double P = ((1.641345311 * z + 3.429567803) * z - 1.624906493) * z - 1.970840454; + double Q = (1.637067800 * z + 3.543889200) * z + 1.0; + res = std::copysign(1.0, x) * P/Q; + } + + // Double precision requires two N-R iterations + res = res - (std::erf(res) - x)/( (2.0/std::sqrt(M_PI))*std::exp(-res*res)); + res = res - (std::erf(res) - x)/( (2.0/std::sqrt(M_PI))*std::exp(-res*res)); + + return res; +} + +float +erfinv(float x) +{ + float xabs = std::abs(x); + + if (xabs > 1.0f) + { + return std::nan(""); + } + + if (x == 1.0f) + { + return std::numeric_limits::infinity(); + } + + if (x == -1.0f) + { + return -std::numeric_limits::infinity(); + } + + float res; + + if (xabs <= 0.7f) + { + // Rational approximation in range [0,0.7] + float z = x*x; + float P = (((-0.140543331f * z + 0.914624893f) * z - 1.645349621f) * z + 0.886226899f); + float Q = ((((0.012229801f * z - 0.329097515f) * z + 1.442710462f) * z - 2.118377725f) * z + 1.0f); + res = x * P/Q; + } + else + { + // Rational approximation in range [0.7,1) + float z = std::sqrt(-std::log((1.0 - std::abs(x))/2.0f)); + float P = ((1.641345311f * z + 3.429567803f) * z - 1.624906493f) * z - 1.970840454f; + float Q = (1.637067800f * z + 3.543889200f) * z + 1.0f; + res = std::copysign(1.0f, x) * P/Q; + } + + // Single N-R iteration sufficient for single precision + res = res - (std::erf(res) - x)/( (2.0f/std::sqrt(M_PI))*std::exp(-res*res)); + + return res; +} + } // namespace gmx diff --git a/src/gromacs/math/functions.h b/src/gromacs/math/functions.h index 864bece80d..b858b7a99d 100644 --- a/src/gromacs/math/functions.h +++ b/src/gromacs/math/functions.h @@ -420,6 +420,26 @@ static inline real series_sinhx(real x) return (1 + (x2/6.0)*(1 + (x2/20.0)*(1 + (x2/42.0)*(1 + (x2/72.0)*(1 + (x2/110.0)))))); } +/*! \brief Inverse error function, double precision. + * + * \param x Argument, should be in the range -1.0 < x < 1.0 + * + * \return The inverse of the error function if the argument is inside the + * range, +/- infinity if it is exactly 1.0 or -1.0, and NaN otherwise. + */ +double +erfinv(double x); + +/*! \brief Inverse error function, single precision. + * + * \param x Argument, should be in the range -1.0 < x < 1.0 + * + * \return The inverse of the error function if the argument is inside the + * range, +/- infinity if it is exactly 1.0 or -1.0, and NaN otherwise. + */ +float +erfinv(float x); + } // namespace gmx diff --git a/src/gromacs/math/tests/functions.cpp b/src/gromacs/math/tests/functions.cpp index d211ecbb52..cbd80479e5 100644 --- a/src/gromacs/math/tests/functions.cpp +++ b/src/gromacs/math/tests/functions.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015, by the GROMACS development team, led by + * Copyright (c) 2015,2016, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -301,4 +301,58 @@ TEST(FunctionTest, Powers) EXPECT_EQ(59604.644775390625, gmx::power12(2.5)); } +TEST(FunctionTest, ErfInvFloat) +{ + gmx::test::TestReferenceData data; + gmx::test::TestReferenceChecker checker(data.rootChecker()); + std::vector result; + int npoints = 10; + + for (int i = 0; i < npoints; i++) + { + float r = -1.0 + 2.0 * (float(i) + 0.5) / npoints; + + result.push_back(gmx::erfinv(r)); + } + checker.checkSequence(result.begin(), result.end(), "ErfInvFloat"); +} + +TEST(FunctionTest, ErfInvDouble) +{ + gmx::test::TestReferenceData data; + gmx::test::TestReferenceChecker checker(data.rootChecker()); + std::vector result; + int npoints = 10; + + for (int i = 0; i < npoints; i++) + { + double r = -1.0 + 2.0 * (double(i) + 0.5) / npoints; + + result.push_back(gmx::erfinv(r)); + } + checker.checkSequence(result.begin(), result.end(), "ErfInvDouble"); +} + +TEST(FunctionTest, ErfAndErfInvAreInversesFloat) +{ + int npoints = 1000; + + for (int i = 0; i < npoints; i++) + { + float r = -1.0 + 2.0 * (float(i) + 0.5) / npoints; + EXPECT_FLOAT_EQ_TOL(r, std::erf(gmx::erfinv(r)), gmx::test::ulpTolerance(10)); + } +} + +TEST(FunctionTest, ErfAndErfInvAreInversesDouble) +{ + int npoints = 1000; + + for (int i = 0; i < npoints; i++) + { + double r = -1.0 + 2.0 * (double(i) + 0.5) / npoints; + EXPECT_DOUBLE_EQ_TOL(r, std::erf(gmx::erfinv(r)), gmx::test::ulpTolerance(10)); + } +} + } // namespace diff --git a/src/gromacs/math/tests/refdata/FunctionTest_ErfInvDouble.xml b/src/gromacs/math/tests/refdata/FunctionTest_ErfInvDouble.xml new file mode 100644 index 0000000000..dac6954eee --- /dev/null +++ b/src/gromacs/math/tests/refdata/FunctionTest_ErfInvDouble.xml @@ -0,0 +1,17 @@ + + + + + 10 + -1.1630871536766743 + -0.73286907795921663 + -0.47693627620446977 + -0.27246271472675443 + -0.088855990494257672 + 0.088855990494257769 + 0.27246271472675443 + 0.47693627620446977 + 0.73286907795921663 + 1.1630871536766734 + + diff --git a/src/gromacs/math/tests/refdata/FunctionTest_ErfInvFloat.xml b/src/gromacs/math/tests/refdata/FunctionTest_ErfInvFloat.xml new file mode 100644 index 0000000000..593492530b --- /dev/null +++ b/src/gromacs/math/tests/refdata/FunctionTest_ErfInvFloat.xml @@ -0,0 +1,17 @@ + + + + + 10 + -1.163087 + -0.73286909 + -0.47693628 + -0.27246273 + -0.088855989 + 0.088855989 + 0.27246273 + 0.47693628 + 0.73286909 + 1.163087 + + diff --git a/src/gromacs/random/tabulatednormaldistribution.cpp b/src/gromacs/random/tabulatednormaldistribution.cpp index 46f9168f44..b03a33b6f7 100644 --- a/src/gromacs/random/tabulatednormaldistribution.cpp +++ b/src/gromacs/random/tabulatednormaldistribution.cpp @@ -48,7 +48,7 @@ namespace gmx // the table in all files using it, unless the user has requested a different // precision or resolution. template<> -const std::vector TabulatedNormalDistribution::c_table_ = TabulatedNormalDistribution::makeTable(); +const std::vector TabulatedNormalDistribution::c_table_ = TabulatedNormalDistribution::makeTable(); #else // Avoid compiler warnings about no public symbols void TabulatedNormalDistributionDummy(){} diff --git a/src/gromacs/random/tabulatednormaldistribution.h b/src/gromacs/random/tabulatednormaldistribution.h index 199ec419b0..34eb2be9e0 100644 --- a/src/gromacs/random/tabulatednormaldistribution.h +++ b/src/gromacs/random/tabulatednormaldistribution.h @@ -51,14 +51,24 @@ #include #include +#include "gromacs/math/functions.h" #include "gromacs/math/utilities.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/classhelpers.h" +#include "gromacs/utility/gmxassert.h" #include "gromacs/utility/real.h" namespace gmx { +namespace +{ + +//! Number of bits that determines the resolution of the lookup table for the normal distribution. +const int c_TabulatedNormalDistributionDefaultBits = 14; + +} + /*! \brief Tabulated normal random distribution * * Random distribution compatible with C++11 distributions - it can be @@ -76,8 +86,9 @@ namespace gmx * important property is the distribution - given the noise in forces * we do not need very high resolution. * This distribution uses an internal table to return samples from a - * normal distribution with limited resolution. By default the table uses - * 14 bits, but this is specified with a template parameter. + * normal distribution with limited resolution. By default the table + * uses c_TabulatedNormalDistributionDefaultBits bits, but this is + * specified with a template parameter. * * Since this distribution only uses tableBits bits per value generated, * the values draw from the random engine are used for several results. @@ -93,7 +104,7 @@ namespace gmx * return arbitrarily small/large values, but with e.g. 14 bits * the results are limited to roughly +/- 4 standard deviations. */ -template +template class TabulatedNormalDistribution { static_assert(tableBits <= 24, "Normal distribution table is limited to 24bits (64MB in single precision)"); @@ -150,49 +161,56 @@ class TabulatedNormalDistribution result_type stddev_; }; - private: - /*! \brief Fill the table with values for the normal distribution * * This routine returns a new a std::vector with the table data. + * + * This routine is used to help construct objects of this class, + * and is exposed only to permit testing. Normal code should not + * need to call this function. */ static const std::vector // cppcheck-suppress unusedPrivateFunction makeTable() { - /* Fill the table with the integral of a gaussian distribution: - */ + /* Fill the table with the integral of a gaussian distribution, which + * corresponds to the inverse error function. + * We avoid integrating a gaussian numerically, since that leads to + * some loss-of-precision which also accumulates so it is worse for + * larger indices in the table. */ std::size_t tableSize = 1 << tableBits; - std::size_t halfSize = (1 << tableBits)/2; - double invSize = 1.0/tableSize; - double factor = std::sqrt(2.0*M_PI); - double x = 0.5*factor*invSize; + std::size_t halfSize = tableSize/2; + double invHalfSize = 1.0/halfSize; - std::vector table(1ULL << tableBits); + std::vector table(tableSize); - for (std::size_t i = 0; i < halfSize; i++) + // Fill in all but the extremal entries of the table + for (std::size_t i = 0; i < halfSize-1; i++) { - if (i > 0) - { - double dx; - - if (i < halfSize-1) - { - double invNormal = factor*std::exp(0.5*x*x); - /* det is larger than 0 for all x, except the last */ - double det = 1.0 - 2.0*invSize*x*invNormal; - dx = (1.0 - std::sqrt(det))/x; - } - else - { - dx = 1.0/x; - } - x = x + dx; - } + double r = (i + 0.5) * invHalfSize; + double x = std::sqrt(2.0) * erfinv(r); + table.at(halfSize-1-i) = -x; table.at(halfSize+i) = x; } + // We want to fill in the extremal table entries with + // values that make the total variance equal to 1, so + // measure the variance by summing the squares of the + // other values of the distribution, starting from the + // smallest values. + double sumOfSquares = 0; + for (std::size_t i = 1; i < halfSize; i++) + { + double value = table.at(i); + sumOfSquares += value * value; + } + double missingVariance = 1.0 - 2.0*sumOfSquares/tableSize; + GMX_RELEASE_ASSERT(missingVariance > 0, "Incorrect computation of tabulated normal distribution"); + double extremalValue = std::sqrt(0.5*missingVariance*tableSize); + table.at(0) = -extremalValue; + table.back() = extremalValue; + return table; } @@ -302,9 +320,11 @@ class TabulatedNormalDistribution // store it in our 64-bit value, and set the number of active bits. // For tableBits up to 16 this will be as efficient both with 32 // and 64 bit random engines when drawing multiple numbers - // (our default value is 14), It also avoids drawing multiple - // 32-bit random numbres even if we just call this routine for a - // single result. + // (our default value is + // c_TabulatedNormalDistributionDefaultBits == 14). It + // also avoids drawing multiple 32-bit random numbers + // even if we just call this routine for a single + // result. savedRandomBits_ = static_cast(g()); savedRandomBitsLeft_ = std::numeric_limits::digits; } @@ -356,10 +376,10 @@ class TabulatedNormalDistribution #if !defined(_MSC_VER) && !defined(DOXYGEN) // Declaration of template specialization template<> -const std::vector TabulatedNormalDistribution::c_table_; +const std::vector TabulatedNormalDistribution::c_table_; extern template -const std::vector TabulatedNormalDistribution::c_table_; +const std::vector TabulatedNormalDistribution::c_table_; #endif // Instantiation for all tables without specialization diff --git a/src/gromacs/random/tests/CMakeLists.txt b/src/gromacs/random/tests/CMakeLists.txt index fefacc4350..e40918c347 100644 --- a/src/gromacs/random/tests/CMakeLists.txt +++ b/src/gromacs/random/tests/CMakeLists.txt @@ -32,7 +32,7 @@ # To help us fund GROMACS development, we humbly ask that you cite # the research papers on the package. Check out http://www.gromacs.org. -gmx_add_unit_test(MathUnitTests random-test +gmx_add_unit_test(RandomUnitTests random-test exponentialdistribution.cpp gammadistribution.cpp normaldistribution.cpp diff --git a/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_Output16.xml b/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_Output16.xml index 9a6e44d05f..d10180dc51 100644 --- a/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_Output16.xml +++ b/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_Output16.xml @@ -10,7 +10,7 @@ 0.35591698 13.225243 3.8506312 - -17.386944 + -17.372868 -0.6433773 -1.8184407 diff --git a/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_OutputDouble14.xml b/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_OutputDouble14.xml dissimilarity index 62% index 42b3c98d70..b1c2f06878 100644 --- a/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_OutputDouble14.xml +++ b/src/gromacs/random/tests/refdata/TabulatedNormalDistributionTest_OutputDouble14.xml @@ -1,17 +1,17 @@ - - - - - 10 - 6.2040742877964199 - -5.0443347697093763 - -3.8484410188878648 - -3.5752202548964327 - 1.806798968442128 - 6.2412274164063648 - -1.5439517116690484 - -8.951438171789011 - -2.3151355578004198 - 2.7598447450963115 - - + + + + + 10 + 6.2040743073882094 + -5.0443347858799923 + -3.8484410453534643 + -3.5752202809260982 + 1.8067989676891278 + 6.2412274362065912 + -1.5439517275812444 + -8.9514357643656126 + -2.315135578014341 + 2.7598447480822306 + + diff --git a/src/gromacs/random/tests/tabulatednormaldistribution.cpp b/src/gromacs/random/tests/tabulatednormaldistribution.cpp index 269fd63d40..a4e8fc652d 100644 --- a/src/gromacs/random/tests/tabulatednormaldistribution.cpp +++ b/src/gromacs/random/tests/tabulatednormaldistribution.cpp @@ -100,7 +100,6 @@ TEST(TabulatedNormalDistributionTest, OutputDouble14) { result.push_back(dist(rng)); } - checker.setDefaultTolerance(test::ulpTolerance(15)); //compiler usage of FMA in makeTable can cause higher difference checker.checkSequence(result.begin(), result.end(), "TabulatedNormalDistributionDouble14"); } @@ -152,6 +151,28 @@ TEST(TabulatedNormalDistributionTest, AltParam) EXPECT_EQ(distA(rngA), distB(rngB, paramA)); } +TEST(TabulatedNormalDistributionTableTest, HasValidProperties) +{ + std::vector table = TabulatedNormalDistribution::makeTable(); + + EXPECT_EQ(table.size() % 2, 0) << "Table must have even number of entries"; + + size_t halfSize = table.size() / 2; + double sumOfSquares = 0.0; + auto tolerance = gmx::test::ulpTolerance(1); + for (size_t i = 0, iFromEnd = table.size()-1; i < halfSize; ++i, --iFromEnd) + { + EXPECT_REAL_EQ_TOL(table.at(i), -table.at(iFromEnd), tolerance) + << "Table is not an odd-valued function for entries " << i << " and " << iFromEnd; + // Add up the squares of the table values in order of ascending + // magnitude (to minimize accumulation of round-off error). + sumOfSquares += table.at(i) * table.at(i) + table.at(iFromEnd) * table.at(iFromEnd); + } + + double variance = sumOfSquares / table.size(); + EXPECT_REAL_EQ_TOL(1.0, variance, tolerance) << "Table should have unit variance"; +} + } // namespace anonymous } // namespace gmx -- 2.11.4.GIT