src/gmxlib/maths.c

   1 /*
   2  * $Id$
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "maths.h"
  39
  40 int gmx_nint(real a)
  41 {
  42   const real half = .5;
  43   int   result;
  44
  45   result = (a < 0.) ? ((int)(a - half)) : ((int)(a + half));
  46   return result;
  47 }
  48
  49 real sign(real x,real y)
  50 {
  51   if (y < 0)
  52     return -fabs(x);
  53   else
  54     return +fabs(x);
  55 }
  56
  57 /* Double and single precision erf() and erfc() from
  58  * the GNU C library, for hosts that don't have them.
  59  */
  60 /*
  61  * ====================================================
  62  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  63  *
  64  * Developed at SunPro, a Sun Microsystems, Inc. business.
  65  * Permission to use, copy, modify, and distribute this
  66  * software is freely granted, provided that this notice
  67  * is preserved.
  68  * ====================================================
  69  */
  70 /* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
  71    for performance improvement on pipelined processors.
  72 */
  73
  74 #if (INT_MAX == 2147483647)
  75    typedef int erf_int32_t;
  76    typedef unsigned int erf_u_int32_t;
  77 #elif (LONG_MAX == 2147483647L)
  78    typedef long erf_int32_t;
  79    typedef unsigned long erf_u_int32_t;
  80 #elif (SHRT_MAX == 2147483647)
  81    typedef short erf_int32_t;
  82    typedef unsigned short erf_u_int32_t;
  83 #else
  84 #  error ERROR: No 32 bit wide integer type found!
  85 #endif
  86
  87
  88 #ifdef DOUBLE
  89
  90 static const double
  91 tiny        = 1e-300,
  92 half=  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
  93 one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
  94 two =  2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
  95         /* c = (float)0.84506291151 */
  96 erx =  8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
  97 /*
  98  * Coefficients for approximation to  erf on [0,0.84375]
  99  */
 100 efx =  1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
 101 efx8=  1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
 102 pp[]  =  {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
 103  -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
 104  -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
 105  -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
 106  -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
 107 qq[]  =  {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
 108   6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
 109   5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
 110   1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
 111  -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
 112 /*
 113  * Coefficients for approximation to  erf  in [0.84375,1.25]
 114  */
 115 pa[]  = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
 116   4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
 117  -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
 118   3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
 119  -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
 120   3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
 121  -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
 122 qa[]  =  {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
 123   5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
 124   7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
 125   1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
 126   1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
 127   1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
 128 /*
 129  * Coefficients for approximation to  erfc in [1.25,1/0.35]
 130  */
 131 ra[]  = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
 132  -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
 133  -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
 134  -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
 135  -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
 136  -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
 137  -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
 138  -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
 139 sa[]  =  {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
 140   1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
 141   4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
 142   6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
 143   4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
 144   1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
 145   6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
 146  -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
 147 /*
 148  * Coefficients for approximation to  erfc in [1/.35,28]
 149  */
 150 rb[]  = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
 151  -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
 152  -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
 153  -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
 154  -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
 155  -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
 156  -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
 157 sb[]  =  {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
 158   3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
 159   1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
 160   3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
 161   2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
 162   4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
 163  -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */
 164
 165 double gmx_erf(double x)
 166 {
 167
 168         erf_int32_t hx,ix,i;
 169         double R,S,P,Q,s,y,z,r;
 170         double test=0.987654321; /* Just a number */
 171         int be_fword;
 172         unsigned char itest = *((char *)&test);
 173
 174         /* Possible representations in IEEE double precision:
 175          * (S=small endian, B=big endian)
 176          *
 177          * Byte order, Word order, Hex
 178          *     S           S       b8 56 0e 3c dd 9a ef 3f
 179          *     B           S       3c 0e 56 b8 3f ef 9a dd
 180          *     S           B       dd 9a ef 3f b8 56 0e 3c
 181          *     B           B       3f ef 9a dd 3c 0e 56 b8
 182          */
 183
 184         if(itest==0xdd || itest==0x3f)
 185           be_fword=1;  /* Big endian word order */
 186         else if(itest==0xb8 || itest==0x3c)
 187           be_fword=0;  /* Small endian word order */
 188         else { /* Catch strange errors */
 189           printf("Error detecting floating-point word order in gmx_erf().\n");
 190           exit(0);
 191         }
 192
 193         /* Get the high (most significant) part of a double.
 194          * We HAVE to use the constants 0/1 here, or the gcc
 195          * scheduler will get it wrong. (see comments in fdlibm)
 196          */
 197         if(be_fword)
 198           hx=*((int *)&x);
 199         else
 200           hx=*(1+(int *)&x);
 201
 202         ix = hx&0x7fffffff;
 203         if(ix>=0x7ff00000) {            /* erf(nan)=nan */
 204             i = ((erf_u_int32_t)hx>>31)<<1;
 205             return (double)(1-i)+one/x; /* erf(+-inf)=+-1 */
 206         }
 207
 208         if(ix < 0x3feb0000) {           /* |x|<0.84375 */
 209             double r1,r2,s1,s2,s3,z2,z4;
 210             if(ix < 0x3e300000) {       /* |x|<2**-28 */
 211                 if (ix < 0x00800000)
 212                     return 0.125*(8.0*x+efx8*x);  /*avoid underflow */
 213                 return x + efx*x;
 214             }
 215             z = x*x;
 216             r1 = pp[0]+z*pp[1]; z2=z*z;
 217             r2 = pp[2]+z*pp[3]; z4=z2*z2;
 218             s1 = one+z*qq[1];
 219             s2 = qq[2]+z*qq[3];
 220             s3 = qq[4]+z*qq[5];
 221             r = r1 + z2*r2 + z4*pp[4];
 222             s  = s1 + z2*s2 + z4*s3;
 223             y = r/s;
 224             return x + x*y;
 225         }
 226         if(ix < 0x3ff40000) {           /* 0.84375 <= |x| < 1.25 */
 227             double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
 228             s = fabs(x)-one;
 229             P1 = pa[0]+s*pa[1]; s2=s*s;
 230             Q1 = one+s*qa[1];   s4=s2*s2;
 231             P2 = pa[2]+s*pa[3]; s6=s4*s2;
 232             Q2 = qa[2]+s*qa[3];
 233             P3 = pa[4]+s*pa[5];
 234             Q3 = qa[4]+s*qa[5];
 235             P4 = pa[6];
 236             Q4 = qa[6];
 237             P = P1 + s2*P2 + s4*P3 + s6*P4;
 238             Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
 239             if(hx>=0) return erx + P/Q; else return -erx - P/Q;
 240         }
 241         if (ix >= 0x40180000) {         /* inf>|x|>=6 */
 242             if(hx>=0) return one-tiny; else return tiny-one;
 243         }
 244         x = fabs(x);
 245         s = one/(x*x);
 246         if(ix< 0x4006DB6E) {    /* |x| < 1/0.35 */
 247             double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
 248             R1 = ra[0]+s*ra[1];s2 = s*s;
 249             S1 = one+s*sa[1];  s4 = s2*s2;
 250             R2 = ra[2]+s*ra[3];s6 = s4*s2;
 251             S2 = sa[2]+s*sa[3];s8 = s4*s4;
 252             R3 = ra[4]+s*ra[5];
 253             S3 = sa[4]+s*sa[5];
 254             R4 = ra[6]+s*ra[7];
 255             S4 = sa[6]+s*sa[7];
 256             R = R1 + s2*R2 + s4*R3 + s6*R4;
 257             S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
 258         } else {        /* |x| >= 1/0.35 */
 259             double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
 260             R1 = rb[0]+s*rb[1];s2 = s*s;
 261             S1 = one+s*sb[1];  s4 = s2*s2;
 262             R2 = rb[2]+s*rb[3];s6 = s4*s2;
 263             S2 = sb[2]+s*sb[3];
 264             R3 = rb[4]+s*rb[5];
 265             S3 = sb[4]+s*sb[5];
 266             S4 = sb[6]+s*sb[7];
 267             R = R1 + s2*R2 + s4*R3 + s6*rb[6];
 268             S = S1 + s2*S2 + s4*S3 + s6*S4;
 269         }
 270
 271         z  = x;
 272         /* Set the low (least significant) part of a double.
 273          * We HAVE to use the constants 0/1 here, or the gcc
 274          * scheduler will get it wrong. (see comments in fdlibm)
 275          */
 276         if(be_fword)
 277           *(1+(int *)&z)=0;
 278         else
 279           *((int *)&z)=0;
 280
 281         r  =  exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
 282         if(hx>=0) return one-r/x; else return  r/x-one;
 283 }
 284
 285
 286 double gmx_erfc(double x)
 287 {
 288         erf_int32_t hx,ix;
 289         double R,S,P,Q,s,y,z,r;
 290         double test=0.987654321; /* Just a number */
 291         int be_fword;
 292         unsigned char itest = *((char *)&test);
 293
 294         /* Possible representations in IEEE double precision:
 295          * (S=small endian, B=big endian)
 296          *
 297          * Byte order, Word order, Hex
 298          *     S           S       b8 56 0e 3c dd 9a ef 3f
 299          *     B           S       3c 0e 56 b8 3f ef 9a dd
 300          *     S           B       dd 9a ef 3f b8 56 0e 3c
 301          *     B           B       3f ef 9a dd 3c 0e 56 b8
 302          */
 303
 304         if(itest==0xdd || itest==0x3f)
 305           be_fword=1;  /* Big endian word order */
 306         else if(itest==0xb8 || itest==0x3c)
 307           be_fword=0;  /* Small endian word order */
 308         else { /* Catch strange errors */
 309           printf("Error detecting floating-point word order in gmx_erf().\n");
 310           exit(0);
 311         }
 312
 313         /* Get the high (most significant) part of a double.
 314          * We HAVE to use the constants 0/1 here, or the gcc
 315          * scheduler will get it wrong. (see comments in fdlibm)
 316          */
 317         if(be_fword)
 318           hx=*((int *)&x);
 319         else
 320           hx=*(1+(int *)&x);
 321
 322         ix = hx&0x7fffffff;
 323         if(ix>=0x7ff00000) {                    /* erfc(nan)=nan */
 324                                                 /* erfc(+-inf)=0,2 */
 325             return (double)(((erf_u_int32_t)hx>>31)<<1)+one/x;
 326         }
 327
 328         if(ix < 0x3feb0000) {           /* |x|<0.84375 */
 329             double r1,r2,s1,s2,s3,z2,z4;
 330             if(ix < 0x3c700000)         /* |x|<2**-56 */
 331                 return one-x;
 332             z = x*x;
 333             r1 = pp[0]+z*pp[1]; z2=z*z;
 334             r2 = pp[2]+z*pp[3]; z4=z2*z2;
 335             s1 = one+z*qq[1];
 336             s2 = qq[2]+z*qq[3];
 337             s3 = qq[4]+z*qq[5];
 338             r = r1 + z2*r2 + z4*pp[4];
 339             s  = s1 + z2*s2 + z4*s3;
 340             y = r/s;
 341             if(hx < 0x3fd00000) {       /* x<1/4 */
 342                 return one-(x+x*y);
 343             } else {
 344                 r = x*y;
 345                 r += (x-half);
 346                 return half - r ;
 347             }
 348         }
 349         if(ix < 0x3ff40000) {           /* 0.84375 <= |x| < 1.25 */
 350             double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
 351             s = fabs(x)-one;
 352             P1 = pa[0]+s*pa[1]; s2=s*s;
 353             Q1 = one+s*qa[1];   s4=s2*s2;
 354             P2 = pa[2]+s*pa[3]; s6=s4*s2;
 355             Q2 = qa[2]+s*qa[3];
 356             P3 = pa[4]+s*pa[5];
 357             Q3 = qa[4]+s*qa[5];
 358             P4 = pa[6];
 359             Q4 = qa[6];
 360             P = P1 + s2*P2 + s4*P3 + s6*P4;
 361             Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
 362             if(hx>=0) {
 363                 z  = one-erx; return z - P/Q;
 364             } else {
 365                 z = erx+P/Q; return one+z;
 366             }
 367         }
 368         if (ix < 0x403c0000) {          /* |x|<28 */
 369             x = fabs(x);
 370             s = one/(x*x);
 371             if(ix< 0x4006DB6D) {        /* |x| < 1/.35 ~ 2.857143*/
 372                 double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
 373             R1 = ra[0]+s*ra[1];s2 = s*s;
 374             S1 = one+s*sa[1];  s4 = s2*s2;
 375             R2 = ra[2]+s*ra[3];s6 = s4*s2;
 376             S2 = sa[2]+s*sa[3];s8 = s4*s4;
 377             R3 = ra[4]+s*ra[5];
 378             S3 = sa[4]+s*sa[5];
 379             R4 = ra[6]+s*ra[7];
 380             S4 = sa[6]+s*sa[7];
 381             R = R1 + s2*R2 + s4*R3 + s6*R4;
 382             S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
 383             } else {                    /* |x| >= 1/.35 ~ 2.857143 */
 384                 double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
 385                 if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
 386                 R1 = rb[0]+s*rb[1];s2 = s*s;
 387                 S1 = one+s*sb[1];  s4 = s2*s2;
 388                 R2 = rb[2]+s*rb[3];s6 = s4*s2;
 389                 S2 = sb[2]+s*sb[3];
 390                 R3 = rb[4]+s*rb[5];
 391                 S3 = sb[4]+s*sb[5];
 392                 S4 = sb[6]+s*sb[7];
 393                 R = R1 + s2*R2 + s4*R3 + s6*rb[6];
 394                 S = S1 + s2*S2 + s4*S3 + s6*S4;
 395             }
 396             z  = x;
 397
 398             /* Set the low (least significant) part of a double.
 399              * We HAVE to use the constants 0/1 here, or the gcc
 400              * scheduler will get it wrong. (see comments in fdlibm)
 401              */
 402             if(be_fword)
 403               *(1+(int *)&z)=0;
 404             else
 405               *((int *)&z)=0;
 406
 407             r  =  exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
 408             if(hx>0) return r/x; else return two-r/x;
 409         } else {
 410             if(hx>0) return tiny*tiny; else return two-tiny;
 411         }
 412 }
 413
 414 #else /* single precision */
 415
 416
 417
 418 static const float
 419 tiny        = 1e-30,
 420 half=  5.0000000000e-01, /* 0x3F000000 */
 421 one =  1.0000000000e+00, /* 0x3F800000 */
 422 two =  2.0000000000e+00, /* 0x40000000 */
 423         /* c = (subfloat)0.84506291151 */
 424 erx =  8.4506291151e-01, /* 0x3f58560b */
 425 /*
 426  * Coefficients for approximation to  erf on [0,0.84375]
 427  */
 428 efx =  1.2837916613e-01, /* 0x3e0375d4 */
 429 efx8=  1.0270333290e+00, /* 0x3f8375d4 */
 430 pp0  =  1.2837916613e-01, /* 0x3e0375d4 */
 431 pp1  = -3.2504209876e-01, /* 0xbea66beb */
 432 pp2  = -2.8481749818e-02, /* 0xbce9528f */
 433 pp3  = -5.7702702470e-03, /* 0xbbbd1489 */
 434 pp4  = -2.3763017452e-05, /* 0xb7c756b1 */
 435 qq1  =  3.9791721106e-01, /* 0x3ecbbbce */
 436 qq2  =  6.5022252500e-02, /* 0x3d852a63 */
 437 qq3  =  5.0813062117e-03, /* 0x3ba68116 */
 438 qq4  =  1.3249473704e-04, /* 0x390aee49 */
 439 qq5  = -3.9602282413e-06, /* 0xb684e21a */
 440 /*
 441  * Coefficients for approximation to  erf  in [0.84375,1.25]
 442  */
 443 pa0  = -2.3621185683e-03, /* 0xbb1acdc6 */
 444 pa1  =  4.1485610604e-01, /* 0x3ed46805 */
 445 pa2  = -3.7220788002e-01, /* 0xbebe9208 */
 446 pa3  =  3.1834661961e-01, /* 0x3ea2fe54 */
 447 pa4  = -1.1089469492e-01, /* 0xbde31cc2 */
 448 pa5  =  3.5478305072e-02, /* 0x3d1151b3 */
 449 pa6  = -2.1663755178e-03, /* 0xbb0df9c0 */
 450 qa1  =  1.0642088205e-01, /* 0x3dd9f331 */
 451 qa2  =  5.4039794207e-01, /* 0x3f0a5785 */
 452 qa3  =  7.1828655899e-02, /* 0x3d931ae7 */
 453 qa4  =  1.2617121637e-01, /* 0x3e013307 */
 454 qa5  =  1.3637083583e-02, /* 0x3c5f6e13 */
 455 qa6  =  1.1984500103e-02, /* 0x3c445aa3 */
 456 /*
 457  * Coefficients for approximation to  erfc in [1.25,1/0.35]
 458  */
 459 ra0  = -9.8649440333e-03, /* 0xbc21a093 */
 460 ra1  = -6.9385856390e-01, /* 0xbf31a0b7 */
 461 ra2  = -1.0558626175e+01, /* 0xc128f022 */
 462 ra3  = -6.2375331879e+01, /* 0xc2798057 */
 463 ra4  = -1.6239666748e+02, /* 0xc322658c */
 464 ra5  = -1.8460508728e+02, /* 0xc3389ae7 */
 465 ra6  = -8.1287437439e+01, /* 0xc2a2932b */
 466 ra7  = -9.8143291473e+00, /* 0xc11d077e */
 467 sa1  =  1.9651271820e+01, /* 0x419d35ce */
 468 sa2  =  1.3765776062e+02, /* 0x4309a863 */
 469 sa3  =  4.3456588745e+02, /* 0x43d9486f */
 470 sa4  =  6.4538726807e+02, /* 0x442158c9 */
 471 sa5  =  4.2900814819e+02, /* 0x43d6810b */
 472 sa6  =  1.0863500214e+02, /* 0x42d9451f */
 473 sa7  =  6.5702495575e+00, /* 0x40d23f7c */
 474 sa8  = -6.0424413532e-02, /* 0xbd777f97 */
 475 /*
 476  * Coefficients for approximation to  erfc in [1/.35,28]
 477  */
 478 rb0  = -9.8649431020e-03, /* 0xbc21a092 */
 479 rb1  = -7.9928326607e-01, /* 0xbf4c9dd4 */
 480 rb2  = -1.7757955551e+01, /* 0xc18e104b */
 481 rb3  = -1.6063638306e+02, /* 0xc320a2ea */
 482 rb4  = -6.3756646729e+02, /* 0xc41f6441 */
 483 rb5  = -1.0250950928e+03, /* 0xc480230b */
 484 rb6  = -4.8351919556e+02, /* 0xc3f1c275 */
 485 sb1  =  3.0338060379e+01, /* 0x41f2b459 */
 486 sb2  =  3.2579251099e+02, /* 0x43a2e571 */
 487 sb3  =  1.5367296143e+03, /* 0x44c01759 */
 488 sb4  =  3.1998581543e+03, /* 0x4547fdbb */
 489 sb5  =  2.5530502930e+03, /* 0x451f90ce */
 490 sb6  =  4.7452853394e+02, /* 0x43ed43a7 */
 491 sb7  = -2.2440952301e+01; /* 0xc1b38712 */
 492
 493
 494 typedef union
 495 {
 496   float value;
 497   erf_u_int32_t word;
 498 } ieee_float_shape_type;
 499
 500 #define GET_FLOAT_WORD(i,d)                                     \
 501 do {                                                            \
 502   ieee_float_shape_type gf_u;                                   \
 503   gf_u.value = (d);                                             \
 504   (i) = gf_u.word;                                              \
 505 } while (0)
 506
 507
 508 #define SET_FLOAT_WORD(d,i)                                     \
 509 do {                                                            \
 510   ieee_float_shape_type sf_u;                                   \
 511   sf_u.word = (i);                                              \
 512   (d) = sf_u.value;                                             \
 513 } while (0)
 514
 515
 516 float gmx_erf(float x)
 517 {
 518         erf_int32_t hx,ix,i;
 519         float R,S,P,Q,s,y,z,r;
 520         GET_FLOAT_WORD(hx,x);
 521         ix = hx&0x7fffffff;
 522         if(ix>=0x7f800000) {            /* erf(nan)=nan */
 523             i = ((erf_u_int32_t)hx>>31)<<1;
 524             return (float)(1-i)+one/x;  /* erf(+-inf)=+-1 */
 525         }
 526
 527         if(ix < 0x3f580000) {           /* |x|<0.84375 */
 528             if(ix < 0x31800000) {       /* |x|<2**-28 */
 529                 if (ix < 0x04000000)
 530                     /*avoid underflow */
 531                     return (float)0.125*((float)8.0*x+efx8*x);
 532                 return x + efx*x;
 533             }
 534             z = x*x;
 535             r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
 536             s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
 537             y = r/s;
 538             return x + x*y;
 539         }
 540         if(ix < 0x3fa00000) {           /* 0.84375 <= |x| < 1.25 */
 541             s = fabs(x)-one;
 542             P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
 543             Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
 544             if(hx>=0) return erx + P/Q; else return -erx - P/Q;
 545         }
 546         if (ix >= 0x40c00000) {         /* inf>|x|>=6 */
 547             if(hx>=0) return one-tiny; else return tiny-one;
 548         }
 549         x = fabs(x);
 550         s = one/(x*x);
 551         if(ix< 0x4036DB6E) {    /* |x| < 1/0.35 */
 552             R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
 553                                 ra5+s*(ra6+s*ra7))))));
 554             S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
 555                                 sa5+s*(sa6+s*(sa7+s*sa8)))))));
 556         } else {        /* |x| >= 1/0.35 */
 557             R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
 558                                 rb5+s*rb6)))));
 559             S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
 560                                 sb5+s*(sb6+s*sb7))))));
 561         }
 562         GET_FLOAT_WORD(ix,x);
 563         SET_FLOAT_WORD(z,ix&0xfffff000);
 564         r  =  exp(-z*z-(float)0.5625)*exp((z-x)*(z+x)+R/S);
 565         if(hx>=0) return one-r/x; else return  r/x-one;
 566 }
 567
 568 float gmx_erfc(float x)
 569 {
 570         erf_int32_t hx,ix;
 571         float R,S,P,Q,s,y,z,r;
 572         GET_FLOAT_WORD(hx,x);
 573         ix = hx&0x7fffffff;
 574         if(ix>=0x7f800000) {                    /* erfc(nan)=nan */
 575                                                 /* erfc(+-inf)=0,2 */
 576             return (float)(((erf_u_int32_t)hx>>31)<<1)+one/x;
 577         }
 578
 579         if(ix < 0x3f580000) {           /* |x|<0.84375 */
 580             if(ix < 0x23800000)         /* |x|<2**-56 */
 581                 return one-x;
 582             z = x*x;
 583             r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
 584             s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
 585             y = r/s;
 586             if(hx < 0x3e800000) {       /* x<1/4 */
 587                 return one-(x+x*y);
 588             } else {
 589                 r = x*y;
 590                 r += (x-half);
 591                 return half - r ;
 592             }
 593         }
 594         if(ix < 0x3fa00000) {           /* 0.84375 <= |x| < 1.25 */
 595             s = fabs(x)-one;
 596             P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
 597             Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
 598             if(hx>=0) {
 599                 z  = one-erx; return z - P/Q;
 600             } else {
 601                 z = erx+P/Q; return one+z;
 602             }
 603         }
 604         if (ix < 0x41e00000) {          /* |x|<28 */
 605             x = fabs(x);
 606             s = one/(x*x);
 607             if(ix< 0x4036DB6D) {        /* |x| < 1/.35 ~ 2.857143*/
 608                 R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
 609                                 ra5+s*(ra6+s*ra7))))));
 610                 S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
 611                                 sa5+s*(sa6+s*(sa7+s*sa8)))))));
 612             } else {                    /* |x| >= 1/.35 ~ 2.857143 */
 613                 if(hx<0&&ix>=0x40c00000) return two-tiny;/* x < -6 */
 614                 R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
 615                                 rb5+s*rb6)))));
 616                 S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
 617                                 sb5+s*(sb6+s*sb7))))));
 618             }
 619             GET_FLOAT_WORD(ix,x);
 620             SET_FLOAT_WORD(z,ix&0xfffff000);
 621             r  =  exp(-z*z-(float)0.5625)*exp((z-x)*(z+x)+R/S);
 622             if(hx>0) return r/x; else return two-r/x;
 623         } else {
 624             if(hx>0) return tiny*tiny; else return two-tiny;
 625         }
 626 }
 627
 628 #endif
 629
 630 float fast_float_erf(float x)
 631 {
 632         float t,ans;
 633
 634         t=1.0/(1.0+0.5*x);
 635         ans=t*exp(-x*x-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
 636                 t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
 637                 t*(-0.82215223+t*0.17087277)))))))));
 638         return 1.0-ans;
 639 }
 640
 641 float fast_float_erfc(float x)
 642 {
 643         float t,ans;
 644
 645         t=1.0/(1.0+0.5*x);
 646         ans=t*exp(-x*x-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
 647                 t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
 648                 t*(-0.82215223+t*0.17087277)))))))));
 649         return ans;
 650 }