sysdeps/ieee754/dbl-64/mpsqrt.c

   1 /*
   2  * IBM Accurate Mathematical Library
   3  * written by International Business Machines Corp.
   4  * Copyright (C) 2001-2015 Free Software Foundation, Inc.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU Lesser General Public License as published by
   8  * the Free Software Foundation; either version 2.1 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public License
  17  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 /****************************************************************************/
  20 /*  MODULE_NAME:mpsqrt.c                                                    */
  21 /*                                                                          */
  22 /*  FUNCTION:mpsqrt                                                         */
  23 /*           fastiroot                                                      */
  24 /*                                                                          */
  25 /* FILES NEEDED:endian.h mpa.h mpsqrt.h                                     */
  26 /*              mpa.c                                                       */
  27 /* Multi-Precision square root function subroutine for precision p >= 4.    */
  28 /* The relative error is bounded by 3.501*r**(1-p), where r=2**24.          */
  29 /*                                                                          */
  30 /****************************************************************************/
  31 #include "endian.h"
  32 #include "mpa.h"
  33
  34 #ifndef SECTION
  35 # define SECTION
  36 #endif
  37
  38 #include "mpsqrt.h"
  39
  40 /****************************************************************************/
  41 /* Multi-Precision square root function subroutine for precision p >= 4.    */
  42 /* The relative error is bounded by 3.501*r**(1-p), where r=2**24.          */
  43 /* Routine receives two pointers to  Multi Precision numbers:               */
  44 /* x (left argument) and y (next argument). Routine also receives precision */
  45 /* p as integer. Routine computes sqrt(*x) and stores result in *y          */
  46 /****************************************************************************/
  47
  48 static double fastiroot (double);
  49
  50 void
  51 SECTION
  52 __mpsqrt (mp_no *x, mp_no *y, int p)
  53 {
  54   int i, m, ey;
  55   double dx, dy;
  56   static const mp_no mphalf = {0, {1.0, HALFRAD}};
  57   static const mp_no mp3halfs = {1, {1.0, 1.0, HALFRAD}};
  58   mp_no mpxn, mpz, mpu, mpt1, mpt2;
  59
  60   ey = EX / 2;
  61   __cpy (x, &mpxn, p);
  62   mpxn.e -= (ey + ey);
  63   __mp_dbl (&mpxn, &dx, p);
  64   dy = fastiroot (dx);
  65   __dbl_mp (dy, &mpu, p);
  66   __mul (&mpxn, &mphalf, &mpz, p);
  67
  68   m = __mpsqrt_mp[p];
  69   for (i = 0; i < m; i++)
  70     {
  71       __sqr (&mpu, &mpt1, p);
  72       __mul (&mpt1, &mpz, &mpt2, p);
  73       __sub (&mp3halfs, &mpt2, &mpt1, p);
  74       __mul (&mpu, &mpt1, &mpt2, p);
  75       __cpy (&mpt2, &mpu, p);
  76     }
  77   __mul (&mpxn, &mpu, y, p);
  78   EY += ey;
  79 }
  80
  81 /***********************************************************/
  82 /* Compute a double precision approximation for 1/sqrt(x)  */
  83 /* with the relative error bounded by 2**-51.              */
  84 /***********************************************************/
  85 static double
  86 SECTION
  87 fastiroot (double x)
  88 {
  89   union
  90   {
  91     int i[2];
  92     double d;
  93   } p, q;
  94   double y, z, t;
  95   int n;
  96   static const double c0 = 0.99674, c1 = -0.53380;
  97   static const double c2 = 0.45472, c3 = -0.21553;
  98
  99   p.d = x;
 100   p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF) | 0x3FE00000;
 101   q.d = x;
 102   y = p.d;
 103   z = y - 1.0;
 104   n = (q.i[HIGH_HALF] - p.i[HIGH_HALF]) >> 1;
 105   z = ((c3 * z + c2) * z + c1) * z + c0;        /* 2**-7         */
 106   z = z * (1.5 - 0.5 * y * z * z);              /* 2**-14        */
 107   p.d = z * (1.5 - 0.5 * y * z * z);            /* 2**-28        */
 108   p.i[HIGH_HALF] -= n;
 109   t = x * p.d;
 110   return p.d * (1.5 - 0.5 * p.d * t);
 111 }