sysdeps/ieee754/dbl-64/s_atan.c

   1 /*
   2  * IBM Accurate Mathematical Library
   3  * written by International Business Machines Corp.
   4  * Copyright (C) 2001-2015 Free Software Foundation, Inc.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU Lesser General Public License as published by
   8  * the Free Software Foundation; either version 2.1 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public License
  17  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 /************************************************************************/
  20 /*  MODULE_NAME: atnat.c                                                */
  21 /*                                                                      */
  22 /*  FUNCTIONS:  uatan                                                   */
  23 /*              atanMp                                                  */
  24 /*              signArctan                                              */
  25 /*                                                                      */
  26 /*                                                                      */
  27 /*  FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h                 */
  28 /*                mpatan.c mpatan2.c mpsqrt.c                           */
  29 /*                uatan.tbl                                             */
  30 /*                                                                      */
  31 /* An ultimate atan() routine. Given an IEEE double machine number x    */
  32 /* it computes the correctly rounded (to nearest) value of atan(x).     */
  33 /*                                                                      */
  34 /* Assumption: Machine arithmetic operations are performed in           */
  35 /* round to nearest mode of IEEE 754 standard.                          */
  36 /*                                                                      */
  37 /************************************************************************/
  38
  39 #include <dla.h>
  40 #include "mpa.h"
  41 #include "MathLib.h"
  42 #include "uatan.tbl"
  43 #include "atnat.h"
  44 #include <fenv.h>
  45 #include <float.h>
  46 #include <math.h>
  47 #include <math_private.h>
  48 #include <stap-probe.h>
  49
  50 void __mpatan (mp_no *, mp_no *, int);  /* see definition in mpatan.c */
  51 static double atanMp (double, const int[]);
  52
  53   /* Fix the sign of y and return */
  54 static double
  55 __signArctan (double x, double y)
  56 {
  57   return __copysign (y, x);
  58 }
  59
  60
  61 /* An ultimate atan() routine. Given an IEEE double machine number x,    */
  62 /* routine computes the correctly rounded (to nearest) value of atan(x). */
  63 double
  64 atan (double x)
  65 {
  66   double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
  67          v, vv, w, ww, y, yy, z, zz;
  68 #ifndef DLA_FMS
  69   double t4, t5, t6;
  70 #endif
  71   int i, ux, dx;
  72   static const int pr[M] = { 6, 8, 10, 32 };
  73   number num;
  74
  75   num.d = x;
  76   ux = num.i[HIGH_HALF];
  77   dx = num.i[LOW_HALF];
  78
  79   /* x=NaN */
  80   if (((ux & 0x7ff00000) == 0x7ff00000)
  81       && (((ux & 0x000fffff) | dx) != 0x00000000))
  82     return x + x;
  83
  84   /* Regular values of x, including denormals +-0 and +-INF */
  85   SET_RESTORE_ROUND (FE_TONEAREST);
  86   u = (x < 0) ? -x : x;
  87   if (u < C)
  88     {
  89       if (u < B)
  90         {
  91           if (u < A)
  92             {
  93               if (u < DBL_MIN)
  94                 {
  95                   double force_underflow = x * x;
  96                   math_force_eval (force_underflow);
  97                 }
  98               return x;
  99             }
 100           else
 101             {                   /* A <= u < B */
 102               v = x * x;
 103               yy = d11.d + v * d13.d;
 104               yy = d9.d + v * yy;
 105               yy = d7.d + v * yy;
 106               yy = d5.d + v * yy;
 107               yy = d3.d + v * yy;
 108               yy *= x * v;
 109
 110               if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
 111                 return y;
 112
 113               EMULV (x, x, v, vv, t1, t2, t3, t4, t5);  /* v+vv=x^2 */
 114
 115               s1 = f17.d + v * f19.d;
 116               s1 = f15.d + v * s1;
 117               s1 = f13.d + v * s1;
 118               s1 = f11.d + v * s1;
 119               s1 *= v;
 120
 121               ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
 122               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 123               ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
 124               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 125               ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
 126               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 127               ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
 128               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 129               MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7,
 130                     t8);
 131               ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
 132               if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
 133                 return y;
 134
 135               return atanMp (x, pr);
 136             }
 137         }
 138       else
 139         {                       /* B <= u < C */
 140           i = (TWO52 + TWO8 * u) - TWO52;
 141           i -= 16;
 142           z = u - cij[i][0].d;
 143           yy = cij[i][5].d + z * cij[i][6].d;
 144           yy = cij[i][4].d + z * yy;
 145           yy = cij[i][3].d + z * yy;
 146           yy = cij[i][2].d + z * yy;
 147           yy *= z;
 148
 149           t1 = cij[i][1].d;
 150           if (i < 112)
 151             {
 152               if (i < 48)
 153                 u2 = U21;       /* u < 1/4        */
 154               else
 155                 u2 = U22;
 156             }                   /* 1/4 <= u < 1/2 */
 157           else
 158             {
 159               if (i < 176)
 160                 u2 = U23;       /* 1/2 <= u < 3/4 */
 161               else
 162                 u2 = U24;
 163             }                   /* 3/4 <= u <= 1  */
 164           if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
 165             return __signArctan (x, y);
 166
 167           z = u - hij[i][0].d;
 168
 169           s1 = hij[i][14].d + z * hij[i][15].d;
 170           s1 = hij[i][13].d + z * s1;
 171           s1 = hij[i][12].d + z * s1;
 172           s1 = hij[i][11].d + z * s1;
 173           s1 *= z;
 174
 175           ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
 176           MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 177           ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
 178           MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 179           ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
 180           MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 181           ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
 182           MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 183           ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
 184           if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
 185             return __signArctan (x, y);
 186
 187           return atanMp (x, pr);
 188         }
 189     }
 190   else
 191     {
 192       if (u < D)
 193         {                       /* C <= u < D */
 194           w = 1 / u;
 195           EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
 196           ww = w * ((1 - t1) - t2);
 197           i = (TWO52 + TWO8 * w) - TWO52;
 198           i -= 16;
 199           z = (w - cij[i][0].d) + ww;
 200
 201           yy = cij[i][5].d + z * cij[i][6].d;
 202           yy = cij[i][4].d + z * yy;
 203           yy = cij[i][3].d + z * yy;
 204           yy = cij[i][2].d + z * yy;
 205           yy = HPI1 - z * yy;
 206
 207           t1 = HPI - cij[i][1].d;
 208           if (i < 112)
 209             u3 = U31;           /* w <  1/2 */
 210           else
 211             u3 = U32;           /* w >= 1/2 */
 212           if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
 213             return __signArctan (x, y);
 214
 215           DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
 216                 t10);
 217           t1 = w - hij[i][0].d;
 218           EADD (t1, ww, z, zz);
 219
 220           s1 = hij[i][14].d + z * hij[i][15].d;
 221           s1 = hij[i][13].d + z * s1;
 222           s1 = hij[i][12].d + z * s1;
 223           s1 = hij[i][11].d + z * s1;
 224           s1 *= z;
 225
 226           ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
 227           MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 228           ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
 229           MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 230           ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
 231           MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 232           ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
 233           MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 234           ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
 235           SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
 236           if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
 237             return __signArctan (x, y);
 238
 239           return atanMp (x, pr);
 240         }
 241       else
 242         {
 243           if (u < E)
 244             {                   /* D <= u < E */
 245               w = 1 / u;
 246               v = w * w;
 247               EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
 248
 249               yy = d11.d + v * d13.d;
 250               yy = d9.d + v * yy;
 251               yy = d7.d + v * yy;
 252               yy = d5.d + v * yy;
 253               yy = d3.d + v * yy;
 254               yy *= w * v;
 255
 256               ww = w * ((1 - t1) - t2);
 257               ESUB (HPI, w, t3, cor);
 258               yy = ((HPI1 + cor) - ww) - yy;
 259               if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
 260                 return __signArctan (x, y);
 261
 262               DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
 263                     t9, t10);
 264               MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
 265
 266               s1 = f17.d + v * f19.d;
 267               s1 = f15.d + v * s1;
 268               s1 = f13.d + v * s1;
 269               s1 = f11.d + v * s1;
 270               s1 *= v;
 271
 272               ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
 273               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 274               ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
 275               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 276               ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
 277               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 278               ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
 279               MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
 280               MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
 281               ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
 282               SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);
 283
 284               if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
 285                 return __signArctan (x, y);
 286
 287               return atanMp (x, pr);
 288             }
 289           else
 290             {
 291               /* u >= E */
 292               if (x > 0)
 293                 return HPI;
 294               else
 295                 return MHPI;
 296             }
 297         }
 298     }
 299 }
 300
 301  /* Final stages. Compute atan(x) by multiple precision arithmetic */
 302 static double
 303 atanMp (double x, const int pr[])
 304 {
 305   mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1;
 306   double y1, y2;
 307   int i, p;
 308
 309   for (i = 0; i < M; i++)
 310     {
 311       p = pr[i];
 312       __dbl_mp (x, &mpx, p);
 313       __mpatan (&mpx, &mpy, p);
 314       __dbl_mp (u9[i].d, &mpt1, p);
 315       __mul (&mpy, &mpt1, &mperr, p);
 316       __add (&mpy, &mperr, &mpy1, p);
 317       __sub (&mpy, &mperr, &mpy2, p);
 318       __mp_dbl (&mpy1, &y1, p);
 319       __mp_dbl (&mpy2, &y2, p);
 320       if (y1 == y2)
 321         {
 322           LIBC_PROBE (slowatan, 3, &p, &x, &y1);
 323           return y1;
 324         }
 325     }
 326   LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
 327   return y1;                    /*if impossible to do exact computing */
 328 }
 329
 330 #ifdef NO_LONG_DOUBLE
 331 weak_alias (atan, atanl)
 332 #endif