ports/sysdeps/alpha/fpu/e_sqrt.c

   1 /* Copyright (C) 1996-2014 Free Software Foundation, Inc.
   2    Contributed by David Mosberger (davidm@cs.arizona.edu).
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library.  If not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <math.h>
  20 #include <math_private.h>
  21 #include <shlib-compat.h>
  22
  23 #if !defined(_IEEE_FP_INEXACT)
  24
  25 /*
  26  * This version is much faster than the generic sqrt implementation, but
  27  * it doesn't handle the inexact flag.  It doesn't handle exceptional
  28  * values either; those are deferred to the full ieee754_sqrt routine,
  29  * which can.
  30  */
  31
  32 /* Constant pool and correction table read by the assembly below.
        The assembly reaches the members through hard-coded byte offsets
        ($DN = 0*8, $UP = 1*8, $HALF = 2*8, $ALMOST_THREE_HALF = 3*8,
        $NAN = 7*8, $T2 = 8*8), i.e. it expects eight 8-byte words followed
        directly by the table.  The words are bit patterns that the assembly
        loads as doubles with ldt.  Careful with rearranging this without
        consulting the assembly below.  */
  33 const static struct sqrt_data_struct {
  34         unsigned long dn, up, half, almost_three_half;
         /* one_and_a_half, two_to_minus_30 and one are not referenced by the
            assembly visible in this file, but they occupy slots 4..6 and so
            keep nan and T2 at the offsets the assembly defines.  */
  35         unsigned long one_and_a_half, two_to_minus_30, one, nan;
         /* 64 corrections for the initial reciprocal-square-root estimate; the
            assembly indexes the table with a byte offset masked by 0xfc and
            subtracts the loaded entry from the integer-built estimate.  */
  36         const int T2[64];
  37 } sqrt_data __attribute__((used)) = {
  38         0x3fefffffffffffff,     /* __dn = nextafter(1,-Inf) */
  39         0x3ff0000000000001,     /* __up = nextafter(1,+Inf) */
  40         0x3fe0000000000000,     /* half */
  41         0x3ff7ffffffc00000,     /* almost_three_half = 1.5-2^-30 */
  42         0x3ff8000000000000,     /* one_and_a_half */
  43         0x3e10000000000000,     /* two_to_minus_30 */
  44         0x3ff0000000000000,     /* one */
  45         0xffffffffffffffff,     /* nan */
  46
  47         { 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
  48         0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
  49         0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
  50         0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
  51         0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
  52         0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
  53         0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
  54         0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd }
  55 };
  56
  57 /* Fast-path __ieee754_sqrt, written as top-level assembly.

        x arrives in $f16 and the result is returned in $f0.  Sixteen bytes
        of stack are used: $K holds the raw bits of x so they can be read
        back into an integer register, $Y holds the integer-built initial
        estimate so it can be read back as a double.

        Only positive, normal, finite arguments are handled here.  Negative
        arguments (fblt) and arguments whose exponent field is 0 or 0x7ff
        (zero, subnormal, infinity, NaN) branch to $fixup, which pops the
        frame and tail-branches to __full_ieee754_sqrt.

        Steps of the fast path:
          1. Exponent range check: (bits >> 52) - 1 <= 0x7fd, unsigned.  The
             shift must be a right shift (srl) because the exponent lives in
             bits 62..52; a left shift would make the comparison fail for
             every input and send every call to $fixup.
          2. Initial estimate y of 1/sqrt(x): 0x5fe80000 - (bits >> 33),
             minus a T2 entry selected by bits 14..19 of that difference,
             placed in the high word of a double.
          3. Two Newton-Raphson steps for 1/sqrt(x) (yp, ypp).
          4. z = x * ypp, refined once: zp = z + z/2 * (1 - z*ypp).
          5. Final rounding fix-up: zmi = zp*DN and zpl = zp*UP are the
             candidates next to zp; the signs of zp*zmi - x and zp*zpl - x
             select which of zmi, zp, zpl is returned.

        The per-instruction comments give the issue slot and, where useful,
        the value being computed.  */
     asm ("\
  58   /* Define offsets into the structure defined in C above.  */          \n\
  59         $DN = 0*8                                                       \n\
  60         $UP = 1*8                                                       \n\
  61         $HALF = 2*8                                                     \n\
  62         $ALMOST_THREE_HALF = 3*8                                        \n\
  63         $NAN = 7*8                                                      \n\
  64         $T2 = 8*8                                                       \n\
  65                                                                         \n\
  66   /* Stack variables.  */                                               \n\
  67         $K = 0                                                          \n\
  68         $Y = 8                                                          \n\
  69                                                                         \n\
  70         .text                                                           \n\
  71         .align  5                                                       \n\
  72         .globl  __ieee754_sqrt                                          \n\
  73         .ent    __ieee754_sqrt                                          \n\
  74 __ieee754_sqrt:                                                         \n\
  75         ldgp    $29, 0($27)                                             \n\
  76         subq    $sp, 16, $sp                                            \n\
  77         .frame  $sp, 16, $26, 0\n"
  78 #ifdef PROF
  79 "       lda     $28, _mcount                                            \n\
  80         jsr     $28, ($28), _mcount\n"
  81 #endif
  82 "       .prologue 1                                                     \n\
  83                                                                         \n\
  84         .align 4                                                        \n\
  85         stt     $f16, $K($sp)           # e0    :                       \n\
  86         mult    $f31, $f31, $f31        # .. fm :                       \n\
  87         lda     $4, sqrt_data           # e0    :                       \n\
  88         fblt    $f16, $fixup            # .. fa :                       \n\
  89                                                                         \n\
  90         ldah    $2, 0x5fe8              # e0    :                       \n\
  91         ldq     $3, $K($sp)             # .. e1 :                       \n\
  92         ldt     $f12, $HALF($4)         # e0    :                       \n\
  93         ldt     $f18, $ALMOST_THREE_HALF($4)    # .. e1 :               \n\
  94                                                                         \n\
  95         srl     $3, 52, $5              # e0    : $5 = sign+exponent bits\n\
  96         lda     $6, 0x7fd               # .. e1 :                       \n\
  97         fnop                            # .. fa :                       \n\
  98         fnop                            # .. fm :                       \n\
  99                                                                         \n\
 100         subq    $5, 1, $5               # e1    :                       \n\
 101         srl     $3, 33, $1              # .. e0 :                       \n\
 102         cmpule  $5, $6, $5              # e0    : exponent in [1,0x7fe]?\n\
 103         beq     $5, $fixup              # .. e1 :                       \n\
 104                                                                         \n\
 105         mult    $f16, $f12, $f11        # fm    : $f11 = x * 0.5        \n\
 106         subl    $2, $1, $2              # .. e0 :                       \n\
 107         addt    $f12, $f12, $f17        # .. fa : $f17 = 1.0            \n\
 108         srl     $2, 12, $1              # e0    :                       \n\
 109                                                                         \n\
 110         and     $1, 0xfc, $1            # e0    :                       \n\
 111         addq    $1, $4, $1              # e1    :                       \n\
 112         ldl     $1, $T2($1)             # e0    :                       \n\
 113         addt    $f12, $f17, $f15        # .. fa : $f15 = 1.5            \n\
 114                                                                         \n\
 115         subl    $2, $1, $2              # e0    :                       \n\
 116         ldt     $f14, $DN($4)           # .. e1 :                       \n\
 117         sll     $2, 32, $2              # e0    :                       \n\
 118         stq     $2, $Y($sp)             # e0    :                       \n\
 119                                                                         \n\
 120         ldt     $f13, $Y($sp)           # e0    :                       \n\
 121         mult/su $f11, $f13, $f10        # fm   2: $f10 = (x * 0.5) * y  \n\
 122         mult    $f10, $f13, $f10        # fm   4: $f10 = ((x*0.5)*y)*y  \n\
 123         subt    $f15, $f10, $f1         # fa   4: $f1 = (1.5-0.5*x*y*y) \n\
 124                                                                         \n\
 125         mult    $f13, $f1, $f13         # fm   4: yp = y*(1.5-0.5*x*y^2)\n\
 126         mult/su $f11, $f13, $f1         # fm   4: $f1 = x * 0.5 * yp    \n\
 127         mult    $f1, $f13, $f11         # fm   4: $f11 = (x*0.5*yp)*yp  \n\
 128         subt    $f18, $f11, $f1         # fa   4: $f1=(1.5-2^-30)-x/2*yp^2\n\
 129                                                                         \n\
 130         mult    $f13, $f1, $f13         # fm   4: ypp = $f13 = yp*$f1   \n\
 131         subt    $f15, $f12, $f1         # .. fa : $f1 = (1.5 - 0.5)     \n\
 132         ldt     $f15, $UP($4)           # .. e0 :                       \n\
 133         mult/su $f16, $f13, $f10        # fm   4: z = $f10 = x * ypp    \n\
 134                                                                         \n\
 135         mult    $f10, $f13, $f11        # fm   4: $f11 = z*ypp          \n\
 136         mult    $f10, $f12, $f12        # fm    : $f12 = z*0.5          \n\
 137         subt    $f1, $f11, $f1          # fa   4: $f1 = 1 - z*ypp       \n\
 138         mult    $f12, $f1, $f12         # fm   4: $f12 = z/2*(1 - z*ypp)\n\
 139                                                                         \n\
 140         addt    $f10, $f12, $f0         # fa   4: zp=res= z+z/2*(1-z*ypp)\n\
 141         mult/c  $f0, $f14, $f12         # fm   4: zmi = zp * DN         \n\
 142         mult/c  $f0, $f15, $f11         # fm    : zpl = zp * UP         \n\
 143         mult/c  $f0, $f12, $f1          # fm    : $f1 = zp * zmi        \n\
 144                                                                         \n\
 145         mult/c  $f0, $f11, $f15         # fm    : $f15 = zp * zpl       \n\
 146         subt/su $f1, $f16, $f13         # .. fa : y1 = zp*zmi - x       \n\
 147         subt/su $f15, $f16, $f14        # fa   4: y2 = zp*zpl - x       \n\
 148         fcmovge $f13, $f12, $f0         # fa   3: res = (y1>=0)?zmi:res \n\
 149                                                                         \n\
 150         fcmovlt $f14, $f11, $f0         # fa   4: res = (y2<0)?zpl:res  \n\
 151         addq    $sp, 16, $sp            # .. e0 :                       \n\
 152         ret                             # .. e1 :                       \n\
 153                                                                         \n\
 154         .align 4                                                        \n\
 155 $fixup:                                                                 \n\
 156         addq    $sp, 16, $sp                                            \n\
 157         br      __full_ieee754_sqrt     !samegp                         \n\
 158                                                                         \n\
 159         .end    __ieee754_sqrt");
 160
 161 /* Avoid the __sqrt_finite alias that dbl-64/e_sqrt.c would give, by
        making strong_alias expand to nothing from here on...  */
 162 #undef strong_alias
 163 #define strong_alias(a,b)
 164
 165 /* ... defining our own.  Either spelling makes the name a global
        assembler symbol equal to the hand-written __ieee754_sqrt above.  The
        compat build uses the name __sqrt_finite1, which is the name handed
        to compat_symbol at the end of this file.  */
 166 #if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
 167 asm (".global   __sqrt_finite1; __sqrt_finite1 = __ieee754_sqrt");
 168 #else
 169 asm (".global   __sqrt_finite; __sqrt_finite = __ieee754_sqrt");
 170 #endif
 171
     /* From here on the C identifier __ieee754_sqrt is spelled
        __full_ieee754_sqrt, which is the name the assembly's $fixup path
        branches to.  Presumably the file included after this conditional
        defines __ieee754_sqrt and thereby supplies that routine -- confirm
        against dbl-64/e_sqrt.c.  NOTE(review): the used attribute looks
        necessary because the only reference visible in this file is from
        inside the asm string.  */
 172 static double __full_ieee754_sqrt(double) __attribute_used__;
 173 #define __ieee754_sqrt __full_ieee754_sqrt
 174
 175 #elif SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
     /* _IEEE_FP_INEXACT build: none of the assembly above is compiled.  Any
        use of the name __sqrt_finite in the included file is rewritten to
        __sqrt_finite1 until the #undef after the #include.  */
 176 # define __sqrt_finite __sqrt_finite1
 177 #endif /* _IEEE_FP_INEXACT */
 178
     /* Pull in the generic double-precision implementation, compiled under
        whatever macro renames (strong_alias, __ieee754_sqrt, __sqrt_finite)
        are in effect at this point.  */
 179 #include <sysdeps/ieee754/dbl-64/e_sqrt.c>
 180
 181 /* Work around forgotten symbol in alphaev6 build.
        The renaming macros are dropped first so the names below are taken
        literally.  Going by the macro arguments, __sqrt_finite1 is published
        as __sqrt_finite at version GLIBC_2_15 and __ieee754_sqrt as
        __sqrt_finite at version GLIBC_2_18; the exact semantics of
        compat_symbol and versioned_symbol come from shlib-compat.h and are
        not visible here -- TODO confirm.  */
 182 #if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
 183 # undef __sqrt_finite
 184 # undef __ieee754_sqrt
 185 compat_symbol (libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15);
 186 versioned_symbol (libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18);
 187 #endif