sysdeps/alpha/fpu/e_sqrt.c

   1 /* Copyright (C) 1996,1997,1998,2002,2003 Free Software Foundation, Inc.
   2    Contributed by David Mosberger (davidm@cs.arizona.edu).
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #include <features.h>
  21
  22 #if !defined(_IEEE_FP_INEXACT)
  23
/*
 * This version is much faster than the generic sqrt implementation, but
 * it doesn't handle the inexact flag.  It doesn't handle exceptional
 * values either; for those it defers to the full ieee754_sqrt routine,
 * which can.
 */
  30
  31 /* Careful with rearranging this without consulting the assembly below.  */
  32 const static struct sqrt_data_struct {
  33         unsigned long dn, up, half, almost_three_half;
  34         unsigned long one_and_a_half, two_to_minus_30, one, nan;
  35         const int T2[64];
  36 } sqrt_data __attribute__((used)) = {
  37         0x3fefffffffffffff,     /* __dn = nextafter(1,-Inf) */
  38         0x3ff0000000000001,     /* __up = nextafter(1,+Inf) */
  39         0x3fe0000000000000,     /* half */
  40         0x3ff7ffffffc00000,     /* almost_three_half = 1.5-2^-30 */
  41         0x3ff8000000000000,     /* one_and_a_half */
  42         0x3e10000000000000,     /* two_to_minus_30 */
  43         0x3ff0000000000000,     /* one */
  44         0xffffffffffffffff,     /* nan */
  45
  46         { 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
  47         0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
  48         0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
  49         0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
  50         0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
  51         0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
  52         0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
  53         0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd }
  54 };
  55
  56 asm ("\
  57   /* Define offsets into the structure defined in C above.  */          \n\
  58         $DN = 0*8                                                       \n\
  59         $UP = 1*8                                                       \n\
  60         $HALF = 2*8                                                     \n\
  61         $ALMOST_THREE_HALF = 3*8                                        \n\
  62         $NAN = 7*8                                                      \n\
  63         $T2 = 8*8                                                       \n\
  64                                                                         \n\
  65   /* Stack variables.  */                                               \n\
  66         $K = 0                                                          \n\
  67         $Y = 8                                                          \n\
  68                                                                         \n\
  69         .text                                                           \n\
  70         .align  5                                                       \n\
  71         .globl  __ieee754_sqrt                                          \n\
  72         .ent    __ieee754_sqrt                                          \n\
  73 __ieee754_sqrt:                                                         \n\
  74         ldgp    $29, 0($27)                                             \n\
  75         subq    $sp, 16, $sp                                            \n\
  76         .frame  $sp, 16, $26, 0\n"
  77 #ifdef PROF
  78 "       lda     $28, _mcount                                            \n\
  79         jsr     $28, ($28), _mcount\n"
  80 #endif
  81 "       .prologue 1                                                     \n\
  82                                                                         \n\
  83         .align 4                                                        \n\
  84         stt     $f16, $K($sp)           # e0    :                       \n\
  85         mult    $f31, $f31, $f31        # .. fm :                       \n\
  86         lda     $4, sqrt_data           # e0    :                       \n\
  87         fblt    $f16, $fixup            # .. fa :                       \n\
  88                                                                         \n\
  89         ldah    $2, 0x5fe8              # e0    :                       \n\
  90         ldq     $3, $K($sp)             # .. e1 :                       \n\
  91         ldt     $f12, $HALF($4)         # e0    :                       \n\
  92         ldt     $f18, $ALMOST_THREE_HALF($4)    # .. e1 :               \n\
  93                                                                         \n\
  94         sll     $3, 52, $5              # e0    :                       \n\
  95         lda     $6, 0x7fd               # .. e1 :                       \n\
  96         fnop                            # .. fa :                       \n\
  97         fnop                            # .. fm :                       \n\
  98                                                                         \n\
  99         subq    $5, 1, $5               # e1    :                       \n\
 100         srl     $3, 33, $1              # .. e0 :                       \n\
 101         cmpule  $5, $6, $5              # e0    :                       \n\
 102         beq     $5, $fixup              # .. e1 :                       \n\
 103                                                                         \n\
 104         mult    $f16, $f12, $f11        # fm    : $f11 = x * 0.5        \n\
 105         subl    $2, $1, $2              # .. e0 :                       \n\
 106         addt    $f12, $f12, $f17        # .. fa : $f17 = 1.0            \n\
 107         srl     $2, 12, $1              # e0    :                       \n\
 108                                                                         \n\
 109         and     $1, 0xfc, $1            # e0    :                       \n\
 110         addq    $1, $4, $1              # e1    :                       \n\
 111         ldl     $1, $T2($1)             # e0    :                       \n\
 112         addt    $f12, $f17, $f15        # .. fa : $f15 = 1.5            \n\
 113                                                                         \n\
 114         subl    $2, $1, $2              # e0    :                       \n\
 115         ldt     $f14, $DN($4)           # .. e1 :                       \n\
 116         sll     $2, 32, $2              # e0    :                       \n\
 117         stq     $2, $Y($sp)             # e0    :                       \n\
 118                                                                         \n\
 119         ldt     $f13, $Y($sp)           # e0    :                       \n\
 120         mult/su $f11, $f13, $f10        # fm   2: $f10 = (x * 0.5) * y  \n\
 121         mult    $f10, $f13, $f10        # fm   4: $f10 = ((x*0.5)*y)*y  \n\
 122         subt    $f15, $f10, $f1         # fa   4: $f1 = (1.5-0.5*x*y*y) \n\
 123                                                                         \n\
 124         mult    $f13, $f1, $f13         # fm   4: yp = y*(1.5-0.5*x*y^2)\n\
 125         mult/su $f11, $f13, $f1         # fm   4: $f11 = x * 0.5 * yp   \n\
 126         mult    $f1, $f13, $f11         # fm   4: $f11 = (x*0.5*yp)*yp  \n\
 127         subt    $f18, $f11, $f1         # fa   4: $f1=(1.5-2^-30)-x/2*yp^2\n\
 128                                                                         \n\
 129         mult    $f13, $f1, $f13         # fm   4: ypp = $f13 = yp*$f1   \n\
 130         subt    $f15, $f12, $f1         # .. fa : $f1 = (1.5 - 0.5)     \n\
 131         ldt     $f15, $UP($4)           # .. e0 :                       \n\
 132         mult/su $f16, $f13, $f10        # fm   4: z = $f10 = x * ypp    \n\
 133                                                                         \n\
 134         mult    $f10, $f13, $f11        # fm   4: $f11 = z*ypp          \n\
 135         mult    $f10, $f12, $f12        # fm    : $f12 = z*0.5          \n\
 136         subt    $f1, $f11, $f1          # fa   4: $f1 = 1 - z*ypp       \n\
 137         mult    $f12, $f1, $f12         # fm   4: $f12 = z/2*(1 - z*ypp)\n\
 138                                                                         \n\
 139         addt    $f10, $f12, $f0         # fa   4: zp=res= z+z/2*(1-z*ypp)\n\
 140         mult/c  $f0, $f14, $f12         # fm   4: zmi = zp * DN         \n\
 141         mult/c  $f0, $f15, $f11         # fm    : zpl = zp * UP         \n\
 142         mult/c  $f0, $f12, $f1          # fm    : $f1 = zp * zmi        \n\
 143                                                                         \n\
 144         mult/c  $f0, $f11, $f15         # fm    : $f15 = zp * zpl       \n\
 145         subt/su $f1, $f16, $f13         # .. fa : y1 = zp*zmi - x       \n\
 146         subt/su $f15, $f16, $f14        # fa   4: y2 = zp*zpl - x       \n\
 147         fcmovge $f13, $f12, $f0         # fa   3: res = (y1>=0)?zmi:res \n\
 148                                                                         \n\
 149         fcmovlt $f14, $f11, $f0         # fa   4: res = (y2<0)?zpl:res  \n\
 150         addq    $sp, 16, $sp            # .. e0 :                       \n\
 151         ret                             # .. e1 :                       \n\
 152                                                                         \n\
 153         .align 4                                                        \n\
 154 $fixup:                                                                 \n\
 155         addq    $sp, 16, $sp                                            \n\
 156         br      __full_ieee754_sqrt     !samegp                         \n\
 157                                                                         \n\
 158         .end    __ieee754_sqrt");
 159
 160 static double __full_ieee754_sqrt(double) __attribute_used__;
 161 #define __ieee754_sqrt __full_ieee754_sqrt
 162
 163 #endif /* _IEEE_FP_INEXACT */
 164
 165 #include <sysdeps/ieee754/dbl-64/e_sqrt.c>