sysdeps/i386/fpu/s_cbrt.S

   1 /* Compute cubic root of double value.
   2    Copyright (C) 1997, 2005 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, write to the Free
  19    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  20    02111-1307 USA.  */
  21
  22 #include <machine/asm.h>
  23
  24 #ifdef __ELF__
  25         .section .rodata
  26 #else
  27         .text
  28 #endif
  29
  30         .align ALIGNARG(4)
  31         ASM_TYPE_DIRECTIVE(f7,@object)
  32 f7:     .double -0.145263899385486377
  33         ASM_SIZE_DIRECTIVE(f7)
  34         ASM_TYPE_DIRECTIVE(f6,@object)
  35 f6:     .double 0.784932344976639262
  36         ASM_SIZE_DIRECTIVE(f6)
  37         ASM_TYPE_DIRECTIVE(f5,@object)
  38 f5:     .double -1.83469277483613086
  39         ASM_SIZE_DIRECTIVE(f5)
  40         ASM_TYPE_DIRECTIVE(f4,@object)
  41 f4:     .double 2.44693122563534430
  42         ASM_SIZE_DIRECTIVE(f4)
  43         ASM_TYPE_DIRECTIVE(f3,@object)
  44 f3:     .double -2.11499494167371287
  45         ASM_SIZE_DIRECTIVE(f3)
  46         ASM_TYPE_DIRECTIVE(f2,@object)
  47 f2:     .double 1.50819193781584896
  48         ASM_SIZE_DIRECTIVE(f2)
  49         ASM_TYPE_DIRECTIVE(f1,@object)
  50 f1:     .double 0.354895765043919860
  51         ASM_SIZE_DIRECTIVE(f1)
  52
  53 #define CBRT2           1.2599210498948731648
  54 #define ONE_CBRT2       0.793700525984099737355196796584
  55 #define SQR_CBRT2       1.5874010519681994748
  56 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  57
  58         ASM_TYPE_DIRECTIVE(factor,@object)
  59 factor: .double ONE_SQR_CBRT2
  60         .double ONE_CBRT2
  61         .double 1.0
  62         .double CBRT2
  63         .double SQR_CBRT2
  64         ASM_SIZE_DIRECTIVE(factor)
  65
  66         ASM_TYPE_DIRECTIVE(two54,@object)
  67 two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
  68         ASM_SIZE_DIRECTIVE(two54)
  69
  70 #ifdef PIC
  71 #define MO(op) op##@GOTOFF(%ebx)
  72 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  73 #else
  74 #define MO(op) op
  75 #define MOX(op,x) op(x)
  76 #endif
  77
  78         .text
  79 ENTRY(__cbrt)
  80         movl    4(%esp), %ecx
  81         movl    8(%esp), %eax
  82         movl    %eax, %edx
  83         andl    $0x7fffffff, %eax
  84         orl     %eax, %ecx
  85         jz      1f
  86         xorl    %ecx, %ecx
  87         cmpl    $0x7ff00000, %eax
  88         jae     1f
  89
  90 #ifdef PIC
  91         pushl   %ebx
  92         cfi_adjust_cfa_offset (4)
  93         cfi_rel_offset (ebx, 0)
  94         LOAD_PIC_REG (bx)
  95 #endif
  96
  97         cmpl    $0x00100000, %eax
  98         jae     2f
  99
 100 #ifdef PIC
 101         fldl    8(%esp)
 102 #else
 103         fldl    4(%esp)
 104 #endif
 105         fmull   MO(two54)
 106         movl    $-54, %ecx
 107 #ifdef PIC
 108         fstpl   8(%esp)
 109         movl    12(%esp), %eax
 110 #else
 111         fstpl   4(%esp)
 112         movl    8(%esp), %eax
 113 #endif
 114         movl    %eax, %edx
 115         andl    $0x7fffffff, %eax
 116
 117 2:      shrl    $20, %eax
 118         andl    $0x800fffff, %edx
 119         subl    $1022, %eax
 120         orl     $0x3fe00000, %edx
 121         addl    %eax, %ecx
 122 #ifdef PIC
 123         movl    %edx, 12(%esp)
 124
 125         fldl    8(%esp)                 /* xm */
 126 #else
 127         movl    %edx, 8(%esp)
 128
 129         fldl    4(%esp)                 /* xm */
 130 #endif
 131         fabs
 132
 133         /* The following code has two tracks:
 134             a) compute the normalized cbrt value
 135             b) compute xe/3 and xe%3
 136            The right track computes the value for b) and this is done
 137            in an optimized way by avoiding division.
 138
 139            But why two tracks at all?  Very easy: efficiency.  Some FP
 140            instruction can overlap with a certain amount of integer (and
 141            FP) instructions.  So we get (except for the imull) all
 142            instructions for free.  */
 143
 144         fld     %st(0)                  /* xm : xm */
 145
 146         fmull   MO(f7)                  /* f7*xm : xm */
 147                         movl    $1431655766, %eax
 148         faddl   MO(f6)                  /* f6+f7*xm : xm */
 149                         imull   %ecx
 150         fmul    %st(1)                  /* (f6+f7*xm)*xm : xm */
 151                         movl    %ecx, %eax
 152         faddl   MO(f5)                  /* f5+(f6+f7*xm)*xm : xm */
 153                         sarl    $31, %eax
 154         fmul    %st(1)                  /* (f5+(f6+f7*xm)*xm)*xm : xm */
 155                         subl    %eax, %edx
 156         faddl   MO(f4)                  /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
 157         fmul    %st(1)                  /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 158         faddl   MO(f3)                  /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 159         fmul    %st(1)                  /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 160         faddl   MO(f2)                  /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 161         fmul    %st(1)                  /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 162         faddl   MO(f1)                  /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 163
 164         fld     %st                     /* u : u : xm */
 165         fmul    %st(1)                  /* u*u : u : xm */
 166         fld     %st(2)                  /* xm : u*u : u : xm */
 167         fadd    %st                     /* 2*xm : u*u : u : xm */
 168         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 169         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 170                         movl    %edx, %eax
 171         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 172                         leal    (%edx,%edx,2),%edx
 173         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 174                         subl    %edx, %ecx
 175         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 176                         shll    $3, %ecx
 177         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 178         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 179         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 180         pushl   %eax
 181         cfi_adjust_cfa_offset (4)
 182         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 183         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 184         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 185         popl    %edx
 186         cfi_adjust_cfa_offset (-4)
 187 #ifdef PIC
 188         movl    12(%esp), %eax
 189         popl    %ebx
 190         cfi_adjust_cfa_offset (-4)
 191         cfi_restore (ebx)
 192 #else
 193         movl    8(%esp), %eax
 194 #endif
 195         testl   %eax, %eax
 196         fstp    %st(1)
 197         jns     4f
 198         fchs
 199 4:      ret
 200
 201         /* Return the argument.  */
 202 1:      fldl    4(%esp)
 203         popl %ecx; nacljmp %ecx
 204 END(__cbrt)
 205 weak_alias (__cbrt, cbrt)