sysdeps/libm-i387/s_cbrtl.S

   1 /* Compute cubic root of long double value.
   2    Copyright (C) 1997 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Library General Public License as
   9    published by the Free Software Foundation; either version 2 of the
  10    License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Library General Public License for more details.
  16
  17    You should have received a copy of the GNU Library General Public
  18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22 #include <machine/asm.h>
  23
  24 #ifdef __ELF__
  25         .section .rodata
  26 #else
  27         .text
  28 #endif
  29
  30         .align ALIGNARG(4)
  31         ASM_TYPE_DIRECTIVE(f1,@object)
  32 f1:     .double 0.338058687610520237
  33         ASM_SIZE_DIRECTIVE(f1)
  34         ASM_TYPE_DIRECTIVE(f2,@object)
  35 f2:     .double 1.67595307700780102
  36         ASM_SIZE_DIRECTIVE(f2)
  37         ASM_TYPE_DIRECTIVE(f3,@object)
  38 f3:     .double -2.82414939754975962
  39         ASM_SIZE_DIRECTIVE(f3)
  40         ASM_TYPE_DIRECTIVE(f4,@object)
  41 f4:     .double 4.09559907378707839
  42         ASM_SIZE_DIRECTIVE(f4)
  43         ASM_TYPE_DIRECTIVE(f5,@object)
  44 f5:     .double -4.11151425200350531
  45         ASM_SIZE_DIRECTIVE(f5)
  46         ASM_TYPE_DIRECTIVE(f6,@object)
  47 f6:     .double 2.65298938441952296
  48         ASM_SIZE_DIRECTIVE(f6)
  49         ASM_TYPE_DIRECTIVE(f7,@object)
  50 f7:     .double -0.988553671195413709
  51         ASM_SIZE_DIRECTIVE(f7)
  52         ASM_TYPE_DIRECTIVE(f8,@object)
  53 f8:     .double 0.161617097923756032
  54         ASM_SIZE_DIRECTIVE(f8)
  55
  56 #define CBRT2 1.2599210498948731648
  57 #define SQR_CBRT2 1.5874010519681994748
  58
  59         ASM_TYPE_DIRECTIVE(factor,@object)
  60 factor: .double 1.0 / SQR_CBRT2
  61         .double 1.0 / CBRT2
  62         .double 1.0
  63         .double CBRT2
  64         .double SQR_CBRT2
  65         ASM_SIZE_DIRECTIVE(factor)
  66
  67         ASM_TYPE_DIRECTIVE(two64,@object)
  68 two64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
  69         ASM_SIZE_DIRECTIVE(two64)
  70
  71 #ifdef PIC
  72 #define MO(op) op##@GOTOFF(%ebx)
  73 #define MOX(op,x,f) op##@GOTOFF(%ebx,x,f)
  74 #else
  75 #define MO(op) op
  76 #define MOX(op,x,f) op(,x,f)
  77 #endif
  78
  79         .text
  80 ENTRY(__cbrtl)
  81         movl    4(%esp), %ecx
  82         movl    12(%esp), %eax
  83         orl     8(%esp), %ecx
  84         movl    %eax, %edx
  85         andl    $0x7fff, %eax
  86         orl     %eax, %ecx
  87         jz      1f
  88         xorl    %ecx, %ecx
  89         cmpl    $0x7fff, %eax
  90         je      1f
  91
  92 #ifdef PIC
  93         pushl   %ebx
  94         call    3f
  95 3:      popl    %ebx
  96         addl    $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
  97 #endif
  98
  99         cmpl    $0, %eax
 100         je      2f
 101
 102 #ifdef PIC
 103         fldt    8(%esp)
 104 #else
 105         fldt    4(%esp)
 106 #endif
 107         fmull   MO(two64)
 108         movl    $-64, %ecx
 109         fstpt   4(%esp)
 110         movl    12(%esp), %eax
 111         movl    %eax, %edx
 112         andl    $0x7fff, %eax
 113
 114 2:      andl    $0x8000, %edx
 115         subl    $16382, %eax
 116         orl     $0x3ffe, %edx
 117         addl    %eax, %ecx
 118 #ifdef PIC
 119         movl    %edx, 16(%esp)
 120
 121         fldt    8(%esp)                 /* xm */
 122 #else
 123         movl    %edx, 12(%esp)
 124
 125         fldt    4(%esp)                 /* xm */
 126 #endif
 127         fabs
 128
 129         /* The following code has two track:
 130             a) compute the normalized cbrt value
 131             b) compute xe/3 and xe%3
 132            The right track computes the value for b) and this is done
 133            in an optimized way by avoiding division.  */
 134
 135         fld     %st(0)                  /* xm : xm */
 136
 137         fmull   MO(f7)                  /* f7*xm : xm */
 138                         movl    $1431655766, %eax
 139         faddl   MO(f6)                  /* f6+f7*xm : xm */
 140                         imull   %ecx
 141         fmul    %st(1)                  /* (f6+f7*xm)*xm : xm */
 142                         movl    %ecx, %eax
 143         faddl   MO(f5)                  /* f5+(f6+f7*xm)*xm : xm */
 144                         sarl    $31, %eax
 145         fmul    %st(1)                  /* (f5+(f6+f7*xm)*xm)*xm : xm */
 146                         subl    %eax, %edx
 147         faddl   MO(f4)                  /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
 148         fmul    %st(1)                  /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 149         faddl   MO(f3)                  /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 150         fmul    %st(1)                  /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 151         faddl   MO(f2)                  /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 152         fmul    %st(1)                  /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 153         faddl   MO(f1)                  /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 154
 155         fld     %st                     /* u : u : xm */
 156         fmul    %st(1)                  /* u*u : u : xm */
 157         fld     %st(2)                  /* xm : u*u : u : xm */
 158         fadd    %st                     /* 2*xm : u*u : u : xm */
 159         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 160         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 161                         movl    %edx, %eax
 162         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 163                         leal    (%edx,%edx,2),%edx
 164         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 165                         subl    %edx, %ecx
 166         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 167         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 168         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 169         fmull   MOX(16+factor,%ecx,8)   /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 170         pushl   %eax
 171         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 172         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 173         popl    %eax
 174         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 175         fstp    %st(1)
 176 #ifdef PIC
 177         popl    %ebx
 178 #endif
 179         testl   $0x8000, 12(%esp)
 180         jz      4f
 181         fchs
 182 4:      ret
 183
 184         /* Return the argument.  */
 185 1:      fldt    4(%esp)
 186         ret
 187 END(__cbrtl)
 188 weak_alias (__cbrtl, cbrtl)