sysdeps/i386/fpu/s_cbrtf.S

   1 /* Compute cubic root of float value.
   2    Copyright (C) 1997-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include <machine/asm.h>
  22
  23         .section .rodata
  24
  25         .align ALIGNARG(4)
  26         .type f3,@object
  27 f3:     .double 0.191502161678719066
  28         ASM_SIZE_DIRECTIVE(f3)
  29         .type f2,@object
  30 f2:     .double 0.697570460207922770
  31         ASM_SIZE_DIRECTIVE(f2)
  32         .type f1,@object
  33 f1:     .double 0.492659620528969547
  34         ASM_SIZE_DIRECTIVE(f1)
  35
  36 #define CBRT2           1.2599210498948731648
  37 #define ONE_CBRT2       0.793700525984099737355196796584
  38 #define SQR_CBRT2       1.5874010519681994748
  39 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  40
  41         .type factor,@object
  42         .align ALIGNARG(4)
  43 factor: .double ONE_SQR_CBRT2
  44         .double ONE_CBRT2
  45         .double 1.0
  46         .double CBRT2
  47         .double SQR_CBRT2
  48         ASM_SIZE_DIRECTIVE(factor)
  49
  50         .type two25,@object
  51 two25:  .byte 0, 0, 0, 0x4c
  52         ASM_SIZE_DIRECTIVE(two25)
  53
  54 #ifdef PIC
  55 #define MO(op) op##@GOTOFF(%ebx)
  56 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  57 #else
  58 #define MO(op) op
  59 #define MOX(op,x) op(x)
  60 #endif
  61
  62         .text
  63 ENTRY(__cbrtf)
  64         movl    4(%esp), %eax
  65         xorl    %ecx, %ecx
  66         movl    %eax, %edx
  67         andl    $0x7fffffff, %eax
  68         jz      1f
  69         cmpl    $0x7f800000, %eax
  70         jae     1f
  71
  72 #ifdef PIC
  73         pushl   %ebx
  74         cfi_adjust_cfa_offset (4)
  75         cfi_rel_offset (ebx, 0)
  76         LOAD_PIC_REG (bx)
  77 #endif
  78
  79         cmpl    $0x00800000, %eax
  80         jae     2f
  81
  82 #ifdef PIC
  83         flds    8(%esp)
  84 #else
  85         flds    4(%esp)
  86 #endif
  87         fmuls   MO(two25)
  88         movl    $-25, %ecx
  89 #ifdef PIC
  90         fstps   8(%esp)
  91         movl    8(%esp), %eax
  92 #else
  93         fstps   4(%esp)
  94         movl    4(%esp), %eax
  95 #endif
  96         movl    %eax, %edx
  97         andl    $0x7fffffff, %eax
  98
  99 2:      shrl    $23, %eax
 100         andl    $0x807fffff, %edx
 101         subl    $126, %eax
 102         orl     $0x3f000000, %edx
 103         addl    %eax, %ecx
 104 #ifdef PIC
 105         movl    %edx, 8(%esp)
 106
 107         flds    8(%esp)                 /* xm */
 108 #else
 109         movl    %edx, 4(%esp)
 110
 111         flds    4(%esp)                 /* xm */
 112 #endif
 113         fabs
 114
 115         /* The following code has two tracks:
 116             a) compute the normalized cbrt value
 117             b) compute xe/3 and xe%3
 118            The right track computes the value for b) and this is done
 119            in an optimized way by avoiding division.
 120
 121            But why two tracks at all?  Very easy: efficiency.  Some FP
 122            instruction can overlap with a certain amount of integer (and
 123            FP) instructions.  So we get (except for the imull) all
 124            instructions for free.  */
 125
 126         fld     %st(0)                  /* xm : xm */
 127         fmull   MO(f3)                  /* f3*xm : xm */
 128                         movl    $1431655766, %eax
 129         fsubrl  MO(f2)                  /* f2-f3*xm : xm */
 130                         imull   %ecx
 131         fmul    %st(1)                  /* (f2-f3*xm)*xm : xm */
 132                         movl    %ecx, %eax
 133         faddl   MO(f1)                  /* u:=f1+(f2-f3*xm)*xm : xm */
 134                         sarl    $31, %eax
 135         fld     %st                     /* u : u : xm */
 136                         subl    %eax, %edx
 137         fmul    %st(1)                  /* u*u : u : xm */
 138         fld     %st(2)                  /* xm : u*u : u : xm */
 139         fadd    %st                     /* 2*xm : u*u : u : xm */
 140         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 141         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 142                         movl    %edx, %eax
 143         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 144                         leal    (%edx,%edx,2),%edx
 145         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 146                         subl    %edx, %ecx
 147         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 148                         shll    $3, %ecx
 149         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 150         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 151         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 152         pushl   %eax
 153         cfi_adjust_cfa_offset (4)
 154         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 155         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 156         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 157         popl    %edx
 158         cfi_adjust_cfa_offset (-4)
 159 #ifdef PIC
 160         movl    8(%esp), %eax
 161         popl    %ebx
 162         cfi_adjust_cfa_offset (-4)
 163         cfi_restore (ebx)
 164 #else
 165         movl    4(%esp), %eax
 166 #endif
 167         testl   %eax, %eax
 168         fstp    %st(1)
 169         jns     4f
 170         fchs
 171 4:      ret
 172
 173         /* Return the argument.  */
 174 1:      flds    4(%esp)
 175         ret
 176 END(__cbrtf)
 177 weak_alias (__cbrtf, cbrtf)