sysdeps/i386/fpu/s_cbrt.S

   1 /* Compute cubic root of double value.
   2    Copyright (C) 1997-2024 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <machine/asm.h>
  20 #include <libm-alias-double.h>
  21
  22         .section .rodata
  23
  24         .align ALIGNARG(4)
  25         .type f7,@object
  26 f7:     .double -0.145263899385486377
  27         ASM_SIZE_DIRECTIVE(f7)
  28         .type f6,@object
  29 f6:     .double 0.784932344976639262
  30         ASM_SIZE_DIRECTIVE(f6)
  31         .type f5,@object
  32 f5:     .double -1.83469277483613086
  33         ASM_SIZE_DIRECTIVE(f5)
  34         .type f4,@object
  35 f4:     .double 2.44693122563534430
  36         ASM_SIZE_DIRECTIVE(f4)
  37         .type f3,@object
  38 f3:     .double -2.11499494167371287
  39         ASM_SIZE_DIRECTIVE(f3)
  40         .type f2,@object
  41 f2:     .double 1.50819193781584896
  42         ASM_SIZE_DIRECTIVE(f2)
  43         .type f1,@object
  44 f1:     .double 0.354895765043919860
  45         ASM_SIZE_DIRECTIVE(f1)
  46
  47 #define CBRT2           1.2599210498948731648
  48 #define ONE_CBRT2       0.793700525984099737355196796584
  49 #define SQR_CBRT2       1.5874010519681994748
  50 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  51
  52         .type factor,@object
  53 factor: .double ONE_SQR_CBRT2
  54         .double ONE_CBRT2
  55         .double 1.0
  56         .double CBRT2
  57         .double SQR_CBRT2
  58         ASM_SIZE_DIRECTIVE(factor)
  59
  60         .type two54,@object
  61 two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
  62         ASM_SIZE_DIRECTIVE(two54)
  63
  64 #ifdef PIC
  65 #define MO(op) op##@GOTOFF(%ebx)
  66 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  67 #else
  68 #define MO(op) op
  69 #define MOX(op,x) op(x)
  70 #endif
  71
  72         .text
  73 ENTRY(__cbrt)
  74         movl    4(%esp), %ecx
  75         movl    8(%esp), %eax
  76         movl    %eax, %edx
  77         andl    $0x7fffffff, %eax
  78         orl     %eax, %ecx
  79         jz      1f
  80         xorl    %ecx, %ecx
  81         cmpl    $0x7ff00000, %eax
  82         jae     1f
  83
  84 #ifdef PIC
  85         pushl   %ebx
  86         cfi_adjust_cfa_offset (4)
  87         cfi_rel_offset (ebx, 0)
  88         LOAD_PIC_REG (bx)
  89 #endif
  90
  91         cmpl    $0x00100000, %eax
  92         jae     2f
  93
  94 #ifdef PIC
  95         fldl    8(%esp)
  96 #else
  97         fldl    4(%esp)
  98 #endif
  99         fmull   MO(two54)
 100         movl    $-54, %ecx
 101 #ifdef PIC
 102         fstpl   8(%esp)
 103         movl    12(%esp), %eax
 104 #else
 105         fstpl   4(%esp)
 106         movl    8(%esp), %eax
 107 #endif
 108         movl    %eax, %edx
 109         andl    $0x7fffffff, %eax
 110
 111 2:      shrl    $20, %eax
 112         andl    $0x800fffff, %edx
 113         subl    $1022, %eax
 114         orl     $0x3fe00000, %edx
 115         addl    %eax, %ecx
 116 #ifdef PIC
 117         movl    %edx, 12(%esp)
 118
 119         fldl    8(%esp)                 /* xm */
 120 #else
 121         movl    %edx, 8(%esp)
 122
 123         fldl    4(%esp)                 /* xm */
 124 #endif
 125         fabs
 126
 127         /* The following code has two tracks:
 128             a) compute the normalized cbrt value
 129             b) compute xe/3 and xe%3
 130            The right track computes the value for b) and this is done
 131            in an optimized way by avoiding division.
 132
 133            But why two tracks at all?  Very easy: efficiency.  Some FP
 134            instruction can overlap with a certain amount of integer (and
 135            FP) instructions.  So we get (except for the imull) all
 136            instructions for free.  */
 137
 138         fld     %st(0)                  /* xm : xm */
 139
 140         fmull   MO(f7)                  /* f7*xm : xm */
 141                         movl    $1431655766, %eax
 142         faddl   MO(f6)                  /* f6+f7*xm : xm */
 143                         imull   %ecx
 144         fmul    %st(1)                  /* (f6+f7*xm)*xm : xm */
 145                         movl    %ecx, %eax
 146         faddl   MO(f5)                  /* f5+(f6+f7*xm)*xm : xm */
 147                         sarl    $31, %eax
 148         fmul    %st(1)                  /* (f5+(f6+f7*xm)*xm)*xm : xm */
 149                         subl    %eax, %edx
 150         faddl   MO(f4)                  /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
 151         fmul    %st(1)                  /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 152         faddl   MO(f3)                  /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 153         fmul    %st(1)                  /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 154         faddl   MO(f2)                  /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 155         fmul    %st(1)                  /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 156         faddl   MO(f1)                  /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 157
 158         fld     %st                     /* u : u : xm */
 159         fmul    %st(1)                  /* u*u : u : xm */
 160         fld     %st(2)                  /* xm : u*u : u : xm */
 161         fadd    %st                     /* 2*xm : u*u : u : xm */
 162         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 163         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 164                         movl    %edx, %eax
 165         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 166                         leal    (%edx,%edx,2),%edx
 167         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 168                         subl    %edx, %ecx
 169         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 170                         shll    $3, %ecx
 171         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 172         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 173         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 174         pushl   %eax
 175         cfi_adjust_cfa_offset (4)
 176         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 177         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 178         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 179         popl    %edx
 180         cfi_adjust_cfa_offset (-4)
 181 #ifdef PIC
 182         movl    12(%esp), %eax
 183         popl    %ebx
 184         cfi_adjust_cfa_offset (-4)
 185         cfi_restore (ebx)
 186 #else
 187         movl    8(%esp), %eax
 188 #endif
 189         testl   %eax, %eax
 190         fstp    %st(1)
 191         jns     4f
 192         fchs
 193 4:      ret
 194
 195         /* Return the argument.  */
 196 1:      fldl    4(%esp)
 197         ret
 198 END(__cbrt)
 199 libm_alias_double (__cbrt, cbrt)