sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S

   1 /* Optimized with sse2 version of sinf
   2    Copyright (C) 2012-2022 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #include <errno.h>
  21
  22 /* Short algorithm description:
  23  *
  24  *  1) if |x| == 0: return x.
  25  *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
  26  *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
  27  *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
  28  *  5) if |x| < 9*Pi/4:
  29  *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
  30  *           t=|x|-j*Pi/4.
  31  *      5.2) Reconstruction:
  32  *          s = sign(x) * (-1.0)^((n>>2)&1)
  33  *          if(n&2 != 0) {
  34  *              using cos(t) polynomial for |t|<Pi/4, result is
  35  *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
  36  *          } else {
  37  *              using sin(t) polynomial for |t|<Pi/4, result is
  38  *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
  39  *          }
  40  *  6) if |x| < 2^23, large args:
  41  *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  42  *           t=|x|-j*Pi/4.
  43  *      6.2) Reconstruction same as (5.2).
  44  *  7) if |x| >= 2^23, very large args:
  45  *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  46  *           t=|x|-j*Pi/4.
  47  *      7.2) Reconstruction same as (5.2).
  48  *  8) if x is Inf, return x-x, and set errno=EDOM.
  49  *  9) if x is NaN, return x-x.
  50  *
  51  * Special cases:
  52  *  sin(+-0) = +-0 not raising inexact/underflow,
  53  *  sin(subnormal) raises inexact/underflow,
  54  *  sin(min_normalized) raises inexact/underflow,
  55  *  sin(normalized) raises inexact,
  56  *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
  57  *  sin(NaN) = NaN.
  58  */
  59
  60 #ifdef  PIC
  61 # define MO1(symbol)                    L(symbol)##@GOTOFF(%ebx)
  62 # define MO2(symbol,reg2,_scale)        L(symbol)##@GOTOFF(%ebx,reg2,_scale)
  63 # define CFI_PUSH(REG)  cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
  64 # define CFI_POP(REG)   cfi_adjust_cfa_offset(-4); cfi_restore(REG)
  65 # define PUSH(REG)                      pushl REG; CFI_PUSH(REG)
  66 # define POP(REG)                       popl REG; CFI_POP(REG)
  67 # define ENTRANCE                       PUSH(%ebx); LOAD_PIC_REG(bx)
  68 # define RETURN                         POP(%ebx); ret; CFI_PUSH(%ebx)
  69 # define ARG_X                          8(%esp)
  70 #else
  71 # define MO1(symbol)                    L(symbol)
  72 # define MO2(symbol,reg2,_scale)        L(symbol)(,reg2,_scale)
  73 # define ENTRANCE
  74 # define RETURN                         ret
  75 # define ARG_X                          4(%esp)
  76 #endif
  77
  78         .text
  79 ENTRY(__sinf_sse2)
  80         /* Input: single precision x on stack at address ARG_X */
  81
  82         ENTRANCE
  83         movl    ARG_X, %eax             /* Bits of x */
  84         cvtss2sd ARG_X, %xmm0           /* DP x */
  85         andl    $0x7fffffff, %eax       /* |x| */
  86
  87         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4?  */
  88         jb      L(arg_less_pio4)
  89
  90         /* Here if |x|>=Pi/4 */
  91         movd    %eax, %xmm3             /* SP |x| */
  92         andpd   MO1(DP_ABS_MASK),%xmm0  /* DP |x| */
  93         movss   MO1(SP_INVPIO4), %xmm2  /* SP 1/(Pi/4) */
  94
  95         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4?  */
  96         jae     L(large_args)
  97
  98         /* Here if Pi/4<=|x|<9*Pi/4 */
  99         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
 100         movl    ARG_X, %ecx             /* Load x */
 101         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 102         shrl    $31, %ecx               /* sign of x */
 103         addl    $1, %eax                /* k+1 */
 104         movl    $0x0e, %edx
 105         andl    %eax, %edx              /* j = (k+1)&0x0e */
 106         subsd   MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
 107
 108 L(reconstruction):
 109         /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
 110         testl   $2, %eax                /* n&2 != 0?  */
 111         jz      L(sin_poly)
 112
 113 /*L(cos_poly):*/
 114         /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
 115          * y = t*t; z = y*y;
 116          * s = sign(x) * (-1.0)^((n>>2)&1)
 117          * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 118          */
 119         shrl    $2, %eax                /* n>>2 */
 120         mulsd   %xmm0, %xmm0            /* y=t^2 */
 121         andl    $1, %eax                /* (n>>2)&1 */
 122         movaps  %xmm0, %xmm1            /* y */
 123         mulsd   %xmm0, %xmm0            /* z=t^4 */
 124
 125         movsd   MO1(DP_C4), %xmm4       /* C4 */
 126         mulsd   %xmm0, %xmm4            /* z*C4 */
 127         xorl    %eax, %ecx              /* (-1.0)^((n>>2)&1) XOR sign(x) */
 128         movsd   MO1(DP_C3), %xmm3       /* C3 */
 129         mulsd   %xmm0, %xmm3            /* z*C3 */
 130         addsd   MO1(DP_C2), %xmm4       /* C2+z*C4 */
 131         mulsd   %xmm0, %xmm4            /* z*(C2+z*C4) */
 132         lea     -8(%esp), %esp          /* Borrow 4 bytes of stack frame */
 133         addsd   MO1(DP_C1), %xmm3       /* C1+z*C3 */
 134         mulsd   %xmm0, %xmm3            /* z*(C1+z*C3) */
 135         addsd   MO1(DP_C0), %xmm4       /* C0+z*(C2+z*C4) */
 136         mulsd   %xmm1, %xmm4            /* y*(C0+z*(C2+z*C4)) */
 137
 138         addsd   %xmm4, %xmm3            /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 139         /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 140         addsd   MO1(DP_ONES), %xmm3
 141
 142         mulsd   MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
 143         movsd   %xmm3, 0(%esp)          /* Move result from sse...  */
 144         fldl    0(%esp)                 /* ...to FPU.  */
 145         /* Return back 4 bytes of stack frame */
 146         lea     8(%esp), %esp
 147         RETURN
 148
 149         .p2align        4
 150 L(sin_poly):
 151         /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
 152          * y = t*t; z = y*y;
 153          * s = sign(x) * (-1.0)^((n>>2)&1)
 154          * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 155          */
 156
 157         movaps  %xmm0, %xmm4            /* t */
 158         shrl    $2, %eax                /* n>>2 */
 159         mulsd   %xmm0, %xmm0            /* y=t^2 */
 160         andl    $1, %eax                /* (n>>2)&1 */
 161         movaps  %xmm0, %xmm1            /* y */
 162         xorl    %eax, %ecx              /* (-1.0)^((n>>2)&1) XOR sign(x) */
 163         mulsd   %xmm0, %xmm0            /* z=t^4 */
 164
 165         movsd   MO1(DP_S4), %xmm2       /* S4 */
 166         mulsd   %xmm0, %xmm2            /* z*S4 */
 167         movsd   MO1(DP_S3), %xmm3       /* S3 */
 168         mulsd   %xmm0, %xmm3            /* z*S3 */
 169         lea     -8(%esp), %esp          /* Borrow 4 bytes of stack frame */
 170         addsd   MO1(DP_S2), %xmm2       /* S2+z*S4 */
 171         mulsd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 172         addsd   MO1(DP_S1), %xmm3       /* S1+z*S3 */
 173         mulsd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 174         addsd   MO1(DP_S0), %xmm2       /* S0+z*(S2+z*S4) */
 175         mulsd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 176         /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
 177         mulsd   MO2(DP_ONES,%ecx,8), %xmm4
 178         addsd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 179         /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 180         mulsd   %xmm4, %xmm3
 181         /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 182         addsd   %xmm4, %xmm3
 183         movsd   %xmm3, 0(%esp)          /* Move result from sse...  */
 184         fldl    0(%esp)                 /* ...to FPU.  */
 185         /* Return back 4 bytes of stack frame */
 186         lea     8(%esp), %esp
 187         RETURN
 188
 189         .p2align        4
 190 L(large_args):
 191         /* Here if |x|>=9*Pi/4 */
 192         cmpl    $0x7f800000, %eax       /* x is Inf or NaN?  */
 193         jae     L(arg_inf_or_nan)
 194
 195         /* Here if finite |x|>=9*Pi/4 */
 196         cmpl    $0x4b000000, %eax       /* |x|<2^23?  */
 197         jae     L(very_large_args)
 198
 199         /* Here if 9*Pi/4<=|x|<2^23 */
 200         movsd   MO1(DP_INVPIO4), %xmm1  /* 1/(Pi/4) */
 201         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 202         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 203         addl    $1, %eax                /* k+1 */
 204         movl    %eax, %edx
 205         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 206         cvtsi2sdl %edx, %xmm4           /* DP j */
 207         movl    ARG_X, %ecx             /* Load x */
 208         movsd   MO1(DP_PIO4HI), %xmm2   /* -PIO4HI = high part of -Pi/4 */
 209         shrl    $31, %ecx               /* sign bit of x */
 210         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 211         movsd   MO1(DP_PIO4LO), %xmm3   /* -PIO4LO = low part of -Pi/4 */
 212         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 213         mulsd   %xmm3, %xmm4            /* j*PIO4LO */
 214         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 215         jmp     L(reconstruction)
 216
 217         .p2align        4
 218 L(very_large_args):
 219         /* Here if finite |x|>=2^23 */
 220
 221         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 222         shrl    $23, %eax               /* eb = biased exponent of x */
 223         /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
 224         subl    $68, %eax
 225         movl    $28, %ecx               /* %cl=28 */
 226         movl    %eax, %edx              /* bitpos copy */
 227
 228         /* j = bitpos/28; */
 229         div     %cl                     /* j in register %al=%ax/%cl */
 230         movapd  %xmm0, %xmm3            /* |x| */
 231         /* clear unneeded remainder from %ah */
 232         andl    $0xff, %eax
 233
 234         imull   $28, %eax, %ecx         /* j*28 */
 235         movsd   MO1(DP_HI_MASK), %xmm4  /* DP_HI_MASK */
 236         movapd  %xmm0, %xmm5            /* |x| */
 237         mulsd   -2*8+MO2(_FPI,%eax,8), %xmm3    /* tmp3 = FPI[j-2]*|x| */
 238         movapd  %xmm0, %xmm1            /* |x| */
 239         mulsd   -1*8+MO2(_FPI,%eax,8), %xmm5    /* tmp2 = FPI[j-1]*|x| */
 240         mulsd   0*8+MO2(_FPI,%eax,8), %xmm0     /* tmp0 = FPI[j]*|x| */
 241         addl    $19, %ecx               /* j*28+19 */
 242         mulsd   1*8+MO2(_FPI,%eax,8), %xmm1     /* tmp1 = FPI[j+1]*|x| */
 243         cmpl    %ecx, %edx              /* bitpos>=j*28+19?   */
 244         jl      L(very_large_skip1)
 245
 246         /* Here if bitpos>=j*28+19 */
 247         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 248         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 249 L(very_large_skip1):
 250
 251         movsd   MO1(DP_2POW52), %xmm6
 252         movapd  %xmm5, %xmm2            /* tmp2 copy */
 253         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 254         movl    $1, %edx
 255         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 256         movsd   8+MO1(DP_2POW52), %xmm4
 257         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 258         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 259         movl    ARG_X, %ecx             /* Load x */
 260         comisd  %xmm5, %xmm4            /* tmp4 > tmp5?  */
 261         jbe     L(very_large_skip2)
 262
 263         /* Here if tmp4 > tmp5 */
 264         subl    $1, %eax                /* k-- */
 265         addsd   8+MO1(DP_ONES), %xmm4   /* tmp4 -= 1.0 */
 266 L(very_large_skip2):
 267
 268         andl    %eax, %edx              /* k&1 */
 269         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 270         addsd   MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
 271         addsd   %xmm2, %xmm3            /* t += tmp2 */
 272         shrl    $31, %ecx               /* sign of x */
 273         addsd   %xmm3, %xmm0            /* t += tmp0 */
 274         addl    $1, %eax                /* n=k+1 */
 275         addsd   %xmm1, %xmm0            /* t += tmp1 */
 276         mulsd   MO1(DP_PIO4), %xmm0     /* t *= PI04 */
 277
 278         jmp     L(reconstruction)       /* end of very_large_args peth */
 279
 280         .p2align        4
 281 L(arg_less_pio4):
 282         /* Here if |x|<Pi/4 */
 283         cmpl    $0x3d000000, %eax       /* |x|<2^-5?  */
 284         jl      L(arg_less_2pn5)
 285
 286         /* Here if 2^-5<=|x|<Pi/4 */
 287         movaps  %xmm0, %xmm3            /* x */
 288         mulsd   %xmm0, %xmm0            /* y=x^2 */
 289         movaps  %xmm0, %xmm1            /* y */
 290         mulsd   %xmm0, %xmm0            /* z=x^4 */
 291         movsd   MO1(DP_S4), %xmm4       /* S4 */
 292         mulsd   %xmm0, %xmm4            /* z*S4 */
 293         movsd   MO1(DP_S3), %xmm5       /* S3 */
 294         mulsd   %xmm0, %xmm5            /* z*S3 */
 295         addsd   MO1(DP_S2), %xmm4       /* S2+z*S4 */
 296         mulsd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 297         addsd   MO1(DP_S1), %xmm5       /* S1+z*S3 */
 298         mulsd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 299         addsd   MO1(DP_S0), %xmm4       /* S0+z*(S2+z*S4) */
 300         mulsd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 301         mulsd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 302         mulsd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 303         /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 304         addsd   %xmm5, %xmm4
 305         /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 306         addsd   %xmm4, %xmm3
 307         cvtsd2ss %xmm3, %xmm3           /* SP result */
 308
 309 L(epilogue):
 310         lea     -4(%esp), %esp          /* Borrow 4 bytes of stack frame */
 311         movss   %xmm3, 0(%esp)          /* Move result from sse...  */
 312         flds    0(%esp)                 /* ...to FPU.  */
 313         /* Return back 4 bytes of stack frame */
 314         lea     4(%esp), %esp
 315         RETURN
 316
 317         .p2align        4
 318 L(arg_less_2pn5):
 319         /* Here if |x|<2^-5 */
 320         cmpl    $0x32000000, %eax       /* |x|<2^-27?  */
 321         jl      L(arg_less_2pn27)
 322
 323         /* Here if 2^-27<=|x|<2^-5 */
 324         movaps  %xmm0, %xmm1            /* DP x */
 325         mulsd   %xmm0, %xmm0            /* DP x^2 */
 326         movsd   MO1(DP_SIN2_1), %xmm3   /* DP DP_SIN2_1 */
 327         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 328         addsd   MO1(DP_SIN2_0), %xmm3   /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 329         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 330         mulsd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 331         addsd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 332         cvtsd2ss %xmm3, %xmm3           /* SP result */
 333         jmp     L(epilogue)
 334
 335         .p2align        4
 336 L(arg_less_2pn27):
 337         movss   ARG_X, %xmm3            /* SP x */
 338         cmpl    $0, %eax                /* x=0?  */
 339         je      L(epilogue)             /* in case x=0 return sin(+-0)==+-0 */
 340         /* Here if |x|<2^-27 */
 341         /*
 342          * Special cases here:
 343          *  sin(subnormal) raises inexact/underflow
 344          *  sin(min_normalized) raises inexact/underflow
 345          *  sin(normalized) raises inexact
 346          */
 347         movaps  %xmm0, %xmm3            /* Copy of DP x */
 348         mulsd   MO1(DP_SMALL), %xmm0    /* x*DP_SMALL */
 349         subsd   %xmm0, %xmm3            /* Result is x-x*DP_SMALL */
 350         cvtsd2ss %xmm3, %xmm3           /* Result converted to SP */
 351         jmp     L(epilogue)
 352
 353         .p2align        4
 354 L(arg_inf_or_nan):
 355         /* Here if |x| is Inf or NAN */
 356         jne     L(skip_errno_setting)   /* in case of x is NaN */
 357
 358         /* Here if x is Inf. Set errno to EDOM.  */
 359         call    JUMPTARGET(__errno_location)
 360         movl    $EDOM, (%eax)
 361
 362         .p2align        4
 363 L(skip_errno_setting):
 364         /* Here if |x| is Inf or NAN. Continued.  */
 365         movss   ARG_X, %xmm3            /* load x */
 366         subss   %xmm3, %xmm3            /* Result is NaN */
 367         jmp     L(epilogue)
 368 END(__sinf_sse2)
 369
 370         .section .rodata, "a"
 371         .p2align 3
 372 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
 373         .long   0x00000000,0x00000000
 374         .long   0x54442d18,0x3fe921fb
 375         .long   0x54442d18,0x3ff921fb
 376         .long   0x7f3321d2,0x4002d97c
 377         .long   0x54442d18,0x400921fb
 378         .long   0x2955385e,0x400f6a7a
 379         .long   0x7f3321d2,0x4012d97c
 380         .long   0xe9bba775,0x4015fdbb
 381         .long   0x54442d18,0x401921fb
 382         .long   0xbeccb2bb,0x401c463a
 383         .long   0x2955385e,0x401f6a7a
 384         .type L(PIO4J), @object
 385         ASM_SIZE_DIRECTIVE(L(PIO4J))
 386
 387         .p2align 3
 388 L(_FPI): /* 4/Pi broken into sum of positive DP values */
 389         .long   0x00000000,0x00000000
 390         .long   0x6c000000,0x3ff45f30
 391         .long   0x2a000000,0x3e3c9c88
 392         .long   0xa8000000,0x3c54fe13
 393         .long   0xd0000000,0x3aaf47d4
 394         .long   0x6c000000,0x38fbb81b
 395         .long   0xe0000000,0x3714acc9
 396         .long   0x7c000000,0x3560e410
 397         .long   0x56000000,0x33bca2c7
 398         .long   0xac000000,0x31fbd778
 399         .long   0xe0000000,0x300b7246
 400         .long   0xe8000000,0x2e5d2126
 401         .long   0x48000000,0x2c970032
 402         .long   0xe8000000,0x2ad77504
 403         .long   0xe0000000,0x290921cf
 404         .long   0xb0000000,0x274deb1c
 405         .long   0xe0000000,0x25829a73
 406         .long   0xbe000000,0x23fd1046
 407         .long   0x10000000,0x2224baed
 408         .long   0x8e000000,0x20709d33
 409         .long   0x80000000,0x1e535a2f
 410         .long   0x64000000,0x1cef904e
 411         .long   0x30000000,0x1b0d6398
 412         .long   0x24000000,0x1964ce7d
 413         .long   0x16000000,0x17b908bf
 414         .type L(_FPI), @object
 415         ASM_SIZE_DIRECTIVE(L(_FPI))
 416
 417 /* Coefficients of polynomial
 418    for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
 419         .p2align 3
 420 L(DP_SIN2_0):
 421         .long   0x5543d49d,0xbfc55555
 422         .type L(DP_SIN2_0), @object
 423         ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
 424
 425         .p2align 3
 426 L(DP_SIN2_1):
 427         .long   0x75cec8c5,0x3f8110f4
 428         .type L(DP_SIN2_1), @object
 429         ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
 430
 431         .p2align 3
 432 L(DP_ZERONE):
 433         .long   0x00000000,0x00000000   /* 0.0 */
 434         .long   0x00000000,0xbff00000   /* 1.0 */
 435         .type L(DP_ZERONE), @object
 436         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 437
 438         .p2align 3
 439 L(DP_ONES):
 440         .long   0x00000000,0x3ff00000   /* +1.0 */
 441         .long   0x00000000,0xbff00000   /* -1.0 */
 442         .type L(DP_ONES), @object
 443         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 444
 445 /* Coefficients of polynomial
 446    for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
 447         .p2align 3
 448 L(DP_S3):
 449         .long   0x64e6b5b4,0x3ec71d72
 450         .type L(DP_S3), @object
 451         ASM_SIZE_DIRECTIVE(L(DP_S3))
 452
 453         .p2align 3
 454 L(DP_S1):
 455         .long   0x10c2688b,0x3f811111
 456         .type L(DP_S1), @object
 457         ASM_SIZE_DIRECTIVE(L(DP_S1))
 458
 459         .p2align 3
 460 L(DP_S4):
 461         .long   0x1674b58a,0xbe5a947e
 462         .type L(DP_S4), @object
 463         ASM_SIZE_DIRECTIVE(L(DP_S4))
 464
 465         .p2align 3
 466 L(DP_S2):
 467         .long   0x8b4bd1f9,0xbf2a019f
 468         .type L(DP_S2), @object
 469         ASM_SIZE_DIRECTIVE(L(DP_S2))
 470
 471         .p2align 3
 472 L(DP_S0):
 473         .long   0x55551cd9,0xbfc55555
 474         .type L(DP_S0), @object
 475         ASM_SIZE_DIRECTIVE(L(DP_S0))
 476
 477         .p2align 3
 478 L(DP_SMALL):
 479         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 480         .type L(DP_SMALL), @object
 481         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 482
 483 /* Coefficients of polynomial
 484    for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
 485         .p2align 3
 486 L(DP_C3):
 487         .long   0x9ac43cc0,0x3efa00eb
 488         .type L(DP_C3), @object
 489         ASM_SIZE_DIRECTIVE(L(DP_C3))
 490
 491         .p2align 3
 492 L(DP_C1):
 493         .long   0x545c50c7,0x3fa55555
 494         .type L(DP_C1), @object
 495         ASM_SIZE_DIRECTIVE(L(DP_C1))
 496
 497         .p2align 3
 498 L(DP_C4):
 499         .long   0xdd8844d7,0xbe923c97
 500         .type L(DP_C4), @object
 501         ASM_SIZE_DIRECTIVE(L(DP_C4))
 502
 503         .p2align 3
 504 L(DP_C2):
 505         .long   0x348b6874,0xbf56c16b
 506         .type L(DP_C2), @object
 507         ASM_SIZE_DIRECTIVE(L(DP_C2))
 508
 509         .p2align 3
 510 L(DP_C0):
 511         .long   0xfffe98ae,0xbfdfffff
 512         .type L(DP_C0), @object
 513         ASM_SIZE_DIRECTIVE(L(DP_C0))
 514
 515         .p2align 3
 516 L(DP_PIO4):
 517         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 518         .type L(DP_PIO4), @object
 519         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 520
 521         .p2align 3
 522 L(DP_2POW52):
 523         .long   0x00000000,0x43300000   /* +2^52 */
 524         .long   0x00000000,0xc3300000   /* -2^52 */
 525         .type L(DP_2POW52), @object
 526         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 527
 528         .p2align 3
 529 L(DP_INVPIO4):
 530         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 531         .type L(DP_INVPIO4), @object
 532         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 533
 534         .p2align 3
 535 L(DP_PIO4HI):
 536         .long   0x54000000,0xbfe921fb   /* High part of Pi/4 */
 537         .type L(DP_PIO4HI), @object
 538         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 539
 540         .p2align 3
 541 L(DP_PIO4LO):
 542         .long   0x11A62633,0xbe010b46   /* Low part of Pi/4 */
 543         .type L(DP_PIO4LO), @object
 544         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 545
 546         .p2align 2
 547 L(SP_INVPIO4):
 548         .long   0x3fa2f983              /* 4/Pi */
 549         .type L(SP_INVPIO4), @object
 550         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 551
 552         .p2align 4
 553 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 554         .long   0xffffffff,0x7fffffff
 555         .long   0xffffffff,0x7fffffff
 556         .type L(DP_ABS_MASK), @object
 557         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 558
 559         .p2align 3
 560 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 561         .long   0x00000000,0xffffffff
 562         .type L(DP_HI_MASK), @object
 563         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 564
 565 weak_alias (__sinf, sinf)