sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S

   1 /* Optimized with sse2 version of sinf
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #define __need_Emath
  21 #include <bits/errno.h>
  22
  23 /* Short algorithm description:
  24  *
  25  *  1) if |x| == 0: return x.
  26  *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
  27  *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
  28  *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
  29  *  5) if |x| < 9*Pi/4:
  30  *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
  31  *           t=|x|-j*Pi/4.
  32  *      5.2) Reconstruction:
  33  *          s = sign(x) * (-1.0)^((n>>2)&1)
  34  *          if(n&2 != 0) {
  35  *              using cos(t) polynomial for |t|<Pi/4, result is
  36  *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
  37  *          } else {
  38  *              using sin(t) polynomial for |t|<Pi/4, result is
  39  *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
  40  *          }
  41  *  6) if |x| < 2^23, large args:
  42  *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  43  *           t=|x|-j*Pi/4.
  44  *      6.2) Reconstruction same as (5.2).
  45  *  7) if |x| >= 2^23, very large args:
  46  *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  47  *           t=|x|-j*Pi/4.
  48  *      7.2) Reconstruction same as (5.2).
  49  *  8) if x is Inf, return x-x, and set errno=EDOM.
  50  *  9) if x is NaN, return x-x.
  51  *
  52  * Special cases:
  53  *  sin(+-0) = +-0 not raising inexact/underflow,
  54  *  sin(subnormal) raises inexact/underflow,
  55  *  sin(min_normalized) raises inexact/underflow,
  56  *  sin(normalized) raises inexact,
  57  *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
  58  *  sin(NaN) = NaN.
  59  */
  60
  61 #ifdef  PIC
     /* PIC build: %ebx is pushed on entry and then used as the base
        register for @GOTOFF addressing of the local constants.
          MO1(symbol)              memory operand for L(symbol).
          MO2(symbol,reg2,_scale)  same, indexed by reg2*_scale.
          CFI_PUSH/CFI_POP         CFI notes for a 4-byte push/pop of REG.
          PUSH/POP                 pushl/popl followed by the matching CFI note.
          ENTRANCE                 save %ebx, then LOAD_PIC_REG(bx).
          RETURN                   restore %ebx and return; the trailing
                                   CFI_PUSH re-describes the frame for the
                                   code placed after the ret.
          ARG_X                    x is at 8(%esp) because the saved %ebx
                                   sits between %esp and the return address.  */
  62 # define MO1(symbol)                    L(symbol)##@GOTOFF(%ebx)
  63 # define MO2(symbol,reg2,_scale)        L(symbol)##@GOTOFF(%ebx,reg2,_scale)
  64 # define CFI_PUSH(REG)  cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
  65 # define CFI_POP(REG)   cfi_adjust_cfa_offset(-4); cfi_restore(REG)
  66 # define PUSH(REG)                      pushl REG; CFI_PUSH(REG)
  67 # define POP(REG)                       popl REG; CFI_POP(REG)
  68 # define ENTRANCE                       PUSH(%ebx); LOAD_PIC_REG(bx)
  69 # define RETURN                         POP(%ebx); ret; CFI_PUSH(%ebx)
  70 # define ARG_X                          8(%esp)
  71 #else
     /* Non-PIC build: constants are addressed directly through L(symbol),
        nothing is saved on entry (ENTRANCE is empty, RETURN is a plain
        ret) and x is at 4(%esp).  */
  72 # define MO1(symbol)                    L(symbol)
  73 # define MO2(symbol,reg2,_scale)        L(symbol)(,reg2,_scale)
  74 # define ENTRANCE
  75 # define RETURN                         ret
  76 # define ARG_X                          4(%esp)
  77 #endif
  78
  79         .text
  80 ENTRY(__sinf_sse2)
  81         /* Input: single precision x on stack at address ARG_X */
             /* Output: every exit path loads the result onto the x87 stack
                (flds/fldl) immediately before RETURN.
                Dispatch is done on the bit pattern of |x| kept in %eax.  */
  82
  83         ENTRANCE
  84         movl    ARG_X, %eax             /* Bits of x */
  85         cvtss2sd ARG_X, %xmm0           /* DP x */
  86         andl    $0x7fffffff, %eax       /* |x| */
  87
  88         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4?  */
  89         jb      L(arg_less_pio4)
  90
  91         /* Here if |x|>=Pi/4 */
  92         movd    %eax, %xmm3             /* SP |x| */
  93         andpd   MO1(DP_ABS_MASK),%xmm0  /* DP |x| */
  94         movss   MO1(SP_INVPIO4), %xmm2  /* SP 1/(Pi/4) */
  95
  96         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4?  */
  97         jae     L(large_args)
  98
  99         /* Here if Pi/4<=|x|<9*Pi/4 */
 100         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
 101         movl    ARG_X, %ecx             /* Load x */
 102         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 103         shrl    $31, %ecx               /* sign of x */
 104         addl    $1, %eax                /* k+1 */
 105         movl    $0x0e, %edx
 106         andl    %eax, %edx              /* j = (k+1)&0x0e */
 107         subsd   MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
 108
 109 L(reconstruction):
 110         /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
 111         testl   $2, %eax                /* n&2 != 0?  */
 112         jz      L(sin_poly)
 113
 114 /*L(cos_poly):*/
 115         /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
 116          * y = t*t; z = y*y;
 117          * s = sign(x) * (-1.0)^((n>>2)&1)
 118          * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 119          */
 120         shrl    $2, %eax                /* n>>2 */
 121         mulsd   %xmm0, %xmm0            /* y=t^2 */
 122         andl    $1, %eax                /* (n>>2)&1 */
 123         movaps  %xmm0, %xmm1            /* y */
 124         mulsd   %xmm0, %xmm0            /* z=t^4 */
 125
 126         movsd   MO1(DP_C4), %xmm4       /* C4 */
 127         mulsd   %xmm0, %xmm4            /* z*C4 */
 128         xorl    %eax, %ecx              /* (-1.0)^((n>>2)&1) XOR sign(x) */
 129         movsd   MO1(DP_C3), %xmm3       /* C3 */
 130         mulsd   %xmm0, %xmm3            /* z*C3 */
 131         addsd   MO1(DP_C2), %xmm4       /* C2+z*C4 */
 132         mulsd   %xmm0, %xmm4            /* z*(C2+z*C4) */
 133         lea     -8(%esp), %esp          /* Borrow 8 bytes of stack frame */
             cfi_adjust_cfa_offset(8)        /* Keep unwind info in step with %esp */
 134         addsd   MO1(DP_C1), %xmm3       /* C1+z*C3 */
 135         mulsd   %xmm0, %xmm3            /* z*(C1+z*C3) */
 136         addsd   MO1(DP_C0), %xmm4       /* C0+z*(C2+z*C4) */
 137         mulsd   %xmm1, %xmm4            /* y*(C0+z*(C2+z*C4)) */
 138
 139         addsd   %xmm4, %xmm3            /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 140         /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 141         addsd   MO1(DP_ONES), %xmm3
 142
             /* %ecx is 0 or 1 here and selects +1.0 or -1.0 from DP_ONES.  */
 143         mulsd   MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
 144         movsd   %xmm3, 0(%esp)          /* Move result from sse...  */
 145         fldl    0(%esp)                 /* ...to FPU.  */
 146         /* Return back 8 bytes of stack frame */
 147         lea     8(%esp), %esp
             cfi_adjust_cfa_offset(-8)
 148         RETURN
 149
 150         .p2align        4
 151 L(sin_poly):
 152         /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
 153          * y = t*t; z = y*y;
 154          * s = sign(x) * (-1.0)^((n>>2)&1)
 155          * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 156          *
              * Input (as for L(reconstruction)): %eax=n, %xmm0=t, %ecx=sign(x).
              * The even-power and odd-power halves of the polynomial are
              * accumulated separately in %xmm2 and %xmm3 and summed at the end.
              */
 157
 158         movaps  %xmm0, %xmm4            /* t */
 159         shrl    $2, %eax                /* n>>2 */
 160         mulsd   %xmm0, %xmm0            /* y=t^2 */
 161         andl    $1, %eax                /* (n>>2)&1 */
 162         movaps  %xmm0, %xmm1            /* y */
 163         xorl    %eax, %ecx              /* (-1.0)^((n>>2)&1) XOR sign(x) */
 164         mulsd   %xmm0, %xmm0            /* z=t^4 */
 165
 166         movsd   MO1(DP_S4), %xmm2       /* S4 */
 167         mulsd   %xmm0, %xmm2            /* z*S4 */
 168         movsd   MO1(DP_S3), %xmm3       /* S3 */
 169         mulsd   %xmm0, %xmm3            /* z*S3 */
 170         lea     -8(%esp), %esp          /* Borrow 8 bytes of stack frame */
             cfi_adjust_cfa_offset(8)        /* Keep unwind info in step with %esp */
 171         addsd   MO1(DP_S2), %xmm2       /* S2+z*S4 */
 172         mulsd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 173         addsd   MO1(DP_S1), %xmm3       /* S1+z*S3 */
 174         mulsd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 175         addsd   MO1(DP_S0), %xmm2       /* S0+z*(S2+z*S4) */
 176         mulsd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 177         /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
 178         mulsd   MO2(DP_ONES,%ecx,8), %xmm4
 179         addsd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 180         /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 181         mulsd   %xmm4, %xmm3
 182         /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 183         addsd   %xmm4, %xmm3
 184         movsd   %xmm3, 0(%esp)          /* Move result from sse...  */
 185         fldl    0(%esp)                 /* ...to FPU.  */
 186         /* Return back 8 bytes of stack frame */
 187         lea     8(%esp), %esp
             cfi_adjust_cfa_offset(-8)
 188         RETURN
 189
 190         .p2align        4
 191 L(large_args):
 192         /* Here if |x|>=9*Pi/4 */
             /* On entry %eax holds the bits of |x| and %xmm0 holds DP |x|.
                Exits to L(reconstruction) with %eax=n, %xmm0=t, %ecx=sign(x).  */
 193         cmpl    $0x7f800000, %eax       /* x is Inf or NaN?  */
 194         jae     L(arg_inf_or_nan)
 195
 196         /* Here if finite |x|>=9*Pi/4 */
 197         cmpl    $0x4b000000, %eax       /* |x|<2^23?  */
 198         jae     L(very_large_args)
 199
 200         /* Here if 9*Pi/4<=|x|<2^23 */
             /* DP_PIO4HI and DP_PIO4LO are stored negated, so the addsd
                instructions below perform the subtraction of j*Pi/4.  */
 201         movsd   MO1(DP_INVPIO4), %xmm1  /* 1/(Pi/4) */
 202         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 203         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 204         addl    $1, %eax                /* k+1 */
 205         movl    %eax, %edx
 206         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 207         cvtsi2sdl %edx, %xmm4           /* DP j */
 208         movl    ARG_X, %ecx             /* Load x */
 209         movsd   MO1(DP_PIO4HI), %xmm2   /* -PIO4HI = high part of -Pi/4 */
 210         shrl    $31, %ecx               /* sign bit of x */
 211         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 212         movsd   MO1(DP_PIO4LO), %xmm3   /* -PIO4LO = low part of -Pi/4 */
 213         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 214         mulsd   %xmm3, %xmm4            /* -j*PIO4LO */
 215         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 216         jmp     L(reconstruction)
 217
 218         .p2align        4
 219 L(very_large_args):
 220         /* Here if finite |x|>=2^23 */
             /* On entry %eax holds the bits of |x| and %xmm0 holds DP |x|.
                The exponent of x selects four consecutive slices of 4/Pi
                (FPI[j-2..j+1]); |x| is multiplied by each of them, the integer
                part k of the sum is split off with the 2^52 add/subtract
                sequence, and the remaining fraction is scaled by Pi/4.
                Exits to L(reconstruction) with %eax=n, %xmm0=t, %ecx=sign(x).  */
 221
 222         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 223         shrl    $23, %eax               /* eb = biased exponent of x */
 224         /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
 225         subl    $68, %eax
 226         movl    $28, %ecx               /* %cl=28 */
 227         movl    %eax, %edx              /* bitpos copy */
 228
 229         /* j = bitpos/28; */
 230         div     %cl                     /* j in register %al=%ax/%cl */
 231         movapd  %xmm0, %xmm3            /* |x| */
 232         /* clear unneeded remainder from %ah */
 233         andl    $0xff, %eax
 234
 235         imull   $28, %eax, %ecx         /* j*28 */
 236         movsd   MO1(DP_HI_MASK), %xmm4  /* DP_HI_MASK */
 237         movapd  %xmm0, %xmm5            /* |x| */
 238         mulsd   -2*8+MO2(_FPI,%eax,8), %xmm3    /* tmp3 = FPI[j-2]*|x| */
 239         movapd  %xmm0, %xmm1            /* |x| */
 240         mulsd   -1*8+MO2(_FPI,%eax,8), %xmm5    /* tmp2 = FPI[j-1]*|x| */
 241         mulsd   0*8+MO2(_FPI,%eax,8), %xmm0     /* tmp0 = FPI[j]*|x| */
 242         addl    $19, %ecx               /* j*28+19 */
 243         mulsd   1*8+MO2(_FPI,%eax,8), %xmm1     /* tmp1 = FPI[j+1]*|x| */
 244         cmpl    %ecx, %edx              /* bitpos>=j*28+19?   */
 245         jl      L(very_large_skip1)
 246
 247         /* Here if bitpos>=j*28+19 */
 248         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 249         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 250 L(very_large_skip1):
 251
 252         movsd   MO1(DP_2POW52), %xmm6
 253         movapd  %xmm5, %xmm2            /* tmp2 copy */
 254         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 255         movl    $1, %edx
 256         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 257         movsd   8+MO1(DP_2POW52), %xmm4 /* -2^52 (second entry of DP_2POW52) */
 258         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 259         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 260         movl    ARG_X, %ecx             /* Load x */
 261         comisd  %xmm5, %xmm4            /* tmp4 > tmp5?  */
 262         jbe     L(very_large_skip2)
 263
 264         /* Here if tmp4 > tmp5 */
 265         subl    $1, %eax                /* k-- */
 266         addsd   8+MO1(DP_ONES), %xmm4   /* tmp4 -= 1.0 (adds the -1.0 entry) */
 267 L(very_large_skip2):
 268
 269         andl    %eax, %edx              /* k&1 */
 270         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
             /* DP_ZERONE[k&1] is 0.0 for even k and -1.0 for odd k.  */
 271         addsd   MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
 272         addsd   %xmm2, %xmm3            /* t += tmp2 */
 273         shrl    $31, %ecx               /* sign of x */
 274         addsd   %xmm3, %xmm0            /* t += tmp0 */
 275         addl    $1, %eax                /* n=k+1 */
 276         addsd   %xmm1, %xmm0            /* t += tmp1 */
 277         mulsd   MO1(DP_PIO4), %xmm0     /* t *= PIO4 */
 278
 279         jmp     L(reconstruction)       /* end of very_large_args path */
 280
 281         .p2align        4
 282 L(arg_less_pio4):
 283         /* Here if |x|<Pi/4 */
             /* On entry %eax holds the bits of |x| (sign bit already cleared)
                and %xmm0 holds DP x with its sign, so no separate sign fix-up
                is needed on this path.  */
 284         cmpl    $0x3d000000, %eax       /* |x|<2^-5?  */
 285         jb      L(arg_less_2pn5)        /* unsigned, like the entry checks */
 286
 287         /* Here if 2^-5<=|x|<Pi/4 */
 288         movaps  %xmm0, %xmm3            /* x */
 289         mulsd   %xmm0, %xmm0            /* y=x^2 */
 290         movaps  %xmm0, %xmm1            /* y */
 291         mulsd   %xmm0, %xmm0            /* z=x^4 */
 292         movsd   MO1(DP_S4), %xmm4       /* S4 */
 293         mulsd   %xmm0, %xmm4            /* z*S4 */
 294         movsd   MO1(DP_S3), %xmm5       /* S3 */
 295         mulsd   %xmm0, %xmm5            /* z*S3 */
 296         addsd   MO1(DP_S2), %xmm4       /* S2+z*S4 */
 297         mulsd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 298         addsd   MO1(DP_S1), %xmm5       /* S1+z*S3 */
 299         mulsd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 300         addsd   MO1(DP_S0), %xmm4       /* S0+z*(S2+z*S4) */
 301         mulsd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 302         mulsd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 303         mulsd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 304         /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 305         addsd   %xmm5, %xmm4
 306         /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 307         addsd   %xmm4, %xmm3
 308         cvtsd2ss %xmm3, %xmm3           /* SP result */
 309
 310 L(epilogue):
             /* Common exit for single-precision results: input is SP result
                in %xmm3, which is passed to the x87 stack through memory.  */
 311         lea     -4(%esp), %esp          /* Borrow 4 bytes of stack frame */
             cfi_adjust_cfa_offset(4)        /* Keep unwind info in step with %esp */
 312         movss   %xmm3, 0(%esp)          /* Move result from sse...  */
 313         flds    0(%esp)                 /* ...to FPU.  */
 314         /* Return back 4 bytes of stack frame */
 315         lea     4(%esp), %esp
             cfi_adjust_cfa_offset(-4)
 316         RETURN
 317
 318         .p2align        4
 319 L(arg_less_2pn5):
 320         /* Here if |x|<2^-5 */
             /* On entry %eax holds the bits of |x| (sign bit already cleared)
                and %xmm0 holds DP x.  Leaves the SP result in %xmm3 for
                L(epilogue).  */
 321         cmpl    $0x32000000, %eax       /* |x|<2^-27?  */
 322         jb      L(arg_less_2pn27)       /* unsigned, like the entry checks */
 323
 324         /* Here if 2^-27<=|x|<2^-5 */
 325         movaps  %xmm0, %xmm1            /* DP x */
 326         mulsd   %xmm0, %xmm0            /* DP x^2 */
 327         movsd   MO1(DP_SIN2_1), %xmm3   /* DP DP_SIN2_1 */
 328         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 329         addsd   MO1(DP_SIN2_0), %xmm3   /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 330         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 331         mulsd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 332         addsd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 333         cvtsd2ss %xmm3, %xmm3           /* SP result */
 334         jmp     L(epilogue)
 335
 336         .p2align        4
 337 L(arg_less_2pn27):
             /* On entry %eax holds the bits of |x| and %xmm0 holds DP x.
                x itself is loaded into %xmm3 before the zero test so that the
                x==0 shortcut hands the original value, sign included, to
                L(epilogue).  */
 338         movss   ARG_X, %xmm3            /* SP x */
 339         testl   %eax, %eax              /* x=0?  */
 340         je      L(epilogue)             /* in case x=0 return sin(+-0)==+-0 */
 341         /* Here if |x|<2^-27 */
 342         /*
 343          * Special cases here:
 344          *  sin(subnormal) raises inexact/underflow
 345          *  sin(min_normalized) raises inexact/underflow
 346          *  sin(normalized) raises inexact
 347          */
 348         movaps  %xmm0, %xmm3            /* Copy of DP x */
 349         mulsd   MO1(DP_SMALL), %xmm0    /* x*DP_SMALL */
 350         subsd   %xmm0, %xmm3            /* Result is x-x*DP_SMALL */
 351         cvtsd2ss %xmm3, %xmm3           /* Result converted to SP */
 352         jmp     L(epilogue)
 353
 354         .p2align        4
 355 L(arg_inf_or_nan):
 356         /* Here if |x| is Inf or NAN */
             /* The flags are still those of the cmpl $0x7f800000, %eax in
                L(large_args); the jae that brought control here does not
                change them.  Equal means Inf, not equal (above) means NaN.  */
 357         jne     L(skip_errno_setting)   /* in case of x is NaN */
 358
 359         /* Here if x is Inf. Set errno to EDOM.  */
 360         call    JUMPTARGET(__errno_location)
 361         movl    $EDOM, (%eax)           /* store through the returned pointer */
 362
 363         .p2align        4
 364 L(skip_errno_setting):
 365         /* Here if |x| is Inf or NAN. Continued.  */
             /* x is reloaded from the stack rather than reused from a
                register, since this point is also reached after the call.  */
 366         movss   ARG_X, %xmm3            /* load x */
 367         subss   %xmm3, %xmm3            /* Result is NaN */
 368         jmp     L(epilogue)
 369 END(__sinf_sse2)
 370
 371         .section .rodata, "a"
 372         .p2align 3
 373 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
             /* Each entry is one double stored as low word, high word.
                The Pi/4<=|x|<9*Pi/4 path indexes it with j=(k+1)&0x0e,
                i.e. with even j only.  */
 374         .long   0x00000000,0x00000000   /*  0*Pi/4 */
 375         .long   0x54442d18,0x3fe921fb   /*  1*Pi/4 */
 376         .long   0x54442d18,0x3ff921fb   /*  2*Pi/4 */
 377         .long   0x7f3321d2,0x4002d97c   /*  3*Pi/4 */
 378         .long   0x54442d18,0x400921fb   /*  4*Pi/4 */
 379         .long   0x2955385e,0x400f6a7a   /*  5*Pi/4 */
 380         .long   0x7f3321d2,0x4012d97c   /*  6*Pi/4 */
 381         .long   0xe9bba775,0x4015fdbb   /*  7*Pi/4 */
 382         .long   0x54442d18,0x401921fb   /*  8*Pi/4 */
 383         .long   0xbeccb2bb,0x401c463a   /*  9*Pi/4 */
 384         .long   0x2955385e,0x401f6a7a   /* 10*Pi/4 */
 385         .type L(PIO4J), @object
 386         ASM_SIZE_DIRECTIVE(L(PIO4J))
 387
 388         .p2align 3
 389 L(_FPI): /* 4/Pi broken into sum of positive DP values */
             /* Each entry is one double stored as low word, high word; entry 0
                is 0.0.  L(very_large_args) reads the four consecutive entries
                FPI[j-2], FPI[j-1], FPI[j] and FPI[j+1].  */
 390         .long   0x00000000,0x00000000
 391         .long   0x6c000000,0x3ff45f30
 392         .long   0x2a000000,0x3e3c9c88
 393         .long   0xa8000000,0x3c54fe13
 394         .long   0xd0000000,0x3aaf47d4
 395         .long   0x6c000000,0x38fbb81b
 396         .long   0xe0000000,0x3714acc9
 397         .long   0x7c000000,0x3560e410
 398         .long   0x56000000,0x33bca2c7
 399         .long   0xac000000,0x31fbd778
 400         .long   0xe0000000,0x300b7246
 401         .long   0xe8000000,0x2e5d2126
 402         .long   0x48000000,0x2c970032
 403         .long   0xe8000000,0x2ad77504
 404         .long   0xe0000000,0x290921cf
 405         .long   0xb0000000,0x274deb1c
 406         .long   0xe0000000,0x25829a73
 407         .long   0xbe000000,0x23fd1046
 408         .long   0x10000000,0x2224baed
 409         .long   0x8e000000,0x20709d33
 410         .long   0x80000000,0x1e535a2f
 411         .long   0x64000000,0x1cef904e
 412         .long   0x30000000,0x1b0d6398
 413         .long   0x24000000,0x1964ce7d
 414         .long   0x16000000,0x17b908bf
 415         .type L(_FPI), @object
 416         ASM_SIZE_DIRECTIVE(L(_FPI))
 417
 418 /* Coefficients of polynomial
 419    for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.
        Used by L(arg_less_2pn5) for 2^-27<=|x|<2^-5.  */
 420         .p2align 3
 421 L(DP_SIN2_0):
 422         .long   0x5543d49d,0xbfc55555   /* x^3 term, close to -1/6 */
 423         .type L(DP_SIN2_0), @object
 424         ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
 425
 426         .p2align 3
 427 L(DP_SIN2_1):
 428         .long   0x75cec8c5,0x3f8110f4   /* x^5 term, close to 1/120 */
 429         .type L(DP_SIN2_1), @object
 430         ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
 431
 432         .p2align 3
 433 L(DP_ZERONE): /* Indexed by k&1 in L(very_large_args) */
 434         .long   0x00000000,0x00000000   /* 0.0 */
 435         .long   0x00000000,0xbff00000   /* -1.0 */
 436         .type L(DP_ZERONE), @object
 437         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 438
 439         .p2align 3
 440 L(DP_ONES): /* Sign factor table, indexed by a 0/1 sign flag */
 441         .long   0x00000000,0x3ff00000   /* +1.0 */
 442         .long   0x00000000,0xbff00000   /* -1.0 */
 443         .type L(DP_ONES), @object
 444         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 445
 446 /* Coefficients of polynomial
 447    for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.
        The objects are not laid out in index order: S3, S1, S4, S2, S0.  */
 448         .p2align 3
 449 L(DP_S3):
 450         .long   0x64e6b5b4,0x3ec71d72   /* S3, close to 1/9! */
 451         .type L(DP_S3), @object
 452         ASM_SIZE_DIRECTIVE(L(DP_S3))
 453
 454         .p2align 3
 455 L(DP_S1):
 456         .long   0x10c2688b,0x3f811111   /* S1, close to 1/5! */
 457         .type L(DP_S1), @object
 458         ASM_SIZE_DIRECTIVE(L(DP_S1))
 459
 460         .p2align 3
 461 L(DP_S4):
 462         .long   0x1674b58a,0xbe5a947e   /* S4, close to -1/11! */
 463         .type L(DP_S4), @object
 464         ASM_SIZE_DIRECTIVE(L(DP_S4))
 465
 466         .p2align 3
 467 L(DP_S2):
 468         .long   0x8b4bd1f9,0xbf2a019f   /* S2, close to -1/7! */
 469         .type L(DP_S2), @object
 470         ASM_SIZE_DIRECTIVE(L(DP_S2))
 471
 472         .p2align 3
 473 L(DP_S0):
 474         .long   0x55551cd9,0xbfc55555   /* S0, close to -1/3! */
 475         .type L(DP_S0), @object
 476         ASM_SIZE_DIRECTIVE(L(DP_S0))
 477
 478         .p2align 3
 479 L(DP_SMALL): /* Scale used by L(arg_less_2pn27): result is x-x*DP_SMALL */
 480         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 481         .type L(DP_SMALL), @object
 482         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 483
 484 /* Coefficients of polynomial
 485    for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.
        The objects are not laid out in index order: C3, C1, C4, C2, C0.  */
 486         .p2align 3
 487 L(DP_C3):
 488         .long   0x9ac43cc0,0x3efa00eb   /* C3, close to 1/8! */
 489         .type L(DP_C3), @object
 490         ASM_SIZE_DIRECTIVE(L(DP_C3))
 491
 492         .p2align 3
 493 L(DP_C1):
 494         .long   0x545c50c7,0x3fa55555   /* C1, close to 1/4! */
 495         .type L(DP_C1), @object
 496         ASM_SIZE_DIRECTIVE(L(DP_C1))
 497
 498         .p2align 3
 499 L(DP_C4):
 500         .long   0xdd8844d7,0xbe923c97   /* C4, close to -1/10! */
 501         .type L(DP_C4), @object
 502         ASM_SIZE_DIRECTIVE(L(DP_C4))
 503
 504         .p2align 3
 505 L(DP_C2):
 506         .long   0x348b6874,0xbf56c16b   /* C2, close to -1/6! */
 507         .type L(DP_C2), @object
 508         ASM_SIZE_DIRECTIVE(L(DP_C2))
 509
 510         .p2align 3
 511 L(DP_C0):
 512         .long   0xfffe98ae,0xbfdfffff   /* C0, close to -1/2! */
 513         .type L(DP_C0), @object
 514         ASM_SIZE_DIRECTIVE(L(DP_C0))
 515
 516         .p2align 3
 517 L(DP_PIO4): /* Final scale factor in L(very_large_args) */
 518         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 519         .type L(DP_PIO4), @object
 520         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 521
 522         .p2align 3
 523 L(DP_2POW52): /* Added, then subtracted, in L(very_large_args) to split off k */
 524         .long   0x00000000,0x43300000   /* +2^52 */
 525         .long   0x00000000,0xc3300000   /* -2^52 */
 526         .type L(DP_2POW52), @object
 527         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 528
 529         .p2align 3
 530 L(DP_INVPIO4):
 531         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 532         .type L(DP_INVPIO4), @object
 533         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 534
 535         .p2align 3
 536 L(DP_PIO4HI): /* Stored negated so that L(large_args) can use addsd */
 537         .long   0x54000000,0xbfe921fb   /* High part of -Pi/4 */
 538         .type L(DP_PIO4HI), @object
 539         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 540
 541         .p2align 3
 542 L(DP_PIO4LO): /* Stored negated so that L(large_args) can use addsd */
 543         .long   0x11A62633,0xbe010b46   /* Low part of -Pi/4 */
 544         .type L(DP_PIO4LO), @object
 545         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 546
 547         .p2align 2
 548 L(SP_INVPIO4): /* Single-precision copy, used for Pi/4<=|x|<9*Pi/4 */
 549         .long   0x3fa2f983              /* 4/Pi */
 550         .type L(SP_INVPIO4), @object
 551         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 552
 553         .p2align 4
 554 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
             /* Two identical 8-byte masks, 16-byte aligned: the mask is used
                as the memory operand of the packed andpd instruction.  */
 555         .long   0xffffffff,0x7fffffff
 556         .long   0xffffffff,0x7fffffff
 557         .type L(DP_ABS_MASK), @object
 558         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 559
 560         .p2align 3
 561 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
             /* Keeps the upper 32-bit word (sign, exponent and the top 20
                fraction bits) and clears the lower 32-bit word.  */
 562         .long   0x00000000,0xffffffff
 563         .type L(DP_HI_MASK), @object
 564         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 565
 566 weak_alias (__sinf, sinf)
     /* NOTE(review): the function defined in this file is __sinf_sse2, while
        this alias refers to __sinf, which is not defined in the visible
        source -- confirm where __sinf comes from.  */