sysdeps/x86_64/fpu/s_sinf.S

   1 /* Optimized sinf function.
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #define __need_Emath
  21 #include <bits/errno.h>
  22
  23 /* Short algorithm description:
  24  *
  25  *  1) if |x| == 0: return x.
  26  *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
  27  *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
  28  *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
  29  *  5) if |x| < 9*Pi/4:
  30  *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
  31  *           t=|x|-j*Pi/4.
  32  *      5.2) Reconstruction:
  33  *          s = sign(x) * (-1.0)^((n>>2)&1)
  34  *          if(n&2 != 0) {
  35  *              using cos(t) polynomial for |t|<Pi/4, result is
  36  *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
  37  *          } else {
  38  *              using sin(t) polynomial for |t|<Pi/4, result is
  39  *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
  40  *          }
  41  *  6) if |x| < 2^23, large args:
  42  *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  43  *           t=|x|-j*Pi/4.
  44  *      6.2) Reconstruction same as (5.2).
  45  *  7) if |x| >= 2^23, very large args:
  46  *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
  47  *           t=|x|-j*Pi/4.
  48  *      7.2) Reconstruction same as (5.2).
  49  *  8) if x is Inf, return x-x, and set errno=EDOM.
  50  *  9) if x is NaN, return x-x.
  51  *
  52  * Special cases:
  53  *  sin(+-0) = +-0 not raising inexact/underflow,
  54  *  sin(subnormal) raises inexact/underflow,
  55  *  sin(min_normalized) raises inexact/underflow,
  56  *  sin(normalized) raises inexact,
  57  *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
  58  *  sin(NaN) = NaN.
  59  */
  60
  61         .text
  62 ENTRY(__sinf)
  63         /* Input: single precision x in %xmm0 */
             /* Entry: classify |x| by its bit pattern and handle the
              * Pi/4<=|x|<9*Pi/4 range directly.
              * Values set up here and read again by the later labels:
              *   %xmm7 = unmodified copy of x (SP),
              *   %edi  = raw bits of x, sign bit included,
              *   %eax  = bits of |x| (sign bit cleared), used for range checks,
              *   %xmm0 = x converted to double precision.
              */
  64
  65         movd    %xmm0, %eax             /* Bits of x */
  66         movaps  %xmm0, %xmm7            /* Copy of x, kept for the zero/Inf/NaN paths */
  67         cvtss2sd %xmm0, %xmm0           /* DP x */
  68         movss   L(SP_ABS_MASK)(%rip), %xmm3 /* SP abs mask, applied below */
  69         movl    %eax, %edi              /* Copy of x bits */
  70         andl    $0x7fffffff, %eax       /* |x| */
  71
  72         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4?  */
  73         jb      L(arg_less_pio4)
  74
  75         /* Here if |x|>=Pi/4 */
  76         andps   %xmm7, %xmm3            /* SP |x| */
  77         andpd   L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
  78         movss   L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
  79
  80         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4?  */
  81         jae     L(large_args)           /* %eax still holds the bits of |x| there */
  82
  83         /* Here if Pi/4<=|x|<9*Pi/4 */
  84         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
  85         movl    %edi, %ecx              /* Load x */
  86         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
  87         lea     L(PIO4J)(%rip), %rsi    /* base of the j*Pi/4 table */
  88         shrl    $31, %ecx               /* sign of x: 0 or 1 */
  89         addl    $1, %eax                /* n = k+1 */
  90         movl    $0x0e, %edx             /* mask keeping bits 1..3 */
  91         andl    %eax, %edx              /* j = (k+1)&0x0e */
  92         subsd   (%rsi,%rdx,8), %xmm0    /* t = |x| - j * Pi/4 (8-byte table entries) */
             /* Falls through into L(reconstruction) with %eax=n, %xmm0=t,
              * %ecx=sign(x).  */
  93
  94 L(reconstruction):
  95         /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
             /* Common tail of all reduced paths: bit 1 of n selects the
              * cos(t) or sin(t) polynomial, bit 2 of n flips the sign.  */
  96         testl   $2, %eax                /* n&2 != 0?  */
  97         jz      L(sin_poly)
  98
  99 /*L(cos_poly):*/
 100         /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
 101          * y = t*t; z = y*y;
 102          * s = sign(x) * (-1.0)^((n>>2)&1)
 103          * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 104          */
             /* The polynomial in y is evaluated as two independent chains in
              * z=y^2: %xmm4 collects the C0/C2/C4 terms, %xmm3 the C1/C3 terms.  */
 105         shrl    $2, %eax                /* n>>2 */
 106         mulsd   %xmm0, %xmm0            /* y=t^2 */
 107         andl    $1, %eax                /* (n>>2)&1 */
 108         movaps  %xmm0, %xmm1            /* y */
 109         mulsd   %xmm0, %xmm0            /* z=t^4 */
 110
 111         movsd   L(DP_C4)(%rip), %xmm4   /* C4 */
 112         mulsd   %xmm0, %xmm4            /* z*C4 */
 113         xorl    %eax, %ecx              /* %ecx = sign(x) XOR ((n>>2)&1): index of s in DP_ONES */
 114         movsd   L(DP_C3)(%rip), %xmm3   /* C3 */
 115         mulsd   %xmm0, %xmm3            /* z*C3 */
 116         lea     L(DP_ONES)(%rip), %rsi  /* table {+1.0, -1.0} */
 117         addsd   L(DP_C2)(%rip), %xmm4   /* C2+z*C4 */
 118         mulsd   %xmm0, %xmm4            /* z*(C2+z*C4) */
 119         addsd   L(DP_C1)(%rip), %xmm3   /* C1+z*C3 */
 120         mulsd   %xmm0, %xmm3            /* z*(C1+z*C3) */
 121         addsd   L(DP_C0)(%rip), %xmm4   /* C0+z*(C2+z*C4) */
 122         mulsd   %xmm1, %xmm4            /* y*(C0+z*(C2+z*C4)) */
 123
 124         /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 125         addsd   %xmm4, %xmm3
 126         /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 127         addsd   L(DP_ONES)(%rip), %xmm3 /* first entry of DP_ONES is +1.0 */
 128
 129         mulsd   (%rsi,%rcx,8), %xmm3    /* DP result = s * polynomial, s = DP_ONES[%rcx] */
 130         cvtsd2ss %xmm3, %xmm0           /* SP result */
 131         ret
 132
 133         .p2align        4
 134 L(sin_poly):
 135         /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
 136          * y = t*t; z = y*y;
 137          * s = sign(x) * (-1.0)^((n>>2)&1)
 138          * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 139          */
             /* Input (same as L(reconstruction)): %eax=n, %xmm0=t, %ecx=sign(x).
              * The polynomial in y is split into two chains in z=y^2:
              * %xmm2 collects the S0/S2/S4 terms, %xmm3 the S1/S3 terms.  */
 140
 141         movaps  %xmm0, %xmm4            /* t */
 142         shrl    $2, %eax                /* n>>2 */
 143         mulsd   %xmm0, %xmm0            /* y=t^2 */
 144         andl    $1, %eax                /* (n>>2)&1 */
 145         movaps  %xmm0, %xmm1            /* y */
 146         xorl    %eax, %ecx              /* %ecx = sign(x) XOR ((n>>2)&1): index of s in DP_ONES */
 147         mulsd   %xmm0, %xmm0            /* z=t^4 */
 148
 149         movsd   L(DP_S4)(%rip), %xmm2   /* S4 */
 150         mulsd   %xmm0, %xmm2            /* z*S4 */
 151         movsd   L(DP_S3)(%rip), %xmm3   /* S3 */
 152         mulsd   %xmm0, %xmm3            /* z*S3 */
 153         lea     L(DP_ONES)(%rip), %rsi  /* table {+1.0, -1.0} */
 154         addsd   L(DP_S2)(%rip), %xmm2   /* S2+z*S4 */
 155         mulsd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 156         addsd   L(DP_S1)(%rip), %xmm3   /* S1+z*S3 */
 157         mulsd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 158         addsd   L(DP_S0)(%rip), %xmm2   /* S0+z*(S2+z*S4) */
 159         mulsd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 160         /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
 161         mulsd   (%rsi,%rcx,8), %xmm4
 162         /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 163         addsd   %xmm2, %xmm3
 164         /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 165         mulsd   %xmm4, %xmm3
 166         /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
 167         addsd   %xmm4, %xmm3
 168         cvtsd2ss %xmm3, %xmm0           /* SP result */
 169         ret
 170
 171         .p2align        4
 172 L(large_args):
 173         /* Here if |x|>=9*Pi/4 */
             /* On entry: %eax = bits of |x|, %xmm0 = DP |x|, %edi = bits of x.  */
 174         cmpl    $0x7f800000, %eax       /* x is Inf or NaN?  */
             /* The flags of this compare are also consumed by the `jne' at
              * L(arg_inf_or_nan) to tell Inf (equal) from NaN (above).  */
 175         jae     L(arg_inf_or_nan)
 176
 177         /* Here if finite |x|>=9*Pi/4 */
 178         cmpl    $0x4b000000, %eax       /* |x|<2^23?  */
 179         jae     L(very_large_args)
 180
 181         /* Here if 9*Pi/4<=|x|<2^23 */
 182         movsd   L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
 183         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 184         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 185         addl    $1, %eax                /* n = k+1 */
 186         movl    %eax, %edx
 187         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 188         cvtsi2sdl %edx, %xmm4           /* DP j */
 189         movl    %edi, %ecx              /* Load x */
 190         movsd   L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
 191         shrl    $31, %ecx               /* sign bit of x */
 192         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 193         movsd   L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
 194         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 195         mulsd   %xmm3, %xmm4            /* -j*PIO4LO */
 196         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 197         jmp     L(reconstruction)       /* with %eax=n, %xmm0=t, %ecx=sign(x) */
 198
 199         .p2align        4
 200 L(very_large_args):
 201         /* Here if finite |x|>=2^23 */
             /* On entry: %eax = bits of |x| (0x4b000000..0x7f7fffff),
              * %xmm0 = DP |x|, %edi = bits of x.
              * |x|*(4/Pi) is formed from four consecutive pieces of the _FPI
              * table selected by the exponent of x; the integer part gives k,
              * the remainder (times Pi/4) gives t.  */
 202
 203         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 204         shrl    $23, %eax               /* eb = biased exponent of x */
 205         /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
 206         subl    $68, %eax               /* 68 = 0x7f - 59; bitpos is 82..186 here */
 207         movl    $28, %ecx               /* %cl=28 */
 208         movl    %eax, %edx              /* bitpos copy */
 209
 210         /* j = bitpos/28; */
             /* 8-bit divide: dividend is %ax, so no %edx setup is needed;
              * quotient lands in %al, remainder in %ah.  j is 2..6 here.  */
 211         div     %cl                     /* j in register %al=%ax/%cl */
 212         movapd  %xmm0, %xmm3            /* |x| */
 213         /* clear unneeded remainder from %ah */
 214         andl    $0xff, %eax
 215
 216         imull   $28, %eax, %ecx         /* j*28 */
 217         lea     L(_FPI)(%rip), %rsi
 218         movsd   L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
 219         movapd  %xmm0, %xmm5            /* |x| */
 220         mulsd   -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
 221         movapd  %xmm0, %xmm1            /* |x| */
 222         mulsd   -8(%rsi,%rax,8), %xmm5  /* tmp2 = FPI[j-1]*|x| */
 223         mulsd   (%rsi,%rax,8), %xmm0    /* tmp0 = FPI[j]*|x| */
 224         addl    $19, %ecx               /* j*28+19 */
 225         mulsd   8(%rsi,%rax,8), %xmm1   /* tmp1 = FPI[j+1]*|x| */
 226         cmpl    %ecx, %edx              /* bitpos>=j*28+19?  */
 227         jl      L(very_large_skip1)
 228
 229         /* Here if bitpos>=j*28+19 */
             /* Drop the part of tmp3 selected by DP_HI_MASK (upper 32 bits of
              * the double).  NOTE(review): presumably that part is a whole
              * number of periods and so does not affect the result -- confirm.  */
 230         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 231         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 232 L(very_large_skip1):
 233
             /* Adding and then subtracting 2^52 rounds tmp5 to an integer
              * (tmp4); the low 32 bits of the intermediate sum are taken as k.
              * If rounding went up, both k and tmp4 are stepped back by one.  */
 234         movsd   L(DP_2POW52)(%rip), %xmm6
 235         movapd  %xmm5, %xmm2            /* tmp2 copy */
 236         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 237         movl    $1, %edx
 238         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 239         movsd   8+L(DP_2POW52)(%rip), %xmm4 /* -2^52, second entry of DP_2POW52 */
 240         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 241         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 242         movl    %edi, %ecx              /* Load x */
 243         comisd  %xmm5, %xmm4            /* tmp4 > tmp5?  */
 244         jbe     L(very_large_skip2)
 245
 246         /* Here if tmp4 > tmp5 */
 247         subl    $1, %eax                /* k-- */
 248         addsd   8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 (adds the -1.0 entry) */
 249 L(very_large_skip2):
 250
 251         andl    %eax, %edx              /* k&1 */
 252         lea     L(DP_ZERONE)(%rip), %rsi
 253         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 254         addsd   (%rsi,%rdx,8), %xmm3    /* t  = DP_ZERONE[k&1] + tmp3 (0.0 or -1.0) */
 255         addsd   %xmm2, %xmm3            /* t += tmp2 */
 256         shrl    $31, %ecx               /* sign of x */
 257         addsd   %xmm3, %xmm0            /* t += tmp0 */
 258         addl    $1, %eax                /* n=k+1 */
 259         addsd   %xmm1, %xmm0            /* t += tmp1 */
 260         mulsd   L(DP_PIO4)(%rip), %xmm0 /* t *= PIO4 */
 261
 262         jmp     L(reconstruction)       /* end of very_large_args path */
 263
 264         .p2align        4
 265 L(arg_less_pio4):
 266         /* Here if |x|<Pi/4 */
             /* On entry: %eax = bits of |x| (sign bit already cleared),
              * %xmm0 = DP x with its sign.  Evaluates
              * x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) with y=x^2, using two
              * chains in z=y^2: %xmm4 holds the S0/S2/S4 terms, %xmm5 the
              * S1/S3 terms.  */
 267         cmpl    $0x3d000000, %eax       /* |x|<2^-5?  */
             /* %eax is a magnitude, so use the unsigned condition, like the
              * other range checks on |x| at function entry.  */
 268         jb      L(arg_less_2pn5)
 269
 270         /* Here if 2^-5<=|x|<Pi/4 */
 271         movaps  %xmm0, %xmm3            /* x */
 272         mulsd   %xmm0, %xmm0            /* y=x^2 */
 273         movaps  %xmm0, %xmm1            /* y */
 274         mulsd   %xmm0, %xmm0            /* z=x^4 */
 275         movsd   L(DP_S4)(%rip), %xmm4   /* S4 */
 276         mulsd   %xmm0, %xmm4            /* z*S4 */
 277         movsd   L(DP_S3)(%rip), %xmm5   /* S3 */
 278         mulsd   %xmm0, %xmm5            /* z*S3 */
 279         addsd   L(DP_S2)(%rip), %xmm4   /* S2+z*S4 */
 280         mulsd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 281         addsd   L(DP_S1)(%rip), %xmm5   /* S1+z*S3 */
 282         mulsd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 283         addsd   L(DP_S0)(%rip), %xmm4   /* S0+z*(S2+z*S4) */
 284         mulsd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 285         mulsd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 286         mulsd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 287         /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 288         addsd   %xmm5, %xmm4
 289         /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 290         addsd   %xmm4, %xmm3
 291         cvtsd2ss %xmm3, %xmm0           /* SP result */
 292         ret
 293
 294         .p2align        4
 295 L(arg_less_2pn5):
 296         /* Here if |x|<2^-5 */
             /* On entry: %eax = bits of |x| (sign bit already cleared),
              * %xmm0 = DP x with its sign.  Uses the short polynomial
              * x + x^3*DP_SIN2_0 + x^5*DP_SIN2_1.  */
 297         cmpl    $0x32000000, %eax       /* |x|<2^-27?  */
             /* %eax is a magnitude, so use the unsigned condition, like the
              * other range checks on |x| at function entry.  */
 298         jb      L(arg_less_2pn27)
 299
 300         /* Here if 2^-27<=|x|<2^-5 */
 301         movaps  %xmm0, %xmm1            /* DP x */
 302         mulsd   %xmm0, %xmm0            /* DP x^2 */
 303         movsd   L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
 304         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 305         addsd   L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 306         mulsd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 307         mulsd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 308         addsd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 309         cvtsd2ss %xmm3, %xmm0           /* SP result */
 310         ret
 311
 312         .p2align        4
 313 L(arg_less_2pn27):
             /* On entry: %eax = bits of |x|, %xmm0 = DP x, %xmm7 = original
              * SP x.  `testl' sets ZF exactly when |x| has no bits set.  */
 314         testl   %eax, %eax              /* x=0?  */
 315         je      L(arg_zero)             /* in case x=0 return sin(+-0)==+-0 */
 316         /* Here if |x|<2^-27 */
 317         /*
 318          * Special cases here:
 319          *  sin(subnormal) raises inexact/underflow
 320          *  sin(min_normalized) raises inexact/underflow
 321          *  sin(normalized) raises inexact
 322          */
             /* The result is computed in DP and only then narrowed to SP, so
              * the narrowing conversion is the step that can signal the
              * exceptions listed above.  */
 323         movaps  %xmm0, %xmm3            /* Copy of DP x */
 324         mulsd   L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */
 325         subsd   %xmm0, %xmm3            /* Result is x-x*DP_SMALL */
 326         cvtsd2ss %xmm3, %xmm0           /* Result converted to SP */
 327         ret
 328
 329         .p2align        4
 330 L(arg_zero):
             /* Here if x is +0 or -0: return the saved original argument
              * unchanged, so the sign of zero is kept and no arithmetic is
              * performed on it.  */
 331         movaps  %xmm7, %xmm0            /* SP x */
 332         ret
 333
 334         .p2align        4
 335 L(arg_inf_or_nan):
 336         /* Here if |x| is Inf or NAN */
             /* ZF still reflects `cmpl $0x7f800000, %eax' from L(large_args):
              * equal means |x| is Inf, not equal means NaN.  */
 337         jne     L(skip_errno_setting)   /* in case of x is NaN */
 338
 339         /* Here if x is Inf. Set errno to EDOM.  */
             /* Nothing has been pushed since function entry, so %rsp is 8
              * modulo 16 here; drop it by 8 so the stack is 16-byte aligned
              * at the call.  %xmm7 is caller-saved, so x is spilled to the
              * new slot and reloaded after the call instead of being assumed
              * to survive it.  */
             subq    $8, %rsp
             cfi_adjust_cfa_offset (8)
             movss   %xmm7, (%rsp)           /* save SP x */
 340         call    JUMPTARGET(__errno_location)
 341         movl    $EDOM, (%rax)
             movss   (%rsp), %xmm7           /* restore SP x */
             addq    $8, %rsp
             cfi_adjust_cfa_offset (-8)
 342
 343         .p2align        4
 344 L(skip_errno_setting):
 345         /* Here if |x| is Inf or NAN. Continued.  */
             /* x-x: NaN for both Inf and NaN inputs.  */
 346         movaps  %xmm7, %xmm0            /* load x */
 347         subss   %xmm0, %xmm0            /* Result is NaN */
 348         ret
 349 END(__sinf)
 350
 351         .section .rodata, "a"
 352         .p2align 3
             /* Each entry is one double written as two 32-bit words, low word
              * first.  Read by the Pi/4<=|x|<9*Pi/4 path with index
              * j=(k+1)&0x0e, i.e. only even j are ever looked up there.  */
 353 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
 354         .long   0x00000000,0x00000000   /* j=0 */
 355         .long   0x54442d18,0x3fe921fb   /* j=1 */
 356         .long   0x54442d18,0x3ff921fb   /* j=2 */
 357         .long   0x7f3321d2,0x4002d97c   /* j=3 */
 358         .long   0x54442d18,0x400921fb   /* j=4 */
 359         .long   0x2955385e,0x400f6a7a   /* j=5 */
 360         .long   0x7f3321d2,0x4012d97c   /* j=6 */
 361         .long   0xe9bba775,0x4015fdbb   /* j=7 */
 362         .long   0x54442d18,0x401921fb   /* j=8 */
 363         .long   0xbeccb2bb,0x401c463a   /* j=9 */
 364         .long   0x2955385e,0x401f6a7a   /* j=10 */
 365         .type L(PIO4J), @object
 366         ASM_SIZE_DIRECTIVE(L(PIO4J))
 367
 368         .p2align 3
             /* Read by L(very_large_args) as the four consecutive entries
              * FPI[j-2], FPI[j-1], FPI[j], FPI[j+1], where j = bitpos/28 is
              * derived from the exponent of x.  Entry 0 is zero; every entry
              * is one double written low word first.  */
 369 L(_FPI): /* 4/Pi broken into sum of positive DP values */
 370         .long   0x00000000,0x00000000
 371         .long   0x6c000000,0x3ff45f30
 372         .long   0x2a000000,0x3e3c9c88
 373         .long   0xa8000000,0x3c54fe13
 374         .long   0xd0000000,0x3aaf47d4
 375         .long   0x6c000000,0x38fbb81b
 376         .long   0xe0000000,0x3714acc9
 377         .long   0x7c000000,0x3560e410
 378         .long   0x56000000,0x33bca2c7
 379         .long   0xac000000,0x31fbd778
 380         .long   0xe0000000,0x300b7246
 381         .long   0xe8000000,0x2e5d2126
 382         .long   0x48000000,0x2c970032
 383         .long   0xe8000000,0x2ad77504
 384         .long   0xe0000000,0x290921cf
 385         .long   0xb0000000,0x274deb1c
 386         .long   0xe0000000,0x25829a73
 387         .long   0xbe000000,0x23fd1046
 388         .long   0x10000000,0x2224baed
 389         .long   0x8e000000,0x20709d33
 390         .long   0x80000000,0x1e535a2f
 391         .long   0x64000000,0x1cef904e
 392         .long   0x30000000,0x1b0d6398
 393         .long   0x24000000,0x1964ce7d
 394         .long   0x16000000,0x17b908bf
 395         .type L(_FPI), @object
 396         ASM_SIZE_DIRECTIVE(L(_FPI))
 397
 398 /* Coefficients of polynomial
 399    for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
 400         .p2align 3
 401 L(DP_SIN2_0): /* x^3 coefficient (negative), used by L(arg_less_2pn5) */
 402         .long   0x5543d49d,0xbfc55555
 403         .type L(DP_SIN2_0), @object
 404         ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
 405
 406         .p2align 3
 407 L(DP_SIN2_1): /* x^5 coefficient (positive), used by L(arg_less_2pn5) */
 408         .long   0x75cec8c5,0x3f8110f4
 409         .type L(DP_SIN2_1), @object
 410         ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
 411
 412         .p2align 3
             /* Indexed by k&1 in L(very_large_args); the selected value is
              * added to the fractional remainder.  */
 413 L(DP_ZERONE):
 414         .long   0x00000000,0x00000000   /* 0.0 */
 415         .long   0x00000000,0xbff00000   /* -1.0 (sign bit set, same bits as DP_ONES[1]) */
 416         .type L(DP_ZERONE), @object
 417         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 418
 419         .p2align 3
             /* Sign table: entry 0 is read directly as the constant 1.0 and
              * the table is indexed by the 0/1 sign selector in %rcx; the
              * second entry is also read at offset 8 as the constant -1.0.  */
 420 L(DP_ONES):
 421         .long   0x00000000,0x3ff00000   /* +1.0 */
 422         .long   0x00000000,0xbff00000   /* -1.0 */
 423         .type L(DP_ONES), @object
 424         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 425
 426 /* Coefficients of polynomial
 427    for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
             /* The labels below are not stored in index order (S3, S1, S4, S2,
              * S0); each is referenced by name, so the order does not affect
              * the code.  */
 428         .p2align 3
 429 L(DP_S3):
 430         .long   0x64e6b5b4,0x3ec71d72   /* S3 > 0 */
 431         .type L(DP_S3), @object
 432         ASM_SIZE_DIRECTIVE(L(DP_S3))
 433
 434         .p2align 3
 435 L(DP_S1):
 436         .long   0x10c2688b,0x3f811111   /* S1 > 0 */
 437         .type L(DP_S1), @object
 438         ASM_SIZE_DIRECTIVE(L(DP_S1))
 439
 440         .p2align 3
 441 L(DP_S4):
 442         .long   0x1674b58a,0xbe5a947e   /* S4 < 0 */
 443         .type L(DP_S4), @object
 444         ASM_SIZE_DIRECTIVE(L(DP_S4))
 445
 446         .p2align 3
 447 L(DP_S2):
 448         .long   0x8b4bd1f9,0xbf2a019f   /* S2 < 0 */
 449         .type L(DP_S2), @object
 450         ASM_SIZE_DIRECTIVE(L(DP_S2))
 451
 452         .p2align 3
 453 L(DP_S0):
 454         .long   0x55551cd9,0xbfc55555   /* S0 < 0 */
 455         .type L(DP_S0), @object
 456         ASM_SIZE_DIRECTIVE(L(DP_S0))
 457
 458         .p2align 3
             /* Scale factor for the |x|<2^-27 path, which returns
              * x - x*DP_SMALL.  */
 459 L(DP_SMALL):
 460         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 461         .type L(DP_SMALL), @object
 462         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 463
 464 /* Coefficients of polynomial
 465    for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
             /* The labels below are not stored in index order (C3, C1, C4, C2,
              * C0); each is referenced by name, so the order does not affect
              * the code.  */
 466         .p2align 3
 467 L(DP_C3):
 468         .long   0x9ac43cc0,0x3efa00eb   /* C3 > 0 */
 469         .type L(DP_C3), @object
 470         ASM_SIZE_DIRECTIVE(L(DP_C3))
 471
 472         .p2align 3
 473 L(DP_C1):
 474         .long   0x545c50c7,0x3fa55555   /* C1 > 0 */
 475         .type L(DP_C1), @object
 476         ASM_SIZE_DIRECTIVE(L(DP_C1))
 477
 478         .p2align 3
 479 L(DP_C4):
 480         .long   0xdd8844d7,0xbe923c97   /* C4 < 0 */
 481         .type L(DP_C4), @object
 482         ASM_SIZE_DIRECTIVE(L(DP_C4))
 483
 484         .p2align 3
 485 L(DP_C2):
 486         .long   0x348b6874,0xbf56c16b   /* C2 < 0 */
 487         .type L(DP_C2), @object
 488         ASM_SIZE_DIRECTIVE(L(DP_C2))
 489
 490         .p2align 3
 491 L(DP_C0):
 492         .long   0xfffe98ae,0xbfdfffff   /* C0 < 0 */
 493         .type L(DP_C0), @object
 494         ASM_SIZE_DIRECTIVE(L(DP_C0))
 495
 496         .p2align 3
             /* Final scale in L(very_large_args): converts the remainder from
              * units of Pi/4 back to radians.  Same bits as PIO4J[1].  */
 497 L(DP_PIO4):
 498         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 499         .type L(DP_PIO4), @object
 500         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 501
 502         .p2align 3
             /* Pair used by L(very_large_args): the first entry is added and
              * the second (read at offset 8) is added back, which rounds the
              * operand to an integer.  */
 503 L(DP_2POW52):
 504         .long   0x00000000,0x43300000   /* +2^52 */
 505         .long   0x00000000,0xc3300000   /* -2^52 */
 506         .type L(DP_2POW52), @object
 507         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 508
 509         .p2align 3
             /* DP multiplier used by L(large_args) to get |x|/(Pi/4).  */
 510 L(DP_INVPIO4):
 511         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 512         .type L(DP_INVPIO4), @object
 513         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 514
 515         .p2align 3
             /* Both parts are stored negated (sign bit set), so L(large_args)
              * forms |x| - j*Pi/4 with multiplies and adds only.  */
 516 L(DP_PIO4HI):
 517         .long   0x54000000,0xbfe921fb   /* High part of -Pi/4 */
 518         .type L(DP_PIO4HI), @object
 519         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 520
 521         .p2align 3
 522 L(DP_PIO4LO):
 523         .long   0x11A62633,0xbe010b46   /* Low part of -Pi/4 */
 524         .type L(DP_PIO4LO), @object
 525         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 526
 527         .p2align 2
             /* Single precision multiplier used by the Pi/4<=|x|<9*Pi/4 path;
              * one 32-bit word, hence the 4-byte alignment.  */
 528 L(SP_INVPIO4):
 529         .long   0x3fa2f983              /* 4/Pi */
 530         .type L(SP_INVPIO4), @object
 531         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 532
 533         .p2align 4
             /* 16 bytes, 16-byte aligned: used as the memory operand of a
              * packed `andpd'.  Clears bit 63 of each 64-bit lane.  */
 534 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 535         .long   0xffffffff,0x7fffffff
 536         .long   0xffffffff,0x7fffffff
 537         .type L(DP_ABS_MASK), @object
 538         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 539
 540         .p2align 3
             /* Keeps the upper 32 bits of a double (sign, exponent and the top
              * 20 stored mantissa bits) and clears the lower 32.
              * NOTE(review): the "21 bits" below presumably counts the
              * implicit leading significand bit as well -- confirm.  */
 541 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 542         .long   0x00000000,0xffffffff
 543         .type L(DP_HI_MASK),@object
 544         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 545
 546         .p2align 4
             /* Clears bit 31 of each 32-bit lane.  Loaded with `movss' at
              * function entry, so only the first word is actually read by the
              * visible code.  */
 547 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
 548         .long   0x7fffffff,0x7fffffff
 549         .long   0x7fffffff,0x7fffffff
 550         .type L(SP_ABS_MASK), @object
 551         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 552
 553 weak_alias(__sinf, sinf) /* presumably exports __sinf under the public name sinf as a weak symbol; the macro is defined outside this file -- confirm */