sysdeps/x86_64/fpu/s_sincosf.S

   1 /* Optimized sincosf function.
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #define __need_Emath
  21 #include <bits/errno.h>
  22
  23 /* Short algorithm description:
  24  *
  25  *  1) if |x|==0:    sin(x)=x,
  26  *                   cos(x)=1.
  27  *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
  28  *                   cos(x)=1-|x|.
  29  *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
  30  *                   cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1
  31  *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
  32  *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
  33  *  5) if |x| < 9*Pi/4:
  34  *      5.1) Range reduction:
  35  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
  36  *      5.2) Reconstruction:
  37  *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
  38  *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
  39  *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
  40  *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*1+1
  41  *          if(n&2 != 0) {
  42  *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
  43  *              cos(x) = poly_sin * sign_cos
  44  *              sin(x) = poly_cos * sign_sin
  45  *          } else {
  46  *              sin(x) = poly_sin * sign_sin
  47  *              cos(x) = poly_cos * sign_cos
  48  *          }
  49  *  6) if |x| < 2^23, large args:
  50  *      6.1) Range reduction:
  51  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
  52  *      6.2) Reconstruction same as (5.2).
  53  *  7) if |x| >= 2^23, very large args:
  54  *      7.1) Range reduction:
  55  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
  56  *      7.2) Reconstruction same as (5.2).
  57  *  8) if x is Inf, return x-x, and set errno=EDOM.
  58  *  9) if x is NaN, return x-x.
  59  *
  60  * Special cases:
  61  *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
  62  *  sin/cos(subnormal) raises inexact/underflow,
  63  *  sin/cos(min_normalized) raises inexact/underflow,
  64  *  sin/cos(normalized) raises inexact,
  65  *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
  66  *  sin/cos(NaN) = NaN.
  67  */
  68
  69 # define ARG_SIN_PTR    %rdi
  70 # define ARG_COS_PTR    %rsi
  71
  72         .text
  73 ENTRY(__sincosf)
  74         /* Input: %xmm0 contains single precision argument x */
  75         /*        %rdi points to sin result */
  76         /*        %rsi points to cos result */
             /*
              * Every path stores sin(x) through ARG_SIN_PTR and cos(x)
              * through ARG_COS_PTR before returning.
              *
              * Values established here and consumed by the later paths:
              *   %r8d  = raw bits of x (sign bit still present)
              *   %eax  = bits of |x| (sign bit cleared), used for all the
              *           range checks as an integer compare
              *   %xmm7 = untouched SP copy of x
              *   %xmm0 = x widened to DP
              */
  77
  78         movd    %xmm0, %eax             /* Bits of x */
  79         movaps  %xmm0, %xmm7            /* Copy of x */
  80         cvtss2sd %xmm0, %xmm0           /* DP x */
  81         movss   L(SP_ABS_MASK)(%rip), %xmm3 /* SP abs mask, applied below */
  82         movl    %eax, %r8d              /* Copy of x bits */
  83         andl    $0x7fffffff, %eax       /* |x| */
  84
  85         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4 ? */
  86         jb      L(arg_less_pio4)
  87
  88         /* Here if |x|>=Pi/4 */
  89         andps   %xmm7, %xmm3            /* SP |x| */
  90         andpd   L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
  91         movss   L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
  92
  93         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4 ? */
  94         jae     L(large_args)           /* flags of this cmpl are not used
                                                 there; large_args compares again */
  95
  96         /* Here if Pi/4<=|x|<9*Pi/4 */
  97         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
  98         movl    %r8d, %ecx              /* Load x */
  99         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 100         lea     L(PIO4J)(%rip), %r9     /* table of j*Pi/4 as DP values */
 101         shrl    $29, %ecx               /* (sign of x) << 2 */
 102         addl    $1, %eax                /* k+1 */
 103         movl    $0x0e, %edx
 104         andl    %eax, %edx              /* j = (k+1)&0x0e */
 105         subsd   (%r9,%rdx,8), %xmm0     /* t = |x| - j * Pi/4 */
             /* Falls through into L(reconstruction) with
                %eax=n=k+1, %xmm0=t, %ecx=(sign of x)<<2.  */
 106
 107 L(reconstruction):
 108         /* Input: %eax=n, %xmm0=t, %ecx=(sign of x)<<2 (only bit 2 is used) */
             /*
              * Shared tail of all |x|>=Pi/4 paths.  Both polynomials are
              * evaluated at once in packed DP: the low lane carries the
              * sin terms and the high lane the cos terms.  Lane comments
              * below are written high|low.  Integer sign/quadrant work is
              * interleaved with the FP instructions.
              */
 109
 110         movaps  %xmm0, %xmm4            /* t */
 111         movhpd  L(DP_ONES)(%rip), %xmm4 /* 1|t */
 112         mulsd   %xmm0, %xmm0            /* y=t^2 */
 113         movl    $2, %edx
 114         unpcklpd %xmm0, %xmm0           /* y|y */
 115         addl    %eax, %edx              /* n+2 */
 116         movaps  %xmm0, %xmm1            /* y|y */
 117         mulpd   %xmm0, %xmm0            /* z=t^4|z=t^4 */
 118
 119         movaps  L(DP_SC4)(%rip), %xmm2  /* C4|S4 */
 120         mulpd   %xmm0, %xmm2            /* z*S4 */
 121         movaps  L(DP_SC3)(%rip), %xmm3  /* C3|S3 */
 122         mulpd   %xmm0, %xmm3            /* z*S3 */
 123         xorl    %eax, %ecx              /* (sign_x<<2) ^ n */
 124         addpd   L(DP_SC2)(%rip), %xmm2  /* S2+z*S4 */
 125         mulpd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 126         shrl    $2, %edx                /* (n+2)>>2 */
 127         addpd   L(DP_SC1)(%rip), %xmm3  /* S1+z*S3 */
 128         mulpd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 129         shrl    $2, %ecx                /* sign_x ^ (n>>2) */
 130         addpd   L(DP_SC0)(%rip), %xmm2  /* S0+z*(S2+z*S4) */
 131         andl    $1, %edx                /* sign_cos = ((n+2)>>2)&1 */
 132         mulpd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 133         andl    $1, %ecx                /* sign_sin = sign_x ^ ((n>>2)&1) */
 134         addpd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 135         lea     L(DP_ONES)(%rip), %r9   /* {+1.0,-1.0}, indexed by a sign bit */
 136         mulpd   %xmm4, %xmm3            /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 137         testl   $2, %eax                /* n&2 != 0 ? */
 138         addpd   %xmm4, %xmm3            /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
                                             /* %xmm3 = poly_cos|poly_sin; addpd
                                                leaves the testl flags intact */
 139         jnz     L(sin_result_sin_poly)
 140
 141 /*L(sin_result_cos_poly):*/
 142         /*
 143          * Here if n&2 == 0: each polynomial keeps its own name, i.e.
 144          * sin(x) = poly_sin * sign_sin   (low lane)
 145          * cos(x) = poly_cos * sign_cos   (high lane)
 146          */
 147         movsd   (%r9,%rcx,8), %xmm4     /* 0|sign_sin */
 148         movhpd  (%r9,%rdx,8), %xmm4     /* sign_cos|sign_sin */
 149         mulpd   %xmm4, %xmm3            /* result_cos|result_sin */
 150         cvtpd2ps %xmm3, %xmm0           /* SP results */
 151         movss   %xmm0, (ARG_SIN_PTR)    /* store sin(x) from xmm0[0] */
 152         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 153         movss   %xmm0, (ARG_COS_PTR)    /* store cos(x) */
 154         ret
 155
 156         .p2align        4
 157 L(sin_result_sin_poly):
 158         /*
 159          * Here if n&2 != 0: the two polynomials swap roles, i.e.
 160          * cos(x) = poly_sin * sign_cos   (low lane)
 161          * sin(x) = poly_cos * sign_sin   (high lane)
 162          */
 163         movsd   (%r9,%rdx,8), %xmm4     /* 0|sign_cos */
 164         movhpd  (%r9,%rcx,8), %xmm4     /* sign_sin|sign_cos */
 165         mulpd   %xmm4, %xmm3            /* result_sin|result_cos */
 166         cvtpd2ps %xmm3, %xmm0           /* SP results */
 167         movss   %xmm0, (ARG_COS_PTR)    /* store cos(x) from xmm0[0] */
 168         shufps  $1, %xmm0, %xmm0        /* move sin(x) to xmm0[0] */
 169         movss   %xmm0, (ARG_SIN_PTR)    /* store sin(x) */
 170         ret
 171
 172         .p2align        4
 173 L(large_args):
 174         /* Here if |x|>=9*Pi/4 */
             /* On entry: %eax = bits of |x|, %xmm0 = DP |x|, %r8d = bits of x.  */
 175         cmpl    $0x7f800000, %eax       /* x is Inf or NaN ? */
 176         jae     L(arg_inf_or_nan)       /* target reuses these flags:
                                                 equal means Inf, above means NaN */
 177
 178         /* Here if finite |x|>=9*Pi/4 */
 179         cmpl    $0x4b000000, %eax       /* |x|<2^23 ? */
 180         jae     L(very_large_args)
 181
 182         /* Here if 9*Pi/4<=|x|<2^23 */
 183         movsd   L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
 184         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 185         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 186         addl    $1, %eax                /* k+1 */
 187         movl    %eax, %edx
 188         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 189         cvtsi2sdl %edx, %xmm4           /* DP j */
 190         movl    %r8d, %ecx              /* Load x */
 191         movsd   L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
 192         shrl    $29, %ecx               /* (sign of x) << 2 */
 193         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 194         movsd   L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
 195         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 196         mulsd   %xmm3, %xmm4            /* -j*PIO4LO (the constant is negated) */
 197         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 198         jmp     L(reconstruction)       /* with %eax=n=k+1, %xmm0=t, %ecx */
 199
 200         .p2align        4
 201 L(very_large_args):
 202         /* Here if finite |x|>=2^23 */
             /*
              * On entry: %eax = bits of |x| with 0x4b000000 <= %eax < 0x7f800000,
              * %xmm0 = DP |x|, %r8d = bits of x.  Hence eb is in [150,254],
              * bitpos in [82,186] and j in [2,6], so the _FPI accesses at
              * j-2 .. j+1 below stay inside the table.
              */
 203
 204         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 205         shrl    $23, %eax               /* eb = biased exponent of x */
 206         subl    $68, %eax               /* bitpos=eb-0x7f+59, where 0x7f */
 207                                                         /*is exponent bias */
 208         movl    $28, %ecx               /* %cl=28 */
 209         movl    %eax, %edx              /* bitpos copy */
 210
 211         /* j = bitpos/28; */
 212         div     %cl                     /* j in register %al=%ax/%cl */
 213         movapd  %xmm0, %xmm3            /* |x| */
 214         andl    $0xff, %eax             /* clear unneeded remainder from %ah*/
 215
 216         imull   $28, %eax, %ecx         /* j*28 */
 217         lea     L(_FPI)(%rip), %r9
 218         movsd   L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
 219         movapd  %xmm0, %xmm5            /* |x| */
 220         mulsd   -16(%r9,%rax,8), %xmm3  /* tmp3 = FPI[j-2]*|x| */
 221         movapd  %xmm0, %xmm1            /* |x| */
 222         mulsd   -8(%r9,%rax,8), %xmm5   /* tmp2 = FPI[j-1]*|x| */
 223         mulsd   (%r9,%rax,8), %xmm0     /* tmp0 = FPI[j]*|x| */
 224         addl    $19, %ecx               /* j*28+19 */
 225         mulsd   8(%r9,%rax,8), %xmm1    /* tmp1 = FPI[j+1]*|x| */
 226         cmpl    %ecx, %edx              /* bitpos>=j*28+19 ? */
 227         jl      L(very_large_skip1)
 228
 229         /* Here if bitpos>=j*28+19 */
             /* Drop the part of tmp3 held in its upper 32 bits.  */
 230         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 231         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 232 L(very_large_skip1):
 233
             /* Take the integer part of tmp3+tmp2 by adding and then
                removing 2^52; the low 32 bits of the biased sum give k.  */
 234         movsd   L(DP_2POW52)(%rip), %xmm6
 235         movapd  %xmm5, %xmm2            /* tmp2 copy */
 236         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 237         movl    $1, %edx
 238         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 239         movsd   8+L(DP_2POW52)(%rip), %xmm4 /* -2^52 */
 240         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 241         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 242         movl    %r8d, %ecx              /* Load x */
 243         comisd  %xmm5, %xmm4            /* tmp4 > tmp5 ? */
 244         jbe     L(very_large_skip2)
 245
 246         /* Here if tmp4 > tmp5 */
             /* The addition rounded up, so step back to the truncated value.  */
 247         subl    $1, %eax                /* k-- */
 248         addsd   8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
 249 L(very_large_skip2):
 250
 251         andl    %eax, %edx              /* k&1 */
 252         lea     L(DP_ZERONE)(%rip), %r9 /* {0.0,-1.0}, indexed by k&1 */
 253         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 254         addsd   (%r9,%rdx,8), %xmm3     /* t  = DP_ZERONE[k&1] + tmp3 */
 255         addsd   %xmm2, %xmm3            /* t += tmp2 */
 256         shrl    $29, %ecx               /* (sign of x) << 2 */
 257         addsd   %xmm3, %xmm0            /* t += tmp0 */
 258         addl    $1, %eax                /* n=k+1 */
 259         addsd   %xmm1, %xmm0            /* t += tmp1 */
 260         mulsd   L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */
 261
 262         jmp     L(reconstruction)       /* end of very_large_args path */
 263
 264         .p2align        4
 265 L(arg_less_pio4):
 266         /* Here if |x|<Pi/4 */
             /* On entry: %eax = bits of |x| (sign bit clear, so the signed
                jl below orders the values like an unsigned compare),
                %xmm0 = DP x with its sign, %xmm7 = SP x.  */
 267         cmpl    $0x3d000000, %eax       /* |x|<2^-5 ? */
 268         jl      L(arg_less_2pn5)
 269
 270         /* Here if 2^-5<=|x|<Pi/4 */
             /* No range reduction: the packed polynomials are applied to x
                itself; low lane = sin terms, high lane = cos terms
                (lane comments are written high|low).  */
 271         movaps  %xmm0, %xmm3            /* DP x */
 272         movhpd  L(DP_ONES)(%rip), %xmm3 /* DP 1|x */
 273         mulsd   %xmm0, %xmm0            /* DP y=x^2 */
 274         unpcklpd %xmm0, %xmm0           /* DP y|y */
 275         movaps  %xmm0, %xmm1            /* y|y */
 276         mulpd   %xmm0, %xmm0            /* z=x^4|z=x^4 */
 277
 278         movapd  L(DP_SC4)(%rip), %xmm4  /* S4 */
 279         mulpd   %xmm0, %xmm4            /* z*S4 */
 280         movapd  L(DP_SC3)(%rip), %xmm5  /* S3 */
 281         mulpd   %xmm0, %xmm5            /* z*S3 */
 282         addpd   L(DP_SC2)(%rip), %xmm4  /* S2+z*S4 */
 283         mulpd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 284         addpd   L(DP_SC1)(%rip), %xmm5  /* S1+z*S3 */
 285         mulpd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 286         addpd   L(DP_SC0)(%rip), %xmm4  /* S0+z*(S2+z*S4) */
 287         mulpd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 288         mulpd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 289         mulpd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 290         addpd   %xmm5, %xmm4            /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 291         addpd   %xmm4, %xmm3            /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
 292         cvtpd2ps %xmm3, %xmm0           /* SP results */
 293         movss   %xmm0, (ARG_SIN_PTR)    /* store sin(x) from xmm0[0] */
 294         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 295         movss   %xmm0, (ARG_COS_PTR)    /* store cos(x) */
 296         ret
 297
 298         .p2align        4
 299 L(arg_less_2pn5):
 300         /* Here if |x|<2^-5 */
 301         cmpl    $0x32000000, %eax       /* |x|<2^-27 ? */
 302         jl      L(arg_less_2pn27)
 303
 304         /* Here if 2^-27<=|x|<2^-5 */
             /* Short polynomials, again packed: the low lane computes
                sin(x)=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 and the high lane
                cos(x)=1+x^2*DP_COS2_0+x^4*DP_COS2_1.  The per-line comments
                below describe the low (sin) lane.  */
 305         movaps  %xmm0, %xmm1            /* DP x */
 306         movhpd  L(DP_ONES)(%rip), %xmm1 /* DP 1|x */
 307         mulsd   %xmm0, %xmm0            /* DP x^2 */
 308         unpcklpd %xmm0, %xmm0           /* DP x^2|x^2 */
 309
 310         movaps  L(DP_SINCOS2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
 311         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 312         addpd   L(DP_SINCOS2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 313         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 314         mulpd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 315         addpd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 316         cvtpd2ps %xmm3, %xmm0           /* SP results */
 317         movss   %xmm0, (ARG_SIN_PTR)    /* store sin(x) from xmm0[0] */
 318         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 319         movss   %xmm0, (ARG_COS_PTR)    /* store cos(x) */
 320         ret
 321
 322         .p2align        4
 323 L(arg_less_2pn27):
             /* On entry: %eax = bits of |x|, %xmm0 = DP x, %xmm7 = SP x.  */
 324         cmpl    $0, %eax                /* x=0 ? */
 325         je      L(arg_zero)             /* in case x=0 return sin(+-0)==+-0 */
 326         /* Here if |x|<2^-27 */
 327         /*
 328          * Special cases here:
 329          *  sin(subnormal) raises inexact/underflow
 330          *  sin(min_normalized) raises inexact/underflow
 331          *  sin(normalized) raises inexact
 332          *  cos(here)=1-|x| (raising inexact)
 333          */
 334         movaps  %xmm0, %xmm3            /* DP x */
 335         mulsd   L(DP_SMALL)(%rip), %xmm0/* DP x*DP_SMALL */
 336         subsd   %xmm0, %xmm3            /* DP sin result is x-x*DP_SMALL */
 337         andps   L(SP_ABS_MASK)(%rip), %xmm7/* SP |x| */
 338         cvtsd2ss %xmm3, %xmm0           /* sin(x) */
 339         movss   L(SP_ONE)(%rip), %xmm1  /* SP 1.0 */
 340         movss   %xmm0, (ARG_SIN_PTR)    /* sin(x) store */
 341         subss   %xmm7, %xmm1            /* cos(x) */
 342         movss   %xmm1, (ARG_COS_PTR)    /* cos(x) store */
 343         ret
 344
 345         .p2align        4
 346 L(arg_zero):
             /* Here if x is +0 or -0.  Results are stored without any
                arithmetic: sin gets x itself (sign of zero kept) and cos
                gets the constant 1.0.  */
 347         movss   L(SP_ONE)(%rip), %xmm0  /* 1.0 */
 348         movss   %xmm7, (ARG_SIN_PTR)    /* sin(+-0)==x */
 349         movss   %xmm0, (ARG_COS_PTR)    /* cos(+-0)==1 */
 350         ret
 351
 352         .p2align        4
 353 L(arg_inf_or_nan):
 354         /* Here if |x| is Inf or NAN */
             /* Flags still come from "cmpl $0x7f800000, %eax" in
                L(large_args): equal means x is +-Inf, not equal means NaN.  */
 355         jne     L(skip_errno_setting)   /* in case of x is NaN */
 356
 357         /* Here if x is Inf. Set errno to EDOM.  */
             /*
              * __errno_location is an ordinary function call, so:
              *  - the stack must be 16-byte aligned at the call.  %rsp is
              *    8 mod 16 on entry and is not adjusted anywhere else, so
              *    reserving 24 bytes restores the alignment;
              *  - %rdi, %rsi and %xmm7 are caller-saved, yet the result
              *    pointers and x are still needed afterwards, so they are
              *    spilled into the reserved area and reloaded.
              */
             subq    $24, %rsp
             cfi_adjust_cfa_offset (24)
             movq    ARG_SIN_PTR, (%rsp)     /* save sin result pointer */
             movq    ARG_COS_PTR, 8(%rsp)    /* save cos result pointer */
             movss   %xmm7, 16(%rsp)         /* save x */
 358         call    JUMPTARGET(__errno_location)
 359         movl    $EDOM, (%rax)
             movss   16(%rsp), %xmm7         /* restore x */
             movq    8(%rsp), ARG_COS_PTR    /* restore cos result pointer */
             movq    (%rsp), ARG_SIN_PTR     /* restore sin result pointer */
             addq    $24, %rsp
             cfi_adjust_cfa_offset (-24)
 360
 361         .p2align        4
 362 L(skip_errno_setting):
 363         /* Here if |x| is Inf or NAN. Continued. */
             /* Both results are x-x computed on the SP copy of x.  */
 364         subss   %xmm7, %xmm7            /* x-x, result is NaN */
 365         movss   %xmm7, (ARG_SIN_PTR)
 366         movss   %xmm7, (ARG_COS_PTR)
 367         ret
 368 END(__sincosf)
 369
 370         .section .rodata, "a"
 371         .p2align 3
 372 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
             /* Each entry is one DP value written as two .long words, low
                word first.  The Pi/4<=|x|<9*Pi/4 path indexes this table
                with j=(k+1)&0x0e, i.e. with even j only.  */
 373         .long   0x00000000,0x00000000   /*  0*Pi/4 */
 374         .long   0x54442d18,0x3fe921fb   /*  1*Pi/4 */
 375         .long   0x54442d18,0x3ff921fb   /*  2*Pi/4 */
 376         .long   0x7f3321d2,0x4002d97c   /*  3*Pi/4 */
 377         .long   0x54442d18,0x400921fb   /*  4*Pi/4 */
 378         .long   0x2955385e,0x400f6a7a   /*  5*Pi/4 */
 379         .long   0x7f3321d2,0x4012d97c   /*  6*Pi/4 */
 380         .long   0xe9bba775,0x4015fdbb   /*  7*Pi/4 */
 381         .long   0x54442d18,0x401921fb   /*  8*Pi/4 */
 382         .long   0xbeccb2bb,0x401c463a   /*  9*Pi/4 */
 383         .long   0x2955385e,0x401f6a7a   /* 10*Pi/4 */
 384         .type L(PIO4J), @object
 385         ASM_SIZE_DIRECTIVE(L(PIO4J))
 386
 387         .p2align 3
 388 L(_FPI): /* 4/Pi broken into sum of positive DP values */
             /* 25 DP entries, low word first.  Entry 0 is 0.0, which lets
                L(very_large_args) read FPI[j-2] even for its smallest j;
                that path multiplies |x| by the four entries j-2 .. j+1.  */
 389         .long   0x00000000,0x00000000
 390         .long   0x6c000000,0x3ff45f30
 391         .long   0x2a000000,0x3e3c9c88
 392         .long   0xa8000000,0x3c54fe13
 393         .long   0xd0000000,0x3aaf47d4
 394         .long   0x6c000000,0x38fbb81b
 395         .long   0xe0000000,0x3714acc9
 396         .long   0x7c000000,0x3560e410
 397         .long   0x56000000,0x33bca2c7
 398         .long   0xac000000,0x31fbd778
 399         .long   0xe0000000,0x300b7246
 400         .long   0xe8000000,0x2e5d2126
 401         .long   0x48000000,0x2c970032
 402         .long   0xe8000000,0x2ad77504
 403         .long   0xe0000000,0x290921cf
 404         .long   0xb0000000,0x274deb1c
 405         .long   0xe0000000,0x25829a73
 406         .long   0xbe000000,0x23fd1046
 407         .long   0x10000000,0x2224baed
 408         .long   0x8e000000,0x20709d33
 409         .long   0x80000000,0x1e535a2f
 410         .long   0x64000000,0x1cef904e
 411         .long   0x30000000,0x1b0d6398
 412         .long   0x24000000,0x1964ce7d
 413         .long   0x16000000,0x17b908bf
 414         .type L(_FPI), @object
 415         ASM_SIZE_DIRECTIVE(L(_FPI))
 416
 417 /* Coefficients of polynomials for */
 418 /* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low  DP part, */
 419 /* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
 420 /* for |x|<2^-5. */
     /* Both objects are 16-byte aligned because the code uses them as */
     /* full 128-bit memory operands of movaps/addpd. */
 421         .p2align 4
 422 L(DP_SINCOS2_0):
 423         .long   0x5543d49d,0xbfc55555   /* DP_SIN2_0 (low lane) */
 424         .long   0xff5cc6fd,0xbfdfffff   /* DP_COS2_0 (high lane) */
 425         .type L(DP_SINCOS2_0), @object
 426         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
 427
 428         .p2align 4
 429 L(DP_SINCOS2_1):
 430         .long   0x75cec8c5,0x3f8110f4   /* DP_SIN2_1 (low lane) */
 431         .long   0xb178dac5,0x3fa55514   /* DP_COS2_1 (high lane) */
 432         .type L(DP_SINCOS2_1), @object
 433         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
 434
 435
 436         .p2align 3
 437 L(DP_ZERONE):
             /* Indexed by k&1 in L(very_large_args); the second entry is
                negative one.  */
 438         .long   0x00000000,0x00000000   /* 0.0 */
 439         .long   0x00000000,0xbff00000   /* -1.0 */
 440         .type L(DP_ZERONE), @object
 441         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 442
 443         .p2align 3
 444 L(DP_ONES):
             /* Indexed by a sign bit (0 -> +1.0, 1 -> -1.0) when the signs
                are applied to the results; entry 0 also supplies the 1.0
                placed in the high lane next to t or x.  */
 445         .long   0x00000000,0x3ff00000   /* +1.0 */
 446         .long   0x00000000,0xbff00000   /* -1.0 */
 447         .type L(DP_ONES), @object
 448         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 449
 450 /* Coefficients of polynomials for */
 451 /* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low  DP part, */
 452 /* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
 453 /* for |t|<Pi/4. */
     /* Each DP_SCn object packs Sn (first, low lane) with Cn (second, high */
     /* lane) and is 16-byte aligned because it is used as a 128-bit memory */
     /* operand of movaps/movapd/addpd. */
 454         .p2align 4
 455 L(DP_SC4):
 456         .long   0x1674b58a,0xbe5a947e   /* S4 */
 457         .long   0xdd8844d7,0xbe923c97   /* C4 */
 458         .type L(DP_SC4), @object
 459         ASM_SIZE_DIRECTIVE(L(DP_SC4))
 460
 461         .p2align 4
 462 L(DP_SC3):
 463         .long   0x64e6b5b4,0x3ec71d72   /* S3 */
 464         .long   0x9ac43cc0,0x3efa00eb   /* C3 */
 465         .type L(DP_SC3), @object
 466         ASM_SIZE_DIRECTIVE(L(DP_SC3))
 467
 468         .p2align 4
 469 L(DP_SC2):
 470         .long   0x8b4bd1f9,0xbf2a019f   /* S2 */
 471         .long   0x348b6874,0xbf56c16b   /* C2 */
 472         .type L(DP_SC2), @object
 473         ASM_SIZE_DIRECTIVE(L(DP_SC2))
 474
 475         .p2align 4
 476 L(DP_SC1):
 477         .long   0x10c2688b,0x3f811111   /* S1 */
 478         .long   0x545c50c7,0x3fa55555   /* C1 */
 479         .type L(DP_SC1), @object
 480         ASM_SIZE_DIRECTIVE(L(DP_SC1))
 481
 482         .p2align 4
 483 L(DP_SC0):
 484         .long   0x55551cd9,0xbfc55555   /* S0 */
 485         .long   0xfffe98ae,0xbfdfffff   /* C0 */
 486         .type L(DP_SC0), @object
 487         ASM_SIZE_DIRECTIVE(L(DP_SC0))
 488
 489         .p2align 3
 490 L(DP_SMALL):
             /* Scale used for |x|<2^-27, where sin(x) = x - x*DP_SMALL.  */
 491         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 492         .type L(DP_SMALL), @object
 493         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 494
 495         .p2align 3
 496 L(DP_PIO4):
 497         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 498         .type L(DP_PIO4), @object
 499         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 500
 501         .p2align 3
 502 L(DP_2POW52):
             /* The pair is added in sequence in L(very_large_args) to
                extract the integer part of a DP value.  */
 503         .long   0x00000000,0x43300000   /* +2^52 */
 504         .long   0x00000000,0xc3300000   /* -2^52 */
 505         .type L(DP_2POW52), @object
 506         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 507
 508         .p2align 3
 509 L(DP_INVPIO4):
 510         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 511         .type L(DP_INVPIO4), @object
 512         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 513
 514         .p2align 3
 515 L(DP_PIO4HI):
             /* Stored negated (sign bit set) so that L(large_args) can
                subtract j*Pi/4 with mulsd/addsd.  */
 516         .long   0x54000000,0xbfe921fb   /* High part of -Pi/4 */
 517         .type L(DP_PIO4HI), @object
 518         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 519
 520         .p2align 3
 521 L(DP_PIO4LO):
             /* Also stored negated, see DP_PIO4HI.  */
 522         .long   0x11A62633,0xbe010b46   /* Low part of -Pi/4 */
 523         .type L(DP_PIO4LO), @object
 524         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 525
 526         .p2align 2
 527 L(SP_INVPIO4):
 528         .long   0x3fa2f983              /* 4/Pi */
 529         .type L(SP_INVPIO4), @object
 530         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 531
 532         .p2align 4
 533 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
             /* Clears the sign bit of both DP lanes; 16-byte aligned because
                it is used as a 128-bit memory operand of andpd.  */
 534         .long   0xffffffff,0x7fffffff
 535         .long   0xffffffff,0x7fffffff
 536         .type L(DP_ABS_MASK), @object
 537         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 538
 539         .p2align 3
 540 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
             /* Keeps the upper 32 bits of a DP value: sign, 11 exponent bits
                and the top 20 stored mantissa bits (21 significant bits
                when the implicit leading one is counted).  */
 541         .long   0x00000000,0xffffffff
 542         .type L(DP_HI_MASK), @object
 543         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 544
 545         .p2align 4
 546 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
             /* Four identical SP lanes; 16-byte aligned because one use is
                a 128-bit memory operand of andps.  */
 547         .long   0x7fffffff,0x7fffffff
 548         .long   0x7fffffff,0x7fffffff
 549         .type L(SP_ABS_MASK), @object
 550         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 551
 552         .p2align 2
 553 L(SP_ONE):
 554         .long   0x3f800000              /* 1.0 */
 555         .type L(SP_ONE), @object
 556         ASM_SIZE_DIRECTIVE(L(SP_ONE))
 557
     /* Make the implementation also available under the name sincosf. */
 558 weak_alias(__sincosf, sincosf)