sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S

   1 /* Optimized with sse2 version of sincosf
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #define __need_Emath
  21 #include <bits/errno.h>
  22
  23 /* Short algorithm description:
  24  *
  25  *  1) if |x|==0:    sin(x)=x,
  26  *                   cos(x)=1.
  27  *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
  28  *                   cos(x)=1-|x|.
  29  *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
  30  *                   cos(x)=1+1*x^2*DP_COS2_0+x^5*DP_COS2_1
  31  *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
  32  *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
  33  *  5) if |x| < 9*Pi/4:
  34  *      5.1) Range reduction:
  35  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
  36  *      5.2) Reconstruction:
  37  *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
  38  *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
  39  *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
  40  *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*1+1
  41  *          if(n&2 != 0) {
  42  *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
  43  *              cos(x) = poly_sin * sign_cos
  44  *              sin(x) = poly_cos * sign_sin
  45  *          } else {
  46  *              sin(x) = poly_sin * sign_sin
  47  *              cos(x) = poly_cos * sign_cos
  48  *          }
  49  *  6) if |x| < 2^23, large args:
  50  *      6.1) Range reduction:
  51  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
  52  *      6.2) Reconstruction same as (5.2).
  53  *  7) if |x| >= 2^23, very large args:
  54  *      7.1) Range reduction:
  55  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
  56  *      7.2) Reconstruction same as (5.2).
  57  *  8) if x is Inf, return x-x, and set errno=EDOM.
  58  *  9) if x is NaN, return x-x.
  59  *
  60  * Special cases:
  61  *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
  62  *  sin/cos(subnormal) raises inexact/underflow,
  63  *  sin/cos(min_normalized) raises inexact/underflow,
  64  *  sin/cos(normalized) raises inexact,
  65  *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
  66  *  sin/cos(NaN) = NaN.
  67  */
  68
  69 #ifdef  PIC
     /* PIC build.  Local constants are addressed relative to %ebx as
        symbol@GOTOFF(%ebx); ENTRANCE saves the caller's %ebx and then
        invokes LOAD_PIC_REG(bx) (defined outside this file, presumably
        loading the GOT base into %ebx).  Because ENTRANCE pushes one
        register, the stack arguments sit 4 bytes further from %esp than
        in the non-PIC case below.  */
     /* MO1: memory operand for a constant.
        MO2: memory operand for a table indexed by reg2 * _scale.  */
  70 # define MO1(symbol)                    L(symbol)##@GOTOFF(%ebx)
  71 # define MO2(symbol,reg2,_scale)        L(symbol)##@GOTOFF(%ebx,reg2,_scale)
     /* Unwind annotations matching a 4-byte push / pop of REG.  */
  72 # define CFI_PUSH(REG)  cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
  73 # define CFI_POP(REG)   cfi_adjust_cfa_offset(-4); cfi_restore(REG)
  74 # define PUSH(REG)                      pushl REG; CFI_PUSH(REG)
  75 # define POP(REG)                       popl REG; CFI_POP(REG)
  76 # define ENTRANCE                       PUSH(%ebx); LOAD_PIC_REG(bx)
     /* RETURN is used at several exits in the middle of the function.
        The CFI_PUSH after the ret re-applies the "%ebx is pushed"
        annotation that POP just cancelled, for the code that follows.  */
  77 # define RETURN                         POP(%ebx); ret; CFI_PUSH(%ebx)
     /* Stack arguments: return address at 4(%esp), saved %ebx at (%esp).  */
  78 # define ARG_X                          8(%esp)
  79 # define ARG_SIN_PTR                    12(%esp)
  80 # define ARG_COS_PTR                    16(%esp)
  81 #else
     /* Non-PIC build: constants are addressed absolutely, nothing is
        pushed on entry, so arguments start right above the return
        address at (%esp).  */
  82 # define MO1(symbol)                    L(symbol)
  83 # define MO2(symbol,reg2,_scale)        L(symbol)(,reg2,_scale)
  84 # define ENTRANCE
  85 # define RETURN                         ret
  86 # define ARG_X                          4(%esp)
  87 # define ARG_SIN_PTR                    8(%esp)
  88 # define ARG_COS_PTR                    12(%esp)
  89 #endif
  90
  91         .text
  92 ENTRY(__sincosf_sse2)
  93         /* Input: single precision x on stack at address ARG_X */
  94         /*        pointer to sin result on stack at address ARG_SIN_PTR */
  95         /*        pointer to cos result on stack at address ARG_COS_PTR */
             /* Output: SP sin(x) is stored through the pointer at ARG_SIN_PTR, */
             /*         SP cos(x) is stored through the pointer at ARG_COS_PTR. */
             /* The argument range is selected by integer compares on the bit */
             /* pattern of |x|.  With the sign bit cleared the pattern is a */
             /* non-negative integer that orders like the float value, so */
             /* both signed and unsigned compares are valid on it.  */
  96
  97         ENTRANCE
  98         movl    ARG_X, %eax             /* Bits of x */
  99         cvtss2sd ARG_X, %xmm0           /* DP x (sign still present) */
 100         andl    $0x7fffffff, %eax       /* Bits of |x| */
 101
 102         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4 ? (0x3f490fdb = SP Pi/4) */
 103         jb      L(arg_less_pio4)
 104
 105         /* Here if |x|>=Pi/4 */
 106         movd    %eax, %xmm3             /* SP |x| */
 107         andpd   MO1(DP_ABS_MASK),%xmm0  /* DP |x| */
 108         movss   MO1(SP_INVPIO4), %xmm2  /* SP 1/(Pi/4) */
 109
 110         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4 ? */
 111         jae     L(large_args)
 112
 113         /* Here if Pi/4<=|x|<9*Pi/4 */
 114         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
 115         movl    ARG_X, %ecx             /* Load x */
 116         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 117         shrl    $29, %ecx               /* sign of x now in bit 2; bits 1:0 */
                                             /* are dropped in L(reconstruction) */
 118         addl    $1, %eax                /* n = k+1 */
 119         movl    $0x0e, %edx
 120         andl    %eax, %edx              /* j = (k+1)&0x0e, an even index */
 121         subsd   MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
             /* Falls through into L(reconstruction) with %eax=n, %xmm0=t, */
             /* %ecx=x>>29.  */
 122
 123 L(reconstruction):
 124         /* Input: %eax=n, %xmm0=t, %ecx=x>>29 (sign of x in bit 2) */
             /*
              * Common tail of the three range reductions.  Both polynomials
              * are evaluated at once with packed DP arithmetic: the low lane
              * carries the sin polynomial, the high lane the cos polynomial.
              * The integer instructions that compute the two result signs are
              * interleaved with the floating-point chain.
              * Lane notation in the comments below is high|low.
              */
 125
 126         movaps  %xmm0, %xmm4            /* t */
 127         movhpd  MO1(DP_ONES), %xmm4     /* 1|t */
 128         mulsd   %xmm0, %xmm0            /* y=t^2 */
 129         movl    $2, %edx
 130         unpcklpd %xmm0, %xmm0           /* y|y */
 131         addl    %eax, %edx              /* n+2 */
 132         movaps  %xmm0, %xmm1            /* y|y */
 133         mulpd   %xmm0, %xmm0            /* z=t^4|z=t^4 */
 134
             /* The degree-4 polynomial in y is split into two chains in z=y^2: */
             /* %xmm2 takes coefficients 4,2,0 and %xmm3 takes 3,1.  */
 135         movaps  MO1(DP_SC4), %xmm2      /* C4|S4 */
 136         mulpd   %xmm0, %xmm2            /* z*S4 */
 137         movaps  MO1(DP_SC3), %xmm3      /* C3|S3 */
 138         mulpd   %xmm0, %xmm3            /* z*S3 */
 139         xorl    %eax, %ecx              /* bit 2 = sign_x ^ ((n>>2)&1) */
 140         addpd   MO1(DP_SC2), %xmm2      /* S2+z*S4 */
 141         mulpd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 142         shrl    $2, %edx                /* (n+2)>>2 */
 143         addpd   MO1(DP_SC1), %xmm3      /* S1+z*S3 */
 144         mulpd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 145         shrl    $2, %ecx                /* bit 0 = sign_x ^ ((n>>2)&1) */
 146         addpd   MO1(DP_SC0), %xmm2      /* S0+z*(S2+z*S4) */
 147         andl    $1, %edx                /* sign_cos = ((n+2)>>2)&1 */
 148         mulpd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 149         andl    $1, %ecx                /* sign_sin = sign_x ^ ((n>>2)&1) */
 150         addpd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 151         mulpd   %xmm4, %xmm3            /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 152         testl   $2, %eax                /* n&2 != 0 ? (addpd keeps EFLAGS) */
 153         addpd   %xmm4, %xmm3            /* poly_cos|poly_sin */
 154         jnz     L(sin_result_sin_poly)
 155
 156 /*L(sin_result_cos_poly):*/
 157         /*
 158          * Here if n&2 == 0 (the commented-out label name does not match):
 159          * sin(x) = poly_sin * sign_sin
 160          * cos(x) = poly_cos * sign_cos
 161          */
 162         movsd   MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin, as +1.0 or -1.0 */
 163         movhpd  MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
 164         mulpd   %xmm4, %xmm3            /* result_cos|result_sin */
 165         movl    ARG_SIN_PTR, %eax
 166         cvtpd2ps %xmm3, %xmm0           /* SP results */
 167         movl    ARG_COS_PTR, %ecx
 168         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 169         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 170         movss   %xmm0, (%ecx)           /* store cos(x) */
 171         RETURN
 172
 173         .p2align        4
 174 L(sin_result_sin_poly):
 175         /*
 176          * Here if n&2 != 0 (the label name does not match): the two
 177          * polynomials swap roles,
 178          * cos(x) = poly_sin * sign_cos
              * sin(x) = poly_cos * sign_sin
              * %xmm3 holds poly_cos|poly_sin (high|low), so the low lane is
              * given sign_cos and goes to the cos pointer.
 179          */
 180         movsd   MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos, as +1.0 or -1.0 */
 181         movhpd  MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
 182         mulpd   %xmm4, %xmm3            /* result_sin|result_cos */
 183         movl    ARG_SIN_PTR, %eax
 184         cvtpd2ps %xmm3, %xmm0           /* SP results */
 185         movl    ARG_COS_PTR, %ecx
 186         movss   %xmm0, (%ecx)           /* store cos(x) from xmm0[0] */
 187         shufps  $1, %xmm0, %xmm0        /* move sin(x) to xmm0[0] */
 188         movss   %xmm0, (%eax)           /* store sin(x) */
 189         RETURN
 190
 191         .p2align        4
 192 L(large_args):
 193         /* Here if |x|>=9*Pi/4.  On entry %eax = bits of |x|, %xmm0 = DP |x| */
 194         cmpl    $0x7f800000, %eax       /* x is Inf or NaN ? */
             /* The flags of this compare are tested again at */
             /* L(arg_inf_or_nan): equal means Inf, above means NaN.  */
 195         jae     L(arg_inf_or_nan)
 196
 197         /* Here if finite |x|>=9*Pi/4 */
 198         cmpl    $0x4b000000, %eax       /* |x|<2^23 ? */
 199         jae     L(very_large_args)
 200
 201         /* Here if 9*Pi/4<=|x|<2^23 */
             /* j*Pi/4 is subtracted in two steps, using a high and a low */
             /* part of -Pi/4 stored as separate DP constants.  */
 202         movsd   MO1(DP_INVPIO4), %xmm1  /* 1/(Pi/4) */
 203         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 204         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 205         addl    $1, %eax                /* n=k+1 */
 206         movl    %eax, %edx
 207         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 208         cvtsi2sdl %edx, %xmm4           /* DP j */
 209         movl    ARG_X, %ecx             /* Load x */
 210         movsd   MO1(DP_PIO4HI), %xmm2   /* -PIO4HI = high part of -Pi/4 */
 211         shrl    $29, %ecx               /* (sign of x) << 2 */
 212         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 213         movsd   MO1(DP_PIO4LO), %xmm3   /* -PIO4LO = low part of -Pi/4 */
 214         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 215         mulsd   %xmm3, %xmm4            /* -j*PIO4LO */
 216         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 217         jmp     L(reconstruction)       /* %eax=n, %xmm0=t, %ecx=x>>29 */
 218
 219         .p2align        4
 220 L(very_large_args):
 221         /* Here if finite |x|>=2^23.  On entry %eax = bits of |x|, */
             /* %xmm0 = DP |x|.  |x| is multiplied by four consecutive */
             /* pieces of 4/Pi taken from the _FPI table; the pieces are */
             /* chosen from the exponent of x.  */
 222
 223         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 224         shrl    $23, %eax               /* eb = biased exponent of x */
 225         subl    $68, %eax               /* bitpos=eb-0x7f+59, where 0x7f */
 226                                         /* is the exponent bias */
 227         movl    $28, %ecx               /* %cl=28 */
 228         movl    %eax, %edx              /* bitpos copy */
 229
 230         /* j = bitpos/28; */
 231         div     %cl                     /* %al = %ax/%cl = j, %ah = remainder */
 232         movapd  %xmm0, %xmm3            /* |x| */
 233         andl    $0xff, %eax             /* clear unneeded remainder from %ah*/
 234
 235         imull   $28, %eax, %ecx         /* j*28 */
 236         movsd   MO1(DP_HI_MASK), %xmm4  /* DP_HI_MASK */
 237         movapd  %xmm0, %xmm5            /* |x| */
 238         mulsd   -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
 239         movapd  %xmm0, %xmm1            /* |x| */
 240         mulsd   -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
 241         mulsd   0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
 242         addl    $19, %ecx               /* j*28+19 */
 243         mulsd   1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
 244         cmpl    %ecx, %edx              /* bitpos>=j*28+19 ? */
 245         jl      L(very_large_skip1)
 246
 247         /* Here if bitpos>=j*28+19 */
 248         andpd   %xmm3, %xmm4            /* HI(tmp3): low 32 bits cleared */
 249         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 250 L(very_large_skip1):
 251
             /* Adding 2^52 rounds tmp5 to an integer whose low 32 bits */
             /* are then read directly from the DP bit pattern.  */
 252         movsd   MO1(DP_2POW52), %xmm6   /* +2^52 */
 253         movapd  %xmm5, %xmm2            /* tmp2 copy */
 254         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 255         movl    $1, %edx
 256         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 257         movsd   8+MO1(DP_2POW52), %xmm4 /* -2^52 (second table entry) */
 258         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 259         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 260         movl    ARG_X, %ecx             /* Load x */
 261         comisd  %xmm5, %xmm4            /* tmp4 > tmp5 ? */
 262         jbe     L(very_large_skip2)
 263
 264         /* Here if tmp4 > tmp5: the rounding went up, step back by one */
 265         subl    $1, %eax                /* k-- */
 266         addsd   8+MO1(DP_ONES), %xmm4   /* tmp4 -= 1.0 (adds -1.0) */
 267 L(very_large_skip2):
 268
 269         andl    %eax, %edx              /* k&1 */
 270         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 271         addsd   MO2(DP_ZERONE,%edx,8), %xmm3/* t  = DP_ZERONE[k&1] + tmp3 */
                                             /* i.e. adds 0.0 or -1.0 */
 272         addsd   %xmm2, %xmm3            /* t += tmp2 */
 273         shrl    $29, %ecx               /* (sign of x) << 2 */
 274         addsd   %xmm3, %xmm0            /* t += tmp0 */
 275         addl    $1, %eax                /* n=k+1 */
 276         addsd   %xmm1, %xmm0            /* t += tmp1 */
 277         mulsd   MO1(DP_PIO4), %xmm0     /* t *= PIO4 */
 278
 279         jmp     L(reconstruction)       /* end of very_large_args path */
 280
 281         .p2align        4
 282 L(arg_less_pio4):
 283         /* Here if |x|<Pi/4.  On entry %eax = bits of |x|, %xmm0 = DP x */
             /* (signed).  No range reduction is needed; the sign of sin(x) */
             /* comes from the factor x itself.  */
 284         cmpl    $0x3d000000, %eax       /* |x|<2^-5 ? */
 285         jl      L(arg_less_2pn5)
 286
 287         /* Here if 2^-5<=|x|<Pi/4 */
             /* Lane notation in the comments below is high|low: the high */
             /* lane computes cos(x), the low lane sin(x).  */
 288         movaps  %xmm0, %xmm3            /* DP x */
 289         movhpd  MO1(DP_ONES), %xmm3     /* DP 1|x */
 290         mulsd   %xmm0, %xmm0            /* DP y=x^2 */
 291         unpcklpd %xmm0, %xmm0           /* DP y|y */
 292         movaps  %xmm0, %xmm1            /* y|y */
 293         mulpd   %xmm0, %xmm0            /* z=x^4|z=x^4 */
 294
 295         movapd  MO1(DP_SC4), %xmm4      /* C4|S4 */
 296         mulpd   %xmm0, %xmm4            /* z*S4 */
 297         movapd  MO1(DP_SC3), %xmm5      /* C3|S3 */
 298         mulpd   %xmm0, %xmm5            /* z*S3 */
 299         addpd   MO1(DP_SC2), %xmm4      /* S2+z*S4 */
 300         mulpd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 301         addpd   MO1(DP_SC1), %xmm5      /* S1+z*S3 */
 302         mulpd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 303         addpd   MO1(DP_SC0), %xmm4      /* S0+z*(S2+z*S4) */
 304         mulpd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 305         mulpd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 306         mulpd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 307         addpd   %xmm5, %xmm4            /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 308         movl    ARG_SIN_PTR, %eax
 309         addpd   %xmm4, %xmm3            /* DP cos(x)|sin(x) */
 310         movl    ARG_COS_PTR, %ecx
 311         cvtpd2ps %xmm3, %xmm0           /* SP results */
 312         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 313         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 314         movss   %xmm0, (%ecx)           /* store cos(x) */
 315         RETURN
 316
 317         .p2align        4
 318 L(arg_less_2pn5):
 319         /* Here if |x|<2^-5.  On entry %eax = bits of |x|, %xmm0 = DP x */
 320         cmpl    $0x32000000, %eax       /* |x|<2^-27 ? */
 321         jl      L(arg_less_2pn27)
 322
 323         /* Here if 2^-27<=|x|<2^-5 */
             /* Shorter two-coefficient polynomials are used here.  Lane */
             /* notation is high|low: high lane is cos(x), low lane sin(x).  */
 324         movaps  %xmm0, %xmm1            /* DP x */
 325         movhpd  MO1(DP_ONES), %xmm1     /* DP 1|x */
 326         mulsd   %xmm0, %xmm0            /* DP x^2 */
 327         unpcklpd %xmm0, %xmm0           /* DP x^2|x^2 */
 328
 329         movaps  MO1(DP_SINCOS2_1), %xmm3/* DP DP_COS2_1|DP_SIN2_1 */
 330         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 331         addpd   MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 332         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 333         mulpd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 334         addpd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 335         movl    ARG_SIN_PTR, %eax
 336         cvtpd2ps %xmm3, %xmm0           /* SP results */
 337         movl    ARG_COS_PTR, %ecx
 338         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 339         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 340         movss   %xmm0, (%ecx)           /* store cos(x) */
 341         RETURN
 342
 343         .p2align        4
 344 L(arg_less_2pn27):
             /* On entry %eax = bits of |x|, %xmm0 = DP x.  %xmm7 is loaded */
             /* first because L(arg_zero) also reads it.  */
 345         movss   ARG_X, %xmm7            /* SP x */
 346         cmpl    $0, %eax                /* x=0 ? */
 347         je      L(arg_zero)             /* in case x=0 return sin(+-0)==+-0 */
 348         /* Here if |x|<2^-27 */
 349         /*
 350          * Special cases here:
 351          *  sin(subnormal) raises inexact/underflow
 352          *  sin(min_normalized) raises inexact/underflow
 353          *  sin(normalized) raises inexact
 354          *  cos(here)=1-|x| (raising inexact)
 355          */
 356         movaps  %xmm0, %xmm3            /* DP x */
 357         mulsd   MO1(DP_SMALL), %xmm0    /* DP x*DP_SMALL, DP_SMALL=2^(-50) */
 358         subsd   %xmm0, %xmm3            /* DP sin result is x-x*DP_SMALL */
 359         andps   MO1(SP_ABS_MASK), %xmm7 /* SP |x| */
 360         cvtsd2ss %xmm3, %xmm0           /* sin(x), rounded to SP */
 361         movl    ARG_SIN_PTR, %eax
 362         movss   MO1(SP_ONE), %xmm1      /* SP 1.0 */
 363         movss   %xmm0, (%eax)           /* sin(x) store */
 364         movl    ARG_COS_PTR, %ecx
 365         subss   %xmm7, %xmm1            /* cos(x) = 1.0-|x| in SP */
 366         movss   %xmm1, (%ecx)           /* cos(x) store */
 367         RETURN
 368
 369         .p2align        4
 370 L(arg_zero):
             /* Here if x is +0 or -0.  %xmm7 = SP x, loaded at */
             /* L(arg_less_2pn27).  x is stored back unchanged so the sign */
             /* of zero is kept and no arithmetic is performed on it.  */
 371         movss   MO1(SP_ONE), %xmm0      /* 1.0 */
 372         movl    ARG_SIN_PTR, %eax
 373         movl    ARG_COS_PTR, %ecx
 374         movss   %xmm7, (%eax)           /* sin(+-0)==x */
 375         movss   %xmm0, (%ecx)           /* cos(+-0)==1 */
 376         RETURN
 377
 378         .p2align        4
 379 L(arg_inf_or_nan):
 380         /* Here if |x| is Inf or NAN */
             /* EFLAGS still hold the result of "cmpl $0x7f800000, %eax" */
             /* done at L(large_args): equal means Inf, not equal means NaN. */
 381         jne     L(skip_errno_setting)   /* in case of x is NaN */
 382
 383         /* Here if x is Inf. Set errno to EDOM.  */
 384         call    JUMPTARGET(__errno_location)
 385         movl    $EDOM, (%eax)
 386
 387         .p2align        4
 388 L(skip_errno_setting):
 389         /* Here if |x| is Inf or NAN. Continued. */
             /* x is loaded only now, after the call above: XMM registers */
             /* are not preserved across calls, so a value placed in %xmm7 */
             /* before the call could not be relied upon here.  %esp is */
             /* back at its pre-call value, so ARG_X is still valid.  */
 390         movss   ARG_X, %xmm7            /* SP x */
 391         subss   %xmm7, %xmm7            /* x-x, result is NaN */
 392         movl    ARG_SIN_PTR, %eax
 393         movl    ARG_COS_PTR, %ecx
 394         movss   %xmm7, (%eax)           /* sin(x) = NaN */
 395         movss   %xmm7, (%ecx)           /* cos(x) = NaN */
 396         RETURN
 397 END(__sincosf_sse2)
 398
 399         .section .rodata, "a"
 400         .p2align 3
 401 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
             /* Each entry is one DP value written as two 32-bit words, */
             /* low word first.  The Pi/4<=|x|<9*Pi/4 path indexes this */
             /* table with j=(k+1)&0x0e, so only even j are read there.  */
 402         .long   0x00000000,0x00000000
 403         .long   0x54442d18,0x3fe921fb
 404         .long   0x54442d18,0x3ff921fb
 405         .long   0x7f3321d2,0x4002d97c
 406         .long   0x54442d18,0x400921fb
 407         .long   0x2955385e,0x400f6a7a
 408         .long   0x7f3321d2,0x4012d97c
 409         .long   0xe9bba775,0x4015fdbb
 410         .long   0x54442d18,0x401921fb
 411         .long   0xbeccb2bb,0x401c463a
 412         .long   0x2955385e,0x401f6a7a
 413         .type L(PIO4J), @object
 414         ASM_SIZE_DIRECTIVE(L(PIO4J))
 415
 416         .p2align 3
 417 L(_FPI): /* 4/Pi broken into sum of positive DP values */
             /* Entry 0 is 0.0; the following entries decrease in magnitude. */
             /* Every entry has its low 24 bits clear.  */
             /* L(very_large_args) reads the four entries FPI[j-2]..FPI[j+1], */
             /* where j = bitpos/28.  */
 418         .long   0x00000000,0x00000000
 419         .long   0x6c000000,0x3ff45f30
 420         .long   0x2a000000,0x3e3c9c88
 421         .long   0xa8000000,0x3c54fe13
 422         .long   0xd0000000,0x3aaf47d4
 423         .long   0x6c000000,0x38fbb81b
 424         .long   0xe0000000,0x3714acc9
 425         .long   0x7c000000,0x3560e410
 426         .long   0x56000000,0x33bca2c7
 427         .long   0xac000000,0x31fbd778
 428         .long   0xe0000000,0x300b7246
 429         .long   0xe8000000,0x2e5d2126
 430         .long   0x48000000,0x2c970032
 431         .long   0xe8000000,0x2ad77504
 432         .long   0xe0000000,0x290921cf
 433         .long   0xb0000000,0x274deb1c
 434         .long   0xe0000000,0x25829a73
 435         .long   0xbe000000,0x23fd1046
 436         .long   0x10000000,0x2224baed
 437         .long   0x8e000000,0x20709d33
 438         .long   0x80000000,0x1e535a2f
 439         .long   0x64000000,0x1cef904e
 440         .long   0x30000000,0x1b0d6398
 441         .long   0x24000000,0x1964ce7d
 442         .long   0x16000000,0x17b908bf
 443         .type L(_FPI), @object
 444         ASM_SIZE_DIRECTIVE(L(_FPI))
 445
 446 /* Coefficients of polynomials for */
 447 /* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low  DP part, */
 448 /* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
 449 /* for |x|<2^-5. */
     /* Both tables are 16-byte aligned because they are used as 128-bit */
     /* memory operands (movaps / addpd) in L(arg_less_2pn5).  */
 450         .p2align 4
 451 L(DP_SINCOS2_0):
 452         .long   0x5543d49d,0xbfc55555   /* DP_SIN2_0 (low  lane) */
 453         .long   0xff5cc6fd,0xbfdfffff   /* DP_COS2_0 (high lane) */
 454         .type L(DP_SINCOS2_0), @object
 455         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
 456
 457         .p2align 4
 458 L(DP_SINCOS2_1):
 459         .long   0x75cec8c5,0x3f8110f4   /* DP_SIN2_1 (low  lane) */
 460         .long   0xb178dac5,0x3fa55514   /* DP_COS2_1 (high lane) */
 461         .type L(DP_SINCOS2_1), @object
 462         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
 463
 464         .p2align 3
 465 L(DP_ZERONE):
             /* Indexed by k&1 in L(very_large_args) and added to t. */
             /* Note that the second entry is negative.  */
 466         .long   0x00000000,0x00000000   /* 0.0 */
 467         .long   0x00000000,0xbff00000   /* -1.0 */
 468         .type L(DP_ZERONE), @object
 469         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 470
 471         .p2align 3
 472 L(DP_ONES):
             /* Indexed by a 0/1 sign flag to obtain a +1.0/-1.0 multiplier; */
             /* entry 0 alone also supplies the constant 1.0.  */
 473         .long   0x00000000,0x3ff00000   /* +1.0 */
 474         .long   0x00000000,0xbff00000   /* -1.0 */
 475         .type L(DP_ONES), @object
 476         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 477
 478 /* Coefficients of polynomials for */
 479 /* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low  DP part, */
 480 /* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
 481 /* for |t|<Pi/4. */
     /* Each table is 16-byte aligned because it is used as a 128-bit */
     /* memory operand (movaps / movapd / addpd).  */
 482         .p2align 4
 483 L(DP_SC4):
 484         .long   0x1674b58a,0xbe5a947e   /* S4 */
 485         .long   0xdd8844d7,0xbe923c97   /* C4 */
 486         .type L(DP_SC4), @object
 487         ASM_SIZE_DIRECTIVE(L(DP_SC4))
 488
 489         .p2align 4
 490 L(DP_SC3):
 491         .long   0x64e6b5b4,0x3ec71d72   /* S3 */
 492         .long   0x9ac43cc0,0x3efa00eb   /* C3 */
 493         .type L(DP_SC3), @object
 494         ASM_SIZE_DIRECTIVE(L(DP_SC3))
 495
 496         .p2align 4
 497 L(DP_SC2):
 498         .long   0x8b4bd1f9,0xbf2a019f   /* S2 */
 499         .long   0x348b6874,0xbf56c16b   /* C2 */
 500         .type L(DP_SC2), @object
 501         ASM_SIZE_DIRECTIVE(L(DP_SC2))
 502
 503         .p2align 4
 504 L(DP_SC1):
 505         .long   0x10c2688b,0x3f811111   /* S1 */
 506         .long   0x545c50c7,0x3fa55555   /* C1 */
 507         .type L(DP_SC1), @object
 508         ASM_SIZE_DIRECTIVE(L(DP_SC1))
 509
 510         .p2align 4
 511 L(DP_SC0):
 512         .long   0x55551cd9,0xbfc55555   /* S0 */
 513         .long   0xfffe98ae,0xbfdfffff   /* C0 */
 514         .type L(DP_SC0), @object
 515         ASM_SIZE_DIRECTIVE(L(DP_SC0))
 516
 517         .p2align 3
 518 L(DP_SMALL):
 519         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 520         .type L(DP_SMALL), @object
 521         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 522
 523         .p2align 3
 524 L(DP_PIO4):
 525         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 526         .type L(DP_PIO4), @object
 527         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 528
 529         .p2align 3
 530 L(DP_2POW52):
             /* The second entry is addressed as 8+DP_2POW52.  */
 531         .long   0x00000000,0x43300000   /* +2^52 */
 532         .long   0x00000000,0xc3300000   /* -2^52 */
 533         .type L(DP_2POW52), @object
 534         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 535
 536         .p2align 3
 537 L(DP_INVPIO4):
 538         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 539         .type L(DP_INVPIO4), @object
 540         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 541
             /* The next two constants are stored negated (sign bit set), so */
             /* L(large_args) subtracts j*Pi/4 with a multiply and an add.  */
 542         .p2align 3
 543 L(DP_PIO4HI):
 544         .long   0x54000000,0xbfe921fb   /* High part of -Pi/4 */
 545         .type L(DP_PIO4HI), @object
 546         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 547
 548         .p2align 3
 549 L(DP_PIO4LO):
 550         .long   0x11A62633,0xbe010b46   /* Low part of -Pi/4 */
 551         .type L(DP_PIO4LO), @object
 552         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 553
 554         .p2align 2
 555 L(SP_INVPIO4):
 556         .long   0x3fa2f983              /* 4/Pi */
 557         .type L(SP_INVPIO4), @object
 558         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 559
 560         .p2align 4
 561 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
             /* Clears the sign bit of both DP lanes; 16-byte aligned because */
             /* it is used as a 128-bit andpd memory operand.  */
 562         .long   0xffffffff,0x7fffffff
 563         .long   0xffffffff,0x7fffffff
 564         .type L(DP_ABS_MASK), @object
 565         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 566
 567         .p2align 3
 568 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
             /* Clears the low 32 bits of a DP value, keeping the sign, the */
             /* exponent and the top 20 stored fraction bits.  */
 569         .long   0x00000000,0xffffffff
 570         .type L(DP_HI_MASK), @object
 571         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 572
 573         .p2align 4
 574 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
             /* Clears the sign bit of all four SP lanes; 16-byte aligned */
             /* because it is used as a 128-bit andps memory operand.  */
 575         .long   0x7fffffff,0x7fffffff
 576         .long   0x7fffffff,0x7fffffff
 577         .type L(SP_ABS_MASK), @object
 578         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 579
 580         .p2align 2
 581 L(SP_ONE):
 582         .long   0x3f800000              /* 1.0 */
 583         .type L(SP_ONE), @object
 584         ASM_SIZE_DIRECTIVE(L(SP_ONE))
 585
 586 weak_alias(__sincosf, sincosf)
     /* NOTE(review): the entry point defined in the visible part of this
        file is __sincosf_sse2; __sincosf itself is not defined here.
        Presumably it is provided by a separate dispatcher -- confirm.  */