sysdeps/x86_64/fpu/s_cosf.S

   1 /* Optimized cosf function.
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #define __need_Emath
  21 #include <bits/errno.h>
  22
  23 /* Short algorithm description:
  24  *
  25  *  1) if |x| == 0: return 1.0-|x|.
  26  *  2) if |x| <  2^-27: return 1.0-|x|.
  27  *  3) if |x| <  2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
  28  *  4) if |x| <   Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
  29  *  5) if |x| < 9*Pi/4:
  30  *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
  31  *           t=|x|-j*Pi/4.
  32  *      5.2) Reconstruction:
  33  *          s = (-1.0)^((n>>2)&1)
  34  *          if(n&2 != 0) {
  35  *              using cos(t) polynomial for |t|<Pi/4, result is
  36  *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
  37  *          } else {
  38  *              using sin(t) polynomial for |t|<Pi/4, result is
  39  *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
  40  *          }
  41  *  6) if |x| < 2^23, large args:
  42  *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
  43  *           t=|x|-j*Pi/4.
  44  *      6.2) Reconstruction same as (5.2).
  45  *  7) if |x| >= 2^23, very large args:
  46  *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
  47  *           t=|x|-j*Pi/4.
  48  *      7.2) Reconstruction same as (5.2).
  49  *  8) if x is Inf, return x-x, and set errno=EDOM.
  50  *  9) if x is NaN, return x-x.
  51  *
  52  * Special cases:
  53  *  cos(+-0) = 1 not raising inexact,
  54  *  cos(subnormal) raises inexact,
  55  *  cos(min_normalized) raises inexact,
  56  *  cos(normalized) raises inexact,
  57  *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
  58  *  cos(NaN) = NaN.
  59  */
  60
  61         .text
  62 ENTRY(__cosf)
  63         /* Input: single precision x in %xmm0 */
             /* Output: single precision result in %xmm0.
                Entry code classifies |x| by its integer bit pattern and
                dispatches to the matching path.  %eax carries the bits of |x|
                into the branch targets, and %xmm7 keeps the original x for the
                tiny-argument and Inf/NaN paths.  */
  64
  65         movd    %xmm0, %eax             /* Bits of x */
  66         movaps  %xmm0, %xmm7            /* Copy of x */
  67         cvtss2sd %xmm0, %xmm0           /* DP x */
  68         movss   L(SP_ABS_MASK)(%rip), %xmm3     /* SP abs-value mask */
  69         andl    $0x7fffffff, %eax       /* |x| */
  70
  71         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4?  */
  72         jb      L(arg_less_pio4)
  73
  74         /* Here if |x|>=Pi/4 */
  75         andps   %xmm7, %xmm3            /* SP |x| */
  76         andpd   L(DP_ABS_MASK)(%rip), %xmm0     /* DP |x| */
  77         movss   L(SP_INVPIO4)(%rip), %xmm2      /* SP 1/(Pi/4) */
  78
  79         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4?  */
  80         jae     L(large_args)
  81
  82         /* Here if Pi/4<=|x|<9*Pi/4 */
  83         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
  84         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
  85         lea     L(PIO4J)(%rip), %rsi    /* table of j*Pi/4 as doubles */
  86         addl    $1, %eax                /* k+1 */
  87         movl    $0x0e, %edx             /* mask that clears bit 0, making j even */
  88         andl    %eax, %edx              /* j = (k+1)&0x0e */
  89         addl    $2, %eax                /* n */
  90         subsd   (%rsi,%rdx,8), %xmm0    /* t = |x| - j * Pi/4 */
  91
  92 L(reconstruction):
  93         /* Input: %eax=n, %xmm0=t */
             /* Shared tail of all range-reduced paths: bit 1 of n selects the
                cos(t) or sin(t) polynomial, bit 2 of n selects the sign.  */
  94         testl   $2, %eax                /* n&2 != 0?  */
  95         jz      L(sin_poly)
  96
  97 /*L(cos_poly):*/
  98         /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
  99          * y = t*t; z = y*y;
 100          * s = (-1.0)^((n>>2)&1)
 101          * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 102          */
             /* The polynomial is evaluated as two independent chains in z
                (%xmm4 for C0/C2/C4 and %xmm3 for C1/C3) that are added at the
                end.  */
 103         shrl    $2, %eax                /* n>>2 */
 104         mulsd   %xmm0, %xmm0            /* y=t^2 */
 105         andl    $1, %eax                /* (n>>2)&1 */
 106         movaps  %xmm0, %xmm1            /* y */
 107         mulsd   %xmm0, %xmm0            /* z=t^4 */
 108
 109         movsd   L(DP_C4)(%rip), %xmm4   /* C4 */
 110         mulsd   %xmm0, %xmm4            /* z*C4 */
 111         movsd   L(DP_C3)(%rip), %xmm3   /* C3 */
 112         mulsd   %xmm0, %xmm3            /* z*C3 */
 113         lea     L(DP_ONES)(%rip), %rsi  /* sign table {+1.0, -1.0} */
 114         addsd   L(DP_C2)(%rip), %xmm4   /* C2+z*C4 */
 115         mulsd   %xmm0, %xmm4            /* z*(C2+z*C4) */
 116         addsd   L(DP_C1)(%rip), %xmm3   /* C1+z*C3 */
 117         mulsd   %xmm0, %xmm3            /* z*(C1+z*C3) */
 118         addsd   L(DP_C0)(%rip), %xmm4   /* C0+z*(C2+z*C4) */
 119         mulsd   %xmm1, %xmm4            /* y*(C0+z*(C2+z*C4)) */
 120
 121         addsd   %xmm4, %xmm3            /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 122         /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 123         addsd   L(DP_ONES)(%rip), %xmm3
 124
 125         mulsd   (%rsi,%rax,8), %xmm3    /* DP result */
 126         cvtsd2ss %xmm3, %xmm0           /* SP result */
 127         ret
 128
 129         .p2align        4
 130 L(sin_poly):
 131         /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
 132          * y = t*t; z = y*y;
 133          * s = (-1.0)^((n>>2)&1)
 134          * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 135          */
             /* Input: %eax=n, %xmm0=t (reached from L(reconstruction) when
                n&2 == 0).  The polynomial is split into two chains in z:
                %xmm2 for S0/S2/S4 and %xmm3 for S1/S3.  */
 136
 137         movaps  %xmm0, %xmm4            /* t */
 138         shrl    $2, %eax                /* n>>2 */
 139         mulsd   %xmm0, %xmm0            /* y=t^2 */
 140         andl    $1, %eax                /* (n>>2)&1 */
 141         movaps  %xmm0, %xmm1            /* y */
 142         mulsd   %xmm0, %xmm0            /* z=t^4 */
 143
 144         movsd   L(DP_S4)(%rip), %xmm2   /* S4 */
 145         mulsd   %xmm0, %xmm2            /* z*S4 */
 146         movsd   L(DP_S3)(%rip), %xmm3   /* S3 */
 147         mulsd   %xmm0, %xmm3            /* z*S3 */
 148         lea     L(DP_ONES)(%rip), %rsi  /* sign table {+1.0, -1.0} */
 149         addsd   L(DP_S2)(%rip), %xmm2   /* S2+z*S4 */
 150         mulsd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 151         addsd   L(DP_S1)(%rip), %xmm3   /* S1+z*S3 */
 152         mulsd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 153         addsd   L(DP_S0)(%rip), %xmm2   /* S0+z*(S2+z*S4) */
 154         mulsd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 155         /* t*s, where s = (-1.0)^((n>>2)&1) */
 156         mulsd   (%rsi,%rax,8), %xmm4
 157         /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 158         addsd   %xmm2, %xmm3
 159         /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 160         mulsd   %xmm4, %xmm3
 161         /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 162         addsd   %xmm4, %xmm3
 163         cvtsd2ss %xmm3, %xmm0           /* SP result */
 164         ret
 165
 166         .p2align        4
 167 L(large_args):
 168         /* Here if |x|>=9*Pi/4 */
             /* Input: %eax=bits of |x|, %xmm0=DP |x|.  Finite arguments below
                2^23 are reduced here with a two-part (high+low) Pi/4; the
                stored parts are negated, so the reduction uses additions.  */
 169         cmpl    $0x7f800000, %eax       /* x is Inf or NaN?  */
             /* The flags of this compare are tested again by the jne at
                L(arg_inf_or_nan) to tell Inf from NaN.  */
 170         jae     L(arg_inf_or_nan)
 171
 172         /* Here if finite |x|>=9*Pi/4 */
 173         cmpl    $0x4b000000, %eax       /* |x|<2^23?  */
 174         jae     L(very_large_args)
 175
 176         /* Here if 9*Pi/4<=|x|<2^23 */
 177         movsd   L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
 178         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 179         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 180         addl    $1, %eax                /* k+1 */
 181         movl    %eax, %edx
 182         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 183         cvtsi2sdl %edx, %xmm4           /* DP j */
 184         movsd   L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
 185         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 186         movsd   L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
 187         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 188         addl    $2, %eax                /* n */
 189         mulsd   %xmm3, %xmm4            /* -j*PIO4LO */
 190         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 191         jmp     L(reconstruction)
 192
 193         .p2align        4
 194 L(very_large_args):
 195         /* Here if finite |x|>=2^23 */
             /* Input: %eax=bits of |x|, %xmm0=DP |x|.
                |x| is multiplied by four consecutive entries of the L(_FPI)
                table (pieces of 4/Pi), chosen from the exponent of x.  The
                integer part of the sum gives k, the remaining fraction times
                Pi/4 gives t.  */
 196
 197         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 198         shrl    $23, %eax               /* eb = biased exponent of x */
 199         /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
 200         subl    $68, %eax               /* 68 = 0x7f - 59 */
 201         movl    $28, %ecx               /* %cl=28 */
 202         movl    %eax, %edx              /* bitpos copy */
 203
 204         /* j = bitpos/28; */
 205         div     %cl                     /* j in register %al=%ax/%cl */
 206         movapd  %xmm0, %xmm3            /* |x| */
 207         /* clear unneeded remainder from %ah */
 208         andl    $0xff, %eax
 209
 210         imull   $28, %eax, %ecx         /* j*28 */
 211         lea     L(_FPI)(%rip), %rsi
 212         movsd   L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
 213         movapd  %xmm0, %xmm5            /* |x| */
 214         mulsd   -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
 215         movapd  %xmm0, %xmm1            /* |x| */
 216         mulsd   -8(%rsi,%rax,8), %xmm5  /* tmp2 = FPI[j-1]*|x| */
 217         mulsd   (%rsi,%rax,8), %xmm0    /* tmp0 = FPI[j]*|x| */
 218         addl    $19, %ecx               /* j*28+19 */
 219         mulsd   8(%rsi,%rax,8), %xmm1   /* tmp1 = FPI[j+1]*|x| */
 220         cmpl    %ecx, %edx              /* bitpos>=j*28+19?  */
 221         jl      L(very_large_skip1)
 222
 223         /* Here if bitpos>=j*28+19 */
 224         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 225         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 226 L(very_large_skip1):
 227
             /* Adding 2^52 leaves the rounded integer value of tmp5 in the low
                bits of tmp6; adding -2^52 back gives that integer as a double
                (tmp4).  */
 228         movsd   L(DP_2POW52)(%rip), %xmm6
 229         movapd  %xmm5, %xmm2            /* tmp2 copy */
 230         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 231         movl    $1, %edx
 232         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 233         movsd   8+L(DP_2POW52)(%rip), %xmm4 /* -2^52 (second table entry) */
 234         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 235         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 236         comisd  %xmm5, %xmm4            /* tmp4 > tmp5?  */
 237         jbe     L(very_large_skip2)
 238
 239         /* Here if tmp4 > tmp5 */
             /* The rounding went up; step back so that k and tmp4 are the
                truncated value.  */
 240         subl    $1, %eax                /* k-- */
 241         addsd   8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
 242 L(very_large_skip2):
 243
 244         andl    %eax, %edx              /* k&1 */
 245         lea     L(DP_ZERONE)(%rip), %rsi
 246         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 247         addsd   (%rsi,%rdx,8), %xmm3    /* t  = DP_ZERONE[k&1] + tmp3 */
 248         addsd   %xmm2, %xmm3            /* t += tmp2 */
 249         addsd   %xmm3, %xmm0            /* t += tmp0 */
 250         addl    $3, %eax                /* n=k+3 */
 251         addsd   %xmm1, %xmm0            /* t += tmp1 */
 252         mulsd   L(DP_PIO4)(%rip), %xmm0 /* t *= PIO4 */
 253
 254         jmp     L(reconstruction)       /* end of very_large_args path */
 255
 256         .p2align        4
 257 L(arg_less_pio4):
 258         /* Here if |x|<Pi/4 */
             /* Input: %eax=bits of |x|, %xmm0=DP x.  No range reduction is
                needed; the cos polynomial is applied to x directly.  Since it
                only uses x^2, the sign of x does not matter.  */
 259         cmpl    $0x3d000000, %eax       /* |x|<2^-5?  */
             /* Signed jump, but %eax has its sign bit cleared at entry.  */
 260         jl      L(arg_less_2pn5)
 261
 262         /* Here if 2^-5<=|x|<Pi/4 */
 263         mulsd   %xmm0, %xmm0            /* y=x^2 */
 264         movaps  %xmm0, %xmm1            /* y */
 265         mulsd   %xmm0, %xmm0            /* z=x^4 */
 266         movsd   L(DP_C4)(%rip), %xmm3   /* C4 */
 267         mulsd   %xmm0, %xmm3            /* z*C4 */
 268         movsd   L(DP_C3)(%rip), %xmm5   /* C3 */
 269         mulsd   %xmm0, %xmm5            /* z*C3 */
 270         addsd   L(DP_C2)(%rip), %xmm3   /* C2+z*C4 */
 271         mulsd   %xmm0, %xmm3            /* z*(C2+z*C4) */
 272         addsd   L(DP_C1)(%rip), %xmm5   /* C1+z*C3 */
 273         mulsd   %xmm0, %xmm5            /* z*(C1+z*C3) */
 274         addsd   L(DP_C0)(%rip), %xmm3   /* C0+z*(C2+z*C4) */
 275         mulsd   %xmm1, %xmm3            /* y*(C0+z*(C2+z*C4)) */
 276         /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 277         addsd   %xmm5, %xmm3
 278         /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 279         addsd   L(DP_ONES)(%rip), %xmm3
 280         cvtsd2ss %xmm3, %xmm0           /* SP result */
 281         ret
 282
 283         .p2align        4
 284 L(arg_less_2pn5):
 285         /* Here if |x|<2^-5 */
             /* Input: %eax=bits of |x|, %xmm0=DP x.  A two-term polynomial in
                x^2 is used for this range.  */
 286         cmpl    $0x32000000, %eax       /* |x|<2^-27?  */
             /* Signed jump, but %eax has its sign bit cleared at entry.  */
 287         jl      L(arg_less_2pn27)
 288
 289         /* Here if 2^-27<=|x|<2^-5 */
 290         mulsd   %xmm0, %xmm0            /* DP x^2 */
 291         movsd   L(DP_COS2_1)(%rip), %xmm3 /* DP DP_COS2_1 */
 292         mulsd   %xmm0, %xmm3            /* DP x^2*DP_COS2_1 */
 293         addsd   L(DP_COS2_0)(%rip), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
 294         mulsd   %xmm0, %xmm3            /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
 295         /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
 296         addsd   L(DP_ONES)(%rip), %xmm3
 297         cvtsd2ss %xmm3, %xmm0           /* SP result */
 298         ret
 299
 300         .p2align        4
 301 L(arg_less_2pn27):
 302         /* Here if |x|<2^-27 */
             /* Uses the copy of x kept in %xmm7 at entry.  The result is
                computed as a single-precision 1.0-|x|; per the file header this
                covers |x| == 0 (result 1, no inexact) as well as tiny nonzero
                x (inexact raised).  */
 303         andps   L(SP_ABS_MASK)(%rip),%xmm7 /* |x| */
 304         movss   L(SP_ONE)(%rip), %xmm0  /* 1.0 */
 305         subss   %xmm7, %xmm0            /* result is 1.0-|x| */
 306         ret
 307
 308         .p2align        4
 309 L(arg_inf_or_nan):
 310         /* Here if |x| is Inf or NAN */
             /* The flags still hold the result of comparing the bits of |x|
                with 0x7f800000 in L(large_args): equal means Inf, above means
                NaN.  %xmm7 holds the original x.  */
 311         jne     L(skip_errno_setting)   /* in case of x is NaN */
 312
 313         /* Here if x is Inf. Set errno to EDOM.  */
             /* Nothing has been pushed since entry, so %rsp is 8 mod 16 here.
                Drop it by 8 so the callee sees the 16-byte alignment the
                x86-64 psABI requires at a call.  %xmm7 is call-clobbered under
                that ABI, so x is parked in the new stack slot across the call
                instead of relying on the register surviving.  */
             subq    $8, %rsp
             cfi_adjust_cfa_offset (8)
             movss   %xmm7, (%rsp)           /* save x across the call */
 314         call    JUMPTARGET(__errno_location)
 315         movl    $EDOM, (%rax)
             movss   (%rsp), %xmm7           /* reload x */
             addq    $8, %rsp
             cfi_adjust_cfa_offset (-8)
 316
 317         .p2align        4
 318 L(skip_errno_setting):
 319         /* Here if |x| is Inf or NAN. Continued.  */
 320         movaps  %xmm7, %xmm0            /* load x */
 321         subss   %xmm0, %xmm0            /* Result is NaN */
 322         ret
 323 END(__cosf)
 324
 325         .section .rodata, "a"
 326         .p2align 3
 327 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
             /* Each entry is one double written as two .long words, low word
                first.  The Pi/4<=|x|<9*Pi/4 path indexes this table with
                j=(k+1)&0x0e, so it only reads even j.  */
 328         .long   0x00000000,0x00000000   /* j=0 */
 329         .long   0x54442d18,0x3fe921fb   /* j=1 */
 330         .long   0x54442d18,0x3ff921fb   /* j=2 */
 331         .long   0x7f3321d2,0x4002d97c   /* j=3 */
 332         .long   0x54442d18,0x400921fb   /* j=4 */
 333         .long   0x2955385e,0x400f6a7a   /* j=5 */
 334         .long   0x7f3321d2,0x4012d97c   /* j=6 */
 335         .long   0xe9bba775,0x4015fdbb   /* j=7 */
 336         .long   0x54442d18,0x401921fb   /* j=8 */
 337         .long   0xbeccb2bb,0x401c463a   /* j=9 */
 338         .long   0x2955385e,0x401f6a7a   /* j=10 */
 339         .type L(PIO4J), @object
 340         ASM_SIZE_DIRECTIVE(L(PIO4J))
 341
 342         .p2align 3
 343 L(_FPI): /* 4/Pi broken into sum of positive DP values */
             /* Each entry is one double written as two .long words, low word
                first.  L(very_large_args) multiplies |x| by the four
                consecutive entries FPI[j-2] .. FPI[j+1], where j is derived
                from the exponent of x.  The first entry is zero.  */
 344         .long   0x00000000,0x00000000
 345         .long   0x6c000000,0x3ff45f30
 346         .long   0x2a000000,0x3e3c9c88
 347         .long   0xa8000000,0x3c54fe13
 348         .long   0xd0000000,0x3aaf47d4
 349         .long   0x6c000000,0x38fbb81b
 350         .long   0xe0000000,0x3714acc9
 351         .long   0x7c000000,0x3560e410
 352         .long   0x56000000,0x33bca2c7
 353         .long   0xac000000,0x31fbd778
 354         .long   0xe0000000,0x300b7246
 355         .long   0xe8000000,0x2e5d2126
 356         .long   0x48000000,0x2c970032
 357         .long   0xe8000000,0x2ad77504
 358         .long   0xe0000000,0x290921cf
 359         .long   0xb0000000,0x274deb1c
 360         .long   0xe0000000,0x25829a73
 361         .long   0xbe000000,0x23fd1046
 362         .long   0x10000000,0x2224baed
 363         .long   0x8e000000,0x20709d33
 364         .long   0x80000000,0x1e535a2f
 365         .long   0x64000000,0x1cef904e
 366         .long   0x30000000,0x1b0d6398
 367         .long   0x24000000,0x1964ce7d
 368         .long   0x16000000,0x17b908bf
 369         .type L(_FPI), @object
 370         ASM_SIZE_DIRECTIVE(L(_FPI))
 371
 372 /* Coefficients of polynomial
 373    for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.  */
 374         .p2align 3
 375 L(DP_COS2_0):
 376         .long   0xff5cc6fd,0xbfdfffff   /* x^2 coefficient, close to -1/2 */
 377         .type L(DP_COS2_0), @object
 378         ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
 379
 380         .p2align 3
 381 L(DP_COS2_1):
 382         .long   0xb178dac5,0x3fa55514   /* x^4 coefficient, close to 1/24 */
 383         .type L(DP_COS2_1), @object
 384         ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
 385
 386         .p2align 3
 387 L(DP_ZERONE):
             /* Indexed by k&1 in L(very_large_args): the selected value is
                added into t before the multiplication by Pi/4.  */
 388         .long   0x00000000,0x00000000   /* 0.0 */
 389         .long   0x00000000,0xbff00000   /* -1.0 */
 390         .type L(DP_ZERONE), @object
 391         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 392
 393         .p2align 3
 394 L(DP_ONES):
             /* Sign table indexed by (n>>2)&1 in the reconstruction code.  The
                first entry also serves as the constant 1.0 of the cos
                polynomials, and the second (at offset 8) as -1.0 in
                L(very_large_args).  */
 395         .long   0x00000000,0x3ff00000   /* +1.0 */
 396         .long   0x00000000,0xbff00000   /* -1.0 */
 397         .type L(DP_ONES), @object
 398         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 399
 400 /* Coefficients of polynomial
 401    for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
     /* Each coefficient is one double written as two .long words, low word
        first.  The objects are not laid out in index order.  */
 402         .p2align 3
 403 L(DP_S3):
 404         .long   0x64e6b5b4,0x3ec71d72
 405         .type L(DP_S3), @object
 406         ASM_SIZE_DIRECTIVE(L(DP_S3))
 407
 408         .p2align 3
 409 L(DP_S1):
 410         .long   0x10c2688b,0x3f811111
 411         .type L(DP_S1), @object
 412         ASM_SIZE_DIRECTIVE(L(DP_S1))
 413
 414         .p2align 3
 415 L(DP_S4):
 416         .long   0x1674b58a,0xbe5a947e
 417         .type L(DP_S4), @object
 418         ASM_SIZE_DIRECTIVE(L(DP_S4))
 419
 420         .p2align 3
 421 L(DP_S2):
 422         .long   0x8b4bd1f9,0xbf2a019f
 423         .type L(DP_S2),@object
 424         ASM_SIZE_DIRECTIVE(L(DP_S2))
 425
 426         .p2align 3
 427 L(DP_S0):
 428         .long   0x55551cd9,0xbfc55555
 429         .type L(DP_S0), @object
 430         ASM_SIZE_DIRECTIVE(L(DP_S0))
 431
 432 /* Coefficients of polynomial
 433    for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
     /* Each coefficient is one double written as two .long words, low word
        first.  The objects are not laid out in index order.  The same
        coefficients serve both the reconstruction code and the
        2^-5<=|x|<Pi/4 path.  */
 434         .p2align 3
 435 L(DP_C3):
 436         .long   0x9ac43cc0,0x3efa00eb
 437         .type L(DP_C3), @object
 438         ASM_SIZE_DIRECTIVE(L(DP_C3))
 439
 440         .p2align 3
 441 L(DP_C1):
 442         .long   0x545c50c7,0x3fa55555
 443         .type L(DP_C1), @object
 444         ASM_SIZE_DIRECTIVE(L(DP_C1))
 445
 446         .p2align 3
 447 L(DP_C4):
 448         .long   0xdd8844d7,0xbe923c97
 449         .type L(DP_C4), @object
 450         ASM_SIZE_DIRECTIVE(L(DP_C4))
 451
 452         .p2align 3
 453 L(DP_C2):
 454         .long   0x348b6874,0xbf56c16b
 455         .type L(DP_C2), @object
 456         ASM_SIZE_DIRECTIVE(L(DP_C2))
 457
 458         .p2align 3
 459 L(DP_C0):
 460         .long   0xfffe98ae,0xbfdfffff
 461         .type L(DP_C0), @object
 462         ASM_SIZE_DIRECTIVE(L(DP_C0))
 463
 464         .p2align 3
 465 L(DP_PIO4):
             /* Final scale factor applied to t in L(very_large_args).  */
 466         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 467         .type L(DP_PIO4), @object
 468         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 469
 470         .p2align 3
 471 L(DP_2POW52):
             /* Pair used by L(very_large_args) to round a double to an
                integer: add the first entry, then add the second (at
                offset 8).  */
 472         .long   0x00000000,0x43300000   /* +2^52 */
 473         .long   0x00000000,0xc3300000   /* -2^52 */
 474         .type L(DP_2POW52), @object
 475         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 476
 477         .p2align 3
 478 L(DP_INVPIO4):
 479         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 480         .type L(DP_INVPIO4), @object
 481         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 482
             /* The two parts of Pi/4 below are stored negated (sign bit set),
                so L(large_args) subtracts j*Pi/4 by adding the products.  */
 483         .p2align 3
 484 L(DP_PIO4HI):
 485         .long   0x54000000,0xbfe921fb   /* High part of Pi/4 */
 486         .type L(DP_PIO4HI), @object
 487         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 488
 489         .p2align 3
 490 L(DP_PIO4LO):
 491         .long   0x11A62633,0xbe010b46   /* Low part of Pi/4 */
 492         .type L(DP_PIO4LO), @object
 493         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 494
 495         .p2align 2
 496 L(SP_INVPIO4):
             /* Single-precision constant used only by the
                Pi/4<=|x|<9*Pi/4 path to compute k.  */
 497         .long   0x3fa2f983              /* 4/Pi */
 498         .type L(SP_INVPIO4), @object
 499         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 500
 501         .p2align 4
 502 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
             /* 16 bytes, 16-byte aligned: it is used directly as the memory
                operand of andpd.  */
 503         .long   0xffffffff,0x7fffffff
 504         .long   0xffffffff,0x7fffffff
 505         .type L(DP_ABS_MASK), @object
 506         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 507
 508         .p2align 3
 509 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
             /* The mask keeps the upper 32 bits of the 64-bit pattern: sign,
                11 exponent bits and the top 20 stored fraction bits.  */
 510         .long   0x00000000,0xffffffff
 511         .type L(DP_HI_MASK), @object
 512         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 513
 514         .p2align 4
 515 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
             /* 16 bytes, 16-byte aligned: it is used directly as the memory
                operand of andps in L(arg_less_2pn27).  */
 516         .long   0x7fffffff,0x7fffffff
 517         .long   0x7fffffff,0x7fffffff
 518         .type L(SP_ABS_MASK), @object
 519         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 520
 521         .p2align 2
 522 L(SP_ONE):
             /* Single-precision constant used by L(arg_less_2pn27).  */
 523         .long   0x3f800000              /* 1.0 */
 524         .type L(SP_ONE), @object
 525         ASM_SIZE_DIRECTIVE(L(SP_ONE))
 526
 527 weak_alias(__cosf, cosf) /* presumably exports cosf as a weak alias of __cosf; the macro is defined outside this file */