select: overhaul for time64
[musl.git] / src / math / x32 / expl.s
blob369f7bd2167b562163233a868bc0091b3bb32740
1 # exp(x) = 2^hi + 2^hi (2^lo - 1)
2 # where hi+lo = log2e*x with 128bit precision
3 # exact log2e*x calculation depends on nearest rounding mode
4 # using the exact multiplication method of Dekker and Veltkamp
6 .global expl
7 .type expl,@function
8 expl:
9 fldt 8(%esp)
11 # interesting case: 0x1p-32 <= |x| < 16384
12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
13 mov 16(%esp), %ax
14 or $0x8000, %ax
15 sub $0xbfdf, %ax
16 cmp $45, %ax
17 jbe 2f
18 test %ax, %ax
19 fld1
20 js 1f
21 # if |x|>=0x1p14 or nan return 2^trunc(x)
22 fscale
23 fstp %st(1)
24 ret
25 # if |x|<0x1p-32 return 1+x
26 1: faddp
27 ret
29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
30 # it will be wrong on non-nearest rounding mode
31 2: fldl2e
32 sub $48, %esp
33 # hi = log2e_hi*x
34 # 2^hi = exp2l(hi)
35 fmul %st(1),%st
36 fld %st(0)
37 fstpt (%esp)
38 fstpt 16(%esp)
39 fstpt 32(%esp)
40 call exp2l@PLT
41 # if 2^hi == inf return 2^hi
42 fld %st(0)
43 fstpt (%esp)
44 cmpw $0x7fff, 8(%esp)
45 je 1f
46 fldt 32(%esp)
47 fldt 16(%esp)
48 # fpu stack: 2^hi x hi
49 # exact mult: x*log2e
50 fld %st(1)
51 # c = 0x1p32+1
52 movq $0x41f0000000100000,%rax
53 pushq %rax
54 fldl (%esp)
55 # xh = x - c*x + c*x
56 # xl = x - xh
57 fmulp
58 fld %st(2)
59 fsub %st(1), %st
60 faddp
61 fld %st(2)
62 fsub %st(1), %st
63 # yh = log2e_hi - c*log2e_hi + c*log2e_hi
64 movq $0x3ff7154765200000,%rax
65 pushq %rax
66 fldl (%esp)
67 # fpu stack: 2^hi x hi xh xl yh
68 # lo = hi - xh*yh + xl*yh
69 fld %st(2)
70 fmul %st(1), %st
71 fsubp %st, %st(4)
72 fmul %st(1), %st
73 faddp %st, %st(3)
74 # yl = log2e_hi - yh
75 movq $0x3de705fc2f000000,%rax
76 pushq %rax
77 fldl (%esp)
78 # fpu stack: 2^hi x lo xh xl yl
79 # lo += xh*yl + xl*yl
80 fmul %st, %st(2)
81 fmulp %st, %st(1)
82 fxch %st(2)
83 faddp
84 faddp
85 # log2e_lo
86 movq $0xbfbe,%rax
87 pushq %rax
88 movq $0x82f0025f2dc582ee,%rax
89 pushq %rax
90 fldt (%esp)
91 add $40,%esp
92 # fpu stack: 2^hi x lo log2e_lo
93 # lo += log2e_lo*x
94 # return 2^hi + 2^hi (2^lo - 1)
95 fmulp %st, %st(2)
96 faddp
97 f2xm1
98 fmul %st(1), %st
99 faddp
100 1: add $48, %esp