2.9
[glibc/nacl-glibc.git] / sysdeps / ia64 / fpu / s_log1p.S
blobe1e6dcc80bc92516d13c56fa04ced542b3e093e6
1 .file "log1p.s"
4 // Copyright (c) 2000 - 2005, Intel Corporation
5 // All rights reserved.
6 //
7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 // History
41 //==============================================================
42 // 02/02/00 Initial version
43 // 04/04/00 Unwind support added
44 // 08/15/00 Bundle added after call to __libm_error_support to properly
45 //          set [the previously overwritten] GR_Parameter_RESULT.
46 // 06/29/01 Improved speed of all paths
47 // 05/20/02 Cleaned up namespace and sf0 syntax
48 // 10/02/02 Improved performance by basing on log algorithm
49 // 02/10/03 Reordered header: .section, .global, .proc, .align
50 // 04/18/03 Eliminate possible WAW dependency warning
51 // 03/31/05 Reformatted delimiters between data tables
53 // API
54 //==============================================================
55 // double log1p(double)
57 // log1p(x) = log(x+1)
59 // Overview of operation
60 //==============================================================
61 // Background
62 // ----------
64 // This algorithm is based on fact that
65 // log1p(x) = log(1+x) and
66 // log(a b) = log(a) + log(b).
67 // In our case we have 1+x = 2^N f, where 1 <= f < 2.
68 // So
69 //   log(1+x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
71 // To calculate log(f) we do following
72 //   log(f) = log(f * frcpa(f) / frcpa(f)) =
73 //          = log(f * frcpa(f)) + log(1/frcpa(f))
75 // By the definition of IA-64's frcpa instruction, frcpa(f) is a
76 // floating-point value that approximates 1/f using a lookup on the
77 // top 8 bits of the significand of its input (here 1+x), with relative
78 // error < 2^(-8.886). So we have the following
80 // |(1/f - frcpa(f)) / (1/f)| = |1 - f*frcpa(f)| < 1/256
82 // and
84 // log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
85 //        = log(1 + r) + T
87 // The first value can be computed by polynomial P(r) approximating
88 // log(1 + r) on |r| < 1/256 and the second is precomputed tabular
89 // value defined by top 8 bit of f.
91 // Finally we have that  log(1+x) ~ (N*log(2) + T) + P(r)
93 // Note that if input argument is close to 0.0 (in our case it means
94 // that |x| < 1/256) we can use just polynomial approximation
95 // because 1+x = 2^0 * f = f = 1 + r and
96 // log(1+x) = log(1 + r) ~ P(r)
99 // Implementation
100 // --------------
102 // 1. |x| >= 2^(-8), and x > -1
103 //   InvX = frcpa(x+1)
104 //   r = InvX*(x+1) - 1
105 //   P(r) = r + r^2*(r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
106 //   all coefficients are calculated in quad and rounded to double
107 //   precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2
108 //   created with setf.
110 //   N = float(n) where n is the true unbiased exponent of x+1
112 //   T is the tabular value of log(1/frcpa(x+1)) calculated in quad precision
113 //   and represented by two floating-point numbers 64-bit Thi and 32-bit Tlo.
114 //   To load Thi,Tlo we get bits from 55 to 62 of register format significand
115 //   as index and calculate two addresses
116 //     ad_Thi = Thi_table_base_addr + 8 * index
117 //     ad_Tlo = Tlo_table_base_addr + 4 * index
119 //   L1 (log(2)) is calculated in quad
120 //   precision and represented by two floating-point 64-bit numbers L1hi,L1lo
121 //   stored in memory.
123 //   And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + P(r)
126 // 2. 2^(-80) <= |x| < 2^(-8)
127 //   r = x
128 //   P(r) = r + r^2*(r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
129 //   A7,A6,A5,A4,A3,A2 are the same as in case |x| >= 1/256
131 //   And final results
132 //     log(1+x)   = P(r)
134 // 3. 0 < |x| < 2^(-80)
135 //   Although log1p(x) is basically x, we would like to preserve the inexactness
136 //   nature as well as consistent behavior under different rounding modes.
137 //   We can do this by computing the result as
139 //     log1p(x) = x - x*x
142 //    Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
143 //          filtered and processed on special branches.
147 // Special values
148 //==============================================================
150 // log1p(-1)    = -inf            // Call error support
152 // log1p(+qnan) = +qnan
153 // log1p(-qnan) = -qnan
154 // log1p(+snan) = +qnan
155 // log1p(-snan) = -qnan
157 // log1p(x),x<-1= QNAN Indefinite // Call error support
158 // log1p(-inf)  = QNAN Indefinite
159 // log1p(+inf)  = +inf
160 // log1p(+/-0)  = +/-0
163 // Registers used
164 //==============================================================
165 // Floating Point registers used:
166 // f8, input
167 // f7 -> f15,  f32 -> f40
169 // General registers used:
170 // r8  -> r11
171 // r14 -> r20
173 // Predicate registers used:
174 // p6 -> p12
176 // Assembly macros
177 //==============================================================
// Symbolic names for the registers used below. Several names deliberately
// share one physical register; the sharing is noted next to each group.
//
// General (integer) scratch registers.
// GR_TAG and GR_ad_1 both name r8: GR_ad_1 is used on the main path,
// GR_TAG only on the error paths (log1p_eq_minus_1 / log1p_lt_minus_1).
178 GR_TAG                 = r8
179 GR_ad_1                = r8
180 GR_ad_2                = r9
181 GR_Exp                 = r10
182 GR_N                   = r11
184 GR_signexp_x           = r14
185 GR_exp_mask            = r15
186 GR_exp_bias            = r16
187 GR_05                  = r17
188 GR_A3                  = r18
// GR_Sig and GR_Ind both name r19: the table index is extracted from the
// significand in place (extr.u GR_Ind = GR_Sig,55,8).
189 GR_Sig                 = r19
190 GR_Ind                 = r19
191 GR_exp_x               = r20
// Stacked registers used only by the error path. They match the frame
// created by "alloc r32 = ar.pfs,1,4,4,0" in log_libm_err: r33-r36 hold the
// saved state and r37-r40 carry the arguments of __libm_error_support.
194 GR_SAVE_B0             = r33
195 GR_SAVE_PFS            = r34
196 GR_SAVE_GP             = r35
197 GR_SAVE_SP             = r36
199 GR_Parameter_X         = r37
200 GR_Parameter_Y         = r38
201 GR_Parameter_RESULT    = r39
202 GR_Parameter_TAG       = r40
// Floating-point working registers.
206 FR_NormX               = f7
207 FR_RcpX                = f9
208 FR_r                   = f10
209 FR_r2                  = f11
210 FR_r4                  = f12
211 FR_N                   = f13
212 FR_Ln2hi               = f14
213 FR_Ln2lo               = f15
// Polynomial coefficients; the Horner steps overwrite FR_A6, FR_A4 and
// FR_A2 with partial sums.
215 FR_A7                  = f32
216 FR_A6                  = f33
217 FR_A5                  = f34
218 FR_A4                  = f35
219 FR_A3                  = f36
220 FR_A2                  = f37
// f38 and f39 are accumulated in place: Thi -> N*Ln2hi+Thi -> full N*ln2+T,
// and Tlo -> N*Ln2lo+Tlo.
222 FR_Thi                 = f38
223 FR_NxLn2hipThi         = f38
224 FR_NxLn2pT             = f38
225 FR_Tlo                 = f39
226 FR_NxLn2lopTlo         = f39
228 FR_Xp1                 = f40
// Values handed to the error handler. FR_Y is f1 (stored as "Parameter 2"),
// FR_X shares f10 with FR_r (the error paths copy the input there), and
// FR_RESULT is f8, which is also the argument/return register.
231 FR_Y                   = f1
232 FR_X                   = f10
233 FR_RESULT              = f8
236 // Data
237 //==============================================================
238 RODATA
239 .align 16
// log_data layout, as byte offsets from the start of the object. These are
// the offsets the address arithmetic in log1p relies on:
//   +0x000  A7, A6      loaded as a pair through GR_ad_1
//   +0x010  A5, A4      loaded as a pair through GR_ad_2 = GR_ad_1 + 16
//   +0x020  Thi[256]    8 bytes per entry, indexed by GR_Ind
//   +0x820  ln(2) hi, lo
//   +0x830  Tlo[256]    4 bytes per entry, indexed by GR_Ind
// Inserting or removing any entry shifts these offsets and breaks the code.
241 LOCAL_OBJECT_START(log_data)
242 // coefficients of polynomial approximation
// (A3 and A2 are not stored here; the code builds them with setf.d/setf.exp)
243 data8 0x3FC2494104381A8E // A7
244 data8 0xBFC5556D556BBB69 // A6
245 data8 0x3FC999999988B5E9 // A5
246 data8 0xBFCFFFFFFFF6FFF5 // A4
248 // hi parts of ln(1/frcpa(1+i/256)), i=0...255
249 data8 0x3F60040155D5889D // 0
250 data8 0x3F78121214586B54 // 1
251 data8 0x3F841929F96832EF // 2
252 data8 0x3F8C317384C75F06 // 3
253 data8 0x3F91A6B91AC73386 // 4
254 data8 0x3F95BA9A5D9AC039 // 5
255 data8 0x3F99D2A8074325F3 // 6
256 data8 0x3F9D6B2725979802 // 7
257 data8 0x3FA0C58FA19DFAA9 // 8
258 data8 0x3FA2954C78CBCE1A // 9
259 data8 0x3FA4A94D2DA96C56 // 10
260 data8 0x3FA67C94F2D4BB58 // 11
261 data8 0x3FA85188B630F068 // 12
262 data8 0x3FAA6B8ABE73AF4C // 13
263 data8 0x3FAC441E06F72A9E // 14
264 data8 0x3FAE1E6713606D06 // 15
265 data8 0x3FAFFA6911AB9300 // 16
266 data8 0x3FB0EC139C5DA600 // 17
267 data8 0x3FB1DBD2643D190B // 18
268 data8 0x3FB2CC7284FE5F1C // 19
269 data8 0x3FB3BDF5A7D1EE64 // 20
270 data8 0x3FB4B05D7AA012E0 // 21
271 data8 0x3FB580DB7CEB5701 // 22
272 data8 0x3FB674F089365A79 // 23
273 data8 0x3FB769EF2C6B568D // 24
274 data8 0x3FB85FD927506A47 // 25
275 data8 0x3FB9335E5D594988 // 26
276 data8 0x3FBA2B0220C8E5F4 // 27
277 data8 0x3FBB0004AC1A86AB // 28
278 data8 0x3FBBF968769FCA10 // 29
279 data8 0x3FBCCFEDBFEE13A8 // 30
280 data8 0x3FBDA727638446A2 // 31
281 data8 0x3FBEA3257FE10F79 // 32
282 data8 0x3FBF7BE9FEDBFDE5 // 33
283 data8 0x3FC02AB352FF25F3 // 34
284 data8 0x3FC097CE579D204C // 35
285 data8 0x3FC1178E8227E47B // 36
286 data8 0x3FC185747DBECF33 // 37
287 data8 0x3FC1F3B925F25D41 // 38
288 data8 0x3FC2625D1E6DDF56 // 39
289 data8 0x3FC2D1610C868139 // 40
290 data8 0x3FC340C59741142E // 41
291 data8 0x3FC3B08B6757F2A9 // 42
292 data8 0x3FC40DFB08378003 // 43
293 data8 0x3FC47E74E8CA5F7C // 44
294 data8 0x3FC4EF51F6466DE4 // 45
295 data8 0x3FC56092E02BA516 // 46
296 data8 0x3FC5D23857CD74D4 // 47
297 data8 0x3FC6313A37335D76 // 48
298 data8 0x3FC6A399DABBD383 // 49
299 data8 0x3FC70337DD3CE41A // 50
300 data8 0x3FC77654128F6127 // 51
301 data8 0x3FC7E9D82A0B022D // 52
302 data8 0x3FC84A6B759F512E // 53
303 data8 0x3FC8AB47D5F5A30F // 54
304 data8 0x3FC91FE49096581B // 55
305 data8 0x3FC981634011AA75 // 56
306 data8 0x3FC9F6C407089664 // 57
307 data8 0x3FCA58E729348F43 // 58
308 data8 0x3FCABB55C31693AC // 59
309 data8 0x3FCB1E104919EFD0 // 60
310 data8 0x3FCB94EE93E367CA // 61
311 data8 0x3FCBF851C067555E // 62
312 data8 0x3FCC5C0254BF23A5 // 63
313 data8 0x3FCCC000C9DB3C52 // 64
314 data8 0x3FCD244D99C85673 // 65
315 data8 0x3FCD88E93FB2F450 // 66
316 data8 0x3FCDEDD437EAEF00 // 67
317 data8 0x3FCE530EFFE71012 // 68
318 data8 0x3FCEB89A1648B971 // 69
319 data8 0x3FCF1E75FADF9BDE // 70
320 data8 0x3FCF84A32EAD7C35 // 71
321 data8 0x3FCFEB2233EA07CD // 72
322 data8 0x3FD028F9C7035C1C // 73
323 data8 0x3FD05C8BE0D9635A // 74
324 data8 0x3FD085EB8F8AE797 // 75
325 data8 0x3FD0B9C8E32D1911 // 76
326 data8 0x3FD0EDD060B78080 // 77
327 data8 0x3FD122024CF0063F // 78
328 data8 0x3FD14BE2927AECD4 // 79
329 data8 0x3FD180618EF18ADF // 80
330 data8 0x3FD1B50BBE2FC63B // 81
331 data8 0x3FD1DF4CC7CF242D // 82
332 data8 0x3FD214456D0EB8D4 // 83
333 data8 0x3FD23EC5991EBA49 // 84
334 data8 0x3FD2740D9F870AFB // 85
335 data8 0x3FD29ECDABCDFA03 // 86
336 data8 0x3FD2D46602ADCCEE // 87
337 data8 0x3FD2FF66B04EA9D4 // 88
338 data8 0x3FD335504B355A37 // 89
339 data8 0x3FD360925EC44F5C // 90
340 data8 0x3FD38BF1C3337E74 // 91
341 data8 0x3FD3C25277333183 // 92
342 data8 0x3FD3EDF463C1683E // 93
343 data8 0x3FD419B423D5E8C7 // 94
344 data8 0x3FD44591E0539F48 // 95
345 data8 0x3FD47C9175B6F0AD // 96
346 data8 0x3FD4A8B341552B09 // 97
347 data8 0x3FD4D4F39089019F // 98
348 data8 0x3FD501528DA1F967 // 99
349 data8 0x3FD52DD06347D4F6 // 100
350 data8 0x3FD55A6D3C7B8A89 // 101
351 data8 0x3FD5925D2B112A59 // 102
352 data8 0x3FD5BF406B543DB1 // 103
353 data8 0x3FD5EC433D5C35AD // 104
354 data8 0x3FD61965CDB02C1E // 105
355 data8 0x3FD646A84935B2A1 // 106
356 data8 0x3FD6740ADD31DE94 // 107
357 data8 0x3FD6A18DB74A58C5 // 108
358 data8 0x3FD6CF31058670EC // 109
359 data8 0x3FD6F180E852F0B9 // 110
360 data8 0x3FD71F5D71B894EF // 111
361 data8 0x3FD74D5AEFD66D5C // 112
362 data8 0x3FD77B79922BD37D // 113
363 data8 0x3FD7A9B9889F19E2 // 114
364 data8 0x3FD7D81B037EB6A6 // 115
365 data8 0x3FD8069E33827230 // 116
366 data8 0x3FD82996D3EF8BCA // 117
367 data8 0x3FD85855776DCBFA // 118
368 data8 0x3FD8873658327CCE // 119
369 data8 0x3FD8AA75973AB8CE // 120
370 data8 0x3FD8D992DC8824E4 // 121
371 data8 0x3FD908D2EA7D9511 // 122
372 data8 0x3FD92C59E79C0E56 // 123
373 data8 0x3FD95BD750EE3ED2 // 124
374 data8 0x3FD98B7811A3EE5B // 125
375 data8 0x3FD9AF47F33D406B // 126
376 data8 0x3FD9DF270C1914A7 // 127
377 data8 0x3FDA0325ED14FDA4 // 128
378 data8 0x3FDA33440224FA78 // 129
379 data8 0x3FDA57725E80C382 // 130
380 data8 0x3FDA87D0165DD199 // 131
381 data8 0x3FDAAC2E6C03F895 // 132
382 data8 0x3FDADCCC6FDF6A81 // 133
383 data8 0x3FDB015B3EB1E790 // 134
384 data8 0x3FDB323A3A635948 // 135
385 data8 0x3FDB56FA04462909 // 136
386 data8 0x3FDB881AA659BC93 // 137
387 data8 0x3FDBAD0BEF3DB164 // 138
388 data8 0x3FDBD21297781C2F // 139
389 data8 0x3FDC039236F08818 // 140
390 data8 0x3FDC28CB1E4D32FC // 141
391 data8 0x3FDC4E19B84723C1 // 142
392 data8 0x3FDC7FF9C74554C9 // 143
393 data8 0x3FDCA57B64E9DB05 // 144
394 data8 0x3FDCCB130A5CEBAF // 145
395 data8 0x3FDCF0C0D18F326F // 146
396 data8 0x3FDD232075B5A201 // 147
397 data8 0x3FDD490246DEFA6B // 148
398 data8 0x3FDD6EFA918D25CD // 149
399 data8 0x3FDD9509707AE52F // 150
400 data8 0x3FDDBB2EFE92C554 // 151
401 data8 0x3FDDEE2F3445E4AE // 152
402 data8 0x3FDE148A1A2726CD // 153
403 data8 0x3FDE3AFC0A49FF3F // 154
404 data8 0x3FDE6185206D516D // 155
405 data8 0x3FDE882578823D51 // 156
406 data8 0x3FDEAEDD2EAC990C // 157
407 data8 0x3FDED5AC5F436BE2 // 158
408 data8 0x3FDEFC9326D16AB8 // 159
409 data8 0x3FDF2391A21575FF // 160
410 data8 0x3FDF4AA7EE03192C // 161
411 data8 0x3FDF71D627C30BB0 // 162
412 data8 0x3FDF991C6CB3B379 // 163
413 data8 0x3FDFC07ADA69A90F // 164
414 data8 0x3FDFE7F18EB03D3E // 165
415 data8 0x3FE007C053C5002E // 166
416 data8 0x3FE01B942198A5A0 // 167
417 data8 0x3FE02F74400C64EA // 168
418 data8 0x3FE04360BE7603AC // 169
419 data8 0x3FE05759AC47FE33 // 170
420 data8 0x3FE06B5F1911CF51 // 171
421 data8 0x3FE078BF0533C568 // 172
422 data8 0x3FE08CD9687E7B0E // 173
423 data8 0x3FE0A10074CF9019 // 174
424 data8 0x3FE0B5343A234476 // 175
425 data8 0x3FE0C974C89431CD // 176
426 data8 0x3FE0DDC2305B9886 // 177
427 data8 0x3FE0EB524BAFC918 // 178
428 data8 0x3FE0FFB54213A475 // 179
429 data8 0x3FE114253DA97D9F // 180
430 data8 0x3FE128A24F1D9AFF // 181
431 data8 0x3FE1365252BF0864 // 182
432 data8 0x3FE14AE558B4A92D // 183
433 data8 0x3FE15F85A19C765B // 184
434 data8 0x3FE16D4D38C119FA // 185
435 data8 0x3FE18203C20DD133 // 186
436 data8 0x3FE196C7BC4B1F3A // 187
437 data8 0x3FE1A4A738B7A33C // 188
438 data8 0x3FE1B981C0C9653C // 189
439 data8 0x3FE1CE69E8BB106A // 190
440 data8 0x3FE1DC619DE06944 // 191
441 data8 0x3FE1F160A2AD0DA3 // 192
442 data8 0x3FE2066D7740737E // 193
443 data8 0x3FE2147DBA47A393 // 194
444 data8 0x3FE229A1BC5EBAC3 // 195
445 data8 0x3FE237C1841A502E // 196
446 data8 0x3FE24CFCE6F80D9A // 197
447 data8 0x3FE25B2C55CD5762 // 198
448 data8 0x3FE2707F4D5F7C40 // 199
449 data8 0x3FE285E0842CA383 // 200
450 data8 0x3FE294294708B773 // 201
451 data8 0x3FE2A9A2670AFF0C // 202
452 data8 0x3FE2B7FB2C8D1CC0 // 203
453 data8 0x3FE2C65A6395F5F5 // 204
454 data8 0x3FE2DBF557B0DF42 // 205
455 data8 0x3FE2EA64C3F97654 // 206
456 data8 0x3FE3001823684D73 // 207
457 data8 0x3FE30E97E9A8B5CC // 208
458 data8 0x3FE32463EBDD34E9 // 209
459 data8 0x3FE332F4314AD795 // 210
460 data8 0x3FE348D90E7464CF // 211
461 data8 0x3FE35779F8C43D6D // 212
462 data8 0x3FE36621961A6A99 // 213
463 data8 0x3FE37C299F3C366A // 214
464 data8 0x3FE38AE2171976E7 // 215
465 data8 0x3FE399A157A603E7 // 216
466 data8 0x3FE3AFCCFE77B9D1 // 217
467 data8 0x3FE3BE9D503533B5 // 218
468 data8 0x3FE3CD7480B4A8A2 // 219
469 data8 0x3FE3E3C43918F76C // 220
470 data8 0x3FE3F2ACB27ED6C6 // 221
471 data8 0x3FE4019C2125CA93 // 222
472 data8 0x3FE4181061389722 // 223
473 data8 0x3FE42711518DF545 // 224
474 data8 0x3FE436194E12B6BF // 225
475 data8 0x3FE445285D68EA69 // 226
476 data8 0x3FE45BCC464C893A // 227
477 data8 0x3FE46AED21F117FC // 228
478 data8 0x3FE47A1527E8A2D3 // 229
479 data8 0x3FE489445EFFFCCB // 230
480 data8 0x3FE4A018BCB69835 // 231
481 data8 0x3FE4AF5A0C9D65D7 // 232
482 data8 0x3FE4BEA2A5BDBE87 // 233
483 data8 0x3FE4CDF28F10AC46 // 234
484 data8 0x3FE4DD49CF994058 // 235
485 data8 0x3FE4ECA86E64A683 // 236
486 data8 0x3FE503C43CD8EB68 // 237
487 data8 0x3FE513356667FC57 // 238
488 data8 0x3FE522AE0738A3D7 // 239
489 data8 0x3FE5322E26867857 // 240
490 data8 0x3FE541B5CB979809 // 241
491 data8 0x3FE55144FDBCBD62 // 242
492 data8 0x3FE560DBC45153C6 // 243
493 data8 0x3FE5707A26BB8C66 // 244
494 data8 0x3FE587F60ED5B8FF // 245
495 data8 0x3FE597A7977C8F31 // 246
496 data8 0x3FE5A760D634BB8A // 247
497 data8 0x3FE5B721D295F10E // 248
498 data8 0x3FE5C6EA94431EF9 // 249
499 data8 0x3FE5D6BB22EA86F5 // 250
500 data8 0x3FE5E6938645D38F // 251
501 data8 0x3FE5F673C61A2ED1 // 252
502 data8 0x3FE6065BEA385926 // 253
503 data8 0x3FE6164BFA7CC06B // 254
504 data8 0x3FE62643FECF9742 // 255
506 // two parts of ln(2): hi part first, then lo part. The code loads them
// as the pair FR_Ln2hi,FR_Ln2lo with a single ldfpd at offset 0x820 of
// log_data. The hi constant has its low 20 mantissa bits zero.
// NOTE(review): presumably so that N*Ln2hi is exact - confirm.
507 data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
509 // lo parts of ln(1/frcpa(1+i/256)), i=0...255
510 data4 0x20E70672 // 0
511 data4 0x1F60A5D0 // 1
512 data4 0x218EABA0 // 2
513 data4 0x21403104 // 3
514 data4 0x20E9B54E // 4
515 data4 0x21EE1382 // 5
516 data4 0x226014E3 // 6
517 data4 0x2095E5C9 // 7
518 data4 0x228BA9D4 // 8
519 data4 0x22932B86 // 9
520 data4 0x22608A57 // 10
521 data4 0x220209F3 // 11
522 data4 0x212882CC // 12
523 data4 0x220D46E2 // 13
524 data4 0x21FA4C28 // 14
525 data4 0x229E5BD9 // 15
526 data4 0x228C9838 // 16
527 data4 0x2311F954 // 17
528 data4 0x221365DF // 18
529 data4 0x22BD0CB3 // 19
530 data4 0x223D4BB7 // 20
531 data4 0x22A71BBE // 21
532 data4 0x237DB2FA // 22
533 data4 0x23194C9D // 23
534 data4 0x22EC639E // 24
535 data4 0x2367E669 // 25
536 data4 0x232E1D5F // 26
537 data4 0x234A639B // 27
538 data4 0x2365C0E0 // 28
539 data4 0x234646C1 // 29
540 data4 0x220CBF9C // 30
541 data4 0x22A00FD4 // 31
542 data4 0x2306A3F2 // 32
543 data4 0x23745A9B // 33
544 data4 0x2398D756 // 34
545 data4 0x23DD0B6A // 35
546 data4 0x23DE338B // 36
547 data4 0x23A222DF // 37
548 data4 0x223164F8 // 38
549 data4 0x23B4E87B // 39
550 data4 0x23D6CCB8 // 40
551 data4 0x220C2099 // 41
552 data4 0x21B86B67 // 42
553 data4 0x236D14F1 // 43
554 data4 0x225A923F // 44
555 data4 0x22748723 // 45
556 data4 0x22200D13 // 46
557 data4 0x23C296EA // 47
558 data4 0x2302AC38 // 48
559 data4 0x234B1996 // 49
560 data4 0x2385E298 // 50
561 data4 0x23175BE5 // 51
562 data4 0x2193F482 // 52
563 data4 0x23BFEA90 // 53
564 data4 0x23D70A0C // 54
565 data4 0x231CF30A // 55
566 data4 0x235D9E90 // 56
567 data4 0x221AD0CB // 57
568 data4 0x22FAA08B // 58
569 data4 0x23D29A87 // 59
570 data4 0x20C4B2FE // 60
571 data4 0x2381B8B7 // 61
572 data4 0x23F8D9FC // 62
573 data4 0x23EAAE7B // 63
574 data4 0x2329E8AA // 64
575 data4 0x23EC0322 // 65
576 data4 0x2357FDCB // 66
577 data4 0x2392A9AD // 67
578 data4 0x22113B02 // 68
579 data4 0x22DEE901 // 69
580 data4 0x236A6D14 // 70
581 data4 0x2371D33E // 71
582 data4 0x2146F005 // 72
583 data4 0x23230B06 // 73
584 data4 0x22F1C77D // 74
585 data4 0x23A89FA3 // 75
586 data4 0x231D1241 // 76
587 data4 0x244DA96C // 77
588 data4 0x23ECBB7D // 78
589 data4 0x223E42B4 // 79
590 data4 0x23801BC9 // 80
591 data4 0x23573263 // 81
592 data4 0x227C1158 // 82
593 data4 0x237BD749 // 83
594 data4 0x21DDBAE9 // 84
595 data4 0x23401735 // 85
596 data4 0x241D9DEE // 86
597 data4 0x23BC88CB // 87
598 data4 0x2396D5F1 // 88
599 data4 0x23FC89CF // 89
600 data4 0x2414F9A2 // 90
601 data4 0x2474A0F5 // 91
602 data4 0x24354B60 // 92
603 data4 0x23C1EB40 // 93
604 data4 0x2306DD92 // 94
605 data4 0x24353B6B // 95
606 data4 0x23CD1701 // 96
607 data4 0x237C7A1C // 97
608 data4 0x245793AA // 98
609 data4 0x24563695 // 99
610 data4 0x23C51467 // 100
611 data4 0x24476B68 // 101
612 data4 0x212585A9 // 102
613 data4 0x247B8293 // 103
614 data4 0x2446848A // 104
615 data4 0x246A53F8 // 105
616 data4 0x246E496D // 106
617 data4 0x23ED1D36 // 107
618 data4 0x2314C258 // 108
619 data4 0x233244A7 // 109
620 data4 0x245B7AF0 // 110
621 data4 0x24247130 // 111
622 data4 0x22D67B38 // 112
623 data4 0x2449F620 // 113
624 data4 0x23BBC8B8 // 114
625 data4 0x237D3BA0 // 115
626 data4 0x245E8F13 // 116
627 data4 0x2435573F // 117
628 data4 0x242DE666 // 118
629 data4 0x2463BC10 // 119
630 data4 0x2466587D // 120
631 data4 0x2408144B // 121
632 data4 0x2405F0E5 // 122
633 data4 0x22381CFF // 123
634 data4 0x24154F9B // 124
635 data4 0x23A4E96E // 125
636 data4 0x24052967 // 126
637 data4 0x2406963F // 127
638 data4 0x23F7D3CB // 128
639 data4 0x2448AFF4 // 129
640 data4 0x24657A21 // 130
641 data4 0x22FBC230 // 131
642 data4 0x243C8DEA // 132
643 data4 0x225DC4B7 // 133
644 data4 0x23496EBF // 134
645 data4 0x237C2B2B // 135
646 data4 0x23A4A5B1 // 136
647 data4 0x2394E9D1 // 137
648 data4 0x244BC950 // 138
649 data4 0x23C7448F // 139
650 data4 0x2404A1AD // 140
651 data4 0x246511D5 // 141
652 data4 0x24246526 // 142
653 data4 0x23111F57 // 143
654 data4 0x22868951 // 144
655 data4 0x243EB77F // 145
656 data4 0x239F3DFF // 146
657 data4 0x23089666 // 147
658 data4 0x23EBFA6A // 148
659 data4 0x23C51312 // 149
660 data4 0x23E1DD5E // 150
661 data4 0x232C0944 // 151
662 data4 0x246A741F // 152
663 data4 0x2414DF8D // 153
664 data4 0x247B5546 // 154
665 data4 0x2415C980 // 155
666 data4 0x24324ABD // 156
667 data4 0x234EB5E5 // 157
668 data4 0x2465E43E // 158
669 data4 0x242840D1 // 159
670 data4 0x24444057 // 160
671 data4 0x245E56F0 // 161
672 data4 0x21AE30F8 // 162
673 data4 0x23FB3283 // 163
674 data4 0x247A4D07 // 164
675 data4 0x22AE314D // 165
676 data4 0x246B7727 // 166
677 data4 0x24EAD526 // 167
678 data4 0x24B41DC9 // 168
679 data4 0x24EE8062 // 169
680 data4 0x24A0C7C4 // 170
681 data4 0x24E8DA67 // 171
682 data4 0x231120F7 // 172
683 data4 0x24401FFB // 173
684 data4 0x2412DD09 // 174
685 data4 0x248C131A // 175
686 data4 0x24C0A7CE // 176
687 data4 0x243DD4C8 // 177
688 data4 0x24457FEB // 178
689 data4 0x24DEEFBB // 179
690 data4 0x243C70AE // 180
691 data4 0x23E7A6FA // 181
692 data4 0x24C2D311 // 182
693 data4 0x23026255 // 183
694 data4 0x2437C9B9 // 184
695 data4 0x246BA847 // 185
696 data4 0x2420B448 // 186
697 data4 0x24C4CF5A // 187
698 data4 0x242C4981 // 188
699 data4 0x24DE1525 // 189
700 data4 0x24F5CC33 // 190
701 data4 0x235A85DA // 191
702 data4 0x24A0B64F // 192
703 data4 0x244BA0A4 // 193
704 data4 0x24AAF30A // 194
705 data4 0x244C86F9 // 195
706 data4 0x246D5B82 // 196
707 data4 0x24529347 // 197
708 data4 0x240DD008 // 198
709 data4 0x24E98790 // 199
710 data4 0x2489B0CE // 200
711 data4 0x22BC29AC // 201
712 data4 0x23F37C7A // 202
713 data4 0x24987FE8 // 203
714 data4 0x22AFE20B // 204
715 data4 0x24C8D7C2 // 205
716 data4 0x24B28B7D // 206
717 data4 0x23B6B271 // 207
718 data4 0x24C77CB6 // 208
719 data4 0x24EF1DCA // 209
720 data4 0x24A4F0AC // 210
721 data4 0x24CF113E // 211
722 data4 0x2496BBAB // 212
723 data4 0x23C7CC8A // 213
724 data4 0x23AE3961 // 214
725 data4 0x2410A895 // 215
726 data4 0x23CE3114 // 216
727 data4 0x2308247D // 217
728 data4 0x240045E9 // 218
729 data4 0x24974F60 // 219
730 data4 0x242CB39F // 220
731 data4 0x24AB8D69 // 221
732 data4 0x23436788 // 222
733 data4 0x24305E9E // 223
734 data4 0x243E71A9 // 224
735 data4 0x23C2A6B3 // 225
736 data4 0x23FFE6CF // 226
737 data4 0x2322D801 // 227
738 data4 0x24515F21 // 228
739 data4 0x2412A0D6 // 229
740 data4 0x24E60D44 // 230
741 data4 0x240D9251 // 231
742 data4 0x247076E2 // 232
743 data4 0x229B101B // 233
744 data4 0x247B12DE // 234
745 data4 0x244B9127 // 235
746 data4 0x2499EC42 // 236
747 data4 0x21FC3963 // 237
748 data4 0x23E53266 // 238
749 data4 0x24CE102D // 239
750 data4 0x23CC45D2 // 240
751 data4 0x2333171D // 241
752 data4 0x246B3533 // 242
753 data4 0x24931129 // 243
754 data4 0x24405FFA // 244
755 data4 0x24CF464D // 245
756 data4 0x237095CD // 246
757 data4 0x24F86CBD // 247
758 data4 0x24E2D84B // 248
759 data4 0x21ACBB44 // 249
760 data4 0x24F43A8C // 250
761 data4 0x249DB931 // 251
762 data4 0x24A385EF // 252
763 data4 0x238B1279 // 253
764 data4 0x2436213E // 254
765 data4 0x24F18A3B // 255
766 LOCAL_OBJECT_END(log_data)
769 // Code
770 //==============================================================
// log1p: double log1p(double x). The argument arrives in f8 and the result
// is returned in f8.
// Predicate roles, in the order they are set below:
//   p8  - x is unnormal: detour through log1p_unorm to recompute the exponent
//   p9  - x is NaN, NaT or +Inf: return x*1+0
//   p10 - first "1+x is negative" (branch to log1p_lt_minus_1), later
//         rewritten as "|x| < 2^-80" (return x - x*x)
//   p12 - x == 0: return x unchanged
//   p11 - 1+x == 0: branch to log1p_eq_minus_1
//   p6  - |x| <  2^-8: r = x,                      result = P(r)
//   p7  - |x| >= 2^-8: r = frcpa(1+x)*(1+x) - 1,   result = (N*ln2 + T) + P(r)
// with P(r) = r + r^2*(A3*r - A2) + r^4*((A5*r + A4) + r^2*(A7*r + A6)).
// NOTE(review): this listing shows bundle openers ("{ .mfi") but no closing
// "}" or ";;" stop lines, and the embedded line numbers skip where those
// lines would be. They were presumably lost when the file was extracted;
// confirm against the upstream source before assembling.
772 .section .text
773 GLOBAL_IEEE754_ENTRY(log1p)
774 { .mfi
775       getf.exp      GR_signexp_x = f8 // if x is unorm then must recompute
776       fadd.s1       FR_Xp1 = f8, f1       // Form 1+x
777       mov           GR_05 = 0xfffe // biased exponent used to build A2 = 0.5
779 { .mlx
780       addl          GR_ad_1 = @ltoff(log_data),gp // address of the GOT slot for log_data
781       movl          GR_A3 = 0x3fd5555555555557 // double precision memory
782                                                // representation of A3
786 { .mfi
787       ld8           GR_ad_1 = [GR_ad_1] // base address of log_data
788       fclass.m      p8,p0 = f8,0xb // Is x unorm?
789       mov           GR_exp_mask = 0x1ffff
791 { .mfi
792       nop.m         0
793       fnorm.s1      FR_NormX = f8              // Normalize x
794       mov           GR_exp_bias = 0xffff
798 { .mfi
799       setf.exp      FR_A2 = GR_05 // create A2 = 0.5
800       fclass.m      p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
801       nop.i         0
803 { .mib
804       setf.d        FR_A3 = GR_A3 // create A3
805       add           GR_ad_2 = 16,GR_ad_1 // address of A5,A4
806 (p8)  br.cond.spnt  log1p_unorm          // Branch if x=unorm
// log1p_unorm branches back here after refreshing GR_signexp_x.
810 log1p_common:
811 { .mfi
812       nop.m         0
813       frcpa.s1      FR_RcpX,p0 = f1,FR_Xp1 // approximation of 1/(1+x)
814       nop.i         0
816 { .mfb
817       nop.m         0
818 (p9)  fma.d.s0      f8 = f8,f1,f0 // set V-flag
819 (p9)  br.ret.spnt   b0 // exit for NaN, NaT and +Inf
823 { .mfi
824       getf.exp      GR_Exp = FR_Xp1            // signexp of x+1
825       fclass.m      p10,p0 = FR_Xp1,0x3A // is 1+x < 0?
826       and           GR_exp_x = GR_exp_mask, GR_signexp_x // biased exponent of x
828 { .mfi
829       ldfpd         FR_A7,FR_A6 = [GR_ad_1]
830       nop.f         0
831       nop.i         0
835 { .mfi
836       getf.sig      GR_Sig = FR_Xp1 // get significand to calculate index
837                                     // for Thi,Tlo if |x| >= 2^-8
838       fcmp.eq.s1    p12,p0 = f8,f0     // is x equal to 0?
839       sub           GR_exp_x = GR_exp_x, GR_exp_bias // true exponent of x
843 { .mfi
844       sub           GR_N = GR_Exp,GR_exp_bias // true exponent of x+1
845       fcmp.eq.s1    p11,p0 = FR_Xp1,f0     // is x = -1?
846       cmp.gt        p6,p7 = -8, GR_exp_x  // Is |x| < 2^-8
848 { .mfb
849       ldfpd         FR_A5,FR_A4 = [GR_ad_2],16 // post-increment leaves GR_ad_2 at Thi[0]
850       nop.f         0
851 (p10) br.cond.spnt  log1p_lt_minus_1   // jump if x < -1
855 // p6 is true if |x| < 1/256
856 // p7 is true if |x| >= 1/256
857 .pred.rel "mutex",p6,p7
858 { .mfi
859 (p7)  add           GR_ad_1 = 0x820,GR_ad_1 // address of log(2) parts
860 (p6)  fms.s1        FR_r = f8,f1,f0 // range reduction for |x|<1/256
861 (p6)  cmp.gt.unc    p10,p0 = -80, GR_exp_x  // Is |x| < 2^-80
863 { .mfb
864 (p7)  setf.sig      FR_N = GR_N // copy unbiased exponent of x+1 to the
865                                 // significand field of FR_N
866 (p7)  fms.s1        FR_r = FR_RcpX,FR_Xp1,f1 // range reduction for |x|>=1/256
867 (p12) br.ret.spnt   b0 // exit for x=0, return x
871 { .mib
872 (p7)  ldfpd         FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16 // post-increment leaves GR_ad_1 at Tlo[0]
873 (p7)  extr.u        GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
874 (p11) br.cond.spnt  log1p_eq_minus_1 // jump if x = -1
878 { .mmf
879 (p7)  shladd        GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
880 (p7)  shladd        GR_ad_1 = GR_Ind,2,GR_ad_1 // address of Tlo
881 (p10) fnma.d.s0     f8 = f8,f8,f8   // If |x| very small, result=x-x*x
885 { .mmb
886 (p7)  ldfd          FR_Thi = [GR_ad_2]
887 (p7)  ldfs          FR_Tlo = [GR_ad_1]
888 (p10) br.ret.spnt   b0                   // Exit if |x| < 2^(-80)
// Polynomial evaluation, shared by the p6 and p7 paths.
892 { .mfi
893       nop.m         0
894       fma.s1        FR_r2 = FR_r,FR_r,f0 // r^2
895       nop.i         0
897 { .mfi
898       nop.m         0
899       fms.s1        FR_A2 = FR_A3,FR_r,FR_A2 // A3*r-A2
900       nop.i         0
904 { .mfi
905       nop.m         0
906       fma.s1        FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
907       nop.i         0
909 { .mfi
910       nop.m         0
911       fma.s1        FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
912       nop.i         0
916 { .mfi
917       nop.m         0
918 (p7)  fcvt.xf       FR_N = FR_N // convert the integer N to floating point
919       nop.i         0
923 { .mfi
924       nop.m         0
925       fma.s1        FR_r4 = FR_r2,FR_r2,f0 // r^4
926       nop.i         0
928 { .mfi
929       nop.m         0
930       // (A3*r-A2)*r^2+r
931       fma.s1        FR_A2 = FR_A2,FR_r2,FR_r
932       nop.i         0
936 { .mfi
937       nop.m         0
938       // (A7*r+A6)*r^2+(A5*r+A4)
939       fma.s1        FR_A4 = FR_A6,FR_r2,FR_A4
940       nop.i         0
944 { .mfi
945       nop.m         0
946       // N*Ln2hi+Thi
947 (p7)  fma.s1        FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
948       nop.i         0
950 { .mfi
951       nop.m         0
952       // N*Ln2lo+Tlo
953 (p7)  fma.s1        FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
954       nop.i         0
958 { .mfi
959       nop.m         0
960 (p7)  fma.s1        f8 = FR_A4,FR_r4,FR_A2 // P(r) if |x| >= 1/256
961       nop.i         0
963 { .mfi
964       nop.m         0
965       // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
966 (p7)  fma.s1        FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
967       nop.i         0
971 .pred.rel "mutex",p6,p7
972 { .mfi
973       nop.m         0
974 (p6)  fma.d.s0      f8 = FR_A4,FR_r4,FR_A2 // result if 2^(-80) <= |x| < 1/256
975       nop.i         0
977 { .mfb
978       nop.m         0
979 (p7)  fma.d.s0      f8 = f8,f1,FR_NxLn2pT  // result if |x| >= 1/256
980       br.ret.sptk   b0                     // Exit if |x| >= 2^(-80)
984 .align 32
985 log1p_unorm:
986 // Here if x=unorm
987 { .mfb
988       getf.exp      GR_signexp_x = FR_NormX // recompute biased exponent
989       nop.f         0
990       br.cond.sptk  log1p_common
994 .align 32
995 log1p_eq_minus_1:
996 // Here if x=-1
997 { .mfi
998       nop.m         0
999       fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
1000                                  // call of __libm_error_support#
1001       nop.i         0
1005 { .mfi
1006       mov           GR_TAG = 140  // set libm error in case of log1p(-1).
1007       frcpa.s0      f8,p0 = f8,f0 // log1p(-1) should be equal to -INF.
1008                                       // We can get it using frcpa because it
1009                                       // sets result to the IEEE-754 mandated
1010                                       // quotient of f8/f0.
1011       nop.i         0
1013 { .mib
1014       nop.m         0
1015       nop.i         0
1016       br.cond.sptk  log_libm_err
1020 .align 32
1021 log1p_lt_minus_1:
1022 // Here if x < -1
1023 { .mfi
1024       nop.m         0
1025       fmerge.s      FR_X = f8,f8 // keep input argument for the error handler
1026       nop.i         0
1030 { .mfi
1031       mov           GR_TAG = 141  // set libm error in case of x < -1.
1032       frcpa.s0      f8,p0 = f0,f0 // log1p(x) x < -1 should be equal to NaN.
1033                                   // We can get it using frcpa because it
1034                                   // sets result to the IEEE-754 mandated
1035                                   // quotient of f0/f0 i.e. NaN.
1036       nop.i         0
// No branch here: the x < -1 path falls through into log_libm_err.
1040 .align 32
1041 log_libm_err:
// Allocate the frame that provides r33-r40 and pass the error tag in r40.
// NOTE(review): there is no branch after this bundle, so control appears to
// continue into __libm_error_region, which follows immediately - confirm
// against the GLOBAL_IEEE754_END / LOCAL_LIBM_ENTRY macro definitions.
1042 { .mmi
1043       alloc         r32 = ar.pfs,1,4,4,0
1044       mov           GR_Parameter_TAG = GR_TAG
1045       nop.i         0
1049 GLOBAL_IEEE754_END(log1p)
// __libm_error_region: shared tail for the log1p error paths. It spills
// FR_Y, FR_X and FR_RESULT to a 64-byte stack frame, calls
// __libm_error_support with the addresses of those slots (plus the tag
// already placed in GR_Parameter_TAG), and returns the value the handler
// left in the result slot.
// Frame layout relative to the new sp (derived from the adds below):
//   sp+16  Parameter 1 (FR_X, the original argument)
//   sp+32  Parameter 2 (FR_Y)
//   sp+48  Parameter 3 (FR_RESULT, reloaded into f8 after the call)
1052 LOCAL_LIBM_ENTRY(__libm_error_region)
1053 .prologue
1054 { .mfi
1055         add   GR_Parameter_Y = -32,sp         // Parameter 2 address (old sp-32 = new sp+32)
1056         nop.f 0
1057 .save   ar.pfs,GR_SAVE_PFS
1058         mov  GR_SAVE_PFS = ar.pfs             // Save ar.pfs
1060 { .mfi
1061 .fframe 64
1062         add sp = -64,sp                       // Create new stack
1063         nop.f 0
1064         mov GR_SAVE_GP = gp                   // Save gp
1066 { .mmi
1067         stfd [GR_Parameter_Y] = FR_Y,16       // STORE Parameter 2 on stack
                                              // (post-increment: now sp+48)
1068         add GR_Parameter_X = 16,sp            // Parameter 1 address
1069 .save   b0, GR_SAVE_B0
1070         mov GR_SAVE_B0 = b0                   // Save b0
1072 .body
1073 { .mib
1074         stfd [GR_Parameter_X] = FR_X          // STORE Parameter 1 on stack
1075         add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
1076         nop.b 0
1078 { .mib
1079         stfd [GR_Parameter_Y] = FR_RESULT     // STORE Parameter 3 on stack
1080         add   GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 slot (sp+32)
1081         br.call.sptk b0=__libm_error_support# // Call error handling function
// Recompute the result address from sp after the call rather than reusing
// the value set before it.
1083 { .mmi
1084         add   GR_Parameter_RESULT = 48,sp
1085         nop.m 0
1086         nop.i 0
1088 { .mmi
1089         ldfd  f8 = [GR_Parameter_RESULT]      // Get return result off stack
1090 .restore sp
1091         add   sp = 64,sp                      // Restore stack pointer
1092         mov   b0 = GR_SAVE_B0                 // Restore return address
1094 { .mib
1095         mov   gp = GR_SAVE_GP                 // Restore gp
1096         mov   ar.pfs = GR_SAVE_PFS            // Restore ar.pfs
1097         br.ret.sptk     b0                    // Return
1099 LOCAL_LIBM_END(__libm_error_region)
// __libm_error_support is not defined in this file: declare it as a
// function symbol and make the name global so the br.call above can
// reference it.
1101 .type   __libm_error_support#,@function
1102 .global __libm_error_support#