2.9
[glibc/nacl-glibc.git] / sysdeps / ia64 / fpu / libm_lgamma.S
blob6096319ba5011642c5d19e8129ddaae3ffd71eae
1 .file "libm_lgamma.s"
4 // Copyright (c) 2002 - 2005, Intel Corporation
5 // All rights reserved.
6 //
7 // Contributed 2002 by the Intel Numerics Group, Intel Corporation
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
26 // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29 // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code,and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //*********************************************************************
42 // History:
43 // 01/10/02  Initial version
44 // 01/25/02  Corrected error tag numbers
45 // 02/04/02  Added support of SIGN(GAMMA(x)) calculation
46 // 05/20/02  Cleaned up namespace and sf0 syntax
47 // 09/15/02  Fixed bug on the branch lgamma_negrecursion
48 // 10/21/02  Now it returns SIGN(GAMMA(x))=-1 for negative zero
49 // 02/10/03  Reordered header: .section, .global, .proc, .align
50 // 07/22/03  Reformatted some data tables
51 // 03/31/05  Reformatted delimiters between data tables
53 //*********************************************************************
55 //*********************************************************************
57 // Function: __libm_lgamma(double x, int* signgam, int szsigngam)
58 // computes the principal value of the logarithm of the GAMMA function
59 // of x. The sign of GAMMA(x) is stored to memory starting at the address
60 // specified by signgam.
62 //*********************************************************************
64 // Resources Used:
66 //    Floating-Point Registers: f6-f15
67 //                              f32-f122
69 //    General Purpose Registers:
70 //      r8-r11
71 //      r14-r31
72 //      r32-r36
73 //      r37-r40 (Used to pass arguments to error handling routine)
75 //    Predicate Registers:      p6-p15
77 //*********************************************************************
79 // IEEE Special Conditions:
81 //    __libm_lgamma(+inf) = +inf
82 //    __libm_lgamma(-inf) = QNaN
83 //    __libm_lgamma(+/-0) = +inf
84 //    __libm_lgamma(x<0, x is an integer) = +inf
85 //    __libm_lgamma(SNaN) = QNaN
86 //    __libm_lgamma(QNaN) = QNaN
88 //*********************************************************************
90 // Overview
92 // The method consists of six cases.
94 // If      512 <= x < OVERFLOW_BOUNDARY   use case lgamma_pstirling;
95 // else if 1 < x < 512                    use case lgamma_regular;
96 // else if -17 < x < 1                    use case lgamma_negrecursion;
97 // else if -512 <  x < -17                use case lgamma_negpoly;
98 // else if x < -512                       use case lgamma_negstirling;
99 // else if x is close to negative
100 //         roots of ln(GAMMA(x))          use case lgamma_negroots;
103 // Case 512 <= x < OVERFLOW_BOUNDARY
104 // ---------------------------------
105 //   Here we use algorithm based on the Stirling formula:
106 //   ln(GAMMA(x)) = ln(sqrt(2*Pi)) + (x-0.5)ln(x) - x + (W2 + W4/x^2)/x
108 // Case 1 < x < 512
109 // ----------------
110 //   To calculate GAMMA(x) on this interval we use polynomial approximation
111 //   on following intervals [0.875; 1.25), [1.25; 1.75), [1.75, 2.25),
112 //   [2.25; 4), [2^i; 2^(i+1)), i=2..8
114 //   Following variants of approximation and argument reduction are used:
115 //    1. [0.875; 1.25)
116 //       ln(GAMMA(x)) ~ (x-1.0)*P17(x-1.0)
118 //    2. [1.25; 1.75)
119 //       ln(GAMMA(x)) ~ (x-LocalMinimum)*P17(x-LocalMinimum)
121 //    3. [1.75, 2.25)
122 //       ln(GAMMA(x)) ~ (x-2.0)*P17(x-2.0)
124 //    4. [2.25; 4)
125 //       ln(GAMMA(x)) ~ P22(x)
127 //    5. [2^i; 2^(i+1)), i=2..8
128 //       ln(GAMMA(x)) ~ P22((x-2^i)/2^i)
130 // Case -17 < x < 1
131 // ----------------
132 //   Here we use the recursive formula:
133 //   ln(GAMMA(x)) = ln(GAMMA(x+1)) - ln(x)
135 //   Using this formula we reduce argument to base interval [1.0; 2.0]
137 // Case -512 <  x < -17
138 // --------------------
139 //   Here we use the formula:
140 //   ln(GAMMA(-x)) = ln(Pi/(x*GAMMA(x)*sin(Pi*x))) =
141 //   = -ln(x) - ln(GAMMA(x)) - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
142 //   where r = x - rounded_to_nearest(x), i.e |r| <= 0.5 and
143 //   ln(sin(Pi*r)/(Pi*r)) is approximated by 14-degree polynomial of r^2
146 // Case x < -512
147 // -------------
148 //   Here we use algorithm based on the Stirling formula:
149 //   ln(GAMMA(-x)) = -ln(sqrt(2*Pi)) + (-x-0.5)ln(x) + x - (W2 + W4/x^2)/x -
150 //   - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
151 //   where r = x - rounded_to_nearest(x).
153 // Neighbourhoods of negative roots
154 // --------------------------------
155 //   Here we use polynomial approximation
156 //   ln(GAMMA(x)) = ln(GAMMA(x0)) + (x-x0)*P14(x-x0),
157 //   where x0 is a root of ln(GAMMA(x)) rounded to nearest double
158 //   precision number.
161 //*********************************************************************
// Symbolic names for the floating-point registers listed under
// "Resources Used" (f6-f15 and f32-f122).
// Many names are bound to the same physical register (for example FR_X and
// FR_N are both f10; FR_C00, FR_Xp2, FR_A00 and FR_A16 are all f34), so two
// names that share a register cannot hold distinct live values at the same
// time.
163 FR_X                   = f10
164 FR_Y                   = f1 // no Y operand: __libm_lgamma is a single-argument function (f1 reads as +1.0 on IA-64)
165 FR_RESULT              = f8
167 FR_B11                 = f6
168 FR_B10                 = f7
170 FR_int_N               = f9
171 FR_N                   = f10
172 FR_P5                  = f11
173 FR_P4                  = f12
174 FR_P3                  = f13
175 FR_P2                  = f14
176 FR_NormX               = f15
// f32 and above: heavily shared scratch names. The C*/A*/B*/S*/Q*/W* names
// match the coefficient labels used in the data-table comments;
// NOTE(review): which branch uses which alias is not visible here — confirm
// against the code before changing any binding.
178 FR_Ln2                 = f32
179 FR_C01                 = f33
180 FR_A17                 = f33
181 FR_C00                 = f34
182 FR_Xp2                 = f34
183 FR_A00                 = f34
184 FR_A16                 = f34
185 FR_C11                 = f35
186 FR_A15                 = f35
187 FR_C10                 = f36
188 FR_Xp3                 = f36
189 FR_A14                 = f36
190 FR_B1                  = f36
191 FR_C21                 = f37
192 FR_A13                 = f37
193 FR_PR01                = f37
194 FR_C20                 = f38
195 FR_Xp6                 = f38
196 FR_A12                 = f38
197 FR_C31                 = f39
198 FR_Xp7                 = f39
199 FR_B0                  = f39
200 FR_A11                 = f39
201 FR_C30                 = f40
202 FR_Xp8                 = f40
203 FR_A10                 = f40
204 FR_PR00                = f40
205 FR_C41                 = f41
206 FR_Xp9                 = f41
207 FR_A9                  = f41
208 FR_PR11                = f41
209 FR_C40                 = f42
210 FR_A8                  = f42
211 FR_C51                 = f43
212 FR_Xp11                = f43
213 FR_A7                  = f43
214 FR_C50                 = f44
215 FR_C                   = f44
216 FR_Xp12                = f44
217 FR_A6                  = f44
218 FR_Xm2                 = f45
219 FR_Xp13                = f45
220 FR_A5                  = f45
221 FR_PR10                = f45
222 FR_C61                 = f46
223 FR_Xp14                = f46
224 FR_A4                  = f46
225 FR_PR21                = f46
226 FR_C60                 = f47
227 FR_Xp15                = f47
228 FR_A3                  = f47
229 FR_PR20                = f47
230 FR_C71                 = f48
231 FR_Xp16                = f48
232 FR_A2                  = f48
233 FR_PR31                = f48
234 FR_C70                 = f49
235 FR_Xp17                = f49
236 FR_A1                  = f49
237 FR_PR30                = f49
238 FR_C81                 = f50
239 FR_B17                 = f50
240 FR_A0                  = f50
241 FR_C80                 = f51
242 FR_B16                 = f51
243 FR_C91                 = f52
244 FR_B15                 = f52
245 FR_C90                 = f53
246 FR_B14                 = f53
247 FR_CA1                 = f54
248 FR_B13                 = f54
249 FR_CA0                 = f55
250 FR_B12                 = f55
251 FR_CN                  = f56
252 FR_Qlo                 = f56
253 FR_PRN                 = f56
254 FR_B7                  = f57
255 FR_B6                  = f58
256 FR_Qhi                 = f59
257 FR_x                   = f60
258 FR_x2                  = f61
259 FR_TpNxLn2             = f62
260 FR_W2                  = f63
261 FR_x4                  = f64
262 FR_r4                  = f64
263 FR_x8                  = f65
264 FR_r8                  = f65
265 FR_r05                 = f66
266 FR_Xm05                = f66
267 FR_B5                  = f66
268 FR_LnSqrt2Pi           = f67
269 FR_B4                  = f67
270 FR_InvX                = f68
271 FR_B3                  = f68
272 FR_InvX2               = f69
273 FR_B2                  = f69
274 FR_W4                  = f70
275 FR_OvfBound            = f71
276 FR_05                  = f72
277 FR_LocalMin            = f73
278 FR_tmp                 = f73
279 FR_LnX                 = f74
280 FR_Xf                  = f75
281 FR_InvXf               = f76
282 FR_rf                  = f77
283 FR_rf2                 = f78
284 FR_P54f                = f79
285 FR_P32f                = f80
286 FR_rf3                 = f81
287 FR_P10f                = f82
288 FR_TpNxLn2f            = f83
289 FR_Nf                  = f84
290 FR_LnXf                = f85
291 FR_int_Nf              = f86
292 FR_Tf                  = f87
293 FR_Xf2                 = f88
294 FR_Xp10                = f89
295 FR_w3                  = f90
296 FR_S28                 = f90
297 FR_w2                  = f91
298 FR_S26                 = f91
299 FR_w6                  = f92
300 FR_S24                 = f92
301 FR_w4                  = f93
302 FR_S22                 = f93
303 FR_w                   = f94
304 FR_S20                 = f94
305 FR_Q8                  = f95
306 FR_S18                 = f95
307 FR_Q7                  = f96
308 FR_S16                 = f96
309 FR_Q4                  = f97
310 FR_S14                 = f97
311 FR_Q3                  = f98
312 FR_S12                 = f98
313 FR_Q6                  = f99
314 FR_S10                 = f99
315 FR_Q5                  = f100
316 FR_S8                  = f100
317 FR_Q2                  = f101
318 FR_S6                  = f101
319 FR_Root                = f101
320 FR_S4                  = f102
321 FR_Q1                  = f102
322 FR_S2                  = f103
323 FR_Xp1                 = f104
324 FR_Xf4                 = f105
325 FR_Xf8                 = f106
326 FR_Xfr                 = f107
327 FR_Xf6                 = f108
328 FR_Ntrunc              = f109
329 FR_B9                  = f110
330 FR_2                   = f110
331 FR_B8                  = f111
332 FR_3                   = f111
333 FR_5                   = f112
334 FR_Xp4                 = f113
335 FR_Xp5                 = f114
336 FR_P54                 = f115
337 FR_P32                 = f116
338 FR_P10                 = f117
339 FR_r                   = f118
340 FR_r2                  = f119
341 FR_r3                  = f120
342 FR_T                   = f121
343 FR_int_Ntrunc          = f122
345 //===================================
// Symbolic names for the general-purpose registers: scratch registers
// r8-r11 and r14-r31, followed by the GR_SAVE_* registers (r33-r36) and the
// GR_Parameter_* registers (r37-r40) that the header lists as arguments to
// the error handling routine.
// Several names are bound to the same physical register, so names sharing a
// register cannot hold distinct live values at the same time.
347 GR_TAG                 = r8
348 GR_ExpMask             = r8
349 GR_ExpBias             = r9
350 GR_ad_Roots            = r9
351 GR_Expf                = r10
352 GR_Arg                 = r10
353 GR_SignExp             = r11
354 GR_ArgXfr              = r11
356 GR_Exp                 = r14
357 GR_Arg125              = r14
358 GR_RootInd             = r14
359 GR_ArgAsIs             = r15
360 GR_Arg175              = r15
361 GR_Sig                 = r16
362 GR_Ind                 = r17 // NOTE(review): GR_Ind is assigned again further down (= r24); confirm which binding each use expects
363 GR_ad_Dx               = r17
364 GR_ad_1                = r18
365 GR_SignExp_w           = r19
366 GR_2_25                = r19
367 GR_Arg025              = r19
368 GR_Arg15               = r19
369 GR_Arg17               = r19
370 GR_Exp_w               = r19//21 -- NOTE(review): the trailing "21" looks like a leftover from an earlier register choice; confirm and drop
371 GR_ad_2                = r20
372 GR_2xDx                = r21
373 GR_SignOfGamma         = r21
374 GR_fff9                = r22
375 GR_Offs                = r22
376 GR_ad_Co7              = r23
377 GR_Arg075              = r23
378 GR_Arg0875             = r23
379 GR_ad_T                = r24
380 GR_ad_Root             = r24
381 GR_Ind                 = r24 // NOTE(review): second assignment of GR_Ind (first one above is r17); duplicate name — verify intent
382 GR_ad_Co               = r25
383 GR_ad_Ce               = r26
384 GR_ad_Ce7              = r27
385 GR_Arg05               = r27
386 GR_Offs7               = r28
387 GR_ArgXfrAsIs          = r28
388 GR_ExpOf2              = r29
389 GR_ad_LnT              = r29
390 GR_Dx                  = r29
391 GR_ExpOf256            = r30
392 GR_0x30033             = r30
393 GR_Root                = r30
394 GR_PseudoRoot          = r30
395 GR_ad_Data             = r31
396 GR_ad_SignGam          = r31
// NOTE(review): presumably these hold b0, ar.pfs, gp and sp across the call
// to the error handler — the code that uses them is not visible here; confirm.
399 GR_SAVE_B0             = r33
400 GR_SAVE_PFS            = r34
401 GR_SAVE_GP             = r35
402 GR_SAVE_SP             = r36
// Registers used to pass arguments to the error handling routine
// (see "Resources Used" in the file header).
404 GR_Parameter_X         = r37
405 GR_Parameter_Y         = r38
406 GR_Parameter_RESULT    = r39
407 GR_Parameter_TAG       = r40
411 // Data tables
412 //==============================================================
414 RODATA
415 .align 16
416 LOCAL_OBJECT_START(lgamma_data)
417 // polynomial approximation of ln(GAMMA(x)), 2.25 <= x < 512
418 // [2.25; 4)
419 data8 0xF888E8D7892718A2,0xC001 // C01
420 data8 0xF62F273BA12A4639,0x3FFD // C11
421 data8 0xA93AC50A37EC8D38,0xBFFC // C21
422 data8 0xB4CC43D2C161E057,0xBFFF // C31
423 data8 0xC6AC672F0C1392C7,0xC000 // C41
424 data8 0xA292B9AE3276942E,0xC001 // C51
425 data8 0xE554E4CCCA6C7B7B,0xC001 // C61
426 data8 0x92F0F55FBC87F860,0xC002 // C71
427 data8 0xAF60D0112843F6C1,0xC002 // C81
428 data8 0xC5956500FA3D92E7,0xC002 // C91
429 data8 0xD3B22CCBD8587750,0xC002 // CA1
430 data8 0xD888B6CF34159B54,0x4001 // C00
431 data8 0xBCB79C8329FD9F44,0x3FFE // C10
432 data8 0xCB8896FAD69C455D,0x4000 // C20
433 data8 0xE510A424639EBF5E,0x4001 // C30
434 data8 0xC65ED41B097486B3,0x4002 // C40
435 // [4; 8)
436 data8 0x9F1F3C822D03080E,0xC001 // C01
437 data8 0x941CACFA9C0FA8A6,0xC001 // C11
438 data8 0xFE34336391D99CB7,0xC000 // C21
439 data8 0xC40BAEAA165F81A1,0xC000 // C31
440 data8 0xFE3AE166E9B4DE8F,0xBFFF // C41
441 data8 0xD744F91AF7DAF873,0xBFFE // C51
442 data8 0x87871851E9C32D02,0x3FFD // C61
443 data8 0x9C93C03C502E808F,0x3FFF // C71
444 data8 0xF78BED07501D6A8E,0x3FFF // C81
445 data8 0x92FE41BA8BEADF70,0x4000 // C91
446 data8 0xA021878E1903A2C6,0x3FFF // CA1
447 data8 0xC85EFAC379FAFEE2,0x4001 // C00
448 data8 0xC10D7AAB7CEC7FF2,0x4001 // C10
449 data8 0xB3537BDF603E454C,0x4001 // C20
450 data8 0xA0D44E3D5BBE44C4,0x4001 // C30
451 data8 0x8B9C229B6241E7B3,0x4001 // C40
452 // [8; 16)
453 data8 0xD16AB33AEC220DF6,0x3FFF // C01
454 data8 0x987483646E150BCD,0x4000 // C11
455 data8 0x80C10A24C863999B,0x4000 // C21
456 data8 0xA39A8EB6F8AACE75,0x3FFF // C31
457 data8 0x93E04A1379BEC764,0x3FFD // C41
458 data8 0xD9F59C4BD3A69BD1,0xBFFE // C51
459 data8 0x82094EC891179B1A,0xC000 // C61
460 data8 0xC90CFE3A24F70659,0xC000 // C71
461 data8 0x827984EA7C155184,0xC001 // C81
462 data8 0x981BFDF79D1E0D80,0xC001 // C91
463 data8 0xA37209A8B97D230D,0xC001 // CA1
464 data8 0xAA1989737D6BA66D,0x3FFE // C00
465 data8 0xDBC013A351630AF8,0x3FFF // C10
466 data8 0x8B8D47698299389D,0x4000 // C20
467 data8 0xACCDD1315DE06EB0,0x4000 // C30
468 data8 0xD3414A5AC81BBB2D,0x4000 // C40
469 // [16; 32)
470 data8 0xECB2B0BE75C5F995,0x3FFF // C01
471 data8 0x9DD28BD6DBC96500,0x4000 // C11
472 data8 0x8521431B99C6244F,0x4000 // C21
473 data8 0xA95F92612B8413C3,0x3FFF // C31
474 data8 0x9C76E643B22D9544,0x3FFD // C41
475 data8 0xDD90EA99417C8038,0xBFFE // C51
476 data8 0x84EA6B6D32E5F906,0xC000 // C61
477 data8 0xCDBFE499E05AA622,0xC000 // C71
478 data8 0x8594A7DE35427100,0xC001 // C81
479 data8 0x9BC1CB2C10DC702F,0xC001 // C91
480 data8 0xA7602268762666B0,0xC001 // CA1
481 data8 0xDA082BCC6BDB8F7B,0x3FFE // C00
482 data8 0xEEBFE1C99322B85E,0x3FFF // C10
483 data8 0x96FED4C785361946,0x4000 // C20
484 data8 0xB9E3A7207C16B2FE,0x4000 // C30
485 data8 0xE1E8170CED48E2C7,0x4000 // C40
486 // [32; 64)
487 data8 0xFD481EB9AEDD53E7,0x3FFF // C01
488 data8 0xA216FB66AC8C53E1,0x4000 // C11
489 data8 0x885FF935787553BA,0x4000 // C21
490 data8 0xAD471CD89A313327,0x3FFF // C31
491 data8 0x9FF13FBA139D21E0,0x3FFD // C41
492 data8 0xE25E1663A6EE0266,0xBFFE // C51
493 data8 0x87BE51DD5D262FA2,0xC000 // C61
494 data8 0xD211A9D4CCE55696,0xC000 // C71
495 data8 0x885BEFC29FDED3C9,0xC001 // C81
496 data8 0x9EFA48E6367A67F6,0xC001 // C91
497 data8 0xAAD3978FC0791297,0xC001 // CA1
498 data8 0xF96D210DF37A0AEA,0x3FFE // C00
499 data8 0xFE11DC6783917C82,0x3FFF // C10
500 data8 0x9FFCD928291B7DDE,0x4000 // C20
501 data8 0xC4518F4A80E09AE1,0x4000 // C30
502 data8 0xEDDFE9E0FD297C63,0x4000 // C40
503 // [64; 128)
504 data8 0x840E2E62609B0AD3,0x4000 // C01
505 data8 0xA5275A0DD0D3DDF8,0x4000 // C11
506 data8 0x8AADC6ABFC441731,0x4000 // C21
507 data8 0xB041C6696BE90E50,0x3FFF // C31
508 data8 0xA4A8C9153F4B037E,0x3FFD // C41
509 data8 0xE3C6A461A7B86736,0xBFFE // C51
510 data8 0x89047681C6DE7673,0xC000 // C61
511 data8 0xD42DF77A480092DF,0xC000 // C71
512 data8 0x89C25D17F086FB20,0xC001 // C81
513 data8 0xA09F907D02E34EC7,0xC001 // C91
514 data8 0xAC998A9CB79805B7,0xC001 // CA1
515 data8 0x875CC9B69AE964CC,0x3FFF // C00
516 data8 0x847836BA85DD4C12,0x4000 // C10
517 data8 0xA5F3CB2B32E74936,0x4000 // C20
518 data8 0xCAE2197C96CB5A0F,0x4000 // C30
519 data8 0xF50F7EB60DE5CD09,0x4000 // C40
520 // [128; 256)
521 data8 0x87D9065DD1876926,0x4000 // C01
522 data8 0xA781C28FDAD7CC25,0x4000 // C11
523 data8 0x8C6A4FCE35A7EC8D,0x4000 // C21
524 data8 0xB27BA081728354F9,0x3FFF // C31
525 data8 0xA82FEA7124B0EB2B,0x3FFD // C41
526 data8 0xE4C996E42ECBF77A,0xBFFE // C51
527 data8 0x89F1A92C84FA538F,0xC000 // C61
528 data8 0xD5B6CFF7DB7F6070,0xC000 // C71
529 data8 0x8AC6B561FAE38B66,0xC001 // C81
530 data8 0xA1D1505C438D8F46,0xC001 // C91
531 data8 0xADE2DC1C924FEC81,0xC001 // CA1
532 data8 0x8EF6CC62A7E0EB5A,0x3FFF // C00
533 data8 0x88A2FFC0ABCB00C0,0x4000 // C10
534 data8 0xAA6EA8FCB75B065B,0x4000 // C20
535 data8 0xCFC4B82B3D5C9363,0x4000 // C30
536 data8 0xFA60FD85DE861771,0x4000 // C40
537 // [256; 512)
538 data8 0x8AAA7CE4ED5C1EFD,0x4000 // C01
539 data8 0xA9679234FB56F1E1,0x4000 // C11
540 data8 0x8DCE02287789D841,0x4000 // C21
541 data8 0xB44328EF30A8DE7E,0x3FFF // C31
542 data8 0xAB0DC564BFA1AB12,0x3FFD // C41
543 data8 0xE5882B16FCF2D3CB,0xBFFE // C51
544 data8 0x8AA7F48993006A86,0xC000 // C61
545 data8 0xD6E63752D192750D,0xC000 // C71
546 data8 0x8B90080B17853295,0xC001 // C81
547 data8 0xA2BDD4253128D1AB,0xC001 // C91
548 data8 0xAEE1A042F96B8121,0xC001 // CA1
549 data8 0x94A9C37A42E43BA7,0x3FFF // C00
550 data8 0x8BFA54E703878F5A,0x4000 // C10
551 data8 0xADFA426DDF14647B,0x4000 // C20
552 data8 0xD39C7F7B3958EAF0,0x4000 // C30
553 data8 0xFE8C3987853C01E3,0x4000 // C40
555 // [2.25; 4)
556 data8 0x943AF77763601441,0x4003 // C50
557 data8 0xC8A93F9ECB06E891,0x4003 // C60
558 data8 0xFC2E5A4AD33DE19D,0x4003 // C70
559 data8 0x9526B75B38670119,0x4004 // C80
560 data8 0xA7675879D68B587E,0x4004 // C90
561 data8 0xB31DFA672D7FB8C0,0x4004 // CA0
562 data8 0x83A27775D86F9A81,0xBFD7 // CN
563 // [4; 8)
564 data8 0xEB8049BA5E79ADA3,0x4000 // C50
565 data8 0xC20C95EA99037228,0x4000 // C60
566 data8 0x9D4A8C864053CEB8,0x4000 // C70
567 data8 0xFC7716544AB0C5C9,0x3FFF // C80
568 data8 0xC7EB985259EABA5F,0x3FFF // C90
569 data8 0xC042FB3B4C95096D,0x3FFD // CA0
570 data8 0xCC2A7F930856177B,0x3FEE // CN
571 // [8; 16)
572 data8 0xFE1903679D078C7A,0x4000 // C50
573 data8 0x957C221AB90171F1,0x4001 // C60
574 data8 0xAB2C53B2A78F4031,0x4001 // C70
575 data8 0xBE080AE6063AE387,0x4001 // C80
576 data8 0xCC019A0311605CB9,0x4001 // C90
577 data8 0xD3739D85A12C8ADF,0x4001 // CA0
578 data8 0x81FA4D2B7BD7A82D,0x3FEF // CN
579 // [16; 32)
580 data8 0x871F69E2DD221F02,0x4001 // C50
581 data8 0x9E3EF2D477442A9C,0x4001 // C60
582 data8 0xB48733582B3C82C5,0x4001 // C70
583 data8 0xC7DB9B3C25854A2A,0x4001 // C80
584 data8 0xD628B87975BE898F,0x4001 // C90
585 data8 0xDDC569C321FF119C,0x4001 // CA0
586 data8 0xB27B65560DF7ADA7,0x3FEF // CN
587 // [32; 64)
588 data8 0x8DE4127349719B22,0x4001 // C50
589 data8 0xA5C30A7760F5FBB2,0x4001 // C60
590 data8 0xBCB4096055AA2A4E,0x4001 // C70
591 data8 0xD08F5F2FB4E7B899,0x4001 // C80
592 data8 0xDF39ED39DC91F9CF,0x4001 // C90
593 data8 0xE7063E45322F072E,0x4001 // CA0
594 data8 0x85A9E11DDDDE67C8,0x3FF0 // CN
595 // [64; 128)
596 data8 0x91CA191EB80E8893,0x4001 // C50
597 data8 0xA9F1D5A55397334A,0x4001 // C60
598 data8 0xC1222710295094E3,0x4001 // C70
599 data8 0xD52FFABBA6CBE5C6,0x4001 // C80
600 data8 0xE3FD9D5282052E1D,0x4001 // C90
601 data8 0xEBDBE47BB662F3EF,0x4001 // CA0
602 data8 0xEF889F489D88FD31,0x3FF0 // CN
603 // [128; 256)
604 data8 0x94AA029C2286F8D2,0x4001 // C50
605 data8 0xAD0549E55A72389F,0x4001 // C60
606 data8 0xC4628899DAF94BA4,0x4001 // C70
607 data8 0xD89432A4161C72CB,0x4001 // C80
608 data8 0xE77ABA75E9C38F3A,0x4001 // C90
609 data8 0xEF65BFFFF71347FF,0x4001 // CA0
610 data8 0xE2627460064D918D,0x3FF1 // CN
611 // [256; 512)
612 data8 0x96E9890D722C2FC1,0x4001 // C50
613 data8 0xAF6C2236F6A1CEC4,0x4001 // C60
614 data8 0xC6EBB8C9F987D20D,0x4001 // C70
615 data8 0xDB38CEFD5EF328CC,0x4001 // C80
616 data8 0xEA3265DC66C9A0B4,0x4001 // C90
617 data8 0xF2272D6B368C70B1,0x4001 // CA0
618 data8 0xDBFF93ECEBCEF1F3,0x3FF2 // CN
620 data8 0x3FDD8B618D5AF8FE // point of local minimum on [1;2]
621 data8 0x3FE0000000000000 // 0.5
622 data8 0xBFC5555DA7212371 // P5
623 data8 0x3FC999A19EEF5826 // P4
624 data8 0xb17217f7d1cf79ac,0x3ffe // ln(2)
625 data8 0xEB3F8E4325F5A535,0x3FFE // ln(sqrt(4*arcsin(1)))
627 data8 0xBFCFFFFFFFFEF009 // P3
628 data8 0x3FD555555554ECB2 // P2
629 data8 0xBF66C16C16C16C17 // W4=B4/12=-1/360
630 data8 0x7F5754D9278B51A8 // overflow boundary (first inf result)
631 data8 0xAAAAAAAAAAAAAAAB,0x3FFB // W2=B2/2=1/12
633 data8 0x3FBC756AC654273B // Q8
634 data8 0xBFC001A42489AB4D // Q7
635 data8 0x3FC99999999A169B // Q4
636 data8 0xBFD00000000019AC // Q3
637 data8 0x3FC2492479AA0DF8 // Q6
638 data8 0xBFC5555544986F52 // Q5
639 data8 0x3FD5555555555555 // Q2
640 data8 0xBFE0000000000000 // Q1, P1 = -0.5
642 data8 0x80200aaeac44ef38,0x3ff6 // ln(1/frcpa(1+  0/2^-8))
643 data8 0xc09090a2c35aa070,0x3ff7 // ln(1/frcpa(1+  1/2^-8))
644 data8 0xa0c94fcb41977c75,0x3ff8 // ln(1/frcpa(1+  2/2^-8))
645 data8 0xe18b9c263af83301,0x3ff8 // ln(1/frcpa(1+  3/2^-8))
646 data8 0x8d35c8d6399c30ea,0x3ff9 // ln(1/frcpa(1+  4/2^-8))
647 data8 0xadd4d2ecd601cbb8,0x3ff9 // ln(1/frcpa(1+  5/2^-8))
648 data8 0xce95403a192f9f01,0x3ff9 // ln(1/frcpa(1+  6/2^-8))
649 data8 0xeb59392cbcc01096,0x3ff9 // ln(1/frcpa(1+  7/2^-8))
650 data8 0x862c7d0cefd54c5d,0x3ffa // ln(1/frcpa(1+  8/2^-8))
651 data8 0x94aa63c65e70d499,0x3ffa // ln(1/frcpa(1+  9/2^-8))
652 data8 0xa54a696d4b62b382,0x3ffa // ln(1/frcpa(1+ 10/2^-8))
653 data8 0xb3e4a796a5dac208,0x3ffa // ln(1/frcpa(1+ 11/2^-8))
654 data8 0xc28c45b1878340a9,0x3ffa // ln(1/frcpa(1+ 12/2^-8))
655 data8 0xd35c55f39d7a6235,0x3ffa // ln(1/frcpa(1+ 13/2^-8))
656 data8 0xe220f037b954f1f5,0x3ffa // ln(1/frcpa(1+ 14/2^-8))
657 data8 0xf0f3389b036834f3,0x3ffa // ln(1/frcpa(1+ 15/2^-8))
658 data8 0xffd3488d5c980465,0x3ffa // ln(1/frcpa(1+ 16/2^-8))
659 data8 0x87609ce2ed300490,0x3ffb // ln(1/frcpa(1+ 17/2^-8))
660 data8 0x8ede9321e8c85927,0x3ffb // ln(1/frcpa(1+ 18/2^-8))
661 data8 0x96639427f2f8e2f4,0x3ffb // ln(1/frcpa(1+ 19/2^-8))
662 data8 0x9defad3e8f73217b,0x3ffb // ln(1/frcpa(1+ 20/2^-8))
663 data8 0xa582ebd50097029c,0x3ffb // ln(1/frcpa(1+ 21/2^-8))
664 data8 0xac06dbe75ab80fee,0x3ffb // ln(1/frcpa(1+ 22/2^-8))
665 data8 0xb3a78449b2d3ccca,0x3ffb // ln(1/frcpa(1+ 23/2^-8))
666 data8 0xbb4f79635ab46bb2,0x3ffb // ln(1/frcpa(1+ 24/2^-8))
667 data8 0xc2fec93a83523f3f,0x3ffb // ln(1/frcpa(1+ 25/2^-8))
668 data8 0xc99af2eaca4c4571,0x3ffb // ln(1/frcpa(1+ 26/2^-8))
669 data8 0xd1581106472fa653,0x3ffb // ln(1/frcpa(1+ 27/2^-8))
670 data8 0xd8002560d4355f2e,0x3ffb // ln(1/frcpa(1+ 28/2^-8))
671 data8 0xdfcb43b4fe508632,0x3ffb // ln(1/frcpa(1+ 29/2^-8))
672 data8 0xe67f6dff709d4119,0x3ffb // ln(1/frcpa(1+ 30/2^-8))
673 data8 0xed393b1c22351280,0x3ffb // ln(1/frcpa(1+ 31/2^-8))
674 data8 0xf5192bff087bcc35,0x3ffb // ln(1/frcpa(1+ 32/2^-8))
675 data8 0xfbdf4ff6dfef2fa3,0x3ffb // ln(1/frcpa(1+ 33/2^-8))
676 data8 0x81559a97f92f9cc7,0x3ffc // ln(1/frcpa(1+ 34/2^-8))
677 data8 0x84be72bce90266e8,0x3ffc // ln(1/frcpa(1+ 35/2^-8))
678 data8 0x88bc74113f23def2,0x3ffc // ln(1/frcpa(1+ 36/2^-8))
679 data8 0x8c2ba3edf6799d11,0x3ffc // ln(1/frcpa(1+ 37/2^-8))
680 data8 0x8f9dc92f92ea08b1,0x3ffc // ln(1/frcpa(1+ 38/2^-8))
681 data8 0x9312e8f36efab5a7,0x3ffc // ln(1/frcpa(1+ 39/2^-8))
682 data8 0x968b08643409ceb6,0x3ffc // ln(1/frcpa(1+ 40/2^-8))
683 data8 0x9a062cba08a1708c,0x3ffc // ln(1/frcpa(1+ 41/2^-8))
684 data8 0x9d845b3abf95485c,0x3ffc // ln(1/frcpa(1+ 42/2^-8))
685 data8 0xa06fd841bc001bb4,0x3ffc // ln(1/frcpa(1+ 43/2^-8))
686 data8 0xa3f3a74652fbe0db,0x3ffc // ln(1/frcpa(1+ 44/2^-8))
687 data8 0xa77a8fb2336f20f5,0x3ffc // ln(1/frcpa(1+ 45/2^-8))
688 data8 0xab0497015d28b0a0,0x3ffc // ln(1/frcpa(1+ 46/2^-8))
689 data8 0xae91c2be6ba6a615,0x3ffc // ln(1/frcpa(1+ 47/2^-8))
690 data8 0xb189d1b99aebb20b,0x3ffc // ln(1/frcpa(1+ 48/2^-8))
691 data8 0xb51cced5de9c1b2c,0x3ffc // ln(1/frcpa(1+ 49/2^-8))
692 data8 0xb819bee9e720d42f,0x3ffc // ln(1/frcpa(1+ 50/2^-8))
693 data8 0xbbb2a0947b093a5d,0x3ffc // ln(1/frcpa(1+ 51/2^-8))
694 data8 0xbf4ec1505811684a,0x3ffc // ln(1/frcpa(1+ 52/2^-8))
695 data8 0xc2535bacfa8975ff,0x3ffc // ln(1/frcpa(1+ 53/2^-8))
696 data8 0xc55a3eafad187eb8,0x3ffc // ln(1/frcpa(1+ 54/2^-8))
697 data8 0xc8ff2484b2c0da74,0x3ffc // ln(1/frcpa(1+ 55/2^-8))
698 data8 0xcc0b1a008d53ab76,0x3ffc // ln(1/frcpa(1+ 56/2^-8))
699 data8 0xcfb6203844b3209b,0x3ffc // ln(1/frcpa(1+ 57/2^-8))
700 data8 0xd2c73949a47a19f5,0x3ffc // ln(1/frcpa(1+ 58/2^-8))
701 data8 0xd5daae18b49d6695,0x3ffc // ln(1/frcpa(1+ 59/2^-8))
702 data8 0xd8f08248cf7e8019,0x3ffc // ln(1/frcpa(1+ 60/2^-8))
703 data8 0xdca7749f1b3e540e,0x3ffc // ln(1/frcpa(1+ 61/2^-8))
704 data8 0xdfc28e033aaaf7c7,0x3ffc // ln(1/frcpa(1+ 62/2^-8))
705 data8 0xe2e012a5f91d2f55,0x3ffc // ln(1/frcpa(1+ 63/2^-8))
706 data8 0xe600064ed9e292a8,0x3ffc // ln(1/frcpa(1+ 64/2^-8))
707 data8 0xe9226cce42b39f60,0x3ffc // ln(1/frcpa(1+ 65/2^-8))
708 data8 0xec4749fd97a28360,0x3ffc // ln(1/frcpa(1+ 66/2^-8))
709 data8 0xef6ea1bf57780495,0x3ffc // ln(1/frcpa(1+ 67/2^-8))
710 data8 0xf29877ff38809091,0x3ffc // ln(1/frcpa(1+ 68/2^-8))
711 data8 0xf5c4d0b245cb89be,0x3ffc // ln(1/frcpa(1+ 69/2^-8))
712 data8 0xf8f3afd6fcdef3aa,0x3ffc // ln(1/frcpa(1+ 70/2^-8))
713 data8 0xfc2519756be1abc7,0x3ffc // ln(1/frcpa(1+ 71/2^-8))
714 data8 0xff59119f503e6832,0x3ffc // ln(1/frcpa(1+ 72/2^-8))
715 data8 0x8147ce381ae0e146,0x3ffd // ln(1/frcpa(1+ 73/2^-8))
716 data8 0x82e45f06cb1ad0f2,0x3ffd // ln(1/frcpa(1+ 74/2^-8))
717 data8 0x842f5c7c573cbaa2,0x3ffd // ln(1/frcpa(1+ 75/2^-8))
718 data8 0x85ce471968c8893a,0x3ffd // ln(1/frcpa(1+ 76/2^-8))
719 data8 0x876e8305bc04066d,0x3ffd // ln(1/frcpa(1+ 77/2^-8))
720 data8 0x891012678031fbb3,0x3ffd // ln(1/frcpa(1+ 78/2^-8))
721 data8 0x8a5f1493d766a05f,0x3ffd // ln(1/frcpa(1+ 79/2^-8))
722 data8 0x8c030c778c56fa00,0x3ffd // ln(1/frcpa(1+ 80/2^-8))
723 data8 0x8da85df17e31d9ae,0x3ffd // ln(1/frcpa(1+ 81/2^-8))
724 data8 0x8efa663e7921687e,0x3ffd // ln(1/frcpa(1+ 82/2^-8))
725 data8 0x90a22b6875c6a1f8,0x3ffd // ln(1/frcpa(1+ 83/2^-8))
726 data8 0x91f62cc8f5d24837,0x3ffd // ln(1/frcpa(1+ 84/2^-8))
727 data8 0x93a06cfc3857d980,0x3ffd // ln(1/frcpa(1+ 85/2^-8))
728 data8 0x94f66d5e6fd01ced,0x3ffd // ln(1/frcpa(1+ 86/2^-8))
729 data8 0x96a330156e6772f2,0x3ffd // ln(1/frcpa(1+ 87/2^-8))
730 data8 0x97fb3582754ea25b,0x3ffd // ln(1/frcpa(1+ 88/2^-8))
731 data8 0x99aa8259aad1bbf2,0x3ffd // ln(1/frcpa(1+ 89/2^-8))
732 data8 0x9b0492f6227ae4a8,0x3ffd // ln(1/frcpa(1+ 90/2^-8))
733 data8 0x9c5f8e199bf3a7a5,0x3ffd // ln(1/frcpa(1+ 91/2^-8))
734 data8 0x9e1293b9998c1daa,0x3ffd // ln(1/frcpa(1+ 92/2^-8))
735 data8 0x9f6fa31e0b41f308,0x3ffd // ln(1/frcpa(1+ 93/2^-8))
736 data8 0xa0cda11eaf46390e,0x3ffd // ln(1/frcpa(1+ 94/2^-8))
737 data8 0xa22c8f029cfa45aa,0x3ffd // ln(1/frcpa(1+ 95/2^-8))
738 data8 0xa3e48badb7856b34,0x3ffd // ln(1/frcpa(1+ 96/2^-8))
739 data8 0xa5459a0aa95849f9,0x3ffd // ln(1/frcpa(1+ 97/2^-8))
740 data8 0xa6a79c84480cfebd,0x3ffd // ln(1/frcpa(1+ 98/2^-8))
741 data8 0xa80a946d0fcb3eb2,0x3ffd // ln(1/frcpa(1+ 99/2^-8))
742 data8 0xa96e831a3ea7b314,0x3ffd // ln(1/frcpa(1+100/2^-8))
743 data8 0xaad369e3dc544e3b,0x3ffd // ln(1/frcpa(1+101/2^-8))
744 data8 0xac92e9588952c815,0x3ffd // ln(1/frcpa(1+102/2^-8))
745 data8 0xadfa035aa1ed8fdc,0x3ffd // ln(1/frcpa(1+103/2^-8))
746 data8 0xaf6219eae1ad6e34,0x3ffd // ln(1/frcpa(1+104/2^-8))
747 data8 0xb0cb2e6d8160f753,0x3ffd // ln(1/frcpa(1+105/2^-8))
748 data8 0xb2354249ad950f72,0x3ffd // ln(1/frcpa(1+106/2^-8))
749 data8 0xb3a056e98ef4a3b4,0x3ffd // ln(1/frcpa(1+107/2^-8))
750 data8 0xb50c6dba52c6292a,0x3ffd // ln(1/frcpa(1+108/2^-8))
751 data8 0xb679882c33876165,0x3ffd // ln(1/frcpa(1+109/2^-8))
752 data8 0xb78c07429785cedc,0x3ffd // ln(1/frcpa(1+110/2^-8))
753 data8 0xb8faeb8dc4a77d24,0x3ffd // ln(1/frcpa(1+111/2^-8))
754 data8 0xba6ad77eb36ae0d6,0x3ffd // ln(1/frcpa(1+112/2^-8))
755 data8 0xbbdbcc915e9bee50,0x3ffd // ln(1/frcpa(1+113/2^-8))
756 data8 0xbd4dcc44f8cf12ef,0x3ffd // ln(1/frcpa(1+114/2^-8))
757 data8 0xbec0d81bf5b531fa,0x3ffd // ln(1/frcpa(1+115/2^-8))
758 data8 0xc034f19c139186f4,0x3ffd // ln(1/frcpa(1+116/2^-8))
759 data8 0xc14cb69f7c5e55ab,0x3ffd // ln(1/frcpa(1+117/2^-8))
760 data8 0xc2c2abbb6e5fd56f,0x3ffd // ln(1/frcpa(1+118/2^-8))
761 data8 0xc439b2c193e6771e,0x3ffd // ln(1/frcpa(1+119/2^-8))
762 data8 0xc553acb9d5c67733,0x3ffd // ln(1/frcpa(1+120/2^-8))
763 data8 0xc6cc96e441272441,0x3ffd // ln(1/frcpa(1+121/2^-8))
764 data8 0xc8469753eca88c30,0x3ffd // ln(1/frcpa(1+122/2^-8))
765 data8 0xc962cf3ce072b05c,0x3ffd // ln(1/frcpa(1+123/2^-8))
766 data8 0xcadeba8771f694aa,0x3ffd // ln(1/frcpa(1+124/2^-8))
767 data8 0xcc5bc08d1f72da94,0x3ffd // ln(1/frcpa(1+125/2^-8))
768 data8 0xcd7a3f99ea035c29,0x3ffd // ln(1/frcpa(1+126/2^-8))
769 data8 0xcef93860c8a53c35,0x3ffd // ln(1/frcpa(1+127/2^-8))
770 data8 0xd0192f68a7ed23df,0x3ffd // ln(1/frcpa(1+128/2^-8))
771 data8 0xd19a201127d3c645,0x3ffd // ln(1/frcpa(1+129/2^-8))
772 data8 0xd2bb92f4061c172c,0x3ffd // ln(1/frcpa(1+130/2^-8))
773 data8 0xd43e80b2ee8cc8fc,0x3ffd // ln(1/frcpa(1+131/2^-8))
774 data8 0xd56173601fc4ade4,0x3ffd // ln(1/frcpa(1+132/2^-8))
775 data8 0xd6e6637efb54086f,0x3ffd // ln(1/frcpa(1+133/2^-8))
776 data8 0xd80ad9f58f3c8193,0x3ffd // ln(1/frcpa(1+134/2^-8))
777 data8 0xd991d1d31aca41f8,0x3ffd // ln(1/frcpa(1+135/2^-8))
778 data8 0xdab7d02231484a93,0x3ffd // ln(1/frcpa(1+136/2^-8))
779 data8 0xdc40d532cde49a54,0x3ffd // ln(1/frcpa(1+137/2^-8))
780 data8 0xdd685f79ed8b265e,0x3ffd // ln(1/frcpa(1+138/2^-8))
781 data8 0xde9094bbc0e17b1d,0x3ffd // ln(1/frcpa(1+139/2^-8))
782 data8 0xe01c91b78440c425,0x3ffd // ln(1/frcpa(1+140/2^-8))
783 data8 0xe14658f26997e729,0x3ffd // ln(1/frcpa(1+141/2^-8))
784 data8 0xe270cdc2391e0d23,0x3ffd // ln(1/frcpa(1+142/2^-8))
785 data8 0xe3ffce3a2aa64922,0x3ffd // ln(1/frcpa(1+143/2^-8))
786 data8 0xe52bdb274ed82887,0x3ffd // ln(1/frcpa(1+144/2^-8))
787 data8 0xe6589852e75d7df6,0x3ffd // ln(1/frcpa(1+145/2^-8))
788 data8 0xe786068c79937a7d,0x3ffd // ln(1/frcpa(1+146/2^-8))
789 data8 0xe91903adad100911,0x3ffd // ln(1/frcpa(1+147/2^-8))
790 data8 0xea481236f7d35bb0,0x3ffd // ln(1/frcpa(1+148/2^-8))
791 data8 0xeb77d48c692e6b14,0x3ffd // ln(1/frcpa(1+149/2^-8))
792 data8 0xeca84b83d7297b87,0x3ffd // ln(1/frcpa(1+150/2^-8))
793 data8 0xedd977f4962aa158,0x3ffd // ln(1/frcpa(1+151/2^-8))
794 data8 0xef7179a22f257754,0x3ffd // ln(1/frcpa(1+152/2^-8))
795 data8 0xf0a450d139366ca7,0x3ffd // ln(1/frcpa(1+153/2^-8))
796 data8 0xf1d7e0524ff9ffdb,0x3ffd // ln(1/frcpa(1+154/2^-8))
797 data8 0xf30c29036a8b6cae,0x3ffd // ln(1/frcpa(1+155/2^-8))
798 data8 0xf4412bc411ea8d92,0x3ffd // ln(1/frcpa(1+156/2^-8))
799 data8 0xf576e97564c8619d,0x3ffd // ln(1/frcpa(1+157/2^-8))
800 data8 0xf6ad62fa1b5f172f,0x3ffd // ln(1/frcpa(1+158/2^-8))
801 data8 0xf7e499368b55c542,0x3ffd // ln(1/frcpa(1+159/2^-8))
802 data8 0xf91c8d10abaffe22,0x3ffd // ln(1/frcpa(1+160/2^-8))
803 data8 0xfa553f7018c966f3,0x3ffd // ln(1/frcpa(1+161/2^-8))
804 data8 0xfb8eb13e185d802c,0x3ffd // ln(1/frcpa(1+162/2^-8))
805 data8 0xfcc8e3659d9bcbed,0x3ffd // ln(1/frcpa(1+163/2^-8))
806 data8 0xfe03d6d34d487fd2,0x3ffd // ln(1/frcpa(1+164/2^-8))
807 data8 0xff3f8c7581e9f0ae,0x3ffd // ln(1/frcpa(1+165/2^-8))
808 data8 0x803e029e280173ae,0x3ffe // ln(1/frcpa(1+166/2^-8))
809 data8 0x80dca10cc52d0757,0x3ffe // ln(1/frcpa(1+167/2^-8))
810 data8 0x817ba200632755a1,0x3ffe // ln(1/frcpa(1+168/2^-8))
811 data8 0x821b05f3b01d6774,0x3ffe // ln(1/frcpa(1+169/2^-8))
812 data8 0x82bacd623ff19d06,0x3ffe // ln(1/frcpa(1+170/2^-8))
813 data8 0x835af8c88e7a8f47,0x3ffe // ln(1/frcpa(1+171/2^-8))
814 data8 0x83c5f8299e2b4091,0x3ffe // ln(1/frcpa(1+172/2^-8))
815 data8 0x8466cb43f3d87300,0x3ffe // ln(1/frcpa(1+173/2^-8))
816 data8 0x850803a67c80ca4b,0x3ffe // ln(1/frcpa(1+174/2^-8))
817 data8 0x85a9a1d11a23b461,0x3ffe // ln(1/frcpa(1+175/2^-8))
818 data8 0x864ba644a18e6e05,0x3ffe // ln(1/frcpa(1+176/2^-8))
819 data8 0x86ee1182dcc432f7,0x3ffe // ln(1/frcpa(1+177/2^-8))
820 data8 0x875a925d7e48c316,0x3ffe // ln(1/frcpa(1+178/2^-8))
821 data8 0x87fdaa109d23aef7,0x3ffe // ln(1/frcpa(1+179/2^-8))
822 data8 0x88a129ed4becfaf2,0x3ffe // ln(1/frcpa(1+180/2^-8))
823 data8 0x89451278ecd7f9cf,0x3ffe // ln(1/frcpa(1+181/2^-8))
824 data8 0x89b29295f8432617,0x3ffe // ln(1/frcpa(1+182/2^-8))
825 data8 0x8a572ac5a5496882,0x3ffe // ln(1/frcpa(1+183/2^-8))
826 data8 0x8afc2d0ce3b2dadf,0x3ffe // ln(1/frcpa(1+184/2^-8))
827 data8 0x8b6a69c608cfd3af,0x3ffe // ln(1/frcpa(1+185/2^-8))
828 data8 0x8c101e106e899a83,0x3ffe // ln(1/frcpa(1+186/2^-8))
829 data8 0x8cb63de258f9d626,0x3ffe // ln(1/frcpa(1+187/2^-8))
830 data8 0x8d2539c5bd19e2b1,0x3ffe // ln(1/frcpa(1+188/2^-8))
831 data8 0x8dcc0e064b29e6f1,0x3ffe // ln(1/frcpa(1+189/2^-8))
832 data8 0x8e734f45d88357ae,0x3ffe // ln(1/frcpa(1+190/2^-8))
833 data8 0x8ee30cef034a20db,0x3ffe // ln(1/frcpa(1+191/2^-8))
834 data8 0x8f8b0515686d1d06,0x3ffe // ln(1/frcpa(1+192/2^-8))
835 data8 0x90336bba039bf32f,0x3ffe // ln(1/frcpa(1+193/2^-8))
836 data8 0x90a3edd23d1c9d58,0x3ffe // ln(1/frcpa(1+194/2^-8))
837 data8 0x914d0de2f5d61b32,0x3ffe // ln(1/frcpa(1+195/2^-8))
838 data8 0x91be0c20d28173b5,0x3ffe // ln(1/frcpa(1+196/2^-8))
839 data8 0x9267e737c06cd34a,0x3ffe // ln(1/frcpa(1+197/2^-8))
840 data8 0x92d962ae6abb1237,0x3ffe // ln(1/frcpa(1+198/2^-8))
841 data8 0x9383fa6afbe2074c,0x3ffe // ln(1/frcpa(1+199/2^-8))
842 data8 0x942f0421651c1c4e,0x3ffe // ln(1/frcpa(1+200/2^-8))
843 data8 0x94a14a3845bb985e,0x3ffe // ln(1/frcpa(1+201/2^-8))
844 data8 0x954d133857f861e7,0x3ffe // ln(1/frcpa(1+202/2^-8))
845 data8 0x95bfd96468e604c4,0x3ffe // ln(1/frcpa(1+203/2^-8))
846 data8 0x9632d31cafafa858,0x3ffe // ln(1/frcpa(1+204/2^-8))
847 data8 0x96dfaabd86fa1647,0x3ffe // ln(1/frcpa(1+205/2^-8))
848 data8 0x9753261fcbb2a594,0x3ffe // ln(1/frcpa(1+206/2^-8))
849 data8 0x9800c11b426b996d,0x3ffe // ln(1/frcpa(1+207/2^-8))
850 data8 0x9874bf4d45ae663c,0x3ffe // ln(1/frcpa(1+208/2^-8))
851 data8 0x99231f5ee9a74f79,0x3ffe // ln(1/frcpa(1+209/2^-8))
852 data8 0x9997a18a56bcad28,0x3ffe // ln(1/frcpa(1+210/2^-8))
853 data8 0x9a46c873a3267e79,0x3ffe // ln(1/frcpa(1+211/2^-8))
854 data8 0x9abbcfc621eb6cb6,0x3ffe // ln(1/frcpa(1+212/2^-8))
855 data8 0x9b310cb0d354c990,0x3ffe // ln(1/frcpa(1+213/2^-8))
856 data8 0x9be14cf9e1b3515c,0x3ffe // ln(1/frcpa(1+214/2^-8))
857 data8 0x9c5710b8cbb73a43,0x3ffe // ln(1/frcpa(1+215/2^-8))
858 data8 0x9ccd0abd301f399c,0x3ffe // ln(1/frcpa(1+216/2^-8))
859 data8 0x9d7e67f3bdce8888,0x3ffe // ln(1/frcpa(1+217/2^-8))
860 data8 0x9df4ea81a99daa01,0x3ffe // ln(1/frcpa(1+218/2^-8))
861 data8 0x9e6ba405a54514ba,0x3ffe // ln(1/frcpa(1+219/2^-8))
862 data8 0x9f1e21c8c7bb62b3,0x3ffe // ln(1/frcpa(1+220/2^-8))
863 data8 0x9f956593f6b6355c,0x3ffe // ln(1/frcpa(1+221/2^-8))
864 data8 0xa00ce1092e5498c3,0x3ffe // ln(1/frcpa(1+222/2^-8))
865 data8 0xa0c08309c4b912c1,0x3ffe // ln(1/frcpa(1+223/2^-8))
866 data8 0xa1388a8c6faa2afa,0x3ffe // ln(1/frcpa(1+224/2^-8))
867 data8 0xa1b0ca7095b5f985,0x3ffe // ln(1/frcpa(1+225/2^-8))
868 data8 0xa22942eb47534a00,0x3ffe // ln(1/frcpa(1+226/2^-8))
869 data8 0xa2de62326449d0a3,0x3ffe // ln(1/frcpa(1+227/2^-8))
870 data8 0xa357690f88bfe345,0x3ffe // ln(1/frcpa(1+228/2^-8))
871 data8 0xa3d0a93f45169a4b,0x3ffe // ln(1/frcpa(1+229/2^-8))
872 data8 0xa44a22f7ffe65f30,0x3ffe // ln(1/frcpa(1+230/2^-8))
873 data8 0xa500c5e5b4c1aa36,0x3ffe // ln(1/frcpa(1+231/2^-8))
874 data8 0xa57ad064eb2ebbc2,0x3ffe // ln(1/frcpa(1+232/2^-8))
875 data8 0xa5f5152dedf4384e,0x3ffe // ln(1/frcpa(1+233/2^-8))
876 data8 0xa66f9478856233ec,0x3ffe // ln(1/frcpa(1+234/2^-8))
877 data8 0xa6ea4e7cca02c32e,0x3ffe // ln(1/frcpa(1+235/2^-8))
878 data8 0xa765437325341ccf,0x3ffe // ln(1/frcpa(1+236/2^-8))
879 data8 0xa81e21e6c75b4020,0x3ffe // ln(1/frcpa(1+237/2^-8))
880 data8 0xa899ab333fe2b9ca,0x3ffe // ln(1/frcpa(1+238/2^-8))
881 data8 0xa9157039c51ebe71,0x3ffe // ln(1/frcpa(1+239/2^-8))
882 data8 0xa991713433c2b999,0x3ffe // ln(1/frcpa(1+240/2^-8))
883 data8 0xaa0dae5cbcc048b3,0x3ffe // ln(1/frcpa(1+241/2^-8))
884 data8 0xaa8a27ede5eb13ad,0x3ffe // ln(1/frcpa(1+242/2^-8))
885 data8 0xab06de228a9e3499,0x3ffe // ln(1/frcpa(1+243/2^-8))
886 data8 0xab83d135dc633301,0x3ffe // ln(1/frcpa(1+244/2^-8))
887 data8 0xac3fb076adc7fe7a,0x3ffe // ln(1/frcpa(1+245/2^-8))
888 data8 0xacbd3cbbe47988f1,0x3ffe // ln(1/frcpa(1+246/2^-8))
889 data8 0xad3b06b1a5dc57c3,0x3ffe // ln(1/frcpa(1+247/2^-8))
890 data8 0xadb90e94af887717,0x3ffe // ln(1/frcpa(1+248/2^-8))
891 data8 0xae3754a218f7c816,0x3ffe // ln(1/frcpa(1+249/2^-8))
892 data8 0xaeb5d9175437afa2,0x3ffe // ln(1/frcpa(1+250/2^-8))
893 data8 0xaf349c322e9c7cee,0x3ffe // ln(1/frcpa(1+251/2^-8))
894 data8 0xafb39e30d1768d1c,0x3ffe // ln(1/frcpa(1+252/2^-8))
895 data8 0xb032df51c2c93116,0x3ffe // ln(1/frcpa(1+253/2^-8))
896 data8 0xb0b25fd3e6035ad9,0x3ffe // ln(1/frcpa(1+254/2^-8))
897 data8 0xb1321ff67cba178c,0x3ffe // ln(1/frcpa(1+255/2^-8))
899 data8 0xC7DC2985D3B44557,0x3FCA // A00
901 // polynomial approximation of ln(GAMMA(x)), 1 <= x < 2.25
902 // [0.875,1.25)
903 data8 0xBF9A04F7E40C8498,0x3FAB79D8D9380F03 // C17,C16
904 data8 0xBFB3B63609CA0CBD,0x3FB5564EA1675539 // C13,C12
905 data8 0xBFBC806766F48C41,0x3FC010B36CDA773A // C9,C8
906 data8 0xD45CE0BD54BE3D67,0xBFFC // C5
907 data8 0xCD26AADF559676D0,0xBFFD // C3
908 data8 0x93C467E37DB0C7A7,0xBFFE // C1
909 data8 0xBFB10C251723B123,0x3FB2669DAD69A12D // C15,C14
910 data8 0xBFB748A3CFCE4717,0x3FB9A01DEE29966A // C11,C10
911 data8 0xBFC2703A1D85497E,0x3FC5B40CB0FD353C // C7,C6
912 data8 0x8A8991563ECBBA5D,0x3FFD // C4
913 data8 0xD28D3312983E9844,0x3FFE // C2
914 data8 0,0                       // C0
915 // [1.25,1.75)
916 data8 0xBF12680486396DE6,0x3F23C51FC332CD9D // C17,C16
917 data8 0xBF422633DA3A1496,0x3F4CC70680768857 // C13,C12
918 data8 0xBF6E2F1A1F804B5D,0x3F78FCE02A032428 // C9,C8
919 data8 0x864D46FA895985C1,0xBFFA // C5
920 data8 0x97213C6E35E12043,0xBFFC // C3
921 data8 0x8A8A42A401D979B7,0x3FC7 // C1
922 data8 0xBF2E098A8A2332A8,0x3F370E61B73B205C // C15,C14
923 data8 0xBF56F9849D3BC6CC,0x3F6283126F58D7F4 // C11,C10
924 data8 0xBF851F9F9516A98F,0x3F9266E797A1433F // C7,C6
925 data8 0x845A14A6A81B0638,0x3FFB // C4
926 data8 0xF7B95E4771C55C99,0x3FFD // C2
927 data8 0xF8CDCDE61C520E0F,0xBFFB // C0
928 // [1.75,2.25)
929 data8 0xBEA01D7AFA5D8F52,0x3EB1010986E60253 // C17,C16
930 data8 0xBEE3CBEDB4C918AA,0x3EF580F6D9D0F72D // C13,C12
931 data8 0xBF2D3FD4C7F68563,0x3F40B36AF884AE9A // C9,C8
932 data8 0xF2027E10C7B051EC,0xBFF7 // C5
933 data8 0x89F000D2ABB03401,0xBFFB // C3
934 data8 0xD8773039049E70B6,0x3FFD // C1
935 data8 0xBEC112CD07CFC31A,0x3ED2528A428D30E1 // C15,C14
936 data8 0xBF078DE5618D8C9F,0x3F1A127AD811A53D // C11,C10
937 data8 0xBF538AC5C2BF540D,0x3F67ADD6EADB5718 // C7,C6
938 data8 0xA8991563EC243383,0x3FF9 // C4
939 data8 0xA51A6625307D3230,0x3FFD // C2
940 data8 0,0                       // C0
942 // polynomial approximation of ln(sin(Pi*x)/(Pi*x)), 0 <= x <= 0.5
943 data8 0xBFDC1BF0931AE591,0x3FD36D6D6CE263D7 //S28,S26
944 data8 0xBFBD516F4FD9FB18,0xBFBBE1703F315086 //S20,S18
945 data8 0xAAB5A3CCEFCD3628,0xBFFC //S12
946 data8 0x80859B5C318E19A5,0xBFFD //S8
947 data8 0x8A8991563EC7EB33,0xBFFE //S4
948 data8 0xBFD23AB9E6CC88AC,0xBF9957F5146FC7AF //S24,S22
949 data8 0xBFC007B324E23040,0xBFC248DEC29CAC4A //S16,S14
950 data8 0xCD00EFF2F8F86899,0xBFFC //S10
951 data8 0xADA06587FACD668B,0xBFFD //S6
952 data8 0xD28D3312983E98A0,0xBFFF //S2
954 data8 0x8090F777D7942F73,0x4001 // PR01
955 data8 0xE5B521193CF61E63,0x4000 // PR11
956 data8 0xC02C000000001939 // (-15;-14)
957 data8 0x0000000000000233 // (-15;-14)
958 data8 0xC02A000000016124 // (-14;-13)
959 data8 0x0000000000002BFB // (-14;-13)
960 data8 0xC02800000011EED9 // (-13;-12)
961 data8 0x0000000000025CBB // (-13;-12)
962 data8 0xC026000000D7322A // (-12;-11)
963 data8 0x00000000001E1095 // (-12;-11)
964 data8 0xC0240000093F2777 // (-11;-10)
965 data8 0x00000000013DD3DC // (-11;-10)
966 data8 0xC02200005C7768FB // (-10;-9)
967 data8 0x000000000C9539B9 // (-10;-9)
968 data8 0xC02000034028B3F9 // (-9;-8)
969 data8 0x000000007570C565 // (-9;-8)
970 data8 0xC01C0033FDEDFE1F // (-8;-7)
971 data8 0x00000007357E670E // (-8;-7)
972 data8 0xC018016B25897C8D // (-7;-6)
973 data8 0x000000346DC5D639 // (-7;-6)
974 data8 0xC014086A57F0B6D9 // (-6;-5)
975 data8 0x0000010624DD2F1B // (-6;-5)
976 data8 0xC010284E78599581 // (-5;-4)
977 data8 0x0000051EB851EB85 // (-5;-4)
978 data8 0xC009260DBC9E59AF // (-4;-3)
979 data8 0x000028F5C28F5C29 // (-4;-3)
980 data8 0xC003A7FC9600F86C // (-3;-2)
981 data8 0x0000666666666666 // (-3;-2)
982 data8 0xCC15879606130890,0x4000 // PR21
983 data8 0xB42FE3281465E1CC,0x4000 // PR31
985 data8 0x828185F0B95C9916,0x4001 // PR00
987 data8 0xD4D3C819E4E5654B,0x4000 // PR10
988 data8 0xA82FBBA4FCC75298,0x4000 // PR20
989 data8 0xC02DFFFFFFFFFE52 // (-15;-14)
990 data8 0x000000000000001C // (-15;-14)
991 data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
992 data8 0x00000000000001A6 // (-14;-13)
993 data8 0xC029FFFFFFFE9EDC // (-13;-12)
994 data8 0x0000000000002BFB // (-13;-12)
995 data8 0xC027FFFFFFEE1127 // (-12;-11)
996 data8 0x000000000001EEC8 // (-12;-11)
997 data8 0xC025FFFFFF28CDD4 // (-11;-10)
998 data8 0x00000000001E1095 // (-11;-10)
999 data8 0xC023FFFFF6C0D7C0 // (-10;-9)
1000 data8 0x000000000101B2B3 // (-10;-9)
1001 data8 0xC021FFFFA3884BD0 // (-9;-8)
1002 data8 0x000000000D6BF94D // (-9;-8)
1003 data8 0xC01FFFF97F8159CF // (-8;-7)
1004 data8 0x00000000C9539B89 // (-8;-7)
1005 data8 0xC01BFFCBF76B86F0 // (-7;-6)
1006 data8 0x00000007357E670E // (-7;-6)
1007 data8 0xC017FE92F591F40D // (-6;-5)
1008 data8 0x000000346DC5D639 // (-6;-5)
1009 data8 0xC013F7577A6EEAFD // (-5;-4)
1010 data8 0x00000147AE147AE1 // (-5;-4)
1011 data8 0xC00FA471547C2FE5 // (-4;-3)
1012 data8 0x00000C49BA5E353F // (-4;-3)
1013 data8 0xC005FB410A1BD901 // (-3;-2)
1014 data8 0x000053F7CED91687 // (-3;-2)
1015 data8 0x80151BB918A293AA,0x4000 // PR30
1016 data8 0xB3C9F8F47422A314,0x400B // PRN
1018 // right negative roots
1019 //(-3;-2)
1020 data8 0x40BFCF8B90BE7F6B,0x40B237623345EFC3 // A15,A14
1021 data8 0x407A92EFB03B281E,0x40728700C7819759 // A11,A10
1022 data8 0x403809F04EF4D0F2,0x4038D32F682D9593 // A7,A6
1023 data8 0xB4A5302C53C2F2D8,0x3FFF // A3
1024 data8 0xC1FF4B357A9B0383,0x3FFF // A1
1025 data8 0x409C46632EB4B2D3,0x4091A72AFA2148F5 // A13,A12
1026 data8 0x4059297AC79A88DB,0x40548EAA7BE7FA6B // A9,A8
1027 data8 0x4017339FE04B227F,0x4021718D7CA09E02 // A5,A4
1028 data8 0x9B775D8017AAE668,0x4001 // A2
1029 data8 0x8191DB68FF4366A1,0x3FC9 // A0
1030 //(-4;-3)
1031 data8 0x425260910D35307B,0x422668F5BE7983BB // A15,A14
1032 data8 0x41A4454DBE4BEE43,0x41799CA93F6EA817 // A11,A10
1033 data8 0x40FBB97AA1400F31,0x40D293C3F7ADAB15 // A7,A6
1034 data8 0xE089B8926AE4517B,0x4005 // A3
1035 data8 0xF90532F97D630C69,0x4001 // A1
1036 data8 0x41F9F0CF98C5F2EA,0x41D026336C6BF394 // A13,A12
1037 data8 0x415057F61156D5B8,0x41251EA3055CB754 // A9,A8
1038 data8 0x40A99A6337D9FC2B,0x408267203D776151 // A5,A4
1039 data8 0xCEA694BB8A8827A9,0x4003 // A2
1040 data8 0xF4B02F1D73D30EED,0x3FCD // A0
1041 //(-5;-4)
1042 data8 0x4412365489340979,0x43C86441BAFDEE39 // A15,A14
1043 data8 0x42ED68FCB19352DD,0x42A45FCE3905CD6F // A11,A10
1044 data8 0x41CD14FE49FD4FCA,0x41855E3DBFA89744 // A7,A6
1045 data8 0xAACD88D954E0EC16,0x400B // A3
1046 data8 0xD652E7A490B0DCDF,0x4003 // A1
1047 data8 0x437F52608E0E752A,0x433560E0633E33D5 // A13,A12
1048 data8 0x425C83998976DE3D,0x421433DCCD3B473B // A9,A8
1049 data8 0x4140261EB5732106,0x40F96D18E21AE6CC // A5,A4
1050 data8 0xA220AE6C09FA8A0E,0x4007 // A2
1051 data8 0xCC1682D17A2B5A58,0xBFCF // A0
1052 //(-6;-5)
1053 data8 0x4630E41D6386CF5A,0x45C2E7992C628C8C // A15,A14
1054 data8 0x447AABEC714F913A,0x440EDCAB45339F3A // A11,A10
1055 data8 0x42C9A8D00C97E3CE,0x425F7D8D5BEAB44D // A7,A6
1056 data8 0x929EC2B1FB95BB5B,0x4012 // A3
1057 data8 0xF6B970414D717D38,0x4005 // A1
1058 data8 0x45545E578976F6A2,0x44E738288DD52686 // A13,A12
1059 data8 0x43A20921FEC49492,0x433557FD7C6A41B3 // A9,A8
1060 data8 0x41F3E01773761DB4,0x418A225DF2DA6C47 // A5,A4
1061 data8 0xE7661976117F9312,0x400B // A2
1062 data8 0xC33C13FEE07494DE,0x3FCF // A0
1063 //(-7;-6)
1064 data8 0x4898F1E6133305AD,0x4802C5306FE4A850 // A15,A14
1065 data8 0x463FD37946B44094,0x45A8D489B784C2DD // A11,A10
1066 data8 0x43E9500995815F06,0x4354F21E2FEE6DF5 // A7,A6
1067 data8 0xEF281D1E1BBE10BD,0x4019 // A3
1068 data8 0xB4EF24F1D78C2029,0x4008 // A1
1069 data8 0x476AB1D5930011E5,0x46D4867E77BFB622 // A13,A12
1070 data8 0x45139151ECDEF7C5,0x447F3A2BC6BF466F // A9,A8
1071 data8 0x42C1D3D50713FA40,0x422F9C7B52556A1B // A5,A4
1072 data8 0xFE711A4267CEA83A,0x4010 // A2
1073 data8 0xD11E91B3FF8F4B94,0xBFD2 // A0
1074 //(-8;-7)
1075 data8 0x4B39E57569811B6E,0x4A7656073EB1FA21 // A15,A14
1076 data8 0x482C9B24A516B0BB,0x47698FF55139C62B // A11,A10
1077 data8 0x452393E2BC8E8D04,0x44628E1C710DA478 // A7,A6
1078 data8 0x9F2A95AF1B7A773F,0x4022 // A3
1079 data8 0x9DA03D51C303C918,0x400B // A1
1080 data8 0x49B24C241A3D5BCB,0x48F01CB936ECDA67 // A13,A12
1081 data8 0x46A712B3425C6797,0x45E5164114BD6DA1 // A9,A8
1082 data8 0x43A216A356069D01,0x42E25E42A45E2108 // A5,A4
1083 data8 0xC1F42ED57BBC2529,0x4016 // A2
1084 data8 0xB1C7B615A7DCA8A9,0xBFD7 // A0
1085 //(-9;-8)
1086 data8 0x4E09D478E5EE857D,0x4D1647782106E9AB // A15,A14
1087 data8 0x4A3C7F4D51927548,0x49497954796D743A // A11,A10
1088 data8 0x467387BD6AF0CBDF,0x4582843E134111D2 // A7,A6
1089 data8 0x9F003C6DE9666513,0x402B // A3
1090 data8 0x9D8447F6BF99950A,0x400E // A1
1091 data8 0x4C22364D238C61A9,0x4B300B18050AB940 // A13,A12
1092 data8 0x4857004D64215772,0x4765074E448C3C9A // A9,A8
1093 data8 0x44920E9EA07BF624,0x43A257BEC94BBF48 // A5,A4
1094 data8 0xC1D1C49AC5B2A4B4,0x401C // A2
1095 data8 0x9A749AF9F2D2E688,0x3FDB // A0
1096 //(-10;-9)
1097 data8 0x5102C7C43EA26C83,0x4FDCD174DEB0426B // A15,A14
1098 data8 0x4C6A036195CD5BAD,0x4B44ABB52B65628A // A11,A10
1099 data8 0x47D6439374B98FED,0x46B2C3903EF44D7D // A7,A6
1100 data8 0xE25BAF73AB8A7DB3,0x4034 // A3
1101 data8 0xB130901CA6D81B61,0x4011 // A1
1102 data8 0x4EB50BB0726AE206,0x4D907A96E6D2B6E2 // A13,A12
1103 data8 0x4A20975D78EAF01A,0x48FAF79C9C3E7908 // A9,A8
1104 data8 0x459044144129A247,0x446D6043FA3150A3 // A5,A4
1105 data8 0xF547997E083D9BA7,0x4022 // A2
1106 data8 0x977AF525A6ECA1BC,0x3FDC // A0
1107 //(-11;-10)
1108 data8 0x5420A5D5E90C6D73,0x52C4710A503DC67A // A15,A14
1109 data8 0x4EB2ED07BA88D2A8,0x4D581001ED9A5ECE // A11,A10
1110 data8 0x494A8A28E9E3DFEF,0x47F1E4E1E476793E // A7,A6
1111 data8 0xDD0C97E12D4A3378,0x403E // A3
1112 data8 0xDD7C12D5182FD543,0x4014 // A1
1113 data8 0x5167ED536877A072,0x500DF9AF21DDC0B6 // A13,A12
1114 data8 0x4BFEE6F04BC34FF8,0x4AA4175CEF736A5E // A9,A8
1115 data8 0x4698D1B4388FEC78,0x4541EDE7607A600D // A5,A4
1116 data8 0xBF9F645F282AC552,0x4029 // A2
1117 data8 0xAE1BBE4D3CDACCF4,0x3FE1 // A0
1118 //(-12;-11)
1119 data8 0x575F0EEF5FB7D4C0,0x55CBB7302B211A7C // A15,A14
1120 data8 0x5113A4F1825C7CB2,0x4F822A0D46E0605A // A11,A10
1121 data8 0x4ACED38FC8BE069A,0x493E3B56D2649F18 // A7,A6
1122 data8 0x8FA8FF5DF8B72D5E,0x4049 // A3
1123 data8 0x9845417E8598D642,0x4018 // A1
1124 data8 0x5437780541C3F2D3,0x52A56279B563C1B2 // A13,A12
1125 data8 0x4DF0F71A48C50188,0x4C600B358988DEBF // A9,A8
1126 data8 0x47AE7EE95BDA3DE9,0x46200599DC16B18F // A5,A4
1127 data8 0xB5249F914932E55D,0x4030 // A2
1128 data8 0xEAE760CD2C086094,0x3FE5 // A0
1129 //(-13;-12)
1130 data8 0x5ABA5848651F6D18,0x58EF60D8A817650B // A15,A14
1131 data8 0x538A8CA86E13EFB1,0x51C05DBD4D01076D // A11,A10
1132 data8 0x4C607594C339D259,0x4A9585BD5BF932BB // A7,A6
1133 data8 0xF26D282C36EC3611,0x4053 // A3
1134 data8 0xE467DF4810EE7EEE,0x401B // A1
1135 data8 0x5721D9BA485E8CC3,0x5555AF2CCFB2104D // A13,A12
1136 data8 0x4FF4619A17B14EA6,0x4E29B2F29EB9F8C4 // A9,A8
1137 data8 0x48CCF27629D46E79,0x47044715F991A63D // A5,A4
1138 data8 0xCBC92FB9BDAA95A9,0x4037 // A2
1139 data8 0xFB743A426163665B,0xBFE6 // A0
1140 //(-14;-13)
1141 data8 0x5E3295B24B353EAA,0x5C2B447E29796F20 // A15,A14
1142 data8 0x5615A35CB5EAFAE5,0x54106AB089C95CAF // A11,A10
1143 data8 0x4DFEC7D93501900A,0x4BF8C4C685F01B83 // A7,A6
1144 data8 0x820899603D9A74D5,0x405F // A3
1145 data8 0xB9949919933821CB,0x401F // A1
1146 data8 0x5A23373DB9A995AC,0x581CBA0AF7F53009 // A13,A12
1147 data8 0x520929836BB304CD,0x500386409A7076DA // A9,A8
1148 data8 0x49F480173FEAF90B,0x47F1ACB14B810793 // A5,A4
1149 data8 0x86881B8674DBF205,0x403F // A2
1150 data8 0x8CF3CC35AA2C5F90,0x3FED // A0
1151 //(-15;-14)
1152 data8 0x61C37D53BE0029D6,0x5F80667CD9D68354 // A15,A14
1153 data8 0x58B3F01898E6605B,0x567149652116DB6A // A11,A10
1154 data8 0x4FA82FA4F5D35B00,0x4D663DB00832DF8F // A7,A6
1155 data8 0xAE426731C9B94996,0x406A // A3
1156 data8 0xA264C84BE3708F3F,0x4023 // A1
1157 data8 0x5D3B254BC1C806A8,0x5AF72E736048B553 // A13,A12
1158 data8 0x542E476505104BB0,0x51EAD96CDC4FB48F // A9,A8
1159 data8 0x4B25095F498DB134,0x48E4B9FDEBFE24AB // A5,A4
1160 data8 0xCE076A5A116C1D34,0x4046 // A2
1161 data8 0x940013871A15050B,0x3FF1 // A0
1163 // left negative roots
1164 //(-3;-2)
1165 data8 0x41AEB7998DBE2B2C,0xC19053D8FAC05DF7 // A16,A15
1166 data8 0x4133197BF1ADEAF9,0xC1150728B9B82072 // A12,A11
1167 data8 0x40BDBA65E74F4526,0xC0A12239BEEF8F72 // A8,A7
1168 data8 0xFA8256664F99E2AA,0x4004 // A4
1169 data8 0x9933F9E132D2A5DB,0x4002 // A2
1170 data8 0x416FFB167B85F77C,0xC15166AE0ACCF87C // A14,A13
1171 data8 0x40F75815106322C0,0xC0DA2D23C59C348D // A10,A9
1172 data8 0x4084373F7CC42043,0xC0685884581F8C61 // A6,A5
1173 data8 0xA0C2D6186460FF9D,0xC003 // A3
1174 data8 0xF5096D48258CA0AD,0xBFFF // A1
1175 //(-4;-3)
1176 data8 0xC3E5BD233016D4B9,0x43A084DAD2D94AB1 // A15,A14
1177 data8 0xC2CCFFF5E5AED722,0x4286D143AC7D29A6 // A11,A10
1178 data8 0xC1B7DBBE0680D07B,0x4173E8F3ABB79CED // A7,A6
1179 data8 0xE929ACEA59799BAF,0xC00A // A3
1180 data8 0xA5CCECB362B21E1C,0xC003 // A1
1181 data8 0xC357EED873871B81,0x43128E0B873204FC // A13,A12
1182 data8 0xC242225FA76E8450,0x41FD2F76AE7386CE // A9,A8
1183 data8 0xC13116F7806D0C7A,0x40EE8F829F141025 // A5,A4
1184 data8 0xFBB6F57021B5B397,0x4006 // A2
1185 data8 0xEEE019B4C05AC269,0xBFCB // A0
1186 //(-5;-4)
1187 data8 0xC626A52FE8AAA100,0x45B9FD1F4DDFE31E // A15,A14
1188 data8 0xC473812A5675F08B,0x440738530AECC254 // A11,A10
1189 data8 0xC2C5068B3F94AC27,0x425A8C5C539A500B // A7,A6
1190 data8 0x869FBFF732F20C3A,0xC012 // A3
1191 data8 0xE91251F7CF25A655,0xC005 // A1
1192 data8 0xC54C18CB48E5DA0F,0x44E07BD36FF561DF // A13,A12
1193 data8 0xC39BEC120D2FEBEA,0x4330FFA5388435BE // A9,A8
1194 data8 0xC1F13D5D163B7FB5,0x418752A6F5AC0F39 // A5,A4
1195 data8 0xDA99E33C51D360F0,0x400B // A2
1196 data8 0x9F47A66A2F53D9B9,0x3FD1 // A0
1197 //(-6;-5)
1198 data8 0xC8970DAC16B6D59E,0x480170728306FD76 // A15,A14
1199 data8 0xC63E0E5030604CF3,0x45A7924D74D57C65 // A11,A10
1200 data8 0xC3E8684E41730FC6,0x43544D54EA2E5B9A // A7,A6
1201 data8 0xEB7404450C47C5F4,0xC019 // A3
1202 data8 0xB30FB521D2C19F8B,0xC008 // A1
1203 data8 0xC768F34D35DF6320,0x46D348B3BB2E68B8 // A13,A12
1204 data8 0xC512AC2FE5EA638E,0x447DF44BC7FC5E17 // A9,A8
1205 data8 0xC2C15EA6B0AAFEF9,0x422EF5D308DBC420 // A5,A4
1206 data8 0xFBCEE5BCA70FD3A3,0x4010 // A2
1207 data8 0x8589A7CFFE0A3E86,0xBFD5 // A0
1208 //(-7;-6)
1209 data8 0xCB3995A0CC961E5A,0x4A7615C6C7116ADD // A15,A14
1210 data8 0xC82C5AFE0BF9C427,0x47695BD2F367668B // A11,A10
1211 data8 0xC52377E70BA14CF5,0x4462775E859E4392 // A7,A6
1212 data8 0x9EC8ED6E4C3D4DBE,0xC022 // A3
1213 data8 0x9D5FBD2E75520E65,0xC00B // A1
1214 data8 0xC9B21BB881A4DDF8,0x48EFEAB06FBA0207 // A13,A12
1215 data8 0xC6A6E8550CBC188F,0x45E4F3D26238B099 // A9,A8
1216 data8 0xC3A20427DF1B110A,0x42E24F3D636F2E4E // A5,A4
1217 data8 0xC1A4D12A82280CFB,0x4016 // A2
1218 data8 0xEF46D8DCCA9E8197,0x3FD2 // A0
1219 //(-8;-7)
1220 data8 0xCE0946982B27DE5B,0x4D15DBC6664E2DD2 // A15,A14
1221 data8 0xCA3C769F6B3B2B93,0x49497251CD0C4363 // A11,A10
1222 data8 0xC67384066C47F489,0x458281393433AB28 // A7,A6
1223 data8 0x9EF3459926D0F14F,0xC02B // A3
1224 data8 0x9D7BB7F2600DFF0B,0xC00E // A1
1225 data8 0xCC22351326C939A7,0x4B3009431C4F1D3F // A13,A12
1226 data8 0xC856FAADDD48815D,0x476502BC3ECA040C // A9,A8
1227 data8 0xC4920C2A84173810,0x43A255C052525F99 // A5,A4
1228 data8 0xC1C73B6554011EFA,0x401C // A2
1229 data8 0x954612700ADF8317,0xBFD8 // A0
1230 //(-9;-8)
1231 data8 0xD102F5CC7B590D3A,0x4FDD0F1C30E4EB22 // A15,A14
1232 data8 0xCC6A02912B0DF650,0x4B44AB18E4FCC159 // A11,A10
1233 data8 0xC7D64314B4A2FAAB,0x46B2C334AE5E2D34 // A7,A6
1234 data8 0xE2598724F7E28E99,0xC034 // A3
1235 data8 0xB12F6FE2E195452C,0xC011 // A1
1236 data8 0xCEB507747AF9356A,0x4D907802C08BA48F // A13,A12
1237 data8 0xCA2096E3DC29516F,0x48FAF6ED046A1DB7 // A9,A8
1238 data8 0xC59043D21BA5EE56,0x446D5FE468B30450 // A5,A4
1239 data8 0xF5460A8196B59C83,0x4022 // A2
1240 data8 0xB108F35A8EDA92D5,0xBFDD // A0
1241 //(-10;-9)
1242 data8 0xD420430D91F8265B,0x52C406CAAAC9E0EE // A15,A14
1243 data8 0xCEB2ECDDDAA3DAD1,0x4D580FDA97F92E3A // A11,A10
1244 data8 0xC94A8A192341B5D4,0x47F1E4D8C690D07B // A7,A6
1245 data8 0xDD0C5F920C2F0D2B,0xC03E // A3
1246 data8 0xDD7BED3631657B48,0xC014 // A1
1247 data8 0xD167F410E64E90A4,0x500DFFED20F714A7 // A13,A12
1248 data8 0xCBFEE6D9043169E9,0x4AA4174F64B40AA7 // A9,A8
1249 data8 0xC698D1A9AF0AB9C2,0x4541EDE14987A887 // A5,A4
1250 data8 0xBF9F43D461B3DE6E,0x4029 // A2
1251 data8 0xF3891A50642FAF26,0x3FE1 // A0
1252 //(-11;-10)
1253 data8 0xD75F0EEAF769D42A,0x55CBB72C8869183A // A15,A14
1254 data8 0xD113A4EF80394F77,0x4F822A0B96B3ECA9 // A11,A10
1255 data8 0xCACED38DC75763CB,0x493E3B5522D2D028 // A7,A6
1256 data8 0x8FA8FB5C92533701,0xC049 // A3
1257 data8 0x98453EDB9339C24E,0xC018 // A1
1258 data8 0xD43778026CCD4B20,0x52A5627753273B9B // A13,A12
1259 data8 0xCDF0F718DD7E1214,0x4C600B34582911EB // A9,A8
1260 data8 0xC7AE7EE7F112362C,0x46200599439C264F // A5,A4
1261 data8 0xB5249C335342B5BC,0x4030 // A2
1262 data8 0x881550711D143475,0x3FE4 // A0
1263 //(-12;-11)
1264 data8 0xDAB9C724EEEE2BBB,0x58EEC971340EDDBA // A15,A14
1265 data8 0xD38A8C8AE63BD8BF,0x51C05DB21CEE00D3 // A11,A10
1266 data8 0xCC607594C311C12D,0x4A9585BD5BE6AB57 // A7,A6
1267 data8 0xF26D282C36EC0E66,0xC053 // A3
1268 data8 0xE467DF1FA674BFAE,0xC01B // A1
1269 data8 0xD721DE506999AA9C,0x5555B34F71B45132 // A13,A12
1270 data8 0xCFF4619A476BF76F,0x4E29B2F2BBE7A67E // A9,A8
1271 data8 0xC8CCF27629D48EDC,0x47044715F991AB46 // A5,A4
1272 data8 0xCBC92FB9BDAA928D,0x4037 // A2
1273 data8 0xCE27C4F01CF53284,0xBFE6 // A0
1274 //(-13;-12)
1275 data8 0xDE3295B24355C5A1,0x5C2B447E298B562D // A15,A14
1276 data8 0xD615A35CB5E92103,0x54106AB089C95E8C // A11,A10
1277 data8 0xCDFEC7D935019005,0x4BF8C4C685F01B83 // A7,A6
1278 data8 0x820899603D9A74D5,0xC05F // A3
1279 data8 0xB9949916F8DF4AC4,0xC01F // A1
1280 data8 0xDA23373DBA0B7548,0x581CBA0AF7F45C01 // A13,A12
1281 data8 0xD20929836BB30934,0x500386409A7076D6 // A9,A8
1282 data8 0xC9F480173FEAF90B,0x47F1ACB14B810793 // A5,A4
1283 data8 0x86881B8674DBF205,0x403F // A2
1284 data8 0x8CFAFA9A142C1FF0,0x3FED // A0
1285 //(-14;-13)
1286 data8 0xE1C33F356FA2C630,0x5F8038B8AA919DD7 // A15,A14
1287 data8 0xD8B3F0167E14982D,0x5671496400BAE0DB // A11,A10
1288 data8 0xCFA82FA4F5D25C3E,0x4D663DB008328C58 // A7,A6
1289 data8 0xAE426731C9B94980,0xC06A // A3
1290 data8 0xA264C84BB8A66F86,0xC023 // A1
1291 data8 0xDD3B26E34762ED1E,0x5AF72F76E3C1B793 // A13,A12
1292 data8 0xD42E476507E3D06E,0x51EAD96CDD881DFA // A9,A8
1293 data8 0xCB25095F498DB15F,0x48E4B9FDEBFE24B5 // A5,A4
1294 data8 0xCE076A5A116C1D32,0x4046 // A2
1295 data8 0x94001BF5A24966F5,0x3FF1 // A0
1296 //(-15;-14)
1297 data8 0xE56DB8B72D7156FF,0x62EAB0CDB22539BE // A15,A14
1298 data8 0xDB63D76B0D3457E7,0x58E254823D0AE4FF // A11,A10
1299 data8 0xD15F060BF548404A,0x4EDE65C20CD4E961 // A7,A6
1300 data8 0x900DA565ED76C19D,0xC076 // A3
1301 data8 0x9868C809852DA712,0xC027 // A1
1302 data8 0xE067CCDA0408AAF0,0x5DE5A79C5C5C54AF // A13,A12
1303 data8 0xD6611ADBF5958ED0,0x53E0294092BE9677 // A9,A8
1304 data8 0xCC5EA28D90EE8C5D,0x49E014930EF336EE // A5,A4
1305 data8 0xB57930DCE7A61AE8,0x404E // A2
1306 data8 0x976BEC1F30DF151C,0x3FF5 // A0
1307 LOCAL_OBJECT_END(lgamma_data)
1310 .section .text
1311 GLOBAL_LIBM_ENTRY(__libm_lgamma)
1313 { .mfi
1314       getf.exp      GR_SignExp = f8
1315       frcpa.s1      FR_C,p9 = f1,f8
1316       mov           GR_ExpMask = 0x1ffff
1318 { .mfi
1319       addl          GR_ad_Data = @ltoff(lgamma_data),gp
1320       fcvt.fx.s1    FR_int_N = f8
1321       mov           GR_2_25 = 0x4002 // 2.25
1323 { .mfi
1324       getf.d        GR_ArgAsIs = f8
1325       fclass.m      p13,p0 = f8,0x1EF // is x NaTVal, NaN,
1326                                       // +/-0, +/-INF or +/-denormal?
1327       mov           GR_ExpBias = 0xFFFF
1329 { .mfi
1330       ld8           GR_ad_Data = [GR_ad_Data]
1331       fcvt.fx.trunc.s1 FR_int_Ntrunc = f8
1332       mov           GR_ExpOf256 = 0x10007
1334 { .mfi
1335       mov           GR_ExpOf2 = 0x10000
1336       fcmp.lt.s1    p14,p15 = f8,f0 // p14 if x<0
1337       dep.z         GR_Ind = GR_SignExp,8,4
1339 { .mfi
1340       and           GR_Exp = GR_SignExp,GR_ExpMask
1341       fma.s1        FR_2 = f1,f1,f1
1342       cmp.lt        p10,p0 = GR_SignExp,GR_ExpBias
1344 { .mfi
1345       add           GR_ad_1 = 0xB80,GR_ad_Data
1346       fnorm.s1      FR_NormX = f8
1347       shr.u         GR_Arg = GR_ArgAsIs,48
1349 { .mib
1350       add           GR_ad_Co = GR_Ind,GR_ad_Data
1351       add           GR_ad_Ce = 0x10,GR_ad_Data
1352       // jump if the input argument is NaTVal, NaN, +/-0, +/-INF or +/-denormal
1353 (p13) br.cond.spnt  lgamma_spec
1355 lgamma_common:
1356 { .mfi
1357       ldfpd         FR_LocalMin,FR_05 = [GR_ad_1],16
1358       fmerge.se     FR_x = f1,f8
1359       add           GR_ad_2 = 0xBC0,GR_ad_Data
1361 { .mfb
1362       add           GR_ad_Ce = GR_Ind,GR_ad_Ce
1363       fms.s1        FR_w = f8,f1,f1 // x-1
1364       // jump if the input argument is positive and  less than 1.0
1365 (p10) br.cond.spnt  lgamma_0_1
1367 { .mfi
1368       ldfe          FR_C01 = [GR_ad_Co],32
1369       fnma.s1       FR_InvX = FR_C,f8,f1 // NR iteration #1
1370 (p15) cmp.lt.unc    p8,p0 = GR_ExpOf256,GR_SignExp
1372 { .mib
1373       ldfe          FR_C11 = [GR_ad_Ce],32
1374 (p15) cmp.lt.unc    p11,p0 = GR_Arg,GR_2_25
1375       // jump if the input argument isn't less than 512.0
1376 (p8)  br.cond.spnt  lgamma_pstirling
1378 { .mfi
1379       ldfe          FR_C21 = [GR_ad_Co],32
1380 (p14) fms.s1        FR_r = FR_C,f8,f1 // reduced arg for log(x)
1381 (p14) cmp.lt.unc    p0,p9 = GR_Exp,GR_ExpOf256
1383 { .mib
1384       ldfe          FR_C31 = [GR_ad_Ce],32
1385       add           GR_ad_Co7 = 0x12C0,GR_ad_2
1386       // jump if the input argument is from range [1.0; 2.25)
1387 (p11) br.cond.spnt  lgamma_1_2
1389 { .mfi
1390       ldfe          FR_C41 = [GR_ad_Co],32
1391       fcvt.xf       FR_N = FR_int_N
1392       add           GR_ad_Ce7 = 0x1310,GR_ad_2
1394 { .mfb
1395       ldfe          FR_C51 = [GR_ad_Ce],32
1396 (p14) fma.s1        FR_5 = FR_2,FR_2,f1
1397       // jump if the input argument is less than or equal to -256.0 (GR_Exp >= GR_ExpOf256)
1398 (p9)  br.cond.spnt  lgamma_negstirling
1400 { .mfi
1401       ldfe          FR_C61 = [GR_ad_Co],32
1402 (p14) fcvt.xf       FR_Ntrunc = FR_int_Ntrunc
1403       shr           GR_Ind = GR_Ind,4
1405 { .mfi
1406       ldfe          FR_C71 = [GR_ad_Ce],32
1407 (p14) fma.s1        FR_Xp1 = f1,f1,FR_NormX // x+1
1408       cmp.eq        p6,p7 = GR_ExpOf2,GR_SignExp
1410 .pred.rel "mutex",p6,p7
1411 { .mfi
1412       ldfe          FR_C81 = [GR_ad_Co],32
1413 (p6)  fma.s1        FR_x = f0,f0,FR_NormX
1414       shladd        GR_Offs7 = GR_Ind,2,GR_Ind // (ind*16)*5
1416 { .mfi
1417       ldfe          FR_C91 = [GR_ad_Ce],32
1418 (p7)  fms.s1        FR_x = FR_x,f1,f1
1419       add           GR_ad_Co7 = 0x800,GR_ad_Data
1421 { .mfi
1422       ldfe          FR_CA1 = [GR_ad_Co],32
1423 (p14) fma.s1        FR_3 = f1,f1,FR_2
1424       shladd        GR_Offs7 = GR_Ind,1,GR_Offs7 // (ind*16)*7
1426 { .mfi
1427       ldfe          FR_C00 = [GR_ad_Ce],32
1428 (p14) fma.s1        FR_Xp4 = FR_2,FR_2,FR_NormX
1429       add           GR_ad_Ce7 = 0x810,GR_ad_Data
1431 { .mfi
1432       ldfe          FR_C10 = [GR_ad_Co],32
1433 (p6)  fms.s1        FR_Xm2 = FR_w,f1,f1
1434       add           GR_ad_Co7 = GR_ad_Co7,GR_Offs7
1436 { .mfi
1437       ldfe          FR_C20 = [GR_ad_Ce],32
1438 (p14) fma.s1        FR_r2 = FR_r,FR_r,f0 // log(x)
1439       add           GR_ad_Ce7 = GR_ad_Ce7,GR_Offs7
1441 { .mfi
1442       ldfe          FR_C30 = [GR_ad_Co],32
1443 (p14) fms.s1        FR_Xf = FR_NormX,f1,FR_N  // xf = x - [x]
1444 (p14) mov           GR_Arg17 = 0xC031 // -17
1446 { .mfi
1447       ldfe          FR_C40 = [GR_ad_Ce],32
1448 (p14) fma.s1        FR_Xp5 = FR_5,f1,FR_NormX
1449 (p14) sub           GR_Exp = GR_Exp,GR_ExpBias
1451 { .mfi
1452       ldfe          FR_C50 = [GR_ad_Co7],32
1453 (p14) fms.s1        FR_Xfr = FR_Xp1,f1,FR_Ntrunc // xfr = (x+1) - [x]
1454 (p14) cmp.lt.unc    p13,p0 = GR_Arg,GR_Arg17
1456 { .mfb
1457       ldfe          FR_C60 = [GR_ad_Ce7],32
1458 (p14) fma.s1        FR_Xp10 = FR_5,FR_2,FR_NormX
1459       // jump if the input argument is negative and greater than -17.0
1460 (p13) br.cond.spnt  lgamma_negrecursion
1462 { .mfi
1463       ldfe          FR_C70 = [GR_ad_Co7],32
1464       fma.s1        FR_C01 = FR_x,f1,FR_C01
1465 (p14) add           GR_ad_Ce = 0x1310,GR_ad_2
1467 { .mfi
1468       ldfe          FR_C80 = [GR_ad_Ce7],32
1469       fma.s1        FR_C11 = FR_x,f1,FR_C11
1470 (p14) add           GR_ad_Co = 0x12C0,GR_ad_2
1472 { .mfi
1473       ldfe          FR_C90 = [GR_ad_Co7],32
1474       fma.s1        FR_C21 = FR_x,f1,FR_C21
1475       nop.i         0
1477 { .mfi
1478       ldfe          FR_CA0 = [GR_ad_Ce7],32
1479       fma.s1        FR_C31 = FR_x,f1,FR_C31
1480       nop.i         0
1482 { .mfi
1483       ldfe          FR_CN = [GR_ad_Co7],32
1484       fma.s1        FR_C41 = FR_x,f1,FR_C41
1485       nop.i         0
1487 { .mfi
1488 (p14) ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
1489       fma.s1        FR_C51 = FR_x,f1,FR_C51
1490       nop.i         0
1492 { .mfi
1493 (p14) ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
1494       fma.s1        FR_C61 = FR_x,f1,FR_C61
1495       nop.i         0
1497 { .mfi
1498 (p14) ldfe          FR_Ln2 = [GR_ad_1]
1499       fma.s1        FR_C71 = FR_x,f1,FR_C71
1500       nop.i         0
1502 { .mfi
1503 (p14) ldfpd         FR_S28,FR_S26 = [GR_ad_Co],16
1504       fma.s1        FR_C81 = FR_x,f1,FR_C81
1505       add           GR_ad_2 = 0x60,GR_ad_2
1507 { .mfi
1508 (p14) ldfpd         FR_S24,FR_S22 = [GR_ad_Ce],16
1509       fma.s1        FR_C91 = FR_x,f1,FR_C91
1510       nop.i         0
1512 { .mfi
1513 (p14) ldfpd         FR_S20,FR_S18 = [GR_ad_Co],16
1514       fma.s1        FR_CA1 = FR_x,f1,FR_CA1
1515       nop.i         0
1517 { .mfi
1518 (p14) ldfpd         FR_S16,FR_S14 = [GR_ad_Ce],16
1519       fma.s1        FR_C01 = FR_C01,FR_x,FR_C00
1520       nop.i         0
1522 { .mfi
1523 (p14) getf.exp      GR_SignExp = FR_Xf
1524       fma.s1        FR_C11 = FR_C11,FR_x,FR_C10
1525       nop.i         0
1527 { .mfi
1528 (p14) ldfe          FR_S12 = [GR_ad_Co],16
1529       fma.s1        FR_C21 = FR_C21,FR_x,FR_C20
1530       nop.i         0
1532 { .mfi
1533 (p14) getf.sig      GR_Sig = FR_Xf
1534 (p14) frcpa.s1      FR_InvXf,p0 = f1,FR_Xf
1535       nop.i         0
1537 { .mfi
1538 (p14) ldfe          FR_S10 = [GR_ad_Ce],16
1539       fma.s1        FR_C41 = FR_C41,FR_x,FR_C40
1540       nop.i         0
1542 { .mfi
1543 (p14) ldfe          FR_S8 = [GR_ad_Co],16
1544       fma.s1        FR_C51 = FR_C51,FR_x,FR_C50
1545       nop.i         0
1547 { .mfi
1548 (p14) ldfe          FR_S6 = [GR_ad_Ce],16
1549       fma.s1        FR_C61 = FR_C61,FR_x,FR_C60
1550 (p14) and           GR_Expf = GR_SignExp,GR_ExpMask
1552 { .mfi
1553 (p14) sub           GR_Expf = GR_Expf,GR_ExpBias
1554       fma.s1        FR_C71 = FR_C71,FR_x,FR_C70
1555 (p14) shl           GR_Ind = GR_Sig,1
1557 { .mfi
1558 (p14) ldfe          FR_S4 = [GR_ad_Co],16
1559       fma.s1        FR_C81 = FR_C81,FR_x,FR_C80
1560 (p14) cmp.eq.unc    p8,p0 = 0,GR_Sig
1562 { .mfi
1563 (p14) setf.sig      FR_int_Nf = GR_Expf
1564       fma.s1        FR_C91 = FR_C91,FR_x,FR_C90
1565 (p14) shr.u         GR_Ind = GR_Ind,56
1567 { .mfb
1568 (p14) ldfe          FR_S2 = [GR_ad_Ce],16
1569       fma.s1        FR_CA1 = FR_CA1,FR_x,FR_CA0
1570       // jump if the input argument is integer number from range (-512.0;-17.0]
1571 (p8)  br.cond.spnt  lgamma_singularity
1573 { .mfi
1574 (p14) getf.sig      GR_Sig = FR_int_Ntrunc
1575       fma.s1        FR_C01 = FR_C01,FR_C11,f0
1576       nop.i         0
1578 { .mfi
1579 (p14) shladd        GR_ad_T = GR_Ind,4,GR_ad_2
1580       fma.s1        FR_C31 = FR_C31,FR_x,FR_C30
1581       nop.i         0
1583 { .mfi
1584 (p14) ldfe          FR_Tf = [GR_ad_T]
1585 (p14) fms.s1        FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg for log({x})
1586 (p14) extr.u        GR_Ind = GR_ArgAsIs,44,8
1588 { .mfi
1589       // set p9  if signgam is 32-bit int
1590       // set p10 if signgam is 64-bit int
1591       cmp.eq        p10,p9 = 8,r34
1592       fma.s1        FR_C21 = FR_C21,FR_C41,f0
1593       mov           GR_SignOfGamma = 1
1595 { .mfi
1596       nop.m         0
1597       fma.s1        FR_C51 = FR_C51,FR_C61,f0
1598 (p14) tbit.z.unc    p8,p0 = GR_Sig,0
1600 { .mfi
1601 (p14) shladd        GR_ad_T = GR_Ind,4,GR_ad_2
1602 (p6)  fma.s1        FR_CN = FR_CN,FR_Xm2,f0
1603       nop.i         0
1605 { .mfi
1606 (p14) setf.sig      FR_int_N = GR_Exp
1607       fma.s1        FR_C71 = FR_C71,FR_C81,f0
1608 (p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
1610 { .mfi
1611       nop.m         0
1612 (p14) fma.s1        FR_Xf2 = FR_Xf,FR_Xf,f0
1613       nop.i         0
1615 { .mfi
1616 (p14) ldfe          FR_T = [GR_ad_T]
1617       fma.s1        FR_C91 = FR_C91,FR_CA1,f0
1618       nop.i         0
1620 { .mfi
1621       nop.m         0
1622 (p14) fma.s1        FR_r2 = FR_r,FR_r,f0
1623       nop.i         0
1625 .pred.rel "mutex",p9,p10
1626 { .mfi
1627       // store sign of gamma(x) as 32-bit int
1628 (p9)  st4           [r33] = GR_SignOfGamma
1629       fma.s1        FR_C01 = FR_C01,FR_C31,f0
1630       nop.i         0
1632 { .mfi
1633       // store sign of gamma(x) as 64-bit int
1634 (p10) st8           [r33] = GR_SignOfGamma
1635 (p14) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
1636       nop.i         0
1638 { .mfi
1639       nop.m         0
1640 (p14) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
1641       nop.i         0
1643 { .mfb
1644       nop.m         0
1645 (p14) fma.s1        FR_P54f = FR_P5,FR_rf,FR_P4
1646       // jump if the input argument is non-integer from range (-512.0;-17.0]
1647 (p14) br.cond.spnt  lgamma_negpoly
1649 { .mfi
1650       nop.m         0
1651       fma.s1        FR_C21 = FR_C21,FR_C51,f0
1652       nop.i         0
1654 { .mfi
1655       nop.m         0
1656       fma.s1        FR_C71 = FR_C71,FR_C91,f0
1657       nop.i         0
1659 { .mfi
1660       nop.m         0
1661       fma.s1        FR_CN  = FR_C01,FR_CN,f0
1662       nop.i         0
1664 { .mfi
1665       nop.m         0
1666       fma.s1        FR_C21 = FR_C21,FR_C71,f0
1667       nop.i         0
1669 { .mfb
1670       nop.m         0
1671       fma.d.s0      f8 = FR_C21,FR_CN,f0
1672       br.ret.sptk   b0 // exit for arguments from range [2.25; 512.0)
1674 // branch for calculating ln(GAMMA(x)) for -512 < x < -17
1675 //---------------------------------------------------------------------
// Reached from the main path via "(p14) br.cond.spnt lgamma_negpoly" for
// non-integer x in this range; the sign of gamma(x) has already been stored.
// Inputs prepared before the branch: FR_Xf (xf = x - [x]), FR_Xf2 = xf^2,
// FR_rf / FR_r (reduced log arguments), FR_r2, FR_P54, FR_P54f, the table
// values FR_Tf / FR_T, the integer exponents FR_int_Nf / FR_int_N, the
// coefficients FR_S2..FR_S28, FR_P2..FR_P5, FR_Ln2, and the partially
// multiplied factors FR_C01, FR_C21, FR_C51, FR_C71, FR_C91, FR_CN.
// Result, rounded to double in f8:
//   f8 = -(S2*xf^2 + S4*xf^4 + ... + S28*xf^28)
//        - (product of the FR_Cxx factors and FR_CN)
//        - (log(x) + log({x}))
// Each logarithm is assembled as N*ln2 + T + P(r) with
//   P(r) = r - r^2/2 + r^3*(P2 + P3*r + P4*r^2 + P5*r^3)
// NOTE(review): this looks like the reflection formula for lgamma at
// negative arguments - confirm against the algorithm notes in the file head.
1676 .align 32
1677 lgamma_negpoly:
1678 { .mfi
1679       nop.m         0
1680       fma.s1        FR_Xf4 = FR_Xf2,FR_Xf2,f0 // xf^4
1681       nop.i         0
// first level of the S polynomial: combine neighbouring coefficients with xf^2
1683 { .mfi
1684       nop.m         0
1685       fma.s1        FR_S28 = FR_S28,FR_Xf2,FR_S26 // S28*xf^2 + S26
1686       nop.i         0
1688 { .mfi
1689       nop.m         0
1690       fma.s1        FR_S24 = FR_S24,FR_Xf2,FR_S22 // S24*xf^2 + S22
1691       nop.i         0
1693 { .mfi
1694       nop.m         0
1695       fma.s1        FR_S20 = FR_S20,FR_Xf2,FR_S18 // S20*xf^2 + S18
1696       nop.i         0
1698 { .mfi
1699       nop.m         0
1700       fma.s1        FR_S16 = FR_S16,FR_Xf2,FR_S14 // S16*xf^2 + S14
1701       nop.i         0
1703 { .mfi
1704       nop.m         0
1705       fma.s1        FR_S12 = FR_S12,FR_Xf2,FR_S10 // S12*xf^2 + S10
1706       nop.i         0
1708 { .mfi
1709       nop.m         0
1710       fma.s1        FR_S8 = FR_S8,FR_Xf2,FR_S6 // S8*xf^2 + S6
1711       nop.i         0
1713 { .mfi
1714       nop.m         0
1715       fma.s1        FR_S4 = FR_S4,FR_Xf2,FR_S2 // S4*xf^2 + S2
1716       nop.i         0
1718 { .mfi
1719       nop.m         0
1720       fma.s1        FR_rf2 = FR_rf,FR_rf,f0 // rf^2, log({x})
1721       nop.i         0
1723 { .mfi
1724       nop.m         0
1725       fma.s1        FR_P32f = FR_P3,FR_rf,FR_P2 // log({x})
1726       nop.i         0
1728 { .mfi
1729       nop.m         0
1730       fma.s1        FR_r3 = FR_r2,FR_r,f0 // log(x)
1731       nop.i         0
1733 { .mfi
1734       nop.m         0
1735       fcvt.xf       FR_Nf = FR_int_Nf // log({x})
1736       nop.i         0
// second level: combine the pairs with xf^4
1738 { .mfi
1739       nop.m         0
1740       fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S24 // terms S22..S28
1741       nop.i         0
1743 { .mfi
1744       nop.m         0
1745       fma.s1        FR_Xf8 = FR_Xf4,FR_Xf4,f0 // xf^8
1746       nop.i         0
1748 { .mfi
1749       nop.m         0
1750       fma.s1        FR_S20 = FR_S20,FR_Xf4,FR_S16 // terms S14..S20
1751       nop.i         0
1753 { .mfi
1754       nop.m         0
1755       fma.s1        FR_C21 = FR_C21,FR_C51,f0 // continue multiplying the C factors
1756       nop.i         0
1758 { .mfi
1759       nop.m         0
1760       fma.s1        FR_S12 = FR_S12,FR_Xf4,FR_S8 // terms S6..S12
1761       nop.i         0
1763 { .mfi
1764       nop.m         0
1765       fma.s1        FR_C71 = FR_C71,FR_C91,f0 // continue multiplying the C factors
1766       nop.i         0
1768 { .mfi
1769       nop.m         0
1770       fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // log(x)
1771       nop.i         0
1773 { .mfi
1774       nop.m         0
1775       fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32 // log(x)
1776       nop.i         0
1778 { .mfi
1779       nop.m         0
1780       fnma.s1       FR_P10f = FR_rf2,FR_05,FR_rf // log({x})
1781       nop.i         0
1783 { .mfi
1784       nop.m         0
1785       fcvt.xf       FR_N = FR_int_N // log(x)
1786       nop.i         0
1788 { .mfi
1789       nop.m         0
1790       fma.s1        FR_rf3 = FR_rf2,FR_rf,f0 // log({x})
1791       nop.i         0
1793 { .mfi
1794       nop.m         0
1795       fma.s1        FR_P54f = FR_P54f,FR_rf2,FR_P32f // log({x})
1796       nop.i         0
// third level: fold the groups together with xf^8
1798 { .mfi
1799       nop.m         0
1800       fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S20 // terms S14..S28
1801       nop.i         0
1803 { .mfi
1804       nop.m         0
1805       fma.s1        FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf // log({x})
1806       nop.i         0
1808 { .mfi
1809       nop.m         0
1810       fma.s1        FR_CN  = FR_C01,FR_CN,f0 // C01*CN
1811       nop.i         0
1813 { .mfi
1814       nop.m         0
1815       fma.s1        FR_C21 = FR_C21,FR_C71,f0 // C21*C71
1816       nop.i         0
1818 { .mfi
1819       nop.m         0
1820       fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // log(x)
1821       nop.i         0
1823 { .mfi
1824       nop.m         0
1825       fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // log(x)
1826       nop.i         0
1828 { .mfi
1829       nop.m         0
1830       fma.s1        FR_P54f = FR_P54f,FR_rf3,FR_P10f // log({x})
1831       nop.i         0
1833 { .mfi
1834       nop.m         0
1835       fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S12 // terms S6..S28
1836       nop.i         0
1838 { .mfi
1839       nop.m         0
1840       fnma.s1       FR_C21 = FR_C21,FR_CN,f0 // -(C21*CN): negated full product
1841       nop.i         0
1843 { .mfi
1844       nop.m         0
1845       fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // log(x)
1846       nop.i         0
1848 { .mfi
1849       nop.m         0
1850       fma.s1        FR_LnXf = FR_TpNxLn2f,f1,FR_P54f // log({x})
1851       nop.i         0
1853 { .mfi
1854       nop.m         0
1855       fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S4 // S2 + S4*xf^2 + ... + S28*xf^26
1856       nop.i         0
1858 { .mfi
1859       nop.m         0
1860       fma.s1        FR_LnX = FR_LnX,f1,FR_LnXf // log(x) + log({x})
1861       nop.i         0
1863 { .mfi
1864       nop.m         0
1865       fnma.s1       FR_S28 = FR_S28,FR_Xf2,FR_C21 // -(S poly)*xf^2 - (C product)
1866       nop.i         0
1868 { .mfb
1869       nop.m         0
1870       fms.d.s0      f8 = FR_S28,f1,FR_LnX // final result, rounded to double
1871       br.ret.sptk   b0
1873 // branch for calculating ln(GAMMA(x)) for x >= 512
1874 //---------------------------------------------------------------------
// Evaluates
//   ln(GAMMA(x)) ~ (x-1/2)*ln(x) - x + ln(sqrt(2*Pi)) + (W2 + W4/x^2)/x
// ln(x) is built as N*ln2 + T + P(r), where
//   N    = (GR_SignExp & GR_ExpMask) - 0xffff, converted to floating point
//   T    = table entry selected by the 8 significand bits of x that follow
//          the leading bit (16 bytes per entry)
//   r    = FR_C*x - 1 (FR_C is set before this view)
//   P(r) = r - r^2/2 + r^3*(P2 + P3*r + P4*r^2 + P5*r^3)
// 1/x is refined from the incoming FR_InvX by the steps marked
// "NR iteration".  Arguments with FR_OvfBound <= x leave through
// lgamma_overflow.  The sign stored through r33 is always +1 here.
1875 .align 32
1876 lgamma_pstirling:
1877 { .mfi
1878       ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
1879       nop.f         0
1880       and           GR_Exp = GR_SignExp,GR_ExpMask
1882 { .mfi
1883       ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
1884       fma.s1        FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1
1885       mov           GR_ExpBias = 0xffff
1887 { .mfi
1888       ldfe          FR_Ln2 = [GR_ad_1],16
1889       nop.f         0
1890       sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent of x
1892 { .mfi
1893       ldfpd         FR_W4,FR_OvfBound = [GR_ad_2],16
1894       nop.f         0
1895       nop.i         0
1897 { .mfi
1898       setf.sig      FR_int_N = GR_Exp
1899       fms.s1        FR_r = FR_C,f8,f1 // r = C*x - 1
1900       nop.i         0
1902 { .mmf
1903       getf.sig      GR_Sig = FR_NormX
1904       ldfe          FR_LnSqrt2Pi = [GR_ad_1],16
1905       nop.f         0
1907 { .mmf
1908       ldfe          FR_W2 = [GR_ad_2],16
1909       nop.m         0
1910       fnma.s1       FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2
1912 { .mfi
1913       add           GR_ad_2 = 0x40,GR_ad_2
1914       nop.f         0
1915       shl           GR_Ind = GR_Sig,1 // drop the leading significand bit
1917 { .mfi
1918       mov           GR_SignOfGamma = 1
1919       nop.f         0
1920       shr.u         GR_Ind = GR_Ind,56 // keep the next 8 bits as table index
1922 { .mfi
1923       shladd        GR_ad_2 = GR_Ind,4,GR_ad_2 // address of T: base + 16*index
1924       fma.s1        FR_r2 = FR_r,FR_r,f0
1925       // set p9  if signgam is 32-bit int
1926       // set p10 if signgam is 64-bit int
1927       cmp.eq        p10,p9 = 8,r34
1929 { .mfi
1930       ldfe          FR_T = [GR_ad_2]
1931       fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
1932       nop.i         0
1934 { .mfi
1935       nop.m         0
1936       fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
1937       nop.i         0
1939 { .mfi
1940       nop.m         0
1941       fcmp.le.s1    p6,p0 = FR_OvfBound,FR_NormX // p6: x >= overflow boundary
1942       nop.i         0
1944 { .mfi
1945       nop.m         0
1946       fma.s1        FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2
1947       nop.i         0
1949 { .mfi
1950       nop.m         0
1951       fcvt.xf       FR_N = FR_int_N
1952       nop.i         0
1954 { .mfb
1955       nop.m         0
1956       nop.f         0
1957       // jump if x is greater than or equal to OVERFLOW_BOUNDARY
1958 (p6)  br.cond.spnt  lgamma_overflow
1960 .pred.rel "mutex",p9,p10
1961 { .mfi
1962       // store sign of gamma(x) as 32-bit int
1963 (p9)  st4           [r33] = GR_SignOfGamma
1964       fma.s1        FR_r3 = FR_r2,FR_r,f0
1965       nop.i         0
1967 { .mfi
1968       // store sign of gamma(x) as 64-bit int
1969 (p10) st8           [r33] = GR_SignOfGamma
1970       fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - r^2/2
1971       nop.i         0
1973 { .mfi
1974       nop.m         0
1975       fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
1976       nop.i         0
1978 { .mfi
1979       nop.m         0
1980       fnma.s1       FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3
1981       nop.i         0
1983 { .mfi
1984       nop.m         0
1985       fms.s1        FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2)
1986       nop.i         0
1988 { .mfi
1989       nop.m         0
1990       fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*ln2
1991       nop.i         0
1993 { .mfi
1994       nop.m         0
1995       fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
1996       nop.i         0
1998 { .mfi
1999       nop.m         0
2000       fma.s1        FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3
2001       nop.i         0
2003 { .mfi
2004       nop.m         0
2005       fms.s1        FR_LnSqrt2Pi = FR_LnSqrt2Pi,f1,FR_NormX // ln(sqrt(2*Pi))-x
2006       nop.i         0
2008 { .mfi
2009       nop.m         0
2010       fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // ln(x)
2011       nop.i         0
2013 { .mfi
2014       nop.m         0
2015       fma.s1        FR_InvX2 = FR_InvX,FR_InvX,f0 // 1/x^2
2016       nop.i         0
2018 { .mfi
2019       nop.m         0
2020       // (x-1/2)*ln(x)+ln(sqrt(2*Pi))-x
2021       fma.s1        FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi
2022       nop.i         0
2024 { .mfi
2025       nop.m         0
2026       fma.s1        FR_W2 = FR_W4,FR_InvX2,FR_W2 // W2 + W4/x^2
2027       nop.i         0
2029 { .mfb
2030       nop.m         0
2031       fma.d.s0      f8 = FR_InvX,FR_W2,FR_LnX // add (W2 + W4/x^2)/x, round to double
2032       br.ret.sptk   b0
2034 // branch for calculating ln(GAMMA(x)) for x < -512
2035 //---------------------------------------------------------------------
// With xf = x - [x], the value returned in f8 (rounded to double) is
//   f8 = (x-1/2)*LnX - (x + ln(sqrt(2*Pi))) + (W2 + W4/x^2)/x
//        - (S2*xf^2 + S4*xf^4 + ... + S28*xf^28) - LnXf
// LnX and LnXf are both assembled as N*ln2 + T + P(r), with
//   P(r) = r - r^2/2 + r^3*(P2 + P3*r + P4*r^2 + P5*r^3);
// LnX uses r = FR_C*x - 1 and the exponent taken from GR_SignExp at entry,
// LnXf uses rf = frcpa(xf)*xf - 1 and the exponent of xf.
// Leaves through lgamma_singularity when GR_SignExp >= 0x30033 (unsigned)
// or when the significand of xf is zero.
// Sign of gamma(x) stored through r33: -1 when bit 0 of FR_int_Ntrunc is
// clear, +1 otherwise.
// NOTE(review): FR_C, FR_InvX, FR_N, FR_int_Ntrunc, GR_ad_Co7/GR_ad_Ce7 and
// GR_ArgAsIs are set before this view - confirm their exact contents there.
2036 .align 32
2037 lgamma_negstirling:
2038 { .mfi
2039       ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2040       fms.s1        FR_Xf = FR_NormX,f1,FR_N  // xf = x - [x]
2041       and           GR_Exp = GR_SignExp,GR_ExpMask
2043 { .mfi
2044       ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
2045       fma.s1        FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1
2046       mov           GR_0x30033 = 0x30033
2048 { .mfi
2049       ldfe          FR_Ln2 = [GR_ad_1],16
2050       nop.f         0
2051       extr.u        GR_Ind = GR_ArgAsIs,44,8 // 8-bit table index for T
2053 { .mib
2054       ldfd          FR_W4 = [GR_ad_2],16
2055       // jump if x is less than or equal to -2^52, i.e. x is big negative integer
2056       cmp.leu.unc   p7,p0 = GR_0x30033,GR_SignExp
2057 (p7)  br.cond.spnt  lgamma_singularity
2059 { .mfi
2060       ldfpd         FR_S28,FR_S26 = [GR_ad_Co7],16
2061       nop.f         0
2062       add           GR_ad_LnT = 0x50,GR_ad_2 // base address used for both T lookups
2064 { .mfi
2065       ldfpd         FR_S24,FR_S22 = [GR_ad_Ce7],16
2066       nop.f         0
2067       mov           GR_ExpBias = 0xffff
2069 { .mfi
2070       ldfpd         FR_S20,FR_S18 = [GR_ad_Co7],16
2071       nop.f         0
2072       shladd        GR_ad_T = GR_Ind,4,GR_ad_LnT // address of T: base + 16*index
2074 { .mfi
2075       ldfpd         FR_S16,FR_S14 = [GR_ad_Ce7],16
2076       nop.f         0
2077       sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent of x
2079 { .mfi
2080       ldfe          FR_S12 = [GR_ad_Co7],16
2081       nop.f         0
2082       nop.i         0
2084 { .mfi
2085       ldfe          FR_S10 = [GR_ad_Ce7],16
2086       fms.s1        FR_r = FR_C,f8,f1 // r = C*x - 1
2087       nop.i         0
2089 { .mmf
2090       ldfe          FR_S8 = [GR_ad_Co7],16
2091       ldfe          FR_S6 = [GR_ad_Ce7],16
2092       nop.f         0
2094 { .mfi
2095       ldfe          FR_S4 = [GR_ad_Co7],16
2096       fma.s1        FR_Xf2 = FR_Xf,FR_Xf,f0 // xf^2
2097       nop.i         0
2099 { .mfi
2100       ldfe          FR_S2 = [GR_ad_Ce7],16
2101       fnma.s1       FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2
2102       nop.i         0
2104 { .mfi
2105       setf.sig      FR_int_N = GR_Exp
2106       frcpa.s1      FR_InvXf,p9 = f1,FR_Xf // 1/xf
2107       nop.i         0
2109 { .mfi
2110       ldfe          FR_LnSqrt2Pi = [GR_ad_1],16
2111       nop.f         0
2112       nop.i         0
2114 { .mfi
2115       getf.exp      GR_SignExp = FR_Xf // from here on GR_SignExp describes xf
2116       nop.f         0
2117       nop.i         0
2119 { .mfi
2120       ldfe          FR_W2 = [GR_ad_2],16
2121       nop.f         0
2122       nop.i         0
2124 { .mfi
2125       getf.sig      GR_Sig = FR_Xf
2126       fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
2127       nop.i         0
2129 { .mfi
2130       ldfe          FR_T = [GR_ad_T]
2131       fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
2132       nop.i         0
2134 { .mfi
2135       and           GR_Exp = GR_SignExp,GR_ExpMask
2136       fma.s1        FR_r2 = FR_r,FR_r,f0
2137       nop.i         0
2139 { .mfi
2140       nop.m         0
2141       fms.s1        FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2)
2142       nop.i         0
2144 { .mfi
2145       nop.m         0
2146       fma.s1        FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2
2147       extr.u        GR_Ind = GR_Sig,55,8 // 8 significand bits of xf below the leading bit
2149 { .mfi
2150       sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent of xf
2151       fma.s1        FR_Xf4 = FR_Xf2,FR_Xf2,f0 // xf^4
2152       cmp.eq        p6,p0 = 0,GR_Sig // p6: significand of xf is zero
2154 { .mfi
2155       setf.sig      FR_int_Nf = GR_Exp
2156       fma.s1        FR_S28 = FR_S28,FR_Xf2,FR_S26 // S28*xf^2 + S26
2157       shladd        GR_ad_T = GR_Ind,4,GR_ad_LnT // address of Tf: base + 16*index
2159 { .mfb
2160       nop.m         0
2161       fma.s1        FR_S24 = FR_S24,FR_Xf2,FR_S22 // S24*xf^2 + S22
2162       // jump if xf == 0, i.e. the input argument is an integer
2163 (p6)  br.cond.spnt  lgamma_singularity
2165 { .mfi
2166       getf.sig      GR_Sig = FR_int_Ntrunc
2167       fma.s1        FR_S20 = FR_S20,FR_Xf2,FR_S18 // S20*xf^2 + S18
2168       nop.i         0
2170 { .mfi
2171       nop.m         0
2172       fma.s1        FR_S16 = FR_S16,FR_Xf2,FR_S14 // S16*xf^2 + S14
2173       nop.i         0
2175 { .mfi
2176       ldfe          FR_Tf = [GR_ad_T]
2177       fma.s1        FR_S12 = FR_S12,FR_Xf2,FR_S10 // S12*xf^2 + S10
2178       nop.i         0
2180 { .mfi
2181       nop.m         0
2182       fma.s1        FR_S8 = FR_S8,FR_Xf2,FR_S6 // S8*xf^2 + S6
2183       mov           GR_SignOfGamma = 1
2185 { .mfi
2186       nop.m         0
2187       fms.s1        FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg rf
2188       tbit.z        p8,p0 = GR_Sig,0 // p8: bit 0 of the truncated integer is clear
2190 { .mfi
2191       nop.m         0
2192       fma.s1        FR_r3 = FR_r2,FR_r,f0
2193       // set p9  if signgam is 32-bit int
2194       // set p10 if signgam is 64-bit int
2195       cmp.eq        p10,p9 = 8,r34
2197 { .mfi
2198       nop.m         0
2199       fcvt.xf       FR_N = FR_int_N
2200 (p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma // sign becomes -1
2202 { .mfi
2203       nop.m         0
2204       fnma.s1       FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3
2205       nop.i         0
2207 .pred.rel "mutex",p9,p10
2208 { .mfi
2209       // store sign of gamma(x) as 32-bit int
2210 (p9)  st4           [r33] = GR_SignOfGamma
2211       fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
2212       nop.i         0
2214 { .mfi
2215       // store sign of gamma(x) as 64-bit int
2216 (p10) st8           [r33] = GR_SignOfGamma
2217       fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - r^2/2
2218       nop.i         0
2220 { .mfi
2221       nop.m         0
2222       fma.s1        FR_Xf8 = FR_Xf4,FR_Xf4,f0 // xf^8
2223       nop.i         0
2225 { .mfi
2226       nop.m         0
2227       fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S24 // terms S22..S28
2228       nop.i         0
2230 { .mfi
2231       nop.m         0
2232       fma.s1        FR_S20 = FR_S20,FR_Xf4,FR_S16 // terms S14..S20
2233       nop.i         0
2235 { .mfi
2236       nop.m         0
2237       fma.s1        FR_S12 = FR_S12,FR_Xf4,FR_S8 // terms S6..S12
2238       nop.i         0
2240 { .mfi
2241       nop.m         0
2242       fma.s1        FR_rf2 = FR_rf,FR_rf,f0 // rf^2
2243       nop.i         0
2245 { .mfi
2246       nop.m         0
2247       fma.s1        FR_P54f = FR_P5,FR_rf,FR_P4
2248       nop.i         0
2250 { .mfi
2251       nop.m         0
2252       fma.s1        FR_P32f = FR_P3,FR_rf,FR_P2
2253       nop.i         0
2255 { .mfi
2256       nop.m         0
2257       fma.s1        FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3
2258       nop.i         0
2260 { .mfi
2261       nop.m         0
2262       fcvt.xf       FR_Nf = FR_int_Nf
2263       nop.i         0
2265 { .mfi
2266       nop.m         0
2267       fma.s1        FR_LnSqrt2Pi = FR_NormX,f1,FR_LnSqrt2Pi // x+ln(sqrt(2*Pi))
2268       nop.i         0
2270 { .mfi
2271       nop.m         0
2272       fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
2273       nop.i         0
2275 { .mfi
2276       nop.m         0
2277       fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S20 // terms S14..S28
2278       nop.i         0
2280 { .mfi
2281       nop.m         0
2282       fma.s1        FR_rf3 = FR_rf2,FR_rf,f0 // rf^3
2283       nop.i         0
2285 { .mfi
2286       nop.m         0
2287       fnma.s1       FR_P10f = FR_rf2,FR_05,FR_rf // rf - rf^2/2
2288       nop.i         0
2290 { .mfi
2291       nop.m         0
2292       fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*ln2
2293       nop.i         0
2295 { .mfi
2296       nop.m         0
2297       fma.s1        FR_P54f = FR_P54f,FR_rf2,FR_P32f
2298       nop.i         0
2300 { .mfi
2301       nop.m         0
2302       fma.s1        FR_InvX2 = FR_InvX,FR_InvX,f0 // 1/x^2
2303       nop.i         0
2305 { .mfi
2306       nop.m         0
2307       fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S12 // terms S6..S28
2308       nop.i         0
2310 { .mfi
2311       nop.m         0
2312       fma.s1        FR_S4 = FR_S4,FR_Xf2,FR_S2 // S4*xf^2 + S2
2313       nop.i         0
2315 { .mfi
2316       nop.m         0
2317       fma.s1        FR_P54f = FR_P54f,FR_rf3,FR_P10f // P(rf)
2318       nop.i         0
2320 { .mfi
2321       nop.m         0
2322       fma.s1        FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf // Tf + Nf*ln2
2323       nop.i         0
2325 { .mfi
2326       nop.m         0
2327       fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // LnX
2328       nop.i         0
2330 { .mfi
2331       nop.m         0
2332       fma.s1        FR_W2 = FR_W4,FR_InvX2,FR_W2 // W2 + W4/x^2
2333       nop.i         0
2335 { .mfi
2336       nop.m         0
2337       fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S4 // S2 + S4*xf^2 + ... + S28*xf^26
2338       nop.i         0
2340 { .mfi
2341       nop.m         0
2342       fma.s1        FR_LnXf = FR_TpNxLn2f,f1,FR_P54f // LnXf
2343       nop.i         0
2345 { .mfi
2346       nop.m         0
2347       fms.s1        FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi // (x-1/2)*LnX - (x+ln(sqrt(2*Pi)))
2348       nop.i         0
2350 { .mfi
2351       nop.m         0
2352       fma.s1        FR_LnX = FR_InvX,FR_W2,FR_LnX // + (W2 + W4/x^2)/x
2353       nop.i         0
2355 { .mfi
2356       nop.m         0
2357       fnma.s1       FR_LnX = FR_S28,FR_Xf2,FR_LnX // - (S poly)*xf^2
2358       nop.i         0
2360 { .mfb
2361       nop.m         0
2362       fms.d.s0      f8 = FR_LnX,f1,FR_LnXf // - LnXf, round to double
2363       br.ret.sptk   b0
2365 // branch for calculating ln(GAMMA(x)) for 0 <= x < 1
2366 //---------------------------------------------------------------------
// Chooses the polynomial argument FR_x and the coefficient-table offset by
// sub-range, starts the log-related loads, then falls through into
// lgamma_common_0_2.  Selection performed below (GR_Arg is compared with
// 0x3FD0, 0x3FE8 and 0x3FEC - presumably the upper 16 bits of the double
// values 0.25, 0.75 and 0.875):
//   arg <  0.25          : x = NormX,            table offset 0     (p6)
//   0.25 <= arg < 0.75   : x = NormX - LocalMin, table offset 0xC0  (p7)
//   0.75 <= arg < 0.875  : x = NormX - 1,        table offset 0x180 (p8,p9)
//   0.875 <= arg < 1.0   : x = NormX - 1,        table offset 0     (p8)
// The table base is GR_ad_Data + 0x1C40 for GR_ad_Co and 0x60 above that
// for GR_ad_Ce.  p10 is left set when arg < 0.875.
2367 .align 32
2368 lgamma_0_1:
2369 { .mfi
2370       ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2371       fms.s1        FR_x = FR_NormX,f1,f0 // x
2372       mov           GR_Arg025 = 0x3FD0
2374 { .mfi
2375       ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
2376       nop.f         0
2377       add           GR_ad_Co = 0x1C40,GR_ad_Data
2379 { .mfi
2380       ldfe          FR_Ln2 = [GR_ad_1],0x50
2381       nop.f         0
2382       // p6 if arg < 0.25
2383       cmp.lt        p6,p9 = GR_Arg,GR_Arg025
2385 { .mfi
2386       add           GR_ad_2 = 0x40,GR_ad_2
2387       nop.f         0
2388       mov           GR_Arg075 = 0x3FE8
2390 { .mfi
2391       ldfpd         FR_Q8,FR_Q7 = [GR_ad_1],16
2392       fma.s1        FR_w2 = FR_w,FR_w,f0 // w^2
2393       // p7 if 0.25 <= arg < 0.75
2394       // p8 if 0.75 <= arg < 1.0
2395 (p9)  cmp.lt.unc    p7,p8 = GR_Arg,GR_Arg075
2397 { .mfi
2398       mov           GR_Arg0875 = 0x3FEC
2399       nop.f         0
2400       sub           GR_Exp = GR_Exp,GR_ExpBias
2402 { .mfi
2403       ldfpd         FR_Q6,FR_Q5 = [GR_ad_2],16
2404       nop.f         0
// for arg >= 0.75, p9 is recomputed as arg < 0.875; otherwise it keeps its
// earlier value (arg >= 0.25)
2405 (p8)  cmp.lt        p9,p0 = GR_Arg,GR_Arg0875
2407 { .mfi
2408       ldfpd         FR_Q4,FR_Q3 = [GR_ad_1],16
2409       nop.f         0
2410       add           GR_ad_Ce = 0x60,GR_ad_Co
2412 .pred.rel "mutex",p7,p8
2413 { .mfi
2414       ldfd          FR_Q2 = [GR_ad_2],16
2415       fms.s1        FR_r = FR_C,f8,f1 // r = C*x - 1
2416 (p7)  mov           GR_Offs = 0xC0
2418 { .mfi
2419       setf.sig      FR_int_N = GR_Exp
2420       nop.f         0
2421 (p8)  mov           GR_Offs = 0x180
2423 .pred.rel "mutex",p6,p7
2424 { .mfi
2425 (p9)  add           GR_ad_Co = GR_Offs,GR_ad_Co // apply sub-range table offset
2426 (p8)  fms.s1        FR_x = FR_NormX,f1,f1 // x-1
2427       nop.i         0
2429 { .mfi
2430 (p9)  add           GR_ad_Ce = GR_Offs,GR_ad_Ce // apply sub-range table offset
2431 (p7)  fms.s1        FR_x = FR_NormX,f1,FR_LocalMin // x-LocalMin
2432       cmp.lt        p10,p0 = GR_Arg,GR_Arg0875 // p10 if arg < 0.875
2434 lgamma_common_0_2:
// Common tail.  Evaluates the degree-17 polynomial in FR_x whose
// coefficients A17..A0 are loaded alternately through GR_ad_Co and
// GR_ad_Ce, stores +1 as the sign of gamma(x) through r33, and, when p10 is
// set, subtracts a logarithm term FR_LnX:
//   f8 = A0 + A1*x + A2*x^2 + ... + A17*x^17  [- LnX if p10]
// LnX is chosen by the exponent field of FR_w (only evaluated under p10):
//   p13 (exponent >= 0xfff9): LnX = N*ln2 + T + P(r), with
//        P(r) = r - r^2/2 + r^3*(P2 + P3*r + P4*r^2 + P5*r^3)
//   p14 (otherwise):          LnX = w - w^2/2 + Q2*w^3 + Q3*w^4 + ... + Q8*w^9
// Expects FR_x, FR_w, FR_w2, FR_r, FR_int_N, the P and Q coefficients,
// FR_Ln2, GR_ad_2, GR_ad_Co, GR_ad_Ce and p10 to be set by the code that
// reaches this label.
// NOTE(review): the label name suggests it is also entered from the
// 1.0 <= x < 2.25 branch; that entry is not visible here.
2435 { .mfi
2436       ldfpd         FR_A17,FR_A16 = [GR_ad_Co],16
2437       nop.f         0
2438       nop.i         0
2440 { .mfi
2441       ldfpd         FR_A15,FR_A14 = [GR_ad_Ce],16
2442       nop.f         0
2443       nop.i         0
2445 { .mfi
2446       ldfpd         FR_A13,FR_A12 = [GR_ad_Co],16
2447       nop.f         0
2448 (p10) extr.u        GR_Ind = GR_ArgAsIs,44,8 // 8-bit table index for T
2450 { .mfi
2451       ldfpd         FR_A11,FR_A10 = [GR_ad_Ce],16
2452       nop.f         0
2453       nop.i         0
2455 { .mfi
2456       ldfpd         FR_A9,FR_A8 = [GR_ad_Co],16
2457 (p10) fnma.s1       FR_Q1 = FR_05,FR_w2,FR_w // w - w^2/2
2458       nop.i         0
2460 { .mfi
2461       ldfpd         FR_A7,FR_A6 = [GR_ad_Ce],16
2462 (p10) fma.s1        FR_w3 = FR_w2,FR_w,f0 // w^3
2463       nop.i         0
2465 { .mfi
2466 (p10) getf.exp      GR_SignExp_w = FR_w
2467 (p10) fma.s1        FR_w4 = FR_w2,FR_w2,f0 // w^4
2468       nop.i         0
2470 { .mfi
2471 (p10) shladd        GR_ad_2 = GR_Ind,4,GR_ad_2 // address of T: base + 16*index
2472 (p10) fma.s1        FR_r2 = FR_r,FR_r,f0
2473       nop.i         0
2475 { .mfi
2476 (p10) ldfe          FR_T = [GR_ad_2]
2477 (p10) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
2478       nop.i         0
2480 { .mfi
2481       ldfe          FR_A5 = [GR_ad_Co],16
2482 (p10) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
2483       nop.i         0
2485 { .mfi
2486       ldfe          FR_A4 = [GR_ad_Ce],16
2487       fma.s1        FR_x2 = FR_x,FR_x,f0 // x^2
2488 (p10) and           GR_Exp_w = GR_ExpMask, GR_SignExp_w
2490 { .mfi
2491       ldfe          FR_A3 = [GR_ad_Co],16
2492       nop.f         0
2493 (p10) mov           GR_fff9 = 0xfff9
2495 //    p13 <== large w __libm_lgamma
2496 //    p14 <== small w __libm_lgamma
2497 { .mfi
2498       ldfe          FR_A2 = [GR_ad_Ce],16
2499 (p10) fma.s1        FR_Q8 = FR_Q8,FR_w,FR_Q7 // Q8*w + Q7
2500 (p10) cmp.ge.unc    p13,p14 = GR_Exp_w,GR_fff9
2502 { .mfi
2503       ldfe          FR_A1 = [GR_ad_Co],16
2504 (p10) fma.s1        FR_Q6 = FR_Q6,FR_w,FR_Q5 // Q6*w + Q5
2505       nop.i         0
2507 { .mfi
2508       ldfe          FR_A0 = [GR_ad_Ce],16
2509 (p10) fma.s1        FR_Q4 = FR_Q4,FR_w,FR_Q3 // Q4*w + Q3
2510       nop.i         0
2512 { .mfi
2513       nop.m         0
2514 (p10) fma.s1        FR_Q2 = FR_Q2,FR_w3,FR_Q1 // Q2*w^3 + w - w^2/2
2515       nop.i         0
2517 { .mfi
2518       // set p11 if signgam is 32-bit int
2519       // set p12 if signgam is 64-bit int
2520       cmp.eq        p12,p11 = 8,r34
2521 (p10) fma.s1        FR_r3 = FR_r2,FR_r,f0
2522       nop.i         0
2524 { .mfi
2525       nop.m         0
2526 (p10) fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - r^2/2
2527       mov           GR_SignOfGamma = 1
2529 .pred.rel "mutex",p11,p12
2530 { .mfi
2531       // store sign of gamma(x) as 32-bit int
2532 (p11) st4           [r33] = GR_SignOfGamma
2533       fma.s1        FR_A17 = FR_A17,FR_x,FR_A16 // A17*x + A16
2534       nop.i         0
2536 { .mfi
2537       // store sign of gamma(x) as 64-bit int
2538 (p12) st8           [r33] = GR_SignOfGamma
2539       fma.s1        FR_A15 = FR_A15,FR_x,FR_A14 // A15*x + A14
2540       nop.i         0
2542 { .mfi
2543       nop.m         0
2544 (p10) fcvt.xf       FR_N = FR_int_N
2545       nop.i         0
2547 { .mfi
2548       nop.m         0
2549 (p10) fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
2550       nop.i         0
2552 { .mfi
2553       nop.m         0
2554       fma.s1        FR_A13 = FR_A13,FR_x,FR_A12 // A13*x + A12
2555       nop.i         0
2557 { .mfi
2558       nop.m         0
2559       fma.s1        FR_A11 = FR_A11,FR_x,FR_A10 // A11*x + A10
2560       nop.i         0
2562 { .mfi
2563       nop.m         0
2564       fma.s1        FR_A9 = FR_A9,FR_x,FR_A8 // A9*x + A8
2565       nop.i         0
2567 { .mfi
2568       nop.m         0
2569       fma.s1        FR_A7 = FR_A7,FR_x,FR_A6 // A7*x + A6
2570       nop.i         0
2572 { .mfi
2573       nop.m         0
2574 (p10) fma.s1        FR_Qlo = FR_Q8,FR_w2,FR_Q6 // Q5 + Q6*w + Q7*w^2 + Q8*w^3
2575       nop.i         0
2577 { .mfi
2578       nop.m         0
2579 (p10) fma.s1        FR_w6 = FR_w3,FR_w3,f0 // w^6
2580       nop.i         0
2582 { .mfi
2583       nop.m         0
2584 (p10) fma.s1        FR_Qhi = FR_Q4,FR_w4,FR_Q2 // terms of the w polynomial up to w^5
2585       nop.i         0
2587 { .mfi
2588       nop.m         0
2589       fma.s1        FR_A5 = FR_A5,FR_x,FR_A4 // A5*x + A4
2590       nop.i         0
2592 { .mfi
2593       nop.m         0
2594 (p10) fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*ln2
2595       nop.i         0
2597 { .mfi
2598       nop.m         0
2599       fma.s1        FR_A3 = FR_A3,FR_x,FR_A2 // A3*x + A2
2600       nop.i         0
2602 { .mfi
2603       nop.m         0
2604 (p10) fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
2605       nop.i         0
2607 { .mfi
2608       nop.m         0
2609       fma.s1        FR_A1 = FR_A1,FR_x,FR_A0 // A1*x + A0
2610       nop.i         0
2612 { .mfi
2613       nop.m         0
2614       fma.s1        FR_A17 = FR_A17,FR_x2,FR_A15 // terms A14..A17
2615       nop.i         0
2617 { .mfi
2618       nop.m         0
2619       fma.s1        FR_A13 = FR_A13,FR_x2,FR_A11 // terms A10..A13
2620       nop.i         0
2622 { .mfi
2623       nop.m         0
2624       fma.s1        FR_A9 = FR_A9,FR_x2,FR_A7 // terms A6..A9
2625       nop.i         0
2627 { .mfi
2628       nop.m         0
2629       fma.s1        FR_x4 = FR_x2,FR_x2,f0 // x^4
2630       nop.i         0
2632 { .mfi
2633       nop.m         0
2634 (p14) fma.s1        FR_LnX = FR_Qlo,FR_w6,FR_Qhi // small w: polynomial in w
2635       nop.i         0
2637 { .mfi
2638       nop.m         0
2639       fma.s1        FR_A5 = FR_A5,FR_x2,FR_A3 // terms A2..A5
2640       nop.i         0
2642 { .mfi
2643       nop.m         0
2644 (p13) fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // large w: N*ln2 + T + P(r)
2645       nop.i         0
2647 { .mfi
2648       nop.m         0
2649       fma.s1        FR_A17 = FR_A17,FR_x4,FR_A13 // terms A10..A17
2650       nop.i         0
2652 { .mfi
2653       nop.m         0
2654       fma.s1        FR_x8 = FR_x4,FR_x4,f0 // x^8
2655       nop.i         0
2657 { .mfi
2658       nop.m         0
2659       fma.s1        FR_A9 = FR_A9,FR_x4,FR_A5 // terms A2..A9
2660       nop.i         0
2662 { .mfi
2663       nop.m         0
2664       fma.s1        FR_A17 = FR_A17,FR_x8,FR_A9 // A2 + A3*x + ... + A17*x^15
2665       nop.i         0
2667 { .mfi
2668       nop.m         0
2669 (p10) fms.s1        FR_A1 = FR_A1,f1,FR_LnX // (A1*x + A0) - LnX
2670       nop.i         0
2672 { .mfb
2673       nop.m         0
2674       fma.d.s0      f8 = FR_A17,FR_x2,FR_A1 // final result, rounded to double
2675       br.ret.sptk   b0
2677 // branch for calculating of ln(GAMMA(x)) for 1.0 <= x < 2.25
2678 //---------------------------------------------------------------------
//    Selects one of three sub-ranges by comparing GR_Arg with GR_Arg125
//    (0x3FF4) and GR_Arg175 (0x3FFC); these are the upper 16 bits of the
//    IEEE double encodings of 1.25 and 1.75.
//       p6: 1.0  <= x < 1.25   GR_Offs = 0      FR_x = FR_w
//       p7: 1.25 <= x < 1.75   GR_Offs = 0xC0   FR_x = FR_w - FR_LocalMin
//       p8: x >= 1.75          GR_Offs = 0x180  FR_x = FR_w - 1
//    GR_Offs is added to both GR_ad_Co and GR_ad_Ce before control passes
//    to lgamma_common_0_2.  The sign of gamma(x), +1, is stored through
//    r33 as 4 or 8 bytes depending on whether r34 == 8.
//    NOTE(review): GR_Arg, FR_w, FR_NormX and FR_LocalMin are set before
//    this label; their exact meaning is not visible here -- confirm.
2679 .align 32
2680 lgamma_1_2:
2681 { .mfi
2682       add           GR_ad_Co = 0x10B0,GR_ad_1
//    p12 <== (FR_w == 1.0); used below for the "fast exit for 2.0"
2683       fcmp.eq.s1    p12,p0 = f1,FR_w
2684       mov           GR_Arg125 = 0x3FF4
2686 { .mfi
2687       add           GR_ad_Ce = 0x1110,GR_ad_1
2688       nop.f         0
2689       mov           GR_Arg175 = 0x3FFC
2691 { .mfi
2692       mov           GR_SignOfGamma = 1
//    p13 <== (FR_NormX == 1.0); used below for the "fast exit for 1.0"
2693       fcmp.eq.s1    p13,p0 = f1,FR_NormX
2694       cmp.lt        p6,p9 = GR_Arg,GR_Arg125 // 1.0 <= x < 1.25
2696 { .mfi
2697       // set p10 if signgam is 32-bit int
2698       // set p11 if signgam is 64-bit int
2699       cmp.eq        p11,p10 = 8,r34
2700       nop.f         0
2701       cmp.ge        p8,p0 = GR_Arg,GR_Arg175 // x >= 1.75
2703 .pred.rel "mutex",p10,p11
2704 { .mfi
2705       // store sign of gamma(x) as 32-bit int
2706 (p10) st4           [r33] = GR_SignOfGamma
//    result for the p12 fast exit: 0*0+0 = +0
2707 (p12) fma.d.s0      f8 = f0,f0,f0
2708 (p9)  cmp.lt.unc    p7,p0 = GR_Arg,GR_Arg175 // 1.25 <= x < 1.75
2710 { .mib
2711       // store sign of gamma(x) as 64-bit int
2712 (p11) st8           [r33] = GR_SignOfGamma
2713       mov           GR_Offs = 0
2714 (p12) br.ret.spnt   b0 // fast exit for 2.0
2716 .pred.rel "mutex",p7,p8
2717 { .mfi
2718 (p7)  mov           GR_Offs = 0xC0
2719 (p7)  fms.s1        FR_x = FR_w,f1,FR_LocalMin
2720       nop.i         0
2722 { .mfb
2723 (p8)  mov           GR_Offs = 0x180
//    result for the p13 fast exit: 0*0+0 = +0
2724 (p13) fma.d.s0      f8 = f0,f0,f0
2725 (p13) br.ret.spnt   b0 // fast exit for 1.0
2727 .pred.rel "mutex",p6,p8
2728 { .mfi
2729       add           GR_ad_Co = GR_ad_Co,GR_Offs
2730 (p8)  fms.s1        FR_x = FR_w,f1,f1
//    r0 == r0 is always true, so the complementary target p10 is cleared.
//    NOTE(review): p10 presumably disables the logarithm term in
//    lgamma_common_0_2, which is defined outside this section -- confirm.
2731       cmp.eq        p0,p10 = r0,r0
2733 { .mfb
2734       add           GR_ad_Ce = GR_ad_Ce,GR_Offs
//    0*0 + FR_w, i.e. a plain copy of FR_w into FR_x
2735 (p6)  fma.s1        FR_x = f0,f0,FR_w
2736       br.cond.sptk  lgamma_common_0_2
2738 // branch for calculating of ln(GAMMA(x)) for -17 < x < 0
2739 //---------------------------------------------------------------------
//    Outline of the code in this section (register aliases and most
//    inputs are defined before this label):
//      1. compare the bits of FR_Xfr with the encodings of 0.25, 0.5 and
//         0.75 to pick the coefficient offset GR_Offs and the polynomial
//         argument FR_x;
//      2. build FR_XpK values and multiply a GR_Sig-dependent subset of
//         them into FR_NormX;
//      3. leave early for lgamma_singularity (FR_Xf == 0), for
//         lgamma_negroots (argument within a table-driven window around
//         a stored root) or for lgamma_pseudoroot;
//      4. store the sign of gamma(x) through r33 and evaluate the
//         degree-17 polynomial with coefficients FR_B17..FR_B0 in FR_x;
//      5. subtract a term derived from FR_NormX (FR_LnX on the "large w"
//         path, a polynomial in FR_w on the "small w" path) and return
//         the result in f8.
//    NOTE(review): FR_2, FR_3, FR_5 and FR_05 presumably hold 2.0, 3.0,
//    5.0 and 0.5; they are loaded outside this section -- confirm.
2740 .align 32
2741 lgamma_negrecursion:
2742 { .mfi
2743       getf.d        GR_ArgXfrAsIs = FR_Xfr
2744       fma.s1        FR_Xp2 = FR_2,f1,FR_NormX
2745       mov           GR_Arg05 = 0x3FE
2747 { .mfi
2748       add           GR_ad_Roots = 0x1390,GR_ad_1
2749       fma.s1        FR_NormX = FR_NormX,FR_Xfr,f0
//    upper 16 bits of the double encoding of 0.75
2750       mov           GR_Arg075 = 0x3FE8
2752 { .mfi
2753       getf.sig      GR_Sig = FR_int_Ntrunc
2754       fma.s1        FR_Xp3 = FR_2,f1,FR_Xp1
//    0x3FE << 52 = 0x3FE0000000000000, the double encoding of 0.5
2755       shl           GR_Arg05 = GR_Arg05,52
2757 { .mfi
//    upper 16 bits of the double encoding of 0.25
2758       mov           GR_Arg025 = 0x3FD0
2759       fma.s1        FR_Xp6 = FR_5,f1,FR_Xp1
2760       add           GR_ad_Co = 0x1C40,GR_ad_Data
2762 { .mfi
2763       add           GR_ad_Dx = 8,GR_ad_Roots
2764       fma.s1        FR_Xp7 = FR_2,f1,FR_Xp5
2765       shr.u         GR_ArgXfr = GR_ArgXfrAsIs,48
2767 { .mfi
2768       add           GR_ad_Ce = 0x60,GR_ad_Co
2769       fma.s1        FR_Xp8 = FR_3,f1,FR_Xp5
//    p6 if the bits of FR_Xfr compare below the encoding of 0.5
2770       cmp.lt        p6,p0 = GR_ArgXfrAsIs,GR_Arg05
2772 { .mfi
2773       and           GR_RootInd = 0xF,GR_Sig
2774       fma.s1        FR_Xp9 = FR_2,FR_2,FR_Xp5
2775       // p10 if arg < 0.25
2776       cmp.lt        p10,p14 = GR_ArgXfr,GR_Arg025
2778 { .mfi
//    when p6 is set both table pointers move forward by 0x120 bytes
2779 (p6)  add           GR_ad_Roots = 0x120,GR_ad_Roots
2780       fma.s1        FR_Xp11 = f1,f1,FR_Xp10
2781 (p6)  add           GR_ad_Dx = 0x120,GR_ad_Dx
2783 { .mfi
//    table entries are 16 bytes apart: the value loaded as the root at
//    offset 0 and the value loaded as Dx at offset 8
2784       shladd        GR_ad_Root = GR_RootInd,4,GR_ad_Roots
2785       fma.s1        FR_Xp12 = FR_2,f1,FR_Xp10
2786       // p11 if 0.25 <= arg < 0.75
2787       // p12 if 0.75 <= arg < 1.0
2788 (p14) cmp.lt.unc    p11,p12 = GR_ArgXfr,GR_Arg075
2790 { .mfi
2791       shladd        GR_ad_Dx = GR_RootInd,4,GR_ad_Dx
2792       fma.s1        FR_Xp13 = FR_3,f1,FR_Xp10
//    p13 <== (GR_Sig != 0); it qualifies the GR_Sig comparisons below
2793       cmp.eq        p0,p13 = 0,GR_Sig
2795 { .mfi
//    the same 8 bytes are read twice: as an integer (GR_Root) and as a
//    double (FR_Root)
2796       ld8           GR_Root = [GR_ad_Root]
2797       fma.s1        FR_Xp14 = FR_2,FR_2,FR_Xp10
2798 (p12) mov           GR_Offs = 0x180
2800 { .mfi
2801       ldfd          FR_Root = [GR_ad_Root]
2802       fma.s1        FR_Xp15 = FR_5,f1,FR_Xp10
2803       and           GR_Sig = 0xF,GR_Sig
2805 { .mfi
2806       ld8           GR_Dx = [GR_ad_Dx]
2807       fma.s1        FR_Xp16 = FR_3,FR_2,FR_Xp10
//    From here on p6..p9 are reused repeatedly: each cmp.ge.unc tests
//    "constant >= GR_Sig" and enables one of the following multiplies.
2808 (p13) cmp.ge.unc    p6,p0 = 0xD,GR_Sig
2810 { .mfi
2811 (p11) mov           GR_Offs = 0xC0
2812 (p13) fma.s1        FR_NormX = FR_NormX,FR_Xp1,f0
2813 (p13) cmp.ge.unc    p7,p0 = 0xB,GR_Sig
2815 { .mfi
//    p14 is still "arg >= 0.25" here; GR_Offs is only written under p11
//    or p12, which are both subsets of p14
2816 (p14) add           GR_ad_Co = GR_Offs,GR_ad_Co
2817 (p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp3,f0
2818 (p13) cmp.ge.unc    p8,p0 = 0x9,GR_Sig
2820 { .mfi
2821 (p14) add           GR_ad_Ce = GR_Offs,GR_ad_Ce
2822 (p7)  fma.s1        FR_Xp4 = FR_Xp4,FR_Xp5,f0
2823 (p13) cmp.ge.unc    p9,p0 = 0x7,GR_Sig
2825 { .mfi
//    coefficient loads alternate between the GR_ad_Co and GR_ad_Ce
//    streams, each post-incremented by 16
2826       ldfpd         FR_B17,FR_B16 = [GR_ad_Co],16
2827 (p8)  fma.s1        FR_Xp6 = FR_Xp6,FR_Xp7,f0
2828 (p13) cmp.ge.unc    p6,p0 = 0x5,GR_Sig
2830 { .mfi
2831       ldfpd         FR_B15,FR_B14 = [GR_ad_Ce],16
2832 (p9)  fma.s1        FR_Xp8 = FR_Xp8,FR_Xp9,f0
2833 (p13) cmp.ge.unc    p7,p0 = 0x3,GR_Sig
2835 { .mfi
2836       ldfpd         FR_B13,FR_B12 = [GR_ad_Co],16
2837 (p6)  fma.s1        FR_Xp10 = FR_Xp10,FR_Xp11,f0
2838 (p13) cmp.ge.unc    p8,p0 = 0x1,GR_Sig
2840 { .mfi
2841       ldfpd         FR_B11,FR_B10 = [GR_ad_Ce],16
2842 (p7)  fma.s1        FR_Xp12 = FR_Xp12,FR_Xp13,f0
2843 (p13) cmp.eq.unc    p9,p0 = 0,GR_Sig
2845 { .mfi
2846       ldfpd         FR_B9,FR_B8 = [GR_ad_Co],16
2847 (p8)  fma.s1        FR_Xp14 = FR_Xp14,FR_Xp15,f0
2848       mov           GR_Arg15 = 0xC02E // -15
2850 { .mfi
2851       ldfpd         FR_B7,FR_B6 = [GR_ad_Ce],16
//    p15 <== (FR_Xf == 0); triggers the lgamma_singularity branch below
2852       fcmp.eq.s1    p15,p0 = f0,FR_Xf
2853 (p13) cmp.ge.unc    p6,p0 = 0xC,GR_Sig
2855 { .mfi
2856       ldfe          FR_B5 = [GR_ad_Co],16
2857 (p9)  fma.s1        FR_NormX = FR_NormX,FR_Xp16,f0
//    GR_Root becomes (GR_ArgAsIs - root bits); GR_Dx is added below so a
//    single unsigned compare against 2*GR_Dx tests a two-sided window
2858       sub           GR_Root = GR_ArgAsIs,GR_Root
2860 { .mfi
2861       sub           GR_RootInd = 0xE,GR_RootInd
2862 (p11) fms.s1        FR_x = FR_Xfr,f1,FR_LocalMin // x-LocalMin
2863 (p13) cmp.ge.unc    p7,p0 = 0x8,GR_Sig
2865 .pred.rel "mutex",p10,p12
2866 { .mfi
2867       ldfe          FR_B4 = [GR_ad_Ce],16
2868 (p10) fms.s1        FR_x = FR_Xfr,f1,f0 // x
2869       add           GR_Root = GR_Root,GR_Dx
2871 { .mfb
//    p14 is redefined: unsigned 0xE > GR_RootInd (after the subtraction
//    from 0xE above)
2872       cmp.gtu       p14,p0 = 0xE,GR_RootInd
2873 (p12) fms.s1        FR_x = FR_Xfr,f1,f1 // x-1
2874 (p15) br.cond.spnt  lgamma_singularity
2876 { .mfi
2877       ldfe          FR_B3 = [GR_ad_Co],16
2878 (p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp4,f0
2879 (p14) cmp.lt.unc    p11,p0 = GR_Arg,GR_Arg15
2881 { .mfi
2882       ldfe          FR_B2 = [GR_ad_Ce],16
2883 (p7)  fma.s1        FR_Xp6 = FR_Xp6,FR_Xp8,f0
2884       add           GR_2xDx = GR_Dx,GR_Dx
2886 { .mfi
2887       ldfe          FR_B1 = [GR_ad_Co],16
//    FR_r = f8 - FR_Root, the argument used by lgamma_negroots
2888       fms.s1        FR_r = f8,f1,FR_Root
2889 (p13) cmp.ge.unc    p6,p0 = 0x4,GR_Sig
2891 { .mib
2892       ldfe          FR_B0 = [GR_ad_Ce],16
//    p10 <== p11 and (unsigned) GR_Root <= 2*GR_Dx
2893 (p11) cmp.leu.unc   p10,p0 = GR_Root,GR_2xDx
2894 (p10) br.cond.spnt  lgamma_negroots
2896 { .mfi
2897       ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2898 (p6)  fma.s1        FR_Xp10 = FR_Xp10,FR_Xp12,f0
//    p14 <== bit 0 of GR_Sig is 0, p15 <== bit 0 of GR_Sig is 1
2899       tbit.z        p14,p15 = GR_Sig,0
2901 { .mfi
2902       ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
//    NOTE(review): FR_T is overwritten by a later ldfe before it is read,
//    so this .s0 operation on f8 presumably exists only for its effect
//    on the user status flags -- confirm.
2903       fnma.d.s0     FR_T = f1,f1,f8 //      nop.f         0
2905 (p13) cmp.ge.unc    p7,p0 = 0x2,GR_Sig
2907 { .mfi
2908       ldfe          FR_Ln2 = [GR_ad_1],0x50
2909 (p7)  fma.s1        FR_NormX = FR_NormX,FR_Xp14,f0
2910       mov           GR_PseudoRoot = 0xBFFBC
2912 { .mlx
2913       add           GR_ad_2 = 0x40,GR_ad_2
//    GR_2xDx is reused as the window width for the pseudo-root test
2914       movl          GR_2xDx = 0x00002346DC5D6389
2916 { .mfi
2917       ldfpd         FR_Q8,FR_Q7 = [GR_ad_1],16
2918       fma.s1        FR_x2 = FR_x,FR_x,f0
//    0xBFFBC << 44 = 0xBFFBC00000000000
2919       shl           GR_PseudoRoot = GR_PseudoRoot,44
2921 { .mfi
2922       ldfpd         FR_Q6,FR_Q5 = [GR_ad_2],16
//    first polynomial stage: adjacent coefficients are paired,
//    B(2k+1) <== B(2k+1)*x + B(2k)
2923       fma.s1        FR_B17 = FR_B17,FR_x,FR_B16
2924 (p13) cmp.ge.unc    p6,p0 = 0xA,GR_Sig
2926 { .mfi
2927       ldfpd         FR_Q4,FR_Q3 = [GR_ad_1],16
2928 (p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp6,f0
2929       sub           GR_PseudoRoot = GR_ArgAsIs,GR_PseudoRoot
2931 { .mfi
2932       ldfpd         FR_Q2,FR_Q1 = [GR_ad_2],16
2933       fma.s1        FR_B15 = FR_B15,FR_x,FR_B14
2934 (p13) cmp.ge.unc    p7,p0 = 0x6,GR_Sig
2936 { .mfi
2937       add           GR_ad_Co = 0x12F0,GR_ad_2
2938       fma.s1        FR_B13 = FR_B13,FR_x,FR_B12
//    p10 <== (unsigned) (GR_ArgAsIs - 0xBFFBC00000000000) <= GR_2xDx
2939       cmp.leu.unc   p10,p0 = GR_PseudoRoot,GR_2xDx
2941 { .mfi
2942       add           GR_ad_Ce = 0x1300,GR_ad_2
2943       fma.s1        FR_B11 = FR_B11,FR_x,FR_B10
2944       mov           GR_ExpMask = 0x1ffff
2946 { .mfi
//    first two pseudo-root constants are loaded here, before the branch
2947 (p10) ldfe          FR_PR01 = [GR_ad_Co],0xF0
2948       fma.s1        FR_B9 = FR_B9,FR_x,FR_B8
2949       mov           GR_ExpBias = 0xFFFF
2951 { .mfb
2952 (p10) ldfe          FR_PR11 = [GR_ad_Ce],0xF0
2953       fma.s1        FR_B7 = FR_B7,FR_x,FR_B6
2954 (p10) br.cond.spnt  lgamma_pseudoroot
2956 { .mfi
2957 (p13) cmp.ge.unc    p6,p0 = 0xE,GR_Sig
2958 (p7)  fma.s1        FR_NormX = FR_NormX,FR_Xp10,f0
//    p8 <== bit 0 of GR_Sig is 0; in that case the stored sign is -1
2959       tbit.z.unc    p8,p0 = GR_Sig,0
2961 { .mfi
2962       mov           GR_SignOfGamma = 1
2963       fma.s1        FR_B5 = FR_B5,FR_x,FR_B4
2964       // set p9  if signgam is 32-bit int
2965       // set p10 if signgam is 64-bit int
2966       cmp.eq        p10,p9 = 8,r34
2968 { .mfi
2969       nop.m         0
2970       fma.s1        FR_B3 = FR_B3,FR_x,FR_B2
2971 (p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
2973 { .mfi
2974       nop.m         0
//    FR_w = 0*0 - 1 = -1 when bit 0 of GR_Sig is 0; combined further
//    below into FR_w = -FR_NormX - 1
2975 (p14) fms.s1        FR_w = f0,f0,f1
2976       nop.i         0
2978 .pred.rel "mutex",p9,p10
2979 { .mfi
2980       // store sign of gamma(x) as 32-bit int
2981 (p9)  st4           [r33] = GR_SignOfGamma
2982       fma.s1        FR_B1 = FR_B1,FR_x,FR_B0
2983       nop.i         0
2985 { .mfi
2986       // store sign of gamma(x) as 64-bit int
2987 (p10) st8           [r33] = GR_SignOfGamma
//    second stage: combine the pairs with FR_x2, then FR_x4 and FR_x8
2988       fma.s1        FR_B17 = FR_B17,FR_x2,FR_B15
2989       nop.i         0
2991 { .mfi
2992       nop.m         0
2993       fma.s1        FR_B13 = FR_B13,FR_x2,FR_B11
2994       nop.i         0
2996 { .mfi
2997       nop.m         0
2998       fma.s1        FR_B9 = FR_B9,FR_x2,FR_B7
2999       nop.i         0
3001 { .mfi
3002       nop.m         0
3003       fma.s1        FR_x4 = FR_x2,FR_x2,f0
3004       nop.i         0
3006 { .mfi
3007       nop.m         0
3008 (p6)  fma.s1        FR_NormX = FR_NormX,FR_Xp2,f0
3009       nop.i         0
3011 { .mfi
3012       nop.m         0
3013       fma.s1        FR_B5 = FR_B5,FR_x2,FR_B3
3014       nop.i         0
3016 { .mfi
3017       nop.m         0
3018       fma.s1        FR_B17 = FR_B17,FR_x4,FR_B13
3019       nop.i         0
3021 { .mfi
3022       nop.m         0
3023       fma.s1        FR_x8 = FR_x4,FR_x4,f0
3024       nop.i         0
3026 .pred.rel "mutex",p14,p15
3027 { .mfi
3028       nop.m         0
//    bit 0 of GR_Sig is 1: FR_w = FR_NormX - 1
3029 (p15) fms.s1        FR_w = FR_NormX,f1,f1
3030       nop.i         0
3032 { .mfi
3033       nop.m         0
//    bit 0 of GR_Sig is 0: FR_w = -(FR_NormX*1) + FR_w = -FR_NormX - 1
3034 (p14) fnma.s1       FR_w = FR_NormX,f1,FR_w
3035       nop.i         0
3037 { .mfi
3038       nop.m         0
3039       fma.s1        FR_B9 = FR_B9,FR_x4,FR_B5
3040       nop.i         0
3042 { .mfi
3043       nop.m         0
//    FR_C = hardware reciprocal approximation of FR_NormX
3044       frcpa.s1      FR_C,p0 = f1,FR_NormX
3045       nop.i         0
3047 { .mfi
3048       getf.exp      GR_Exp = FR_NormX
3049       nop.f         0
3050       nop.i         0
3052 { .mfi
3053       getf.d        GR_ArgAsIs = FR_NormX
3054       nop.f         0
3055       nop.i         0
3057 { .mfi
3058       nop.m         0
3059       fma.s1        FR_w2 = FR_w,FR_w,f0
3060       nop.i         0
3062 { .mfi
//    strip the sign bit from the sign+exponent field ...
3063       and           GR_Exp = GR_Exp,GR_ExpMask
3064       fma.s1        FR_Q8 = FR_Q8,FR_w,FR_Q7
3065       nop.i         0
3067 { .mfi
//    ... and remove the bias 0xFFFF to get the unbiased exponent
3068       sub           GR_Exp = GR_Exp,GR_ExpBias
3069       fma.s1        FR_B17 = FR_B17,FR_x8,FR_B9
//    GR_Ind = 8 bits starting at bit 44 of the double encoding of
//    FR_NormX; it indexes the 16-byte entries read into FR_T below
3070       extr.u        GR_Ind = GR_ArgAsIs,44,8
3072 { .mfi
3073       nop.m         0
3074       fma.s1        FR_Q6 = FR_Q6,FR_w,FR_Q5
3075       nop.i         0
3077 { .mfi
3078       setf.sig      FR_int_N = GR_Exp
//    FR_r = FR_C*FR_NormX - 1
3079       fms.s1        FR_r = FR_C,FR_NormX,f1
3080       nop.i         0
3082 { .mfi
3083       shladd        GR_ad_2 = GR_Ind,4,GR_ad_2
3084       nop.f         0
3085       nop.i         0
3087 { .mfi
3088       getf.exp      GR_SignExp_w = FR_w
3089       fma.s1        FR_Q4 = FR_Q4,FR_w,FR_Q3
3090       nop.i         0
3092 { .mfi
3093       ldfe          FR_T = [GR_ad_2]
3094       nop.f         0
3095       nop.i         0
3097 { .mfi
3098       and           GR_Exp_w = GR_ExpMask, GR_SignExp_w
3099       fnma.s1       FR_Q1 = FR_05,FR_w2,FR_w
//    biased exponent threshold: 0xfff9 = 0xFFFF - 6
3100       mov           GR_fff9 = 0xfff9
3102 { .mfi
3103       nop.m         0
3104       fma.s1        FR_w3 = FR_w2,FR_w,f0
3105       nop.i         0
3107 { .mfi
3108       nop.m         0
3109       fma.s1        FR_w4 = FR_w2,FR_w2,f0
3110 //    p13 <== large w __libm_lgamma
3111 //    p14 <== small w __libm_lgamma
3112       cmp.ge        p13,p14 = GR_Exp_w,GR_fff9
3114 { .mfi
3115       nop.m         0
3116       fma.s1        FR_Qlo = FR_Q8,FR_w2,FR_Q6
3117       nop.i         0
3119 { .mfi
3120       nop.m         0
3121 (p13) fma.s1        FR_r2 = FR_r,FR_r,f0
3122       nop.i         0
3124 { .mfi
3125       nop.m         0
//    last polynomial stage: FR_B17 now holds the full degree-17
//    polynomial sum(FR_Bk * FR_x^k), k = 0..17
3126       fma.s1        FR_B17 = FR_B17,FR_x2,FR_B1
3127       nop.i         0
3129 { .mfi
3130       nop.m         0
3131 (p13) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
3132       nop.i         0
3134 { .mfi
3135       nop.m         0
3136 (p13) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
3137       nop.i         0
3139 { .mfi
3140       nop.m         0
3141 (p14) fma.s1        FR_Q2 = FR_Q2,FR_w3,FR_Q1
3142       nop.i         0
3144 { .mfi
3145       nop.m         0
3146 (p14) fma.s1        FR_w6 = FR_w3,FR_w3,f0
3147       nop.i         0
3149 { .mfi
3150       nop.m         0
//    convert the integer exponent in FR_int_N to floating point
3151 (p13) fcvt.xf       FR_N = FR_int_N
3152       nop.i         0
3154 { .mfi
3155       nop.m         0
3156 (p13) fma.s1        FR_r3 = FR_r2,FR_r,f0
3157       nop.i         0
3159 { .mfi
3160       nop.m         0
3161 (p13) fnma.s1       FR_P10 = FR_r2,FR_05,FR_r
3162       nop.i         0
3164 { .mfi
3165       nop.m         0
3166 (p13) fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
3167       nop.i         0
3169 { .mfi
3170       nop.m         0
3171 (p14) fma.s1        FR_Qhi = FR_Q4,FR_w4,FR_Q2
3172       nop.i         0
3174 { .mfi
3175       nop.m         0
//    small w: FR_Qlo <== FR_B17 - FR_Qlo*FR_w6
3176 (p14) fnma.s1       FR_Qlo = FR_Qlo,FR_w6,FR_B17
3177       nop.i         0
3179 { .mfi
3180       nop.m         0
3181 (p13) fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T
3182       nop.i         0
3184 { .mfi
3185       nop.m         0
3186 (p13) fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10
3187       nop.i         0
3189 .pred.rel "mutex",p13,p14
3190 { .mfi
3191       nop.m         0
//    small w result: f8 = FR_B17 - FR_Qlo*FR_w6 - FR_Qhi
3192 (p14) fms.d.s0      f8 = FR_Qlo,f1,FR_Qhi
3193       nop.i         0
3195 { .mfi
3196       nop.m         0
3197 (p13) fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54
3198       nop.i         0
3200 { .mfb
3201       nop.m         0
//    large w result: f8 = FR_B17 - FR_LnX
3202 (p13) fms.d.s0      f8 = FR_B17,f1,FR_LnX
3203       br.ret.sptk   b0
3205 // branch for calculating of ln(GAMMA(x)) near negative roots
3206 //---------------------------------------------------------------------
//    Entered from lgamma_negrecursion with FR_r = f8 - FR_Root, and with
//    GR_RootInd, GR_Sig, GR_ArgXfrAsIs and GR_Arg05 already set.
//    Each root has its own set of coefficients FR_A15..FR_A0, laid out
//    GR_RootInd*160 bytes apart; the sets used when p6 is set start 0x820
//    bytes further on.  The result is the degree-15 polynomial
//       P(r) = sum(FR_Ak * FR_r^k), k = 0..15
//    returned as f8 = P(r) under p8, or f8 = P(r)*FR_r + FR_A00 under p7.
//    The sign of gamma(x) is stored through r33: -1 if bit 0 of GR_Sig
//    is 0, +1 otherwise.
3207 .align 32
3208 lgamma_negroots:
3209 { .mfi
3210       shladd        GR_Offs = GR_RootInd,3,r0 //GR_RootInd*8
3211       fma.s1        FR_r2 = FR_r,FR_r,f0
3212       add           GR_ad_Co = 0x15C0,GR_ad_1//0x1590,GR_ad_1
3214 { .mfi
3215       add           GR_ad_Ce = 0x1610,GR_ad_1//0x15E0,GR_ad_1
3216       nop.f         0
//    same test as in lgamma_negrecursion: bits of FR_Xfr below those of 0.5
3217       cmp.lt        p6,p0 = GR_ArgXfrAsIs,GR_Arg05
3219 { .mfi
3220       add           GR_ad_Roots = 0x10A0,GR_ad_1
3221       nop.f         0
3222 (p6)  add           GR_ad_Co = 0x820,GR_ad_Co
3224 { .mfi
3225 (p6)  add           GR_ad_Ce = 0x820,GR_ad_Ce
3226       nop.f         0
3227       shladd        GR_Offs = GR_RootInd,1,GR_Offs //GR_RootInd*10
3229 { .mmi
//    GR_Offs*16 = GR_RootInd*160 bytes
3230       shladd        GR_ad_Co = GR_Offs,4,GR_ad_Co
3231       shladd        GR_ad_Ce = GR_Offs,4,GR_ad_Ce
//    default: p8 = 1, p7 = 0 (r0 == r0 is always true)
3232       cmp.eq        p8,p7 = r0,r0
3234 { .mmi
3235       ldfpd         FR_A15,FR_A14 = [GR_ad_Co],16
3236       ldfpd         FR_A13,FR_A12 = [GR_ad_Ce],16
3237       mov           GR_SignOfGamma = 1
3239 { .mmi
3240       ldfpd         FR_A11,FR_A10 = [GR_ad_Co],16
3241       ldfpd         FR_A9,FR_A8   = [GR_ad_Ce],16
//    only when p6 is set: p7 = (GR_RootInd == 0), p8 = its complement
3242 (p6)  cmp.eq        p7,p8 = r0,GR_RootInd
3244 { .mmi
3245       ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
3246       ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
//    p11 <== bit 0 of GR_Sig is 0
3247       tbit.z        p11,p0 = GR_Sig,0
3249 { .mmi
3250       ldfe          FR_A3 = [GR_ad_Co],16
3251       ldfe          FR_A2 = [GR_ad_Ce],16
3252       // set p9  if signgam is 32-bit int
3253       // set p10 if signgam is 64-bit int
3254       cmp.eq        p10,p9 = 8,r34
3256 { .mmi
3257       ldfe          FR_A1 = [GR_ad_Co],16
3258       ldfe          FR_A0 = [GR_ad_Ce],16
3259 (p11) sub           GR_SignOfGamma = r0,GR_SignOfGamma
3261 { .mfi
3262       ldfe          FR_A00 = [GR_ad_Roots]
3263       fma.s1        FR_r4 = FR_r2,FR_r2,f0
3264       nop.i         0
3266 { .mfi
3267       nop.m         0
//    first stage: pair adjacent coefficients, A(2k+1) <== A(2k+1)*r + A(2k)
3268       fma.s1        FR_A15 = FR_A15,FR_r,FR_A14
3269       nop.i         0
3271 { .mfi
3272       nop.m         0
3273       fma.s1        FR_A13 = FR_A13,FR_r,FR_A12
3274       nop.i         0
3276 .pred.rel "mutex",p9,p10
3277 { .mfi
3278       // store sign of gamma(x) as 32-bit int
3279 (p9)  st4           [r33] = GR_SignOfGamma
3280       fma.s1        FR_A11 = FR_A11,FR_r,FR_A10
3281       nop.i         0
3283 { .mfi
3284       // store sign of gamma(x) as 64-bit int
3285 (p10) st8           [r33] = GR_SignOfGamma
3286       fma.s1        FR_A9 = FR_A9,FR_r,FR_A8
3287       nop.i         0
3289 { .mfi
3290       nop.m         0
3291       fma.s1        FR_A7 = FR_A7,FR_r,FR_A6
3292       nop.i         0
3294 { .mfi
3295       nop.m         0
3296       fma.s1        FR_A5 = FR_A5,FR_r,FR_A4
3297       nop.i         0
3299 { .mfi
3300       nop.m         0
3301       fma.s1        FR_A3 = FR_A3,FR_r,FR_A2
3302       nop.i         0
3304 { .mfi
3305       nop.m         0
3306       fma.s1        FR_r8 = FR_r4,FR_r4,f0
3307       nop.i         0
3309 { .mfi
3310       nop.m         0
3311       fma.s1        FR_A1 = FR_A1,FR_r,FR_A0
3312       nop.i         0
3314 { .mfi
3315       nop.m         0
//    second stage: combine the pairs with FR_r2
3316       fma.s1        FR_A15 = FR_A15,FR_r2,FR_A13
3317       nop.i         0
3319 { .mfi
3320       nop.m         0
3321       fma.s1        FR_A11 = FR_A11,FR_r2,FR_A9
3322       nop.i         0
3324 { .mfi
3325       nop.m         0
3326       fma.s1        FR_A7 = FR_A7,FR_r2,FR_A5
3327       nop.i         0
3329 { .mfi
3330       nop.m         0
3331       fma.s1        FR_A3 = FR_A3,FR_r2,FR_A1
3332       nop.i         0
3334 { .mfi
3335       nop.m         0
//    third stage: combine with FR_r4
3336       fma.s1        FR_A15 = FR_A15,FR_r4,FR_A11
3337       nop.i         0
3339 { .mfi
3340       nop.m         0
3341       fma.s1        FR_A7 = FR_A7,FR_r4,FR_A3
3342       nop.i         0
3344 .pred.rel "mutex",p7,p8
3345 { .mfi
3346       nop.m         0
//    p7: keep P(r) = A15*r8 + A7 in FR_A1 for one more multiply-add
3347 (p7)  fma.s1        FR_A1 = FR_A15,FR_r8,FR_A7
3348       nop.i         0
3350 { .mfi
3351       nop.m         0
//    p8: final result f8 = P(r)
3352 (p8)  fma.d.s0      f8 = FR_A15,FR_r8,FR_A7
3353       nop.i         0
3355 { .mfb
3356       nop.m         0
//    p7: final result f8 = P(r)*r + FR_A00
3357 (p7)  fma.d.s0      f8 = FR_A1,FR_r,FR_A00
3358       br.ret.sptk   b0
3360 // branch for handling pseudo root on (-2;-1)
3361 //---------------------------------------------------------------------
//    Entered from lgamma_negrecursion, which has already loaded FR_PR01
//    and FR_PR11 and advanced GR_ad_Co / GR_ad_Ce past them.
//    With x = f8, the code forms four quadratics
//       q_i(x) = (x + FR_PRi1)*x + FR_PRi0,   i = 0..3
//    and returns f8 = q_0(x)*q_1(x)*q_2(x)*q_3(x)*FR_PRN.
//    The sign of gamma(x) is stored through r33: -1 if bit 0 of GR_Sig
//    is 0, +1 otherwise.
3362 .align 32
3363 lgamma_pseudoroot:
3364 { .mmi
3365       ldfe          FR_PR21 = [GR_ad_Co],32
3366       ldfe          FR_PR31 = [GR_ad_Ce],32
3367       // set p9  if signgam is 32-bit int
3368       // set p10 if signgam is 64-bit int
3369       cmp.eq        p10,p9 = 8,r34
3371 { .mmi
3372       ldfe          FR_PR00 = [GR_ad_Co],32
3373       ldfe          FR_PR10 = [GR_ad_Ce],0xF0
3374       mov           GR_SignOfGamma = 1
3376 { .mmi
3377       ldfe          FR_PR20 = [GR_ad_Co],0xF0
3378       ldfe          FR_PR30 = [GR_ad_Ce]
//    p8 <== bit 0 of GR_Sig is 0
3379       tbit.z        p8,p0 = GR_Sig,0
3381 { .mfi
3382       ldfe          FR_PRN = [GR_ad_Co]
//    first step of each quadratic: FR_PRi1 <== x + FR_PRi1
3383       fma.s1        FR_PR01 = f8,f1,FR_PR01
3384       nop.i         0
3386 { .mfi
3387       nop.m         0
3388       fma.s1        FR_PR11 = f8,f1,FR_PR11
3389 (p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
3391 .pred.rel "mutex",p9,p10
3392 { .mfi
3393       // store sign of gamma(x) as 32-bit int
3394 (p9)  st4           [r33] = GR_SignOfGamma
3395       fma.s1        FR_PR21 = f8,f1,FR_PR21
3396       nop.i         0
3398 { .mfi
3399       // store sign of gamma(x) as 64-bit int
3400 (p10) st8           [r33] = GR_SignOfGamma
3401       fma.s1        FR_PR31 = f8,f1,FR_PR31
3402       nop.i         0
3404 { .mfi
3405       nop.m         0
//    second step: FR_PRi1 <== x*(x + FR_PRi1) + FR_PRi0
3406       fma.s1        FR_PR01 = f8,FR_PR01,FR_PR00
3407       nop.i         0
3409 { .mfi
3410       nop.m         0
3411       fma.s1        FR_PR11 = f8,FR_PR11,FR_PR10
3412       nop.i         0
3414 { .mfi
3415       nop.m         0
3416       fma.s1        FR_PR21 = f8,FR_PR21,FR_PR20
3417       nop.i         0
3419 { .mfi
3420       nop.m         0
3421       fma.s1        FR_PR31 = f8,FR_PR31,FR_PR30
3422       nop.i         0
3424 { .mfi
3425       nop.m         0
//    multiply the four quadratics together pairwise
3426       fma.s1        FR_PR01 = FR_PR11,FR_PR01,f0
3427       nop.i         0
3429 { .mfi
3430       nop.m         0
3431       fma.s1        FR_PR21 = FR_PR31,FR_PR21,f0
3432       nop.i         0
3434 { .mfi
3435       nop.m         0
3436       fma.s1        FR_PR01 = FR_PR21,FR_PR01,f0
3437       nop.i         0
3439 { .mfb
3440       nop.m         0
//    scale by FR_PRN and round to double in the user status field
3441       fma.d.s0      f8 = FR_PR01,FR_PRN,f0
3442       br.ret.sptk   b0
3444 // branch for handling +/-0, NaT, QNaN, +/-INF and denormalised numbers
3445 //---------------------------------------------------------------------
//    The sign of gamma(x) is stored as +1 through r33 first, then the
//    argument class decides the exit:
//       +INF        (fclass 0x21)  return with f8 unchanged
//       NaT or NaN  (fclass 0x1C0) return f8*1 - f8
//       denormal    (fclass 0xB)   continue at lgamma_common
//       -INF        (fclass 0x22)  return |f8| (sign taken from f1)
//    Anything else runs on into the code that follows this section
//    (lgamma_singularity).
//    NOTE(review): GR_ExpBias and GR_ExpMask are read here but not set in
//    this section; they are presumably initialised before the branch to
//    lgamma_spec -- confirm.
3446 .align 32
3447 lgamma_spec:
3448 { .mfi
3449       getf.exp      GR_SignExp = FR_NormX
3450       fclass.m      p6,p0 = f8,0x21 // is arg +INF?
3451       mov           GR_SignOfGamma = 1
3453 { .mfi
3454       getf.sig      GR_ArgAsIs = FR_NormX
3455       fclass.m      p7,p0 = f8,0xB // is x deno?
3456       // set p11 if signgam is 32-bit int
3457       // set p12 if signgam is 64-bit int
3458       cmp.eq        p12,p11 = 8,r34
3460 .pred.rel "mutex",p11,p12
3461 { .mfi
3462       // store sign of gamma(x) as 32-bit int
3463 (p11) st4           [r33] = GR_SignOfGamma
3464       fclass.m      p8,p0 = f8,0x1C0 // is arg NaT or NaN?
//    GR_Ind = (low 4 bits of GR_SignExp) << 8
3465       dep.z         GR_Ind = GR_SignExp,8,4
3467 { .mib
3468       // store sign of gamma(x) as 64-bit int
3469 (p12) st8           [r33] = GR_SignOfGamma
3470       cmp.lt        p10,p0 = GR_SignExp,GR_ExpBias
3471 (p6)  br.ret.spnt   b0 // exit for +INF
3473 { .mfi
3474       and           GR_Exp = GR_SignExp,GR_ExpMask
3475       fclass.m      p9,p0 = f8,0x22 // is arg -INF?
3476       nop.i         0
3478 { .mfi
3479       add           GR_ad_Co = GR_Ind,GR_ad_Data
//    NOTE(review): FR_tmp is not read anywhere in this section, so this
//    .s0 operation on the denormal input presumably exists only to set
//    status flags -- confirm.
3480 (p7)  fma.s0        FR_tmp = f8,f8,f8
//    keep 52 bits of the significand starting at bit 11
3481       extr.u        GR_ArgAsIs = GR_ArgAsIs,11,52
3483 { .mfb
3484       nop.m         0
3485 (p8)  fms.d.s0      f8 = f8,f1,f8
3486 (p8)  br.ret.spnt   b0 // exit for NaT and NaN
3488 { .mib
3489       nop.m         0
3490       shr.u         GR_Arg = GR_ArgAsIs,48
3491 (p7)  br.cond.sptk  lgamma_common
3493 { .mfb
3494       nop.m         0
//    fmerge.s takes the sign from f1 and everything else from f8
3495 (p9)  fmerge.s      f8 = f1,f8
3496 (p9)  br.ret.spnt   b0 // exit -INF
3498 // branch for handling negative integers and +/-0
3499 //---------------------------------------------------------------------
//    Saves the original argument in FR_X, replaces f8 with the result of
//    frcpa 1/0 in the user status field, selects error tag 106 and jumps
//    to lgamma_libm_err.  The sign of gamma(x) is +1, or -1 when the
//    argument is -0; it is stored later, at lgamma_libm_err, using
//    p9/p10 and GR_ad_SignGam prepared here.
3500 .align 32
3501 lgamma_singularity:
3502 { .mfi
//    keep the signgam pointer: lgamma_libm_err stores through this copy
3503       mov           GR_ad_SignGam = r33
3504       fclass.m      p6,p0 = f8, 0x6 // is x -0?
3505       mov           GR_SignOfGamma = 1
3507 { .mfi
3508       // set p9  if signgam is 32-bit int
3509       // set p10 if signgam is 64-bit int
3510       cmp.eq        p10,p9 = 8,r34
//    FR_X = 0*0 + f8, i.e. a copy of the argument for the error handler
3511       fma.s1        FR_X = f0,f0,f8
3512       nop.i         0
3514 { .mfi
3515       nop.m         0
3516       frcpa.s0      f8,p0 = f1,f0
3517       mov           GR_TAG = 106 // negative
3519 { .mib
3520       nop.m         0
3521 (p6)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
3522       br.cond.sptk  lgamma_libm_err
3524 // overflow (x > OVERFLOW_BOUNDARY)
3525 //---------------------------------------------------------------------
//    Saves the argument in FR_X, selects error tag 105 and produces the
//    overflowed result by squaring f9, a value built with exponent field
//    0x1FFFE.  There is no branch at the end: execution continues into
//    lgamma_libm_err, which stores the sign of gamma(x) (+1) using
//    p9/p10 and GR_ad_SignGam prepared here.
3526 .align 32
3527 lgamma_overflow:
3528 { .mfi
3529       mov           GR_SignOfGamma = 1
3530       nop.f         0
3531       mov           r8 = 0x1FFFE
3533 { .mfi
3534       setf.exp      f9 = r8
//    copy of the argument for the error handler
3535       fmerge.s      FR_X = f8,f8
3536       mov           GR_TAG = 105 // overflow
3538 { .mfi
3539       mov           GR_ad_SignGam = r33
3540       nop.f         0
3541       // set p9  if signgam is 32-bit int
3542       // set p10 if signgam is 64-bit int
3543       cmp.eq        p10,p9 = 8,r34
3545 { .mfi
3546       nop.m         0
3547       fma.d.s0      f8 = f9,f9,f0 // Set I,O and +INF result
3548       nop.i         0
//    Common tail of the error paths (reached by branch from
//    lgamma_singularity and by falling through from lgamma_overflow).
//    Expects GR_TAG, GR_SignOfGamma, GR_ad_SignGam and p9/p10 to be set
//    by the caller path.  Allocates a register frame, passes the error
//    tag in GR_Parameter_TAG and stores the sign of gamma(x); execution
//    then continues into __libm_error_region.
3551 //---------------------------------------------------------------------
3552 .align 32
3553 lgamma_libm_err:
3554 { .mmi
//    new frame: 1 input, 4 locals, 4 outputs, no rotating registers
3555       alloc         r32 = ar.pfs,1,4,4,0
3556       mov           GR_Parameter_TAG = GR_TAG
3557       nop.i         0
3559 .pred.rel "mutex",p9,p10
3560 { .mmi
3561       // store sign of gamma(x) as 32-bit int
3562 (p9)  st4           [GR_ad_SignGam] = GR_SignOfGamma
3563       // store sign of gamma(x) as 64-bit int
3564 (p10) st8           [GR_ad_SignGam] = GR_SignOfGamma
3565       nop.i         0
3567 GLOBAL_LIBM_END(__libm_lgamma)
//    Builds a 64-byte stack frame, spills the three floating-point
//    values to it and calls __libm_error_support with their addresses in
//    GR_Parameter_X / GR_Parameter_Y / GR_Parameter_RESULT (the tag was
//    placed in GR_Parameter_TAG before entry).  Frame layout relative to
//    the new sp, as established by the address arithmetic below:
//       sp+16  FR_X       (parameter 1)
//       sp+32  FR_Y       (parameter 2)
//       sp+48  FR_RESULT  (parameter 3, reloaded into f8 after the call)
//    ar.pfs, gp and b0 are saved in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
//    around the call and restored before returning.
3570 LOCAL_LIBM_ENTRY(__libm_error_region)
3571 .prologue
3572 { .mfi
3573         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
3574         nop.f 0
3575 .save   ar.pfs,GR_SAVE_PFS
3576         mov   GR_SAVE_PFS=ar.pfs                // Save ar.pfs
3578 { .mfi
3579 .fframe 64
3580         add   sp=-64,sp                         // Create new stack
3581         nop.f 0
3582         mov   GR_SAVE_GP=gp                     // Save gp
3584 { .mmi
//    the post-increment leaves GR_Parameter_Y pointing at new sp+48
3585         stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
3586         add GR_Parameter_X = 16,sp              // Parameter 1 address
3587 .save   b0, GR_SAVE_B0
3588         mov GR_SAVE_B0=b0                       // Save b0
3590 .body
3591 { .mib
3592         stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1
3593                                                       // on stack
3594         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
3595         nop.b 0
3597 { .mib
3598         stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3
3599                                                       // on stack
//    move GR_Parameter_Y back to the slot holding FR_Y (new sp+32)
3600         add   GR_Parameter_Y = -16,GR_Parameter_Y
3601         br.call.sptk b0=__libm_error_support#         // Call error handling
3602                                                       // function
3604 { .mmi
3605         nop.m 0
3606         nop.m 0
//    recompute the result address from sp after the call
3607         add   GR_Parameter_RESULT = 48,sp
3609 { .mmi
3610         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
3611 .restore sp
3612         add   sp = 64,sp                       // Restore stack pointer
3613         mov   b0 = GR_SAVE_B0                  // Restore return address
3615 { .mib
3616         mov   gp = GR_SAVE_GP                  // Restore gp
3617         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
3618         br.ret.sptk     b0                     // Return
3621 LOCAL_LIBM_END(__libm_error_region)
3622 .type   __libm_error_support#,@function
3623 .global __libm_error_support#