2.9
[glibc/nacl-glibc.git] / sysdeps / ia64 / fpu / w_tgamma.S
blob24f3d11840f0d11a8c8b2f2d12612245625ea2a9
1 .file "tgamma.s"
4 // Copyright (c) 2001 - 2005, Intel Corporation
5 // All rights reserved.
6 //
7 // Contributed 2001 by the Intel Numerics Group, Intel Corporation
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 
26 // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29 // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code,and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //*********************************************************************
42 // History: 
43 // 10/12/01  Initial version
44 // 05/20/02  Cleaned up namespace and sf0 syntax
45 // 02/10/03  Reordered header: .section, .global, .proc, .align
46 // 04/04/03  Changed error codes for overflow and negative integers
47 // 04/10/03  Changed code for overflow near zero handling
48 // 03/31/05  Reformatted delimiters between data tables
50 //*********************************************************************
52 //*********************************************************************
54 // Function: tgamma(x) computes the principle value of the GAMMA
55 // function of x.
57 //*********************************************************************
59 // Resources Used:
61 //    Floating-Point Registers: f8-f15
62 //                              f33-f87
64 //    General Purpose Registers:
65 //      r8-r11
66 //      r14-r28
67 //      r32-r36
68 //      r37-r40 (Used to pass arguments to error handling routine)
70 //    Predicate Registers:      p6-p15
72 //*********************************************************************
74 // IEEE Special Conditions:
76 //    tgamma(+inf) = +inf
77 //    tgamma(-inf) = QNaN 
78 //    tgamma(+/-0) = +/-inf 
79 //    tgamma(x<0, x - integer) = QNaN
80 //    tgamma(SNaN) = QNaN
81 //    tgamma(QNaN) = QNaN
83 //*********************************************************************
85 // Overview
87 // The method consists of three cases.
88 // 
89 // If       2 <= x < OVERFLOW_BOUNDARY      use case tgamma_regular;
90 // else if    0 < x < 2                     use case tgamma_from_0_to_2;
91 // else    if  -(i+1) <  x < -i, i = 0...184 use case tgamma_negatives;
93 // Case 2 <= x < OVERFLOW_BOUNDARY
94 // -------------------------------
95 //   Here we use algorithm based on the recursive formula
96 //   GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
97 //   [2; OVERFLOW_BOUNDARY] into intervals [16*n; 16*(n+1)] and
98 //   approximate GAMMA(x) by polynomial of 22th degree on each
99 //   [16*n; 16*n+1], recursive formula is used to expand GAMMA(x)
100 //   to [16*n; 16*n+1]. In other words we need to find n, i and r
101 //   such that x = 16 * n + i + r where n and i are integer numbers
102 //   and r is fractional part of x. So GAMMA(x) = GAMMA(16*n+i+r) =
103 //   = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
104 //   = (x-1)*(x-2)*...*(x-i)*GAMMA(16*n+r) ~
105 //   ~ (x-1)*(x-2)*...*(x-i)*P22n(r).
107 //   Step 1: Reduction
108 //   -----------------
109 //    N = [x] with truncate
110 //    r = x - N, note 0 <= r < 1
112 //    n = N & ~0xF - index of table that contains coefficient of
113 //                   polynomial approximation 
114 //    i = N & 0xF  - is used in recursive formula
115 //   
117 //   Step 2: Approximation
118 //   ---------------------
119 //    We use factorized minimax approximation polynomials
120 //    P22n(r) = A22*(r^2+C01(n)*R+C00(n))*
121 //              *(r^2+C11(n)*R+C10(n))*...*(r^2+CA1(n)*R+CA0(n))
123 //   Step 3: Recursion
124 //   -----------------
125 //    In case when i > 0 we need to multiply P22n(r) by product
126 //    R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
127 //    we can calculate R as follow:  
128 //    R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
129 //    even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
130 //    *(i-1) if i is odd. In both cases we need to calculate
131 //    R2(i) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) =
132 //    = (x^2-3*x+2)*(x^2-7*x+12)*...*((x^2+x)+2*j*(2*(j-1)+(1-2*x))) =
133 //    = (RA+2*(2-RB))*(RA+4*(4-RB))*...*(RA+2*j*(2*(j-1)+RB))
134 //    where j = 1..[i/2], RA = x^2+x, RB = 1-2*x.
136 //   Step 4: Reconstruction
137 //   ----------------------
138 //    Reconstruction is just simple multiplication i.e.
139 //    GAMMA(x) = P22n(r)*R(i)
141 // Case 0 < x < 2
142 // --------------
143 //    To calculate GAMMA(x) on this interval we do following
144 //        if 1 <= x < 1.25   than  GAMMA(x) = P15(x-1)
145 //        if 1.25 <= x < 1.5 than  GAMMA(x) = P15(x-x_min) where
146 //        x_min is point of local minimum on [1; 2] interval.
147 //        if 1.5  <= x < 2.0 than  GAMMA(x) = P15(x-1.5)
148 //    and      
149 //        if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x
151 // Case -(i+1) <  x < -i, i = 0...184
152 // ----------------------------------
153 //    Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
154 //    so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
155 //    GAMMA(x) is described above.
157 //   Step 1: Reduction
158 //   -----------------
159 //    Note that period of sin(PI*x) is 2 and range reduction for 
160 //    sin(PI*x) is like to range reduction for GAMMA(x) 
161 //    i.e r = x - [x] with exception of cases
162 //    when r > 0.5 (in such cases r = 1 - (x - [x])).
164 //   Step 2: Approximation
165 //   ---------------------
166 //    To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI = 
167 //    = (-1)^n*sin(PI*r)/PI Taylor series is used.
168 //    sin(PI*r)/PI ~ S21(r).
170 //   Step 3: Division
171 //   ----------------
172 //    To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction
173 //    with following Newton-Raphson interations.
174 //  
176 //*********************************************************************
178 GR_Sig                  = r8
179 GR_TAG                  = r8
180 GR_ad_Data              = r9
181 GR_SigRqLin             = r10
182 GR_iSig                 = r11
183 GR_ExpOf1               = r11
184 GR_ExpOf8               = r11
187 GR_Sig2                 = r14
188 GR_Addr_Mask1           = r15
189 GR_Sign_Exp             = r16
190 GR_Tbl_Offs             = r17
191 GR_Addr_Mask2           = r18
192 GR_ad_Co                = r19
193 GR_Bit2                 = r19
194 GR_ad_Ce                = r20
195 GR_ad_Co7               = r21
196 GR_NzOvfBound           = r21
197 GR_ad_Ce7               = r22
198 GR_Tbl_Ind              = r23
199 GR_Tbl_16xInd           = r24
200 GR_ExpOf025             = r24
201 GR_ExpOf05              = r25
202 GR_0x30033              = r26
203 GR_10                   = r26
204 GR_12                   = r27
205 GR_185                  = r27
206 GR_14                   = r28
207 GR_2                    = r28
208 GR_fpsr                 = r28
210 GR_SAVE_B0              = r33
211 GR_SAVE_PFS             = r34
212 GR_SAVE_GP              = r35
213 GR_SAVE_SP              = r36
215 GR_Parameter_X          = r37
216 GR_Parameter_Y          = r38
217 GR_Parameter_RESULT     = r39
218 GR_Parameter_TAG        = r40
222 FR_X                    = f10
223 FR_Y                    = f1 // tgamma is single argument function
224 FR_RESULT               = f8
226 FR_AbsX                 = f9
227 FR_NormX                = f9
228 FR_r02                  = f11
229 FR_AbsXp1               = f12
230 FR_X2pX                 = f13
231 FR_1m2X                 = f14
232 FR_Rq1                  = f14
233 FR_Xt                   = f15
235 FR_r                    = f33
236 FR_OvfBound             = f34
237 FR_Xmin                 = f35
238 FR_2                    = f36
239 FR_Rcp1                 = f36
240 FR_Rcp3                 = f36
241 FR_4                    = f37
242 FR_5                    = f38
243 FR_6                    = f39
244 FR_8                    = f40
245 FR_10                   = f41
246 FR_12                   = f42
247 FR_14                   = f43
248 FR_GAMMA                = f43
249 FR_05                   = f44
251 FR_Rq2                  = f45
252 FR_Rq3                  = f46
253 FR_Rq4                  = f47
254 FR_Rq5                  = f48
255 FR_Rq6                  = f49
256 FR_Rq7                  = f50
257 FR_RqLin                = f51
259 FR_InvAn                = f52
261 FR_C01                  = f53
262 FR_A15                  = f53
263 FR_C11                  = f54
264 FR_A14                  = f54
265 FR_C21                  = f55
266 FR_A13                  = f55
267 FR_C31                  = f56
268 FR_A12                  = f56
269 FR_C41                  = f57
270 FR_A11                  = f57
271 FR_C51                  = f58
272 FR_A10                  = f58
273 FR_C61                  = f59
274 FR_A9                   = f59
275 FR_C71                  = f60
276 FR_A8                   = f60
277 FR_C81                  = f61
278 FR_A7                   = f61
279 FR_C91                  = f62
280 FR_A6                   = f62
281 FR_CA1                  = f63
282 FR_A5                   = f63
283 FR_C00                  = f64
284 FR_A4                   = f64
285 FR_rs2                  = f64
286 FR_C10                  = f65
287 FR_A3                   = f65
288 FR_rs3                  = f65
289 FR_C20                  = f66
290 FR_A2                   = f66
291 FR_rs4                  = f66
292 FR_C30                  = f67
293 FR_A1                   = f67
294 FR_rs7                  = f67
295 FR_C40                  = f68
296 FR_A0                   = f68
297 FR_rs8                  = f68
298 FR_C50                  = f69
299 FR_r2                   = f69
300 FR_C60                  = f70
301 FR_r3                   = f70
302 FR_C70                  = f71
303 FR_r4                   = f71
304 FR_C80                  = f72
305 FR_r7                   = f72
306 FR_C90                  = f73
307 FR_r8                   = f73
308 FR_CA0                  = f74
309 FR_An                   = f75
311 FR_S21                  = f76
312 FR_S19                  = f77
313 FR_Rcp0                 = f77
314 FR_Rcp2                 = f77
315 FR_S17                  = f78
316 FR_S15                  = f79
317 FR_S13                  = f80
318 FR_S11                  = f81
319 FR_S9                   = f82
320 FR_S7                   = f83
321 FR_S5                   = f84
322 FR_S3                   = f85
324 FR_iXt                  = f86
325 FR_rs                   = f87
328 // Data tables
329 //==============================================================
330 RODATA
331 .align 16
333 LOCAL_OBJECT_START(tgamma_data)
334 data8 0x406573FAE561F648 // overflow boundary (171.624376956302739927196)
335 data8 0x3FDD8B618D5AF8FE // point of local minium (0.461632144968362356785)
337 //[2; 3]
338 data8 0xEF0E85C9AE40ABE2,0x00004000 // C01
339 data8 0xCA2049DDB4096DD8,0x00004000 // C11
340 data8 0x99A203B4DC2D1A8C,0x00004000 // C21
341 data8 0xBF5D9D9C0C295570,0x00003FFF // C31
342 data8 0xE8DD037DEB833BAB,0x00003FFD // C41
343 data8 0xB6AE39A2A36AA03A,0x0000BFFE // C51
344 data8 0x804960DC2850277B,0x0000C000 // C61
345 data8 0xD9F3973841C09F80,0x0000C000 // C71
346 data8 0x9C198A676F8A2239,0x0000C001 // C81
347 data8 0xC98B7DAE02BE3226,0x0000C001 // C91
348 data8 0xE9CAF31AC69301BA,0x0000C001 // CA1
349 data8 0xFBBDD58608A0D172,0x00004000 // C00
350 data8 0xFDD0316D1E078301,0x00004000 // C10
351 data8 0x8630B760468C15E4,0x00004001 // C20
352 data8 0x93EDE20E47D9152E,0x00004001 // C30
353 data8 0xA86F3A38C77D6B19,0x00004001 // C40
354 //[16; 17]
355 data8 0xF87F757F365EE813,0x00004000 // C01
356 data8 0xECA84FBA92759DA4,0x00004000 // C11
357 data8 0xD4E0A55E07A8E913,0x00004000 // C21
358 data8 0xB0EB45E94C8A5F7B,0x00004000 // C31
359 data8 0x8050D6B4F7C8617D,0x00004000 // C41
360 data8 0x8471B111AA691E5A,0x00003FFF // C51
361 data8 0xADAF462AF96585C9,0x0000BFFC // C61
362 data8 0xD327C7A587A8C32B,0x0000BFFF // C71
363 data8 0xDEF5192B4CF5E0F1,0x0000C000 // C81
364 data8 0xBADD64BB205AEF02,0x0000C001 // C91
365 data8 0x9330A24AA67D6860,0x0000C002 // CA1
366 data8 0xF57EEAF36D8C47BE,0x00004000 // C00
367 data8 0x807092E12A251B38,0x00004001 // C10
368 data8 0x8C458F80DEE7ED1C,0x00004001 // C20
369 data8 0x9F30C731DC77F1A6,0x00004001 // C30
370 data8 0xBAC4E7E099C3A373,0x00004001 // C40
371 //[32; 33]
372 data8 0xC3059A415F142DEF,0x00004000 // C01
373 data8 0xB9C1DAC24664587A,0x00004000 // C11
374 data8 0xA7101D910992FFB2,0x00004000 // C21
375 data8 0x8A9522B8E4AA0AB4,0x00004000 // C31
376 data8 0xC76A271E4BA95DCC,0x00003FFF // C41
377 data8 0xC5D6DE2A38DB7FF2,0x00003FFE // C51
378 data8 0xDBA42086997818B2,0x0000BFFC // C61
379 data8 0xB8EDDB1424C1C996,0x0000BFFF // C71
380 data8 0xBF7372FB45524B5D,0x0000C000 // C81
381 data8 0xA03DDE759131580A,0x0000C001 // C91
382 data8 0xFDA6FC4022C1FFE3,0x0000C001 // CA1
383 data8 0x9759ABF797B2533D,0x00004000 // C00
384 data8 0x9FA160C6CF18CEC5,0x00004000 // C10
385 data8 0xB0EFF1E3530E0FCD,0x00004000 // C20
386 data8 0xCCD60D5C470165D1,0x00004000 // C30
387 data8 0xF5E53F6307B0B1C1,0x00004000 // C40
388 //[48; 49]
389 data8 0xAABE577FBCE37F5E,0x00004000 // C01
390 data8 0xA274CAEEB5DF7172,0x00004000 // C11
391 data8 0x91B90B6646C1B924,0x00004000 // C21
392 data8 0xF06718519CA256D9,0x00003FFF // C31
393 data8 0xAA9EE181C0E30263,0x00003FFF // C41
394 data8 0xA07BDB5325CB28D2,0x00003FFE // C51
395 data8 0x86C8B873204F9219,0x0000BFFD // C61
396 data8 0xB0192C5D3E4787D6,0x0000BFFF // C71
397 data8 0xB1E0A6263D4C19EF,0x0000C000 // C81
398 data8 0x93BA32A118EAC9AE,0x0000C001 // C91
399 data8 0xE942A39CD9BEE887,0x0000C001 // CA1
400 data8 0xE838B0957B0D3D0D,0x00003FFF // C00
401 data8 0xF60E0F00074FCF34,0x00003FFF // C10
402 data8 0x89869936AE00C2A5,0x00004000 // C20
403 data8 0xA0FE4E8AA611207F,0x00004000 // C30
404 data8 0xC3B1229CFF1DDAFE,0x00004000 // C40
405 //[64; 65]
406 data8 0x9C00DDF75CDC6183,0x00004000 // C01
407 data8 0x9446AE9C0F6A833E,0x00004000 // C11
408 data8 0x84ABC5083310B774,0x00004000 // C21
409 data8 0xD9BA3A0977B1ED83,0x00003FFF // C31
410 data8 0x989B18C99411D300,0x00003FFF // C41
411 data8 0x886E66402318CE6F,0x00003FFE // C51
412 data8 0x99028C2468F18F38,0x0000BFFD // C61
413 data8 0xAB72D17DCD40CCE1,0x0000BFFF // C71
414 data8 0xA9D9AC9BE42C2EF9,0x0000C000 // C81
415 data8 0x8C11D983AA177AD2,0x0000C001 // C91
416 data8 0xDC779E981C1F0F06,0x0000C001 // CA1
417 data8 0xC1FD4AC85965E8D6,0x00003FFF // C00
418 data8 0xCE3D2D909D389EC2,0x00003FFF // C10
419 data8 0xE7F79980AD06F5D8,0x00003FFF // C20
420 data8 0x88DD9F73C8680B5D,0x00004000 // C30
421 data8 0xA7D6CB2CB2D46F9D,0x00004000 // C40
422 //[80; 81]
423 data8 0x91C7FF4E993430D0,0x00004000 // C01
424 data8 0x8A6E7AB83E45A7E9,0x00004000 // C11
425 data8 0xF72D6382E427BEA9,0x00003FFF // C21
426 data8 0xC9E2E4F9B3B23ED6,0x00003FFF // C31
427 data8 0x8BEFEF56AE05D775,0x00003FFF // C41
428 data8 0xEE9666AB6A185560,0x00003FFD // C51
429 data8 0xA6AFAF5CEFAEE04D,0x0000BFFD // C61
430 data8 0xA877EAFEF1F9C880,0x0000BFFF // C71
431 data8 0xA45BD433048ECA15,0x0000C000 // C81
432 data8 0x86BD1636B774CC2E,0x0000C001 // C91
433 data8 0xD3721BE006E10823,0x0000C001 // CA1
434 data8 0xA97EFABA91854208,0x00003FFF // C00
435 data8 0xB4AF0AEBB3F97737,0x00003FFF // C10
436 data8 0xCC38241936851B0B,0x00003FFF // C20
437 data8 0xF282A6261006EA84,0x00003FFF // C30
438 data8 0x95B8E9DB1BD45BAF,0x00004000 // C40
439 //[96; 97]
440 data8 0x8A1FA3171B35A106,0x00004000 // C01
441 data8 0x830D5B8843890F21,0x00004000 // C11
442 data8 0xE98B0F1616677A23,0x00003FFF // C21
443 data8 0xBDF8347F5F67D4EC,0x00003FFF // C31
444 data8 0x825F15DE34EC055D,0x00003FFF // C41
445 data8 0xD4846186B8AAC7BE,0x00003FFD // C51
446 data8 0xB161093AB14919B1,0x0000BFFD // C61
447 data8 0xA65758EEA4800EF4,0x0000BFFF // C71
448 data8 0xA046B67536FA329C,0x0000C000 // C81
449 data8 0x82BBEC1BCB9E9068,0x0000C001 // C91
450 data8 0xCC9DE2B23BA91B0B,0x0000C001 // CA1
451 data8 0x983B16148AF77F94,0x00003FFF // C00
452 data8 0xA2A4D8EE90FEE5DD,0x00003FFF // C10
453 data8 0xB89446FA37FF481C,0x00003FFF // C20
454 data8 0xDC5572648485FB01,0x00003FFF // C30
455 data8 0x88CD5D7DB976129A,0x00004000 // C40
456 //[112; 113]
457 data8 0x8417098FD62AC5E3,0x00004000 // C01
458 data8 0xFA7896486B779CBB,0x00003FFF // C11
459 data8 0xDEC98B14AF5EEBD1,0x00003FFF // C21
460 data8 0xB48E153C6BF0B5A3,0x00003FFF // C31
461 data8 0xF597B038BC957582,0x00003FFE // C41
462 data8 0xBFC6F0884A415694,0x00003FFD // C51
463 data8 0xBA075A1392BDB5E5,0x0000BFFD // C61
464 data8 0xA4B79E01B44C7DB4,0x0000BFFF // C71
465 data8 0x9D12FA7711BFAB0F,0x0000C000 // C81
466 data8 0xFF24C47C8E108AB4,0x0000C000 // C91
467 data8 0xC7325EC86562606A,0x0000C001 // CA1
468 data8 0x8B47DCD9E1610938,0x00003FFF // C00
469 data8 0x9518B111B70F88B8,0x00003FFF // C10
470 data8 0xA9CC197206F68682,0x00003FFF // C20
471 data8 0xCB98294CC0D7A6A6,0x00003FFF // C30
472 data8 0xFE09493EA9165181,0x00003FFF // C40
473 //[128; 129]
474 data8 0xFE53D03442270D90,0x00003FFF // C01
475 data8 0xF0F857BAEC1993E4,0x00003FFF // C11
476 data8 0xD5FF6D70DBBC2FD3,0x00003FFF // C21
477 data8 0xACDAA5F4988B1074,0x00003FFF // C31
478 data8 0xE92E069F8AD75B54,0x00003FFE // C41
479 data8 0xAEBB64645BD94234,0x00003FFD // C51
480 data8 0xC13746249F39B43C,0x0000BFFD // C61
481 data8 0xA36B74F5B6297A1F,0x0000BFFF // C71
482 data8 0x9A77860DF180F6E5,0x0000C000 // C81
483 data8 0xF9F8457D84410A0C,0x0000C000 // C91
484 data8 0xC2BF44C649EB8597,0x0000C001 // CA1
485 data8 0x81225E7489BCDC0E,0x00003FFF // C00
486 data8 0x8A788A09CE0EED11,0x00003FFF // C10
487 data8 0x9E2E6F86D1B1D89C,0x00003FFF // C20
488 data8 0xBE6866B21CF6CCB5,0x00003FFF // C30
489 data8 0xEE94426EC1486AAE,0x00003FFF // C40
490 //[144; 145]
491 data8 0xF6113E09732A6497,0x00003FFF // C01
492 data8 0xE900D45931B04FC8,0x00003FFF // C11
493 data8 0xCE9FD58F745EBA5D,0x00003FFF // C21
494 data8 0xA663A9636C864C86,0x00003FFF // C31
495 data8 0xDEBF5315896CE629,0x00003FFE // C41
496 data8 0xA05FEA415EBD7737,0x00003FFD // C51
497 data8 0xC750F112BD9C4031,0x0000BFFD // C61
498 data8 0xA2593A35C51C6F6C,0x0000BFFF // C71
499 data8 0x9848E1DA7FB40C8C,0x0000C000 // C81
500 data8 0xF59FEE87A5759A4B,0x0000C000 // C91
501 data8 0xBF00203909E45A1D,0x0000C001 // CA1
502 data8 0xF1D8E157200127E5,0x00003FFE // C00
503 data8 0x81DD5397CB08D487,0x00003FFF // C10
504 data8 0x94C1DC271A8B766F,0x00003FFF // C20
505 data8 0xB3AFAF9B5D6EDDCF,0x00003FFF // C30
506 data8 0xE1FB4C57CA81BE1E,0x00003FFF // C40
507 //[160; 161]
508 data8 0xEEFFE5122AC72FFD,0x00003FFF // C01
509 data8 0xE22F70BB52AD54B3,0x00003FFF // C11
510 data8 0xC84FF021FE993EEA,0x00003FFF // C21
511 data8 0xA0DA2208EB5B2752,0x00003FFF // C31
512 data8 0xD5CDD2FCF8AD2DF5,0x00003FFE // C41
513 data8 0x940BEC6DCD811A59,0x00003FFD // C51
514 data8 0xCC954EF4FD4EBB81,0x0000BFFD // C61
515 data8 0xA1712E29A8C04554,0x0000BFFF // C71
516 data8 0x966B55DFB243521A,0x0000C000 // C81
517 data8 0xF1E6A2B9CEDD0C4C,0x0000C000 // C91
518 data8 0xBBC87BCC031012DB,0x0000C001 // CA1
519 data8 0xE43974E6D2818583,0x00003FFE // C00
520 data8 0xF5702A516B64C5B7,0x00003FFE // C10
521 data8 0x8CEBCB1B32E19471,0x00003FFF // C20
522 data8 0xAAC10F05BB77E0AF,0x00003FFF // C30
523 data8 0xD776EFCAB205CC58,0x00003FFF // C40
524 //[176; 177]
525 data8 0xE8DA614119811E5D,0x00003FFF // C01
526 data8 0xDC415E0288B223D8,0x00003FFF // C11
527 data8 0xC2D2243E44EC970E,0x00003FFF // C21
528 data8 0x9C086664B5307BEA,0x00003FFF // C31
529 data8 0xCE03D7A08B461156,0x00003FFE // C41
530 data8 0x894BE3BAAAB66ADC,0x00003FFD // C51
531 data8 0xD131EDD71A702D4D,0x0000BFFD // C61
532 data8 0xA0A907CDDBE10898,0x0000BFFF // C71
533 data8 0x94CC3CD9C765C808,0x0000C000 // C81
534 data8 0xEEA85F237815FC0D,0x0000C000 // C91
535 data8 0xB8FA04B023E43F91,0x0000C001 // CA1
536 data8 0xD8B2C7D9FCBD7EF9,0x00003FFE // C00
537 data8 0xE9566E93AAE7E38F,0x00003FFE // C10
538 data8 0x8646E78AABEF0255,0x00003FFF // C20
539 data8 0xA32AEDB62E304345,0x00003FFF // C30
540 data8 0xCE83E40280EE7DF0,0x00003FFF // C40
542 //[2; 3]
543 data8 0xC44FB47E90584083,0x00004001 // C50
544 data8 0xE863EE77E1C45981,0x00004001 // C60
545 data8 0x8AC15BE238B9D70E,0x00004002 // C70
546 data8 0xA5D94B6592350EF4,0x00004002 // C80
547 data8 0xC379DB3E20A148B3,0x00004002 // C90
548 data8 0xDACA49B73974F6C9,0x00004002 // CA0
549 data8 0x810E496A1AFEC895,0x00003FE1 // An
550 //[16; 17]
551 data8 0xE17C0357AAF3F817,0x00004001 // C50
552 data8 0x8BA8804750FBFBFE,0x00004002 // C60
553 data8 0xB18EAB3CB64BEBEE,0x00004002 // C70
554 data8 0xE90AB7015AF1C28F,0x00004002 // C80
555 data8 0xA0AB97CE9E259196,0x00004003 // C90
556 data8 0xF5E0E0A000C2D720,0x00004003 // CA0
557 data8 0xD97F0F87EC791954,0x00004005 // An
558 //[32; 33]
559 data8 0x980C293F3696040D,0x00004001 // C50
560 data8 0xC0DBFFBB948A9A4E,0x00004001 // C60
561 data8 0xFAB54625E9A588A2,0x00004001 // C70
562 data8 0xA7E08176D6050FBF,0x00004002 // C80
563 data8 0xEBAAEC4952270A9F,0x00004002 // C90
564 data8 0xB7479CDAD20550FE,0x00004003 // CA0
565 data8 0xAACD45931C3FF634,0x00004054 // An
566 //[48; 49]
567 data8 0xF5180F0000419AD5,0x00004000 // C50
568 data8 0x9D507D07BFBB2273,0x00004001 // C60
569 data8 0xCEB53F7A13A383E3,0x00004001 // C70
570 data8 0x8BAFEF9E0A49128F,0x00004002 // C80
571 data8 0xC58EF912D39E228C,0x00004002 // C90
572 data8 0x9A88118422BA208E,0x00004003 // CA0
573 data8 0xBD6C0E2477EC12CB,0x000040AC // An
574 //[64; 65]
575 data8 0xD410AC48BF7748DA,0x00004000 // C50
576 data8 0x89399B90AFEBD931,0x00004001 // C60
577 data8 0xB596DF8F77EB8560,0x00004001 // C70
578 data8 0xF6D9445A047FB4A6,0x00004001 // C80
579 data8 0xAF52F0DD65221357,0x00004002 // C90
580 data8 0x8989B45BFC881989,0x00004003 // CA0
581 data8 0xB7FCAE86E6E10D5A,0x0000410B // An
582 //[80; 81]
583 data8 0xBE759740E3B5AA84,0x00004000 // C50
584 data8 0xF8037B1B07D27609,0x00004000 // C60
585 data8 0xA4F6F6C7F0977D4F,0x00004001 // C70
586 data8 0xE131960233BF02C4,0x00004001 // C80
587 data8 0xA06DF43D3922BBE2,0x00004002 // C90
588 data8 0xFC266AB27255A360,0x00004002 // CA0
589 data8 0xD9F4B012EDAFEF2F,0x0000416F // An
590 //[96; 97]
591 data8 0xAEFC84CDA8E1EAA6,0x00004000 // C50
592 data8 0xE5009110DB5F3C8A,0x00004000 // C60
593 data8 0x98F5F48738E7B232,0x00004001 // C70
594 data8 0xD17EE64E21FFDC6B,0x00004001 // C80
595 data8 0x9596F7A7E36145CC,0x00004002 // C90
596 data8 0xEB64DBE50E125CAF,0x00004002 // CA0
597 data8 0xA090530D79E32D2E,0x000041D8 // An
598 //[112; 113]
599 data8 0xA33AEA22A16B2655,0x00004000 // C50
600 data8 0xD682B93BD7D7945C,0x00004000 // C60
601 data8 0x8FC854C6E6E30CC3,0x00004001 // C70
602 data8 0xC5754D828AFFDC7A,0x00004001 // C80
603 data8 0x8D41216B397139C2,0x00004002 // C90
604 data8 0xDE78D746848116E5,0x00004002 // CA0
605 data8 0xB8A297A2DC0630DB,0x00004244 // An
606 //[128; 129]
607 data8 0x99EB00F11D95E292,0x00004000 // C50
608 data8 0xCB005CB911EB779A,0x00004000 // C60
609 data8 0x8879AA2FDFF3A37A,0x00004001 // C70
610 data8 0xBBDA538AD40CAC2C,0x00004001 // C80
611 data8 0x8696D849D311B9DE,0x00004002 // C90
612 data8 0xD41E1C041481199F,0x00004002 // CA0
613 data8 0xEBA1A43D34EE61EE,0x000042B3 // An
614 //[144; 145]
615 data8 0x924F822578AA9F3D,0x00004000 // C50
616 data8 0xC193FAF9D3B36960,0x00004000 // C60
617 data8 0x827AE3A6B68ED0CA,0x00004001 // C70
618 data8 0xB3F52A27EED23F0B,0x00004001 // C80
619 data8 0x811A079FB3C94D79,0x00004002 // C90
620 data8 0xCB94415470B6F8D2,0x00004002 // CA0
621 data8 0x80A0260DCB3EC9AC,0x00004326 // An
622 //[160; 161]
623 data8 0x8BF24091E88B331D,0x00004000 // C50
624 data8 0xB9ADE01187E65201,0x00004000 // C60
625 data8 0xFAE4508F6E7625FE,0x00004000 // C70
626 data8 0xAD516668AD6D7367,0x00004001 // C80
627 data8 0xF8F5FF171154F637,0x00004001 // C90
628 data8 0xC461321268990C82,0x00004002 // CA0
629 data8 0xC3B693F344B0E6FE,0x0000439A // An
631 //[176; 177]
632 data8 0x868545EB42A258ED,0x00004000 // C50
633 data8 0xB2EF04ACE8BA0E6E,0x00004000 // C60
634 data8 0xF247D22C22E69230,0x00004000 // C70
635 data8 0xA7A1AB93E3981A90,0x00004001 // C80
636 data8 0xF10951733E2C697F,0x00004001 // C90
637 data8 0xBE3359BFAD128322,0x00004002 // CA0
638 data8 0x8000000000000000,0x00003fff
640 //[160; 161] for negatives
641 data8 0xA76DBD55B2E32D71,0x00003C63 // 1/An
643 // sin(pi*x)/pi
644 data8 0xBCBC4342112F52A2,0x00003FDE // S21
645 data8 0xFAFCECB86536F655,0x0000BFE3 // S19
646 data8 0x87E4C97F9CF09B92,0x00003FE9 // S17
647 data8 0xEA124C68E704C5CB,0x0000BFED // S15
648 data8 0x9BA38CFD59C8AA1D,0x00003FF2 // S13
649 data8 0x99C0B552303D5B21,0x0000BFF6 // S11
651 //[176; 177] for negatives
652 data8 0xBA5D5869211696FF,0x00003BEC // 1/An
654 // sin(pi*x)/pi
655 data8 0xD63402E79A853175,0x00003FF9 // S9
656 data8 0xC354723906DB36BA,0x0000BFFC // S7
657 data8 0xCFCE5A015E236291,0x00003FFE // S5
658 data8 0xD28D3312983E9918,0x0000BFFF // S3
661 // [1.0;1.25]
662 data8 0xA405530B067ECD3C,0x0000BFFC // A15
663 data8 0xF5B5413F95E1C282,0x00003FFD // A14
664 data8 0xC4DED71C782F76C8,0x0000BFFE // A13
665 data8 0xECF7DDDFD27C9223,0x00003FFE // A12
666 data8 0xFB73D31793068463,0x0000BFFE // A11
667 data8 0xFF173B7E66FD1D61,0x00003FFE // A10
668 data8 0xFFA5EF3959089E94,0x0000BFFE // A9
669 data8 0xFF8153BD42E71A4F,0x00003FFE // A8
670 data8 0xFEF9CAEE2CB5B533,0x0000BFFE // A7
671 data8 0xFE3F02E5EDB6811E,0x00003FFE // A6
672 data8 0xFB64074CED2658FB,0x0000BFFE // A5
673 data8 0xFB52882A095B18A4,0x00003FFE // A4
674 data8 0xE8508C7990A0DAC0,0x0000BFFE // A3
675 data8 0xFD32C611D8A881D0,0x00003FFE // A2
676 data8 0x93C467E37DB0C536,0x0000BFFE // A1
677 data8 0x8000000000000000,0x00003FFF // A0
679 // [1.25;1.5]
680 data8 0xD038092400619677,0x0000BFF7 // A15
681 data8 0xEA6DE925E6EB8C8F,0x00003FF3 // A14
682 data8 0xC53F83645D4597FC,0x0000BFF7 // A13
683 data8 0xE366DB2FB27B7ECD,0x00003FF7 // A12
684 data8 0xAC8FD5E11F6EEAD8,0x0000BFF8 // A11
685 data8 0xFB14010FB3697785,0x00003FF8 // A10
686 data8 0xB6F91CB5C371177B,0x0000BFF9 // A9
687 data8 0x85A262C6F8FEEF71,0x00003FFA // A8
688 data8 0xC038E6E3261568F9,0x0000BFFA // A7
689 data8 0x8F4BDE8883232364,0x00003FFB // A6
690 data8 0xBCFBBD5786537E9A,0x0000BFFB // A5
691 data8 0xA4C08BAF0A559479,0x00003FFC // A4
692 data8 0x85D74FA063E81476,0x0000BFFC // A3
693 data8 0xDB629FB9BBDC1C4E,0x00003FFD // A2
694 data8 0xF4F8FBC7C0C9D317,0x00003FC6 // A1
695 data8 0xE2B6E4153A57746C,0x00003FFE // A0
697 // [1.25;1.5]
698 data8 0x9533F9D3723B448C,0x0000BFF2 // A15
699 data8 0xF1F75D3C561CBBAF,0x00003FF5 // A14
700 data8 0xBA55A9A1FC883523,0x0000BFF8 // A13
701 data8 0xB5D5E9E5104FA995,0x00003FFA // A12
702 data8 0xFD84F35B70CD9AE2,0x0000BFFB // A11
703 data8 0x87445235F4688CC5,0x00003FFD // A10
704 data8 0xE7F236EBFB9F774E,0x0000BFFD // A9
705 data8 0xA6605F2721F787CE,0x00003FFE // A8
706 data8 0xCF579312AD7EAD72,0x0000BFFE // A7
707 data8 0xE96254A2407A5EAC,0x00003FFE // A6
708 data8 0xF41312A8572ED346,0x0000BFFE // A5
709 data8 0xF9535027C1B1F795,0x00003FFE // A4
710 data8 0xE7E82D0C613A8DE4,0x0000BFFE // A3
711 data8 0xFD23CD9741B460B8,0x00003FFE // A2
712 data8 0x93C30FD9781DBA88,0x0000BFFE // A1
713 data8 0xFFFFF1781FDBEE84,0x00003FFE // A0
714 LOCAL_OBJECT_END(tgamma_data)
717 //==============================================================
718 // Code
719 //==============================================================
721 .section .text
722 GLOBAL_LIBM_ENTRY(tgamma)
723 { .mfi
724       getf.exp      GR_Sign_Exp = f8
725       fma.s1        FR_1m2X = f8,f1,f8 // 2x
726       addl          GR_ad_Data = @ltoff(tgamma_data), gp
728 { .mfi
729       mov           GR_ExpOf8 = 0x10002 // 8
730       fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
731       mov           GR_ExpOf05 = 0xFFFE // 0.5
733 { .mfi
734       getf.sig      GR_Sig = f8
735       fma.s1        FR_2 = f1,f1,f1 // 2
736       mov           GR_Addr_Mask1 = 0x780
738 { .mlx
739       setf.exp      FR_8 = GR_ExpOf8
740       movl          GR_10 = 0x4024000000000000
742 { .mfi
743       ld8           GR_ad_Data = [GR_ad_Data]
744       fcmp.lt.s1    p14,p15 = f8,f0
745       tbit.z        p12,p13 = GR_Sign_Exp,0x10 // p13 if x >= 2
747 { .mlx
748       and           GR_Bit2 = 4,GR_Sign_Exp
749       movl          GR_12 = 0x4028000000000000
751 { .mfi
752       setf.d        FR_10 = GR_10
753       fma.s1        FR_r02 = f8,f1,f0
754       extr.u        GR_Tbl_Offs = GR_Sig,58,6
756 { .mfi
757 (p12) mov           GR_Addr_Mask1 = r0
758       fma.s1        FR_NormX = f8,f1,f0
759       cmp.ne        p8,p0 = GR_Bit2,r0
761 { .mfi
762 (p8)  shladd        GR_Tbl_Offs = GR_Tbl_Offs,4,r0
763       fclass.m      p10,p0 =  f8,0x1E7 // Test x for NaTVal, NaN, +/-0, +/-INF
764       tbit.nz       p11,p0 = GR_Sign_Exp,1
766 { .mlx
767       add           GR_Addr_Mask2 = GR_Addr_Mask1,GR_Addr_Mask1
768       movl          GR_14 = 0x402C000000000000
770 .pred.rel "mutex",p14,p15
771 { .mfi
772       setf.d        FR_12 = GR_12
773 (p14) fma.s1        FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x|
774       tbit.nz       p8,p9 = GR_Sign_Exp,0
776 { .mfi
777       ldfpd         FR_OvfBound,FR_Xmin = [GR_ad_Data],16
778 (p15) fms.s1        FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x|
779 (p11) shladd        GR_Tbl_Offs = GR_Tbl_Offs,2,r0
781 .pred.rel "mutex",p9,p8
782 { .mfi
783       setf.d        FR_14 = GR_14
784       fma.s1        FR_4 = FR_2,FR_2,f0
785 (p8)  and           GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask1
787 { .mfi
788       setf.exp      FR_05 = GR_ExpOf05
789       fma.s1        FR_6 = FR_2,FR_2,FR_2
790 (p9)  and           GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask2
792 .pred.rel "mutex",p9,p8
793 { .mfi
794 (p8)  shladd        GR_ad_Co = GR_Tbl_Offs,1,GR_ad_Data
795       fcvt.xf       FR_Xt = FR_iXt // [x]
796 (p15) tbit.z.unc    p11,p0 = GR_Sign_Exp,0x10 // p11 if 0 < x < 2
798 { .mfi
799 (p9)  add           GR_ad_Co = GR_ad_Data,GR_Tbl_Offs
800       fma.s1        FR_5 = FR_2,FR_2,f1
801 (p15) cmp.lt.unc    p7,p6 = GR_ExpOf05,GR_Sign_Exp // p7 if 0 < x < 1
803 { .mfi
804       add           GR_ad_Ce = 16,GR_ad_Co
805 (p11) frcpa.s1      FR_Rcp0,p0 = f1,f8
806       sub           GR_Tbl_Offs = GR_ad_Co,GR_ad_Data
808 { .mfb
809       ldfe          FR_C01 = [GR_ad_Co],32
810 (p7)  fms.s1        FR_r02 = FR_r02,f1,f1 
811       // jump if x is NaTVal, NaN, +/-0, +/-INF
812 (p10) br.cond.spnt  tgamma_spec
814 .pred.rel "mutex",p14,p15
815 { .mfi
816       ldfe          FR_C11 = [GR_ad_Ce],32
817 (p14) fms.s1        FR_X2pX = f8,f8,f8 // RA=x^2+|x|
818       shr           GR_Tbl_Ind = GR_Tbl_Offs,8
820 { .mfb
821       ldfe          FR_C21 = [GR_ad_Co],32
822 (p15) fma.s1        FR_X2pX = f8,f8,f8 // RA=x^2+x
823       // jump if 0 < x < 2
824 (p11) br.cond.spnt  tgamma_from_0_to_2
826 { .mfi
827       ldfe          FR_C31 = [GR_ad_Ce],32
828       fma.s1        FR_Rq2 = FR_2,f1,FR_1m2X  // 2 + B
829       cmp.ltu       p7,p0=0xB,GR_Tbl_Ind
831 { .mfb
832       ldfe          FR_C41 = [GR_ad_Co],32
833       fma.s1        FR_Rq3 = FR_2,FR_2,FR_1m2X  // 4 + B
834       // jump if GR_Tbl_Ind > 11, i.e |x| is more than 192
835 (p7)  br.cond.spnt  tgamma_spec_res
837 { .mfi
838       ldfe          FR_C51 = [GR_ad_Ce],32
839       fma.s1        FR_Rq4 = FR_6,f1,FR_1m2X  // 6 + B
840       shr           GR_Tbl_Offs = GR_Tbl_Offs,1
842 { .mfi
843       ldfe          FR_C61 = [GR_ad_Co],32
844       fma.s1        FR_Rq5 = FR_4,FR_2,FR_1m2X // 8 + B
845       nop.i         0
847 { .mfi
848       ldfe          FR_C71 = [GR_ad_Ce],32
849 (p14) fms.s1        FR_r = FR_Xt,f1,f8 // r = |x| - [|x|]
850       shr           GR_Tbl_16xInd = GR_Tbl_Offs,3
852 { .mfi
853       ldfe          FR_C81 = [GR_ad_Co],32
854 (p15) fms.s1        FR_r = f8,f1,FR_Xt // r = x - [x]
855       add           GR_ad_Data = 0xC00,GR_ad_Data
857 { .mfi
858       ldfe          FR_C91 = [GR_ad_Ce],32
859       fma.s1        FR_Rq6 = FR_5,FR_2,FR_1m2X  // 10 + B
860 (p14) mov           GR_0x30033 = 0x30033
862 { .mfi
863       ldfe          FR_CA1 = [GR_ad_Co],32
864       fma.s1        FR_Rq7 = FR_6,FR_2,FR_1m2X // 12 + B
865       sub           GR_Tbl_Offs = GR_Tbl_Offs,GR_Tbl_16xInd
867 { .mfi
868       ldfe          FR_C00 = [GR_ad_Ce],32
869       fma.s1        FR_Rq1 = FR_Rq1,FR_2,FR_X2pX // (x-1)*(x-2)
870 (p13) cmp.eq.unc    p8,p0 = r0,GR_Tbl_16xInd // index is 0 i.e. arg from [2;16)
872 { .mfi
873       ldfe          FR_C10 = [GR_ad_Co],32
874 (p14) fms.s1        FR_AbsX = f0,f0,FR_NormX // absolute value of argument
875       add           GR_ad_Co7 = GR_ad_Data,GR_Tbl_Offs
877 { .mfi
878       ldfe          FR_C20 = [GR_ad_Ce],32
879       fma.s1        FR_Rq2 = FR_Rq2,FR_4,FR_X2pX // (x-3)*(x-4)
880       add           GR_ad_Ce7 = 16,GR_ad_Co7
882 { .mfi
883       ldfe          FR_C30 = [GR_ad_Co],32
884       fma.s1        FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6)
885       nop.i         0 
887 { .mfi
888       ldfe          FR_C40 = [GR_ad_Ce],32
889       fma.s1        FR_Rq4 = FR_Rq4,FR_8,FR_X2pX // (x-7)*(x-8)
890 (p14) cmp.leu.unc   p7,p0 = GR_0x30033,GR_Sign_Exp
892 { .mfb
893       ldfe          FR_C50 = [GR_ad_Co7],32
894       fma.s1        FR_Rq5 = FR_Rq5,FR_10,FR_X2pX // (x-9)*(x-10)
895       // jump if x is less or equal to -2^52, i.e. x is big negative integer
896 (p7)  br.cond.spnt  tgamma_singularity
898 { .mfi
899       ldfe          FR_C60 = [GR_ad_Ce7],32
900       fma.s1        FR_C01 = FR_C01,f1,FR_r
901       add           GR_ad_Ce = 0x560,GR_ad_Data
903 { .mfi
904       ldfe          FR_C70 = [GR_ad_Co7],32
905       fma.s1        FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x)  
906       add           GR_ad_Co = 0x550,GR_ad_Data
908 { .mfi
909       ldfe          FR_C80 = [GR_ad_Ce7],32
910       fma.s1        FR_C11 = FR_C11,f1,FR_r
911       nop.i         0
913 { .mfi
914       ldfe          FR_C90 = [GR_ad_Co7],32
915       fma.s1        FR_C21 = FR_C21,f1,FR_r
916       nop.i         0
918 .pred.rel "mutex",p12,p13
919 { .mfi
920 (p13) getf.sig      GR_iSig = FR_iXt
921       fcmp.lt.s1    p11,p0 = FR_05,FR_r
922       mov           GR_185 = 185
924 { .mfi
925       nop.m         0
926       fma.s1        FR_Rq6 = FR_Rq6,FR_12,FR_X2pX // (x-11)*(x-12)
927       nop.i         0
929 { .mfi
930       ldfe          FR_CA0 = [GR_ad_Ce7],32
931       fma.s1        FR_C31 = FR_C31,f1,FR_r
932 (p12) mov           GR_iSig = 0
934 { .mfi
935       ldfe          FR_An = [GR_ad_Co7],0x80
936       fma.s1        FR_C41 = FR_C41,f1,FR_r
937       nop.i         0
939 { .mfi
940 (p14) getf.sig      GR_Sig = FR_r
941       fma.s1        FR_C51 = FR_C51,f1,FR_r
942 (p14) sub           GR_iSig = r0,GR_iSig
944 { .mfi
945       ldfe          FR_S21 = [GR_ad_Co],32
946       fma.s1        FR_C61 = FR_C61,f1,FR_r
947       nop.i         0
949 { .mfi
950       ldfe          FR_S19 = [GR_ad_Ce],32
951       fma.s1        FR_C71 = FR_C71,f1,FR_r
952       and           GR_SigRqLin = 0xF,GR_iSig
954 { .mfi
955       ldfe          FR_S17 = [GR_ad_Co],32
956       fma.s1        FR_C81 = FR_C81,f1,FR_r
957       mov           GR_2 = 2
959 { .mfi
960 (p14) ldfe          FR_InvAn = [GR_ad_Co7]
961       fma.s1        FR_C91 = FR_C91,f1,FR_r
962       // if significand of r is 0 tnan argument is negative integer
963 (p14) cmp.eq.unc    p12,p0 = r0,GR_Sig
965 { .mfb
966 (p8)  sub           GR_SigRqLin = GR_SigRqLin,GR_2 // subtract 2 if 2 <= x < 16
967       fma.s1        FR_CA1 = FR_CA1,f1,FR_r
968       // jump if x is negative integer such that -2^52 < x < -185
969 (p12) br.cond.spnt  tgamma_singularity
971 { .mfi
972       setf.sig      FR_Xt = GR_SigRqLin
973 (p11) fms.s1        FR_rs = f1,f1,FR_r
974 (p14) cmp.ltu.unc   p7,p0 = GR_185,GR_iSig
976 { .mfb
977       ldfe          FR_S15 = [GR_ad_Ce],32
978       fma.s1        FR_Rq7 = FR_Rq7,FR_14,FR_X2pX // (x-13)*(x-14)
979       // jump if x is noninteger such that -2^52 < x < -185
980 (p7)  br.cond.spnt  tgamma_underflow
982 { .mfi
983       ldfe          FR_S13 = [GR_ad_Co],48
984       fma.s1        FR_C01 = FR_C01,FR_r,FR_C00
985       and           GR_Sig2 = 0xE,GR_SigRqLin
987 { .mfi
988       ldfe          FR_S11 = [GR_ad_Ce],48
989       fma.s1        FR_C11 = FR_C11,FR_r,FR_C10
990       nop.i         0
992 { .mfi
993       ldfe          FR_S9 = [GR_ad_Co],32
994       fma.s1        FR_C21 = FR_C21,FR_r,FR_C20
995       // should we mul by polynomial of recursion?
996       cmp.eq        p13,p12 = r0,GR_SigRqLin
998 { .mfi
999       ldfe          FR_S7 = [GR_ad_Ce],32
1000       fma.s1        FR_C31 = FR_C31,FR_r,FR_C30
1001       nop.i         0
1003 { .mfi
1004       ldfe          FR_S5 = [GR_ad_Co],32
1005       fma.s1        FR_C41 = FR_C41,FR_r,FR_C40
1006       nop.i         0
1008 { .mfi
1009       ldfe          FR_S3 = [GR_ad_Ce],32
1010       fma.s1        FR_C51 = FR_C51,FR_r,FR_C50
1011       nop.i         0
1013 { .mfi
1014       nop.m         0
1015       fma.s1        FR_C61 = FR_C61,FR_r,FR_C60
1016       nop.i         0
1018 { .mfi
1019       nop.m         0
1020       fma.s1        FR_C71 = FR_C71,FR_r,FR_C70
1021       nop.i         0
1023 { .mfi
1024       nop.m         0
1025       fma.s1        FR_C81 = FR_C81,FR_r,FR_C80
1026       nop.i         0
1028 { .mfi
1029       nop.m         0
1030       fma.s1        FR_C91 = FR_C91,FR_r,FR_C90
1031       nop.i         0
1033 { .mfi
1034       nop.m         0
1035       fma.s1        FR_CA1 = FR_CA1,FR_r,FR_CA0
1036       nop.i         0
1038 { .mfi
1039       nop.m         0 
1040       fma.s1        FR_C01 = FR_C01,FR_C11,f0
1041       nop.i         0
1043 { .mfi
1044       nop.m         0 
1045       fma.s1        FR_C21 = FR_C21,FR_C31,f0
1046       nop.i         0
1048 { .mfi
1049       nop.m         0
1050       fma.s1        FR_rs2 = FR_rs,FR_rs,f0
1051 (p12) cmp.lt.unc    p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2?
1053 { .mfi
1054       nop.m         0 
1055       fma.s1        FR_C41 = FR_C41,FR_C51,f0
1056       nop.i         0 
1058 { .mfi
1059       nop.m         0
1060 (p7)  fma.s1        FR_Rq1 = FR_Rq1,FR_Rq2,f0
1061 (p12) cmp.lt.unc    p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4?
1063 { .mfi
1064       nop.m         0 
1065       fma.s1        FR_C61 = FR_C61,FR_C71,f0
1066 (p15) cmp.eq        p11,p0 = r0,r0
1068 { .mfi
1069       nop.m         0
1070 (p9)  fma.s1        FR_Rq3 = FR_Rq3,FR_Rq4,f0
1071 (p12) cmp.lt.unc    p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6?
1073 { .mfi
1074       nop.m         0 
1075       fma.s1        FR_C81 = FR_C81,FR_C91,f0
1076       nop.i         0
1078 { .mfi
1079       nop.m         0
1080 (p8)  fma.s1        FR_Rq5 = FR_Rq5,FR_Rq6,f0
1081 (p14) cmp.ltu       p0,p11 = 0x9,GR_Tbl_Ind
1083 { .mfi
1084       nop.m         0 
1085       fcvt.xf       FR_RqLin = FR_Xt  
1086       nop.i         0
1088 { .mfi
1089       nop.m         0
1090 (p11) fma.s1        FR_CA1 = FR_CA1,FR_An,f0
1091       nop.i         0
1093 { .mfi
1094       nop.m         0
1095       fma.s1        FR_S21 = FR_S21,FR_rs2,FR_S19
1096       nop.i         0
1098 { .mfi
1099       nop.m         0
1100       fma.s1        FR_S17 = FR_S17,FR_rs2,FR_S15
1101       nop.i         0
1103 { .mfi
1104       nop.m         0 
1105       fma.s1        FR_C01 = FR_C01,FR_C21,f0
1106       nop.i         0
1108 { .mfi
1109       nop.m         0 
1110       fma.s1        FR_rs4 = FR_rs2,FR_rs2,f0
1111 (p12) cmp.lt.unc    p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3?
1113 { .mfi
1114       nop.m         0
1115 (p8)  fma.s1        FR_Rq1 = FR_Rq1,FR_Rq3,f0
1116       nop.i         0
1118 { .mfi
1119       nop.m         0
1120       fma.s1        FR_S13 = FR_S13,FR_rs2,FR_S11
1121 (p12) cmp.lt.unc    p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7?
1123 { .mfi
1124       nop.m         0 
1125       fma.s1        FR_C41 = FR_C41,FR_C61,f0
1126       nop.i         0
1128 { .mfi
1129       nop.m         0 
1130 (p9)  fma.s1        FR_Rq5 = FR_Rq5,FR_Rq7,f0
1131       nop.i         0 
1133 { .mfi
1134       nop.m         0 
1135       fma.s1        FR_C81 = FR_C81,FR_CA1,f0
1136       nop.i         0 
1138 { .mfi
1139       nop.m         0
1140       fma.s1        FR_S9 = FR_S9,FR_rs2,FR_S7
1141       nop.i         0
1143 { .mfi
1144       nop.m         0
1145       fma.s1        FR_S5 = FR_S5,FR_rs2,FR_S3
1146       nop.i         0
1148 { .mfi
1149       nop.m         0
1150       fma.s1        FR_rs3 = FR_rs2,FR_rs,f0
1151 (p12) tbit.nz.unc   p6,p0 = GR_SigRqLin,0
1153 { .mfi
1154       nop.m         0
1155       fma.s1        FR_rs8 = FR_rs4,FR_rs4,f0
1156       nop.i         0
1158 { .mfi
1159       nop.m         0
1160       fma.s1        FR_S21 = FR_S21,FR_rs4,FR_S17
1161       mov           GR_ExpOf1 = 0x2FFFF
1163 { .mfi
1164       nop.m         0 
1165 (p6)  fms.s1        FR_RqLin = FR_AbsX,f1,FR_RqLin
1166 (p12) cmp.lt.unc    p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5?
1168 { .mfi
1169       nop.m         0 
1170       fma.s1        FR_C01 = FR_C01,FR_C41,f0
1171       nop.i         0
1173 { .mfi
1174       nop.m         0
1175 (p8)  fma.s1        FR_Rq1 = FR_Rq1,FR_Rq5,f0
1176 (p14) cmp.gtu.unc   p7,p0 = GR_Sign_Exp,GR_ExpOf1
1178 { .mfi
1179       nop.m         0
1180       fma.s1        FR_S13 = FR_S13,FR_rs4,FR_S9
1181       nop.i         0
1183 { .mfi
1184       nop.m         0
1185 (p7)  fma.s1        FR_C81 = FR_C81,FR_AbsX,f0
1186       nop.i         0
1188 { .mfi
1189       nop.m         0
1190 (p14) fma.s1        FR_AbsXp1 = f1,f1,FR_AbsX // |x|+1
1191       nop.i         0
1193 { .mfi
1194       nop.m         0
1195 (p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary 
1196       nop.i         0
1198 { .mfi
1199       nop.m         0
1200       fma.s1        FR_rs7 = FR_rs4,FR_rs3,f0
1201       nop.i         0
1203 { .mfi
1204       nop.m         0
1205       fma.s1        FR_S5 = FR_S5,FR_rs3,FR_rs
1206       nop.i         0
1208 { .mib
1209 (p14) cmp.lt        p13,p0 = r0,r0 // set p13 to 0 if x < 0
1210 (p12) cmp.eq.unc    p8,p9 = 1,GR_SigRqLin
1211 (p10) br.cond.spnt  tgamma_spec_res
1213 { .mfi
1214       getf.sig      GR_Sig = FR_iXt
1215 (p6)  fma.s1        FR_Rq1 = FR_Rq1,FR_RqLin,f0
1216       // should we mul by polynomial of recursion?
1217 (p15) cmp.eq.unc    p0,p11 = r0,GR_SigRqLin
1219 { .mfb
1220       nop.m         0 
1221       fma.s1        FR_GAMMA = FR_C01,FR_C81,f0
1222 (p11) br.cond.spnt  tgamma_positives
1224 { .mfi
1225       nop.m         0
1226       fma.s1        FR_S21 = FR_S21,FR_rs8,FR_S13
1227       nop.i         0
1229 { .mfb
1230       nop.m         0
1231 (p13) fma.d.s0      f8 = FR_C01,FR_C81,f0
1232 (p13) br.ret.spnt   b0
1234 .pred.rel "mutex",p8,p9
1235 { .mfi
1236       nop.m         0 
1237 (p9)  fma.s1        FR_GAMMA = FR_GAMMA,FR_Rq1,f0
1238       tbit.z        p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0
1240 { .mfi
1241       nop.m         0 
1242 (p8)  fma.s1        FR_GAMMA = FR_GAMMA,FR_RqLin,f0
1243       nop.i         0
1245 { .mfi
1246       nop.m         0
1247       fma.s1        FR_S21 = FR_S21,FR_rs7,FR_S5
1248       nop.i         0
1250 .pred.rel "mutex",p6,p7
1251 { .mfi
1252       nop.m         0 
1253 (p6)  fnma.s1       FR_GAMMA = FR_GAMMA,FR_S21,f0 
1254       nop.i         0 
1256 { .mfi
1257       nop.m         0 
1258 (p7)  fma.s1        FR_GAMMA = FR_GAMMA,FR_S21,f0
1259       mov           GR_Sig2 = 1
1261 { .mfi
1262       nop.m         0
1263       frcpa.s1      FR_Rcp0,p0 = f1,FR_GAMMA
1264       cmp.ltu       p13,p0 = GR_Sign_Exp,GR_ExpOf1
1266 // NR method: ineration #1
1267 { .mfi
1268 (p13) getf.exp      GR_Sign_Exp = FR_AbsX
1269       fnma.s1       FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1 // t = 1 - r0*x
1270 (p13) shl           GR_Sig2 = GR_Sig2,63
1272 { .mfi
1273 (p13) getf.sig      GR_Sig = FR_AbsX
1274       nop.f         0
1275 (p13) mov           GR_NzOvfBound = 0xFBFF
1277 { .mfi
1278 (p13) cmp.ltu.unc   p8,p0 = GR_Sign_Exp,GR_NzOvfBound // p8 <- overflow
1279       nop.f         0
1280 (p13) cmp.eq.unc    p9,p0 = GR_Sign_Exp,GR_NzOvfBound
1282 { .mfb
1283       nop.m         0
1284 (p13) fma.d.s0      FR_X = f1,f1,f8 // set deno & inexact flags
1285 (p8)  br.cond.spnt  tgamma_ovf_near_0 //tgamma_neg_overflow
1287 { .mib
1288       nop.m         0
1289 (p9)  cmp.eq.unc    p8,p0 = GR_Sig,GR_Sig2
1290 (p8)  br.cond.spnt  tgamma_ovf_near_0_boundary //tgamma_neg_overflow
1292 { .mfi
1293       nop.m         0
1294       fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
1295       nop.i         0 
1297 // NR method: ineration #2
1298 { .mfi
1299       nop.m         0
1300       fnma.s1       FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1 // t = 1 - r1*x
1301       nop.i         0
1303 { .mfi
1304       nop.m         0
1305       fma.s1        FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1306       nop.i         0
1308 // NR method: ineration #3
1309 { .mfi
1310       nop.m         0
1311       fnma.s1       FR_Rcp3 = FR_Rcp2,FR_GAMMA,f1 // t = 1 - r2*x
1312       nop.i         0
1314 { .mfi
1315       nop.m         0
1316 (p13) fma.s1        FR_Rcp2 = FR_Rcp2,FR_AbsXp1,f0
1317 (p14) cmp.ltu       p10,p11 = 0x9,GR_Tbl_Ind
1319 .pred.rel "mutex",p10,p11
1320 { .mfi
1321       nop.m         0
1322 (p10) fma.s1        FR_GAMMA = FR_Rcp2,FR_Rcp3,FR_Rcp2
1323       nop.i         0
1325 { .mfi
1326       nop.m         0
1327 (p11) fma.d.s0      f8 = FR_Rcp2,FR_Rcp3,FR_Rcp2
1328       nop.i         0
1330 { .mfb
1331       nop.m         0
1332 (p10) fma.d.s0      f8 = FR_GAMMA,FR_InvAn,f0
1333       br.ret.sptk   b0
1337 // here if x >= 3
1338 //--------------------------------------------------------------------
1339 .align 32
1340 tgamma_positives:
1341 .pred.rel "mutex",p8,p9
1342 { .mfi
1343       nop.m         0 
1344 (p9)  fma.d.s0      f8 = FR_GAMMA,FR_Rq1,f0
1345       nop.i         0
1347 { .mfb
1348       nop.m         0 
1349 (p8)  fma.d.s0      f8 = FR_GAMMA,FR_RqLin,f0
1350       br.ret.sptk   b0
1353 // here if 0 < x < 1
1354 //--------------------------------------------------------------------
1355 .align 32
1356 tgamma_from_0_to_2:
1357 { .mfi
1358       getf.exp      GR_Sign_Exp = FR_r02
1359       fms.s1        FR_r = FR_r02,f1,FR_Xmin
1360       mov           GR_ExpOf025 = 0xFFFD
1362 { .mfi
1363       add           GR_ad_Co = 0x1200,GR_ad_Data
1364 (p6)  fnma.s1       FR_Rcp1 = FR_Rcp0,FR_NormX,f1  // t = 1 - r0*x
1365 (p6)  mov           GR_Sig2 = 1
1367 { .mfi
1368 (p6)  getf.sig      GR_Sig = FR_NormX
1369       nop.f         0 
1370 (p6)  shl           GR_Sig2 = GR_Sig2,63
1372 { .mfi
1373       add           GR_ad_Ce = 0x1210,GR_ad_Data
1374       nop.f         0
1375 (p6)  mov           GR_NzOvfBound = 0xFBFF
1377 { .mfi
1378       cmp.eq        p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2 
1379       nop.f         0
1380       cmp.eq        p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4 
1382 { .mfi
1383 (p6)  cmp.ltu.unc   p11,p0 = GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow
1384       nop.f         0
1385 (p6)  cmp.eq.unc    p12,p0 = GR_Sign_Exp,GR_NzOvfBound
1387 .pred.rel "mutex",p8,p9
1388 { .mfi
1389 (p8)  add           GR_ad_Co = 0x200,GR_ad_Co
1390 (p6)  fma.d.s0      FR_X = f1,f1,f8 // set deno & inexact flags
1391 (p9)  add           GR_ad_Co = 0x100,GR_ad_Co
1393 { .mib
1394 (p8)  add           GR_ad_Ce = 0x200,GR_ad_Ce
1395 (p9)  add           GR_ad_Ce = 0x100,GR_ad_Ce
1396 (p11) br.cond.spnt  tgamma_ovf_near_0 //tgamma_spec_res
1398 { .mfi
1399       ldfe          FR_A15 = [GR_ad_Co],32 
1400       nop.f         0
1401 (p12) cmp.eq.unc    p13,p0 = GR_Sig,GR_Sig2
1403 { .mfb
1404       ldfe          FR_A14 = [GR_ad_Ce],32 
1405       nop.f         0
1406 (p13) br.cond.spnt  tgamma_ovf_near_0_boundary //tgamma_spec_res
1408 { .mfi
1409       ldfe          FR_A13 = [GR_ad_Co],32 
1410       nop.f         0
1411       nop.i         0
1413 { .mfi
1414       ldfe          FR_A12 = [GR_ad_Ce],32 
1415       nop.f         0
1416       nop.i         0
1418 .pred.rel "mutex",p9,p10
1419 { .mfi
1420       ldfe          FR_A11 = [GR_ad_Co],32 
1421 (p10) fma.s1        FR_r2 = FR_r02,FR_r02,f0 
1422       nop.i         0
1424 { .mfi
1425       ldfe          FR_A10 = [GR_ad_Ce],32 
1426 (p9)  fma.s1        FR_r2 = FR_r,FR_r,f0 
1427       nop.i         0
1429 { .mfi
1430       ldfe          FR_A9 = [GR_ad_Co],32 
1431 (p6)  fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
1432       nop.i         0
1434 { .mfi
1435       ldfe          FR_A8 = [GR_ad_Ce],32 
1436 (p10) fma.s1        FR_r = f0,f0,FR_r02
1437       nop.i         0
1439 { .mfi
1440       ldfe          FR_A7 = [GR_ad_Co],32 
1441       nop.f         0
1442       nop.i         0
1444 { .mfi
1445       ldfe          FR_A6 = [GR_ad_Ce],32 
1446       nop.f         0 
1447       nop.i         0
1449 { .mfi
1450       ldfe          FR_A5 = [GR_ad_Co],32 
1451       nop.f         0
1452       nop.i         0
1454 { .mfi
1455       ldfe          FR_A4 = [GR_ad_Ce],32 
1456       nop.f         0
1457       nop.i         0
1459 { .mfi
1460       ldfe          FR_A3 = [GR_ad_Co],32 
1461       nop.f         0
1462       nop.i         0
1464 { .mfi
1465       ldfe          FR_A2 = [GR_ad_Ce],32 
1466       nop.f         0
1467       nop.i         0
1469 { .mfi
1470       ldfe          FR_A1 = [GR_ad_Co],32 
1471       fma.s1        FR_r4 = FR_r2,FR_r2,f0 
1472       nop.i         0
1474 { .mfi
1475       ldfe          FR_A0 = [GR_ad_Ce],32 
1476       nop.f         0
1477       nop.i         0
1479 { .mfi
1480       nop.m         0
1481 (p6)  fnma.s1       FR_Rcp2 = FR_Rcp1,FR_NormX,f1 // t = 1 - r1*x
1482       nop.i         0
1484 { .mfi
1485       nop.m         0
1486       fma.s1        FR_A15 = FR_A15,FR_r,FR_A14
1487       nop.i         0
1489 { .mfi
1490       nop.m         0
1491       fma.s1        FR_A11 = FR_A11,FR_r,FR_A10
1492       nop.i         0
1494 { .mfi
1495       nop.m         0
1496       fma.s1        FR_r8 = FR_r4,FR_r4,f0 
1497       nop.i         0
1499 { .mfi
1500       nop.m         0
1501 (p6)  fma.s1        FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1502       nop.i         0
1504 { .mfi
1505       nop.m         0
1506       fma.s1        FR_A7 = FR_A7,FR_r,FR_A6
1507       nop.i         0
1509 { .mfi
1510       nop.m         0
1511       fma.s1        FR_A3 = FR_A3,FR_r,FR_A2
1512       nop.i         0
1514 { .mfi
1515       nop.m         0
1516       fma.s1        FR_A15 = FR_A15,FR_r,FR_A13
1517       nop.i         0
1519 { .mfi
1520       nop.m         0
1521       fma.s1        FR_A11 = FR_A11,FR_r,FR_A9
1522       nop.i         0
1524 { .mfi
1525       nop.m         0
1526 (p6)  fnma.s1       FR_Rcp3 = FR_Rcp2,FR_NormX,f1 // t = 1 - r1*x
1527       nop.i         0
1529 { .mfi
1530       nop.m         0
1531       fma.s1        FR_A7 = FR_A7,FR_r,FR_A5
1532       nop.i         0
1534 { .mfi
1535       nop.m         0
1536       fma.s1        FR_A3 = FR_A3,FR_r,FR_A1
1537       nop.i         0
1539 { .mfi
1540       nop.m         0
1541       fma.s1        FR_A15 = FR_A15,FR_r,FR_A12
1542       nop.i         0
1544 { .mfi
1545       nop.m         0
1546       fma.s1        FR_A11 = FR_A11,FR_r,FR_A8
1547       nop.i         0
1549 { .mfi
1550       nop.m         0
1551 (p6)  fma.s1        FR_Rcp3 = FR_Rcp2,FR_Rcp3,FR_Rcp2
1552       nop.i         0
1554 { .mfi
1555       nop.m         0
1556       fma.s1        FR_A7 = FR_A7,FR_r,FR_A4
1557       nop.i         0
1559 { .mfi
1560       nop.m         0
1561       fma.s1        FR_A3 = FR_A3,FR_r,FR_A0
1562       nop.i         0
1564 { .mfi
1565       nop.m         0
1566       fma.s1        FR_A15 = FR_A15,FR_r4,FR_A11
1567       nop.i         0
1569 { .mfi
1570       nop.m         0
1571       fma.s1        FR_A7 = FR_A7,FR_r4,FR_A3
1572       nop.i         0
1574 .pred.rel "mutex",p6,p7
1575 { .mfi
1576       nop.m         0 
1577 (p6)  fma.s1        FR_A15 = FR_A15,FR_r8,FR_A7
1578       nop.i         0
1580 { .mfi
1581       nop.m         0 
1582 (p7)  fma.d.s0      f8 = FR_A15,FR_r8,FR_A7
1583       nop.i         0
1585 { .mfb
1586       nop.m         0 
1587 (p6)  fma.d.s0      f8 = FR_A15,FR_Rcp3,f0
1588       br.ret.sptk   b0
1591 // overflow
1592 //--------------------------------------------------------------------
1593 .align 32
1594 tgamma_ovf_near_0_boundary:
1595 .pred.rel "mutex",p14,p15
1596 { .mfi
1597           mov           GR_fpsr = ar.fpsr
1598           nop.f         0
1599 (p15) mov           r8 = 0x7ff
1601 { .mfi
1602       nop.m         0
1603       nop.f         0
1604 (p14) mov           r8 = 0xfff
1606 { .mfi
1607           nop.m         0
1608           nop.f         0
1609           shl           r8 = r8,52 
1611 { .mfi
1612       sub           r8 = r8,r0,1
1613       nop.f         0
1614           extr.u        GR_fpsr = GR_fpsr,10,2 // rounding mode
1616 .pred.rel "mutex",p14,p15
1617 { .mfi
1618       // set p8 to 0 in case of overflow and to 1 otherwise
1619           // for negative arg: 
1620           //    no overflow if rounding mode either Z or +Inf, i.e.
1621           //    GR_fpsr > 1
1622 (p14) cmp.lt        p8,p0 = 1,GR_fpsr
1623       nop.f         0
1624           // for positive arg: 
1625           //    no overflow if rounding mode either Z or -Inf, i.e.
1626           //    (GR_fpsr & 1) == 0
1627 (p15) tbit.z        p0,p8 = GR_fpsr,0
1629 { .mib
1630 (p8)  setf.d        f8 = r8 // set result to 0x7fefffffffffffff without
1631                             // OVERFLOW flag raising
1632       nop.i         0
1633 (p8)  br.ret.sptk   b0
1635 .align 32
1636 tgamma_ovf_near_0:
1637 { .mfi
1638       mov           r8 = 0x1FFFE
1639       nop.f         0  
1640       nop.i         0
1642 { .mfi
1643       setf.exp      f9 = r8
1644       fmerge.s      FR_X = f8,f8
1645       mov           GR_TAG = 258 // overflow
1647 .pred.rel "mutex",p14,p15
1648 { .mfi
1649       nop.m         0 
1650 (p15) fma.d.s0      f8 = f9,f9,f0 // Set I,O and +INF result
1651       nop.i         0 
1653 { .mfb
1654       nop.m         0 
1655 (p14) fnma.d.s0     f8 = f9,f9,f0 // Set I,O and -INF result
1656       br.cond.sptk  tgamma_libm_err
1658 // overflow or absolute value of x is too big
1659 //--------------------------------------------------------------------
1660 .align 32
1661 tgamma_spec_res:
1662 { .mfi
1663       mov           GR_0x30033 = 0x30033
1664 (p14) fcmp.eq.unc.s1 p10,p11 = f8,FR_Xt
1665 (p15) mov           r8 = 0x1FFFE
1667 { .mfi
1668 (p15) setf.exp      f9 = r8
1669       nop.f         0
1670       nop.i         0
1672 { .mfb
1673 (p11) cmp.ltu.unc   p7,p8 = GR_0x30033,GR_Sign_Exp
1674       nop.f         0 
1675 (p10) br.cond.spnt  tgamma_singularity
1677 .pred.rel "mutex",p7,p8
1678 { .mbb
1679       nop.m         0
1680 (p7)  br.cond.spnt  tgamma_singularity
1681 (p8)  br.cond.spnt  tgamma_underflow
1683 { .mfi
1684       nop.m         0
1685       fmerge.s      FR_X = f8,f8
1686       mov           GR_TAG = 258 // overflow
1688 { .mfb
1689       nop.m         0 
1690 (p15) fma.d.s0      f8 = f9,f9,f0 // Set I,O and +INF result
1691       br.cond.sptk  tgamma_libm_err
1694 // x is negative integer or +/-0
1695 //--------------------------------------------------------------------
1696 .align 32
1697 tgamma_singularity:
1698 { .mfi
1699       nop.m         0
1700       fmerge.s      FR_X = f8,f8
1701       mov           GR_TAG = 259 // negative
1703 { .mfb
1704       nop.m         0
1705       frcpa.s0      f8,p0 = f0,f0
1706       br.cond.sptk  tgamma_libm_err
1708 // x is negative noninteger with big absolute value
1709 //--------------------------------------------------------------------
1710 .align 32
1711 tgamma_underflow:
1712 { .mmi
1713       getf.sig      GR_Sig = FR_iXt
1714       mov           r11 = 0x00001
1715       nop.i         0
1717 { .mfi
1718       setf.exp      f9 = r11
1719       nop.f         0
1720       nop.i         0
1722 { .mfi
1723       nop.m         0
1724       nop.f         0
1725       tbit.z        p6,p7 = GR_Sig,0
1727 .pred.rel "mutex",p6,p7
1728 { .mfi
1729       nop.m         0
1730 (p6)  fms.d.s0      f8 = f9,f9,f9
1731       nop.i         0
1733 { .mfb
1734       nop.m         0
1735 (p7)  fma.d.s0      f8 = f9,f9,f9
1736       br.ret.sptk   b0
1739 //  x for natval, nan, +/-inf or +/-0
1740 //--------------------------------------------------------------------
1741 .align 32
1742 tgamma_spec:
1743 { .mfi
1744       nop.m         0
1745       fclass.m      p6,p0 =  f8,0x1E1 // Test x for natval, nan, +inf
1746       nop.i         0
1748 { .mfi
1749       nop.m         0
1750       fclass.m      p7,p8 =  f8,0x7 // +/-0
1751       nop.i         0
1753 { .mfi
1754       nop.m         0
1755       fmerge.s      FR_X = f8,f8
1756       nop.i         0
1758 { .mfb
1759       nop.m         0
1760 (p6)  fma.d.s0      f8 = f8,f1,f8
1761 (p6)  br.ret.spnt   b0
1763 .pred.rel "mutex",p7,p8
1764 { .mfi
1765 (p7)  mov           GR_TAG = 259 // negative
1766 (p7)  frcpa.s0      f8,p0 = f1,f8
1767       nop.i         0 
1769 { .mib
1770       nop.m         0
1771       nop.i         0
1772 (p8)  br.cond.spnt  tgamma_singularity
1775 .align 32
1776 tgamma_libm_err:
1777 { .mfi
1778        alloc        r32 = ar.pfs,1,4,4,0
1779        nop.f        0
1780        mov          GR_Parameter_TAG = GR_TAG
1783 GLOBAL_LIBM_END(tgamma)
1785 LOCAL_LIBM_ENTRY(__libm_error_region)
1786 .prologue
1787 { .mfi
1788         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1789         nop.f 0
1790 .save   ar.pfs,GR_SAVE_PFS
1791         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
1793 { .mfi
1794 .fframe 64 
1795         add sp=-64,sp                           // Create new stack
1796         nop.f 0
1797         mov GR_SAVE_GP=gp                       // Save gp
1799 { .mmi
1800         stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
1801         add GR_Parameter_X = 16,sp              // Parameter 1 address
1802 .save   b0, GR_SAVE_B0                      
1803         mov GR_SAVE_B0=b0                       // Save b0 
1805 .body
1806 { .mib
1807         stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1 on stack 
1808         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address 
1809         nop.b 0
1811 { .mib
1812         stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3 on stack
1813         add   GR_Parameter_Y = -16,GR_Parameter_Y  
1814         br.call.sptk b0=__libm_error_support#         // Call error handling function
1816 { .mmi
1817         nop.m 0
1818         nop.m 0
1819         add   GR_Parameter_RESULT = 48,sp
1821 { .mmi
1822         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1823 .restore sp
1824         add   sp = 64,sp                       // Restore stack pointer
1825         mov   b0 = GR_SAVE_B0                  // Restore return address
1827 { .mib
1828         mov   gp = GR_SAVE_GP                  // Restore gp 
1829         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1830         br.ret.sptk     b0                     // Return
1831 };; 
1833 LOCAL_LIBM_END(__libm_error_region)
1834 .type   __libm_error_support#,@function
1835 .global __libm_error_support#