Don't fail on non-bi-arch arches.
[glibc.git] / sysdeps / ia64 / fpu / w_tgammaf.S
blob4363ca27b80d680163bc9783f5f0f5cc7caa59be
1 .file "tgammaf.s"
4 // Copyright (c) 2001 - 2003, Intel Corporation
5 // All rights reserved.
6 //
7 // Contributed 2001 by the Intel Numerics Group, Intel Corporation
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 
26 // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29 // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code,and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //*********************************************************************
42 // History: 
43 // 11/30/01  Initial version
44 // 05/20/02  Cleaned up namespace and sf0 syntax
45 // 02/10/03  Reordered header: .section, .global, .proc, .align
46 // 04/04/03  Changed error codes for overflow and negative integers
47 // 04/10/03  Changed code for overflow near zero handling
49 //*********************************************************************
51 //*********************************************************************
53 // Function: tgammaf(x) computes the principal value of the GAMMA
54 // function of x.
56 //*********************************************************************
58 // Resources Used:
60 //    Floating-Point Registers: f8-f15
61 //                              f33-f75
63 //    General Purpose Registers:
64 //      r8-r11
65 //      r14-r29
66 //      r32-r36
67 //      r37-r40 (Used to pass arguments to error handling routine)
69 //    Predicate Registers:      p6-p15
71 //*********************************************************************
73 // IEEE Special Conditions:
75 //    tgammaf(+inf) = +inf
76 //    tgammaf(-inf) = QNaN 
77 //    tgammaf(+/-0) = +/-inf 
78 //    tgammaf(x<0, x - integer) = QNaN
79 //    tgammaf(SNaN) = QNaN
80 //    tgammaf(QNaN) = QNaN
82 //*********************************************************************
84 // Overview
86 // The method consists of three cases.
87 // 
88 // If       2 <= x < OVERFLOW_BOUNDARY   use case tgamma_regular;
89 // else if  0 < x < 2                    use case tgamma_from_0_to_2;
90 // else if  -(i+1) <  x < -i, i = 0...43 use case tgamma_negatives;
92 // Case 2 <= x < OVERFLOW_BOUNDARY
93 // -------------------------------
94 //   Here we use algorithm based on the recursive formula
95 //   GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
96 //   [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and
97 //   approximate GAMMA(x) by polynomial of 12th degree on each
98 //   [8*n; 8*n+1], recursive formula is used to expand GAMMA(x)
99 //   to [8*n; 8*(n+1)]. In other words we need to find n, i and r
100 //   such that x = 8 * n + i + r where n and i are integer numbers
101 //   and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) =
102 //   = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
103 //   = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~
104 //   ~ (x-1)*(x-2)*...*(x-i)*P12n(r).
106 //   Step 1: Reduction
107 //   -----------------
108 //    N = [x] with truncate
109 //    r = x - N, note 0 <= r < 1
111 //    n = N & ~0x7 - index of table that contains coefficient of
112 //                   polynomial approximation
113 //    i = N & 0x7  - is used in recursive formula
114 //   
116 //   Step 2: Approximation
117 //   ---------------------
118 //    We use factorized minimax approximation polynomials
119 //    P12n(r) = A12*(r^2+C01(n)*r+C00(n))*
120 //              *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n))
122 //   Step 3: Recursion
123 //   -----------------
124 //    In case when i > 0 we need to multiply P12n(r) by product
125 //    R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
126 //    we can calculate R as follows:
127 //    R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
128 //    even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
129 //    *(x-i) if i is odd. In both cases we need to calculate
130 //    R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2-(4*j-1)*x+2*j*(2*j-1)) =
131 //    = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) =
132 //    = (RA+2*RB)*(RA+6*(1+RB))*...*(RA+2*(2*j-1)*(j-1+RB))
133 //    where j = 1..[i/2], RA = x^2-x, RB = 1-x.
135 //   Step 4: Reconstruction
136 //   ----------------------
137 //    Reconstruction is just simple multiplication i.e.
138 //    GAMMA(x) = P12n(r)*R(i,x)
140 // Case 0 < x < 2
141 // --------------
142 //    To calculate GAMMA(x) on this interval we do the following
143 //        if 1.0  <= x < 1.25  then  GAMMA(x) = P7(x-1)
144 //        if 1.25 <= x < 1.5   then  GAMMA(x) = P7(x-x_min) where
145 //              x_min is point of local minimum on [1; 2] interval.
146 //        if 1.5  <= x < 1.75  then  GAMMA(x) = P7(x-1)
147 //        if 1.75 <= x < 2.0   then  GAMMA(x) = P7(x-1)
148 //    and
149 //        if 0 < x < 1 then GAMMA(x) = GAMMA(x+1)/x
151 // Case -(i+1) <  x < -i, i = 0...43
152 // ----------------------------------
153 //    Here we use the fact that GAMMA(-x) = -PI/(x*GAMMA(x)*sin(PI*x)) and
154 //    so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
155 //    GAMMA(x) is described above.
157 //   Step 1: Reduction
158 //   -----------------
159 //    Note that period of sin(PI*x) is 2 and range reduction for
160 //    sin(PI*x) is similar to range reduction for GAMMA(x)
161 //    i.e. rs = x - round(x) and |rs| <= 0.5.
163 //   Step 2: Approximation
164 //   ---------------------
165 //    To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI = 
166 //    = (-1)^n*sin(PI*rs)/PI Taylor series is used.
167 //    sin(PI*rs)/PI ~ S17(rs).
169 //   Step 3: Division
170 //   ----------------
171 //    To calculate 1/x and 1/(GAMMA(x)*S17(rs)) we use frcpa
172 //    instruction followed by Newton-Raphson iterations.
173 //  
175 //*********************************************************************
// Symbolic names for the registers used by tgammaf.
// Several names map to the same physical register (r8, r10, r21, r28, r29,
// f33-f40, f51); two names that share a register cannot hold live values
// at the same time.

// General-purpose scratch registers.
177 GR_ad_Data              = r8
178 GR_TAG                  = r8
179 GR_SignExp              = r9
180 GR_Sig                  = r10
181 GR_ArgNz                = r10
182 GR_RqDeg                = r11
184 GR_NanBound             = r14
185 GR_ExpOf025             = r15
186 GR_ExpOf05              = r16
187 GR_ad_Co                = r17
188 GR_ad_Ce                = r18
189 GR_TblOffs              = r19
190 GR_Arg                  = r20
191 GR_Exp2Ind              = r21
192 GR_TblOffsMask          = r21
193 GR_Offs                 = r22
194 GR_OvfNzBound           = r23
195 GR_ZeroResBound         = r24
196 GR_ad_SinO              = r25
197 GR_ad_SinE              = r26
198 GR_Correction           = r27
199 GR_Tbl12Offs            = r28
200 GR_NzBound              = r28
201 GR_ExpOf1               = r29
202 GR_fpsr                 = r29
// r33-r36: not referenced in the visible part of the file; presumably
// save slots used around the error-handling call -- TODO confirm.
204 GR_SAVE_B0              = r33
205 GR_SAVE_PFS             = r34
206 GR_SAVE_GP              = r35
207 GR_SAVE_SP              = r36
// r37-r40: used to pass arguments to the error handling routine
// (see "Resources Used" in the file header).
209 GR_Parameter_X          = r37
210 GR_Parameter_Y          = r38
211 GR_Parameter_RESULT     = r39
212 GR_Parameter_TAG        = r40
// Floating-point registers.  f8 carries the argument on entry and the
// result on return; FR_X (f10) is the destination of the "set deno flag"
// fma.d.s0 instructions.
215 FR_X                    = f10
216 FR_Y                    = f1
217 FR_RESULT               = f8
// Argument reduction: FR_iXt receives fcvt.fx.trunc of x, FR_Xt its
// conversion back to floating point, FR_r the reduced argument.
219 FR_iXt                  = f11 
220 FR_Xt                   = f12
221 FR_r                    = f13
222 FR_r2                   = f14
223 FR_r4                   = f15
// Polynomial coefficients.  The FR_Cxx names are loaded on the |x| >= 2
// path, the FR_Ax names on the 0 < x < 2 paths; both sets share f33-f40.
225 FR_C01                  = f33
226 FR_A7                   = f33
227 FR_C11                  = f34
228 FR_A6                   = f34
229 FR_C21                  = f35
230 FR_A5                   = f35
231 FR_C31                  = f36
232 FR_A4                   = f36
233 FR_C41                  = f37
234 FR_A3                   = f37
235 FR_C51                  = f38
236 FR_A2                   = f38
238 FR_C00                  = f39
239 FR_A1                   = f39
240 FR_C10                  = f40
241 FR_A0                   = f40
242 FR_C20                  = f41
243 FR_C30                  = f42
244 FR_C40                  = f43
245 FR_C50                  = f44
246 FR_An                   = f45
247 FR_OvfBound             = f46
248 FR_InvAn                = f47
// Recursion product: FR_X2mX = x^2-|x|, FR_1mX = 1-|x|, FR_Rq0..FR_Rq3 are
// the partial factors (FR_Rq0 reuses f51 after FR_1mX).
250 FR_Multplr              = f48
251 FR_NormX                = f49
252 FR_X2mX                 = f50
253 FR_1mX                  = f51
254 FR_Rq0                  = f51
255 FR_Rq1                  = f52
256 FR_Rq2                  = f53
257 FR_Rq3                  = f54
// Reciprocal approximations (frcpa seed plus Newton-Raphson refinements).
259 FR_Rcp0                 = f55
260 FR_Rcp1                 = f56
261 FR_Rcp2                 = f57
263 FR_InvNormX1            = f58
264 FR_InvNormX2            = f59
// Reduced argument for sin(pi*x)/pi and its square.
266 FR_rs                   = f60
267 FR_rs2                  = f61
// Constants: FR_LocalMin and FR_10 are loaded from the data table,
// FR_05 is built with setf.exp, FR_2 and FR_6 are computed from f1.
269 FR_LocalMin             = f62
270 FR_10                   = f63
272 FR_05                   = f64
// Coefficients of the factored sin(pi*x)/pi approximation.
274 FR_S32                  = f65
275 FR_S31                  = f66
276 FR_S01                  = f67
277 FR_S11                  = f68
278 FR_S21                  = f69
279 FR_S00                  = f70
280 FR_S10                  = f71
281 FR_S20                  = f72
283 FR_GAMMA                = f73
284 FR_2                    = f74
285 FR_6                    = f75
290 // Data tables
291 //==============================================================
292 RODATA
293 .align 16
// Coefficient table for tgammaf.  Byte offsets from the start of the object:
//   0x000  local minimum, 10.0, S32, S31 (fetched with two ldfpd loads)
//   0x020  six tables for x >= 2, 14 data8 = 0x70 bytes each; the code keeps
//          the table index in GR_Offs in 16-byte units (0x7 per table)
//   0x2C0  four tables for [1; 2), 8 data8 = 0x40 bytes each; the 0 < x < 1
//          path selects among the same tables through GR_Tbl12Offs
//   0x3C0  coefficients of the sin(pi*x)/pi approximation
//   0x3F0  1/An for [40; 48), stored as two data8 words and read with ldfe
// Entries inside a table are ordered for two independent pointers that each
// fetch pairs with ldfpd (GR_ad_Co/GR_ad_Ce, GR_ad_SinO/GR_ad_SinE), which
// is why the coefficient order is not monotone.  Do not reorder entries.
294 LOCAL_OBJECT_START(tgammaf_data)
295 data8 0x3FDD8B618D5AF8FE // local minimum (0.461632144968362356785)
296 data8 0x4024000000000000 // 10.0
297 data8 0x3E90FC992FF39E13 // S32
298 data8 0xBEC144B2760626E2 // S31
// Tables for x >= 2: the first 8 entries (C01 .. overflow boundary) are read
// through GR_ad_Co, the last 6 (C21 .. C50) through GR_ad_Ce.
300 //[2; 8)
301 data8 0x4009EFD1BA0CB3B4 // C01
302 data8 0x3FFFB35378FF4822 // C11
303 data8 0xC01032270413B896 // C41
304 data8 0xC01F171A4C0D6827 // C51
305 data8 0x40148F8E197396AC // C20
306 data8 0x401C601959F1249C // C30
307 data8 0x3EE21AD881741977 // An
308 data8 0x4041852200000000 // overflow boundary (35.04010009765625)
309 data8 0x3FD9CE68F695B198 // C21
310 data8 0xBFF8C30AC900DA03 // C31
311 data8 0x400E17D2F0535C02 // C00
312 data8 0x4010689240F7FAC8 // C10
313 data8 0x402563147DDCCF8D // C40
314 data8 0x4033406D0480A21C // C50
316 //[8; 16)
317 data8 0x4006222BAE0B793B // C01
318 data8 0x4002452733473EDA // C11
319 data8 0xC0010EF3326FDDB3 // C41
320 data8 0xC01492B817F99C0F // C51
321 data8 0x40099C905A249B75 // C20
322 data8 0x4012B972AE0E533D // C30
323 data8 0x3FE6F6DB91D0D4CC // An
324 data8 0x4041852200000000 // overflow boundary
325 data8 0x3FF545828F7B73C5 // C21
326 data8 0xBFBBD210578764DF // C31
327 data8 0x4000542098F53CFC // C00
328 data8 0x40032C1309AD6C81 // C10
329 data8 0x401D7331E19BD2E1 // C40
330 data8 0x402A06807295EF57 // C50
332 //[16; 24)
333 data8 0x4000131002867596 // C01
334 data8 0x3FFAA362D5D1B6F2 // C11
335 data8 0xBFFCB6985697DB6D // C41
336 data8 0xC0115BEE3BFC3B3B // C51
337 data8 0x3FFE62FF83456F73 // C20
338 data8 0x4007E33478A114C4 // C30
339 data8 0x41E9B2B73795ED57 // An
340 data8 0x4041852200000000 // overflow boundary
341 data8 0x3FEEB1F345BC2769 // C21
342 data8 0xBFC3BBE6E7F3316F // C31
343 data8 0x3FF14E07DA5E9983 // C00
344 data8 0x3FF53B76BF81E2C0 // C10
345 data8 0x4014051E0269A3DC // C40
346 data8 0x40229D4227468EDB // C50
348 //[24; 32)
349 data8 0x3FFAF7BD498384DE // C01
350 data8 0x3FF62AD8B4D1C3D2 // C11
351 data8 0xBFFABCADCD004C32 // C41
352 data8 0xC00FADE97C097EC9 // C51
353 data8 0x3FF6DA9ED737707E // C20
354 data8 0x4002A29E9E0C782C // C30
355 data8 0x44329D5B5167C6C3 // An
356 data8 0x4041852200000000 // overflow boundary
357 data8 0x3FE8943CBBB4B727 // C21
358 data8 0xBFCB39D466E11756 // C31
359 data8 0x3FE879AF3243D8C1 // C00
360 data8 0x3FEEC7DEBB14CE1E // C10
361 data8 0x401017B79BA80BCB // C40
362 data8 0x401E941DC3C4DE80 // C50
364 //[32; 40)
365 data8 0x3FF7ECB3A0E8FE5C // C01
366 data8 0x3FF3815A8516316B // C11
367 data8 0xBFF9ABD8FCC000C3 // C41
368 data8 0xC00DD89969A4195B // C51
369 data8 0x3FF2E43139CBF563 // C20
370 data8 0x3FFF96DC3474A606 // C30
371 data8 0x46AFF4CA9B0DDDF0 // An
372 data8 0x4041852200000000 // overflow boundary
373 data8 0x3FE4CE76DA1B5783 // C21
374 data8 0xBFD0524DB460BC4E // C31
375 data8 0x3FE35852DF14E200 // C00
376 data8 0x3FE8C7610359F642 // C10
377 data8 0x400BCF750EC16173 // C40
378 data8 0x401AC14E02EA701C // C50
// For [40; 48) the An slot holds 1.0; the reciprocal 1/An is stored
// separately at the end of this object.
380 //[40; 48)
381 data8 0x3FF5DCE4D8193097 // C01
382 data8 0x3FF1B0D8C4974FFA // C11
383 data8 0xBFF8FB450194CAEA // C41
384 data8 0xC00C9658E030A6C4 // C51
385 data8 0x3FF068851118AB46 // C20
386 data8 0x3FFBF7C7BB46BF7D // C30
387 data8 0x3FF0000000000000 // An
388 data8 0x4041852200000000 // overflow boundary
389 data8 0x3FE231DEB11D847A // C21
390 data8 0xBFD251ECAFD7E935 // C31
391 data8 0x3FE0368AE288F6BF // C00
392 data8 0x3FE513AE4215A70C // C10
393 data8 0x4008F960F7141B8B // C40
394 data8 0x40183BA08134397B // C50
// Tables for [1; 2): A7,A6 and A3,A2 are read through GR_ad_Co,
// A5,A4 and A1,A0 through GR_ad_Ce.
396 //[1.0; 1.25)
397 data8 0xBFD9909648921868 // A7
398 data8 0x3FE96FFEEEA8520F // A6
399 data8 0xBFED0800D93449B8 // A3
400 data8 0x3FEFA648D144911C // A2
401 data8 0xBFEE3720F7720B4D // A5
402 data8 0x3FEF4857A010CA3B // A4
403 data8 0xBFE2788CCD545AA4 // A1
404 data8 0x3FEFFFFFFFE9209E // A0
406 //[1.25; 1.5)
407 data8 0xBFB421236426936C // A7
408 data8 0x3FAF237514F36691 // A6
409 data8 0xBFC0BADE710A10B9 // A3
410 data8 0x3FDB6C5465BBEF1F // A2
411 data8 0xBFB7E7F83A546EBE // A5
412 data8 0x3FC496A01A545163 // A4
413 data8 0xBDEE86A39D8452EB // A1
414 data8 0x3FEC56DC82A39AA2 // A0
416 //[1.5; 1.75)
417 data8 0xBF94730B51795867 // A7
418 data8 0x3FBF4203E3816C7B // A6
419 data8 0xBFE85B427DBD23E4 // A3
420 data8 0x3FEE65557AB26771 // A2
421 data8 0xBFD59D31BE3AB42A // A5
422 data8 0x3FE3C90CC8F09147 // A4
423 data8 0xBFE245971DF735B8 // A1
424 data8 0x3FEFFC613AE7FBC8 // A0
426 //[1.75; 2.0)
427 data8 0xBF7746A85137617E // A7
428 data8 0x3FA96E37D09735F3 // A6
429 data8 0xBFE3C24AC40AC0BB // A3
430 data8 0x3FEC56A80A977CA5 // A2
431 data8 0xBFC6F0E707560916 // A5
432 data8 0x3FDB262D949175BE // A4
433 data8 0xBFE1C1AEDFB25495 // A1
434 data8 0x3FEFEE1E644B2022 // A0
// S01,S11 and S10,S20 are read through GR_ad_SinO; S21,S00 and then the
// 1/An value below are read through GR_ad_SinE.
436 // sin(pi*x)/pi
437 data8 0xC026FB0D377656CC // S01
438 data8 0x3FFFB15F95A22324 // S11
439 data8 0x406CE58F4A41C6E7 // S10
440 data8 0x404453786302C61E // S20
441 data8 0xC023D59A47DBFCD3 // S21
442 data8 0x405541D7ABECEFCA // S00
// Two 8-byte words forming one 16-byte slot that the code loads into
// FR_InvAn with a single ldfe.
444 // 1/An for [40; 48)
445 data8 0xCAA7576DE621FCD5, 0x3F68
446 LOCAL_OBJECT_END(tgammaf_data)
448 //==============================================================
449 // Code
450 //==============================================================
452 .section .text
453 GLOBAL_LIBM_ENTRY(tgammaf)
454 { .mfi
455       getf.exp      GR_SignExp = f8
456       fma.s1        FR_NormX = f8,f1,f0
457       addl          GR_ad_Data = @ltoff(tgammaf_data), gp
459 { .mfi
460       mov           GR_ExpOf05 = 0xFFFE
461       fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
462       mov           GR_Offs = 0 // 2 <= x < 8
464 { .mfi
465       getf.d        GR_Arg = f8
466       fcmp.lt.s1    p14,p15 = f8,f0
467       mov           GR_Tbl12Offs = 0
469 { .mfi
470       setf.exp      FR_05 = GR_ExpOf05
471       fma.s1        FR_2 = f1,f1,f1 // 2
472       mov           GR_Correction = 0
474 { .mfi
475       ld8           GR_ad_Data = [GR_ad_Data]
476       fclass.m      p10,p0 = f8,0x1E7 // is x  NaTVal, NaN, +/-0 or +/-INF?
477       tbit.z        p12,p13 = GR_SignExp,16 // p13 if |x| >= 2
479 { .mfi
480       mov           GR_ExpOf1 = 0xFFFF
481       fcvt.fx.s1    FR_rs = f8 // round(x)
482       and           GR_Exp2Ind = 7,GR_SignExp
484 .pred.rel "mutex",p14,p15
485 { .mfi
486 (p15) cmp.eq.unc    p11,p0 = GR_ExpOf1,GR_SignExp // p11 if 1 <= x < 2
487 (p14) fma.s1        FR_1mX = f1,f1,f8 // 1 - |x|
488       mov           GR_Sig = 0 // if |x| < 2
490 { .mfi
491 (p13) cmp.eq.unc    p7,p0 = 2,GR_Exp2Ind
492 (p15) fms.s1        FR_1mX = f1,f1,f8 // 1 - |x|
493 (p13) cmp.eq.unc    p8,p0 = 3,GR_Exp2Ind
495 .pred.rel "mutex",p7,p8
496 { .mfi
497 (p7)  mov           GR_Offs = 0x7    // 8 <= |x| < 16
498       nop.f         0
499 (p8)  tbit.z.unc    p0,p6 = GR_Arg,51
501 { .mib
502 (p13) cmp.lt.unc    p9,p0 = 3,GR_Exp2Ind
503 (p8)  mov           GR_Offs = 0xE // 16 <= |x| < 32
504       // jump if x is NaTVal, NaN, +/-0 or +/-INF?
505 (p10) br.cond.spnt  tgammaf_spec_args
507 .pred.rel "mutex",p14,p15
508 .pred.rel "mutex",p6,p9
509 { .mfi
510 (p9)  mov           GR_Offs = 0x1C // 32 <= |x|
511 (p14) fma.s1        FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
512 (p9)  tbit.z.unc    p0,p8 = GR_Arg,50
514 { .mfi
515       ldfpd         FR_LocalMin,FR_10 = [GR_ad_Data],16
516 (p15) fms.s1        FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
517 (p6)  add           GR_Offs = 0x7,GR_Offs // 24 <= x < 32
519 .pred.rel "mutex",p8,p12
520 { .mfi
521       add           GR_ad_Ce = 0x50,GR_ad_Data
522 (p15) fcmp.lt.unc.s1 p10,p0 = f8,f1 // p10 if 0 <= x < 1
523       mov           GR_OvfNzBound = 2
525 { .mib
526       ldfpd         FR_S32,FR_S31 = [GR_ad_Data],16
527 (p8)  add           GR_Offs = 0x7,GR_Offs // 40 <= |x|
528       // jump if 1 <= x < 2
529 (p11) br.cond.spnt  tgammaf_from_1_to_2
531 { .mfi
532       shladd        GR_ad_Ce = GR_Offs,4,GR_ad_Ce
533       fcvt.xf       FR_Xt = FR_iXt // [x]
534 (p13) cmp.eq.unc    p7,p0 = r0,GR_Offs // p7 if 2 <= |x| < 8
536 { .mfi
537       shladd        GR_ad_Co = GR_Offs,4,GR_ad_Data
538       fma.s1        FR_6 = FR_2,FR_2,FR_2
539       mov           GR_ExpOf05 = 0x7FC
541 { .mfi
542 (p13) getf.sig      GR_Sig = FR_iXt // if |x| >= 2
543       frcpa.s1      FR_Rcp0,p0 = f1,FR_NormX
544 (p10) shr           GR_Arg = GR_Arg,51
546 { .mib
547       ldfpd         FR_C01,FR_C11 = [GR_ad_Co],16
548 (p7)  mov           GR_Correction = 2
549       // jump if 0 < x < 1
550 (p10) br.cond.spnt  tgammaf_from_0_to_1
552 { .mfi
553       ldfpd         FR_C21,FR_C31 = [GR_ad_Ce],16
554       fma.s1        FR_Rq2 = f1,f1,FR_1mX // 2 - |x|
555 (p14) sub           GR_Correction = r0,GR_Correction
557 { .mfi
558       ldfpd         FR_C41,FR_C51 = [GR_ad_Co],16
559 (p14) fcvt.xf       FR_rs = FR_rs
560 (p14) add           GR_ad_SinO = 0x3A0,GR_ad_Data
562 .pred.rel "mutex",p14,p15
563 { .mfi
564       ldfpd         FR_C00,FR_C10 = [GR_ad_Ce],16
565       nop.f         0
566 (p14) sub           GR_Sig = GR_Correction,GR_Sig
568 { .mfi
569       ldfpd         FR_C20,FR_C30 = [GR_ad_Co],16
570       fma.s1        FR_Rq1 = FR_1mX,FR_2,FR_X2mX // (x-1)*(x-2)
571 (p15) sub           GR_Sig = GR_Sig,GR_Correction
573 { .mfi
574 (p14) ldfpd         FR_S01,FR_S11 = [GR_ad_SinO],16
575       fma.s1        FR_Rq3 = FR_2,f1,FR_1mX // 3 - |x|
576       and           GR_RqDeg = 0x6,GR_Sig
578 { .mfi
579       ldfpd         FR_C40,FR_C50 = [GR_ad_Ce],16
580 (p14) fma.d.s0      FR_X = f0,f0,f8 // set deno flag
581       mov           GR_NanBound = 0x30016 // -2^23
583 .pred.rel "mutex",p14,p15
584 { .mfi
585 (p14) add           GR_ad_SinE = 0x3C0,GR_ad_Data
586 (p15) fms.s1        FR_r = FR_NormX,f1,FR_Xt // r = x - [x]
587       cmp.eq        p8,p0 = 2,GR_RqDeg
589 { .mfi
590       ldfpd         FR_An,FR_OvfBound = [GR_ad_Co]
591 (p14) fms.s1        FR_r = FR_Xt,f1,FR_NormX // r = |x - [x]|
592       cmp.eq        p9,p0 = 4,GR_RqDeg
594 .pred.rel "mutex",p8,p9
595 { .mfi
596 (p14) ldfpd         FR_S21,FR_S00 = [GR_ad_SinE],16
597 (p8)  fma.s1        FR_Rq0 = FR_2,f1,FR_1mX // (3-x)
598       tbit.z        p0,p6 = GR_Sig,0
600 { .mfi
601 (p14) ldfpd         FR_S10,FR_S20 = [GR_ad_SinO],16
602 (p9)  fma.s1        FR_Rq0 = FR_2,FR_2,FR_1mX // (5-x)
603       cmp.eq        p10,p0 = 6,GR_RqDeg
605 { .mfi
606 (p14) getf.s        GR_Arg = f8
607 (p14) fcmp.eq.unc.s1 p13,p0 = FR_NormX,FR_Xt
608 (p14) mov           GR_ZeroResBound = 0xC22C // -43
610 { .mfi
611 (p14) ldfe          FR_InvAn = [GR_ad_SinE]
612 (p10) fma.s1        FR_Rq0 = FR_6,f1,FR_1mX // (7-x)
613       cmp.eq        p7,p0 = r0,GR_RqDeg
615 { .mfi
616 (p14) cmp.ge.unc    p11,p0 = GR_SignExp,GR_NanBound
617       fma.s1        FR_Rq2 = FR_Rq2,FR_6,FR_X2mX // (x-3)*(x-4)
618 (p14) shl           GR_ZeroResBound = GR_ZeroResBound,16
620 { .mfb
621 (p14) mov           GR_OvfNzBound = 0x802
622 (p14) fms.s1        FR_rs = FR_rs,f1,FR_NormX // rs = round(x) - x
623       // jump if  x < -2^23 i.e. x is negative integer
624 (p11) br.cond.spnt  tgammaf_singularity
626 { .mfi
627       nop.m         0
628 (p7)  fma.s1        FR_Rq1 = f0,f0,f1
629 (p14) shl           GR_OvfNzBound = GR_OvfNzBound,20
631 { .mfb
632       nop.m         0
633       fma.s1        FR_Rq3 = FR_Rq3,FR_10,FR_X2mX // (x-5)*(x-6)
634       // jump if x is negative integer such that -2^23 < x < 0
635 (p13) br.cond.spnt  tgammaf_singularity
637 { .mfi
638       nop.m         0
639       fma.s1        FR_C01 = FR_C01,f1,FR_r
640 (p14) mov           GR_ExpOf05 = 0xFFFE
642 { .mfi
643 (p14) cmp.eq.unc    p7,p0 = GR_Arg,GR_OvfNzBound
644       fma.s1        FR_C11 = FR_C11,f1,FR_r
645 (p14) cmp.ltu.unc   p11,p0 = GR_Arg,GR_OvfNzBound
647 { .mfi
648       nop.m         0
649       fma.s1        FR_C21 = FR_C21,f1,FR_r
650 (p14) cmp.ltu.unc   p9,p0 = GR_ZeroResBound,GR_Arg
652 { .mfb
653       nop.m         0
654       fma.s1        FR_C31 = FR_C31,f1,FR_r
655       // jump if argument is close to 0 negative
656 (p11) br.cond.spnt  tgammaf_overflow
658 { .mfi
659       nop.m         0
660       fma.s1        FR_C41 = FR_C41,f1,FR_r
661       nop.i         0
663 { .mfb
664       nop.m         0
665       fma.s1        FR_C51 = FR_C51,f1,FR_r
666       // jump if x is negative noninteger such that -2^23 < x < -43
667 (p9)  br.cond.spnt  tgammaf_underflow
669 { .mfi
670       nop.m         0
671 (p14) fma.s1        FR_rs2 = FR_rs,FR_rs,f0
672       nop.i         0 
674 { .mfb
675       nop.m         0
676 (p14) fma.s1        FR_S01 = FR_rs,FR_rs,FR_S01
677       // jump if argument is 0x80200000
678 (p7)  br.cond.spnt  tgammaf_overflow_near0_bound
680 { .mfi
681       nop.m         0 
682 (p6)  fnma.s1       FR_Rq1 = FR_Rq1,FR_Rq0,f0
683       nop.i         0 
685 { .mfi
686       nop.m         0 
687 (p10) fma.s1        FR_Rq2 = FR_Rq2,FR_Rq3,f0
688       and           GR_Sig = 0x7,GR_Sig
690 { .mfi
691       nop.m         0
692       fma.s1        FR_C01 = FR_C01,FR_r,FR_C00
693       nop.i         0
695 { .mfi
696       nop.m         0
697       fma.s1        FR_C11 = FR_C11,FR_r,FR_C10
698       cmp.eq        p6,p7 = r0,GR_Sig // p6 if |x| from one of base intervals
700 { .mfi
701       nop.m         0
702       fma.s1        FR_C21 = FR_C21,FR_r,FR_C20
703       nop.i         0
705 { .mfi
706       nop.m         0
707       fma.s1        FR_C31 = FR_C31,FR_r,FR_C30
708 (p7)  cmp.lt.unc    p9,p0 = 2,GR_RqDeg
710 { .mfi
711       nop.m         0
712 (p14) fma.s1        FR_S11 = FR_rs,FR_rs,FR_S11
713       nop.i         0
715 { .mfi
716       nop.m         0
717 (p14) fma.s1        FR_S21 = FR_rs,FR_rs,FR_S21
718       nop.i         0
720 { .mfi
721       nop.m         0
722       fma.s1        FR_C41 = FR_C41,FR_r,FR_C40
723       nop.i         0
725 { .mfi
726       nop.m         0
727 (p14) fma.s1        FR_S32 = FR_rs2,FR_S32,FR_S31
728       nop.i         0
730 { .mfi
731       nop.m         0 
732 (p9)  fma.s1        FR_Rq1 = FR_Rq1,FR_Rq2,f0
733       nop.i         0
735 { .mfi
736       nop.m         0
737       fma.s1        FR_C51 = FR_C51,FR_r,FR_C50
738       nop.i         0 
740 { .mfi
741 (p14) getf.exp      GR_SignExp = FR_rs
742       fma.s1        FR_C01 = FR_C01,FR_C11,f0
743       nop.i         0 
745 { .mfi
746       nop.m         0
747 (p14) fma.s1        FR_S01 = FR_S01,FR_rs2,FR_S00
748       nop.i         0 
750 { .mfi
751       nop.m         0
752       fma.s1        FR_C21 = FR_C21,FR_C31,f0
753       nop.i         0
755 { .mfi
756       nop.m         0
757       // NR-iteration
758 (p14) fnma.s1       FR_InvNormX1 = FR_Rcp0,FR_NormX,f1
759       nop.i         0
761 { .mfi
762       nop.m         0
763 (p14) fma.s1        FR_S11 = FR_S11,FR_rs2,FR_S10
764 (p14) tbit.z.unc    p11,p12 = GR_SignExp,17 
766 { .mfi
767       nop.m         0
768 (p14) fma.s1        FR_S21 = FR_S21,FR_rs2,FR_S20
769       nop.i         0
771 { .mfi
772       nop.m         0
773 (p15) fcmp.lt.unc.s1 p0,p13 = FR_NormX,FR_OvfBound
774       nop.i         0
776 { .mfi
777       nop.m         0
778 (p14) fma.s1        FR_S32 = FR_rs2,FR_S32,f0
779       nop.i         0
781 { .mfi
782       nop.m         0
783       fma.s1        FR_C41 = FR_C41,FR_C51,f0
784       nop.i         0
786 { .mfi
787       nop.m         0
788 (p7)  fma.s1        FR_An = FR_Rq1,FR_An,f0
789       nop.i         0 
791 { .mfb
792       nop.m         0
793       nop.f         0
794       // jump if x > 35.04010009765625
795 (p13) br.cond.spnt  tgammaf_overflow
797 { .mfi
798       nop.m         0
799       // NR-iteration
800 (p14) fma.s1        FR_InvNormX1 = FR_Rcp0,FR_InvNormX1,FR_Rcp0
801       nop.i         0
803 { .mfi
804       nop.m         0
805 (p14) fma.s1        FR_S01 = FR_S01,FR_S11,f0
806       nop.i         0
808 { .mfi
809       nop.m         0
810 (p14) fma.s1        FR_S21 = FR_S21,FR_S32,f0
811       nop.i         0
813 { .mfi
814 (p14) getf.exp      GR_SignExp = FR_NormX
815       fma.s1        FR_C01 = FR_C01,FR_C21,f0
816       nop.i         0
818 { .mfi
819       nop.m         0
820       fma.s1        FR_C41 = FR_C41,FR_An,f0
821 (p14) mov           GR_ExpOf1 = 0x2FFFF
823 { .mfi
824       nop.m         0
825       // NR-iteration
826 (p14) fnma.s1       FR_InvNormX2 = FR_InvNormX1,FR_NormX,f1
827       nop.i         0
829 .pred.rel "mutex",p11,p12
830 { .mfi
831       nop.m         0
832 (p12) fnma.s1       FR_S01 = FR_S01,FR_S21,f0
833       nop.i         0
835 { .mfi
836       nop.m         0
837 (p11) fma.s1        FR_S01 = FR_S01,FR_S21,f0
838       nop.i         0
841 { .mfi
842       nop.m         0 
843 (p14) fma.s1        FR_GAMMA = FR_C01,FR_C41,f0
844 (p14) tbit.z.unc    p6,p7 = GR_Sig,0
846 { .mfb
847       nop.m         0
848 (p15) fma.s.s0      f8 = FR_C01,FR_C41,f0
849 (p15) br.ret.spnt   b0 // exit for positives
851 .pred.rel "mutex",p11,p12
852 { .mfi
853       nop.m         0
854 (p12) fms.s1        FR_S01 = FR_rs,FR_S01,FR_rs
855       nop.i         0
857 { .mfi
858       nop.m         0
859 (p11) fma.s1        FR_S01 = FR_rs,FR_S01,FR_rs
860       nop.i         0
862 { .mfi
863       nop.m         0
864       // NR-iteration
865       fma.s1        FR_InvNormX2 = FR_InvNormX1,FR_InvNormX2,FR_InvNormX1
866       cmp.eq        p10,p0 = 0x23,GR_Offs
868 .pred.rel "mutex",p6,p7
869 { .mfi
870       nop.m         0
871 (p6)  fma.s1        FR_GAMMA = FR_S01,FR_GAMMA,f0
872       cmp.gtu       p8,p0 = GR_SignExp,GR_ExpOf1
874 { .mfi
875       nop.m         0
876 (p7)  fnma.s1       FR_GAMMA = FR_S01,FR_GAMMA,f0
877       cmp.eq        p9,p0 = GR_SignExp,GR_ExpOf1
879 { .mfi
880       nop.m         0
881       // NR-iteration
882       fnma.s1       FR_InvNormX1 = FR_InvNormX2,FR_NormX,f1
883       nop.i         0
885 { .mfi
886       nop.m         0
887 (p10) fma.s1        FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0
888       nop.i         0
890 { .mfi
891       nop.m         0
892       frcpa.s1      FR_Rcp0,p0 = f1,FR_GAMMA
893       nop.i         0
895 { .mfi
896       nop.m         0
897       fms.s1        FR_Multplr = FR_NormX,f1,f1 // x - 1
898       nop.i         0
900 { .mfi
901       nop.m         0
902       // NR-iteration
903       fnma.s1       FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1
904       nop.i         0
906 .pred.rel "mutex",p8,p9
907 { .mfi
908       nop.m         0
909       // 1/x or 1/(An*x)
910 (p8)  fma.s1        FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2
911       nop.i         0
913 { .mfi
914       nop.m         0
915 (p9)  fma.s1        FR_Multplr = f1,f1,f0
916       nop.i         0
918 { .mfi
919       nop.m         0
920       // NR-iteration
921       fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
922       nop.i         0
924 { .mfi
925       nop.m         0
926       // NR-iteration
927       fnma.s1       FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1
928       nop.i         0
930 { .mfi
931       nop.m         0
932       // NR-iteration
933       fma.s1        FR_Rcp1 = FR_Rcp1,FR_Multplr,f0
934       nop.i         0
936 { .mfb
937       nop.m         0
938       fma.s.s0      f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1
939       br.ret.sptk   b0
942 // here if 0 < x < 1
943 //--------------------------------------------------------------------
944 .align 32
945 tgammaf_from_0_to_1:
946 { .mfi
947       cmp.lt        p7,p0 = GR_Arg,GR_ExpOf05
948       // NR-iteration
949       fnma.s1       FR_Rcp1 = FR_Rcp0,FR_NormX,f1
950       cmp.eq        p8,p0 = GR_Arg,GR_ExpOf05
952 { .mfi
953       cmp.gt        p9,p0 = GR_Arg,GR_ExpOf05
954       fma.s1        FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
955       mov           GR_ExpOf025 = 0x7FA       
957 { .mfi
958       getf.s        GR_ArgNz = f8
959       fma.d.s0      FR_X = f0,f0,f8 // set deno flag
960       shl           GR_OvfNzBound = GR_OvfNzBound,20
962 { .mfi
963 (p8)  mov           GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75
964       nop.f         0
965 (p7)  cmp.ge.unc    p6,p0 = GR_Arg,GR_ExpOf025
967 .pred.rel "mutex",p6,p9
968 { .mfi
969 (p9)  mov           GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1
970       nop.f         0
971 (p6)  mov           GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
973 { .mfi
974       add           GR_ad_Ce = 0x2C0,GR_ad_Data      
975       nop.f         0
976       add           GR_ad_Co = 0x2A0,GR_ad_Data
978 { .mfi
979       add           GR_ad_Co = GR_ad_Co,GR_Tbl12Offs
980       nop.f         0
981       cmp.lt        p12,p0 = GR_ArgNz,GR_OvfNzBound
983 { .mib
984       add           GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs
985       cmp.eq        p7,p0 = GR_ArgNz,GR_OvfNzBound
986       // jump if argument is 0x00200000
987 (p7)  br.cond.spnt  tgammaf_overflow_near0_bound
989 { .mmb
990       ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
991       ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
992       // jump if argument is close to 0 positive
993 (p12) br.cond.spnt  tgammaf_overflow      
995 { .mfi
996       ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
997       // NR-iteration
998       fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
999       nop.i         0
1001 { .mfb
1002       ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
1003       nop.f         0
1004       br.cond.sptk  tgamma_from_0_to_2 
1007 // here if 1 <= x < 2
1008 //--------------------------------------------------------------------
1009 .align 32
1010 tgammaf_from_1_to_2:
1011 { .mfi
1012       add           GR_ad_Co = 0x2A0,GR_ad_Data
1013       fms.s1        FR_r = f0,f0,FR_1mX
1014       shr           GR_TblOffs = GR_Arg,47
1016 { .mfi
1017       add           GR_ad_Ce = 0x2C0,GR_ad_Data
1018       nop.f         0
1019       mov           GR_TblOffsMask = 0x18
1021 { .mfi
1022       nop.m         0
1023       nop.f         0
1024       and           GR_TblOffs = GR_TblOffs,GR_TblOffsMask 
1026 { .mfi
1027       shladd        GR_ad_Co = GR_TblOffs,3,GR_ad_Co
1028       nop.f         0
1029       nop.i         0
1031 { .mfi
1032       shladd        GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce
1033       nop.f         0
1034       cmp.eq        p6,p7 = 8,GR_TblOffs
1036 { .mmi
1037       ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
1038       ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
1039       nop.i         0
1041 { .mmi
1042       ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
1043       ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
1044       nop.i         0
1047 .align 32
1048 tgamma_from_0_to_2:
// Common polynomial evaluation. It is entered by the branch at the end of
// the preceding 0 < x < 1 code and by fall-through from
// tgammaf_from_1_to_2. FR_A7..FR_A0 and FR_r are already loaded.
//
// With r2 = r*r and r4 = r2*r2 the code computes
//   P = ((A7*r + A6)*r2 + (A5*r + A4))*r4 + ((A3*r + A2)*r2 + (A1*r + A0))
// Under p11 the result is P rounded to single precision.
// Under p10 the result is P * FR_Rcp2, where FR_Rcp2 is a refined
// reciprocal built from FR_Rcp1 and FR_NormX.
// p6, p10 and p11 are set by the callers; which argument range each one
// stands for is decided outside this block.
1049 { .mfi
1050       nop.m         0
1051 (p6)  fms.s1        FR_r = FR_r,f1,FR_LocalMin // r = r - FR_LocalMin
1052       nop.i         0
1054 { .mfi
1055       nop.m         0
1056       // NR-iteration
      // error term: FR_Rcp2 = 1 - FR_Rcp1*FR_NormX
1057 (p10) fnma.s1       FR_Rcp2 = FR_Rcp1,FR_NormX,f1
1058       nop.i         0
1060 { .mfi
1061       nop.m         0
1062       fms.s1        FR_r2 = FR_r,FR_r,f0 // r2 = r*r
1063       nop.i         0
1065 { .mfi
1066       nop.m         0
1067       fma.s1        FR_A7 = FR_A7,FR_r,FR_A6 // A7*r + A6
1068       nop.i         0
1070 { .mfi
1071       nop.m         0
1072       fma.s1        FR_A5 = FR_A5,FR_r,FR_A4 // A5*r + A4
1073       nop.i         0
1075 { .mfi
1076       nop.m         0
1077       fma.s1        FR_A3 = FR_A3,FR_r,FR_A2 // A3*r + A2
1078       nop.i         0
1080 { .mfi
1081       nop.m         0
1082       fma.s1        FR_A1 = FR_A1,FR_r,FR_A0 // A1*r + A0
1083       nop.i         0
1085 { .mfi
1086       nop.m         0
1087       // NR-iteration
      // correction: FR_Rcp2 = FR_Rcp1 + FR_Rcp1*(error term)
1088 (p10) fma.s1        FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1089       nop.i         0
1091 { .mfi
1092       nop.m         0
1093       fma.s1        FR_A7 = FR_A7,FR_r2,FR_A5 // upper half of P
1094       nop.i         0
1096 { .mfi
1097       nop.m         0
1098       fma.s1        FR_r4 = FR_r2,FR_r2,f0 // r4 = r2*r2
1099       nop.i         0
1101 { .mfi
1102       nop.m         0
1103       fma.s1        FR_A3 = FR_A3,FR_r2,FR_A1 // lower half of P
1104       nop.i         0
1106 { .mfi
1107       nop.m         0 
      // p10: keep P in FR_GAMMA, the final rounding happens in the multiply below
1108 (p10) fma.s1        FR_GAMMA = FR_A7,FR_r4,FR_A3
1109       nop.i         0
1111 { .mfi
1112       nop.m         0 
      // p11: P is the result; round to single precision in status field 0
1113 (p11) fma.s.s0      f8 = FR_A7,FR_r4,FR_A3
1114       nop.i         0
1116 { .mfb
1117       nop.m         0 
      // p10: result = P * FR_Rcp2, rounded to single precision
1118 (p10) fma.s.s0      f8 = FR_GAMMA,FR_Rcp2,f0
1119       br.ret.sptk   b0
1123 // overflow
// Reached from the `cmp.eq GR_ArgNz,GR_OvfNzBound` test, i.e. when the
// argument bit pattern equals the overflow boundary exactly.
// Whether the result overflows here depends on the rounding mode. If it
// does not, the largest finite single-precision value of the proper sign
// is returned directly and no flag is raised. Otherwise execution falls
// through into tgammaf_overflow.
// p14 / p15 select the negative / positive argument case. They are set
// before this block.
1124 //--------------------------------------------------------------------
1125 .align 32
1126 tgammaf_overflow_near0_bound:
1127 .pred.rel "mutex",p14,p15
1128 { .mfi
1129           mov           GR_fpsr = ar.fpsr
1130           nop.f         0
1131 (p15) mov           r8 = 0x7f8 // becomes 0x7f800000 after the shift
1133 { .mfi
1134       nop.m         0
1135       nop.f         0
1136 (p14) mov           r8 = 0xff8 // becomes 0xff800000 after the shift
1138 { .mfi
1139           nop.m         0
1140           nop.f         0
1141           shl           r8 = r8,20 
1143 { .mfi
      // r8 = r8 - 0 - 1: 0x7f7fffff (p15) or 0xff7fffff (p14), the bit
      // pattern just below the infinity pattern formed above
1144       sub           r8 = r8,r0,1
1145       nop.f         0
1146           extr.u        GR_fpsr = GR_fpsr,10,2 // rounding mode
1148 .pred.rel "mutex",p14,p15
1149 { .mfi
1150       // set p8 to 0 in case of overflow and to 1 otherwise
1151           // for negative arg: 
1152           //    no overflow if rounding mode either Z or +Inf, i.e.
1153           //    GR_fpsr > 1
1154 (p14) cmp.lt        p8,p0 = 1,GR_fpsr
1155       nop.f         0
1156           // for positive arg: 
1157           //    no overflow if rounding mode either Z or -Inf, i.e.
1158           //    (GR_fpsr & 1) != 0  (tbit.z puts "bit 0 is set" in p8)
1159 (p15) tbit.z        p0,p8 = GR_fpsr,0
1161 { .mib
1162 (p8)  setf.s        f8 = r8 // set result to 0x7f7fffff without
1163                             // OVERFLOW flag raising
                            // (0xff7fffff when p14 selected the constant)
1164       nop.i         0
1165 (p8)  br.ret.sptk   b0
      // p8 clear: continue into tgammaf_overflow
1168 .align 32
1169 tgammaf_overflow:
// Overflow result. Squares a huge value in single precision so that the
// hardware produces the signed infinity and raises the Overflow and
// Inexact flags, then hands over to tgammaf_libm_err with tag 261.
// p14 / p15 select the negative / positive result. They are set before
// this block.
1170 { .mfi
1171       nop.m         0
1172       nop.f         0
1173       mov           r8 = 0x1FFFE // exponent field value for f9 below
1175 { .mfi
1176       setf.exp      f9 = r8 // f9 = value with exponent field 0x1FFFE
1177       fmerge.s      FR_X = f8,f8 // keep the original argument for the error call
1178       nop.i         0
1180 .pred.rel "mutex",p14,p15
1181 { .mfi
1182       nop.m         0
1183 (p14) fnma.s.s0     f8 = f9,f9,f0 // set I,O and -INF result
1184       mov           GR_TAG = 261 // overflow
1186 { .mfb
1187       nop.m         0 
1188 (p15) fma.s.s0      f8 = f9,f9,f0 // set I,O and +INF result
1189       br.cond.sptk  tgammaf_libm_err
1192 // x is negative integer or +/-0
// Produces the result with frcpa on 0/0 and hands over to
// tgammaf_libm_err with tag 262.
// NOTE(review): in the code visible here, tgammaf_spec_args sends +/-0
// straight to tgammaf_libm_err and branches to this label only for its
// remaining (p8) case, so the "+/-0" in the line above may be out of date.
// Confirm against the branches earlier in the function.
1193 //--------------------------------------------------------------------
1194 .align 32
1195 tgammaf_singularity:
1196 { .mfi
1197       nop.m         0
1198       fmerge.s      FR_X = f8,f8 // keep the original argument for the error call
1199       mov           GR_TAG = 262 // negative
1201 { .mfb
1202       nop.m         0
      // reciprocal approximation of 0/0; the predicate result is discarded (p0)
      // NOTE(review): expected to give a NaN and raise Invalid - confirm
1203       frcpa.s0      f8,p0 = f0,f0
1204       br.cond.sptk  tgammaf_libm_err
1206 // x is negative noninteger with big absolute value
// Builds a tiny value of the required sign in single precision and
// returns it directly, without going through tgammaf_libm_err.
// The sign is chosen from bit 0 of GR_Sig, which is set before this block
// (presumably the integer part of |x| - confirm at the definition).
1207 //--------------------------------------------------------------------
1208 .align 32
1209 tgammaf_underflow:
1210 { .mfi
1211       mov           r8 = 0x00001 // exponent field value for f9 below
1212       nop.f         0
1213       tbit.z        p6,p7 = GR_Sig,0 // p6: bit 0 clear, p7: bit 0 set
1215 { .mfi
1216       setf.exp      f9 = r8 // f9 = value with exponent field 1
1217       nop.f         0
1218       nop.i         0
1220 .pred.rel "mutex",p6,p7
1221 { .mfi
1222       nop.m         0
1223 (p6)  fms.s.s0      f8 = f9,f9,f9 // f9*f9 - f9: tiny negative result
1224       nop.i         0
1226 { .mfb
1227       nop.m         0
1228 (p7)  fma.s.s0      f8 = f9,f9,f9 // f9*f9 + f9: tiny positive result
1229       br.ret.sptk   b0
1232 //  x for natval, nan, +/-inf or +/-0
// Three outcomes:
//   p6 (natval, nan, +inf): return x*1 + x and leave.
//   p7 (+/-0): result is frcpa(1/x), tag 262, then fall through into
//              tgammaf_libm_err.
//   p8 (anything else that reached this label): branch to
//              tgammaf_singularity.
1233 //--------------------------------------------------------------------
1234 .align 32
1235 tgammaf_spec_args:
1236 { .mfi
1237       nop.m         0
1238       fclass.m      p6,p0 =  f8,0x1E1 // Test x for natval, nan, +inf
1239       nop.i         0
1241 { .mfi
1242       nop.m         0
1243       fclass.m      p7,p8 =  f8,0x7 // +/-0
1244       nop.i         0
1246 { .mfi
1247       nop.m         0
1248       fmerge.s      FR_X = f8,f8 // keep the original argument for the error call
1249       nop.i         0
1251 { .mfb
1252       nop.m         0
      // f8 = x*1 + x in single precision, using status field 0
1253 (p6)  fma.s.s0      f8 = f8,f1,f8
1254 (p6)  br.ret.spnt   b0
1256 .pred.rel "mutex",p7,p8
1257 { .mfi
1258 (p7)  mov           GR_TAG = 262 // negative
      // reciprocal of the zero argument; the predicate result is discarded (p0)
1259 (p7)  frcpa.s0      f8,p0 = f1,f8
1260       nop.i         0 
1262 { .mib
1263       nop.m         0
1264       nop.i         0
1265 (p8)  br.cond.spnt  tgammaf_singularity
      // p7 path: no branch taken, continue into tgammaf_libm_err
1268 .align 32
1269 tgammaf_libm_err:
// Common entry to the error path. It is reached by branch from
// tgammaf_overflow and tgammaf_singularity, and by fall-through from
// tgammaf_spec_args. On entry GR_TAG holds the error tag, FR_X the
// original argument and f8 the result computed so far.
// After the tag is copied, execution runs on into __libm_error_region.
1270 { .mfi
      // register frame: 1 input, 4 locals, 4 outputs, 0 rotating
1271       alloc        r32 = ar.pfs,1,4,4,0
1272       nop.f        0
1273       mov          GR_Parameter_TAG = GR_TAG
1276 GLOBAL_LIBM_END(tgammaf)
// Calls __libm_error_support and returns the value it leaves in the
// result slot.
// Creates a 64-byte stack frame, stores FR_Y, FR_X and FR_RESULT into it
// and sets GR_Parameter_X / GR_Parameter_Y / GR_Parameter_RESULT to the
// addresses of those slots. GR_Parameter_TAG was set by the caller.
// Slot offsets from the new sp: X at +16, Y at +32, RESULT at +48.
// After the call, f8 is reloaded from the RESULT slot, sp, b0, gp and
// ar.pfs are restored, and control returns to the caller of tgammaf.
// NOTE(review): FR_Y is stored although the code visible here never sets
// it - presumably a slot kept for the shared error-support interface.
1277 LOCAL_LIBM_ENTRY(__libm_error_region)
1278 .prologue
1279 { .mfi
1280         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1281         nop.f 0
1282 .save   ar.pfs,GR_SAVE_PFS
1283         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
1285 { .mfi
1286 .fframe 64 
1287         add sp=-64,sp                           // Create new stack
1288         nop.f 0
1289         mov GR_SAVE_GP=gp                       // Save gp
1291 { .mmi
        // post-increment leaves GR_Parameter_Y at old sp - 16 (the RESULT slot)
1292         stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
1293         add GR_Parameter_X = 16,sp              // Parameter 1 address
1294 .save   b0, GR_SAVE_B0                      
1295         mov GR_SAVE_B0=b0                       // Save b0 
1297 .body
1298 { .mib
1299         stfd [GR_Parameter_X] = FR_X           // STORE Parameter 1 on stack 
1300         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address 
1301         nop.b 0                                      
1303 { .mib
1304         stfd [GR_Parameter_Y] = FR_RESULT      // STORE Parameter 3 on stack
        // step GR_Parameter_Y back by 16 so it addresses the Y slot again
1305         add   GR_Parameter_Y = -16,GR_Parameter_Y  
1306         br.call.sptk b0=__libm_error_support# // Call error handling function
1308 { .mmi
1309         nop.m 0
1310         nop.m 0
        // recompute the RESULT slot address from sp after the call
1311         add   GR_Parameter_RESULT = 48,sp
1313 { .mmi
1314         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1315 .restore sp
1316         add   sp = 64,sp                       // Restore stack pointer
1317         mov   b0 = GR_SAVE_B0                  // Restore return address
1319 { .mib
1320         mov   gp = GR_SAVE_GP                  // Restore gp 
1321         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1322         br.ret.sptk     b0                     // Return
1323 };; 
1325 LOCAL_LIBM_END(__libm_error_region)
1326 .type   __libm_error_support#,@function
1327 .global __libm_error_support#