4 // Copyright (c) 2002 - 2005, Intel Corporation
5 // All rights reserved.
7 // Contributed 2002 by the Intel Numerics Group, Intel Corporation
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
41 //==============================================================
42 // 01/16/02 Initial version
43 // 05/20/02 Cleaned up namespace and sf0 syntax
44 // 02/10/03 Reordered header: .section, .global, .proc, .align;
45 // used data8 for long double table values
46 // 03/17/03 Moved tgammal_libm_err label into .proc region
47 // 04/10/03 Changed error codes for overflow and negative integers
48 // 03/31/05 Reformatted delimiters between data tables
51 //==============================================================
52 // long double tgammal(long double)
56 // Floating-Point Registers: f8-f15
59 // General Purpose Registers: r32-r67
61 // Predicate Registers: p6-p15
63 //*********************************************************************
65 // IEEE Special Conditions:
67 // tgammal(+inf) = +inf
68 // tgammal(-inf) = QNaN
69 // tgammal(+/-0) = +/-inf
70 // tgammal(x<0, x - integer) = QNaN
71 // tgammal(SNaN) = QNaN
72 // tgammal(QNaN) = QNaN
74 //*********************************************************************
75 // Overview of operation
76 //==============================================================
78 // Algorithm description
79 // ---------------------
81 // There are 3 main paths in the implementation
82 // (and additional special values branches)
84 // 1) |X| >= 13 - Stirling formula computation
85 // a) Positive arguments:
86 // TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)),
87 // where C = 0.5*ln(2*Pi) , Z = 1/Z, S(Z) - Bernulli polynomial
88 // (up to 'B18' term).
89 // Some of these calculation done in multiprecision.
90 // Ln returns multiprecision result too
91 // and exp also accepts and returns pair of values.
93 // b) Negative arguments
94 // TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
95 // (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
96 // Here we use polynomial of 9th degree with 2 multiprecision steps.
97 // Argument range reduction is:
98 // N = [x] with round to nearest, r = x - N, -0.5 <= r < 0.5
99 // After ((X-0.5)*ln(X) - X + C + S(Z)) completed we just invert
100 // its result and compute exp with negative argument (1/exp(x)=exp(-x))
101 // Then we multiply exp result to PI/(X*sin(PI*X)).
103 // 2) 1 <= |X| < 13 - Polynomial part
104 // a) Positive arguments:
105 // All values are splitted to such intervals as:
106 // #0->[2;3], #1->[3,4], #2->[5,6]...
107 // For even intervals we just use polynomial computation with degree 20
108 // and first 6 multiprecision computations.
109 // Range reduction looks like
110 // N = [x] with truncate, r = x - N - 0.5, -0.5 <= r < 0.5
111 // For odd intervals we use reccurent formula:
112 // TGAMMAL(X) = TGAMMA(X-1)*(X-1)
113 // [1;2] interval is splitted to 3 subranges:
114 // [1;1.25], [1.25;1.75], [1.75;2] with the same polynomial forms
116 // b) Negative arguments
117 // TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
118 // (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
119 // After multiplication by TGAMMAL(X) result we calculate reciprocal
120 // and get final result.
122 // 3) 0 < |X| < 1 - Near 0 part
123 // a) Here we use reccurent formula TGAMMAL(X) = TGAMMAL(X+1)/X
124 // TGAMMAL(X+1) calculated as shown above,
125 // 1/X result obtained in parallel. Then we just multiply these values.
126 // There is only additional separated subrange: [0;0.125] with specific
127 // polynomial constants set.
129 // b) Negative arguments
130 // TGAMMAL(-X) = PI/(TGAMMAL(X+1)*sin(PI*X)).
131 // There is no need to compute 1/X.
138 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Q)
139 // log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
140 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
141 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
142 data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
143 data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
144 data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
145 data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
146 data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
147 data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
148 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Q)
151 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h1)
152 // Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
153 data4 0x00008000,0x3F800000,0x00000000,0x00000000
154 data4 0x00000000,0x00000000,0x00000000,0x00000000
155 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
156 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
157 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
158 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
159 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
160 data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
161 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
162 data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
163 data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
164 data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
165 data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
166 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
167 data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
168 data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
169 data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
170 data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
171 data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
172 data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
173 data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
174 data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
175 data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
176 data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
177 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
178 data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
179 data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
180 data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
181 data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
182 data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
183 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
184 data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
185 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h1)
188 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h2)
189 // Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
190 data4 0x00008000,0x3F800000,0x00000000,0x00000000
191 data4 0x00000000,0x00000000,0x00000000,0x00000000
192 data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
193 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
194 data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
195 data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
196 data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
197 data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
198 data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
199 data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
200 data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
201 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
202 data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
203 data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
204 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
205 data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
206 data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
207 data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
208 data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
209 data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
210 data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
211 data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
212 data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
213 data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
214 data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
215 data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
216 data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
217 data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
218 data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
219 data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
220 data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
221 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
222 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h2)
225 LOCAL_OBJECT_START(Constants_Tgammal_log_80_h3_G_H)
226 // h3 IEEE double extended, H3 and G3 IEEE single
227 data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
228 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
229 data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
230 data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
231 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
232 data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
233 data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
234 data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
235 data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
236 data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
237 data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
238 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
239 data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
240 data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
241 data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
242 data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
243 data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
244 data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
245 data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
246 data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
247 data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
248 data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
249 data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
250 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
251 data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
252 data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
253 data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
254 data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
255 data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
256 data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
257 data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
258 data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
259 data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
260 data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
261 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
262 data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
263 data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
264 data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
265 data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
266 data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
267 LOCAL_OBJECT_END(Constants_Tgammal_log_80_h3_G_H)
270 LOCAL_OBJECT_START(Constants_Tgammal_stirling)
271 //0.5*ln(2*Pi)=9.1893853320467266954096885e-01 + 7.2239360881843238220057778e-17
272 data8 0x3FED67F1C864BEB4, 0x3C94D252F2400510
274 data8 0xAAAAAAAAAAAAAAAB, 0x00003FFB //B2 = 8.3333333333333333333333333333e-02
275 data8 0xBF66C16C16C16C17 //B4 = -2.7777777777777777777777777778e-03
276 data8 0x3F4A01A01A01A01A //B6 = 7.9365079365079365079365079365e-04
277 data8 0xBF43813813813814 //B8 = -5.9523809523809523809523809524e-04
278 data8 0x3F4B951E2B18FF23 //B10 = 8.4175084175084175084175084175e-04
279 data8 0xBF5F6AB0D9993C7D //B12 = -1.9175269175269175269175269175e-03
280 data8 0x3F7A41A41A41A41A //B14 = 6.4102564102564102564102564103e-03
281 data8 0xBF9E4286CB0F5398 //B16 = -2.9550653594771241830065359477e-02
282 data8 0x3FC6FE96381E0680 //B18 = 1.7964437236883057316493849002e-01
283 data8 0x3FE0000000000000 // 0.5
284 LOCAL_OBJECT_END(Constants_Tgammal_stirling)
287 LOCAL_OBJECT_START(Constants_Tgammal_sin)
288 // Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5
289 //A2 = 8.1174242528335360802316245099e-01 + 5.1302254650266899774269946201e-18
290 data8 0x3FE9F9CB402BC46C, 0x3C57A8B3819B7CEC
291 //A1 = -1.6449340668482264060656916627e+00 + -3.0210280454695477893051351574e-17
292 data8 0xBFFA51A6625307D3, 0xBC816A402079D0EF
293 data8 0xF3AEF1FFCCE6C813, 0x0000BFE3 //A9 = -7.0921197799923779127089910470e-09
294 data8 0x87D54408E6D4BB9D, 0x00003FE9 //A8 = 2.5300880778252693946712766029e-07
295 data8 0xEA12033DCE7B8ED9, 0x0000BFED //A7 = -6.9758403885461690048189307819e-06
296 data8 0x9BA38C952A59D1A8, 0x00003FF2 //A6 = 1.4842878710882320255092707181e-04
297 data8 0x99C0B55178FF0E38, 0x0000BFF6 //A5 = -2.3460810348048124421268761990e-03
298 data8 0xD63402E798FEC896, 0x00003FF9 //A4 = 2.6147847817611456327417812320e-02
299 data8 0xC354723906D95E92, 0x0000BFFC //A3 = -1.9075182412208257558294507774e-01
300 LOCAL_OBJECT_END(Constants_Tgammal_sin)
303 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_Arg)
304 data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi = hi part log(2)/2^12
305 data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo = lo part log(2)/2^12
306 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_Arg)
308 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_A)
309 data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A3
310 data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A2
311 data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A1
312 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_A)
314 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T1)
315 data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
316 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
317 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
318 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
319 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
320 data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
321 data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
322 data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
323 data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
324 data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
325 data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
326 data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
327 data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
328 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
329 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
330 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
331 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T1)
333 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T2)
334 data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
335 data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
336 data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
337 data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
338 data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
339 data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
340 data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
341 data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
342 data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
343 data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
344 data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
345 data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
346 data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
347 data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
348 data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
349 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
350 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T2)
352 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W1)
353 data8 0x0000000000000000, 0xBE384454171EC4B4
354 data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
355 data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
356 data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
357 data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
358 data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
359 data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
360 data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
361 data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
362 data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
363 data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
364 data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
365 data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
366 data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
367 data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
368 data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
369 data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
370 data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
371 data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
372 data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
373 data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
374 data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
375 data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
376 data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
377 data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
378 data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
379 data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
380 data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
381 data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
382 data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
383 data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
384 data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
385 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W1)
387 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W2)
388 data8 0x0000000000000000, 0xBE641F2537A3D7A2
389 data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
390 data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
391 data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
392 data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
393 data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
394 data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
395 data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
396 data8 0xBE56856B49BFF529, 0x3E66DD3300508651
397 data8 0x3E51165FC114BC13, 0x3E53333DC453290F
398 data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
399 data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
400 data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
401 data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
402 data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
403 data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
404 data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
405 data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
406 data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
407 data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
408 data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
409 data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
410 data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
411 data8 0xBE559725ADE45917, 0xBE68C29C042FC476
412 data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
413 data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
414 data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
415 data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
416 data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
417 data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
418 data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
419 data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
420 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W2)
424 LOCAL_OBJECT_START(Constants_Tgammal_poly)
426 // Polynomial coefficients for the tgammal(x), 2 <= |x| < 3
427 //A5 = 2.8360780594841213109180699803e-02 + 2.2504152891014320704380000000e-19
428 data8 0x3F9D0A9BC49353D2, 0x3C109AEA0F23CE2D
429 //A4 = 1.0967323400216015538699565468e-01 + 9.9225166000430644587276000000e-18
430 data8 0x3FBC138B89492C5B, 0x3C66E138506D5652
431 //A3 = 2.5387124684114281691904579930e-01 + 2.2667777637607113205546600000e-17
432 data8 0x3FD03F6D2FA4F4F8, 0x3C7A2258DA8CD8B1
433 data8 0xC5866457328BC39B, 0x00003FE3 //A20 = 5.7487331964156762795056629138e-09
434 data8 0xE93D9F1ACD59C929, 0x0000BFE4 //A19= -1.3576396100397317396956445658e-08
435 data8 0xE33389C8F6CBA813, 0x00003FE5 //A18 = 2.6449714924964597501721434271e-08
436 data8 0x8FE7B25B9CD26D2A, 0x0000BFE7 //A17= -6.7011017946055513660266853311e-08
437 data8 0xB89F4721BFBC15B0, 0x00003FE8 //A16 = 1.7194280320370423615174419192e-07
438 data8 0xE49CBDC1874EBABA, 0x0000BFE9 //A15= -4.2582353660153782928729466776e-07
439 data8 0x913AF50A336129CA, 0x00003FEB //A14 = 1.0820500665257088283172211622e-06
440 data8 0xABCF0F7313B3B332, 0x0000BFEC //A13= -2.5601510627710417669568115706e-06
441 //A2 = 6.5455857798133676439533701341e-01 + 1.3292075193155190798867000000e-18
442 data8 0x3FE4F224D4B7E01C, 0x3C3885014A2B8319
443 //A1 = 9.3473452162608550164435428087e-01 + 3.2785154201417136611642400000e-17
444 data8 0x3FEDE9585F1A7093, 0x3C82E63C1B5028BF
445 //A0 = 1.3293403881791368004172682049e+00 + 2.2005689328949279282607500000e-16
446 data8 0x3FF544FA6D47B38F, 0x3CAFB6AA9829E81F
447 data8 0xF3668F799997C76D, 0x00003FED //A12 = 7.2539039479124273660331538367e-06
448 data8 0xD6C6BBD54CDEAEB1, 0x0000BFEE //A11= -1.2801665282681088568639378920e-05
449 data8 0x809E4763B06F6883, 0x00003FF1 //A10 = 6.1329973609906572700697893187e-05
450 data8 0x8443B000F8F9A71A, 0x00003FED //A9 = 3.9417864189995544394564413428e-06
451 data8 0xC5C7E6D62A6991D8, 0x00003FF4 //A8 = 7.5447412886334708803357581519e-04
452 data8 0xD2AF690725C62D88, 0x00003FF5 //A7 = 1.6074004848394703022110823298e-03
453 data8 0xAA44E635D4B7B682, 0x00003FF8 //A6 = 1.0392403425906843901680697839e-02
455 // Polynomial coefficients for the tgammal(x), 4 <= |x| < 5
456 //A5 = 1.1600674810589555185913468449e+00 + 3.0229979112715124660731000000e-17
457 data8 0x3FF28FA2EB44D22E, 0x3C816D285234C815
458 //A4 = 3.1374268565470946334983182169e+00 + 1.3694868953995008497659600000e-16
459 data8 0x400919734073B1E1, 0x3CA3BC83CD7E9565
460 //A3 = 7.0834593993741057360580271052e+00 + 3.3899702569039156457249800000e-16
461 data8 0x401C5576617B6C1F, 0x3CB86D6431213296
462 data8 0xA4A5FB49C094966B, 0x00003FDA //A20 = 9.3591760106637809309720130828e-12
463 data8 0xA9260DA0F51D7ED8, 0x00003FDD //A19 = 7.6919898428091669411809372180e-11
464 data8 0xA16441DFB14BD6E1, 0x00003FE0 //A18 = 5.8713933014370867331213494535e-10
465 data8 0x95F098D9C2234849, 0x00003FE3 //A17 = 4.3638234584169302324461091035e-09
466 data8 0x8581817400E5AD2B, 0x00003FE6 //A16 = 3.1084260332429955234755367839e-08
467 data8 0xE272940E373EBE15, 0x00003FE8 //A15 = 2.1089573544273993580820317236e-07
468 data8 0xB6B3391145D226FB, 0x00003FEB //A14 = 1.3612217421122787182942706259e-06
469 data8 0x8B9428C4DF95FCD5, 0x00003FEE //A13 = 8.3195416382628990683949003789e-06
470 //A2 = 1.2665135075272345943631080445e+01 + 9.8721896915973874255877000000e-16
471 data8 0x4029548C95A76F38, 0x3CD1C8BE715B8E13
472 //A1 = 1.6154969393303069580269948347e+01 + 9.6850518810678379641029000000e-16
473 data8 0x403027AC12FC1E1E, 0x3CD172711C15501B
474 //A0 = 1.1631728396567448058362970187e+01 + 8.7078125362814179268673000000e-16
475 data8 0x40274371E7866C65, 0x3CCF5F8A1A5FACA0
476 data8 0xC94A903114272C03, 0x00003FF0 //A12 = 4.7991576836334427243159066630e-05
477 data8 0x8844262960E04BE6, 0x00003FF3 //A11 = 2.5990716419283017929486175141e-04
478 data8 0xAC5418A76767678D, 0x00003FF5 //A10 = 1.3147621245497801180184809726e-03
479 data8 0xCA231B6EFE959132, 0x00003FF7 //A9 = 6.1687358811367989146517222415e-03
480 data8 0xDA38E39C13819D2A, 0x00003FF9 //A8 = 2.6638454961912040754759086920e-02
481 data8 0xD696DF8D8389FE53, 0x00003FFB //A7 = 1.0477995539298934056097943975e-01
482 data8 0xBDD5C153048BC435, 0x00003FFD //A6 = 3.7077144754791605130056406006e-01
484 // Polynomial coefficients for the tgammal(x), 6 <= |x| < 7
485 //A5 = 6.7169398121054200601065531373e+01 + 2.9481001527213915901489600000e-15
486 data8 0x4050CAD76B377BA0, 0x3CEA8DDB2B2DE93E
487 //A4 = 1.6115104376855398982115730178e+02 + 1.3422421925418824418257300000e-14
488 data8 0x406424D559BDC687, 0x3D0E397FDB5B33DC
489 //A3 = 3.1812194028053562533386866562e+02 + 3.9881709875858650942409600000e-14
490 data8 0x4073E1F377A6CF73, 0x3D26738F63FE9C4C
491 data8 0xD6E1B5FF90CAABD3, 0x00003FE1 //A20 = 1.5634700199277480081025480635e-09
492 data8 0xD451987B925DD37E, 0x00003FE4 //A19 = 1.2358576813211397717382327174e-08
493 data8 0xBFC151B67FA58E6B, 0x00003FE7 //A18 = 8.9292951435632759686382657901e-08
494 data8 0xA9034C5E1D67572E, 0x00003FEA //A17 = 6.2962205718327848327368724720e-07
495 data8 0x8E40F6EAA30A71EC, 0x00003FED //A16 = 4.2394926442967995119170095258e-06
496 data8 0xE3C3541B03A1C350, 0x00003FEF //A15 = 2.7151465666109594512258841637e-05
497 data8 0xACE2E58436B2DDCE, 0x00003FF2 //A14 = 1.6487723793339152877117376243e-04
498 data8 0xF7EAF8D8D1CAA3D1, 0x00003FF4 //A13 = 9.4573158112768812533636022369e-04
499 //A2 = 4.8664351544258869353143381886e+02 + 4.7424047995944376868895400000e-14
500 data8 0x407E6A4BD6D9463B, 0x3D2AB2868D79E192
501 //A1 = 5.1615277644992545447166776285e+02 + 3.0901956935588717379242200000e-14
502 data8 0x40802138E2DC003B, 0x3D216570FB601AEA
503 //A0 = 2.8788527781504433278314536437e+02 + 2.8213174117085164944959600000e-14
504 data8 0x4071FE2A1911F7D6, 0x3D1FC3E4CF4DB5AF
505 data8 0xA72B88E48D3D1BAB, 0x00003FF7 //A12 = 5.1016252919939028020562237471e-03
506 data8 0xD2EFB1067DB4FFB2, 0x00003FF9 //A11 = 2.5749059441230515023024615917e-02
507 data8 0xF788AF9522205C24, 0x00003FFB //A10 = 1.2086617635601742290221382521e-01
508 data8 0x861A6CE06CB29EAF, 0x00003FFE //A9 = 5.2384071807018493367136112163e-01
509 data8 0x84FBDE0947718B58, 0x00004000 //A8 = 2.0778727617851237754568261869e+00
510 data8 0xEEC1371E265A2C3A, 0x00004001 //A7 = 7.4610858525146049022238037342e+00
511 data8 0xBF514B9BE68ED59D, 0x00004003 //A6 = 2.3914694993947572859629197920e+01
513 // Polynomial coefficients for the tgammal(x), 8 <= |x| < 9
514 //A5 = 5.8487447114416836484451778233e+03 + 4.7365465221455983144182900000e-13
515 data8 0x40B6D8BEA568B6FD, 0x3D60AA4D44C2589B
516 //A4 = 1.2796464063087094473303295672e+04 + 1.2373341702514898266244200000e-12
517 data8 0x40C8FE3B666B532D, 0x3D75C4752C5B4783
518 //A3 = 2.2837606581322281272150576115e+04 + 2.6598064610627891398831000000e-13
519 data8 0x40D64D66D23A7764, 0x3D52B77B3A10EA5C
520 data8 0xB23418F75B0BE22A, 0x00003FE9 //A20 = 3.3192989594206801808678663868e-07
521 data8 0xA984A7BC8B856ED2, 0x00003FEC //A19 = 2.5260177918662350066375115788e-06
522 data8 0x921A49729416372C, 0x00003FEF //A18 = 1.7416797068239475136398213598e-05
523 data8 0xF5BB9415CC399CA4, 0x00003FF1 //A17 = 1.1717449586392814601938207599e-04
524 data8 0xC50B91A40B81F9DF, 0x00003FF4 //A16 = 7.5166775151159345732094429036e-04
525 data8 0x96002572326DB203, 0x00003FF7 //A15 = 4.5776541559407384162139204300e-03
526 data8 0xD81A1A595E4157BA, 0x00003FF9 //A14 = 2.6379634345126284099420760736e-02
527 data8 0x92B700D0CFECADD8, 0x00003FFC //A13 = 1.4327622675407940907282658100e-01
528 //A2 = 3.1237895525940199149772524834e+04 + 3.1280450505163186432331700000e-12
529 data8 0x40DE8179504C0878, 0x3D8B83BB33FBB766
530 //A1 = 2.9192841741344487672904506326e+04 + 7.9300780509779689630767000000e-13
531 data8 0x40DC8235DF171691, 0x3D6BE6C780EE54DF
532 //A0 = 1.4034407293483411194756627083e+04 + 1.4038139346291543309253700000e-12
533 data8 0x40CB693422315F90, 0x3D78B23746113FCE
534 data8 0xBAE50807548BC711, 0x00003FFE //A12 = 7.3005724123917935346868107005e-01
535 data8 0xDE28B1F57E68CFB6, 0x00004000 //A11 = 3.4712338349724065462763671443e+00
536 data8 0xF4DCA5A5FF901118, 0x00004002 //A10 = 1.5303868912154033908205911714e+01
537 data8 0xF85AAA1AD5E84E5E, 0x00004004 //A9 = 6.2088539523416399361048051373e+01
538 data8 0xE5AA8BB1BF02934D, 0x00004006 //A8 = 2.2966619406617480799195651466e+02
539 data8 0xBF6CFEFD67F59845, 0x00004008 //A7 = 7.6570306334640770654588802417e+02
540 data8 0x8DB5D2F001635C29, 0x0000400A //A6 = 2.2673639984182571062068713002e+03
542 // Polynomial coefficients for the tgammal(x), 10 <= |x| < 11
543 //A5 = 7.2546009516580589115619659424e+05 + 1.0343348865365065212891728822e-10
544 data8 0x412623A830B99290, 0x3DDC6E7C157611C4
545 //A4 = 1.4756292870840241666883230209e+06 + 8.1516565365333844166705674775e-11
546 data8 0x4136842D497E56AF, 0x3DD66837E4C3F9EE
547 //A3 = 2.4356116926500420086085796356e+06 + 3.5508860076560925641351069404e-10
548 data8 0x4142950DD8A8C1AF, 0x3DF866C8E3DD0980
549 data8 0xB7FD0D1EEAC38EB4, 0x00003FF1 //A20 = 8.7732544640091602721643775932e-05
550 data8 0xA9345C64AC750AE9, 0x00003FF4 //A19 = 6.4546407626804942279126469603e-04
551 data8 0x8BEABC81BE1E93C9, 0x00003FF7 //A18 = 4.2699261134524096128048819443e-03
552 data8 0xE1CD281EDD7315F8, 0x00003FF9 //A17 = 2.7563646660310313164706189622e-02
553 data8 0xAD8A5BA6D0FD9758, 0x00003FFC //A16 = 1.6947310643831556048460963841e-01
554 data8 0xFCDDA464AD3F182E, 0x00003FFE //A15 = 9.8775699098518676937088606052e-01
555 data8 0xAE0DCE2F7B60D1AE, 0x00004001 //A14 = 5.4391852309591064073782104822e+00
556 data8 0xE1745D9ABEB8D1A7, 0x00004003 //A13 = 2.8181819161363002758615770457e+01
557 //A2 = 3.0619656223573554307222366333e+06 + 1.0819940302945474471259520006e-10
558 data8 0x41475C66CFA967E4, 0x3DDDBDDB2A27334B
559 //A1 = 2.6099413018962685018777847290e+06 + 3.6851882860056025385268615240e-10
560 data8 0x4143E98AA6A48974, 0x3DF9530D42589AB6
561 //A0 = 1.1332783889487853739410638809e+06 + 1.9339350553312096248591829758e-10
562 data8 0x41314ADE639225C9, 0x3DEA946DD6C2C8D3
563 data8 0x88BCFAAE71812A1C, 0x00004006 //A12 = 1.3673820009490115307300592012e+02
564 data8 0x9A770F5AB540A326, 0x00004008 //A11 = 6.1786031215382040427126476507e+02
565 data8 0xA170C1D2C6B413FC, 0x0000400A //A10 = 2.5830473201524594051391525170e+03
566 data8 0x9AE56061CB02EB55, 0x0000400C //A9 = 9.9133441230507404119297200255e+03
567 data8 0x872390769650FBE2, 0x0000400E //A8 = 3.4595564309496661629764193479e+04
568 data8 0xD3E5E8D6923910C1, 0x0000400F //A7 = 1.0849181904819284819615140521e+05
569 data8 0x930D70602F50B754, 0x00004011 //A6 = 3.0116351174131169193070583741e+05
571 // Polynomial coefficients for the tgammal(x), 12 <= |x| < 13
572 //A5 = 1.2249876249976964294910430908e+08 + 6.0051348061679753770848000000e-09
573 data8 0x419D34BB29FFC39D, 0x3E39CAB72E01818D
574 //A4 = 2.3482765927605420351028442383e+08 + 1.1874729051592862323641700000e-08
575 data8 0x41ABFE5F168D56FA, 0x3E4980338AA7B04B
576 //A3 = 3.6407329688125067949295043945e+08 + 2.6657200942150363994658700000e-08
577 data8 0x41B5B35150E199A5, 0x3E5C9F79C0EB5300
578 data8 0xE89AE0F8D726329D, 0x00003FF9 //A20 = 2.8394164465429105626588451540e-02
579 data8 0xCF90981F86E38013, 0x00003FFC //A19 = 2.0270002071785908652476845915e-01
580 data8 0xA56C658079CA8C4A, 0x00003FFF //A18 = 1.2923704984019263122675412350e+00
581 data8 0x80AEF96A67C5615A, 0x00004002 //A17 = 8.0427183300456238315262463506e+00
582 data8 0xBE886D7529678931, 0x00004004 //A16 = 4.7633230047847868242503413461e+01
583 data8 0x858EDBA4CE2F7508, 0x00004007 //A15 = 2.6711607799594541057655957154e+02
584 data8 0xB0B0A3AF388274F0, 0x00004009 //A14 = 1.4135199810126975119809102782e+03
585 data8 0xDBA87137988751EF, 0x0000400B //A13 = 7.0290552818218513870879313985e+03
586 //A2 = 4.2828433593031734228134155273e+08 + 3.9760422293645854535247300000e-08
587 data8 0x41B98719AFEE2947, 0x3E6558A17E0D3007
588 //A1 = 3.4008253676084774732589721680e+08 + 1.2558352335001093116071000000e-09
589 data8 0x41B4453F68C2C6EB, 0x3E159338C5BC7EC3
590 //A0 = 1.3684336546556583046913146973e+08 + 2.6786516700381562934240300000e-08
591 data8 0x41A05020CAEE5EA5, 0x3E5CC3058A858579
592 data8 0xFF5E3940FB4BA576, 0x0000400D //A12 = 3.2687111823895439312116108631e+04
593 data8 0x8A08C124C7F74B6C, 0x00004010 //A11 = 1.4134701786994123329786229006e+05
594 data8 0x89D701953540BFFB, 0x00004012 //A10 = 5.6459209892773907605385652281e+05
595 data8 0xFC46344B3116C3AD, 0x00004013 //A9 = 2.0666305367147234406757715163e+06
596 data8 0xD183EBD7A400151F, 0x00004015 //A8 = 6.8653979211730981618367536737e+06
597 data8 0x9C083A40742112F4, 0x00004017 //A7 = 2.0451444503543981795037456447e+07
598 data8 0xCD3C475B1A8B6662, 0x00004018 //A6 = 5.3801245423495149598177886823e+07
599 LOCAL_OBJECT_END(Constants_Tgammal_poly)
602 LOCAL_OBJECT_START(Constants_Tgammal_poly_splitted)
604 // Polynomial coefficients for the tgammal(x), 1 <= |x| < 1.25
605 //A5 = -9.8199506890310417350775651357e-01+ -3.2546247786122976510752200000e-17
606 data8 0xBFEF6C80EC38B509, 0xBC82C2FA7A3DE3BD
607 //A4 = 9.8172808683439960475425323239e-01 + 4.4847611775298520359811400000e-17
608 data8 0x3FEF6A51055096B0, 0x3C89DA56DE95EFE4
609 //A3 = -9.0747907608088618225394839101e-01 +-1.0244057366544064435443970000e-16
610 data8 0xBFED0A118F324B62, 0xBC9D86C7B9EBCFFF
611 data8 0xB8E3FDAA66CC738E, 0x00003FFB //A20 = 9.0278608095877488976217714815e-02
612 data8 0xA76067AE1738699C, 0x0000BFFD //A19 =-3.2690738678103132837070881737e-01
613 data8 0x9D66B13718408C44, 0x00003FFE //A18 = 6.1484820933424283818320582920e-01
614 data8 0xD4AC67BBB4AE5599, 0x0000BFFE //A17 =-8.3075569470082063491389474937e-01
615 data8 0xF1426ED1C1488DB3, 0x00003FFE //A16 = 9.4241993542644505594957058785e-01
616 data8 0xFC12EB07AA6F4B6B, 0x0000BFFE //A15 =-9.8466366707947121954333549690e-01
617 data8 0xFF2B32CFE5B0DDC8, 0x00003FFE //A14 = 9.9675290656677214804168895915e-01
618 data8 0xFFD8E7E6FF3662EA, 0x0000BFFE //A13 =-9.9940347089360552383472582319e-01
619 //A2 = 9.8905599532797250361682017683e-01 + 5.1760162410376024240867300000e-17
620 data8 0x3FEFA658C23B1578, 0x3C8DD673A61F6FE7
621 //A1 = -5.7721566490153275452712478000e-01+ -1.0607935612223465065923310000e-16
622 data8 0xBFE2788CFC6FB618, 0xBC9E9346622D53B7
623 //A0 = 9.9999999999999988897769753748e-01 + 1.1102230245372554544790880000e-16
624 data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFF51E4E
625 data8 0xFFF360DF628F0BC9, 0x00003FFE //A12 = 9.9980740979895815468216470840e-01
626 data8 0xFFEF8F9A72B40480, 0x0000BFFE //A11 = -9.9974916001038145045939523470e-01
627 data8 0xFFE037B8C7E39952, 0x00003FFE //A10 = 9.9951504002809911822597567307e-01
628 data8 0xFFC01E08F348BED2, 0x0000BFFE //A9 = -9.9902522772325406705059517941e-01
629 data8 0xFF83DAC83119B52C, 0x00003FFE //A8 = 9.9810569179053383842734164901e-01
630 data8 0xFEF9F8AB891ABB24, 0x0000BFFE //A7 = -9.9600176036720260345608796766e-01
631 data8 0xFE3F0537573C8235, 0x00003FFE //A6 = 9.9314911461918778676646301341e-01
633 // Polynomial coefficients for the tgammal(x), 1.25 <= |x| < 1.75
634 //A5 = -7.7523052299853054125655660300e-02+ -1.2693512521686721504433600000e-17
635 data8 0xBFB3D88CFE50601B, 0xBC6D44ED60EE2170
636 //A4 = 1.4464535904462152982041800442e-01 + 2.5426820829345729856648800000e-17
637 data8 0x3FC283BD374EB2A9, 0x3C7D50AC436187C3
638 //A3 = -1.0729480456477220873257039102e-01+ -6.2429894945456418196551000000e-18
639 data8 0xBFBB77AC1CA2EBA5, 0xBC5CCA6BCC422D41
640 data8 0xF732D2689F323283, 0x00003FF2 //A20 = 2.3574688251652899567587145422e-04
641 data8 0xB6B00E23DE89D13A, 0x0000BFF3 //A19 =-3.4844916488842618776630058875e-04
642 data8 0xE98396FE4A1B2799, 0x00003FF3 //A18 =4.4539265198744452020440735977e-04
643 data8 0xAF8D235A640DB1A2, 0x0000BFF4 //A17 =-6.6967514303333563295261178346e-04
644 data8 0x8513B736C918B261, 0x00003FF5 //A16 = 1.0152970456990865810615917715e-03
645 data8 0xC790A1A2C78D8E17, 0x0000BFF5 //A15 =-1.5225598630329403515321688394e-03
646 data8 0x959706CFA638CDE2, 0x00003FF6 //A14 = 2.2825614575133879623648932383e-03
647 data8 0xE050A6021E129860, 0x0000BFF6 //A13 =-3.4227757733947066666295285936e-03
648 //A2 = 4.1481345368830113695679528973e-01 + 3.1252439808354284892632100000e-17
649 data8 0x3FDA8C4DBA620D56, 0x3C82040BCB483C76
650 //A1 = 3.2338397448885010387886751460e-02 + 3.4437825798552300531443100000e-18
651 data8 0x3FA08EA88EE561B1, 0x3C4FC366D6C64806
652 //A0 = 8.8622692545275794095971377828e-01 + 7.2689375867553992399219000000e-17
653 data8 0x3FEC5BF891B4EF6A, 0x3C94F3877D311C0C
654 data8 0xA8275AADC09D16FC, 0x00003FF7 //A12 = 5.1316445128621071486146117136e-03
655 data8 0xFBFE2CE9215267A2, 0x0000BFF7 //A11= -7.6902121820788373000579382408e-03
656 data8 0xBCC8EEAB67ECD91D, 0x00003FF8 //A10 = 1.1522515369164312742737727262e-02
657 data8 0x8D1614BB97E5E8C2, 0x0000BFF9 //A9 = -1.7222443097804730395560633583e-02
658 data8 0xD3A963578BE291E3, 0x00003FF9 //A8 = 2.5837606456090186343624210891e-02
659 data8 0x9BA7EAE64C42FDF7, 0x0000BFFA //A7 = -3.8001935555045161419575037512e-02
660 data8 0xF0115BA1A77607E7, 0x00003FFA //A6 = 5.8610303817173477119764956736e-02
662 // Polynomial coefficients for the tgammal(x), 1.75 <= |x| < 2.0
663 //A5 = 2.6698206874501426502654943818e-04 + 3.4033756836921062797887300000e-20
664 data8 0x3F317F3740FE2A68, 0x3BE417093234B06E
665 //A4 = 7.4249010753513894345090307070e-02 + 3.9810018444482764697014200000e-18
666 data8 0x3FB301FBB0F25A92, 0x3C525BEFFABB622F
667 //A3 = -8.1576919247086265851720554565e-02+ -5.2716624487804746360745000000e-19
668 data8 0xBFB4E239984650AC, 0xBC2372F1C4F276FF
669 data8 0xFEF3AEE71038E9A3, 0x00003FEB //A20 = 1.8995395865421509009969188571e-06
670 data8 0xA11CFA2672BF876A, 0x0000BFEB //A19 =-1.2003868221414015771269244270e-06
671 data8 0xF8E107215DAE2164, 0x00003FEC //A18 = 3.7085863210303833432006027217e-06
672 data8 0xBCDDD3FC011EF7D6, 0x00003FEC //A17 = 2.8143303971756051015245433043e-06
673 data8 0x8683C4687FA22E68, 0x00003FEE //A16 = 8.0177018464360416764308252462e-06
674 data8 0xFDA09E5D33E32968, 0x00003FEE //A15 = 1.5117372062443781157389064848e-05
675 data8 0xFFB00D0CFF4089B4, 0x00003FEF //A14 = 3.0480348961227424242198174995e-05
676 data8 0xFEF6C39566785085, 0x00003FF0 //A13 = 6.0788135974125244644334004947e-05
677 //A2 = 4.1184033042643969357854416558e-01 + 1.2103396182129232634761000000e-18
678 data8 0x3FDA5B978B96BEBF, 0x3C3653AAD0A139E4
679 //A1 = -4.2278433509846713445057275749e-01+ -4.9429151528135657430413000000e-18
680 data8 0xBFDB0EE6072093CE, 0xBC56CB907027554F
681 //A0 = 1.0000000000000000000000000000e+00 + 1.0969171200000000000000000000e-31
682 data8 0x3FF0000000000000, 0x3981CC6A5B20B4D5
683 data8 0xFF2B7BA9A8D68C37, 0x00003FF1 //A12 = 1.2167446884801403650547161615e-04
684 data8 0xFCA53468E3692EF1, 0x00003FF2 //A11 = 2.4094136329542400976250900707e-04
685 data8 0x808D698A9C993615, 0x00003FF4 //A10 = 4.9038845704938303659791698883e-04
686 data8 0xF10F8E3FB8BB4AFB, 0x00003FF4 //A9 = 9.1957383840999861214472423976e-04
687 data8 0x89E224E42F93F005, 0x00003FF6 //A8 = 2.1039333407187324139473634747e-03
688 data8 0xBAF374824937A323, 0x00003FF6 //A7 = 2.8526458211545152218493600470e-03
689 data8 0xB6BF7564F52140C6, 0x00003FF8 //A6 = 1.1154045718131014476684982178e-02
691 // Polynomial coefficients for the tgammal(x), 0.0 <= |x| < 0.125
692 //A5 = -9.8199506890314514073736518185e-01+ -5.9363811993837985890950900000e-17
693 data8 0xBFEF6C80EC38B67A, 0xBC911C46B447C81F
694 //A4 = 9.8172808683440015986576554496e-01 + 2.7457414262802803699834200000e-17
695 data8 0x3FEF6A51055096B5, 0x3C7FA7FF90ACAD1F
696 //A3 = -9.0747907608088618225394839101e-01 + -1.0676255850934306734701780000e-16
697 data8 0xBFED0A118F324B62, 0xBC9EC5AFB633438D
698 data8 0x9217E83FA207CB80, 0x00003FFD //A20 = 2.8533864762086088781083621561e-01
699 data8 0xA8DABFA52FDF03EC, 0x0000BFFE //A19= -6.5958783896337186303285832783e-01
700 data8 0xE331ED293AF39F9B, 0x00003FFE //A18 = 8.8748056656454687449654731184e-01
701 data8 0xF9163C5DDB52419D, 0x0000BFFE //A17= -9.7299554149078295602977718525e-01
702 data8 0xFEC0A1C672CB9265, 0x00003FFE //A16 = 9.9512683005268190987854104489e-01
703 data8 0xFFD2D65B8EA7B5F4, 0x0000BFFE //A15= -9.9931087241443958201592847861e-01
704 data8 0xFFF93AA39EE53445, 0x00003FFE //A14 = 9.9989668364186884793382816496e-01
705 data8 0xFFFB99A9A3F5F480, 0x0000BFFE //A13= -9.9993286506283835663204999212e-01
706 //A2 = 9.8905599532797250361682017683e-01 + 5.1778575360788420716540100000e-17
707 data8 0x3FEFA658C23B1578, 0x3C8DD92B45408D07
708 //A1 = -5.7721566490153275452712478000e-01+ -1.0607938730998824663273110000e-16
709 data8 0xBFE2788CFC6FB618, 0xBC9E9346F8FDE55B
710 //A0 = 9.9999999999999988897769753748e-01 + 1.1102230246251564036631420000e-16
711 data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFFFFFFF
712 data8 0xFFF7FEBB545812C1, 0x00003FFE //A12 = 9.9987785409425126648628395084e-01
713 data8 0xFFF00C02E943A3F2, 0x0000BFFE //A11= -9.9975657530855116454438747397e-01
714 data8 0xFFE0420AADC53820, 0x00003FFE //A10 = 9.9951565514290485919027183699e-01
715 data8 0xFFC01EB42EF27EEB, 0x0000BFFE //A9 = -9.9902526759155739377365522320e-01
716 data8 0xFF83DAD0BF23FF12, 0x00003FFE //A8 = 9.9810569378236378800364235948e-01
717 data8 0xFEF9F8ABDBCDB2F3, 0x0000BFFE //A7 = -9.9600176044241699109053158187e-01
718 data8 0xFE3F05375988491D, 0x00003FFE //A6 = 9.9314911462127599008937257662e-01
719 LOCAL_OBJECT_END(Constants_Tgammal_poly_splitted)
722 LOCAL_OBJECT_START(Constants_Tgammal_common)
723 // Positive overflow value
724 data8 0x3FE0000000000000 // 0.5
725 data8 0x3FF8000000000000 // 1.5
726 data8 0x3FD0000000000000 // 0.25
727 data8 0x0000000000000000 // 0
728 data8 0xDB718C066B352E21, 0x00004009 // Positive overflow value
729 LOCAL_OBJECT_END(Constants_Tgammal_common)
733 //=======================================================
736 // General Purpose Registers
738 GR_l_Log_Table1 = r34
750 GR_l_Stirling_Table = r45
751 GR_l_N_Unbiased = r46
753 // Floating Point Registers
819 FR_l_AbsX_m_Half = f80
831 FR_l_SignedXYH = f123
837 //=======================================================
838 // Negative part registers
840 // General Purpose Registers
844 // Float point registers
875 FR_n_SinxH = f113 // the same as FR_n_Poly1H
877 FR_n_SinxL = f114 // the same as FR_n_Poly1L
911 //=======================================================
914 // General Purpose Registers
932 GR_e_sig_inv_ln2 = r49
933 GR_e_rshf_2to51 = r50
934 GR_e_exp_2tom51 = r51
937 // Floating Point Registers
938 FR_e_RSHF_2TO51 = f10
939 FR_e_INV_LN2_2TO63 = f11
940 FR_e_W_2TO51_RSH = f12
970 FR_e_Wp1_T_scale = f58
972 FR_e_expl_Input_X = f123
973 FR_e_expl_Input_Y = f124
974 FR_e_expl_Output_X = f123
975 FR_e_expl_Output_Y = f124
978 FR_e_expl_Input_AbsX = f122
982 //=======================================================
985 // General Purpose Registers
987 GR_c_NegUnderflow = r54
988 GR_c_NegSingularity = r55
994 // Floating Point Registers
995 FR_c_PosOverflow = f123
999 //=======================================================
1000 // Polynomial part registers
1002 // General Purpose Registers
1009 GR_p_X_Sgnd = GR_l_signif_Z // = r37
1014 // Floating Point Registers
1015 FR_p_AbsX = FR_l_AbsX // = f127
1016 FR_p_IXN = FR_n_IXN // = f126
1086 FR_p_OddPoly0H = f56
1087 FR_p_OddPoly0L = f51
1092 //=======================================================
1093 // Negative polynomial part registers
1094 // General Purpose Registers
1095 GR_r_sin_Table = r47
1096 GR_r_sin_Table2 = r60
1098 // Floating Point Registers
1099 FR_r_IXNS = FR_n_IXNS
1102 FR_r_AbsX = FR_l_AbsX
1174 // General Purpose Registers
1176 GR_DenOverflow = r33
1183 // Floating Point Registers
1187 // ERROR HANDLER REGISTERS
1188 GR_Parameter_X = r64
1189 GR_Parameter_Y = r65
1190 GR_Parameter_RESULT = r66
1191 GR_Parameter_TAG = r67
1199 GLOBAL_LIBM_ENTRY(tgammal)
1201 alloc r32 = ar.pfs,0,32,4,0
1202 fabs FR_l_AbsX = f8 // Get absolute value of X
1203 addl GR_n_sin_Table = @ltoff(Constants_Tgammal_sin), gp
1206 addl GR_l_Log_Table=@ltoff(Constants_Tgammal_log_80_Z_G_H_h1#),gp
1208 addl GR_l_Stirling_Table = @ltoff(Constants_Tgammal_stirling), gp
1212 getf.sig GR_l_signif_Z = f8 // Significand of X
1213 fcvt.fx.s1 FR_n_IXNS = f8 // Convert to fixed point
1214 addl GR_c_Table = @ltoff(Constants_Tgammal_common), gp
1217 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1219 addl GR_p_Table = @ltoff(Constants_Tgammal_poly), gp
1223 ld8 GR_n_sin_Table = [GR_n_sin_Table]
1224 fclass.m p6,p0 = f8,0x1EF // Check x for NaN, 0, INF, denorm
1226 addl GR_c_NegSingularity = 0x1003E, r0
1229 ld8 GR_l_Stirling_Table = [GR_l_Stirling_Table]
1230 movl GR_c_13 = 0x402A000000000000 // 13.0
1234 getf.d GR_c_X = f8 // Double prec. X to general register
1235 frcpa.s1 FR_z_Y0,p0 = f1,f8 // y = frcpa(x) (for negatives)
1236 extr.u GR_l_Index1 = GR_l_signif_Z, 59, 4 // = High 4 bits of Z
1239 ld8 GR_c_Table = [GR_c_Table]
1240 movl GR_c_SignBit = 0x8000000000000000 // High bit (sign)
1244 ld8 GR_p_Table = [GR_p_Table]
1245 fcmp.lt.s1 p15, p14 = f8,f0 // p14 - positive arg, p15 - negative
1246 shl GR_l_Index1 = GR_l_Index1,5 // Adjust Index1 ptr (x32)
1249 adds GR_c_NegUnderflow = 1765, r0
1251 (p6) br.cond.spnt tgammal_spec // Spec. values processing branch ////////////
1252 // (0s, INFs, NANs, NatVals, denormals) //////
1256 ldfpd FR_l_CH,FR_l_CL= [GR_l_Stirling_Table], 16 // Load CH, CL
1257 fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Abs arg to int by trunc
1258 extr.u GR_l_X_0 = GR_l_signif_Z, 49, 15 // High 15 bit of Z
1261 add GR_l_Index1 = GR_l_Index1,GR_l_Log_Table // Add offset
1262 fma.s1 FR_p_2 = f1, f1, f1 // 2.0
1263 andcm GR_c_X = GR_c_X, GR_c_SignBit // Remove sign
1267 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Z_G_H_h2#), gp
1268 fcmp.lt.s1 p10, p0 = FR_l_AbsX, f1 // If |X|<1 then p10 = 1
1272 ld2 GR_l_Z_1 = [GR_l_Index1],4 // load Z_1 from Index1
1273 movl GR_l_BIAS = 0x000000000000FFFF // Bias for exponent
1277 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1278 frcpa.s1 FR_l_Y0, p0 = f1, FR_l_AbsX // y = frcpa(x)
1282 ldfs FR_l_G_1 = [GR_l_Index1],4 // Load G_1
1283 fsub.s1 FR_l_W = FR_l_AbsX, f1 // W = |X|-1
1288 getf.exp GR_l_N_Unbiased= FR_l_AbsX // exponent of |X|
1289 fmerge.se FR_l_S = f1, FR_l_AbsX // S = merging of X and 1.0
1290 cmp.gtu p11, p0 = GR_c_13, GR_c_X // If 1 <= |X| < 13
1294 ldfs FR_l_H_1 = [GR_l_Index1],8 // Load H_1
1295 fcvt.xf FR_n_XNS = FR_n_IXNS // Convert to FP repr. of int X
1296 (p10) br.cond.spnt tgamma_lt_1 // Branch to |X| < 1 path ///////////////////
1300 ldfpd FR_n_A2H, FR_n_A2L = [GR_n_sin_Table], 16
1302 pmpyshr2.u GR_l_X_1 = GR_l_X_0,GR_l_Z_1,15 // Adjust Index2 (x32)
1305 ldfe FR_l_B2 = [GR_l_Stirling_Table], 16
1307 (p11) br.cond.spnt tgamma_lt_13 // Branch to 1 <= |X| < 13 path ///////////////
1311 ldfe FR_l_h_1 = [GR_l_Index1],0
1313 sub GR_l_N = GR_l_N_Unbiased, GR_l_BIAS // N - BIAS
1316 ldfpd FR_l_B4,FR_l_B6= [GR_l_Stirling_Table], 16 // Load C
1317 (p15) cmp.geu.unc p8,p0 = GR_l_N_Unbiased, GR_c_NegSingularity
1318 (p8) br.cond.spnt tgammal_singularity // Singularity for arg < to -2^63 //////
1322 (p15) ldfpd FR_n_A1H, FR_n_A1L = [GR_n_sin_Table], 16
1323 ldfpd FR_l_B8, FR_l_B10 = [GR_l_Stirling_Table], 16
1324 add GR_c_Table = 0x20, GR_c_Table
1328 (p15) ldfe FR_n_A9 = [GR_n_sin_Table], 16
1329 fma.s1 FR_l_Q0 = f1,FR_l_Y0,f0 // Q0 = Y0
1333 ldfpd FR_l_B12, FR_l_B14 = [GR_l_Stirling_Table], 16
1334 fnma.s1 FR_l_E0 = FR_l_Y0,FR_l_AbsX,f1 // e = 1-b*y
1339 (p15) ldfe FR_n_A8 = [GR_n_sin_Table], 16
1340 fcvt.xf FR_c_XN = FR_n_IXN // Convert to FP repr. of int X
1341 extr.u GR_l_Index2 = GR_l_X_1, 6, 4 // Extract Index2
1344 ldfpd FR_l_B16, FR_l_B18 = [GR_l_Stirling_Table], 16
1350 (p15) ldfe FR_n_A7 = [GR_n_sin_Table], 16
1351 fms.s1 FR_l_CXH = FR_l_CH, f1, FR_l_AbsX // CXH = CH+|X|
1352 shl GR_l_Index2 = GR_l_Index2,5
1355 ldfd FR_l_Half = [GR_l_Stirling_Table] // Load 0.5
1361 add GR_l_Index2 = GR_l_Index2, GR_l_Log_Table // Add offset
1366 (p15) ldfe FR_n_A6 = [GR_n_sin_Table], 16
1367 (p15) fma.s1 FR_n_XS = FR_l_AbsX , f1, FR_n_XNS // xs = x - int(x)
1372 ld2 GR_l_Z_2 = [GR_l_Index2],4
1373 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_h3_G_H#),gp
1378 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1379 fma.s1 FR_l_E2 = FR_l_E0,FR_l_E0,FR_l_E0 // e2 = e+e^2
1383 ldfs FR_l_G_2 = [GR_l_Index2],4
1384 fma.s1 FR_l_E1 = FR_l_E0,FR_l_E0,f0 // e1 = e^2
1389 ldfs FR_l_H_2 = [GR_l_Index2],8
1390 (p15) ldfe FR_n_A5 = [GR_n_sin_Table], 16
1395 setf.sig FR_l_float_N = GR_l_N // float_N = Make N a fp number
1397 pmpyshr2.u GR_l_X_2 = GR_l_X_1,GR_l_Z_2,15 // X_2 = X_1 * Z_2
1400 ldfe FR_l_h_2 = [GR_l_Index2],0
1401 fma.s1 FR_l_CXL = FR_l_AbsX, f1, FR_l_CXH // CXL = |X|+CXH
1402 add GR_l_Log_Table1= 0x200, GR_l_Log_Table
1406 (p15) ldfe FR_n_A4 = [GR_n_sin_Table], 16
1407 (p15) fcmp.eq.unc.s1 p9,p0 = FR_l_AbsX, FR_c_XN //if argument is integer
1412 ldfe FR_c_PosOverflow = [GR_c_Table],16 //Load pos overflow value
1413 (p15) fma.s1 FR_n_XS2 = FR_n_XS, FR_n_XS, f0 // xs^2 = xs*xs
1418 (p15) ldfe FR_n_A3 = [GR_n_sin_Table], 16
1424 (p15) getf.sig GR_n_XN = FR_n_IXN // int(x) to general reg
1425 fma.s1 FR_l_Y1 = FR_l_Y0,FR_l_E2,FR_l_Y0 // y1 = y+y*e2
1430 fma.s1 FR_l_E3 = FR_l_E1,FR_l_E1,FR_l_E0 // e3 = e+e1^2
1431 (p9) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
1432 // and negative arguments //////////////
1437 fms.s1 FR_l_AbsX_m_Half = FR_l_AbsX, f1, FR_l_Half // |x|-0.5
1438 extr.u GR_l_Index2 = GR_l_X_2, 1, 5 // Get Index3
1442 shladd GR_l_Log_Table1= GR_l_Index2, 2, GR_l_Log_Table1
1444 shladd GR_l_Index3 = GR_l_Index2,4, GR_l_Log_Table // Index3
1447 (p15) cmp.gtu.unc p11, p0 = GR_n_XN, GR_c_NegUnderflow // X < -1765
1448 fms.s1 FR_l_CXL = FR_l_CH, f1, FR_l_CXL // CXL = CH - CXL
1449 (p11) br.cond.spnt tgammal_underflow // Singularity for negative argument //////
1450 // at underflow domain (X < -1765) //////
1454 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Q#), gp
1455 (p15) fma.s1 FR_n_TT = FR_n_A2L, FR_n_XS2, f0 // T=A2L*x^2
1456 tbit.nz.unc p13, p12 = GR_n_XN, 0x0 // whether [X] odd or even
1460 (p15) fms.s1 FR_n_XS2L = FR_n_XS, FR_n_XS, FR_n_XS2 // xs^2 Low part
1465 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1466 (p15) fma.s1 FR_n_A7 = FR_n_A8, FR_n_XS2, FR_n_A7 // poly tail
1470 ldfe FR_l_h_3 = [GR_l_Index3],12
1471 (p15) fma.s1 FR_n_XS4 = FR_n_XS2, FR_n_XS2, f0 // xs^4 = xs^2*xs^2
1476 ldfs FR_l_H_3 = [GR_l_Log_Table1], 0
1477 fma.s1 FR_l_Y2 = FR_l_Y1, FR_l_E3, FR_l_Y0 // y2 = y+y1*e3
1481 ldfs FR_l_G_3 = [GR_l_Index3], 0
1482 fnma.s1 FR_l_Z = FR_l_AbsX,FR_l_Q0,f1 // r = a-b*q
1488 fmpy.s1 FR_l_G = FR_l_G_1, FR_l_G_2 // G = G1 * G_2
1493 fadd.s1 FR_l_H = FR_l_H_1, FR_l_H_2 // H = H_1 + H_2
1498 ldfe FR_l_log2_hi = [GR_l_Log_Table],16 // load log2_hi part
1499 fadd.s1 FR_l_h = FR_l_h_1, FR_l_h_2 // h = h_1 + h_2
1504 fcvt.xf FR_l_float_N = FR_l_float_N // int(N)
1509 ldfe FR_l_log2_lo = [GR_l_Log_Table],16 // Load log2_lo part
1510 fma.s1 FR_l_CXL = FR_l_CXL, f1, FR_l_CL
1515 (p15) fma.s1 FR_n_TT = FR_n_A2H, FR_n_XS2L, FR_n_TT // T=A2H*x2L+T
1520 ldfe FR_l_Q_6 = [GR_l_Log_Table],16
1521 (p15) fma.s1 FR_n_A3 = FR_n_A4, FR_n_XS2, FR_n_A3 // poly tail
1526 (p15) fma.s1 FR_n_A5 = FR_n_A6, FR_n_XS2, FR_n_A5 // poly tail
1531 ldfe FR_l_Q_5 = [GR_l_Log_Table],16
1532 (p15) fabs FR_n_XS = FR_n_XS // abs(xs)
1537 fma.s1 FR_l_Z = FR_l_Z,FR_l_Y2,FR_l_Q0 // x_hi = q+r*y2
1542 ldfe FR_l_Q_4 = [GR_l_Log_Table],16
1543 (p15) fma.s1 FR_n_A7 = FR_n_A9, FR_n_XS4, FR_n_A7 // poly tail
1548 (p15) fma.s1 FR_n_XS7 = FR_n_XS4, FR_n_XS2, f0 // = x^4*x^2
1553 ldfe FR_l_Q_3 = [GR_l_Log_Table],16
1554 fneg FR_n_NegOne = f1 // -1.0
1559 (p15) fma.s1 FR_n_XS8 = FR_n_XS4, FR_n_XS4, f0 // xs^8 = xs^4*xs^4
1564 ldfe FR_l_Q_2 = [GR_l_Log_Table],16
1565 fadd.s1 FR_l_h = FR_l_h, FR_l_h_3 // h = h_1 + h_2 + h_3
1570 (p15) fma.s1 FR_n_TH = FR_n_A2H, FR_n_XS2, FR_n_TT // A2H*xs2+T
1575 ldfe FR_l_Q_1 = [GR_l_Log_Table],16
1576 fmpy.s1 FR_l_G = FR_l_G, FR_l_G_3 // G = G_1 * G_2 * G_3
1581 fadd.s1 FR_l_H = FR_l_H, FR_l_H_3 // H = H_1 + H_2 + H_3
1587 fma.s1 FR_l_Z2 = FR_l_Z, FR_l_Z, f0 // Z^2
1592 (p15) fma.s1 FR_n_A3 = FR_n_A5, FR_n_XS4, FR_n_A3 // poly tail
1598 (p14) fcmp.gt.unc.s1 p7,p0 = FR_l_AbsX, FR_c_PosOverflow //X > 1755.5483
1599 // (overflow domain, result cannot be represented by normal value)
1604 (p15) fma.s1 FR_n_XS7 = FR_n_XS7, FR_n_XS, f0 // x^7 construction
1610 (p15) fms.s1 FR_n_TL = FR_n_A2H, FR_n_XS2, FR_n_TH // A2H*xs2+TH
1615 (p15) fma.s1 FR_n_PolyH = FR_n_TH, f1, FR_n_A1H // PolyH=TH+A1H
1621 fmpy.s1 FR_l_GS_hi = FR_l_G, FR_l_S // GS_hi = G*S
1626 fms.s1 FR_l_r = FR_l_G, FR_l_S, f1 // r = G*S -1
1627 (p7) br.cond.spnt tgammal_overflow // Overflow path for arg > 1755.5483 //////
1632 fma.s1 FR_l_B14 = FR_l_B16, FR_l_Z2, FR_l_B14// bernulli tail
1637 fma.s1 FR_l_Z4 = FR_l_Z2, FR_l_Z2, f0 // Z^4 = Z^2*Z^2
1643 fma.s1 FR_l_B2 = FR_l_B4, FR_l_Z2, FR_l_B2 // bernulli tail
1648 fma.s1 FR_l_B6 = FR_l_B8, FR_l_Z2, FR_l_B6 // bernulli tail
1654 fma.s1 FR_l_B10 = FR_l_B12, FR_l_Z2, FR_l_B10// bernulli tail
1659 (p15) fma.s1 FR_n_Tail = FR_n_A7, FR_n_XS8, FR_n_A3 // poly tail
1665 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT // TL = TL+T
1670 (p15) fms.s1 FR_n_PolyL = FR_n_A1H, f1, FR_n_PolyH // polyH+A1H
1676 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_Q_6, FR_l_Q_5 // Q_5+r*Q_6
1681 fsub.s1 FR_l_r_cor = FR_l_GS_hi, f1 // r_cor = GS_hi -1
1687 fms.s1 FR_l_GS_lo = FR_l_G, FR_l_S, FR_l_GS_hi // G*S-GS_hi
1692 fma.s1 FR_l_poly = FR_l_r, FR_l_Q_2, FR_l_Q_1 //poly=r*Q2+Q1
1698 fmpy.s1 FR_l_rsq = FR_l_r, FR_l_r // rsq = r * r
1703 fma.s1 FR_l_G = FR_l_float_N, FR_l_log2_hi, FR_l_H // Tbl =
1704 // float_N*log2_hi + H
1710 fma.s1 FR_l_Y_lo = FR_l_float_N, FR_l_log2_lo, FR_l_h // Y_lo=
1711 // float_N*log2_lo + h
1716 fma.s1 FR_l_B14 = FR_l_B18, FR_l_Z4, FR_l_B14 //bernulli tail
1722 fma.s1 FR_l_B2 = FR_l_B6, FR_l_Z4, FR_l_B2 //bernulli tail
1727 fma.s1 FR_l_Z8 = FR_l_Z4, FR_l_Z4, f0 //bernulli tail
1733 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_4 // poly_lo =
1734 // Q_4 + r * poly_lo
1739 fsub.s1 FR_l_r_cor = FR_l_r_cor, FR_l_r // r_cor = r_cor - r
1745 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TH // polyL+TH
1750 (p15) fma.s1 FR_n_TT = FR_n_TL, f1, FR_n_A1L // TL+A1L
1756 fadd.s1 FR_l_logl_YHi = FR_l_G, FR_l_r // Y_hi = Tbl + r
1762 fma.s1 FR_l_B10 = FR_l_B14, FR_l_Z4, FR_l_B10 //bernulli tail
1768 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_3 // poly_lo =
1769 // Q_3 + r * poly_lo
1774 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_GS_lo // r_cor=r_cor+GS_lo
1780 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TT // polyL+TT
1786 fsub.s1 FR_l_Y_lo_res = FR_l_G, FR_l_logl_YHi // Y_lo = Tbl - Y_hi
1791 fma.s1 FR_l_XYH = FR_l_logl_YHi, FR_l_AbsX_m_Half, f0 // XYH=
1798 fma.s1 FR_l_SS = FR_l_B10, FR_l_Z8, FR_l_B2 // bernulli tail
1804 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_Y_lo // r_cor = r_cor+Y_lo
1809 fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly_lo, FR_l_poly //poly=
1816 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_n_XS2, f0 // T=polyL*xs^2
1822 fadd.s1 FR_l_Y_lo = FR_l_Y_lo_res, FR_l_r // Y_lo = Y_lo + r
1827 fms.s1 FR_l_XYL = FR_l_logl_YHi, FR_l_AbsX_m_Half, FR_l_XYH
1828 // XYL = YHi*|x-0.5|-XYH
1834 fma.s1 FR_l_SSCXH = FR_l_SS, FR_l_Z, FR_l_CXH // SS*Z+CXH
1838 mov GR_e_exp_2tom51= 0xffff-51 // 2^-51
1839 (p15) fma.s1 FR_l_SignedXYH = FR_l_XYH, FR_n_NegOne, f0 // XYH = -XYH
1846 movl GR_e_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
1850 movl GR_e_sig_inv_ln2 = 0xb8aa3b295c17f0bc //significand of 1/ln2
1855 fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly, FR_l_r_cor // poly =
1856 // rsq * poly + r_cor
1861 addl GR_e_ad_Arg = @ltoff(Constants_Tgammal_exp_64_Arg#),gp
1862 (p15) fma.s1 FR_n_TT = FR_n_PolyH, FR_n_XS2L, FR_n_TT
1863 mov GR_e_exp_mask = 0x1FFFF // Form exponent mask
1867 movl GR_e_rshf = 0x43e8000000000000 // 1.10000 2^63 rshift
1872 setf.sig FR_e_INV_LN2_2TO63 = GR_e_sig_inv_ln2 // form 1/ln2 * 2^63
1873 setf.d FR_e_RSHF_2TO51 = GR_e_rshf_2to51 // 1.1000 * 2^(63+51)
1879 fms.s1 FR_l_SSCXL = FR_l_CXH, f1, FR_l_SSCXH // CXH+SS*CXH
1884 fma.s1 FR_e_expl_Input_AbsX = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
1888 .pred.rel "mutex",p14,p15
1891 (p14) fma.s1 FR_e_expl_Input_X = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
1892 mov GR_e_exp_bias = 0x0FFFF // Set exponent bias
1895 ld8 GR_e_ad_Arg = [GR_e_ad_Arg] // Point to Arg table
1896 (p15) fms.s1 FR_e_expl_Input_X = FR_l_SignedXYH, f1, FR_l_SSCXH // HI EXP
1902 fadd.s1 FR_l_logl_YLo = FR_l_Y_lo, FR_l_poly // YLo = YLo+poly
1907 setf.exp FR_e_2TOM51 = GR_e_exp_2tom51 //2^-51 for scaling float_N
1908 (p15) fma.s1 FR_n_TH = FR_n_PolyH, FR_n_XS2, FR_n_TT // TH=
1913 setf.d FR_e_RSHF = GR_e_rshf // Right shift const 1.1000*2^63
1919 add GR_e_ad_A = 0x20, GR_e_ad_Arg // Point to A table
1921 add GR_e_ad_T1 = 0x50, GR_e_ad_Arg // Point to T1 table
1924 add GR_e_ad_T2 = 0x150, GR_e_ad_Arg // Point to T2 table
1931 fma.s1 FR_l_SSCXL = FR_l_SS, FR_l_Z, FR_l_SSCXL
1936 fms.s1 FR_e_expl_Input_Y = FR_l_XYH, f1, FR_e_expl_Input_AbsX
1941 ldfe FR_e_L_hi = [GR_e_ad_Arg],16 // Get L_hi
1948 fma.s1 FR_l_XYL = FR_l_logl_YLo, FR_l_AbsX_m_Half, FR_l_XYL
1949 // XYL = YLo*|x-0.5|+XYL
1954 ldfe FR_e_L_lo = [GR_e_ad_Arg],16 // Get L_lo
1955 (p15) fms.s1 FR_n_TL = FR_n_PolyH, FR_n_XS2, FR_n_TH // TL =
1957 add GR_e_ad_W1 = 0x100, GR_e_ad_T2 // Point to W1 table
1961 (p15) fma.s1 FR_n_Poly1H = FR_n_TH, f1, f1 // poly1H = TH+1
1962 add GR_e_ad_W2 = 0x300, GR_e_ad_T2 // Point to W2 table
1966 getf.exp GR_e_signexp_x = FR_e_expl_Input_X // Extract sign and exp
1967 ldfe FR_e_A3 = [GR_e_ad_A],16 // Get A3
1973 fma.s1 FR_l_SSCXL = FR_l_SSCXL, f1, FR_l_CXL
1978 fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_SSCXH
1984 fma.s1 FR_e_N_signif=FR_e_expl_Input_X,FR_e_INV_LN2_2TO63,FR_e_RSHF_2TO51
1985 and GR_e_exp_x = GR_e_signexp_x, GR_e_exp_mask
1989 sub GR_e_exp_x = GR_e_exp_x, GR_e_exp_bias // Get exponent
1990 ldfe FR_e_A2 = [GR_e_ad_A],16 // Get A2 for main path
1996 (p15) fma.s1 FR_n_PolyH = FR_n_Poly1H, FR_n_XS, f0//sin(Pi*x) poly
2001 (p15) fms.s1 FR_n_Poly1L = f1, f1, FR_n_Poly1H//sin(Pi*x) poly
2007 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT//sin(Pi*x) poly
2013 fma.s1 FR_l_Temp = FR_l_XYL, f1, FR_l_SSCXL // XYL+SS*CXL
2018 (p15) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, FR_n_NegOne, f0
2019 // Negate lo part of exp argument for negative input values
2024 ldfe FR_e_A1 = [GR_e_ad_A],16 // Get A1
2030 fms.s1 FR_e_float_N = FR_e_N_signif, FR_e_2TOM51, FR_e_RSHF
2031 // Get float N = signd*2^51-RSHIFTER
2037 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TH //sin(Pi*x) poly
2042 (p15) fms.s1 FR_n_PolyL = FR_n_Poly1H, FR_n_XS, FR_n_PolyH//sin(Pi*x)
2047 getf.sig GR_e_N_fix = FR_e_N_signif // Get N from significand
2052 .pred.rel "mutex",p14,p15
2055 (p14) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
2060 (p15) fms.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
2061 // arguments for exp computation
2067 fnma.s1 FR_e_r = FR_e_L_hi, FR_e_float_N, FR_e_expl_Input_X
2068 // r = -L_hi * float_N + x
2069 extr.u GR_e_M1 = GR_e_N_fix, 6, 6 // Extract index M_1
2074 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TL //sin(Pi*x) poly
2082 fma.s1 FR_e_r = FR_e_r, f1, FR_e_expl_Input_Y
2083 // r = r + FR_e_expl_Input_Y
2087 shladd GR_e_ad_W1 = GR_e_M1,3,GR_e_ad_W1 // Point to W1
2088 shladd GR_e_ad_T1 = GR_e_M1,2,GR_e_ad_T1 // Point to T1
2089 extr.u GR_e_M2 = GR_e_N_fix, 0, 6 // Extract index M_2
2094 ldfs FR_e_T1 = [GR_e_ad_T1],0 // Get T1
2096 extr GR_e_K = GR_e_N_fix, 12, 32 //Extract limit range K
2099 shladd GR_e_ad_T2 = GR_e_M2,2,GR_e_ad_T2 // Point to T2
2100 (p15) fma.s1 FR_n_PolyL = FR_n_Poly1L, FR_n_XS, FR_n_PolyL
2102 shladd GR_e_ad_W2 = GR_e_M2,3,GR_e_ad_W2 // Point to W2
2106 ldfs FR_e_T2 = [GR_e_ad_T2],0 // Get T2
2108 add GR_e_exp_2_k = GR_e_exp_bias, GR_e_K // exp of 2^k
2111 ldfd FR_e_W1 = [GR_e_ad_W1],0 // Get W1
2113 sub GR_e_exp_2_mk = GR_e_exp_bias, GR_e_K // exp of 2^-k
2117 ldfd FR_e_W2 = [GR_e_ad_W2],0 // Get W2
2123 setf.exp FR_e_scale = GR_e_exp_2_k // Set scale = 2^k
2124 setf.exp FR_e_2_mk = GR_e_exp_2_mk // Form 2^-k
2125 fnma.s1 FR_e_r = FR_e_L_lo, FR_e_float_N, FR_e_r
2126 // r = -L_lo * float_N + r
2131 (p15) fma.s1 FR_n_PolyL = FR_n_Tail, FR_n_XS7, FR_n_PolyL
2138 fma.s1 FR_e_poly = FR_e_r, FR_e_A3, FR_e_A2 // poly=r*A3+A2
2143 fmpy.s1 FR_e_rsq = FR_e_r, FR_e_r // rsq = r * r
2149 fmpy.s1 FR_e_T = FR_e_T1, FR_e_T2 // T = T1 * T2
2154 fadd.s1 FR_e_W1_p1 = FR_e_W1, f1 // W1_p1 = W1 + 1.0
2160 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_l_AbsX, f0 //sin(Pi*x) poly
2166 fma.s1 FR_e_poly = FR_e_r, FR_e_poly, FR_e_A1
2167 // poly = r * poly + A1
2173 fma.s1 FR_e_T_scale = FR_e_T, FR_e_scale, f0 // T_scale=T*scale
2178 fma.s1 FR_e_W = FR_e_W2, FR_e_W1_p1, FR_e_W1
2179 // W = W2 * (W1+1.0) + W1
2185 (p15) fma.s1 FR_n_SinxH = FR_n_PolyH, FR_l_AbsX, FR_n_TT
2192 mov FR_e_Y_hi = FR_e_T // Assume Y_hi = T
2198 fma.s1 FR_e_poly = FR_e_rsq, FR_e_poly, FR_e_r
2199 // poly = rsq * poly + r
2205 fma.s1 FR_e_Wp1_T_scale = FR_e_W, FR_e_T_scale, FR_e_T_scale
2211 fma.s1 FR_e_W_T_scale = FR_e_W, FR_e_T_scale, f0 // W*T*scale
2217 (p15) fms.s1 FR_n_SinxL = FR_n_PolyH, FR_l_AbsX, FR_n_SinxH
2224 (p15) frcpa.s1 FR_n_Y0, p0 = f1, FR_n_SinxH // y = frcpa(b)
2230 fma.s1 FR_e_result_lo = FR_e_Wp1_T_scale, FR_e_poly, FR_e_W_T_scale
2231 // Low part of exp result
2237 (p15) fma.s1 FR_n_SinxL = FR_n_SinxL, f1, FR_n_TT // sin low result
2243 (p15) fma.s1 FR_n_Q0 = f1,FR_n_Y0,f0 // q = y
2248 (p15) fnma.s1 FR_n_E0 = FR_n_Y0, FR_n_SinxH, f1 // e = 1-b*y
2255 (p14) fma.s0 f8 = FR_e_Y_hi, FR_e_scale, FR_e_result_lo
2256 (p14) br.ret.spnt b0 // Exit for positive Stirling path //////////////////////
2261 fma.s1 FR_e_expl_Output_X = FR_e_Y_hi, FR_e_scale, f0 // exp result
2266 fma.s1 FR_e_expl_Output_Y = FR_e_result_lo, f1, f0// exp lo result
2272 fma.s1 FR_n_E2 = FR_n_E0,FR_n_E0,FR_n_E0 // e2 = e+e^2
2277 fma.s1 FR_n_E1 = FR_n_E0,FR_n_E0,f0 // e1 = e^2
2283 fma.s1 FR_n_Y1 = FR_n_Y0,FR_n_E2,FR_n_Y0 // y1 = y+y*e2
2288 fma.s1 FR_n_E3 = FR_n_E1,FR_n_E1,FR_n_E0 // e3 = e+e1^2
2294 fma.s1 FR_n_Y2 = FR_n_Y1,FR_n_E3,FR_n_Y0 // y2 = y+y1*e3
2299 fnma.s1 FR_n_R0 = FR_n_SinxH,FR_n_Q0,f1 // r = a-b*q
2305 fnma.s1 FR_n_E4 = FR_n_SinxH,FR_n_Y2,f1 // e4 = 1-b*y2
2310 fma.s1 FR_n_RcpResH = FR_n_R0,FR_n_Y2,FR_n_Q0 // x = q+r*y2
2316 fma.s1 FR_n_Y3 = FR_n_Y2,FR_n_E4,FR_n_Y2 // y3 = y2+y2*e4
2321 fnma.s1 FR_n_R1 = FR_n_SinxH,FR_n_RcpResH,f1 // r1 = a-b*x
2327 fnma.s1 FR_n_R1 = FR_n_SinxL,FR_n_RcpResH,FR_n_R1
2334 fma.s1 FR_n_RcpResL = FR_n_R1,FR_n_Y3,f0 // x_lo = r1*y3
2339 fma.s1 FR_n_Temp = FR_n_RcpResH, FR_e_expl_Output_Y, f0
2340 // Multiplying exp and sin result
2346 fma.s1 FR_n_Temp = FR_n_RcpResL, FR_e_expl_Output_X, FR_n_Temp
2347 // Multiplying exp and sin result
2353 fma.s1 FR_n_ResH = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_Temp
2354 // Multiplying exp and sin result
2360 fms.s1 FR_n_ResL = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_ResH
2361 // Multiplying exp and sin result
2366 (p12) fma.s1 FR_n_ResH = FR_n_ResH, FR_n_NegOne, f0 // Negate
2372 fma.s1 FR_n_ResL = FR_n_ResL, f1, FR_n_Temp
2373 // Multiplying exp and sin result - low result obtained
2377 .pred.rel "mutex",p12,p13
2380 (p13) fma.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For odd
2385 (p12) fms.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For even
2386 br.ret.sptk b0 // Exit for negative Stirling path //////////////////////
2390 //////////// 1 <= |X| < 13 path ////////////////////////////////////////////////
2391 //------------------------------------------------------------------------------
2395 getf.sig GR_p_XN = FR_p_IXN // Get significand
2396 fcvt.xf FR_p_XN = FR_p_IXN // xn = [x]
2397 add GR_r_sin_Table2= 0x40, GR_r_sin_Table // Shifted table addr.
2400 ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 // 0.5 & 1.5
2401 fms.s1 FR_p_AbsXM1 = FR_p_AbsX, f1, f1 // X-1
2402 add GR_p_Table2 = 0xB0, GR_p_Table
2406 add GR_r_sin_Table = -16, GR_r_sin_Table // For compensation
2407 fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
2408 shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 59 // Get only 5 bit of signd
2412 ldfpd FR_r_A2H,FR_r_A2L = [GR_r_sin_Table], 16 // Load A2
2414 add GR_p_Int = -2, GR_p_XN // int = int - 2
2417 ldfe FR_r_A6 = [GR_r_sin_Table2], 16
2419 cmp.gtu p11, p12 = 0x2, GR_p_XN // p11: x < 2 (splitted intervals),
2420 // p12: x > 2 (base intervals)
2424 ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
2426 shr GR_p_Int = GR_p_Int, 1 // int/2
2429 ldfe FR_r_A5 = [GR_r_sin_Table2], 16
2431 (p11) cmp.gtu.unc p10, p11 = 0x1C, GR_p_X_Sgnd // sgnd(x) < 0.75
2435 ldfe FR_r_A9 = [GR_r_sin_Table], 16
2437 shl GR_p_Offset = GR_p_Int, 4 // offset = int*16
2440 ldfe FR_r_A4 = [GR_r_sin_Table2], 16
2442 (p10) cmp.gtu.unc p9, p10 = 0x14, GR_p_X_Sgnd // sgnd(x) < 0.25
2447 ldfe FR_r_A8 = [GR_r_sin_Table], 16
2449 (p12) tbit.nz.unc p13, p12 = GR_p_XN, 0x0 // p13: reccurent computations
2450 // X is at [3;4], [5;6], [7;8]... interval
2453 ldfe FR_r_A3 = [GR_r_sin_Table2], 16
2455 shladd GR_p_Offset = GR_p_Int, 2, GR_p_Offset // +int*4
2458 .pred.rel "mutex",p9,p11
2460 add GR_p_Offset = GR_p_Int, GR_p_Offset
2461 // +int, so offset = int*21
2462 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f1 // r = x-1
2466 ldfe FR_r_A7 = [GR_r_sin_Table], 16
2467 (p11) fms.s1 FR_p_XR = FR_p_2, f1, FR_p_AbsX
2468 // r = 2-x for 1.75 < x < 2
2472 .pred.rel "mutex",p9,p10
2473 .pred.rel "mutex",p10,p11
2474 .pred.rel "mutex",p9,p11
2476 (p9) add GR_p_Offset = 126, r0 // 1.0 < x < 1.25 table
2477 (p15) fcmp.eq.unc.s1 p7,p0 = FR_p_AbsX, FR_p_XN
2478 // If arg is integer and negative - singularity branch
2482 (p10) add GR_p_Offset = 147, r0 // 1.25 < x < 1.75 table
2484 (p11) add GR_p_Offset = 168, r0 // 1.75 < x < 2.0 table
2488 shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table
2489 shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2
2490 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = x - [x]
2494 ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
2495 ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
2496 (p7) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
2497 // and negative argument ///////////////
2501 ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
2502 fma.s1 FR_p_XN = FR_p_XN, f1, FR_p_0p5 // xn = xn+0.5
2506 ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
2507 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_1p5 // r = x - 1.5
2512 ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
2513 ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
2518 ldfe FR_p_A20 = [GR_p_Table], 16
2519 ldfe FR_p_A12 = [GR_p_Table2], 16
2524 ldfe FR_p_A19 = [GR_p_Table], 16
2525 ldfe FR_p_A11 = [GR_p_Table2], 16
2526 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs2 = xs*xs
2530 ldfe FR_p_A18 = [GR_p_Table], 16
2531 ldfe FR_p_A10 = [GR_p_Table2], 16
2535 .pred.rel "mutex",p12,p13
2537 ldfe FR_p_A17 = [GR_p_Table], 16
2538 (p12) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN // r = x - xn
2542 ldfe FR_p_A9 = [GR_p_Table2], 16
2543 (p13) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN
2548 ldfe FR_p_A16 = [GR_p_Table], 16
2549 ldfe FR_p_A8 = [GR_p_Table2], 16
2550 (p9) cmp.eq p12, p0 = r0, r0 // clear p12
2554 ldfe FR_p_A15 = [GR_p_Table], 16
2555 ldfe FR_p_A7 = [GR_p_Table2], 16
2556 (p10) cmp.eq p12, p0 = r0, r0 // clear p12
2560 ldfe FR_p_A14 = [GR_p_Table], 16
2561 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // sin for neg
2562 (p11) cmp.eq p12, p0 = r0, r0 // clear p12
2565 ldfe FR_p_A6 = [GR_p_Table2], 16
2566 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // sin for neg
2571 ldfe FR_p_A13 = [GR_p_Table], 16
2572 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // x2Lo part
2578 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // A5H*r
2584 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // r^2 = r*r
2590 fabs FR_r_XS = FR_r_XS // abs(xs)
2595 fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // A2H*r
2602 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // sin for neg
2607 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // sin for neg
2613 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // sin for neg
2619 fms.s1 FR_p_Temp5L = FR_p_A5H,FR_p_XR,FR_p_Temp5H //A5H*r delta
2625 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // A5H*r+A4H
2632 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H//A2H*r delta
2638 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // A2H*r+A1H
2645 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3 = r^2*r
2650 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
2656 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // Poly tail
2661 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // Poly tail
2667 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // r^4 = r^2*r^2
2673 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L// Low part
2679 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low part
2686 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // (A5H*r+A4H)*r
2691 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // A2*r low
2697 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
2702 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
2708 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
2713 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 // Poly tail
2719 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // sin for neg
2724 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
2730 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
2735 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H // Low poly
2741 fms.s1 FR_p_Temp4L = FR_p_Poly5H,FR_p_XR,FR_p_Temp4H //Low poly
2746 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
2752 fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
2757 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
2763 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
2768 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
2774 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // Poly tail
2779 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
2785 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L // Low poly
2790 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 // Poly tail
2796 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H // Low poly
2801 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // Poly tail
2807 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // Poly tail
2812 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // Poly tail
2818 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
2823 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
2829 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // sin for neg
2834 fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // sin for neg
2840 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L // Low poly
2845 fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // Poly tail
2851 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
2856 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // Poly tail
2862 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // Poly tail
2867 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // Poly tail
2873 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
2878 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
2884 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // sin for neg
2889 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // sin for neg
2895 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
2900 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
2906 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // Poly tail
2911 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // Poly tail
2917 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // sin for neg
2923 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
2928 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // Poly tail
2934 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // sin for neg
2939 fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // sin for neg
2945 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
2950 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
2956 fma.s1 FR_p_Poly0H = FR_p_Poly3H,f1,FR_p_Poly1H //Low & High add
2961 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // sin for neg
2967 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // sin for neg
2972 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // sin for neg
2978 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
2983 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // Poly tail
2989 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // sin for neg
2994 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // sin for neg
3000 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
3005 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
3011 fms.s1 FR_p_Poly0L = FR_p_Poly1H,f1,FR_p_Poly0H //Low & High add
3016 (p13) fma.s1 FR_p_OddPoly0H = FR_p_Poly0H, FR_p_AbsXM1, f0
3017 // Reccurent computations - multiplying by X-1
3023 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
3028 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 // sin for neg
3034 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L//High
3039 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // sin for neg
3045 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // sin for neg
3050 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH//sin for neg
3056 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
3061 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // sin for neg
3067 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Poly3H //Low & High add
3072 (p13) fms.s1 FR_p_OddPoly0L = FR_p_Poly0H, FR_p_AbsXM1, FR_p_OddPoly0H
3073 // Reccurent computations - multiplying by X-1 (low part)
3079 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin for neg
3084 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
3090 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin for neg
3095 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // sin for neg
3101 fma.s1 FR_p_Temp0H = FR_p_Poly3L,f1,FR_p_Poly1L //Low & High add
3107 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
3113 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL//sin for neg
3118 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // sin tail res
3124 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Temp0H //Low & High add
3131 fma.s1 FR_r_ResL = FR_r_Tail,FR_r_XS7,FR_r_ResL //sin for neg
3137 (p13) fma.s1 FR_p_OddPoly0L = FR_p_Poly0L, FR_p_AbsXM1, FR_p_OddPoly0L
3138 // Reccurent computations - multiplying by X-1 (low part)
3144 fma.s1 FR_r_TT = FR_r_ResL, FR_r_AbsX, f0 // X*sin
3148 .pred.rel "mutex",p12,p13
3151 (p12) fma.s0 f8 = FR_p_Poly0H, f1, FR_p_Poly0L // Even
3156 (p13) fma.s0 f8 = FR_p_OddPoly0H, f1, FR_p_OddPoly0L // Odd
3157 (p14) br.ret.spnt b0 // Exit for 1 <= |X| < 13 path (positive arguments)/////
3162 (p13) fma.s1 FR_p_Poly0H = FR_p_OddPoly0H, f1, f0
3163 // Reccurent computations
3168 (p13) fma.s1 FR_p_Poly0L = FR_p_OddPoly0L, f1, f0
3169 // Reccurent computations
3175 fma.s1 FR_r_Res1H = FR_r_ResH, FR_r_AbsX, FR_r_TT // X*sin
3176 (p11) cmp.eq p13, p12 = r0, r0
3181 fms.s1 FR_r_Res1L = FR_r_ResH,FR_r_AbsX,FR_r_Res1H// X*sin
3182 (p9) cmp.eq p13, p12 = r0, r0
3187 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // sin for neg
3188 (p10) cmp.eq p13, p12 = r0, r0
3192 fma.s1 FR_r_TL = FR_p_Poly0L, FR_r_Res1H, f0 // mult by sin
3198 fma.s1 FR_r_TL = FR_p_Poly0H,FR_r_Res1L,FR_r_TL//mult by sin
3204 fma.s1 FR_r_ResH = FR_p_Poly0H,FR_r_Res1H,FR_r_TL//mult by sin
3210 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH//sin mult
3216 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
3222 fneg FR_r_NegOne = f1 // Form -1.0
3227 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL //Low result of mult
3233 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
3238 fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
3244 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
3249 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
3255 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
3260 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
3266 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
3271 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
3277 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
3282 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
3288 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
3293 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
3299 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1-b_lo*X
3304 (p12) fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate for evens
3308 .pred.rel "mutex",p13,p12
3311 (p13) fma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZH // Final result
3316 (p12) fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Final result
3317 br.ret.sptk b0 // Exit for 1 <= |X| < 13 path (negative arguments)//////
3321 //////////// |X| < 1 path /////////////////////////////////////////////////////
3322 //------------------------------------------------------------------------------
3326 getf.exp GR_p_Exp = FR_p_AbsX // exp of abs X
3327 fma.s1 FR_z_Q0 = f1,FR_z_Y0,f0 // q = a*y
3328 add GR_r_sin_Table2= 0x50, GR_r_sin_Table
3331 ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16
3332 fnma.s1 FR_z_E0 = FR_z_Y0,f8,f1 // e = 1-b*y
3333 add GR_p_Table2 = 0xB0, GR_p_Table
3337 ldfd FR_p_0p25 = [GR_c_Table]
3338 fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
3339 shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 60
3340 // Obtain only 4 bits of significand
3345 add GR_p_Bias = 0xffff, r0 // Set bias
3349 ldfpd FR_r_A2H, FR_r_A2L = [GR_r_sin_Table], 16
3351 shl GR_p_XN = GR_p_Exp, 4
3352 // Shift exp to 4 bits left to set place for significand
3355 ldfe FR_r_A6 = [GR_r_sin_Table2], 16
3356 movl GR_p_0p75 = 0xfffec // 0.75
3360 ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
3362 or GR_p_XN = GR_p_XN, GR_p_X_Sgnd
3363 // Combine exp with 4 high bits of significand
3366 ldfe FR_r_A5 = [GR_r_sin_Table2], 16
3368 sub GR_p_Exp = GR_p_Exp, GR_p_Bias // Unbiased exp
3372 ldfe FR_r_A9 = [GR_r_sin_Table], 16
3373 ldfe FR_r_A4 = [GR_r_sin_Table2], 16
3374 cmp.gtu.unc p10, p11 = GR_p_0p75, GR_p_XN // sgnd(x) < 0.75
3378 ldfe FR_r_A8 = [GR_r_sin_Table], 16
3379 fma.s1 FR_z_E2 = FR_z_E0,FR_z_E0,FR_z_E0 // e2 = e+e^2
3380 (p10) cmp.gt.unc p9, p10 = -2, GR_p_Exp // x < 0.25
3383 ldfe FR_r_A3 = [GR_r_sin_Table2], 16
3384 fma.s1 FR_z_E1 = FR_z_E0,FR_z_E0,f0 // e1 = e^2
3385 (p11) add GR_p_Offset = 168, r0 // [0.75;1] interval
3389 (p10) add GR_p_Offset = 147, r0 // [0.25;0.75] interval
3390 ldfe FR_r_A7 = [GR_r_sin_Table], 16
3391 (p9) cmp.gt.unc p8, p9 = -3, GR_p_Exp // x < 0.125
3394 .pred.rel "mutex",p9,p8
3396 (p9) add GR_p_Offset = 126, r0 // [0.125;0.25] interval
3397 (p8) add GR_p_Offset = 189, r0 // [0.;0.125] interval
3402 shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table //Make addresses
3403 shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2
3404 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = |x|-[x]
3407 .pred.rel "mutex",p8,p11
3409 ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
3410 (p11) fms.s1 FR_p_XR = f1, f1, FR_p_AbsX // r = 1 - |x|
3411 // for [0.75;1] interval
3415 ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
3416 (p8) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
3417 // for [0.;0.125] interval
3422 ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
3423 fma.s1 FR_z_Y1 = FR_z_Y0,FR_z_E2,FR_z_Y0 // y1 = y+y*e2
3427 ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
3428 fma.s1 FR_z_E3 = FR_z_E1,FR_z_E1,FR_z_E0 // e3 = e+e1^2
3432 .pred.rel "mutex",p9,p10
3434 ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
3435 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
3436 // for [0.125;0.25] interval
3440 ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
3441 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_0p5 // r = |x| - 0.5
3442 // for [0.25;0.75] interval
3447 ldfe FR_p_A20 = [GR_p_Table], 16
3448 ldfe FR_p_A12 = [GR_p_Table2], 16
3453 ldfe FR_p_A19 = [GR_p_Table], 16
3454 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs^2
3458 ldfe FR_p_A11 = [GR_p_Table2], 16
3464 ldfe FR_p_A18 = [GR_p_Table], 16
3465 ldfe FR_p_A10 = [GR_p_Table2], 16
3469 .pred.rel "mutex",p12,p13
3471 ldfe FR_p_A17 = [GR_p_Table], 16
3472 fma.s1 FR_z_Y2 = FR_z_Y1,FR_z_E3,FR_z_Y0 // y2 = y+y1*e3
3476 ldfe FR_p_A9 = [GR_p_Table2], 16
3477 fnma.s1 FR_z_R0 = f8,FR_z_Q0,f1 // r = a-b*q
3482 ldfe FR_p_A16 = [GR_p_Table], 16
3483 ldfe FR_p_A8 = [GR_p_Table2], 16
3488 ldfe FR_p_A15 = [GR_p_Table], 16
3489 ldfe FR_p_A7 = [GR_p_Table2], 16
3494 ldfe FR_p_A14 = [GR_p_Table], 16
3495 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // neg sin
3499 ldfe FR_p_A6 = [GR_p_Table2], 16
3500 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // neg sin
3505 ldfe FR_p_A13 = [GR_p_Table], 16
3506 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // xs^2 delta
3512 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // Low poly
3517 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // poly tail
3523 fabs FR_r_XS = FR_r_XS // Absolute value of xs
3528 fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // High poly
3534 fnma.s1 FR_z_E4 = f8,FR_z_Y2,f1 // e4 = 1-b*y2
3539 fma.s1 FR_z_ZH = FR_z_R0,FR_z_Y2,FR_z_Q0 // 1/x = q+r*y2
3545 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // neg sin
3550 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // neg sin
3556 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // neg sin
3562 fms.s1 FR_p_Temp5L = FR_p_A5H, FR_p_XR, FR_p_Temp5H // Low poly
3567 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // Low poly
3573 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H // High poly
3578 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // High poly
3584 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3
3589 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
3595 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // poly tail
3600 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // poly tail
3606 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // poly tail
3611 fma.s1 FR_z_Y3 = FR_z_Y2,FR_z_E4,FR_z_Y2 // y3 = y2+y2*e4
3617 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L // Low poly
3622 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low poly
3628 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // Low poly
3633 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // High poly
3639 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
3644 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
3650 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
3655 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 //poly tail
3661 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // neg sin
3666 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
3672 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
3677 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H //Low poly
3683 fms.s1 FR_p_Temp4L = FR_p_Poly5H, FR_p_XR, FR_p_Temp4H//Low poly
3688 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
3694 fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
3699 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
3705 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
3710 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
3716 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // poly tail
3721 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
3727 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L //Low poly
3732 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 //poly tail
3738 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H /// Low poly
3743 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // poly tail
3749 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // poly tail
3754 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // r^6
3760 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
3765 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
3771 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // neg sin
3776 fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // neg sin
3782 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L //Low poly
3787 fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // poly tail
3793 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
3798 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // poly tail
3804 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // poly tail
3809 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // poly tail
3815 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
3820 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
3826 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // neg sin
3831 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // neg sin
3837 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
3842 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
3848 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // poly tail
3853 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // r^8
3859 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // neg sin
3864 fnma.s1 FR_z_R1 = f8,FR_z_ZH,f1 // r1 = a-b*x
3870 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
3875 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // poly tail
3881 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // neg sin
3886 fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // neg sin
3892 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
3897 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
3903 fma.s1 FR_p_Poly0H = FR_p_Poly3H, f1, FR_p_Poly1H // Result
3908 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // neg sin
3914 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // neg sin
3919 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // xs^4
3925 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
3930 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // poly tail
3936 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // neg sin
3941 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // neg sin
3947 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
3952 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
3958 fms.s1 FR_p_Poly0L = FR_p_Poly1H, f1, FR_p_Poly0H // Result
3963 fma.s1 FR_z_ZL = FR_z_R1,FR_z_Y3, f0 // x_lo = r1*y3
3969 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
3974 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 /// neg sin
3980 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L // High
3985 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // neg sin
3991 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // neg sin
3996 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH // neg sin
4002 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
4007 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // neg sin
4013 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Poly3H // result
4019 (p14) fma.s1 f8 = FR_p_Poly0H, FR_z_ZH, f0 // z*poly
4024 fma.s1 FR_p_Temp1L = FR_p_Poly0H, FR_z_ZL, f0 // z*poly low
4030 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin tail
4035 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
4041 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin low
4046 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // xs^8
4052 fma.s1 FR_p_Temp0H = FR_p_Poly3L, f1, FR_p_Poly1L // result
4058 (p14) fms.s1 FR_p_Temp1H = FR_p_Poly0H, FR_z_ZH, f8 // hi result
4064 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
4070 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL // lo result
4075 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // tail result
4081 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Temp0H // lo result
4087 fma.s1 FR_r_ResL = FR_r_Tail, FR_r_XS7, FR_r_ResL // lo result
4093 (p14) fma.s1 FR_p_Temp1L = FR_p_Poly0L,FR_z_ZH,FR_p_Temp1L //hi result
4099 fma.s1 FR_r_TT = FR_r_ResL, f1, f0 // for low result
4103 .pred.rel "mutex",p12,p13
4106 (p14) fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_Temp1H // for lo res
4111 (p10) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4112 fma.s1 FR_r_Res1H = FR_r_ResH, f1, FR_r_TT // hi res
4117 (p9) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4118 (p14) fma.s0 f8 = f8, f1, FR_p_Temp1L // Final result
4119 (p14) br.ret.spnt b0 // Exit for 0 < |X| < 1 path (positive arguments)///////
4123 (p11) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4124 fms.s1 FR_r_Res1L = FR_r_ResH, f1, FR_r_Res1H // Low sin result
4130 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // Low sin result
4135 fma.s1 FR_r_TL = FR_p_Poly0L,FR_r_Res1H,f0 //Low sin result
4141 fma.s1 FR_r_TL = FR_p_Poly0H, FR_r_Res1L, FR_r_TL //Low sin
4147 fma.s1 FR_r_ResH = FR_p_Poly0H, FR_r_Res1H, FR_r_TL //High sin
4153 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH //Low res
4159 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
4165 fneg FR_r_NegOne = f1 // Construct -1.0
4170 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL // low sin
4176 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
4181 fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
4187 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
4192 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
4198 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
4203 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
4209 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
4214 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
4220 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
4225 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
4231 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
4236 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
4242 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1 - b_lo*X
4247 fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate
4251 .pred.rel "mutex",p13,p12
4254 fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Result for neg
4255 br.ret.sptk b0 // Exit for 0 < |X| < 1 path (negative arguments)//////
4261 // SPECIALS (x for natval, nan, +/-inf or +/-0) ///////////////////////////////
4262 //------------------------------------------------------------------------------
4267 movl GR_DenOverflow = 0x2000000000000001
4271 fclass.m p9,p0 = f8,0xB // +/-denormals
4276 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
4281 fclass.m p7,p8 = f8,0x7 // +/-0
4286 (p9) cmp.ltu.unc p10,p11 = GR_l_signif_Z, GR_DenOverflow
4287 (p9) fnorm.s0 f8 = f8
4293 (p9) fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Round by truncate
4294 (p11) br.cond.sptk tgamma_lt_1 // Return to gamma ('good' denormal)////////////
4300 (p10) br.cond.spnt tgammal_overflow // "Bad" denormal - overflow! /////////////
4305 mov FR_X = f8 // for error handler
4310 (p6) fma.s0 f8 = f8,f1,f8 // res = x + x
4311 (p6) br.ret.spnt b0 // Exit for NAN, INF and NatVals ////////////////////////
4313 .pred.rel "mutex",p7,p8
4315 (p7) mov GR_Parameter_TAG = 256 // negative
4316 (p7) frcpa.s0 f8,p0 = f1,f8 // Raise V flag
4322 (p8) br.cond.spnt tgammal_singularity // Branch for +ZERO ////////////////////
4328 br.cond.spnt tgammal_libm_err // Branch for -ZERO ///////////////////////
4334 // SINGULARITY (x is negative integer or 0) ////////////////////////////////////
4335 //------------------------------------------------------------------------------
4337 tgammal_singularity:
4340 mov FR_X = f8 // For error handler
4341 mov GR_Parameter_TAG = 256 // negative
4345 frcpa.s0 f8,p0 = f0,f0 // Raise V flag
4346 br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
4347 // with singularity error /////////////////
4353 // OVERFLOW (result is too big and cannot be represented by normal value) //////
4354 // ( X > 1755.54 and for denormals with abs value less than 0x2000000000000001 )
4355 //------------------------------------------------------------------------------
4359 addl r8 = 0x1FFFE, r0 // Exp of INF
4360 fcmp.lt.s1 p15,p14 = f8,f0 // p14 - pos arg, p15 - neg arg
4366 mov FR_X = f8 // For error handler
4367 mov GR_Parameter_TAG = 255 // overflow
4370 .pred.rel "mutex",p14,p15
4373 (p14) fma.s0 f8 = f9,f9,f0 // Set I,O and +INF result
4378 (p15) fnma.s0 f8 = f9,f9,f0 // Set I,O and -INF result
4379 br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
4380 // with overflow error ////////////////////
4387 // UNDERFLOW (x is negative noninteger with big absolute value) ////////////////
4388 //------------------------------------------------------------------------------
4393 fcvt.fx.trunc.s1 FR_u_IXN = f8 // Convert arg to int repres. in FR
4398 getf.sig GR_u_XN = FR_u_IXN
4412 tbit.z p6,p7 = GR_u_XN,0 // even or odd
4415 .pred.rel "mutex",p6,p7
4418 (p6) fms.s0 f8 = f9,f9,f9 // for negatives
4423 (p7) fma.s0 f8 = f9,f9,f9 // for positives
4424 br.ret.sptk b0 // Exit for underflow path //////////////////////////////
4428 GLOBAL_LIBM_END(tgammal)
4433 ////////////////// Tgammal error handler ///////////////////////////////////////
4434 //------------------------------------------------------------------------------
4435 LOCAL_LIBM_ENTRY(__libm_error_region)
4439 add GR_Parameter_Y=-32,sp // Parameter 2 value
4441 .save ar.pfs,GR_SAVE_PFS
4442 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
4446 add sp=-64,sp // Create new stack
4448 mov GR_SAVE_GP=gp // Save gp
4451 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
4452 add GR_Parameter_X = 16,sp // Parameter 1 address
4453 .save b0, GR_SAVE_B0
4454 mov GR_SAVE_B0=b0 // Save b0
4458 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
4459 add GR_Parameter_RESULT = 0,GR_Parameter_Y
4460 nop.b 0 // Parameter 3 address
4463 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
4464 add GR_Parameter_Y = -16,GR_Parameter_Y
4465 br.call.sptk b0=__libm_error_support# // Call error handling function
4470 add GR_Parameter_RESULT = 48,sp
4473 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
4475 add sp = 64,sp // Restore stack pointer
4476 mov b0 = GR_SAVE_B0 // Restore return address
4479 mov gp = GR_SAVE_GP // Restore gp
4480 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
4481 br.ret.sptk b0 // Return
4484 LOCAL_LIBM_END(__libm_error_region#)
4486 .type __libm_error_support#,@function
4487 .global __libm_error_support#