4 // Copyright (c) 2002 - 2005, Intel Corporation
5 // All rights reserved.
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
15 // * Redistributions in binary form must reproduce the above copyright
16 // notice, this list of conditions and the following disclaimer in the
17 // documentation and/or other materials provided with the distribution.
19 // * The name of Intel Corporation may not be used to endorse or promote
20 // products derived from this software without specific prior written
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 // Intel Corporation is the author of this code, and requests that all
36 // problem reports or change requests be submitted to it directly at
37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //==============================================================
41 // 01/16/02 Initial version
42 // 05/20/02 Cleaned up namespace and sf0 syntax
43 // 02/10/03 Reordered header: .section, .global, .proc, .align;
44 // used data8 for long double table values
45 // 03/17/03 Moved tgammal_libm_err label into .proc region
46 // 04/10/03 Changed error codes for overflow and negative integers
47 // 03/31/05 Reformatted delimiters between data tables
50 //==============================================================
51 // long double tgammal(long double)
55 // Floating-Point Registers: f8-f15
58 // General Purpose Registers: r32-r67
60 // Predicate Registers: p6-p15
62 //*********************************************************************
64 // IEEE Special Conditions:
66 // tgammal(+inf) = +inf
67 // tgammal(-inf) = QNaN
68 // tgammal(+/-0) = +/-inf
69 // tgammal(x<0, x - integer) = QNaN
70 // tgammal(SNaN) = QNaN
71 // tgammal(QNaN) = QNaN
73 //*********************************************************************
74 // Overview of operation
75 //==============================================================
77 // Algorithm description
78 // ---------------------
80 // There are 3 main paths in the implementation
81 // (and additional special values branches)
83 // 1) |X| >= 13 - Stirling formula computation
84 // a) Positive arguments:
85 // TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)),
86 // where C = 0.5*ln(2*Pi) , Z = 1/X, S(Z) - Bernoulli polynomial
87 // (up to 'B18' term).
88 // Some of these calculations are done in multiprecision.
89 // Ln returns multiprecision result too
90 // and exp also accepts and returns pair of values.
92 // b) Negative arguments
93 // TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
94 // (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
95 // Here we use polynomial of 9th degree with 2 multiprecision steps.
96 // Argument range reduction is:
97 // N = [x] with round to nearest, r = x - N, -0.5 <= r < 0.5
98 // After ((X-0.5)*ln(X) - X + C + S(Z)) completed we just invert
99 // its result and compute exp with negative argument (1/exp(x)=exp(-x))
100 // Then we multiply the exp result by PI/(X*sin(PI*X)).
102 // 2) 1 <= |X| < 13 - Polynomial part
103 // a) Positive arguments:
104 // All values are split into intervals such as:
105 // #0->[2;3], #1->[3;4], #2->[4;5]...
106 // For even intervals we just use polynomial computation with degree 20
107 // and first 6 multiprecision computations.
108 // Range reduction looks like
109 // N = [x] with truncate, r = x - N - 0.5, -0.5 <= r < 0.5
110 // For odd intervals we use the recurrence formula:
111 // TGAMMAL(X) = TGAMMAL(X-1)*(X-1)
112 // The [1;2] interval is split into 3 subranges:
113 // [1;1.25], [1.25;1.75], [1.75;2] with the same polynomial forms
115 // b) Negative arguments
116 // TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
117 // (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
118 // After multiplication by TGAMMAL(X) result we calculate reciprocal
119 // and get final result.
121 // 3) 0 < |X| < 1 - Near 0 part
122 // a) Here we use the recurrence formula TGAMMAL(X) = TGAMMAL(X+1)/X
123 // TGAMMAL(X+1) calculated as shown above,
124 // 1/X result obtained in parallel. Then we just multiply these values.
125 // There is only one additional separate subrange: [0;0.125] with its own
126 // set of polynomial constants.
128 // b) Negative arguments
129 // TGAMMAL(-X) = PI/(TGAMMAL(X+1)*sin(PI*X)).
130 // There is no need to compute 1/X.
137 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Q)
138 // log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
139 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
140 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
141 data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
142 data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
143 data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
144 data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
145 data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
146 data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
147 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Q)
// First-level lookup table for the logarithm: 16 entries of 32 bytes.
// Each entry is two rows:
//   row 1: Z1 (16-bit fixed point), G1 (single), H1 (single), zero padding
//   row 2: h1 as low significand word, high significand word,
//          sign/exponent word, zero padding
// Entry 0 is the identity case (Z1 = 0x8000, G1 = 1.0f, H1 = 0, h1 = 0).
// NOTE(review): the legend below calls h1 "IEEE double", but the h1 rows use
// the same three-word significand/exponent layout as the rows this file
// labels "double extended" elsewhere - confirm the intended wording.
150 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h1)
151 // Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
152 data4 0x00008000,0x3F800000,0x00000000,0x00000000
153 data4 0x00000000,0x00000000,0x00000000,0x00000000
154 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
155 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
156 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
157 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
158 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
159 data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
160 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
161 data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
162 data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
163 data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
164 data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
165 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
166 data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
167 data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
168 data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
169 data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
170 data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
171 data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
172 data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
173 data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
174 data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
175 data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
176 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
177 data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
178 data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
179 data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
180 data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
181 data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
182 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
183 data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
184 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h1)
// Second-level lookup table for the logarithm: 16 entries of 32 bytes, with
// the same two-row layout as the Z1/G1/H1/h1 table:
//   row 1: Z2 (16-bit fixed point), G2 (single), H2 (single), zero padding
//   row 2: h2 as low significand word, high significand word,
//          sign/exponent word, zero padding
// The G2 values here are much closer to 1.0 than the first-level G1 values,
// so this table provides the finer refinement step.
// NOTE(review): the legend below calls h2 "IEEE double", but the h2 rows use
// the three-word significand/exponent layout - confirm the intended wording.
187 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h2)
188 // Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
189 data4 0x00008000,0x3F800000,0x00000000,0x00000000
190 data4 0x00000000,0x00000000,0x00000000,0x00000000
191 data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
192 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
193 data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
194 data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
195 data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
196 data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
197 data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
198 data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
199 data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
200 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
201 data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
202 data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
203 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
204 data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
205 data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
206 data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
207 data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
208 data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
209 data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
210 data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
211 data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
212 data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
213 data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
214 data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
215 data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
216 data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
217 data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
218 data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
219 data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
220 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
221 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h2)
// Third-level lookup table for the logarithm, stored in two parts:
//   1) 32 rows of 16 bytes: h3 (low significand word, high significand word,
//      sign/exponent word) with the matching single-precision G3 packed into
//      the fourth word, where the other tables keep zero padding.
//   2) 32 single-precision H3 values, four per row (8 rows), beginning at
//      the row marked "H's start here".
// The H3 block therefore starts 32*16 = 512 bytes past the object's start.
224 LOCAL_OBJECT_START(Constants_Tgammal_log_80_h3_G_H)
225 // h3 IEEE double extended, H3 and G3 IEEE single
226 data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
227 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
228 data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
229 data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
230 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
231 data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
232 data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
233 data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
234 data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
235 data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
236 data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
237 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
238 data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
239 data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
240 data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
241 data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
242 data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
243 data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
244 data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
245 data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
246 data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
247 data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
248 data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
249 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
250 data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
251 data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
252 data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
253 data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
254 data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
255 data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
256 data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
257 data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
258 data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
259 data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
260 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
261 data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
262 data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
263 data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
264 data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
265 data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
266 LOCAL_OBJECT_END(Constants_Tgammal_log_80_h3_G_H)
// Constants for the Stirling-formula path (|X| >= 13 in the overview).
// Layout, in order:
//   - C = 0.5*ln(2*Pi) as a pair of doubles (high part, low part)
//   - the first series coefficient as significand + sign/exponent data8 pair
//   - eight further series coefficients as plain doubles
//   - the constant 0.5 as a double
// The entries labelled "B2k" are not the raw Bernoulli numbers: the listed
// decimals equal B_2k / (2k*(2k-1)), e.g. "B2" = (1/6)/2 = 1/12 and
// "B4" = (-1/30)/12 = -1/360, which are the coefficients of the Stirling
// series in powers of 1/X.
269 LOCAL_OBJECT_START(Constants_Tgammal_stirling)
270 //0.5*ln(2*Pi)=9.1893853320467266954096885e-01 + 7.2239360881843238220057778e-17
271 data8 0x3FED67F1C864BEB4, 0x3C94D252F2400510
273 data8 0xAAAAAAAAAAAAAAAB, 0x00003FFB //B2 = 8.3333333333333333333333333333e-02
274 data8 0xBF66C16C16C16C17 //B4 = -2.7777777777777777777777777778e-03
275 data8 0x3F4A01A01A01A01A //B6 = 7.9365079365079365079365079365e-04
276 data8 0xBF43813813813814 //B8 = -5.9523809523809523809523809524e-04
277 data8 0x3F4B951E2B18FF23 //B10 = 8.4175084175084175084175084175e-04
278 data8 0xBF5F6AB0D9993C7D //B12 = -1.9175269175269175269175269175e-03
279 data8 0x3F7A41A41A41A41A //B14 = 6.4102564102564102564102564103e-03
280 data8 0xBF9E4286CB0F5398 //B16 = -2.9550653594771241830065359477e-02
281 data8 0x3FC6FE96381E0680 //B18 = 1.7964437236883057316493849002e-01
282 data8 0x3FE0000000000000 // 0.5
283 LOCAL_OBJECT_END(Constants_Tgammal_stirling)
// Coefficients A1..A9 of the sin(Pi*x)/Pi approximation used on the
// negative-argument paths.
// A2 and A1 come first, each as a pair of doubles (high part, low part);
// A9 down to A3 follow as significand + sign/exponent data8 pairs.
// Sanity check on the listed decimals: A1 = -Pi^2/6 and A2 = Pi^4/120, the
// first two non-trivial Taylor coefficients of sin(Pi*x)/Pi.
286 LOCAL_OBJECT_START(Constants_Tgammal_sin)
287 // Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5
288 //A2 = 8.1174242528335360802316245099e-01 + 5.1302254650266899774269946201e-18
289 data8 0x3FE9F9CB402BC46C, 0x3C57A8B3819B7CEC
290 //A1 = -1.6449340668482264060656916627e+00 + -3.0210280454695477893051351574e-17
291 data8 0xBFFA51A6625307D3, 0xBC816A402079D0EF
292 data8 0xF3AEF1FFCCE6C813, 0x0000BFE3 //A9 = -7.0921197799923779127089910470e-09
293 data8 0x87D54408E6D4BB9D, 0x00003FE9 //A8 = 2.5300880778252693946712766029e-07
294 data8 0xEA12033DCE7B8ED9, 0x0000BFED //A7 = -6.9758403885461690048189307819e-06
295 data8 0x9BA38C952A59D1A8, 0x00003FF2 //A6 = 1.4842878710882320255092707181e-04
296 data8 0x99C0B55178FF0E38, 0x0000BFF6 //A5 = -2.3460810348048124421268761990e-03
297 data8 0xD63402E798FEC896, 0x00003FF9 //A4 = 2.6147847817611456327417812320e-02
298 data8 0xC354723906D95E92, 0x0000BFFC //A3 = -1.9075182412208257558294507774e-01
299 LOCAL_OBJECT_END(Constants_Tgammal_sin)
// Argument-reduction constants for the exp step: log(2)/2^12 split into a
// high part (L_hi) and a low correction (L_lo).
// Each row lists low significand word, high significand word, sign/exponent
// word, zero padding. L_hi keeps only the upper 32 significand bits (its low
// word is zero).
302 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_Arg)
303 data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi = hi part log(2)/2^12
304 data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo = lo part log(2)/2^12
305 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_Arg)
// Polynomial coefficients A3, A2, A1 for the exp step, highest index first.
// Each row lists low significand word, high significand word, sign/exponent
// word, zero padding.
// Decoded, the values are approximately 1/24 (A3), 1/6 (A2) and 1/2 (A1).
307 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_A)
308 data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A3
309 data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A2
310 data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A1
311 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_A)
// Coarse power-of-two table for the exp step: 64 single-precision values,
// four per row. Entry j matches 2^(j/64) rounded to single precision
// (entry 0 = 1.0, entry 1 = 0x3F8164D2 ~ 1.010889, entry 63 ~ 1.97845).
313 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T1)
314 data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
315 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
316 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
317 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
318 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
319 data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
320 data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
321 data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
322 data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
323 data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
324 data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
325 data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
326 data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
327 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
328 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
329 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
330 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T1)
// Fine power-of-two table for the exp step: 64 single-precision values,
// four per row. Entry j matches 2^(j/4096) rounded to single precision
// (entry 0 = 1.0, entry 1 = 0x3F80058C ~ 1.000169, entry 63 ~ 1.010718),
// i.e. it subdivides one step of the 2^(j/64) table into 64 finer steps.
332 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T2)
333 data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
334 data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
335 data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
336 data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
337 data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
338 data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
339 data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
340 data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
341 data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
342 data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
343 data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
344 data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
345 data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
346 data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
347 data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
348 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
349 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T2)
// 64 double-precision values, two per row; entry 0 is zero and all others
// have magnitude well below 1e-7.
// NOTE(review): presumably entry j is the small correction that goes with
// entry j of the 64-entry single-precision T1 table (same entry count, and
// entry 0 is exactly zero where T1 entry 0 is exactly 1.0) - confirm against
// the code that indexes this table.
351 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W1)
352 data8 0x0000000000000000, 0xBE384454171EC4B4
353 data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
354 data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
355 data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
356 data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
357 data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
358 data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
359 data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
360 data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
361 data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
362 data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
363 data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
364 data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
365 data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
366 data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
367 data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
368 data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
369 data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
370 data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
371 data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
372 data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
373 data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
374 data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
375 data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
376 data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
377 data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
378 data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
379 data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
380 data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
381 data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
382 data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
383 data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
384 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W1)
// 64 double-precision values, two per row; entry 0 is zero and all others
// have magnitude well below 1e-7.
// NOTE(review): presumably entry j is the small correction that goes with
// entry j of the 64-entry single-precision T2 table (same entry count, and
// entry 0 is exactly zero where T2 entry 0 is exactly 1.0) - confirm against
// the code that indexes this table.
386 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W2)
387 data8 0x0000000000000000, 0xBE641F2537A3D7A2
388 data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
389 data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
390 data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
391 data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
392 data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
393 data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
394 data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
395 data8 0xBE56856B49BFF529, 0x3E66DD3300508651
396 data8 0x3E51165FC114BC13, 0x3E53333DC453290F
397 data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
398 data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
399 data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
400 data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
401 data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
402 data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
403 data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
404 data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
405 data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
406 data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
407 data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
408 data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
409 data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
410 data8 0xBE559725ADE45917, 0xBE68C29C042FC476
411 data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
412 data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
413 data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
414 data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
415 data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
416 data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
417 data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
418 data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
419 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W2)
// Degree-20 polynomial coefficient sets for the even intervals of the
// 1 <= |X| < 13 path: [2,3), [4,5), [6,7), [8,9), [10,11), [12,13).
// Each set holds 21 coefficients in 21 rows of two data8 words (336 bytes),
// always in this storage order:
//   A5, A4, A3    - pairs of doubles (high part, low part)
//   A20 .. A13    - significand + sign/exponent data8 pairs
//   A2, A1, A0    - pairs of doubles (high part, low part)
//   A12 .. A6     - significand + sign/exponent data8 pairs
// The six coefficients kept as hi/lo pairs (A0..A5) correspond to the
// "first 6 multiprecision computations" mentioned in the overview.
// A0 of each set equals the gamma function at the interval midpoint
// (e.g. 1.3293... = Gamma(2.5) for [2,3)), consistent with the overview's
// reduction r = x - N - 0.5.
423 LOCAL_OBJECT_START(Constants_Tgammal_poly)
425 // Polynomial coefficients for the tgammal(x), 2 <= |x| < 3
426 //A5 = 2.8360780594841213109180699803e-02 + 2.2504152891014320704380000000e-19
427 data8 0x3F9D0A9BC49353D2, 0x3C109AEA0F23CE2D
428 //A4 = 1.0967323400216015538699565468e-01 + 9.9225166000430644587276000000e-18
429 data8 0x3FBC138B89492C5B, 0x3C66E138506D5652
430 //A3 = 2.5387124684114281691904579930e-01 + 2.2667777637607113205546600000e-17
431 data8 0x3FD03F6D2FA4F4F8, 0x3C7A2258DA8CD8B1
432 data8 0xC5866457328BC39B, 0x00003FE3 //A20 = 5.7487331964156762795056629138e-09
433 data8 0xE93D9F1ACD59C929, 0x0000BFE4 //A19= -1.3576396100397317396956445658e-08
434 data8 0xE33389C8F6CBA813, 0x00003FE5 //A18 = 2.6449714924964597501721434271e-08
435 data8 0x8FE7B25B9CD26D2A, 0x0000BFE7 //A17= -6.7011017946055513660266853311e-08
436 data8 0xB89F4721BFBC15B0, 0x00003FE8 //A16 = 1.7194280320370423615174419192e-07
437 data8 0xE49CBDC1874EBABA, 0x0000BFE9 //A15= -4.2582353660153782928729466776e-07
438 data8 0x913AF50A336129CA, 0x00003FEB //A14 = 1.0820500665257088283172211622e-06
439 data8 0xABCF0F7313B3B332, 0x0000BFEC //A13= -2.5601510627710417669568115706e-06
440 //A2 = 6.5455857798133676439533701341e-01 + 1.3292075193155190798867000000e-18
441 data8 0x3FE4F224D4B7E01C, 0x3C3885014A2B8319
442 //A1 = 9.3473452162608550164435428087e-01 + 3.2785154201417136611642400000e-17
443 data8 0x3FEDE9585F1A7093, 0x3C82E63C1B5028BF
444 //A0 = 1.3293403881791368004172682049e+00 + 2.2005689328949279282607500000e-16
445 data8 0x3FF544FA6D47B38F, 0x3CAFB6AA9829E81F
446 data8 0xF3668F799997C76D, 0x00003FED //A12 = 7.2539039479124273660331538367e-06
447 data8 0xD6C6BBD54CDEAEB1, 0x0000BFEE //A11= -1.2801665282681088568639378920e-05
448 data8 0x809E4763B06F6883, 0x00003FF1 //A10 = 6.1329973609906572700697893187e-05
449 data8 0x8443B000F8F9A71A, 0x00003FED //A9 = 3.9417864189995544394564413428e-06
450 data8 0xC5C7E6D62A6991D8, 0x00003FF4 //A8 = 7.5447412886334708803357581519e-04
451 data8 0xD2AF690725C62D88, 0x00003FF5 //A7 = 1.6074004848394703022110823298e-03
452 data8 0xAA44E635D4B7B682, 0x00003FF8 //A6 = 1.0392403425906843901680697839e-02
454 // Polynomial coefficients for the tgammal(x), 4 <= |x| < 5
455 //A5 = 1.1600674810589555185913468449e+00 + 3.0229979112715124660731000000e-17
456 data8 0x3FF28FA2EB44D22E, 0x3C816D285234C815
457 //A4 = 3.1374268565470946334983182169e+00 + 1.3694868953995008497659600000e-16
458 data8 0x400919734073B1E1, 0x3CA3BC83CD7E9565
459 //A3 = 7.0834593993741057360580271052e+00 + 3.3899702569039156457249800000e-16
460 data8 0x401C5576617B6C1F, 0x3CB86D6431213296
461 data8 0xA4A5FB49C094966B, 0x00003FDA //A20 = 9.3591760106637809309720130828e-12
462 data8 0xA9260DA0F51D7ED8, 0x00003FDD //A19 = 7.6919898428091669411809372180e-11
463 data8 0xA16441DFB14BD6E1, 0x00003FE0 //A18 = 5.8713933014370867331213494535e-10
464 data8 0x95F098D9C2234849, 0x00003FE3 //A17 = 4.3638234584169302324461091035e-09
465 data8 0x8581817400E5AD2B, 0x00003FE6 //A16 = 3.1084260332429955234755367839e-08
466 data8 0xE272940E373EBE15, 0x00003FE8 //A15 = 2.1089573544273993580820317236e-07
467 data8 0xB6B3391145D226FB, 0x00003FEB //A14 = 1.3612217421122787182942706259e-06
468 data8 0x8B9428C4DF95FCD5, 0x00003FEE //A13 = 8.3195416382628990683949003789e-06
469 //A2 = 1.2665135075272345943631080445e+01 + 9.8721896915973874255877000000e-16
470 data8 0x4029548C95A76F38, 0x3CD1C8BE715B8E13
471 //A1 = 1.6154969393303069580269948347e+01 + 9.6850518810678379641029000000e-16
472 data8 0x403027AC12FC1E1E, 0x3CD172711C15501B
473 //A0 = 1.1631728396567448058362970187e+01 + 8.7078125362814179268673000000e-16
474 data8 0x40274371E7866C65, 0x3CCF5F8A1A5FACA0
475 data8 0xC94A903114272C03, 0x00003FF0 //A12 = 4.7991576836334427243159066630e-05
476 data8 0x8844262960E04BE6, 0x00003FF3 //A11 = 2.5990716419283017929486175141e-04
477 data8 0xAC5418A76767678D, 0x00003FF5 //A10 = 1.3147621245497801180184809726e-03
478 data8 0xCA231B6EFE959132, 0x00003FF7 //A9 = 6.1687358811367989146517222415e-03
479 data8 0xDA38E39C13819D2A, 0x00003FF9 //A8 = 2.6638454961912040754759086920e-02
480 data8 0xD696DF8D8389FE53, 0x00003FFB //A7 = 1.0477995539298934056097943975e-01
481 data8 0xBDD5C153048BC435, 0x00003FFD //A6 = 3.7077144754791605130056406006e-01
483 // Polynomial coefficients for the tgammal(x), 6 <= |x| < 7
484 //A5 = 6.7169398121054200601065531373e+01 + 2.9481001527213915901489600000e-15
485 data8 0x4050CAD76B377BA0, 0x3CEA8DDB2B2DE93E
486 //A4 = 1.6115104376855398982115730178e+02 + 1.3422421925418824418257300000e-14
487 data8 0x406424D559BDC687, 0x3D0E397FDB5B33DC
488 //A3 = 3.1812194028053562533386866562e+02 + 3.9881709875858650942409600000e-14
489 data8 0x4073E1F377A6CF73, 0x3D26738F63FE9C4C
490 data8 0xD6E1B5FF90CAABD3, 0x00003FE1 //A20 = 1.5634700199277480081025480635e-09
491 data8 0xD451987B925DD37E, 0x00003FE4 //A19 = 1.2358576813211397717382327174e-08
492 data8 0xBFC151B67FA58E6B, 0x00003FE7 //A18 = 8.9292951435632759686382657901e-08
493 data8 0xA9034C5E1D67572E, 0x00003FEA //A17 = 6.2962205718327848327368724720e-07
494 data8 0x8E40F6EAA30A71EC, 0x00003FED //A16 = 4.2394926442967995119170095258e-06
495 data8 0xE3C3541B03A1C350, 0x00003FEF //A15 = 2.7151465666109594512258841637e-05
496 data8 0xACE2E58436B2DDCE, 0x00003FF2 //A14 = 1.6487723793339152877117376243e-04
497 data8 0xF7EAF8D8D1CAA3D1, 0x00003FF4 //A13 = 9.4573158112768812533636022369e-04
498 //A2 = 4.8664351544258869353143381886e+02 + 4.7424047995944376868895400000e-14
499 data8 0x407E6A4BD6D9463B, 0x3D2AB2868D79E192
500 //A1 = 5.1615277644992545447166776285e+02 + 3.0901956935588717379242200000e-14
501 data8 0x40802138E2DC003B, 0x3D216570FB601AEA
502 //A0 = 2.8788527781504433278314536437e+02 + 2.8213174117085164944959600000e-14
503 data8 0x4071FE2A1911F7D6, 0x3D1FC3E4CF4DB5AF
504 data8 0xA72B88E48D3D1BAB, 0x00003FF7 //A12 = 5.1016252919939028020562237471e-03
505 data8 0xD2EFB1067DB4FFB2, 0x00003FF9 //A11 = 2.5749059441230515023024615917e-02
506 data8 0xF788AF9522205C24, 0x00003FFB //A10 = 1.2086617635601742290221382521e-01
507 data8 0x861A6CE06CB29EAF, 0x00003FFE //A9 = 5.2384071807018493367136112163e-01
508 data8 0x84FBDE0947718B58, 0x00004000 //A8 = 2.0778727617851237754568261869e+00
509 data8 0xEEC1371E265A2C3A, 0x00004001 //A7 = 7.4610858525146049022238037342e+00
510 data8 0xBF514B9BE68ED59D, 0x00004003 //A6 = 2.3914694993947572859629197920e+01
512 // Polynomial coefficients for the tgammal(x), 8 <= |x| < 9
513 //A5 = 5.8487447114416836484451778233e+03 + 4.7365465221455983144182900000e-13
514 data8 0x40B6D8BEA568B6FD, 0x3D60AA4D44C2589B
515 //A4 = 1.2796464063087094473303295672e+04 + 1.2373341702514898266244200000e-12
516 data8 0x40C8FE3B666B532D, 0x3D75C4752C5B4783
517 //A3 = 2.2837606581322281272150576115e+04 + 2.6598064610627891398831000000e-13
518 data8 0x40D64D66D23A7764, 0x3D52B77B3A10EA5C
519 data8 0xB23418F75B0BE22A, 0x00003FE9 //A20 = 3.3192989594206801808678663868e-07
520 data8 0xA984A7BC8B856ED2, 0x00003FEC //A19 = 2.5260177918662350066375115788e-06
521 data8 0x921A49729416372C, 0x00003FEF //A18 = 1.7416797068239475136398213598e-05
522 data8 0xF5BB9415CC399CA4, 0x00003FF1 //A17 = 1.1717449586392814601938207599e-04
523 data8 0xC50B91A40B81F9DF, 0x00003FF4 //A16 = 7.5166775151159345732094429036e-04
524 data8 0x96002572326DB203, 0x00003FF7 //A15 = 4.5776541559407384162139204300e-03
525 data8 0xD81A1A595E4157BA, 0x00003FF9 //A14 = 2.6379634345126284099420760736e-02
526 data8 0x92B700D0CFECADD8, 0x00003FFC //A13 = 1.4327622675407940907282658100e-01
527 //A2 = 3.1237895525940199149772524834e+04 + 3.1280450505163186432331700000e-12
528 data8 0x40DE8179504C0878, 0x3D8B83BB33FBB766
529 //A1 = 2.9192841741344487672904506326e+04 + 7.9300780509779689630767000000e-13
530 data8 0x40DC8235DF171691, 0x3D6BE6C780EE54DF
531 //A0 = 1.4034407293483411194756627083e+04 + 1.4038139346291543309253700000e-12
532 data8 0x40CB693422315F90, 0x3D78B23746113FCE
533 data8 0xBAE50807548BC711, 0x00003FFE //A12 = 7.3005724123917935346868107005e-01
534 data8 0xDE28B1F57E68CFB6, 0x00004000 //A11 = 3.4712338349724065462763671443e+00
535 data8 0xF4DCA5A5FF901118, 0x00004002 //A10 = 1.5303868912154033908205911714e+01
536 data8 0xF85AAA1AD5E84E5E, 0x00004004 //A9 = 6.2088539523416399361048051373e+01
537 data8 0xE5AA8BB1BF02934D, 0x00004006 //A8 = 2.2966619406617480799195651466e+02
538 data8 0xBF6CFEFD67F59845, 0x00004008 //A7 = 7.6570306334640770654588802417e+02
539 data8 0x8DB5D2F001635C29, 0x0000400A //A6 = 2.2673639984182571062068713002e+03
541 // Polynomial coefficients for the tgammal(x), 10 <= |x| < 11
542 //A5 = 7.2546009516580589115619659424e+05 + 1.0343348865365065212891728822e-10
543 data8 0x412623A830B99290, 0x3DDC6E7C157611C4
544 //A4 = 1.4756292870840241666883230209e+06 + 8.1516565365333844166705674775e-11
545 data8 0x4136842D497E56AF, 0x3DD66837E4C3F9EE
546 //A3 = 2.4356116926500420086085796356e+06 + 3.5508860076560925641351069404e-10
547 data8 0x4142950DD8A8C1AF, 0x3DF866C8E3DD0980
548 data8 0xB7FD0D1EEAC38EB4, 0x00003FF1 //A20 = 8.7732544640091602721643775932e-05
549 data8 0xA9345C64AC750AE9, 0x00003FF4 //A19 = 6.4546407626804942279126469603e-04
550 data8 0x8BEABC81BE1E93C9, 0x00003FF7 //A18 = 4.2699261134524096128048819443e-03
551 data8 0xE1CD281EDD7315F8, 0x00003FF9 //A17 = 2.7563646660310313164706189622e-02
552 data8 0xAD8A5BA6D0FD9758, 0x00003FFC //A16 = 1.6947310643831556048460963841e-01
553 data8 0xFCDDA464AD3F182E, 0x00003FFE //A15 = 9.8775699098518676937088606052e-01
554 data8 0xAE0DCE2F7B60D1AE, 0x00004001 //A14 = 5.4391852309591064073782104822e+00
555 data8 0xE1745D9ABEB8D1A7, 0x00004003 //A13 = 2.8181819161363002758615770457e+01
556 //A2 = 3.0619656223573554307222366333e+06 + 1.0819940302945474471259520006e-10
557 data8 0x41475C66CFA967E4, 0x3DDDBDDB2A27334B
558 //A1 = 2.6099413018962685018777847290e+06 + 3.6851882860056025385268615240e-10
559 data8 0x4143E98AA6A48974, 0x3DF9530D42589AB6
560 //A0 = 1.1332783889487853739410638809e+06 + 1.9339350553312096248591829758e-10
561 data8 0x41314ADE639225C9, 0x3DEA946DD6C2C8D3
562 data8 0x88BCFAAE71812A1C, 0x00004006 //A12 = 1.3673820009490115307300592012e+02
563 data8 0x9A770F5AB540A326, 0x00004008 //A11 = 6.1786031215382040427126476507e+02
564 data8 0xA170C1D2C6B413FC, 0x0000400A //A10 = 2.5830473201524594051391525170e+03
565 data8 0x9AE56061CB02EB55, 0x0000400C //A9 = 9.9133441230507404119297200255e+03
566 data8 0x872390769650FBE2, 0x0000400E //A8 = 3.4595564309496661629764193479e+04
567 data8 0xD3E5E8D6923910C1, 0x0000400F //A7 = 1.0849181904819284819615140521e+05
568 data8 0x930D70602F50B754, 0x00004011 //A6 = 3.0116351174131169193070583741e+05
570 // Polynomial coefficients for the tgammal(x), 12 <= |x| < 13
571 //A5 = 1.2249876249976964294910430908e+08 + 6.0051348061679753770848000000e-09
572 data8 0x419D34BB29FFC39D, 0x3E39CAB72E01818D
573 //A4 = 2.3482765927605420351028442383e+08 + 1.1874729051592862323641700000e-08
574 data8 0x41ABFE5F168D56FA, 0x3E4980338AA7B04B
575 //A3 = 3.6407329688125067949295043945e+08 + 2.6657200942150363994658700000e-08
576 data8 0x41B5B35150E199A5, 0x3E5C9F79C0EB5300
577 data8 0xE89AE0F8D726329D, 0x00003FF9 //A20 = 2.8394164465429105626588451540e-02
578 data8 0xCF90981F86E38013, 0x00003FFC //A19 = 2.0270002071785908652476845915e-01
579 data8 0xA56C658079CA8C4A, 0x00003FFF //A18 = 1.2923704984019263122675412350e+00
580 data8 0x80AEF96A67C5615A, 0x00004002 //A17 = 8.0427183300456238315262463506e+00
581 data8 0xBE886D7529678931, 0x00004004 //A16 = 4.7633230047847868242503413461e+01
582 data8 0x858EDBA4CE2F7508, 0x00004007 //A15 = 2.6711607799594541057655957154e+02
583 data8 0xB0B0A3AF388274F0, 0x00004009 //A14 = 1.4135199810126975119809102782e+03
584 data8 0xDBA87137988751EF, 0x0000400B //A13 = 7.0290552818218513870879313985e+03
585 //A2 = 4.2828433593031734228134155273e+08 + 3.9760422293645854535247300000e-08
586 data8 0x41B98719AFEE2947, 0x3E6558A17E0D3007
587 //A1 = 3.4008253676084774732589721680e+08 + 1.2558352335001093116071000000e-09
588 data8 0x41B4453F68C2C6EB, 0x3E159338C5BC7EC3
589 //A0 = 1.3684336546556583046913146973e+08 + 2.6786516700381562934240300000e-08
590 data8 0x41A05020CAEE5EA5, 0x3E5CC3058A858579
591 data8 0xFF5E3940FB4BA576, 0x0000400D //A12 = 3.2687111823895439312116108631e+04
592 data8 0x8A08C124C7F74B6C, 0x00004010 //A11 = 1.4134701786994123329786229006e+05
593 data8 0x89D701953540BFFB, 0x00004012 //A10 = 5.6459209892773907605385652281e+05
594 data8 0xFC46344B3116C3AD, 0x00004013 //A9 = 2.0666305367147234406757715163e+06
595 data8 0xD183EBD7A400151F, 0x00004015 //A8 = 6.8653979211730981618367536737e+06
596 data8 0x9C083A40742112F4, 0x00004017 //A7 = 2.0451444503543981795037456447e+07
597 data8 0xCD3C475B1A8B6662, 0x00004018 //A6 = 5.3801245423495149598177886823e+07
598 LOCAL_OBJECT_END(Constants_Tgammal_poly)
601 LOCAL_OBJECT_START(Constants_Tgammal_poly_splitted)
// Coefficient sets A0..A20 for the short intervals named in the comments
// below (1 <= |x| < 1.25, 1.25 <= |x| < 1.75, 1.75 <= |x| < 2.0 and
// 0.0 <= |x| < 0.125).
// Every set uses the same order, 21 entries of 16 bytes each:
//   A5, A4, A3   - two data8 words per coefficient; the comment above each
//                  pair gives the value as a leading part plus a small part
//   A20 ... A13  - long double values written as data8 pairs
//   A2, A1, A0   - two-word sums again (A2 starts 0xB0 bytes after A5)
//   A12 ... A6   - long double values written as data8 pairs
// The 1 <= |X| < 13 code reads the A5.. and A2.. halves through two pointers
// that are 0xB0 bytes apart and moves between sets in steps of 21 * 16 bytes,
// so neither the order nor the size of the entries may change.
// NOTE(review): that code reaches the first three sets with entry offsets
// 126, 147 and 168 from Constants_Tgammal_poly, which presumably relies on
// this object directly following Constants_Tgammal_poly - confirm before
// moving or resizing either object.
603 // Polynomial coefficients for the tgammal(x), 1 <= |x| < 1.25
604 //A5 = -9.8199506890310417350775651357e-01+ -3.2546247786122976510752200000e-17
605 data8 0xBFEF6C80EC38B509, 0xBC82C2FA7A3DE3BD
606 //A4 = 9.8172808683439960475425323239e-01 + 4.4847611775298520359811400000e-17
607 data8 0x3FEF6A51055096B0, 0x3C89DA56DE95EFE4
608 //A3 = -9.0747907608088618225394839101e-01 +-1.0244057366544064435443970000e-16
609 data8 0xBFED0A118F324B62, 0xBC9D86C7B9EBCFFF
610 data8 0xB8E3FDAA66CC738E, 0x00003FFB //A20 = 9.0278608095877488976217714815e-02
611 data8 0xA76067AE1738699C, 0x0000BFFD //A19 =-3.2690738678103132837070881737e-01
612 data8 0x9D66B13718408C44, 0x00003FFE //A18 = 6.1484820933424283818320582920e-01
613 data8 0xD4AC67BBB4AE5599, 0x0000BFFE //A17 =-8.3075569470082063491389474937e-01
614 data8 0xF1426ED1C1488DB3, 0x00003FFE //A16 = 9.4241993542644505594957058785e-01
615 data8 0xFC12EB07AA6F4B6B, 0x0000BFFE //A15 =-9.8466366707947121954333549690e-01
616 data8 0xFF2B32CFE5B0DDC8, 0x00003FFE //A14 = 9.9675290656677214804168895915e-01
617 data8 0xFFD8E7E6FF3662EA, 0x0000BFFE //A13 =-9.9940347089360552383472582319e-01
618 //A2 = 9.8905599532797250361682017683e-01 + 5.1760162410376024240867300000e-17
619 data8 0x3FEFA658C23B1578, 0x3C8DD673A61F6FE7
620 //A1 = -5.7721566490153275452712478000e-01+ -1.0607935612223465065923310000e-16
621 data8 0xBFE2788CFC6FB618, 0xBC9E9346622D53B7
622 //A0 = 9.9999999999999988897769753748e-01 + 1.1102230245372554544790880000e-16
623 data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFF51E4E
624 data8 0xFFF360DF628F0BC9, 0x00003FFE //A12 = 9.9980740979895815468216470840e-01
625 data8 0xFFEF8F9A72B40480, 0x0000BFFE //A11 = -9.9974916001038145045939523470e-01
626 data8 0xFFE037B8C7E39952, 0x00003FFE //A10 = 9.9951504002809911822597567307e-01
627 data8 0xFFC01E08F348BED2, 0x0000BFFE //A9 = -9.9902522772325406705059517941e-01
628 data8 0xFF83DAC83119B52C, 0x00003FFE //A8 = 9.9810569179053383842734164901e-01
629 data8 0xFEF9F8AB891ABB24, 0x0000BFFE //A7 = -9.9600176036720260345608796766e-01
630 data8 0xFE3F0537573C8235, 0x00003FFE //A6 = 9.9314911461918778676646301341e-01
632 // Polynomial coefficients for the tgammal(x), 1.25 <= |x| < 1.75
633 //A5 = -7.7523052299853054125655660300e-02+ -1.2693512521686721504433600000e-17
634 data8 0xBFB3D88CFE50601B, 0xBC6D44ED60EE2170
635 //A4 = 1.4464535904462152982041800442e-01 + 2.5426820829345729856648800000e-17
636 data8 0x3FC283BD374EB2A9, 0x3C7D50AC436187C3
637 //A3 = -1.0729480456477220873257039102e-01+ -6.2429894945456418196551000000e-18
638 data8 0xBFBB77AC1CA2EBA5, 0xBC5CCA6BCC422D41
639 data8 0xF732D2689F323283, 0x00003FF2 //A20 = 2.3574688251652899567587145422e-04
640 data8 0xB6B00E23DE89D13A, 0x0000BFF3 //A19 =-3.4844916488842618776630058875e-04
641 data8 0xE98396FE4A1B2799, 0x00003FF3 //A18 = 4.4539265198744452020440735977e-04
642 data8 0xAF8D235A640DB1A2, 0x0000BFF4 //A17 =-6.6967514303333563295261178346e-04
643 data8 0x8513B736C918B261, 0x00003FF5 //A16 = 1.0152970456990865810615917715e-03
644 data8 0xC790A1A2C78D8E17, 0x0000BFF5 //A15 =-1.5225598630329403515321688394e-03
645 data8 0x959706CFA638CDE2, 0x00003FF6 //A14 = 2.2825614575133879623648932383e-03
646 data8 0xE050A6021E129860, 0x0000BFF6 //A13 =-3.4227757733947066666295285936e-03
647 //A2 = 4.1481345368830113695679528973e-01 + 3.1252439808354284892632100000e-17
648 data8 0x3FDA8C4DBA620D56, 0x3C82040BCB483C76
649 //A1 = 3.2338397448885010387886751460e-02 + 3.4437825798552300531443100000e-18
650 data8 0x3FA08EA88EE561B1, 0x3C4FC366D6C64806
651 //A0 = 8.8622692545275794095971377828e-01 + 7.2689375867553992399219000000e-17
652 data8 0x3FEC5BF891B4EF6A, 0x3C94F3877D311C0C
653 data8 0xA8275AADC09D16FC, 0x00003FF7 //A12 = 5.1316445128621071486146117136e-03
654 data8 0xFBFE2CE9215267A2, 0x0000BFF7 //A11= -7.6902121820788373000579382408e-03
655 data8 0xBCC8EEAB67ECD91D, 0x00003FF8 //A10 = 1.1522515369164312742737727262e-02
656 data8 0x8D1614BB97E5E8C2, 0x0000BFF9 //A9 = -1.7222443097804730395560633583e-02
657 data8 0xD3A963578BE291E3, 0x00003FF9 //A8 = 2.5837606456090186343624210891e-02
658 data8 0x9BA7EAE64C42FDF7, 0x0000BFFA //A7 = -3.8001935555045161419575037512e-02
659 data8 0xF0115BA1A77607E7, 0x00003FFA //A6 = 5.8610303817173477119764956736e-02
661 // Polynomial coefficients for the tgammal(x), 1.75 <= |x| < 2.0
662 //A5 = 2.6698206874501426502654943818e-04 + 3.4033756836921062797887300000e-20
663 data8 0x3F317F3740FE2A68, 0x3BE417093234B06E
664 //A4 = 7.4249010753513894345090307070e-02 + 3.9810018444482764697014200000e-18
665 data8 0x3FB301FBB0F25A92, 0x3C525BEFFABB622F
666 //A3 = -8.1576919247086265851720554565e-02+ -5.2716624487804746360745000000e-19
667 data8 0xBFB4E239984650AC, 0xBC2372F1C4F276FF
668 data8 0xFEF3AEE71038E9A3, 0x00003FEB //A20 = 1.8995395865421509009969188571e-06
669 data8 0xA11CFA2672BF876A, 0x0000BFEB //A19 =-1.2003868221414015771269244270e-06
670 data8 0xF8E107215DAE2164, 0x00003FEC //A18 = 3.7085863210303833432006027217e-06
671 data8 0xBCDDD3FC011EF7D6, 0x00003FEC //A17 = 2.8143303971756051015245433043e-06
672 data8 0x8683C4687FA22E68, 0x00003FEE //A16 = 8.0177018464360416764308252462e-06
673 data8 0xFDA09E5D33E32968, 0x00003FEE //A15 = 1.5117372062443781157389064848e-05
674 data8 0xFFB00D0CFF4089B4, 0x00003FEF //A14 = 3.0480348961227424242198174995e-05
675 data8 0xFEF6C39566785085, 0x00003FF0 //A13 = 6.0788135974125244644334004947e-05
676 //A2 = 4.1184033042643969357854416558e-01 + 1.2103396182129232634761000000e-18
677 data8 0x3FDA5B978B96BEBF, 0x3C3653AAD0A139E4
678 //A1 = -4.2278433509846713445057275749e-01+ -4.9429151528135657430413000000e-18
679 data8 0xBFDB0EE6072093CE, 0xBC56CB907027554F
680 //A0 = 1.0000000000000000000000000000e+00 + 1.0969171200000000000000000000e-31
681 data8 0x3FF0000000000000, 0x3981CC6A5B20B4D5
682 data8 0xFF2B7BA9A8D68C37, 0x00003FF1 //A12 = 1.2167446884801403650547161615e-04
683 data8 0xFCA53468E3692EF1, 0x00003FF2 //A11 = 2.4094136329542400976250900707e-04
684 data8 0x808D698A9C993615, 0x00003FF4 //A10 = 4.9038845704938303659791698883e-04
685 data8 0xF10F8E3FB8BB4AFB, 0x00003FF4 //A9 = 9.1957383840999861214472423976e-04
686 data8 0x89E224E42F93F005, 0x00003FF6 //A8 = 2.1039333407187324139473634747e-03
687 data8 0xBAF374824937A323, 0x00003FF6 //A7 = 2.8526458211545152218493600470e-03
688 data8 0xB6BF7564F52140C6, 0x00003FF8 //A6 = 1.1154045718131014476684982178e-02
690 // Polynomial coefficients for the tgammal(x), 0.0 <= |x| < 0.125
691 //A5 = -9.8199506890314514073736518185e-01+ -5.9363811993837985890950900000e-17
692 data8 0xBFEF6C80EC38B67A, 0xBC911C46B447C81F
693 //A4 = 9.8172808683440015986576554496e-01 + 2.7457414262802803699834200000e-17
694 data8 0x3FEF6A51055096B5, 0x3C7FA7FF90ACAD1F
695 //A3 = -9.0747907608088618225394839101e-01 + -1.0676255850934306734701780000e-16
696 data8 0xBFED0A118F324B62, 0xBC9EC5AFB633438D
697 data8 0x9217E83FA207CB80, 0x00003FFD //A20 = 2.8533864762086088781083621561e-01
698 data8 0xA8DABFA52FDF03EC, 0x0000BFFE //A19= -6.5958783896337186303285832783e-01
699 data8 0xE331ED293AF39F9B, 0x00003FFE //A18 = 8.8748056656454687449654731184e-01
700 data8 0xF9163C5DDB52419D, 0x0000BFFE //A17= -9.7299554149078295602977718525e-01
701 data8 0xFEC0A1C672CB9265, 0x00003FFE //A16 = 9.9512683005268190987854104489e-01
702 data8 0xFFD2D65B8EA7B5F4, 0x0000BFFE //A15= -9.9931087241443958201592847861e-01
703 data8 0xFFF93AA39EE53445, 0x00003FFE //A14 = 9.9989668364186884793382816496e-01
704 data8 0xFFFB99A9A3F5F480, 0x0000BFFE //A13= -9.9993286506283835663204999212e-01
705 //A2 = 9.8905599532797250361682017683e-01 + 5.1778575360788420716540100000e-17
706 data8 0x3FEFA658C23B1578, 0x3C8DD92B45408D07
707 //A1 = -5.7721566490153275452712478000e-01+ -1.0607938730998824663273110000e-16
708 data8 0xBFE2788CFC6FB618, 0xBC9E9346F8FDE55B
709 //A0 = 9.9999999999999988897769753748e-01 + 1.1102230246251564036631420000e-16
710 data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFFFFFFF
711 data8 0xFFF7FEBB545812C1, 0x00003FFE //A12 = 9.9987785409425126648628395084e-01
712 data8 0xFFF00C02E943A3F2, 0x0000BFFE //A11= -9.9975657530855116454438747397e-01
713 data8 0xFFE0420AADC53820, 0x00003FFE //A10 = 9.9951565514290485919027183699e-01
714 data8 0xFFC01EB42EF27EEB, 0x0000BFFE //A9 = -9.9902526759155739377365522320e-01
715 data8 0xFF83DAD0BF23FF12, 0x00003FFE //A8 = 9.9810569378236378800364235948e-01
716 data8 0xFEF9F8ABDBCDB2F3, 0x0000BFFE //A7 = -9.9600176044241699109053158187e-01
717 data8 0xFE3F05375988491D, 0x00003FFE //A6 = 9.9314911462127599008937257662e-01
718 LOCAL_OBJECT_END(Constants_Tgammal_poly_splitted)
721 LOCAL_OBJECT_START(Constants_Tgammal_common)
// Small constants shared by the tgammal code paths, followed by the
// positive overflow threshold.
// The four single-word entries occupy 0x20 bytes; the Stirling path skips
// them ("add GR_c_Table = 0x20, GR_c_Table") before loading the threshold
// with ldfe, so entries must not be inserted or removed ahead of it.
722 // Single-word constants (0.5 and 1.5 are loaded as a pair with ldfpd)
723 data8 0x3FE0000000000000 // 0.5
724 data8 0x3FF8000000000000 // 1.5
725 data8 0x3FD0000000000000 // 0.25
726 data8 0x0000000000000000 // 0
// Long double threshold; the code compares |X| against it and comments the
// test as "X > 1755.5483" (overflow domain).
727 data8 0xDB718C066B352E21, 0x00004009 // Positive overflow value
728 LOCAL_OBJECT_END(Constants_Tgammal_common)
732 //=======================================================
// Symbolic register names used by tgammal. Each line binds a name to a
// physical register (or to another name); several names deliberately share
// one register, so check the aliases noted below before reusing a name in a
// new place.
735 // General Purpose Registers
737 GR_l_Log_Table1 = r34
749 GR_l_Stirling_Table = r45
750 GR_l_N_Unbiased = r46
752 // Floating Point Registers
818 FR_l_AbsX_m_Half = f80
830 FR_l_SignedXYH = f123 // f123 is also FR_e_expl_Input_X/Output_X and FR_c_PosOverflow
836 //=======================================================
837 // Negative part registers
839 // General Purpose Registers
843 // Float point registers
874 FR_n_SinxH = f113 // the same as FR_n_Poly1H
876 FR_n_SinxL = f114 // the same as FR_n_Poly1L
910 //=======================================================
913 // General Purpose Registers
931 GR_e_sig_inv_ln2 = r49
932 GR_e_rshf_2to51 = r50
933 GR_e_exp_2tom51 = r51
936 // Floating Point Registers
937 FR_e_RSHF_2TO51 = f10
938 FR_e_INV_LN2_2TO63 = f11
939 FR_e_W_2TO51_RSH = f12
969 FR_e_Wp1_T_scale = f58
// The exp stage takes its argument and returns its result in the same pair
// of registers: X in f123 and Y in f124.
971 FR_e_expl_Input_X = f123
972 FR_e_expl_Input_Y = f124
973 FR_e_expl_Output_X = f123
974 FR_e_expl_Output_Y = f124
977 FR_e_expl_Input_AbsX = f122
981 //=======================================================
984 // General Purpose Registers
986 GR_c_NegUnderflow = r54
987 GR_c_NegSingularity = r55
993 // Floating Point Registers
994 FR_c_PosOverflow = f123 // shares f123 with FR_l_SignedXYH and FR_e_expl_Input_X/Output_X
998 //=======================================================
999 // Polynomial part registers
1001 // General Purpose Registers
1008 GR_p_X_Sgnd = GR_l_signif_Z // = r37
1013 // Floating Point Registers
1014 FR_p_AbsX = FR_l_AbsX // = f127
1015 FR_p_IXN = FR_n_IXN // = f126
1085 FR_p_OddPoly0H = f56
1086 FR_p_OddPoly0L = f51
1091 //=======================================================
1092 // Negative polynomial part registers
1093 // General Purpose Registers
1094 GR_r_sin_Table = r47
1095 GR_r_sin_Table2 = r60
1097 // Floating Point Registers
1098 FR_r_IXNS = FR_n_IXNS // alias of the name defined for the negative part
1101 FR_r_AbsX = FR_l_AbsX // alias of FR_l_AbsX (see FR_p_AbsX above: f127)
1173 // General Purpose Registers
1175 GR_DenOverflow = r33
1182 // Floating Point Registers
1186 // ERROR HANDLER REGISTERS
// tgammal starts with "alloc r32 = ar.pfs,0,32,4,0" (32 local, 4 output
// registers), so r64-r67 are the four output registers of this frame.
1187 GR_Parameter_X = r64
1188 GR_Parameter_Y = r65
1189 GR_Parameter_RESULT = r66
1190 GR_Parameter_TAG = r67
1198 GLOBAL_LIBM_ENTRY(tgammal)
1200 alloc r32 = ar.pfs,0,32,4,0
1201 fabs FR_l_AbsX = f8 // Get absolute value of X
1202 addl GR_n_sin_Table = @ltoff(Constants_Tgammal_sin), gp
1205 addl GR_l_Log_Table=@ltoff(Constants_Tgammal_log_80_Z_G_H_h1#),gp
1207 addl GR_l_Stirling_Table = @ltoff(Constants_Tgammal_stirling), gp
1211 getf.sig GR_l_signif_Z = f8 // Significand of X
1212 fcvt.fx.s1 FR_n_IXNS = f8 // Convert to fixed point
1213 addl GR_c_Table = @ltoff(Constants_Tgammal_common), gp
1216 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1218 addl GR_p_Table = @ltoff(Constants_Tgammal_poly), gp
1222 ld8 GR_n_sin_Table = [GR_n_sin_Table]
1223 fclass.m p6,p0 = f8,0x1EF // Check x for NaN, 0, INF, denorm
1225 addl GR_c_NegSingularity = 0x1003E, r0
1228 ld8 GR_l_Stirling_Table = [GR_l_Stirling_Table]
1229 movl GR_c_13 = 0x402A000000000000 // 13.0
1233 getf.d GR_c_X = f8 // Double prec. X to general register
1234 frcpa.s1 FR_z_Y0,p0 = f1,f8 // y = frcpa(x) (for negatives)
1235 extr.u GR_l_Index1 = GR_l_signif_Z, 59, 4 // = High 4 bits of Z
1238 ld8 GR_c_Table = [GR_c_Table]
1239 movl GR_c_SignBit = 0x8000000000000000 // High bit (sign)
1243 ld8 GR_p_Table = [GR_p_Table]
1244 fcmp.lt.s1 p15, p14 = f8,f0 // p14 - positive arg, p15 - negative
1245 shl GR_l_Index1 = GR_l_Index1,5 // Adjust Index1 ptr (x32)
1248 adds GR_c_NegUnderflow = 1765, r0
1250 (p6) br.cond.spnt tgammal_spec // Spec. values processing branch ////////////
1251 // (0s, INFs, NANs, NatVals, denormals) //////
1255 ldfpd FR_l_CH,FR_l_CL= [GR_l_Stirling_Table], 16 // Load CH, CL
1256 fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Abs arg to int by trunc
1257 extr.u GR_l_X_0 = GR_l_signif_Z, 49, 15 // High 15 bit of Z
1260 add GR_l_Index1 = GR_l_Index1,GR_l_Log_Table // Add offset
1261 fma.s1 FR_p_2 = f1, f1, f1 // 2.0
1262 andcm GR_c_X = GR_c_X, GR_c_SignBit // Remove sign
1266 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Z_G_H_h2#), gp
1267 fcmp.lt.s1 p10, p0 = FR_l_AbsX, f1 // If |X|<1 then p10 = 1
1271 ld2 GR_l_Z_1 = [GR_l_Index1],4 // load Z_1 from Index1
1272 movl GR_l_BIAS = 0x000000000000FFFF // Bias for exponent
1276 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1277 frcpa.s1 FR_l_Y0, p0 = f1, FR_l_AbsX // y = frcpa(x)
1281 ldfs FR_l_G_1 = [GR_l_Index1],4 // Load G_1
1282 fsub.s1 FR_l_W = FR_l_AbsX, f1 // W = |X|-1
1287 getf.exp GR_l_N_Unbiased= FR_l_AbsX // exponent of |X|
1288 fmerge.se FR_l_S = f1, FR_l_AbsX // S = merging of X and 1.0
1289 cmp.gtu p11, p0 = GR_c_13, GR_c_X // If 1 <= |X| < 13
1293 ldfs FR_l_H_1 = [GR_l_Index1],8 // Load H_1
1294 fcvt.xf FR_n_XNS = FR_n_IXNS // Convert to FP repr. of int X
1295 (p10) br.cond.spnt tgamma_lt_1 // Branch to |X| < 1 path ///////////////////
1299 ldfpd FR_n_A2H, FR_n_A2L = [GR_n_sin_Table], 16
1301 pmpyshr2.u GR_l_X_1 = GR_l_X_0,GR_l_Z_1,15 // Adjust Index2 (x32)
1304 ldfe FR_l_B2 = [GR_l_Stirling_Table], 16
1306 (p11) br.cond.spnt tgamma_lt_13 // Branch to 1 <= |X| < 13 path ///////////////
1310 ldfe FR_l_h_1 = [GR_l_Index1],0
1312 sub GR_l_N = GR_l_N_Unbiased, GR_l_BIAS // N - BIAS
1315 ldfpd FR_l_B4,FR_l_B6= [GR_l_Stirling_Table], 16 // Load C
1316 (p15) cmp.geu.unc p8,p0 = GR_l_N_Unbiased, GR_c_NegSingularity
1317 (p8) br.cond.spnt tgammal_singularity // Singularity for arg < to -2^63 //////
1321 (p15) ldfpd FR_n_A1H, FR_n_A1L = [GR_n_sin_Table], 16
1322 ldfpd FR_l_B8, FR_l_B10 = [GR_l_Stirling_Table], 16
1323 add GR_c_Table = 0x20, GR_c_Table
1327 (p15) ldfe FR_n_A9 = [GR_n_sin_Table], 16
1328 fma.s1 FR_l_Q0 = f1,FR_l_Y0,f0 // Q0 = Y0
1332 ldfpd FR_l_B12, FR_l_B14 = [GR_l_Stirling_Table], 16
1333 fnma.s1 FR_l_E0 = FR_l_Y0,FR_l_AbsX,f1 // e = 1-b*y
1338 (p15) ldfe FR_n_A8 = [GR_n_sin_Table], 16
1339 fcvt.xf FR_c_XN = FR_n_IXN // Convert to FP repr. of int X
1340 extr.u GR_l_Index2 = GR_l_X_1, 6, 4 // Extract Index2
1343 ldfpd FR_l_B16, FR_l_B18 = [GR_l_Stirling_Table], 16
1349 (p15) ldfe FR_n_A7 = [GR_n_sin_Table], 16
1350 fms.s1 FR_l_CXH = FR_l_CH, f1, FR_l_AbsX // CXH = CH+|X|
1351 shl GR_l_Index2 = GR_l_Index2,5
1354 ldfd FR_l_Half = [GR_l_Stirling_Table] // Load 0.5
1360 add GR_l_Index2 = GR_l_Index2, GR_l_Log_Table // Add offset
1365 (p15) ldfe FR_n_A6 = [GR_n_sin_Table], 16
1366 (p15) fma.s1 FR_n_XS = FR_l_AbsX , f1, FR_n_XNS // xs = x - int(x)
1371 ld2 GR_l_Z_2 = [GR_l_Index2],4
1372 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_h3_G_H#),gp
1377 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1378 fma.s1 FR_l_E2 = FR_l_E0,FR_l_E0,FR_l_E0 // e2 = e+e^2
1382 ldfs FR_l_G_2 = [GR_l_Index2],4
1383 fma.s1 FR_l_E1 = FR_l_E0,FR_l_E0,f0 // e1 = e^2
1388 ldfs FR_l_H_2 = [GR_l_Index2],8
1389 (p15) ldfe FR_n_A5 = [GR_n_sin_Table], 16
1394 setf.sig FR_l_float_N = GR_l_N // float_N = Make N a fp number
1396 pmpyshr2.u GR_l_X_2 = GR_l_X_1,GR_l_Z_2,15 // X_2 = X_1 * Z_2
1399 ldfe FR_l_h_2 = [GR_l_Index2],0
1400 fma.s1 FR_l_CXL = FR_l_AbsX, f1, FR_l_CXH // CXL = |X|+CXH
1401 add GR_l_Log_Table1= 0x200, GR_l_Log_Table
1405 (p15) ldfe FR_n_A4 = [GR_n_sin_Table], 16
1406 (p15) fcmp.eq.unc.s1 p9,p0 = FR_l_AbsX, FR_c_XN //if argument is integer
1411 ldfe FR_c_PosOverflow = [GR_c_Table],16 //Load pos overflow value
1412 (p15) fma.s1 FR_n_XS2 = FR_n_XS, FR_n_XS, f0 // xs^2 = xs*xs
1417 (p15) ldfe FR_n_A3 = [GR_n_sin_Table], 16
1423 (p15) getf.sig GR_n_XN = FR_n_IXN // int(x) to general reg
1424 fma.s1 FR_l_Y1 = FR_l_Y0,FR_l_E2,FR_l_Y0 // y1 = y+y*e2
1429 fma.s1 FR_l_E3 = FR_l_E1,FR_l_E1,FR_l_E0 // e3 = e+e1^2
1430 (p9) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
1431 // and negative arguments //////////////
1436 fms.s1 FR_l_AbsX_m_Half = FR_l_AbsX, f1, FR_l_Half // |x|-0.5
1437 extr.u GR_l_Index2 = GR_l_X_2, 1, 5 // Get Index3
1441 shladd GR_l_Log_Table1= GR_l_Index2, 2, GR_l_Log_Table1
1443 shladd GR_l_Index3 = GR_l_Index2,4, GR_l_Log_Table // Index3
1446 (p15) cmp.gtu.unc p11, p0 = GR_n_XN, GR_c_NegUnderflow // X < -1765
1447 fms.s1 FR_l_CXL = FR_l_CH, f1, FR_l_CXL // CXL = CH - CXL
1448 (p11) br.cond.spnt tgammal_underflow // Singularity for negative argument //////
1449 // at underflow domain (X < -1765) //////
1453 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Q#), gp
1454 (p15) fma.s1 FR_n_TT = FR_n_A2L, FR_n_XS2, f0 // T=A2L*x^2
1455 tbit.nz.unc p13, p12 = GR_n_XN, 0x0 // whether [X] odd or even
1459 (p15) fms.s1 FR_n_XS2L = FR_n_XS, FR_n_XS, FR_n_XS2 // xs^2 Low part
1464 ld8 GR_l_Log_Table = [GR_l_Log_Table]
1465 (p15) fma.s1 FR_n_A7 = FR_n_A8, FR_n_XS2, FR_n_A7 // poly tail
1469 ldfe FR_l_h_3 = [GR_l_Index3],12
1470 (p15) fma.s1 FR_n_XS4 = FR_n_XS2, FR_n_XS2, f0 // xs^4 = xs^2*xs^2
1475 ldfs FR_l_H_3 = [GR_l_Log_Table1], 0
1476 fma.s1 FR_l_Y2 = FR_l_Y1, FR_l_E3, FR_l_Y0 // y2 = y+y1*e3
1480 ldfs FR_l_G_3 = [GR_l_Index3], 0
1481 fnma.s1 FR_l_Z = FR_l_AbsX,FR_l_Q0,f1 // r = a-b*q
1487 fmpy.s1 FR_l_G = FR_l_G_1, FR_l_G_2 // G = G1 * G_2
1492 fadd.s1 FR_l_H = FR_l_H_1, FR_l_H_2 // H = H_1 + H_2
1497 ldfe FR_l_log2_hi = [GR_l_Log_Table],16 // load log2_hi part
1498 fadd.s1 FR_l_h = FR_l_h_1, FR_l_h_2 // h = h_1 + h_2
1503 fcvt.xf FR_l_float_N = FR_l_float_N // int(N)
1508 ldfe FR_l_log2_lo = [GR_l_Log_Table],16 // Load log2_lo part
1509 fma.s1 FR_l_CXL = FR_l_CXL, f1, FR_l_CL
1514 (p15) fma.s1 FR_n_TT = FR_n_A2H, FR_n_XS2L, FR_n_TT // T=A2H*x2L+T
1519 ldfe FR_l_Q_6 = [GR_l_Log_Table],16
1520 (p15) fma.s1 FR_n_A3 = FR_n_A4, FR_n_XS2, FR_n_A3 // poly tail
1525 (p15) fma.s1 FR_n_A5 = FR_n_A6, FR_n_XS2, FR_n_A5 // poly tail
1530 ldfe FR_l_Q_5 = [GR_l_Log_Table],16
1531 (p15) fabs FR_n_XS = FR_n_XS // abs(xs)
1536 fma.s1 FR_l_Z = FR_l_Z,FR_l_Y2,FR_l_Q0 // x_hi = q+r*y2
1541 ldfe FR_l_Q_4 = [GR_l_Log_Table],16
1542 (p15) fma.s1 FR_n_A7 = FR_n_A9, FR_n_XS4, FR_n_A7 // poly tail
1547 (p15) fma.s1 FR_n_XS7 = FR_n_XS4, FR_n_XS2, f0 // = x^4*x^2
1552 ldfe FR_l_Q_3 = [GR_l_Log_Table],16
1553 fneg FR_n_NegOne = f1 // -1.0
1558 (p15) fma.s1 FR_n_XS8 = FR_n_XS4, FR_n_XS4, f0 // xs^8 = xs^4*xs^4
1563 ldfe FR_l_Q_2 = [GR_l_Log_Table],16
1564 fadd.s1 FR_l_h = FR_l_h, FR_l_h_3 // h = h_1 + h_2 + h_3
1569 (p15) fma.s1 FR_n_TH = FR_n_A2H, FR_n_XS2, FR_n_TT // A2H*xs2+T
1574 ldfe FR_l_Q_1 = [GR_l_Log_Table],16
1575 fmpy.s1 FR_l_G = FR_l_G, FR_l_G_3 // G = G_1 * G_2 * G_3
1580 fadd.s1 FR_l_H = FR_l_H, FR_l_H_3 // H = H_1 + H_2 + H_3
1586 fma.s1 FR_l_Z2 = FR_l_Z, FR_l_Z, f0 // Z^2
1591 (p15) fma.s1 FR_n_A3 = FR_n_A5, FR_n_XS4, FR_n_A3 // poly tail
1597 (p14) fcmp.gt.unc.s1 p7,p0 = FR_l_AbsX, FR_c_PosOverflow //X > 1755.5483
1598 // (overflow domain, result cannot be represented by normal value)
1603 (p15) fma.s1 FR_n_XS7 = FR_n_XS7, FR_n_XS, f0 // x^7 construction
1609 (p15) fms.s1 FR_n_TL = FR_n_A2H, FR_n_XS2, FR_n_TH // A2H*xs2+TH
1614 (p15) fma.s1 FR_n_PolyH = FR_n_TH, f1, FR_n_A1H // PolyH=TH+A1H
1620 fmpy.s1 FR_l_GS_hi = FR_l_G, FR_l_S // GS_hi = G*S
1625 fms.s1 FR_l_r = FR_l_G, FR_l_S, f1 // r = G*S -1
1626 (p7) br.cond.spnt tgammal_overflow // Overflow path for arg > 1755.5483 //////
1631 fma.s1 FR_l_B14 = FR_l_B16, FR_l_Z2, FR_l_B14// Bernoulli tail
1636 fma.s1 FR_l_Z4 = FR_l_Z2, FR_l_Z2, f0 // Z^4 = Z^2*Z^2
1642 fma.s1 FR_l_B2 = FR_l_B4, FR_l_Z2, FR_l_B2 // Bernoulli tail
1647 fma.s1 FR_l_B6 = FR_l_B8, FR_l_Z2, FR_l_B6 // Bernoulli tail
1653 fma.s1 FR_l_B10 = FR_l_B12, FR_l_Z2, FR_l_B10// Bernoulli tail
1658 (p15) fma.s1 FR_n_Tail = FR_n_A7, FR_n_XS8, FR_n_A3 // poly tail
1664 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT // TL = TL+T
1669 (p15) fms.s1 FR_n_PolyL = FR_n_A1H, f1, FR_n_PolyH // polyH+A1H
1675 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_Q_6, FR_l_Q_5 // Q_5+r*Q_6
1680 fsub.s1 FR_l_r_cor = FR_l_GS_hi, f1 // r_cor = GS_hi -1
1686 fms.s1 FR_l_GS_lo = FR_l_G, FR_l_S, FR_l_GS_hi // G*S-GS_hi
1691 fma.s1 FR_l_poly = FR_l_r, FR_l_Q_2, FR_l_Q_1 //poly=r*Q2+Q1
1697 fmpy.s1 FR_l_rsq = FR_l_r, FR_l_r // rsq = r * r
1702 fma.s1 FR_l_G = FR_l_float_N, FR_l_log2_hi, FR_l_H // Tbl =
1703 // float_N*log2_hi + H
1709 fma.s1 FR_l_Y_lo = FR_l_float_N, FR_l_log2_lo, FR_l_h // Y_lo=
1710 // float_N*log2_lo + h
1715 fma.s1 FR_l_B14 = FR_l_B18, FR_l_Z4, FR_l_B14 //bernulli tail
1721 fma.s1 FR_l_B2 = FR_l_B6, FR_l_Z4, FR_l_B2 //bernulli tail
1726 fma.s1 FR_l_Z8 = FR_l_Z4, FR_l_Z4, f0 //bernulli tail
1732 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_4 // poly_lo =
1733 // Q_4 + r * poly_lo
1738 fsub.s1 FR_l_r_cor = FR_l_r_cor, FR_l_r // r_cor = r_cor - r
1744 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TH // polyL+TH
1749 (p15) fma.s1 FR_n_TT = FR_n_TL, f1, FR_n_A1L // TL+A1L
1755 fadd.s1 FR_l_logl_YHi = FR_l_G, FR_l_r // Y_hi = Tbl + r
1761 fma.s1 FR_l_B10 = FR_l_B14, FR_l_Z4, FR_l_B10 //bernulli tail
1767 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_3 // poly_lo =
1768 // Q_3 + r * poly_lo
1773 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_GS_lo // r_cor=r_cor+GS_lo
1779 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TT // polyL+TT
1785 fsub.s1 FR_l_Y_lo_res = FR_l_G, FR_l_logl_YHi // Y_lo = Tbl - Y_hi
1790 fma.s1 FR_l_XYH = FR_l_logl_YHi, FR_l_AbsX_m_Half, f0 // XYH=
1797 fma.s1 FR_l_SS = FR_l_B10, FR_l_Z8, FR_l_B2 // Bernoulli tail
1803 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_Y_lo // r_cor = r_cor+Y_lo
1808 fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly_lo, FR_l_poly //poly=
1815 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_n_XS2, f0 // T=polyL*xs^2
1821 fadd.s1 FR_l_Y_lo = FR_l_Y_lo_res, FR_l_r // Y_lo = Y_lo + r
1826 fms.s1 FR_l_XYL = FR_l_logl_YHi, FR_l_AbsX_m_Half, FR_l_XYH
1827 // XYL = YHi*|x-0.5|-XYH
1833 fma.s1 FR_l_SSCXH = FR_l_SS, FR_l_Z, FR_l_CXH // SS*Z+CXH
1837 mov GR_e_exp_2tom51= 0xffff-51 // 2^-51
1838 (p15) fma.s1 FR_l_SignedXYH = FR_l_XYH, FR_n_NegOne, f0 // XYH = -XYH
1845 movl GR_e_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
1849 movl GR_e_sig_inv_ln2 = 0xb8aa3b295c17f0bc //significand of 1/ln2
1854 fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly, FR_l_r_cor // poly =
1855 // rsq * poly + r_cor
1860 addl GR_e_ad_Arg = @ltoff(Constants_Tgammal_exp_64_Arg#),gp
1861 (p15) fma.s1 FR_n_TT = FR_n_PolyH, FR_n_XS2L, FR_n_TT
1862 mov GR_e_exp_mask = 0x1FFFF // Form exponent mask
1866 movl GR_e_rshf = 0x43e8000000000000 // 1.10000 2^63 rshift
1871 setf.sig FR_e_INV_LN2_2TO63 = GR_e_sig_inv_ln2 // form 1/ln2 * 2^63
1872 setf.d FR_e_RSHF_2TO51 = GR_e_rshf_2to51 // 1.1000 * 2^(63+51)
1878 fms.s1 FR_l_SSCXL = FR_l_CXH, f1, FR_l_SSCXH // CXH+SS*CXH
1883 fma.s1 FR_e_expl_Input_AbsX = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
1887 .pred.rel "mutex",p14,p15
1890 (p14) fma.s1 FR_e_expl_Input_X = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
1891 mov GR_e_exp_bias = 0x0FFFF // Set exponent bias
1894 ld8 GR_e_ad_Arg = [GR_e_ad_Arg] // Point to Arg table
1895 (p15) fms.s1 FR_e_expl_Input_X = FR_l_SignedXYH, f1, FR_l_SSCXH // HI EXP
1901 fadd.s1 FR_l_logl_YLo = FR_l_Y_lo, FR_l_poly // YLo = YLo+poly
1906 setf.exp FR_e_2TOM51 = GR_e_exp_2tom51 //2^-51 for scaling float_N
1907 (p15) fma.s1 FR_n_TH = FR_n_PolyH, FR_n_XS2, FR_n_TT // TH=
1912 setf.d FR_e_RSHF = GR_e_rshf // Right shift const 1.1000*2^63
1918 add GR_e_ad_A = 0x20, GR_e_ad_Arg // Point to A table
1920 add GR_e_ad_T1 = 0x50, GR_e_ad_Arg // Point to T1 table
1923 add GR_e_ad_T2 = 0x150, GR_e_ad_Arg // Point to T2 table
1930 fma.s1 FR_l_SSCXL = FR_l_SS, FR_l_Z, FR_l_SSCXL
1935 fms.s1 FR_e_expl_Input_Y = FR_l_XYH, f1, FR_e_expl_Input_AbsX
1940 ldfe FR_e_L_hi = [GR_e_ad_Arg],16 // Get L_hi
1947 fma.s1 FR_l_XYL = FR_l_logl_YLo, FR_l_AbsX_m_Half, FR_l_XYL
1948 // XYL = YLo*|x-0.5|+XYL
1953 ldfe FR_e_L_lo = [GR_e_ad_Arg],16 // Get L_lo
1954 (p15) fms.s1 FR_n_TL = FR_n_PolyH, FR_n_XS2, FR_n_TH // TL =
1956 add GR_e_ad_W1 = 0x100, GR_e_ad_T2 // Point to W1 table
1960 (p15) fma.s1 FR_n_Poly1H = FR_n_TH, f1, f1 // poly1H = TH+1
1961 add GR_e_ad_W2 = 0x300, GR_e_ad_T2 // Point to W2 table
1965 getf.exp GR_e_signexp_x = FR_e_expl_Input_X // Extract sign and exp
1966 ldfe FR_e_A3 = [GR_e_ad_A],16 // Get A3
1972 fma.s1 FR_l_SSCXL = FR_l_SSCXL, f1, FR_l_CXL
1977 fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_SSCXH
1983 fma.s1 FR_e_N_signif=FR_e_expl_Input_X,FR_e_INV_LN2_2TO63,FR_e_RSHF_2TO51
1984 and GR_e_exp_x = GR_e_signexp_x, GR_e_exp_mask
1988 sub GR_e_exp_x = GR_e_exp_x, GR_e_exp_bias // Get exponent
1989 ldfe FR_e_A2 = [GR_e_ad_A],16 // Get A2 for main path
1995 (p15) fma.s1 FR_n_PolyH = FR_n_Poly1H, FR_n_XS, f0//sin(Pi*x) poly
2000 (p15) fms.s1 FR_n_Poly1L = f1, f1, FR_n_Poly1H//sin(Pi*x) poly
2006 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT//sin(Pi*x) poly
2012 fma.s1 FR_l_Temp = FR_l_XYL, f1, FR_l_SSCXL // XYL+SS*CXL
2017 (p15) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, FR_n_NegOne, f0
2018 // Negate lo part of exp argument for negative input values
2023 ldfe FR_e_A1 = [GR_e_ad_A],16 // Get A1
2029 fms.s1 FR_e_float_N = FR_e_N_signif, FR_e_2TOM51, FR_e_RSHF
2030 // Get float N = signd*2^51-RSHIFTER
2036 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TH //sin(Pi*x) poly
2041 (p15) fms.s1 FR_n_PolyL = FR_n_Poly1H, FR_n_XS, FR_n_PolyH//sin(Pi*x)
2046 getf.sig GR_e_N_fix = FR_e_N_signif // Get N from significand
2051 .pred.rel "mutex",p14,p15
2054 (p14) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
2059 (p15) fms.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
2060 // arguments for exp computation
2066 fnma.s1 FR_e_r = FR_e_L_hi, FR_e_float_N, FR_e_expl_Input_X
2067 // r = -L_hi * float_N + x
2068 extr.u GR_e_M1 = GR_e_N_fix, 6, 6 // Extract index M_1
2073 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TL //sin(Pi*x) poly
2081 fma.s1 FR_e_r = FR_e_r, f1, FR_e_expl_Input_Y
2082 // r = r + FR_e_expl_Input_Y
2086 shladd GR_e_ad_W1 = GR_e_M1,3,GR_e_ad_W1 // Point to W1
2087 shladd GR_e_ad_T1 = GR_e_M1,2,GR_e_ad_T1 // Point to T1
2088 extr.u GR_e_M2 = GR_e_N_fix, 0, 6 // Extract index M_2
2093 ldfs FR_e_T1 = [GR_e_ad_T1],0 // Get T1
2095 extr GR_e_K = GR_e_N_fix, 12, 32 //Extract limit range K
2098 shladd GR_e_ad_T2 = GR_e_M2,2,GR_e_ad_T2 // Point to T2
2099 (p15) fma.s1 FR_n_PolyL = FR_n_Poly1L, FR_n_XS, FR_n_PolyL
2101 shladd GR_e_ad_W2 = GR_e_M2,3,GR_e_ad_W2 // Point to W2
2105 ldfs FR_e_T2 = [GR_e_ad_T2],0 // Get T2
2107 add GR_e_exp_2_k = GR_e_exp_bias, GR_e_K // exp of 2^k
2110 ldfd FR_e_W1 = [GR_e_ad_W1],0 // Get W1
2112 sub GR_e_exp_2_mk = GR_e_exp_bias, GR_e_K // exp of 2^-k
2116 ldfd FR_e_W2 = [GR_e_ad_W2],0 // Get W2
2122 setf.exp FR_e_scale = GR_e_exp_2_k // Set scale = 2^k
2123 setf.exp FR_e_2_mk = GR_e_exp_2_mk // Form 2^-k
2124 fnma.s1 FR_e_r = FR_e_L_lo, FR_e_float_N, FR_e_r
2125 // r = -L_lo * float_N + r
2130 (p15) fma.s1 FR_n_PolyL = FR_n_Tail, FR_n_XS7, FR_n_PolyL
2137 fma.s1 FR_e_poly = FR_e_r, FR_e_A3, FR_e_A2 // poly=r*A3+A2
2142 fmpy.s1 FR_e_rsq = FR_e_r, FR_e_r // rsq = r * r
2148 fmpy.s1 FR_e_T = FR_e_T1, FR_e_T2 // T = T1 * T2
2153 fadd.s1 FR_e_W1_p1 = FR_e_W1, f1 // W1_p1 = W1 + 1.0
2159 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_l_AbsX, f0 //sin(Pi*x) poly
2165 fma.s1 FR_e_poly = FR_e_r, FR_e_poly, FR_e_A1
2166 // poly = r * poly + A1
2172 fma.s1 FR_e_T_scale = FR_e_T, FR_e_scale, f0 // T_scale=T*scale
2177 fma.s1 FR_e_W = FR_e_W2, FR_e_W1_p1, FR_e_W1
2178 // W = W2 * (W1+1.0) + W1
2184 (p15) fma.s1 FR_n_SinxH = FR_n_PolyH, FR_l_AbsX, FR_n_TT
2191 mov FR_e_Y_hi = FR_e_T // Assume Y_hi = T
2197 fma.s1 FR_e_poly = FR_e_rsq, FR_e_poly, FR_e_r
2198 // poly = rsq * poly + r
2204 fma.s1 FR_e_Wp1_T_scale = FR_e_W, FR_e_T_scale, FR_e_T_scale
2210 fma.s1 FR_e_W_T_scale = FR_e_W, FR_e_T_scale, f0 // W*T*scale
2216 (p15) fms.s1 FR_n_SinxL = FR_n_PolyH, FR_l_AbsX, FR_n_SinxH
2223 (p15) frcpa.s1 FR_n_Y0, p0 = f1, FR_n_SinxH // y = frcpa(b)
2229 fma.s1 FR_e_result_lo = FR_e_Wp1_T_scale, FR_e_poly, FR_e_W_T_scale
2230 // Low part of exp result
2236 (p15) fma.s1 FR_n_SinxL = FR_n_SinxL, f1, FR_n_TT // sin low result
2242 (p15) fma.s1 FR_n_Q0 = f1,FR_n_Y0,f0 // q = y
2247 (p15) fnma.s1 FR_n_E0 = FR_n_Y0, FR_n_SinxH, f1 // e = 1-b*y
2254 (p14) fma.s0 f8 = FR_e_Y_hi, FR_e_scale, FR_e_result_lo
2255 (p14) br.ret.spnt b0 // Exit for positive Stirling path //////////////////////
2260 fma.s1 FR_e_expl_Output_X = FR_e_Y_hi, FR_e_scale, f0 // exp result
2265 fma.s1 FR_e_expl_Output_Y = FR_e_result_lo, f1, f0// exp lo result
2271 fma.s1 FR_n_E2 = FR_n_E0,FR_n_E0,FR_n_E0 // e2 = e+e^2
2276 fma.s1 FR_n_E1 = FR_n_E0,FR_n_E0,f0 // e1 = e^2
2282 fma.s1 FR_n_Y1 = FR_n_Y0,FR_n_E2,FR_n_Y0 // y1 = y+y*e2
2287 fma.s1 FR_n_E3 = FR_n_E1,FR_n_E1,FR_n_E0 // e3 = e+e1^2
2293 fma.s1 FR_n_Y2 = FR_n_Y1,FR_n_E3,FR_n_Y0 // y2 = y+y1*e3
2298 fnma.s1 FR_n_R0 = FR_n_SinxH,FR_n_Q0,f1 // r = a-b*q
2304 fnma.s1 FR_n_E4 = FR_n_SinxH,FR_n_Y2,f1 // e4 = 1-b*y2
2309 fma.s1 FR_n_RcpResH = FR_n_R0,FR_n_Y2,FR_n_Q0 // x = q+r*y2
2315 fma.s1 FR_n_Y3 = FR_n_Y2,FR_n_E4,FR_n_Y2 // y3 = y2+y2*e4
2320 fnma.s1 FR_n_R1 = FR_n_SinxH,FR_n_RcpResH,f1 // r1 = a-b*x
2326 fnma.s1 FR_n_R1 = FR_n_SinxL,FR_n_RcpResH,FR_n_R1
2333 fma.s1 FR_n_RcpResL = FR_n_R1,FR_n_Y3,f0 // x_lo = r1*y3
2338 fma.s1 FR_n_Temp = FR_n_RcpResH, FR_e_expl_Output_Y, f0
2339 // Multiplying exp and sin result
2345 fma.s1 FR_n_Temp = FR_n_RcpResL, FR_e_expl_Output_X, FR_n_Temp
2346 // Multiplying exp and sin result
2352 fma.s1 FR_n_ResH = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_Temp
2353 // Multiplying exp and sin result
2359 fms.s1 FR_n_ResL = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_ResH
2360 // Multiplying exp and sin result
2365 (p12) fma.s1 FR_n_ResH = FR_n_ResH, FR_n_NegOne, f0 // Negate
2371 fma.s1 FR_n_ResL = FR_n_ResL, f1, FR_n_Temp
2372 // Multiplying exp and sin result - low result obtained
2376 .pred.rel "mutex",p12,p13
2379 (p13) fma.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For odd
2384 (p12) fms.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For even
2385 br.ret.sptk b0 // Exit for negative Stirling path //////////////////////
2389 //////////// 1 <= |X| < 13 path ////////////////////////////////////////////////
2390 //------------------------------------------------------------------------------
// Setup for the 1 <= |X| < 13 path: pick the polynomial table entry for the
// interval containing |x|, form the reduced argument r (FR_p_XR) and load the
// coefficients A0..A20 through two post-incremented pointers (GR_p_Table and
// GR_p_Table2).  The FR_r_* loads and operations interleaved here feed the
// "sin for neg" computation, which is only consumed for negative arguments.
// Predicates established here:
//   p9 / p10 / p11 - sub-intervals of 1 <= x < 2 (see the table offsets below)
//   p12            - result is taken directly from the polynomial
//   p13            - result needs the extra multiply by (x-1) ("recurrent")
//   p7             - negative integer argument -> singularity branch
2394 getf.sig GR_p_XN = FR_p_IXN // [x] as an integer (significand of FR_p_IXN)
2395 fcvt.xf FR_p_XN = FR_p_IXN // xn = [x]
2396 add GR_r_sin_Table2= 0x40, GR_r_sin_Table // Shifted table addr.
2399 ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 // 0.5 & 1.5
2400 fms.s1 FR_p_AbsXM1 = FR_p_AbsX, f1, f1 // X-1
2401 add GR_p_Table2 = 0xB0, GR_p_Table // Second coefficient pointer
2405 add GR_r_sin_Table = -16, GR_r_sin_Table // For compensation
2406 fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
2407 shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 59 // Keep only the top 5 bits of significand
2411 ldfpd FR_r_A2H,FR_r_A2L = [GR_r_sin_Table], 16 // Load A2
2413 add GR_p_Int = -2, GR_p_XN // int = int - 2
2416 ldfe FR_r_A6 = [GR_r_sin_Table2], 16
2418 cmp.gtu p11, p12 = 0x2, GR_p_XN // p11: [x] < 2 (split intervals),
2419 // p12: [x] >= 2 (base intervals)
2423 ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
2425 shr GR_p_Int = GR_p_Int, 1 // int/2
2428 ldfe FR_r_A5 = [GR_r_sin_Table2], 16
2430 (p11) cmp.gtu.unc p10, p11 = 0x1C, GR_p_X_Sgnd // p10: top bits < 0x1C (frac(x) < 0.75), else p11
2434 ldfe FR_r_A9 = [GR_r_sin_Table], 16
2436 shl GR_p_Offset = GR_p_Int, 4 // offset = int*16
2439 ldfe FR_r_A4 = [GR_r_sin_Table2], 16
2441 (p10) cmp.gtu.unc p9, p10 = 0x14, GR_p_X_Sgnd // p9: top bits < 0x14 (frac(x) < 0.25), else p10
2446 ldfe FR_r_A8 = [GR_r_sin_Table], 16
2448 (p12) tbit.nz.unc p13, p12 = GR_p_XN, 0x0 // p13: [x] odd - recurrent computations
2449 // X is at [3;4], [5;6], [7;8]... interval; p12 stays set for even [x]
2452 ldfe FR_r_A3 = [GR_r_sin_Table2], 16
2454 shladd GR_p_Offset = GR_p_Int, 2, GR_p_Offset // +int*4
2457 .pred.rel "mutex",p9,p11
2459 add GR_p_Offset = GR_p_Int, GR_p_Offset
2460 // +int, so offset = int*21
2461 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f1 // r = x-1
2465 ldfe FR_r_A7 = [GR_r_sin_Table], 16
2466 (p11) fms.s1 FR_p_XR = FR_p_2, f1, FR_p_AbsX
2467 // r = 2-x for 1.75 < x < 2
2471 .pred.rel "mutex",p9,p10
2472 .pred.rel "mutex",p10,p11
2473 .pred.rel "mutex",p9,p11
2475 (p9) add GR_p_Offset = 126, r0 // 1.0 < x < 1.25 table
2476 (p15) fcmp.eq.unc.s1 p7,p0 = FR_p_AbsX, FR_p_XN
2477 // If arg is integer and negative - singularity branch
2481 (p10) add GR_p_Offset = 147, r0 // 1.25 < x < 1.75 table
2483 (p11) add GR_p_Offset = 168, r0 // 1.75 < x < 2.0 table
2487 shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table // Table += offset*16
2488 shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2 // Table2 += offset*16
2489 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = x - [x]
2493 ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
2494 ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
2495 (p7) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
2496 // and negative argument ///////////////
2500 ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
2501 fma.s1 FR_p_XN = FR_p_XN, f1, FR_p_0p5 // xn = xn+0.5
2505 ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
2506 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_1p5 // r = x - 1.5
2511 ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
2512 ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
2517 ldfe FR_p_A20 = [GR_p_Table], 16
2518 ldfe FR_p_A12 = [GR_p_Table2], 16
2523 ldfe FR_p_A19 = [GR_p_Table], 16
2524 ldfe FR_p_A11 = [GR_p_Table2], 16
2525 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs2 = xs*xs
2529 ldfe FR_p_A18 = [GR_p_Table], 16
2530 ldfe FR_p_A10 = [GR_p_Table2], 16
2534 .pred.rel "mutex",p12,p13
2536 ldfe FR_p_A17 = [GR_p_Table], 16
2537 (p12) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN // r = x - xn
2541 ldfe FR_p_A9 = [GR_p_Table2], 16
2542 (p13) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN // r = x - xn (same form for odd [x])
2547 ldfe FR_p_A16 = [GR_p_Table], 16
2548 ldfe FR_p_A8 = [GR_p_Table2], 16
// For 1 <= x < 2 both p12 and p13 are off at this point (p12 was cleared by
// the cmp.gtu above and the tbit.nz.unc is disqualified).  The three
// cmp.eq r0,r0 compares below are always true, so under p9/p10/p11 they turn
// p12 ON, which selects the non-recurrent result form for these intervals.
2549 (p9) cmp.eq p12, p0 = r0, r0 // set p12 (r0 == r0 always holds)
2553 ldfe FR_p_A15 = [GR_p_Table], 16
2554 ldfe FR_p_A7 = [GR_p_Table2], 16
2555 (p10) cmp.eq p12, p0 = r0, r0 // set p12 (r0 == r0 always holds)
2559 ldfe FR_p_A14 = [GR_p_Table], 16
2560 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // sin for neg
2561 (p11) cmp.eq p12, p0 = r0, r0 // set p12 (r0 == r0 always holds)
2564 ldfe FR_p_A6 = [GR_p_Table2], 16
2565 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // sin for neg
2570 ldfe FR_p_A13 = [GR_p_Table], 16
2571 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // x2Lo part
// Polynomial evaluation for the 1 <= |X| < 13 path.
// The leading coefficients (A0..A5) are carried as high/low pairs (...H/...L):
// an fms of the form a*b - round(a*b) recovers the rounding error of the
// preceding product ("delta"), and the ...L registers accumulate those errors
// together with the low coefficient words.  The remaining coefficients
// (A6..A20, "Poly tail") are combined without a low part and folded into the
// low word of the result.  Lines tagged "sin for neg" evaluate the FR_r_*
// series in parallel; it is only consumed when the argument is negative.
// On exit f8 holds Poly0H+Poly0L (p12) or OddPoly0H+OddPoly0L (p13), and
// positive arguments (p14) return.
2577 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // A5H*r
2583 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // r^2 = r*r
2589 fabs FR_r_XS = FR_r_XS // abs(xs)
2594 fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // A2H*r
2601 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // sin for neg
2606 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // sin for neg
2612 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // sin for neg
2618 fms.s1 FR_p_Temp5L = FR_p_A5H,FR_p_XR,FR_p_Temp5H //A5H*r delta
2624 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // A5H*r+A4H
2631 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H//A2H*r delta
2637 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // A2H*r+A1H
2644 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3 = r^2*r
2649 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
2655 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // Poly tail
2660 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // Poly tail
2666 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // r^4 = r^2*r^2
2672 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L// Low part
2678 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low part
2685 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // (A5H*r+A4H)*r
2690 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // A2*r low
2696 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
2701 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
2707 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
2712 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 // Poly tail
2718 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // sin for neg
2723 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
2729 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
2734 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H // Low poly
2740 fms.s1 FR_p_Temp4L = FR_p_Poly5H,FR_p_XR,FR_p_Temp4H //Low poly
2745 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
2751 fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
2756 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
2762 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
2767 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
2773 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // Poly tail
2778 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
2784 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L // Low poly
2789 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 // Poly tail
2795 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H // Low poly
2800 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // Poly tail
2806 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // Poly tail
2811 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // Poly tail: r^6
2817 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
2822 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
2828 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // sin for neg
2833 fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // sin for neg
2839 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L // Low poly
2844 fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // Poly tail
2850 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
2855 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // Poly tail
2861 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // Poly tail
2866 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // Poly tail
2872 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
2877 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
2883 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // sin for neg
2888 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // sin for neg
2894 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
2899 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
2905 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // Poly tail
2910 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // Poly tail: r^8
2916 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // sin for neg
2922 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
2927 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // Poly tail
2933 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // sin for neg
2938 fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // sin for neg
2944 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
2949 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
2955 fma.s1 FR_p_Poly0H = FR_p_Poly3H,f1,FR_p_Poly1H //Low & High add
2960 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // sin for neg
2966 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // sin for neg
2971 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // sin for neg: xs^4
2977 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
2982 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // Poly tail
2988 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // sin for neg
2993 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // sin for neg
2999 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
3004 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
3010 fms.s1 FR_p_Poly0L = FR_p_Poly1H,f1,FR_p_Poly0H //Low & High add
3015 (p13) fma.s1 FR_p_OddPoly0H = FR_p_Poly0H, FR_p_AbsXM1, f0
3016 // Recurrent computations - multiplying by X-1
3022 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
3027 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 // sin for neg
3033 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L//High
3038 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // sin for neg
3044 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // sin for neg
3049 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH//sin for neg
3055 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
3060 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // sin for neg
3066 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Poly3H //Low & High add
3071 (p13) fms.s1 FR_p_OddPoly0L = FR_p_Poly0H, FR_p_AbsXM1, FR_p_OddPoly0H
3072 // Recurrent computations - multiplying by X-1 (low part)
3078 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin for neg
3083 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
3089 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin for neg
3094 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // sin for neg: xs^8
3100 fma.s1 FR_p_Temp0H = FR_p_Poly3L,f1,FR_p_Poly1L //Low & High add
3106 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
3112 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL//sin for neg
3117 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // sin tail res
3123 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Temp0H //Low & High add
3130 fma.s1 FR_r_ResL = FR_r_Tail,FR_r_XS7,FR_r_ResL //sin for neg
3136 (p13) fma.s1 FR_p_OddPoly0L = FR_p_Poly0L, FR_p_AbsXM1, FR_p_OddPoly0L
3137 // Recurrent computations - multiplying by X-1 (low part)
3143 fma.s1 FR_r_TT = FR_r_ResL, FR_r_AbsX, f0 // X*sin
3147 .pred.rel "mutex",p12,p13
3150 (p12) fma.s0 f8 = FR_p_Poly0H, f1, FR_p_Poly0L // Even [x], or x < 2
3155 (p13) fma.s0 f8 = FR_p_OddPoly0H, f1, FR_p_OddPoly0L // Odd [x]
3156 (p14) br.ret.spnt b0 // Exit for 1 <= |X| < 13 path (positive arguments)/////
// Negative-argument tail of the 1 <= |X| < 13 path.
// The polynomial value (Poly0H/Poly0L, replaced by the OddPoly0 pair when p13)
// is multiplied by |x| times the FR_r_* "sin" value, and the result is then
// inverted: frcpa supplies a reciprocal seed that is refined with FMA steps,
// and the low word of the product is folded in through r1.  p12/p13 select
// the sign of the value written to f8.
3161 (p13) fma.s1 FR_p_Poly0H = FR_p_OddPoly0H, f1, f0
3162 // Recurrent computations: continue with the (x-1)-scaled high word
3167 (p13) fma.s1 FR_p_Poly0L = FR_p_OddPoly0L, f1, f0
3168 // Recurrent computations: continue with the (x-1)-scaled low word
3174 fma.s1 FR_r_Res1H = FR_r_ResH, FR_r_AbsX, FR_r_TT // X*sin
3175 (p11) cmp.eq p13, p12 = r0, r0 // 1.75 <= x < 2: set p13, clear p12
3180 fms.s1 FR_r_Res1L = FR_r_ResH,FR_r_AbsX,FR_r_Res1H// X*sin
3181 (p9) cmp.eq p13, p12 = r0, r0 // 1 <= x < 1.25: set p13, clear p12
3186 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // sin for neg
3187 (p10) cmp.eq p13, p12 = r0, r0 // 1.25 <= x < 1.75: set p13, clear p12
3191 fma.s1 FR_r_TL = FR_p_Poly0L, FR_r_Res1H, f0 // mult by sin
3197 fma.s1 FR_r_TL = FR_p_Poly0H,FR_r_Res1L,FR_r_TL//mult by sin
3203 fma.s1 FR_r_ResH = FR_p_Poly0H,FR_r_Res1H,FR_r_TL//mult by sin
3209 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH//sin mult
// Reciprocal of (ResH + ResL); in the comments below a = 1 (f1), b = ResH.
3215 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
3221 fneg FR_r_NegOne = f1 // Form -1.0
3226 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL //Low result of mult
3232 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
3237 fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
3243 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
3248 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
3254 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
3259 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
3265 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
3270 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
3276 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
3281 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
3287 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
3292 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
3298 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1-b_lo*X
3303 (p12) fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate for evens
3307 .pred.rel "mutex",p13,p12
3310 (p13) fma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZH // Final result: ZH + r1*y3
3315 (p12) fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Final result: -(ZH + r1*y3)
3316 br.ret.sptk b0 // Exit for 1 <= |X| < 13 path (negative arguments)//////
3320 //////////// |X| < 1 path /////////////////////////////////////////////////////
3321 //------------------------------------------------------------------------------
// Setup for the |X| < 1 path: classify |x| into one of four intervals, form
// the reduced argument r (FR_p_XR) and load the coefficients A0..A20 through
// GR_p_Table / GR_p_Table2.  In parallel, the FR_z_* chain refines a
// reciprocal of the argument (f8) starting from FR_z_Y0, which is set before
// this section, and the FR_r_* lines start the "neg sin" series used only for
// negative arguments.
// Interval predicates (per the table-offset comments below):
//   p8: [0;0.125]   p9: [0.125;0.25]   p10: [0.25;0.75]   p11: [0.75;1]
3325 getf.exp GR_p_Exp = FR_p_AbsX // exp of abs X
3326 fma.s1 FR_z_Q0 = f1,FR_z_Y0,f0 // q = a*y
3327 add GR_r_sin_Table2= 0x50, GR_r_sin_Table
3330 ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16
3331 fnma.s1 FR_z_E0 = FR_z_Y0,f8,f1 // e = 1-b*y
3332 add GR_p_Table2 = 0xB0, GR_p_Table // Second coefficient pointer
3336 ldfd FR_p_0p25 = [GR_c_Table] // presumably the constant 0.25 (by name) - TODO confirm
3337 fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
3338 shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 60
3339 // Obtain only 4 bits of significand
3344 add GR_p_Bias = 0xffff, r0 // Set bias
3348 ldfpd FR_r_A2H, FR_r_A2L = [GR_r_sin_Table], 16
3350 shl GR_p_XN = GR_p_Exp, 4
3351 // Shift exp to 4 bits left to set place for significand
3354 ldfe FR_r_A6 = [GR_r_sin_Table2], 16
3355 movl GR_p_0p75 = 0xfffec // 0.75 in the same key format: exp 0xfffe, top bits 0xc
3359 ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
3361 or GR_p_XN = GR_p_XN, GR_p_X_Sgnd
3362 // Combine exp with 4 high bits of significand
3365 ldfe FR_r_A5 = [GR_r_sin_Table2], 16
3367 sub GR_p_Exp = GR_p_Exp, GR_p_Bias // Unbiased exp
3371 ldfe FR_r_A9 = [GR_r_sin_Table], 16
3372 ldfe FR_r_A4 = [GR_r_sin_Table2], 16
3373 cmp.gtu.unc p10, p11 = GR_p_0p75, GR_p_XN // p10: |x| < 0.75, p11: |x| >= 0.75
3377 ldfe FR_r_A8 = [GR_r_sin_Table], 16
3378 fma.s1 FR_z_E2 = FR_z_E0,FR_z_E0,FR_z_E0 // e2 = e+e^2
3379 (p10) cmp.gt.unc p9, p10 = -2, GR_p_Exp // p9: unbiased exp < -2 (x < 0.25), else p10
3382 ldfe FR_r_A3 = [GR_r_sin_Table2], 16
3383 fma.s1 FR_z_E1 = FR_z_E0,FR_z_E0,f0 // e1 = e^2
3384 (p11) add GR_p_Offset = 168, r0 // [0.75;1] interval
3388 (p10) add GR_p_Offset = 147, r0 // [0.25;0.75] interval
3389 ldfe FR_r_A7 = [GR_r_sin_Table], 16
3390 (p9) cmp.gt.unc p8, p9 = -3, GR_p_Exp // p8: unbiased exp < -3 (x < 0.125), else p9
3393 .pred.rel "mutex",p9,p8
3395 (p9) add GR_p_Offset = 126, r0 // [0.125;0.25] interval
3396 (p8) add GR_p_Offset = 189, r0 // [0.;0.125] interval
3401 shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table //Make addresses
3402 shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2 // Table2 += offset*16
3403 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = |x|-[x]
3406 .pred.rel "mutex",p8,p11
3408 ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
3409 (p11) fms.s1 FR_p_XR = f1, f1, FR_p_AbsX // r = 1 - |x|
3410 // for [0.75;1] interval
3414 ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
3415 (p8) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
3416 // for [0.;0.125] interval
3421 ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
3422 fma.s1 FR_z_Y1 = FR_z_Y0,FR_z_E2,FR_z_Y0 // y1 = y+y*e2
3426 ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
3427 fma.s1 FR_z_E3 = FR_z_E1,FR_z_E1,FR_z_E0 // e3 = e+e1^2
3431 .pred.rel "mutex",p9,p10
3433 ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
3434 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
3435 // for [0.125;0.25] interval
3439 ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
3440 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_0p5 // r = |x| - 0.5
3441 // for [0.25;0.75] interval
3446 ldfe FR_p_A20 = [GR_p_Table], 16
3447 ldfe FR_p_A12 = [GR_p_Table2], 16
3452 ldfe FR_p_A19 = [GR_p_Table], 16
3453 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs^2
3457 ldfe FR_p_A11 = [GR_p_Table2], 16
3463 ldfe FR_p_A18 = [GR_p_Table], 16
3464 ldfe FR_p_A10 = [GR_p_Table2], 16
3468 .pred.rel "mutex",p12,p13
3470 ldfe FR_p_A17 = [GR_p_Table], 16
3471 fma.s1 FR_z_Y2 = FR_z_Y1,FR_z_E3,FR_z_Y0 // y2 = y+y1*e3
3475 ldfe FR_p_A9 = [GR_p_Table2], 16
3476 fnma.s1 FR_z_R0 = f8,FR_z_Q0,f1 // r = a-b*q
3481 ldfe FR_p_A16 = [GR_p_Table], 16
3482 ldfe FR_p_A8 = [GR_p_Table2], 16
3487 ldfe FR_p_A15 = [GR_p_Table], 16
3488 ldfe FR_p_A7 = [GR_p_Table2], 16
3493 ldfe FR_p_A14 = [GR_p_Table], 16
3494 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // neg sin
3498 ldfe FR_p_A6 = [GR_p_Table2], 16
3499 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // neg sin
3504 ldfe FR_p_A13 = [GR_p_Table], 16
3505 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // xs^2 delta
// Polynomial evaluation for the |X| < 1 path.
// Same scheme as the 1 <= |X| < 13 path: A0..A5 are carried as high/low
// pairs, with a*b - round(a*b) fms steps recovering product rounding errors,
// while A6..A20 ("poly tail") are combined without a low part.  The FR_z_*
// lines finish the reciprocal z = 1/x as a ZH/ZL pair, and the "neg sin"
// lines evaluate the FR_r_* series for negative arguments.
// For positive arguments (p14) the result is z*poly, assembled as a high
// product in f8 plus a low correction in FR_p_Temp1L, and the path returns.
3511 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // Low poly
3516 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // poly tail: r^2
3522 fabs FR_r_XS = FR_r_XS // Absolute value of xs
3527 fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // High poly
3533 fnma.s1 FR_z_E4 = f8,FR_z_Y2,f1 // e4 = 1-b*y2
3538 fma.s1 FR_z_ZH = FR_z_R0,FR_z_Y2,FR_z_Q0 // 1/x = q+r*y2
3544 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // neg sin
3549 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // neg sin
3555 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // neg sin
3561 fms.s1 FR_p_Temp5L = FR_p_A5H, FR_p_XR, FR_p_Temp5H // Low poly
3566 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // Low poly
3572 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H // High poly
3577 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // High poly
3583 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3
3588 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
3594 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // poly tail
3599 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // poly tail
3605 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // poly tail: r^4
3610 fma.s1 FR_z_Y3 = FR_z_Y2,FR_z_E4,FR_z_Y2 // y3 = y2+y2*e4
3616 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L // Low poly
3621 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low poly
3627 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // Low poly
3632 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // High poly
3638 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
3643 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
3649 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
3654 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 //poly tail
3660 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // neg sin
3665 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
3671 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
3676 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H //Low poly
3682 fms.s1 FR_p_Temp4L = FR_p_Poly5H, FR_p_XR, FR_p_Temp4H//Low poly
3687 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
3693 fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
3698 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
3704 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
3709 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
3715 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // poly tail
3720 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
3726 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L //Low poly
3731 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 //poly tail
3737 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H // Low poly
3742 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // poly tail
3748 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // poly tail
3753 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // r^6
3759 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
3764 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
3770 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // neg sin
3775 fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // neg sin
3781 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L //Low poly
3786 fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // poly tail
3792 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
3797 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // poly tail
3803 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // poly tail
3808 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // poly tail
3814 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
3819 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
3825 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // neg sin
3830 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // neg sin
3836 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
3841 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
3847 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // poly tail
3852 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // r^8
3858 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // neg sin
3863 fnma.s1 FR_z_R1 = f8,FR_z_ZH,f1 // r1 = a-b*x
3869 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
3874 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // poly tail
3880 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // neg sin
3885 fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // neg sin
3891 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
3896 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
3902 fma.s1 FR_p_Poly0H = FR_p_Poly3H, f1, FR_p_Poly1H // Result
3907 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // neg sin
3913 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // neg sin
3918 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // xs^4
3924 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
3929 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // poly tail
3935 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // neg sin
3940 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // neg sin
3946 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
3951 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
3957 fms.s1 FR_p_Poly0L = FR_p_Poly1H, f1, FR_p_Poly0H // Result
3962 fma.s1 FR_z_ZL = FR_z_R1,FR_z_Y3, f0 // x_lo = r1*y3
3968 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
3973 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 // neg sin
3979 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L // High
3984 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // neg sin
3990 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // neg sin
3995 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH // neg sin
4001 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
4006 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // neg sin
4012 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Poly3H // result
4018 (p14) fma.s1 f8 = FR_p_Poly0H, FR_z_ZH, f0 // z*poly
4023 fma.s1 FR_p_Temp1L = FR_p_Poly0H, FR_z_ZL, f0 // z*poly low
4029 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin tail
4034 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
4040 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin low
4045 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // xs^8
4051 fma.s1 FR_p_Temp0H = FR_p_Poly3L, f1, FR_p_Poly1L // result
4057 (p14) fms.s1 FR_p_Temp1H = FR_p_Poly0H, FR_z_ZH, f8 // rounding error of the hi product
4063 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
4069 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL // lo result
4074 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // tail result
4080 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Temp0H // lo result
4086 fma.s1 FR_r_ResL = FR_r_Tail, FR_r_XS7, FR_r_ResL // lo result
4092 (p14) fma.s1 FR_p_Temp1L = FR_p_Poly0L,FR_z_ZH,FR_p_Temp1L // lo part: Poly0L*ZH + Poly0H*ZL
4098 fma.s1 FR_r_TT = FR_r_ResL, f1, f0 // for low result
4102 .pred.rel "mutex",p12,p13
4105 (p14) fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_Temp1H // for lo res
4110 (p10) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4111 fma.s1 FR_r_Res1H = FR_r_ResH, f1, FR_r_TT // hi res
4116 (p9) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4117 (p14) fma.s0 f8 = f8, f1, FR_p_Temp1L // Final result
4118 (p14) br.ret.spnt b0 // Exit for 0 < |X| < 1 path (positive arguments)///////
// Negative-argument tail of the |X| < 1 path.
// The polynomial value (Poly0H/Poly0L) is multiplied by the FR_r_* "sin"
// value (Res1H/Res1L) and the product is inverted: frcpa supplies a
// reciprocal seed that is refined with FMA steps, and the low word of the
// product is folded in through r1.  The value written to f8 is
// -(ZH + r1*y3).  In the comments below a = 1 (f1) and b = ResH.
4122 (p11) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
4123 fms.s1 FR_r_Res1L = FR_r_ResH, f1, FR_r_Res1H // Low sin result
4129 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // Low sin result
4134 fma.s1 FR_r_TL = FR_p_Poly0L,FR_r_Res1H,f0 //Low sin result
4140 fma.s1 FR_r_TL = FR_p_Poly0H, FR_r_Res1L, FR_r_TL //Low sin
4146 fma.s1 FR_r_ResH = FR_p_Poly0H, FR_r_Res1H, FR_r_TL //High sin
4152 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH //Low res
4158 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
4164 fneg FR_r_NegOne = f1 // Construct -1.0
4169 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL // low sin
4175 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
4180 fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
4186 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
4191 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
4197 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
4202 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
4208 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
4213 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
4219 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
4224 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
4230 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
4235 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
4241 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1 - b_lo*X
4246 fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate
4250 .pred.rel "mutex",p13,p12
4253 fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Result for neg: -(ZH + r1*y3)
4254 br.ret.sptk b0 // Exit for 0 < |X| < 1 path (negative arguments)//////
4260 // SPECIALS (x for natval, nan, +/-inf or +/-0) ///////////////////////////////
4261 //------------------------------------------------------------------------------
// Dispatch for special inputs, in this order:
//   denormal (p9)        - normalized, then either re-enters the main code at
//                          tgamma_lt_1 (p11) or goes to tgammal_overflow (p10)
//   NaTVal/NaN/+inf (p6) - returns x + x
//   +/-0 (p7)            - sets tag 256 and f8 via frcpa, then falls through to
//                          the final branch to tgammal_libm_err
//   anything else (p8)   - branches to tgammal_singularity
4266 movl GR_DenOverflow = 0x2000000000000001 // Significand threshold for denormals
4270 fclass.m p9,p0 = f8,0xB // +/-denormals
4275 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
4280 fclass.m p7,p8 = f8,0x7 // +/-0 (p8 is the complement: x is not a zero)
4285 (p9) cmp.ltu.unc p10,p11 = GR_l_signif_Z, GR_DenOverflow // p10: below threshold ("bad"), p11: otherwise
4286 (p9) fnorm.s0 f8 = f8 // Normalize the denormal argument
4292 (p9) fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Round by truncate
4293 (p11) br.cond.sptk tgamma_lt_1 // Return to gamma ('good' denormal)////////////
4299 (p10) br.cond.spnt tgammal_overflow // "Bad" denormal - overflow! /////////////
4304 mov FR_X = f8 // for error handler
4309 (p6) fma.s0 f8 = f8,f1,f8 // res = x + x
4310 (p6) br.ret.spnt b0 // Exit for NAN, INF and NatVals ////////////////////////
4312 .pred.rel "mutex",p7,p8
4314 (p7) mov GR_Parameter_TAG = 256 // negative
// NOTE(review): 1/(+-0) would be expected to signal divide-by-zero rather than
// invalid; confirm the flag name in the comment below against the frcpa
// definition.
4315 (p7) frcpa.s0 f8,p0 = f1,f8 // Raise V flag
// NOTE(review): p8 is the complement of the +/-0 class test above, so this
// branch is taken for NON-zero inputs that reach this point (presumably -inf,
// since denormals and NaTVal/NaN/+inf have already left); the "+ZERO" and
// "-ZERO" wording below does not match the predicates - confirm intent.
4321 (p8) br.cond.spnt tgammal_singularity // Branch for +ZERO ////////////////////
4327 br.cond.spnt tgammal_libm_err // Branch for -ZERO ///////////////////////
4333 // SINGULARITY (x is negative integer or 0) ////////////////////////////////////
4334 //------------------------------------------------------------------------------
// Saves the argument for the error handler, selects error tag 256, replaces
// f8 with the result of frcpa(0/0) and transfers to tgammal_libm_err.
4336 tgammal_singularity:
4339 mov FR_X = f8 // For error handler
4340 mov GR_Parameter_TAG = 256 // negative
4344 frcpa.s0 f8,p0 = f0,f0 // Raise V flag
4345 br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
4346 // with singularity error /////////////////
4352 // OVERFLOW (result is too big and cannot be represented by normal value) //////
4353 // ( X > 1755.54 and for denormals with abs value less than 0x2000000000000001 )
4354 //------------------------------------------------------------------------------
// Saves the argument for the error handler, selects error tag 255 and forms a
// signed overflowing product f9*f9 in f8 before transferring to
// tgammal_libm_err.
// NOTE(review): f9 is not written in the lines visible here; presumably it is
// built from the exponent placed in r8 - confirm.
4358 addl r8 = 0x1FFFE, r0 // Exp of INF
4359 fcmp.lt.s1 p15,p14 = f8,f0 // p14 - pos arg, p15 - neg arg
4365 mov FR_X = f8 // For error handler
4366 mov GR_Parameter_TAG = 255 // overflow
4369 .pred.rel "mutex",p14,p15
4372 (p14) fma.s0 f8 = f9,f9,f0 // Set I,O and +INF result
4377 (p15) fnma.s0 f8 = f9,f9,f0 // Set I,O and -INF result
4378 br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
4379 // with overflow error ////////////////////
4386 // UNDERFLOW (x is negative noninteger with big absolute value) ////////////////
4387 //------------------------------------------------------------------------------
// Truncates the argument to an integer and uses its lowest bit to choose
// between f9*f9 - f9 (bit clear, p6) and f9*f9 + f9 (bit set, p7) as the
// returned value, i.e. the parity of trunc(x) decides the sign handling.
// NOTE(review): f9 is not written in the lines visible here; presumably it
// holds a tiny value so that the products underflow - confirm.
4392 fcvt.fx.trunc.s1 FR_u_IXN = f8 // Convert arg to int repres. in FR
4397 getf.sig GR_u_XN = FR_u_IXN // Integer value into a general register
4411 tbit.z p6,p7 = GR_u_XN,0 // even or odd
4414 .pred.rel "mutex",p6,p7
4417 (p6) fms.s0 f8 = f9,f9,f9 // for negatives
4422 (p7) fma.s0 f8 = f9,f9,f9 // for positives
4423 br.ret.sptk b0 // Exit for underflow path //////////////////////////////
4427 GLOBAL_LIBM_END(tgammal)
4428 libm_alias_ldouble_other (tgamma, tgamma)
4433 ////////////////// Tgammal error handler ///////////////////////////////////////
4434 //------------------------------------------------------------------------------
// Shared error exit: builds a 64-byte stack frame, stores FR_X, FR_Y and
// FR_RESULT into it, calls __libm_error_support, reloads f8 from the result
// slot and returns to the original caller after restoring sp, b0, gp and
// ar.pfs.
// Frame layout relative to the new sp, as established by the address
// arithmetic below:
//   sp+16: FR_X (parameter 1)   sp+32: FR_Y (parameter 2)   sp+48: FR_RESULT
// (This assumes the sp adjustment takes effect before GR_Parameter_X is
// formed; the instruction-group boundaries are not visible in this listing.)
4435 LOCAL_LIBM_ENTRY(__libm_error_region)
4439 add GR_Parameter_Y=-32,sp // Parameter 2 value (old sp-32 == new sp+32)
4441 .save ar.pfs,GR_SAVE_PFS
4442 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
4446 add sp=-64,sp // Create new stack
4448 mov GR_SAVE_GP=gp // Save gp
4451 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances by 16
4452 add GR_Parameter_X = 16,sp // Parameter 1 address
4453 .save b0, GR_SAVE_B0
4454 mov GR_SAVE_B0=b0 // Save b0
4458 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
4459 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
4460 nop.b 0
4463 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
4464 add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2
4465 br.call.sptk b0=__libm_error_support# // Call error handling function
4470 add GR_Parameter_RESULT = 48,sp // Address of the result slot
4473 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
4475 add sp = 64,sp // Restore stack pointer
4476 mov b0 = GR_SAVE_B0 // Restore return address
4479 mov gp = GR_SAVE_GP // Restore gp
4480 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
4481 br.ret.sptk b0 // Return
4484 LOCAL_LIBM_END(__libm_error_region#)
4486 .type __libm_error_support#,@function
4487 .global __libm_error_support#