3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://developer.intel.com/opensource.
40 // *********************************************************************
42 // Function: powl(x,y), where
44 // powl(x,y) = x , for double extended precision x and y values
46 // *********************************************************************
49 // 2/02/00 (Hand Optimized)
50 // 4/04/00 Unwind support added
51 // 8/15/00 Bundle added after call to __libm_error_support to properly
52 // set [the previously overwritten] GR_Parameter_RESULT.
53 // 1/22/01 Corrected results for powl(1,inf), powl(1,nan), and
54 // powl(snan,0) to be 1 per C99, not nan. Fixed many flag settings.
55 // 2/06/01 Call __libm_error support if over/underflow when y=2.
57 // *********************************************************************
61 // Floating-Point Registers:
62 // f8 (Input and Return Value)
65 // General Purpose Registers:
67 // Parameters to __libm_error_support r62,r63,r64,r65
69 // Predicate Registers: p6-p15
71 // *********************************************************************
73 // Special Cases and IEEE special conditions:
75 // Denormal fault raised on denormal inputs
76 // Overflow exceptions raised when appropriate for pow
77 // Underflow exceptions raised when appropriate for pow
78 // (Error Handling Routine called for overflow and Underflow)
79 // Inexact raised when appropriate by algorithm
81 // 1. (anything) ** NatVal or (NatVal) ** anything is NatVal
82 // 2. X or Y unsupported or sNaN is qNaN/Invalid
83 // 3. (anything) ** 0 is 1
84 // 4. (anything) ** 1 is itself
85 // 5. (anything except 1) ** qNAN is qNAN
86 // 6. qNAN ** (anything except 0) is qNAN
87 // 7. +-(|x| > 1) ** +INF is +INF
88 // 8. +-(|x| > 1) ** -INF is +0
89 // 9. +-(|x| < 1) ** +INF is +0
90 // 10. +-(|x| < 1) ** -INF is +INF
91 // 11. +-1 ** +-INF is +1
92 // 12. +0 ** (+anything except 0, NAN) is +0
93 // 13. -0 ** (+anything except 0, NAN, odd integer) is +0
94 // 14. +0 ** (-anything except 0, NAN) is +INF/div_0
95 // 15. -0 ** (-anything except 0, NAN, odd integer) is +INF/div_0
96 // 16. -0 ** (odd integer) = -( +0 ** (odd integer) )
97 // 17. +INF ** (+anything except 0,NAN) is +INF
98 // 18. +INF ** (-anything except 0,NAN) is +0
99 // 19. -INF ** (anything except NAN) = -0 ** (-anything)
100 // 20. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
101 // 21. (-anything except 0 and inf) ** (non-integer) is qNAN/Invalid
102 // 22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
103 // generate denorm/unorm fault except if invalid or div_0 raised.
105 // *********************************************************************
112 // If Y = 2, return X*X.
113 // If Y = 0.5, return sqrt(X).
115 // Compute log(X) to extra precision.
117 // ker_log_80( X, logX_hi, logX_lo, Safe );
119 // ...logX_hi + logX_lo approximates log(X) to roughly 80
120 // ...significant bits of accuracy.
122 // Compute Y*log(X) to extra precision.
124 // P_hi := Y * logX_hi
125 // P_lo := Y * logX_hi - P_hi ...using FMA
126 // P_lo := Y * logX_lo + P_lo ...using FMA
128 // Compute exp(P_hi + P_lo)
131 // Expo_Range := 2; (assuming double-extended power function)
132 // ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
133 // Z_hi, Z_lo, scale, Safe )
135 // scale := sgn * scale
137 // If (Safe) then ...result will not over/underflow
138 // return scale*Z_hi + (scale*Z_lo)
141 // take necessary precaution in computing
142 // scale*Z_hi + (scale*Z_lo)
143 // to set possible exceptions correctly.
148 // ...Follow the order of the case checks
150 // If Y is +-0, return +1 without raising any exception.
151 // If Y is +1, return X without raising any exception.
152 // If Y is qNaN, return Y without exception.
153 // If X is qNaN, return X without exception.
155 // At this point, X is real and Y is +-inf.
156 // Thus |X| can only be 1, strictly bigger than 1, or
157 // strictly less than 1.
160 // return ( Y == +inf? +0 : +inf )
161 // elseif |X| > 1, then
162 // return ( Y == +inf? +0 : +inf )
168 // ...Follow the order of the case checks
169 // ...Note that Y is real, finite, non-zero, and not +1.
171 // If X is qNaN, return X without exception.
174 // return ( Y > 0 ? +0 : +inf )
177 // return ( Y > 0 ? +inf : +0 )
181 // return ( Y > 0 ? +inf : +0 )
185 // Return 0 * inf to generate a quiet NaN together
186 // with an invalid exception.
191 // We describe the quick branch since this part is important
192 // in reaching the normal case efficiently.
196 // This stage contains two threads.
200 // fclass.m X_excep, X_ok = X, (NatVal or s/qNaN) or
203 // fclass.nm X_unsupp, X_supp = X, (NatVal or s/qNaN) or
204 // +-(0, unnorm, norm, infinity)
206 // X_norm := fnorm( X ) with traps disabled
208 // If (X_excep) goto Filtering (Step 2)
209 // If (X_unsupp) goto Filtering (Step 2)
214 // fclass.m Y_excep, Y_ok = Y, (NatVal or s/qNaN) or
217 // fclass.nm Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
218 // +-(0, unnorm, norm, infinity)
220 // Y_norm := fnorm( Y ) with traps disabled
222 // If (Y_excep) goto Filtering (Step 2)
223 // If (Y_unsupp) goto Filtering (Step 2)
228 // This stage contains two threads.
233 // Set X_lt_0 if X < 0 (using fcmp)
235 // If (X_lt_0) goto Filtering (Step 2)
240 // Set Y_is_1 if Y = +1 (using fcmp)
241 // If (Y_is_1) goto Filtering (Step 2)
245 // This stage contains two threads.
251 // X := fnorm(X) in prevailing traps
257 // Y := fnorm(Y) in prevailing traps
262 // Go to Case_Normal.
265 #include "libm_support.h"
275 Constants_exp_64_Arg:
276 ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
277 data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000
278 data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
279 data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
280 ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
283 Constants_exp_64_Exponents:
284 ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
285 data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
286 data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
287 data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
288 data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
289 data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
290 data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
291 ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
295 ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
297 data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
298 data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
299 data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
300 ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
304 ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
306 data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
307 data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
308 data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
309 data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
310 data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
311 data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
312 ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
316 ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
317 data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
318 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
319 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
320 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
321 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
322 data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
323 data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
324 data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
325 data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
326 data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
327 data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
328 data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
329 data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
330 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
331 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
332 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
333 ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
337 ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
338 data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
339 data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
340 data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
341 data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
342 data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
343 data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
344 data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
345 data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
346 data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
347 data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
348 data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
349 data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
350 data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
351 data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
352 data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
353 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
354 ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
358 ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
359 data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
360 data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
361 data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
362 data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
363 data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
364 data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
365 data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
366 data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
367 data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
368 data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
369 data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
370 data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
371 data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
372 data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
373 data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
374 data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
375 data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
376 data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
377 data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
378 data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
379 data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
380 data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
381 data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
382 data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
383 data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
384 data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
385 data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
386 data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
387 data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
388 data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
389 data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
390 data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
391 ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
395 ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
396 data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
397 data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
398 data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
399 data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
400 data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
401 data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
402 data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
403 data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
404 data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
405 data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
406 data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
407 data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
408 data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
409 data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
410 data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
411 data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
412 data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
413 data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
414 data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
415 data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
416 data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
417 data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
418 data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
419 data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
420 data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
421 data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
422 data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
423 data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
424 data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
425 data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
426 data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
427 data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
428 ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
432 ASM_TYPE_DIRECTIVE(Constants_log_80_P,@object)
433 // 1/2, P_8, P_7, ..., P_1
434 data4 0x00000000, 0x80000000, 0x00003FFE, 0x00000000
435 data4 0x3B1042BC, 0xCCCE8B88, 0x0000BFFB, 0x00000000
436 data4 0xCADC2149, 0xE38997B7, 0x00003FFB, 0x00000000
437 data4 0xB1ACB090, 0xFFFFFFFE, 0x0000BFFB, 0x00000000
438 data4 0x06481C81, 0x92492498, 0x00003FFC, 0x00000000
439 data4 0xAAAAB0EF, 0xAAAAAAAA, 0x0000BFFC, 0x00000000
440 data4 0xCCC91416, 0xCCCCCCCC, 0x00003FFC, 0x00000000
441 data4 0x00000000, 0x80000000, 0x0000BFFD, 0x00000000
442 data4 0xAAAAAAAB, 0xAAAAAAAA, 0x00003FFD
443 ASM_SIZE_DIRECTIVE(Constants_log_80_P)
447 ASM_TYPE_DIRECTIVE(Constants_log_80_Q,@object)
448 // log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
449 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
450 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
451 data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
452 data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
453 data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
454 data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
455 data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
456 data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
457 ASM_SIZE_DIRECTIVE(Constants_log_80_Q)
460 Constants_log_80_Z_G_H_h1:
461 ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h1,@object)
462 // Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
463 data4 0x00008000,0x3F800000,0x00000000,0x00000000
464 data4 0x00000000,0x00000000,0x00000000,0x00000000
465 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
466 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
467 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
468 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
469 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
470 data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
471 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
472 data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
473 data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
474 data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
475 data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
476 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
477 data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
478 data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
479 data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
480 data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
481 data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
482 data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
483 data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
484 data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
485 data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
486 data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
487 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
488 data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
489 data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
490 data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
491 data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
492 data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
493 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
494 data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
495 ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h1)
498 Constants_log_80_Z_G_H_h2:
499 ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h2,@object)
500 // Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
501 data4 0x00008000,0x3F800000,0x00000000,0x00000000
502 data4 0x00000000,0x00000000,0x00000000,0x00000000
503 data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
504 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
505 data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
506 data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
507 data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
508 data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
509 data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
510 data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
511 data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
512 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
513 data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
514 data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
515 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
516 data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
517 data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
518 data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
519 data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
520 data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
521 data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
522 data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
523 data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
524 data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
525 data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
526 data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
527 data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
528 data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
529 data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
530 data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
531 data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
532 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
533 ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h2)
536 Constants_log_80_h3_G_H:
537 ASM_TYPE_DIRECTIVE(Constants_log_80_h3_G_H,@object)
538 // h3 IEEE double extended, H3 and G3 IEEE single
539 data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
540 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
541 data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
542 data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
543 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
544 data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
545 data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
546 data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
547 data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
548 data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
549 data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
550 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
551 data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
552 data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
553 data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
554 data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
555 data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
556 data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
557 data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
558 data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
559 data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
560 data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
561 data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
562 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
563 data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
564 data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
565 data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
566 data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
567 data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
568 data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
569 data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
570 data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
571 data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
572 data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
573 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
574 data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
575 data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
576 data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
577 data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
578 data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
579 ASM_SIZE_DIRECTIVE(Constants_log_80_h3_G_H)
583 ASM_TYPE_DIRECTIVE(Constant_half,@object)
584 data4 0x00000000,0x80000000,0x00003FFE
585 ASM_SIZE_DIRECTIVE(Constant_half)
625 GR_BIASed_exp_y = r47
639 GR_exp_and_sign_y = r55
640 GR_low_order_bit = r56
641 GR_get_exp_mask = r57
642 GR_exponent_zero = r58
644 // ** Registers for unwind support
651 GR_Parameter_RESULT = r64
652 GR_Parameter_TAG = r65
784 FR_Result_small = f100
794 alloc GR_Expo_Range = ar.pfs,0,30,4,0
795 (p0) fclass.m.unc p7, p13 = FR_Input_Y, 0x1E7
799 (p0) getf.exp GR_exp_and_sign_y = FR_Input_Y
803 (p0) fclass.m.unc p6, p12 = FR_Input_X, 0x1E7
807 (p0) getf.sig GR_signif_y = FR_Input_Y
808 (p0) fcmp.eq.unc.s1 p12, p13 = FR_Input_X, f1
815 // Identify EM unsupporteds.
818 (p0) fadd.s1 FR_Two = f1, f1
820 // Load 1/2 in GP register
828 (p0) addl GR_Table_Ptr = @ltoff(Constant_half#), gp
834 ld8 GR_Table_Ptr = [GR_Table_Ptr]
841 (p0) ldfe FR_Half =[GR_Table_Ptr],0
842 (p0) movl GR_get_exp_mask = 0x1FFFF ;;
847 (p0) fclass.nm.unc p9, p15 = FR_Input_Y, 0x1FF
850 // Get exp and significand of Y
854 (p0) and GR_exp_y = GR_get_exp_mask,GR_exp_and_sign_y
858 (p0) movl GR_exponent_zero = 0xFFFF ;;
867 (p0) fcmp.eq.unc.s1 p10, p11 = FR_Input_Y, f1
873 // Identify NatVals, NaNs, Infs, and Zeros.
876 (p0) fclass.nm.unc p8, p14 = FR_Input_X, 0x1FF
878 // Remove sign bit from exponent of y.
881 (p6) br.cond.spnt L(POWL_64_SPECIAL) ;;
886 (p7) br.cond.spnt L(POWL_64_SPECIAL) ;;
891 (p8) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
896 (p9) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
899 (p0) cmp.lt.unc p9, p0 = GR_exp_y,GR_exponent_zero
900 (p0) fcmp.lt.unc.s1 p6, p13 = FR_Input_X, f0
902 // Branch on Infs, Nans, Zeros, and Natvals
903 // Check to see that exponent < 0
905 (p0) sub GR_exp_y = GR_exp_y,GR_exponent_zero
907 // x not zero, is y ==2?
910 (p11) fcmp.eq.unc.s1 p7, p14 = FR_Input_Y, FR_Two
915 (p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0
916 (p7) br.cond.spnt L(POWL_64_SQUARE) ;; // Branch if x not zero and y=2
920 (p6) fmerge.ns FR_Neg_X = FR_Input_X, FR_Input_X
925 (p10) fmpy.s0 FR_Result = FR_Input_X, f1
927 // For y = 1, compute result = x
928 // For x = 1, compute 1
929 // When Y is one return X and possible raise
930 // denormal operand exception.
931 // Remove exponent BIAS
933 (p6) shl GR_exp_and_sign_y= GR_signif_y,GR_exp_y ;;
936 (p9) or GR_exp_and_sign_y = 0xF,GR_signif_y
937 (p12) fma.s0 FR_Result = FR_Input_Y, f0, f1
942 (p6) extr.u GR_exp_y = GR_exp_and_sign_y,63,1 ;;
943 (p6) cmp.ne.unc p9, p0 = GR_exp_y, r0
948 // Both predicates can be set.
949 // Don't consider y's < 1.
951 (p6) shl GR_signif_y= GR_exp_and_sign_y,1 ;;
953 // Is shift off integer part of y.
954 // Get y's even or odd bit.
956 (p6) cmp.ne.unc p8, p0 = GR_signif_y, r0
962 // Is the fractional part of the y = 0?
963 // Is the integer even or odd.
965 (p10) br.cond.spnt L(POWL_64_RETURN) ;;
970 (p12) br.cond.spnt L(POWL_64_RETURN) ;;
975 (p8) br.cond.spnt L(POWL_64_XNEG) ;;
979 (p9) fmerge.ns FR_Sgn = FR_Sgn, FR_Sgn
984 (p0) fcmp.eq.unc.s0 p11, p0 = FR_Input_Y, FR_Half
988 // Raise possible denormal operand exception for both
994 // Branch for (x < 0) and Y not an integer.
996 (p0) fcmp.eq.unc.s0 p12, p0 = FR_Input_X, f1
998 // For x < 0 and y integer, make x positive
999 // For x < 0 and y odd integer,, set sign = -1.
1001 (p11) br.cond.spnt L(POWL_64_SQRT) ;;
1004 (p0) cmp.eq.unc p15, p14 = r0, r0
1006 (p13) fnorm.s1 FR_Z = FR_Input_X ;;
1010 (p6) fnorm.s1 FR_Z = FR_Neg_X
1016 // Branch to embedded sqrt(x)
1019 // Computes ln( x ) to extra precision
1021 // Output FR 2: FR_Y_hi
1022 // Output FR 3: FR_Y_lo
1023 // Output PR 1: PR_Safe
1028 (p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h1#), gp
1034 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1043 (p0) movl GR_BIAS = 0x000000000000FFFF ;;
1047 (p0) fsub.s1 FR_W = FR_Z, f1
1051 // Z = Norm(X) - both + and - case
1055 (p0) getf.sig GR_signif_Z = FR_Z
1056 (p0) getf.exp GR_N = FR_Z
1062 // Get significand of Z
1065 (p0) extr.u GR_Index1 = GR_signif_Z, 59, 4 ;;
1067 // Index1 = High order 4 bits of Z
1068 // X_0 = High order 15 bit of Z
1070 (p0) shl GR_Index1 = GR_Index1,5 ;;
1075 // Add offset to Index1 ptr.
1077 (p0) fabs FR_abs_W = FR_W
1079 // BIAS = 0x000...FFFF
1080 // Adjust Index1 ptr ( x 32) .
1082 (p0) add GR_Index1 = GR_Index1,GR_Table_Ptr
1086 (p0) ld2 GR_Z_1 =[GR_Index1],4
1087 (p0) extr.u GR_X_0 = GR_signif_Z, 49, 15
1093 (p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h2#), gp
1099 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1107 (p0) ldfs FR_G_1 = [GR_Index1],4 ;;
1108 (p0) ldfs FR_H_1 = [GR_Index1],8
1112 // Adjust Index2 (x 32).
1115 (p0) ldfe FR_h_1 = [GR_Index1],0
1117 (p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 ;;
1122 // load Z_1 from Index1
1126 (p0) getf.exp GR_M = FR_abs_W
1130 // N = exponent of Z
1147 (p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
1156 (p0) shl GR_Index2=GR_Index2,5 ;;
1157 (p0) add GR_Index2 = GR_Index2, GR_Table_Ptr
1160 // M = exponent of abs_W
1164 (p0) sub GR_M = GR_M, GR_BIAS
1166 (p0) cmp.gt.unc p7, p14 = -8, GR_M
1171 (p7) br.cond.spnt L(LOGL80_NEAR) ;;
1175 // Possible branch out.
1176 // Add offset of table to Index2
1179 (p0) ld2 GR_Z_2 =[GR_Index2],4
1180 (p0) fmerge.se FR_S = f1,FR_Z
1181 (p0) sub GR_N = GR_N, GR_BIAS
1187 (p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_h3_G_H#), gp
1193 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1202 // Point to Table 3.
1203 // S = merging of Z and 1.0
1206 (p0) ldfs FR_G_2 = [GR_Index2],4
1207 (p0) setf.sig FR_float_N = GR_N
1208 (p0) add GR_Table_Ptr1 = 0x200,GR_Table_Ptr ;;
1213 // Add offset to Table 2 ptr.
1214 // float_N = significand of N
1217 (p0) ldfs FR_H_2 = [GR_Index2],8 ;;
1222 (p0) ldfe FR_h_2 = [GR_Index2],0
1223 (p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
1243 (p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
1246 (p0) shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1
1252 (p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;;
1256 (p0) ldfe FR_h_3 = [GR_Index3],12
1260 (p0) ldfs FR_H_3 = [GR_Table_Ptr1],0
1262 // float_N = Make N a fp number
1264 // Get pointer to Q table.
1266 (p0) ldfs FR_G_3 = [GR_Index3],0
1267 (p0) fmpy.s1 FR_G = FR_G_1, FR_G_2
1273 (p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Q#), gp
1279 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1288 (p0) ldfe FR_log2_hi = [GR_Table_Ptr],16
1289 (p0) fadd.s1 FR_H = FR_H_1, FR_H_2
1295 // G = G_1 * G_2 * G_3
1297 (p0) ldfe FR_log2_lo = [GR_Table_Ptr],16
1303 (p0) fadd.s1 FR_h = FR_h_1, FR_h_2 ;;
1306 // Load log2_lo part
1310 (p0) ldfe FR_Q_6 = [GR_Table_Ptr],16
1314 (p0) fcvt.xf FR_float_N = FR_float_N
1321 (p0) ldfe FR_Q_5 = [GR_Table_Ptr],16 ;;
1322 (p0) ldfe FR_Q_4 = [GR_Table_Ptr],16
1326 (p0) ldfe FR_Q_3 = [GR_Table_Ptr],16 ;;
1327 (p0) ldfe FR_Q_2 = [GR_Table_Ptr],16
1333 // poly_lo = Q_5 + r * Q_6
1337 (p0) ldfe FR_Q_1 = [GR_Table_Ptr],16
1339 // h = h_1 + h_2 + h_3
1340 // H = H_1 + H_2 + H_3
1342 // Begin Loading Q's - load log2_hi part
1344 (p0) fmpy.s1 FR_G = FR_G, FR_G_3
1348 (p0) fadd.s1 FR_H = FR_H, FR_H_3
1354 // Y_lo = poly + Y_lo
1359 (p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
1365 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1374 (p0) fadd.s1 FR_h = FR_h, FR_h_3
1382 (p0) fmpy.s1 FR_GS_hi = FR_G, FR_S
1387 (p0) fms.s1 FR_r = FR_G, FR_S, f1
1392 (p0) fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
1401 (p0) fsub.s1 FR_r_cor = FR_GS_hi, f1
1406 (p0) fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
1411 (p0) fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
1419 // GS_lo = G*S - GS_hi
1421 (p0) fmpy.s1 FR_rsq = FR_r, FR_r
1426 (p0) fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
1432 // poly = poly_hi + rsq * poly_lo
1433 // Tbl = float_N*log2_hi + H
1435 (p0) fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
1441 // r_cor = r_cor - r
1442 // poly_hi = r * Q_2 + Q_1
1444 (p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
1452 (p0) fsub.s1 FR_r_cor = FR_r_cor, FR_r
1458 // Y_lo = float_N*log2_lo + h
1460 (p0) fadd.s1 FR_Y_hi = FR_G, FR_r
1466 // poly_lo = Q_4 + r * poly_lo;;
1467 // r_cor = r_cor + GS_lo;;
1469 (p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
1474 (p0) fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
1479 (p0) fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
1485 // poly_lo = Q_3 + r * poly_lo;;
1487 (p0) fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
1492 (p0) fsub.s1 FR_Y_lo = FR_G, FR_Y_hi
1496 (p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
1497 (p0) ldfe FR_L_hi = [GR_Table_Ptr],16
1501 (p0) ldfe FR_L_lo = [GR_Table_Ptr],16
1509 // r_cor = r_cor + Y_lo
1511 (p0) fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
1516 // Y_lo = Tbl - Y_hi
1517 // poly = rsq * poly + r_cor
1519 (p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_r
1527 (p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
1532 // all long before they are needed.
1533 // They are used in LOGL_RETURN PATH
1535 br.cond.sptk L(LOGL_RETURN) ;;
1539 // Branch LOGL80_NEAR
1544 (p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_P#), gp
1550 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1558 (p0) fmpy.s1 FR_Wsq = FR_W, FR_W
1559 (p0) add GR_Table_Ptr1 = 0x50,GR_Table_Ptr
1562 // Adjust ptr to 1/2
1563 // Adjust Ptr1 to P_4
1566 (p0) ldfe FR_Half = [GR_Table_Ptr],16 ;;
1567 (p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
1574 (p0) ldfe FR_P_8 = [GR_Table_Ptr],16 ;;
1575 (p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
1579 (p0) ldfe FR_P_7 = [GR_Table_Ptr],16 ;;
1580 (p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
1589 (p0) ldfe FR_P_6 = [GR_Table_Ptr],16 ;;
1590 (p0) ldfe FR_P_1 = [GR_Table_Ptr1],16
1596 // poly = w*P_4 + P_3
1600 (p0) ldfe FR_P_5 = [GR_Table_Ptr],16
1603 // poly_lo = w * P_8 + P_7
1604 // Y_hi = w - (1/2)w*w
1607 (p0) fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
1612 (p0) fmpy.s1 FR_W3 = FR_Wsq, FR_W
1618 // Y_lo = W3 * poly + Y_lo
1623 (p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
1629 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1637 (p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
1638 (p0) ldfe FR_L_hi = [GR_Table_Ptr],16
1642 (p0) ldfe FR_L_lo = [GR_Table_Ptr],16
1647 (p0) fmpy.s1 FR_half_W = FR_Half, FR_W
1652 (p0) fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
1657 (p0) fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
1662 (p0) fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
1669 // poly = w *poly + P_2
1671 (p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
1676 (p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
1681 (p0) fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
1687 // poly = w * poly + P_1
1690 (p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
1696 // poly_lo = w * poly_lo + P_6
1699 (p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
1704 (p0) fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
1710 // poly_lo = w * poly_lo +
1711 // Y_lo = Y_lo - w * (1/2)w
1713 (p0) fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
1719 // Y_lo = (W-Y_hi) - w * (1/2)w
1720 // poly = W4* poly_lo + poly
1722 (p0) fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
1727 (p0) add GR_Expo_Range = 0x2,r0
1732 // all long before they are needed.
1735 // kernel_log_80 computed ln(X)
1736 // and return logX_hi and logX_lo as results.
1737 // PR_pow_Safe set as well.
1739 (p0) fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo
1741 // Compute Y * (logX_hi + logX_lo)
1744 // (Manipulate names so that inputs are in
1745 // the place kernel_exp expects them)
1747 // Set GR_Expo_Range to Double
1749 // This function computes exp( x + x_cor)
1751 // Input FR 2: FR_X_cor
1752 // Input GR 1: GR_Flag
1753 // Input GR 2: GR_Expo_Range
1754 // Output FR 3: FR_Y_hi
1755 // Output FR 4: FR_Y_lo
1756 // Output FR 5: FR_Scale
1757 // Output PR 1: PR_Safe
1759 (p0) cmp.eq.unc p15, p0 = r0, r0
1764 (p0) addl GR_W1_ptr = @ltoff(Constants_exp_64_W1#), gp
1765 (p0) addl GR_W2_ptr = @ltoff(Constants_exp_64_W2#), gp
1766 (p0) add GR_Flag = 0x2,r0
1771 ld8 GR_W1_ptr = [GR_W1_ptr]
1772 ld8 GR_W2_ptr = [GR_W2_ptr]
1773 (p0) cmp.ne.unc p7, p0 = 0x1, GR_Flag
1779 (p0) movl GR_Mask = 0x1FFFF ;;
1785 (p0) movl GR_BIAS = 0x0FFFF ;;
1790 // X_lo = Y * logX_lo
1792 (p0) fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo
1799 // Flag is always 2 for this routine
1801 (p0) fmpy.s1 FR_float_N = FR_X, FR_L_Inv
1807 // X_hi = Y * logX_hi + X_lo
1808 // Set GR_Flag = 2 for exp(x + xcor)
1810 (p0) fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi
1815 (p0) getf.exp GR_Expo_X = FR_X
1819 (p0) and GR_Expo_X = GR_Expo_X, GR_Mask
1821 // Calculate unBIASed exponent of X
1822 // Point to Table of W1s
1823 // Point to Table of W2s
1825 (p0) fcvt.fx.s1 FR_N = FR_float_N
1830 (p0) fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
1832 // Float_N = X * L_Inv
1833 // Create exponent BIAS
1834 // Get BIASed exponent of X
1836 (p0) sub GR_Expo_X = GR_Expo_X, GR_BIAS ;;
1839 (p0) cmp.gt.unc p9, p0 = -6, GR_Expo_X
1842 // N = fcvt.fx(float_N)
1843 // If -6 > Expo_X, set P9
1845 (p9) br.cond.spnt L(EXPL_SMALL)
1850 // If expo_X < -6 goto exp_small
1854 (p0) addl GR_T1_ptr = @ltoff(Constants_exp_64_T1#), gp
1855 (p0) cmp.lt.unc p10, p0 = 14, GR_Expo_X
1860 ld8 GR_T1_ptr = [GR_T1_ptr]
1870 // If 14 < Expo_X, set P10
1871 // Create pointer to T1 table
1873 (p10) br.cond.spnt L(EXPL_HUGE) ;;
1878 (p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
1879 (p0) addl GR_T2_ptr = @ltoff(Constants_exp_64_T2#), gp
1885 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1886 ld8 GR_T2_ptr = [GR_T2_ptr]
1893 (p0) shladd GR_Table_Ptr = GR_Expo_Range,4,GR_Table_Ptr ;;
1895 // Adjust T1_ptr by x 4 for single-precision values
1896 // Adjust T2_ptr by x 4 for single-precision values
1898 (p0) ld8 GR_Big_Pos_Exp = [GR_Table_Ptr],8
1903 // Load +max exponent
1906 (p0) ld8 GR_Big_Neg_Exp = [GR_Table_Ptr],0
1908 // If 14 < Expo_X, goto exp_huge
1910 (p0) fcvt.xf FR_float_N = FR_N
1917 // Load -max exponent
1922 (p0) getf.sig GR_N_fix = FR_N
1923 (p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_A#), gp
1929 ld8 GR_Table_Ptr = [GR_Table_Ptr]
1941 (p0) ldfe FR_A_3 = [GR_Table_Ptr],16 ;;
1944 // if k > big_pos_exp, set p14 and Safe=False
1946 (p0) ldfe FR_A_2 = [GR_Table_Ptr],16
1947 (p0) extr.u GR_M1 = GR_N_fix, 6, 6
1951 (p0) shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr
1953 // float_N = fcvt.xf(N)
1954 // N_fix = significand of N
1955 // Create pointer to T2 table
1957 (p0) extr.u GR_M2 = GR_N_fix, 0, 6
1961 // Adjust W1_ptr by x 8 for double-precision values
1962 // Adjust W2_ptr by x 8 for double-precision values
1963 // Adjust Table_ptr by Expo_Rangex16
1966 (p0) shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr ;;
1967 (p0) ldfd FR_W1 = [GR_W1_ptr],0
1968 (p0) shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr
1974 (p0) ldfs FR_T1 = [GR_T1_ptr],0
1975 (p0) fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X
1976 (p0) shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr ;;
1979 (p0) ldfd FR_W2 = [GR_W2_ptr],0
1980 (p0) ldfs FR_T2 = [GR_T2_ptr],0
1982 // r = x - L_hi * float_N
1983 // M2 = extr.u(N_fix,0,6)
1984 // M1 = extr.u(N_fix,6,6)
1986 (p0) extr GR_k = GR_N_fix, 12, 52 ;;
1990 // poly = A_3 * r + A_2
1994 (p0) add GR_BIAS_p_k = GR_BIAS, GR_k
1995 (p0) cmp.gt.unc p14,p15 = GR_k,GR_Big_Pos_Exp ;;
1996 (p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
1999 // BIAS_p_K = BIAS + k
2003 (p0) setf.exp FR_Scale = GR_BIAS_p_k
2009 (p0) fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
2013 // W = W_1_p1 * W2 + W1
2016 (p0) ldfe FR_A_1 = [GR_Table_Ptr],16
2022 (p0) fadd.s1 FR_W_1_p1 = FR_W1, f1
2028 // k = extr.u(N_fix,0,6)
2030 // Load ptr to Table of exponent thresholds.
2032 (p0) fadd.s1 FR_r = FR_r, FR_X_cor
2037 (p0) fmpy.s1 FR_T = FR_T1, FR_T2
2043 // if k < big_neg_exp, set p14 and Safe=False
2046 (p0) fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
2051 (p0) fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
2056 (p0) fmpy.s1 FR_rsq = FR_r, FR_r
2061 (p0) mov FR_Y_hi = FR_T
2067 // Scale = set_exp(BIAS_p_k)
2068 // poly = r * poly + A_1
2070 (p0) fadd.s1 FR_Wp1 = FR_W, f1
2075 (p0) fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
2080 (p0) fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
2087 // poly = rsq * poly + rk
2089 (p0) fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
2095 // Y_lo = poly * Wp1 + W
2098 (p0) fmpy.s1 FR_Y_lo = FR_Y_lo, FR_T
2102 (p0) br.cond.sptk L(EXPL_RETURN) ;;
2113 (p0) addl GR_Table_Ptr1 = @ltoff(Constants_exp_64_P), gp
2119 ld8 GR_Table_Ptr1 = [GR_Table_Ptr1]
2127 (p0) ldfe FR_P_6 = [GR_Table_Ptr1],16
2131 (p0) fadd.s1 FR_r = FR_X,f0 ;;
2136 (p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
2142 ld8 GR_Table_Ptr = [GR_Table_Ptr]
2143 (p0) ldfe FR_P_5 = [GR_Table_Ptr1],16
2149 // Is input very small?
2153 (p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
2154 (p0) add GR_Table_Ptr = 0x040,GR_Table_Ptr ;;
2155 (p0) shladd GR_Table_Ptr = GR_Expo_Range,3,GR_Table_Ptr ;;
2158 (p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
2162 (p0) ld8 GR_vsm_expo = [GR_Table_Ptr],0
2168 // r = X (don't seem to need X_Cor)
2169 // Load the threshold exponents
2171 (p0) fmpy.s1 FR_rsq = FR_r, FR_r
2175 // Load the negative integer
2179 (p0) cmp.lt.unc p12, p0 = GR_Expo_X, GR_vsm_expo
2187 // Offset into exponents
2189 (p0) fmpy.s1 FR_r4 = FR_rsq, FR_rsq
2190 (p12) br.cond.spnt L(EXPL_VERY_SMALL) ;;
2193 (p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
2197 (p0) fma.s1 FR_poly_lo = FR_P_6, FR_r, FR_P_5
2199 // Y_lo = r4 * poly_lo + poly_hi
2202 (p0) add GR_temp = 0x1,r0 ;;
2206 (p0) ldfe FR_P_1 = [GR_Table_Ptr1],0
2207 (p0) mov FR_Scale = f1
2210 // Begin creating lsb to perturb final result
2213 (p0) setf.sig FR_temp = GR_temp
2214 (p0) mov FR_Y_hi = f1
2220 // poly_lo = p_5 + p_6 * r
2221 // poly_hi = p_1 + p_2 * r
2223 (p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_4
2229 // poly_lo = p_4 + poly_lo * r
2230 // poly_hi = r + poly_hi * rsq
2232 (p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_3
2237 (p0) fma.s1 FR_poly_hi = FR_P_2, FR_r, FR_P_1
2242 (p0) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_r
2248 // poly_lo = p_3 + poly_lo * r
2251 (p0) fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
2257 // Set lsb in fp register
2259 (p0) for FR_temp = FR_Y_lo,FR_temp
2265 // Toggle on last bit of Y_lo
2267 (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_temp
2269 // Set lsb of Y_lo to 1
2271 (p0) br.cond.sptk L(EXPL_RETURN) ;;
2276 (p0) mov FR_Y_lo = FR_r
2277 (p0) cmp.eq.unc p15, p0 = r0, r0
2281 (p0) mov FR_Scale = f1
2286 (p0) mov FR_Y_hi = f1
2293 (p0) br.cond.sptk L(EXPL_RETURN) ;;
2299 // Return for flag=2
2301 (p0) fcmp.gt.unc.s1 p12, p13 = FR_X, f0
2302 (p0) cmp.eq.unc p14, p15 = r0, r0 ;;
2307 // Set Safe to false
2310 (p12) movl GR_Mask = 0x15DC0 ;;
2313 (p12) setf.exp FR_Y_hi = GR_Mask
2314 (p13) movl GR_Mask = 0xA240 ;;
2317 (p13) setf.exp FR_Y_hi = GR_Mask
2319 // x > 0: Create mask for Y_hi = 2**(24,000)
2320 // x <= 0: Create mask for Y_hi = 2**(-24,000)
2322 (p13) movl GR_temp = 0xA1DC ;;
2325 (p13) setf.exp FR_Y_lo = GR_temp
2327 // x < =0: Create mask for 2**(-24,100)
2328 // x <= 0: Y_lo = w**(-24,100)
2330 (p12) mov FR_Y_lo = f1
2335 (p12) mov FR_Scale = FR_Y_hi
2341 // x > 0: Y_lo = 1.0
2342 // x > 0: Scale = 2**(24,000)
2344 (p13) mov FR_Scale = FR_Y_hi
2351 // Scale = 2**(24,000)
2354 // exp(y *ln(x)) almost complete
2355 // FR_Scale is Scale
2359 (p0) fmpy.s1 FR_Sgn = FR_Scale, FR_Sgn
2367 (p0) fmpy.s1 FR_Y_lo = FR_Y_lo,FR_Sgn
2373 // Z_lo * (sgn * scale)
2375 (p0) fma.s0 FR_Result = FR_Y_hi, FR_Sgn, FR_Y_lo
2377 // Z_hi * (sgn * scale) + Z_lo
2379 (p15) br.cond.sptk L(POWL_64_RETURN) ;;
2383 (p0) fsetc.s3 0x7F,0x01
2389 // Z_hi * (sgn * scale) + Z_lo with wre & td
2390 // Z_hi * (sgn * scale) + Z_lo with fz & td
2392 (p0) movl GR_T1_ptr = 0x00000000013FFF ;;
2396 (p0) fma.s3 FR_Result_small = FR_Y_hi, FR_Sgn, FR_Y_lo
2401 (p0) fsetc.s3 0x7F,0x40
2407 // Return if no danger of over of underflow.
2409 (p0) fsetc.s2 0x7F,0x42
2415 // S0 user supplied status
2416 // S2 user supplied status + WRE + TD (Overflows)
2417 // S3 user supplied status + FZ + TD (Underflows)
2419 (p0) fma.s2 FR_Result_big = FR_Y_hi, FR_Sgn, FR_Y_lo
2423 // S0 user supplied status
2424 // S2 user supplied status + WRE + TD (Overflows)
2425 // S3 user supplied status + FZ + TD (Underflows)
2428 // If (Safe) is true, then
2429 // Compute result using user supplied status field.
2430 // No overflow or underflow here, but perhaps inexact.
2433 // Determine if overflow or underflow was raised.
2434 // Fetch +/- overflow threshold for IEEE single, double,
2438 (p0) setf.exp FR_Big = GR_T1_ptr
2439 (p0) fsetc.s2 0x7F,0x40
2444 (p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
2449 (p0) fmerge.ns FR_NBig = FR_Big, FR_Big
2455 // Create largest double exponent + 1.
2456 // Create smallest double exponent - 1.
2457 // Identify denormals
2459 (p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
2466 // fcmp: resultS2 <= - overflow threshold
2467 // fclass: resultS3 is denorm/unorm/0
2469 (p8) mov GR_Parameter_TAG = 18 ;;
2474 // fcmp: resultS2 >= + overflow threshold
2476 (p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
2477 (p8) br.cond.spnt __libm_error_region ;;
2482 (p9) mov GR_Parameter_TAG = 18
2487 (p9) br.cond.spnt __libm_error_region ;;
2490 // Report that pow overflowed - either +Inf, or -Inf
2493 (p11) mov GR_Parameter_TAG = 19
2495 (p11) br.cond.spnt __libm_error_region ;;
2501 // Report that pow underflowed
2503 (p0) br.cond.sptk L(POWL_64_RETURN) ;;
2508 // Here if x not zero and y=2.
2509 // Must call __libm_error_support for overflow or underflow
2511 // S0 user supplied status
2512 // S2 user supplied status + WRE + TD (Overflows)
2513 // S3 user supplied status + FZ + TD (Underflows)
2517 (p0) fma.s0 FR_Result = FR_Input_X, FR_Input_X, f0
2522 (p0) fsetc.s3 0x7F,0x01
2527 (p0) movl GR_T1_ptr = 0x00000000013FFF ;;
2531 (p0) fma.s3 FR_Result_small = FR_Input_X, FR_Input_X, f0
2536 (p0) fsetc.s3 0x7F,0x40
2542 // Return if no danger of over of underflow.
2544 (p0) fsetc.s2 0x7F,0x42
2549 (p0) fma.s2 FR_Result_big = FR_Input_X, FR_Input_X, f0
2553 // S0 user supplied status
2554 // S2 user supplied status + WRE + TD (Overflows)
2555 // S3 user supplied status + FZ + TD (Underflows)
2558 // If (Safe) is true, then
2559 // Compute result using user supplied status field.
2560 // No overflow or underflow here, but perhaps inexact.
2563 // Determine if overflow or underflow was raised.
2564 // Fetch +/- overflow threshold for IEEE single, double,
2568 (p0) setf.exp FR_Big = GR_T1_ptr
2569 (p0) fsetc.s2 0x7F,0x40
2574 (p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
2579 (p0) fmerge.ns FR_NBig = FR_Big, FR_Big
2585 // Create largest double exponent + 1.
2586 // Create smallest double exponent - 1.
2587 // Identify denormals
2589 (p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
2596 // fcmp: resultS2 <= - overflow threshold
2597 // fclass: resultS3 is denorm/unorm/0
2599 (p8) mov GR_Parameter_TAG = 18 ;;
2604 // fcmp: resultS2 >= + overflow threshold
2606 (p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
2607 (p8) br.cond.spnt __libm_error_region ;;
2612 (p9) mov GR_Parameter_TAG = 18
2617 (p9) br.cond.spnt __libm_error_region ;;
2620 // Report that pow overflowed - either +Inf, or -Inf
2623 (p11) mov GR_Parameter_TAG = 19
2625 (p11) br.cond.spnt __libm_error_region ;;
2631 // Report that pow underflowed
2633 (p0) br.cond.sptk L(POWL_64_RETURN) ;;
2642 (p0) fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Is x=+1
2647 (p0) fclass.m.unc p14, p0 = FR_Input_Y, 0x023
2653 (p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
2658 (p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
2659 (p15) br.cond.spnt L(POWL_64_RETURN) ;; // Exit if x=1
2664 (p0) fclass.m.unc p13, p0 = FR_Input_X, 0x023
2669 (p0) fclass.m.unc p8, p0 = FR_Input_X, 0x143
2674 (p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x143
2679 (p0) fclass.m.unc p10, p0 = FR_Input_X, 0x083
2684 (p0) fclass.m.unc p11, p0 = FR_Input_Y, 0x083
2689 (p0) fclass.m.unc p6, p0 = FR_Input_Y, 0x007
2694 (p0) fcmp.eq.unc.s1 p7, p0 = FR_Input_Y, f1
2700 // set p13 if x +/- Inf
2701 // set p14 if y +/- Inf
2702 // set p8 if x Natval or +/-SNaN
2703 // set p9 if y Natval or +/-SNaN
2704 // set p10 if x QNaN
2705 // set p11 if y QNaNs
2706 // set p6 if y is +/-0
2709 (p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
2710 (p6) cmp.ne p8,p0 = r0,r0 ;; // Don't exit if x=snan, y=0 ==> result=+1
2714 (p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
2715 (p8) br.cond.spnt L(POWL_64_RETURN) ;;
2719 (p10) fmpy.s0 FR_Result = FR_Input_X, f0
2720 (p9) br.cond.spnt L(POWL_64_RETURN) ;;
2725 // Produce result for SNaN and NatVals and return
2727 (p6) fclass.m.unc p15, p0 = FR_Input_X,0x007
2733 // If Y +/- 0, set p15 if x +/- 0
2735 (p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3
2741 (p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
2746 (p6) fadd.s0 FR_Result = f1, f0
2752 // Set p8 if y = +/-0 and X is a QNaN/SNaN
2753 // If y = +/-0, let result = 1.0
2755 (p7) fmpy.s0 FR_Result = FR_Input_X,f1
2757 // If y == 1, result = x * 1
2759 (p15) mov GR_Parameter_TAG = 20
2764 (p15) br.cond.spnt __libm_error_region ;;
2769 // If x and y are both zero, result = 1.0 and call error
2772 (p8) mov GR_Parameter_TAG = 23
2773 (p8) br.cond.spnt __libm_error_region ;;
2779 // If y = +/-0 and x is a QNaN, result = 1.0 and call error
2782 (p6) br.cond.spnt L(POWL_64_RETURN) ;;
2785 // If x=0, y=-inf, go to the X_IS_ZERO path
2788 (p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0
2789 (p7) br.cond.spnt L(POWL_64_RETURN) ;;
2795 // Produce all results for x**0 and x**1
2796 // Let all the result x ** 0 == 1 and return
2797 // Let all x ** 1 == x and return
2799 (p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
2804 (p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
2805 (p10) br.cond.spnt L(POWL_64_RETURN) ;;
2810 (p11) br.cond.spnt L(POWL_64_RETURN) ;;
2816 // Return result for x or y QNaN input with QNaN result
2818 (p14) br.cond.spnt L(POWL_64_Y_IS_INF) ;;
2823 (p13) br.cond.spnt L(POWL_64_X_IS_INF) ;;
2825 L(POWL_64_X_IS_ZERO):
2827 (p0) getf.sig GR_signif_y = FR_Input_Y
2828 (p0) getf.exp GR_BIASed_exp_y = FR_Input_Y
2833 (p0) movl GR_Mask = 0x1FFFF
2837 (p0) movl GR_y_sign = 0x20000 ;;
2840 // Get BIASed exp and significand of y
2843 (p0) and GR_exp_y = GR_Mask,GR_BIASed_exp_y
2845 (p0) and GR_y_sign = GR_y_sign,GR_BIASed_exp_y
2849 (p0) movl GR_BIAS = 0xFFFF ;;
2852 (p0) cmp.lt.unc p9, p8 = GR_exp_y,GR_BIAS
2855 // Maybe y is < 1 already, so
2856 // can never be an integer.
2857 // Remove sign bit from exponent.
2859 (p0) sub GR_exp_y = GR_exp_y,GR_BIAS ;;
2865 // Remove exponent BIAS
2867 (p8) shl GR_exp_y= GR_signif_y,GR_exp_y ;;
2870 (p9) or GR_exp_y= 0xF,GR_signif_y
2877 // Shift significand of y looking for nonzero bits
2878 // For y > 1, shift signif_y exp_y bits to the left
2879 // For y < 1, turn on 4 low order bits of significand of y
2880 // so that the fraction will always be non-zero
2882 (p0) shl GR_signif_y= GR_exp_y,1 ;;
2883 (p0) extr.u GR_low_order_bit = GR_exp_y,63,1
2886 // Integer part of y shifted off.
2887 // Get y's low even or odd bit - y might not be an int.
2890 (p0) cmp.eq.unc p13,p0 = GR_signif_y, r0
2891 (p0) cmp.eq.unc p8,p9 = GR_y_sign, r0 ;;
2896 (p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0 ;;
2899 // Is y and int and odd?
2902 (p13) cmp.eq.unc p13,p14 = GR_y_sign, r0
2903 (p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
2909 // Is y and int and odd and positive?
2911 (p13) mov FR_Result = FR_Input_X
2912 (p13) br.cond.sptk L(POWL_64_RETURN) ;;
2917 // Return +/-0 when x=+/-0 and y is and odd pos. int
2919 (p14) frcpa.s0 FR_Result, p10 = f1, FR_Input_X
2920 (p14) mov GR_Parameter_TAG = 21
2925 (p14) br.cond.spnt __libm_error_region ;;
2931 // Return +/-0 when x=+/-Inf and y is and odd neg int
2932 // and raise dz exception
2934 (p8) mov FR_Result = f0
2935 (p8) br.cond.sptk L(POWL_64_RETURN) ;;
2940 // Return +0 when x=+/-0 and y > 0 and not odd.
2942 (p9) frcpa.s0 FR_Result, p10 = f1,f0
2943 (p9) mov GR_Parameter_TAG = 21
2948 (p9) br.cond.sptk __libm_error_region ;;
2950 L(POWL_64_X_IS_INF):
2952 (p0) getf.exp GR_exp_y = FR_Input_Y
2953 (p0) fclass.m.unc p13, p0 = FR_Input_X,0x022
2954 (p0) mov GR_Mask = 0x1FFFF ;;
2958 (p0) getf.sig GR_signif_y = FR_Input_Y
2959 (p0) fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Flag if y denormal
2964 // Get exp and significand of y
2965 // Create exponent mask and sign mask
2968 (p0) and GR_low_order_bit = GR_Mask,GR_exp_y
2969 (p0) movl GR_BIAS = 0xFFFF
2974 // Remove sign bit from exponent.
2976 (p0) cmp.lt.unc p9, p8 = GR_low_order_bit,GR_BIAS
2978 // Maybe y is < 1 already, so
2981 (p0) sub GR_low_order_bit = GR_low_order_bit,GR_BIAS
2985 (p0) movl GR_sign_mask = 0x20000 ;;
2988 (p0) and GR_sign_mask = GR_sign_mask,GR_exp_y
2990 // Return +Inf when x=+/-0 and y < 0 and not odd and raise
2991 // divide-by-zero exception.
2993 (p0) fclass.m.unc p11, p0 = FR_Input_X,0x021
2999 // Is shift off integer part of y.
3000 // Get y's even or odd bit - y might not be an int.
3002 (p11) cmp.eq.unc p11,p12 = GR_sign_mask, r0
3004 // Remove exponent BIAS
3006 (p8) shl GR_exp_y = GR_signif_y,GR_low_order_bit ;;
3009 (p9) or GR_exp_y = 0xF,GR_signif_y
3011 // Is y positive or negative when x is +Inf?
3012 // Is y and int when x = -Inf
3014 (p11) mov FR_Result = FR_Input_X
3019 (p12) mov FR_Result = f0
3025 // Shift signficand looking for nonzero bits
3026 // For y non-ints, upset the significand.
3028 (p0) shl GR_signif_y = GR_exp_y,1 ;;
3029 (p13) cmp.eq.unc p13,p0 = GR_signif_y, r0
3033 (p0) extr.u GR_low_order_bit = GR_exp_y,63,1 ;;
3034 (p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0
3039 (p11) br.cond.sptk L(POWL_64_RETURN) ;;
3044 (p12) br.cond.sptk L(POWL_64_RETURN) ;;
3047 // Return Inf for y > 0
3048 // Return +0 for y < 0
3049 // Is y even or odd?
3052 (p13) cmp.eq.unc p13,p10 = GR_sign_mask, r0
3053 (p0) cmp.eq.unc p8,p9 = GR_sign_mask, r0 ;;
3059 // For x = -inf, y is and int, positive
3061 // Is y positive in general?
3063 (p13) mov FR_Result = FR_Input_X
3068 (p10) fmerge.ns FR_Result = f0, f0
3069 (p13) br.cond.sptk L(POWL_64_RETURN) ;;
3074 (p10) br.cond.sptk L(POWL_64_RETURN) ;;
3079 // Return -Inf for x = -inf and y > 0 and odd int.
3080 // Return -0 for x = -inf and y < 0 and odd int.
3082 (p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X
3087 (p9) mov FR_Result = f0
3088 (p8) br.cond.sptk L(POWL_64_RETURN) ;;
3093 (p9) br.cond.sptk L(POWL_64_RETURN) ;;
3095 L(POWL_64_Y_IS_INF):
3099 // Return Inf for x = -inf and y > 0 not an odd int.
3100 // Return +0 for x = -inf and y < 0 and not an odd int.
3102 (p0) fclass.m.unc p8, p0 = FR_Input_Y, 0x021
3107 (p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x022
3112 (p0) fabs FR_X = FR_Input_X
3118 (p0) fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
3128 (p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1
3133 (p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1
3138 (p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1
3143 (p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1
3149 // For y = +Inf and |x| < 1 returns 0
3150 // For y = +Inf and |x| > 1 returns Inf
3151 // For y = -Inf and |x| < 1 returns Inf
3152 // For y = -Inf and |x| > 1 returns 0
3154 (p6) mov FR_Result = f0
3159 (p7) mov FR_Result = FR_Input_Y
3164 (p12) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_Y
3169 (p13) mov FR_Result = f0
3171 // Produce x ** +/- Inf results
3173 (p6) br.cond.spnt L(POWL_64_RETURN) ;;
3178 (p7) br.cond.spnt L(POWL_64_RETURN) ;;
3183 (p12) br.cond.spnt L(POWL_64_RETURN) ;;
3188 (p13) br.cond.spnt L(POWL_64_RETURN) ;;
3193 // +/-1 ** +/-Inf, result is +1
3195 (p0) fmpy.s0 FR_Result = f1,f1
3196 (p0) br.cond.sptk L(POWL_64_RETURN) ;;
3198 L(POWL_64_UNSUPPORT):
3202 // Return NaN and raise invalid
3204 (p0) fmpy.s0 FR_Result = FR_Input_X,f0
3206 // Raise exceptions for specific
3207 // values - pseudo NaN and
3210 (p0) br.cond.sptk L(POWL_64_RETURN) ;;
3215 (p0) frcpa.s0 FR_Result, p8 = f0, f0
3217 // Raise invalid for x < 0 and
3218 // y not an integer and
3220 (p0) mov GR_Parameter_TAG = 22
3225 (p0) br.cond.sptk __libm_error_region ;;
3230 (p0) frsqrta.s0 FR_Result,p10 = FR_Input_X
3235 (p10) fma.s1 f62=FR_Half,FR_Input_X,f0
3242 // h = 1/2 * a in f9
3244 (p10) fma.s1 f63=FR_Result,FR_Result,f0
3251 // t1 = y0 * y0 in f10
3253 (p10) fnma.s1 f32=f63,f62,f11
3260 // t2 = 1/2 - t1 * h in f10
3262 (p10) fma.s1 f33=f32,FR_Result,FR_Result
3269 // y1 = y0 + t2 * y0 in f13
3271 (p10) fma.s1 f34=f33,f62,f0
3278 // t3 = y1 * h in f10
3280 (p10) fnma.s1 f35=f34,f33,f11
3287 // t4 = 1/2 - t3 * y1 in f10
3289 (p10) fma.s1 f63=f35,f33,f33
3296 // y2 = y1 + t4 * y1 in f13
3298 (p10) fma.s1 f32=FR_Input_X,f63,f0
3305 // S = a * y2 in f10
3307 (p10) fma.s1 FR_Result=f63,f62,f0
3314 // t5 = y2 * h in f9
3316 (p10) fma.s1 f33=f11,f63,f0
3323 // H = 1/2 * y2 in f11
3325 (p10) fnma.s1 f34=f32,f32,f8
3332 // d = a - S * S in f12
3334 (p10) fnma.s1 f35=FR_Result,f63,f11
3341 // t6 = 1/2 - t5 * y2 in f7
3343 (p10) fma.s1 f62=f33,f34,f32
3350 // S1 = S + d * H in f13
3352 (p10) fma.s1 f63=f33,f35,f33
3359 // H1 = H + t6 * h in f7
3361 (p10) fnma.s1 f32=f62,f62,FR_Input_X
3370 (p10) fma.s0 FR_Result=f32,f63,f62
3375 (p10) br.cond.sptk L(POWL_64_RETURN) ;;
3381 // Do the Newton-Raphson iteration from the EAS.
3383 (p0) br.cond.sptk L(POWL_64_RETURN) ;;
3386 // Take care of the degenerate cases.
3392 (p0) mov FR_Output = FR_Result
3393 (p0) br.ret.sptk b0 ;;
3396 ASM_SIZE_DIRECTIVE(powl)
3398 .proc __libm_error_region
3399 __libm_error_region:
3402 add GR_Parameter_Y=-32,sp // Parameter 2 value
3404 .save ar.pfs,GR_SAVE_PFS
3405 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
3409 add sp=-64,sp // Create new stack
3411 mov GR_SAVE_GP=gp // Save gp
3414 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
3415 add GR_Parameter_X = 16,sp // Parameter 1 address
3416 .save b0, GR_SAVE_B0
3417 mov GR_SAVE_B0=b0 // Save b0
3421 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
3422 add GR_Parameter_RESULT = 0,GR_Parameter_Y
3423 nop.b 0 // Parameter 3 address
3426 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
3427 add GR_Parameter_Y = -16,GR_Parameter_Y
3428 br.call.sptk b0=__libm_error_support# // Call error handling function
3433 add GR_Parameter_RESULT = 48,sp
3436 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
3438 add sp = 64,sp // Restore stack pointer
3439 mov b0 = GR_SAVE_B0 // Restore return address
3442 mov gp = GR_SAVE_GP // Restore gp
3443 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
3444 br.ret.sptk b0 // Return
3447 .endp __libm_error_region
3448 ASM_SIZE_DIRECTIVE(__libm_error_region)
3449 .type __libm_error_support#,@function
3450 .global __libm_error_support#