4 // Copyright (c) 2000 - 2005, Intel Corporation
5 // All rights reserved.
7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
41 //*********************************************************************
44 // 02/02/00 (hand-optimized)
45 // 04/04/00 Unwind support added
46 // 08/15/00 Bundle added after call to __libm_error_support to properly
47 // set [the previously overwritten] GR_Parameter_RESULT.
48 // 03/13/01 Fixed flags when denormal raised on intermediate result
49 // 01/08/02 Improved speed.
50 // 02/06/02 Corrected .section statement
51 // 05/20/02 Cleaned up namespace and sf0 syntax
52 // 02/10/03 Reordered header: .section, .global, .proc, .align;
53 // used data8 for long double table values
54 // 03/31/05 Reformatted delimiters between data tables
56 //*********************************************************************
58 // Function: atanl(x) = inverse tangent(x), for double extended x values
59 // Function: atan2l(y,x) = atan(y/x), for double extended y, x values
63 // long double atanl (long double x)
64 // long double atan2l (long double y, long double x)
66 //*********************************************************************
70 // Floating-Point Registers: f8 (Input and Return Value)
71 // f9 (Input for atan2l)
74 // General Purpose Registers:
76 // r49-r52 (Arguments to error support for 0,0 case)
78 // Predicate Registers: p6-p15
80 //*********************************************************************
82 // IEEE Special Conditions:
84 // Denormal fault raised on denormal inputs
85 // Underflow exceptions may occur
86 // Special error handling for the y=0 and x=0 case
87 // Inexact raised when appropriate by algorithm
91 // atanl(+/-0) = +/- 0
92 // atanl(+/-Inf) = +/-pi/2
94 // atan2l(Any NaN for x or y) = QNaN
95 // atan2l(+/-0,x) = +/-0 for x > 0
96 // atan2l(+/-0,x) = +/-pi for x < 0
97 // atan2l(+/-0,+0) = +/-0
98 // atan2l(+/-0,-0) = +/-pi
99 // atan2l(y,+/-0) = pi/2 y > 0
100 // atan2l(y,+/-0) = -pi/2 y < 0
101 // atan2l(+/-y, Inf) = +/-0 for finite y > 0
102 // atan2l(+/-Inf, x) = +/-pi/2 for finite x
103 // atan2l(+/-y, -Inf) = +/-pi for finite y > 0
104 // atan2l(+/-Inf, Inf) = +/-pi/4
105 // atan2l(+/-Inf, -Inf) = +/-3pi/4
107 //*********************************************************************
109 // Mathematical Description
110 // ---------------------------
112 // The function ATANL( Arg_Y, Arg_X ) returns the "argument"
113 // or the "phase" of the complex number
117 // or equivalently, the angle in radians from the positive
118 // x-axis to the line joining the origin and the point
127 // \ angle between is ATANL(Arg_Y,Arg_X)
133 // ------------------> X-axis
137 // Moreover, this angle is reported in the range [-pi,pi] thus
139 // -pi <= ATANL( Arg_Y, Arg_X ) <= pi.
141 // From the geometry, it is easy to define ATANL when one of
142 // Arg_X or Arg_Y is +-0 or +-inf:
146 // X \ | +0 | -0 | +inf | -inf | finite non-zero
148 // ______________________________________________________
150 // +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
152 // --------------------------------------------------------
154 // +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
155 // --------------------------------------------------------
157 // -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
158 // --------------------------------------------------------
159 // finite | X>0? | pi/2 | -pi/2 | normal case
160 // non-zero| sign(Y)*0: | | |
161 // | sign(Y)*pi | | |
164 // One must take note that ATANL is NOT the arctangent of the
165 // value Arg_Y/Arg_X; but rather ATANL and arctan are related
166 // in a slightly more complicated way as follows:
168 // Let U := max(|Arg_X|, |Arg_Y|); V := min(|Arg_X|, |Arg_Y|);
169 // sign_X be the sign bit of Arg_X, i.e., sign_X is 0 or 1;
170 // s_X be the sign of Arg_X, i.e., s_X = (-1)^sign_X;
172 // sign_Y be the sign bit of Arg_Y, i.e., sign_Y is 0 or 1;
173 // s_Y be the sign of Arg_Y, i.e., s_Y = (-1)^sign_Y;
175 // swap be 0 if |Arg_X| >= |Arg_Y| and 1 otherwise.
177 // Then, ATANL(Arg_Y, Arg_X) =
179 // / arctan(V/U) \ sign_X = 0 & swap = 0
180 // | pi/2 - arctan(V/U) | sign_X = 0 & swap = 1
182 // | pi - arctan(V/U) | sign_X = 1 & swap = 0
183 // \ pi/2 + arctan(V/U) / sign_X = 1 & swap = 1
186 // This relationship also suggest that the algorithm's major
187 // task is to calculate arctan(V/U) for 0 < V <= U; and the
188 // final Result is given by
190 // s_Y * { (P_hi + P_lo) + sigma * arctan(V/U) }
194 // (P_hi,P_lo) represents M(sign_X,swap)*(pi/2) accurately
196 // M(sign_X,swap) = 0 for sign_X = 0 and swap = 0
198 // 2 for sign_X = 1 and swap = 0
202 // sigma = { (sign_X XOR swap) : -1.0 : 1.0 }
204 // = (-1) ^ ( sign_X XOR swap )
206 // Both (P_hi,P_lo) and sigma can be stored in a table and fetched
207 // using (sign_X,swap) as an index. (P_hi, P_lo) can be stored as a
208 // double-precision, and single-precision pair; and sigma can
209 // obviously be just a single-precision number.
211 // In the algorithm we propose, arctan(V/U) is calculated to high accuracy
212 // as A_hi + A_lo. Consequently, the Result ATANL( Arg_Y, Arg_X ) is
215 // s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
217 // We now discuss the calculation of arctan(V/U) for 0 < V <= U.
219 // For (V/U) < 2^(-3), we use a simple polynomial of the form
221 // z + z^3*(P_1 + z^2*(P_2 + z^2*(P_3 + ... + P_8)))
225 // For the sake of accuracy, the first term "z" must approximate V/U to
226 // extra precision. For z^3 and higher power, a working precision
227 // approximation to V/U suffices. Thus, we obtain:
229 // z_hi + z_lo = V/U to extra precision and
230 // z = V/U to working precision
232 // The value arctan(V/U) is delivered as two pieces (A_hi, A_lo)
234 // (A_hi,A_lo) = (z_hi, z^3*(P_1 + ... + P_8) + z_lo).
237 // For 2^(-3) <= (V/U) <= 1, we use a table-driven approach.
240 // (V/U) = 2^k * 1.b_1 b_2 .... b_63 b_64 b_65 ....
244 // z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
250 // arctan(V/U) = arctan(z_hi) + acrtan| -------------- |
251 // | 1 + (V/U)*z_hi |
257 // = arctan(z_hi) + acrtan| -------------- |
261 // = arctan(z_hi) + acrtan( V' / U' )
266 // V' = V - U*z_hi; U' = U + V*z_hi.
270 // w_hi + w_lo = V'/U' to extra precision and
271 // w = V'/U' to working precision
273 // then we can approximate arctan(V'/U') by
275 // arctan(V'/U') = w_hi + w_lo
276 // + w^3*(Q_1 + w^2*(Q_2 + w^2*(Q_3 + w^2*Q_4)))
278 // = w_hi + w_lo + poly
280 // Finally, arctan(z_hi) is calculated beforehand and stored in a table
281 // as Tbl_hi, Tbl_lo. Thus,
283 // (A_hi, A_lo) = (Tbl_hi, w_hi+(poly+(w_lo+Tbl_lo)))
285 // This completes the mathematical description.
291 // Step 0. Check for unsupported format.
294 // ( expo(Arg_X) not zero AND msb(Arg_X) = 0 ) OR
295 // ( expo(Arg_Y) not zero AND msb(Arg_Y) = 0 )
297 // then one of the arguments is unsupported. Generate an
298 // invalid and return qNaN.
300 // Step 1. Initialize
302 // Normalize Arg_X and Arg_Y and set the following
304 // sign_X := sign_bit(Arg_X)
305 // s_Y := (sign_bit(Arg_Y)==0? 1.0 : -1.0)
306 // swap := (|Arg_X| >= |Arg_Y|? 0 : 1 )
307 // U := max( |Arg_X|, |Arg_Y| )
308 // V := min( |Arg_X|, |Arg_Y| )
310 // execute: frcpa E, pred, V, U
311 // If pred is 0, go to Step 5 for special cases handling.
313 // Step 2. Decide on branch.
316 // If Q < 2^(-3) go to Step 4 for simple polynomial case.
318 // Step 3. Table-driven algorithm.
320 // Q is represented as
322 // 2^(-k) * 1.b_1 b_2 b_3 ... b_63; k = 0,-1,-2,-3
324 // and that if k = 0, b_1 = b_2 = b_3 = b_4 = 0.
328 // z_hi := 2^(-k) * 1.b_1 b_2 b_3 b_4 1
330 // (note that there are 49 possible values of z_hi).
332 // ...We now calculate V' and U'. While V' is representable
333 // ...as a 64-bit number because of cancellation, U' is
334 // ...not in general a 64-bit number. Obtaining U' accurately
335 // ...requires two working precision numbers
337 // U_prime_hi := U + V * z_hi ...WP approx. to U'
338 // U_prime_lo := ( U - U_prime_hi ) + V*z_hi ...observe order
339 // V_prime := V - U * z_hi ...this is exact
341 // C_hi := frcpa (1.0, U_prime_hi) ...C_hi approx 1/U'_hi
344 // C_hi := C_hi + C_hi*(1.0 - C_hi*U_prime_hi)
346 // ...at this point C_hi is (1/U_prime_hi) to roughly 64 bits
348 // w_hi := V_prime * C_hi ...w_hi is V_prime/U_prime to
349 // ...roughly working precision
351 // ...note that we want w_hi + w_lo to approximate
352 // ...V_prime/(U_prime_hi + U_prime_lo) to extra precision
353 // ...but for now, w_hi is good enough for the polynomial
357 // poly := w_hi*wsq*(Q_1 + wsq*(Q_2 + wsq*(Q_3 + wsq*Q_4)))
360 // (Tbl_hi, Tbl_lo) = atan(z_hi) indexed by (k,b_1,b_2,b_3,b_4)
361 // ...Tbl_hi is a double-precision number
362 // ...Tbl_lo is a single-precision number
364 // (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
365 // ...as discussed previous. Again; the implementation can
366 // ...chose to fetch P_hi and P_lo from a table indexed by
367 // ...(sign_X, swap).
368 // ...P_hi is a double-precision number;
369 // ...P_lo is a single-precision number.
371 // ...calculate w_lo so that w_hi + w_lo is V'/U' accurately
372 // w_lo := ((V_prime - w_hi*U_prime_hi) -
373 // w_hi*U_prime_lo) * C_hi ...observe order
376 // ...Ready to deliver arctan(V'/U') as A_hi, A_lo
378 // A_lo := w_hi + (poly + (Tbl_lo + w_lo)) ...observe order
380 // ...Deliver final Result
381 // ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
383 // sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
384 // ...sigma can be obtained by a table lookup using
385 // ...(sign_X,swap) as index and stored as single precision
386 // ...sigma should be calculated earlier
391 // Res_hi := P_hi + sigma*A_hi ...this is exact because
392 // ...both P_hi and Tbl_hi
393 // ...are double-precision
394 // ...and |Tbl_hi| > 2^(-4)
395 // ...P_hi is either 0 or
398 // Res_lo := sigma*A_lo + P_lo
400 // Return Res_hi + s_Y*Res_lo in user-defined rounding control
402 // Step 4. Simple polynomial case.
404 // ...E and Q are inherited from Step 2.
406 // A_hi := Q ...Q is inherited from Step 2 Q approx V/U
409 // E := E + E2(1.0 - E*U1
410 // ...at this point E approximates 1/U to roughly working precision
412 // z := V * E ...z approximates V/U to roughly working precision
414 // z4 := zsq * zsq; z8 := z4 * z4
416 // poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
417 // poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
419 // poly := poly1 + z8*poly2
421 // z_lo := (V - A_hi*U)*E
423 // A_lo := z*poly + z_lo
424 // ...A_hi, A_lo approximate arctan(V/U) accurately
426 // (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
427 // ...one can store the M(sign_X,swap) as single precision
430 // ...Deliver final Result
431 // ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
433 // sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
434 // ...sigma can be obtained by a table lookup using
435 // ...(sign_X,swap) as index and stored as single precision
436 // ...sigma should be calculated earlier
441 // Res_hi := P_hi + sigma*A_hi ...need to compute
442 // ...P_hi + sigma*A_hi
445 // tmp := (P_hi - Res_hi) + sigma*A_hi
447 // Res_lo := s_Y*(sigma*A_lo + P_lo) + tmp
449 // Return Res_hi + Res_lo in user-defined rounding control
451 // Step 5. Special Cases
453 // These are detected early in the function by fclass instructions.
455 // We are in one of those special cases when X or Y is 0,+-inf or NaN
457 // If one of X and Y is NaN, return X+Y (which will generate
458 // invalid in case one is a signaling NaN). Otherwise,
459 // return the Result as described in the table
464 // X \ | +0 | -0 | +inf | -inf | finite non-zero
466 // ______________________________________________________
468 // +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
470 // --------------------------------------------------------
472 // +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
473 // --------------------------------------------------------
475 // -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
476 // --------------------------------------------------------
477 // finite | X>0? | pi/2 | -pi/2 |
478 // non-zero| sign(Y)*0: | | | N/A
479 // | sign(Y)*pi | | |
570 GR_Parameter_RESULT = r51
571 GR_Parameter_TAG = r52
577 LOCAL_OBJECT_START(Constants_atan)
579 data8 0x3FF921FB54442D18
580 // single lo_pi/2, two**(-3)
581 data4 0x248D3132, 0x3E000000
582 data8 0xAAAAAAAAAAAAAAA3, 0xBFFD // P_1
583 data8 0xCCCCCCCCCCCC54B2, 0x3FFC // P_2
584 data8 0x9249249247E4D0C2, 0xBFFC // P_3
585 data8 0xE38E38E058870889, 0x3FFB // P_4
586 data8 0xBA2E895B290149F8, 0xBFFB // P_5
587 data8 0x9D88E6D4250F733D, 0x3FFB // P_6
588 data8 0x884E51FFFB8745A0, 0xBFFB // P_7
589 data8 0xE1C7412B394396BD, 0x3FFA // P_8
590 data8 0xAAAAAAAAAAAAA52F, 0xBFFD // Q_1
591 data8 0xCCCCCCCCC75B60D3, 0x3FFC // Q_2
592 data8 0x924923AD011F1940, 0xBFFC // Q_3
593 data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
595 // Entries Tbl_hi (double precision)
596 // B = 1+Index/16+1/32 Index = 0
597 // Entries Tbl_lo (single precision)
598 // B = 1+Index/16+1/32 Index = 0
600 data8 0x3FE9A000A935BD8E
601 data4 0x23ACA08F, 0x00000000
603 // Entries Tbl_hi (double precision) Index = 0,1,...,15
604 // B = 2^(-1)*(1+Index/16+1/32)
605 // Entries Tbl_lo (single precision)
606 // Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
608 data8 0x3FDE77EB7F175A34
609 data4 0x238729EE, 0x00000000
610 data8 0x3FE0039C73C1A40B
611 data4 0x249334DB, 0x00000000
612 data8 0x3FE0C6145B5B43DA
613 data4 0x22CBA7D1, 0x00000000
614 data8 0x3FE1835A88BE7C13
615 data4 0x246310E7, 0x00000000
616 data8 0x3FE23B71E2CC9E6A
617 data4 0x236210E5, 0x00000000
618 data8 0x3FE2EE628406CBCA
619 data4 0x2462EAF5, 0x00000000
620 data8 0x3FE39C391CD41719
621 data4 0x24B73EF3, 0x00000000
622 data8 0x3FE445065B795B55
623 data4 0x24C11260, 0x00000000
624 data8 0x3FE4E8DE5BB6EC04
625 data4 0x242519EE, 0x00000000
626 data8 0x3FE587D81F732FBA
627 data4 0x24D4346C, 0x00000000
628 data8 0x3FE6220D115D7B8D
629 data4 0x24ED487B, 0x00000000
630 data8 0x3FE6B798920B3D98
631 data4 0x2495FF1E, 0x00000000
632 data8 0x3FE748978FBA8E0F
633 data4 0x223D9531, 0x00000000
634 data8 0x3FE7D528289FA093
635 data4 0x242B0411, 0x00000000
636 data8 0x3FE85D69576CC2C5
637 data4 0x2335B374, 0x00000000
638 data8 0x3FE8E17AA99CC05D
639 data4 0x24C27CFB, 0x00000000
641 // Entries Tbl_hi (double precision) Index = 0,1,...,15
642 // B = 2^(-2)*(1+Index/16+1/32)
643 // Entries Tbl_lo (single precision)
644 // Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
646 data8 0x3FD025FA510665B5
647 data4 0x24263482, 0x00000000
648 data8 0x3FD1151A362431C9
649 data4 0x242C8DC9, 0x00000000
650 data8 0x3FD2025567E47C95
651 data4 0x245CF9BA, 0x00000000
652 data8 0x3FD2ED987A823CFE
653 data4 0x235C892C, 0x00000000
654 data8 0x3FD3D6D129271134
655 data4 0x2389BE52, 0x00000000
656 data8 0x3FD4BDEE586890E6
657 data4 0x24436471, 0x00000000
658 data8 0x3FD5A2E0175E0F4E
659 data4 0x2389DBD4, 0x00000000
660 data8 0x3FD685979F5FA6FD
661 data4 0x2476D43F, 0x00000000
662 data8 0x3FD7660752817501
663 data4 0x24711774, 0x00000000
664 data8 0x3FD84422B8DF95D7
665 data4 0x23EBB501, 0x00000000
666 data8 0x3FD91FDE7CD0C662
667 data4 0x23883A0C, 0x00000000
668 data8 0x3FD9F93066168001
669 data4 0x240DF63F, 0x00000000
670 data8 0x3FDAD00F5422058B
671 data4 0x23FE261A, 0x00000000
672 data8 0x3FDBA473378624A5
673 data4 0x23A8CD0E, 0x00000000
674 data8 0x3FDC76550AAD71F8
675 data4 0x2422D1D0, 0x00000000
676 data8 0x3FDD45AEC9EC862B
677 data4 0x2344A109, 0x00000000
679 // Entries Tbl_hi (double precision) Index = 0,1,...,15
680 // B = 2^(-3)*(1+Index/16+1/32)
681 // Entries Tbl_lo (single precision)
682 // Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
684 data8 0x3FC068D584212B3D
685 data4 0x239874B6, 0x00000000
686 data8 0x3FC1646541060850
687 data4 0x2335E774, 0x00000000
688 data8 0x3FC25F6E171A535C
689 data4 0x233E36BE, 0x00000000
690 data8 0x3FC359E8EDEB99A3
691 data4 0x239680A3, 0x00000000
692 data8 0x3FC453CEC6092A9E
693 data4 0x230FB29E, 0x00000000
694 data8 0x3FC54D18BA11570A
695 data4 0x230C1418, 0x00000000
696 data8 0x3FC645BFFFB3AA73
697 data4 0x23F0564A, 0x00000000
698 data8 0x3FC73DBDE8A7D201
699 data4 0x23D4A5E1, 0x00000000
700 data8 0x3FC8350BE398EBC7
701 data4 0x23D4ADDA, 0x00000000
702 data8 0x3FC92BA37D050271
703 data4 0x23BCB085, 0x00000000
704 data8 0x3FCA217E601081A5
705 data4 0x23BC841D, 0x00000000
706 data8 0x3FCB1696574D780B
707 data4 0x23CF4A8E, 0x00000000
708 data8 0x3FCC0AE54D768466
709 data4 0x23BECC90, 0x00000000
710 data8 0x3FCCFE654E1D5395
711 data4 0x2323DCD2, 0x00000000
712 data8 0x3FCDF110864C9D9D
713 data4 0x23F53F3A, 0x00000000
714 data8 0x3FCEE2E1451D980C
715 data4 0x23CCB11F, 0x00000000
717 data8 0x400921FB54442D18, 0x3CA1A62633145C07 // PI two doubles
718 data8 0x3FF921FB54442D18, 0x3C91A62633145C07 // PI_by_2 two dbles
719 data8 0x3FE921FB54442D18, 0x3C81A62633145C07 // PI_by_4 two dbles
720 data8 0x4002D97C7F3321D2, 0x3C9A79394C9E8A0A // 3PI_by_4 two dbles
721 LOCAL_OBJECT_END(Constants_atan)
725 GLOBAL_IEEE754_ENTRY(atanl)
727 // Use common code with atan2l after setting x=1.0
729 alloc r32 = ar.pfs, 0, 17, 4, 0
730 fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
734 addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
735 fma.s1 Xsq = f1, f1, f0 // Form x*x
741 ld8 table_ptr1 = [table_ptr1] // Get table pointer
742 fnorm.s1 ArgY = ArgY_orig
753 getf.exp sign_X = f1 // Get signexp of x
754 fmerge.s ArgX_abs = f0, f1 // Form |x|
759 fnorm.s1 ArgX_orig = f1
765 getf.exp sign_Y = ArgY_orig // Get signexp of y
766 fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
767 mov table_base = table_ptr1 // Save base pointer to tables
772 ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
773 fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
779 ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
785 fma.s1 M = f1, f1, f0 // Set M = 1.0
791 // Check for everything - if false, then must be pseudo-zero
792 // or pseudo-nan (IA unsupporteds).
796 fclass.m p0,p12 = f1, 0x1FF // Test x unsupported
797 (p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
801 // U = max(ArgX_abs,ArgY_abs)
802 // V = min(ArgX_abs,ArgY_abs)
805 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
810 fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
811 br.cond.sptk ATANL_COMMON // Branch to common code
815 GLOBAL_IEEE754_END(atanl)
817 GLOBAL_IEEE754_ENTRY(atan2l)
820 alloc r32 = ar.pfs, 0, 17, 4, 0
821 fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
825 addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
826 fma.s1 Xsq = ArgX_orig, ArgX_orig, f0 // Form x*x
832 ld8 table_ptr1 = [table_ptr1] // Get table pointer
833 fnorm.s1 ArgY = ArgY_orig
838 fnorm.s1 ArgX = ArgX_orig
844 getf.exp sign_X = ArgX_orig // Get signexp of x
845 fmerge.s ArgX_abs = f0, ArgX_orig // Form |x|
851 getf.exp sign_Y = ArgY_orig // Get signexp of y
852 fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
853 mov table_base = table_ptr1 // Save base pointer to tables
858 ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
859 fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
865 ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
866 fclass.m p9,p0 = ArgX_orig, 0x1e7 // Test x natval, nan, inf, zero
871 fma.s1 M = f1, f1, f0 // Set M = 1.0
877 // Check for everything - if false, then must be pseudo-zero
878 // or pseudo-nan (IA unsupporteds).
882 fclass.m p0,p12 = ArgX_orig, 0x1FF // Test x unsupported
883 (p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
887 // U = max(ArgX_abs,ArgY_abs)
888 // V = min(ArgX_abs,ArgY_abs)
891 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
896 fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
897 (p9) br.cond.spnt ATANL_X_SPECIAL // Branch if x natval, nan, inf, zero
901 // Now common code for atanl and atan2l
905 fclass.m p0,p13 = ArgY_orig, 0x1FF // Test y unsupported
906 shr sign_X = sign_X, 17 // Get sign bit of x
910 fma.s1 U = ArgY_abs, f1, f0 // Set U assuming |x| < |y|
911 adds table_ptr1 = 176, table_ptr1 // Point to Q4
916 (p6) add swap = r0, r0 // Set swap=0 if |x| >= |y|
917 (p6) frcpa.s1 E, p0 = ArgY_abs, ArgX_abs // Compute E if |x| >= |y|
918 shr sign_Y = sign_Y, 17 // Get sign bit of y
922 (p6) fma.s1 V = ArgY_abs, f1, f0 // Set V if |x| >= |y|
923 (p12) br.cond.spnt ATANL_UNSUPPORTED // Branch if x unsupported
929 // Set p10 if |x| >= |y| and x >=0
930 // Set p11 if |x| >= |y| and x < 0
932 cmp.eq p8, p9 = 0, sign_Y // Test for y >= 0
933 (p7) frcpa.s1 E, p0 = ArgX_abs, ArgY_abs // Compute E if |x| < |y|
934 (p7) add swap = 1, r0 // Set swap=1 if |x| < |y|
937 (p6) cmp.eq.unc p10, p11 = 0, sign_X // If |x| >= |y|, test for x >= 0
938 (p6) fma.s1 U = ArgX_abs, f1, f0 // Set U if |x| >= |y|
939 (p13) br.cond.spnt ATANL_UNSUPPORTED // Branch if y unsupported
947 .pred.rel "mutex",p8,p9
950 (p8) fadd.s1 s_Y = f0, f1 // If y >= 0 set s_Y = 1.0
955 (p9) fsub.s1 s_Y = f0, f1 // If y < 0 set s_Y = -1.0
960 .pred.rel "mutex",p10,p11
963 (p10) fsub.s1 M = M, f1 // If |x| >= |y| and x >=0, set M=0
968 (p11) fadd.s1 M = M, f1 // If |x| >= |y| and x < 0, set M=2.0
975 fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
978 // *************************************************
979 // ********************* STEP2 *********************
980 // *************************************************
993 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (1) if POLY path
998 // Create a single precision representation of the signexp of Q with the
999 // 4 most significant bits of the significand followed by a 1 and then 18 0's
1002 fmpy.s1 P_hi = M, P_hi
1003 dep.z special = 0x1, 18, 1 // Form 0x0000000000040000
1007 fmpy.s1 P_lo = M, P_lo
1008 add table_ptr2 = 32, table_ptr1
1014 fma.s1 A_temp = Q, f1, f0 // Set A_temp if POLY path
1019 fma.s1 E = E, E_hold, E // E = E + E*E_hold (1) if POLY path
1026 // swap = xor(swap,sign_X)
1030 fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3 // Test Q < 2^-3
1031 xor swap = sign_X, swap
1035 // P_hi = s_Y * P_hi
1037 getf.exp exponent_Q = Q // Get signexp of Q
1038 cmp.eq.unc p7, p6 = 0x00000, swap
1039 fmpy.s1 P_hi = s_Y, P_hi
1044 // if (PR_1) sigma = -1.0
1045 // if (PR_2) sigma = 1.0
1048 getf.sig significand_Q = Q // Get significand of Q
1049 (p6) fsub.s1 sigma = f0, f1
1053 (p9) add table_ptr1 = 128, table_base // Point to P8 if POLY path
1054 (p7) fadd.s1 sigma = f0, f1
1055 (p9) br.cond.spnt ATANL_POLY // Branch to POLY if 0 < Q < 2^-3
1060 // *************************************************
1061 // ******************** STEP3 **********************
1062 // *************************************************
1064 // lookup = b_1 b_2 b_3 B_4
1069 andcm k = 0x0003, exponent_Q // k=0,1,2,3 for exp_Q=0,-1,-2,-3
1074 // Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision
1075 // representation. Note sign of Q is always 0.
1078 cmp.eq p8, p9 = 0x0000, k // Test k=0
1080 extr.u lookup = significand_Q, 59, 4 // Extract b_1 b_2 b_3 b_4 for index
1083 sub sp_exp_Q = 0x7f, k // Form single prec biased exp of Q
1085 sub k = k, r0, 1 // Decrement k
1089 // Form pointer to B index table
1091 ldfe Q_4 = [table_ptr1], -16 // Load Q_4
1093 (p9) shl k = k, 8 // k = 0, 256, or 512
1096 (p9) shladd table_ptr2 = lookup, 4, table_ptr2
1098 shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits
1103 (p8) add table_ptr2 = -16, table_ptr2 // Pointer if original k was 0
1104 (p9) add table_ptr2 = k, table_ptr2 // Pointer if k was 1, 2, 3
1105 dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec
1109 // z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
1111 ldfd Tbl_hi = [table_ptr2], 8 // Load Tbl_hi from index table
1113 setf.s z_hi = special // Form z_hi
1117 ldfs Tbl_lo = [table_ptr2], 8 // Load Tbl_lo from index table
1119 ldfe Q_3 = [table_ptr1], -16 // Load Q_3
1125 ldfe Q_2 = [table_ptr1], -16 // Load Q_2
1132 ldfe Q_1 = [table_ptr1], -16 // Load Q_1
1140 fma.s1 U_prime_hi = V, z_hi, U // U_prime_hi = U + V * z_hi
1145 fnma.s1 V_prime = U, z_hi, V // V_prime = V - U * z_hi
1152 mov A_hi = Tbl_hi // Start with A_hi = Tbl_hi
1159 fsub.s1 U_hold = U, U_prime_hi // U_hold = U - U_prime_hi
1166 frcpa.s1 C_hi, p0 = f1, U_prime_hi // C_hi = frcpa(1,U_prime_hi)
1173 fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
1180 fma.s1 U_prime_lo = z_hi, V, U_hold // U_prime_lo = U_hold + V * z_hi
1185 // C_hi_hold = 1 - C_hi * U_prime_hi (1)
1188 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1195 fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
1202 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)
1207 // C_hi_hold = 1 - C_hi * U_prime_hi (2)
1210 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1217 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)
1222 // C_hi_hold = 1 - C_hi * U_prime_hi (3)
1225 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1232 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)
1239 fmpy.s1 w_hi = V_prime, C_hi // w_hi = V_prime * C_hi
1246 fmpy.s1 wsq = w_hi, w_hi // wsq = w_hi * w_hi
1251 fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi
1258 fma.s1 poly = wsq, Q_4, Q_3 // poly = Q_3 + wsq * Q_4
1263 fnma.s1 w_lo = w_hi, U_prime_lo, w_lo // w_lo = w_lo - w_hi * U_prime_lo
1270 fma.s1 poly = wsq, poly, Q_2 // poly = Q_2 + wsq * poly
1275 fmpy.s1 w_lo = C_hi, w_lo // w_lo = = w_lo * C_hi
1282 fma.s1 poly = wsq, poly, Q_1 // poly = Q_1 + wsq * poly
1287 fadd.s1 A_lo = Tbl_lo, w_lo // A_lo = Tbl_lo + w_lo
1294 fmpy.s0 Q_1 = Q_1, Q_1 // Dummy operation to raise inexact
1301 fmpy.s1 poly = wsq, poly // poly = wsq * poly
1308 fmpy.s1 poly = w_hi, poly // poly = w_hi * poly
1315 fadd.s1 A_lo = A_lo, poly // A_lo = A_lo + poly
1322 fadd.s1 A_lo = A_lo, w_hi // A_lo = A_lo + w_hi
1329 fma.s1 Res_lo = sigma, A_lo, P_lo // Res_lo = P_lo + sigma * A_lo
1335 // Result = Res_hi + Res_lo * s_Y (User Supplied Rounding Mode)
1339 fma.s0 Result = Res_lo, s_Y, Res_hi
1340 br.ret.sptk b0 // Exit table path 2^-3 <= V/U < 1
1346 // Here if 0 < V/U < 2^-3
1348 // ***********************************************
1349 // ******************** STEP4 ********************
1350 // ***********************************************
1354 // Iterate 3 times E = E + E*(1.0 - E*U)
1355 // Also load P_8, P_7, P_6, P_5, P_4
1358 ldfe P_8 = [table_ptr1], -16 // Load P_8
1359 fnma.s1 z_lo = A_temp, U, V // z_lo = V - A_temp * U
1364 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (2)
1370 ldfe P_7 = [table_ptr1], -16 // Load P_7
1372 ldfe P_6 = [table_ptr1], -16 // Load P_6
1378 ldfe P_5 = [table_ptr1], -16 // Load P_5
1379 fma.s1 E = E, E_hold, E // E = E + E_hold*E (2)
1385 ldfe P_4 = [table_ptr1], -16 // Load P_4
1387 ldfe P_3 = [table_ptr1], -16 // Load P_3
1393 ldfe P_2 = [table_ptr1], -16 // Load P_2
1394 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (3)
1399 movl int_temp = 0x24005 // Signexp for small neg number
1404 ldfe P_1 = [table_ptr1], -16 // Load P_1
1405 setf.exp tmp_small = int_temp // Form small neg number
1406 fma.s1 E = E, E_hold, E // E = E + E_hold*E (3)
1412 // At this point E approximates 1/U to roughly working precision
1413 // Z = V*E approximates V/U
1417 fmpy.s1 Z = V, E // Z = V * E
1422 fmpy.s1 z_lo = z_lo, E // z_lo = z_lo * E
1428 // Now what we want to do is
1429 // poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
1430 // poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
1433 // Fixup added to force inexact later -
1434 // A_hi = A_temp + z_lo
1435 // z_lo = (A_temp - A_hi) + z_lo
1439 fmpy.s1 zsq = Z, Z // zsq = Z * Z
1444 fadd.s1 A_hi = A_temp, z_lo // A_hi = A_temp + z_lo
1451 fma.s1 poly1 = zsq, P_8, P_7 // poly1 = P_7 + zsq * P_8
1456 fma.s1 poly2 = zsq, P_3, P_2 // poly2 = P_2 + zsq * P_3
1463 fmpy.s1 z4 = zsq, zsq // z4 = zsq * zsq
1468 fsub.s1 A_temp = A_temp, A_hi // A_temp = A_temp - A_hi
1475 fmerge.s tmp = A_hi, A_hi // Copy tmp = A_hi
1482 fma.s1 poly1 = zsq, poly1, P_6 // poly1 = P_6 + zsq * poly1
1487 fma.s1 poly2 = zsq, poly2, P_1 // poly2 = P_2 + zsq * poly2
1494 fmpy.s1 z8 = z4, z4 // z8 = z4 * z4
1499 fadd.s1 z_lo = A_temp, z_lo // z_lo = (A_temp - A_hi) + z_lo
1506 fma.s1 poly1 = zsq, poly1, P_5 // poly1 = P_5 + zsq * poly1
1511 fmpy.s1 poly2 = poly2, zsq // poly2 = zsq * poly2
1516 // Create small GR double in case need to raise underflow
1519 fma.s1 poly1 = zsq, poly1, P_4 // poly1 = P_4 + zsq * poly1
1520 dep GR_temp = -1,r0,0,53
1524 // Create small double in case need to raise underflow
1526 setf.d FR_temp = GR_temp
1527 fma.s1 poly = z8, poly1, poly2 // poly = poly2 + z8 * poly1
1534 fma.s1 A_lo = Z, poly, z_lo // A_lo = z_lo + Z * poly
1541 fadd.s1 A_hi = tmp, A_lo // A_hi = tmp + A_lo
1548 fsub.s1 tmp = tmp, A_hi // tmp = tmp - A_hi
1553 fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
1560 fadd.s1 A_lo = tmp, A_lo // A_lo = tmp + A_lo
1565 fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
1572 fsub.s1 tmp = P_hi, Res_hi // tmp = P_hi - Res_hi
1578 // Test if A_lo is zero
1582 fclass.m p6,p0 = A_lo, 0x007 // Test A_lo = 0
1589 (p6) mov A_lo = tmp_small // If A_lo zero, make very small
1596 fma.s1 tmp = A_hi, sigma, tmp // tmp = sigma * A_hi + tmp
1601 fma.s1 sigma = A_lo, sigma, P_lo // sigma = A_lo * sigma + P_lo
1608 fma.s1 Res_lo = s_Y, sigma, tmp // Res_lo = s_Y * sigma + tmp
1614 // Test if Res_lo is denormal
1618 fclass.m p14, p15 = Res_lo, 0x0b
1624 // Compute Result = Res_lo + Res_hi. Use s3 if Res_lo is denormal.
1628 (p14) fadd.s3 Result = Res_lo, Res_hi // Result for Res_lo denormal
1633 (p15) fadd.s0 Result = Res_lo, Res_hi // Result for Res_lo normal
1639 // If Res_lo is denormal test if Result equals zero
1643 (p14) fclass.m.unc p14, p0 = Result, 0x07
1649 // If Res_lo is denormal and Result equals zero, raise inexact, underflow
1650 // by squaring small double
1654 (p14) fmpy.d.s0 FR_temp = FR_temp, FR_temp
1655 br.ret.sptk b0 // Exit POLY path, 0 < Q < 2^-3
1663 fmpy.s0 Result = ArgX,ArgY
1668 // Here if y natval, nan, inf, zero
1670 // Here if x natval, nan, inf, zero
1674 fclass.m p13,p12 = ArgY_orig, 0x0c3 // Test y nan
1681 fclass.m p15,p14 = ArgY_orig, 0x103 // Test y natval
1688 (p12) fclass.m p13,p0 = ArgX_orig, 0x0c3 // Test x nan
1695 (p14) fclass.m p15,p0 = ArgX_orig, 0x103 // Test x natval
1702 (p13) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result nan if x or y nan
1703 (p13) br.ret.spnt b0 // Exit if x or y nan
1709 (p15) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result natval if x or y natval
1710 (p15) br.ret.spnt b0 // Exit if x or y natval
1715 // Here if x or y inf or zero
1716 ATANL_SPECIAL_HANDLING:
1719 fclass.m p6, p7 = ArgY_orig, 0x007 // Test y zero
1720 mov special = 992 // Offset to table
1725 add table_ptr1 = table_base, special // Point to 3pi/4
1726 fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
1727 (p7) br.cond.spnt ATANL_ArgY_Not_ZERO // Branch if y not zero
1733 ldfd Result = [table_ptr1], 8 // Get pi high
1735 fclass.m p14, p0 = ArgX, 0x035 // Test for x>=+0
1741 ldfd Result_lo = [table_ptr1], -8 // Get pi lo
1742 fclass.m p15, p0 = ArgX, 0x036 // Test for x<=-0
1747 // Return sign_Y * 0 when ArgX > +0
1751 (p14) fmerge.s Result = ArgY, f0 // If x>=+0, y=0, hi sgn(y)*0
1758 fclass.m p13, p0 = ArgX, 0x007 // Test for x=0
1765 (p14) fmerge.s Result_lo = ArgY, f0 // If x>=+0, y=0, lo sgn(y)*0
1771 (p13) mov GR_Parameter_TAG = 36 // Error tag for x=0, y=0
1778 // Return sign_Y * pi when ArgX < -0
1782 (p15) fmerge.s Result = ArgY, Result // If x<0, y=0, hi=sgn(y)*pi
1789 (p15) fmerge.s Result_lo = ArgY, Result_lo // If x<0, y=0, lo=sgn(y)*pi
1795 // Call error support function for atan(0,0)
1799 fadd.s0 Result = Result, Result_lo
1800 (p13) br.cond.spnt __libm_error_region // Branch if atan(0,0)
1807 br.ret.sptk b0 // Exit for y=0, x not 0
1811 // Here if y not zero
1812 ATANL_ArgY_Not_ZERO:
1815 fclass.m p0, p10 = ArgY, 0x023 // Test y inf
1822 fclass.m p6, p0 = ArgX, 0x017 // Test for 0 <= |x| < inf
1823 (p10) br.cond.spnt ATANL_ArgY_Not_INF // Branch if 0 < |y| < inf
1829 // Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
1830 // Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
1831 // Return +PI/4 when ArgY = +Inf and ArgX = +Inf
1832 // Return -PI/4 when ArgY = -Inf and ArgX = +Inf
1833 // Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
1834 // Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
1838 fclass.m p7, p0 = ArgX, 0x021 // Test for x=+inf
1844 (p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite
1845 fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
1851 (p7) add table_ptr1 = 32, table_ptr1 // Point to pi/4 if x=+inf
1853 (p8) add table_ptr1 = 48, table_ptr1 // Point to 3pi/4 if x=-inf
1860 ldfd Result = [table_ptr1], 8 // Load pi/2, pi/4, or 3pi/4 hi
1862 ldfd Result_lo = [table_ptr1], -8 // Load pi/2, pi/4, or 3pi/4 lo
1869 fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
1876 fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
1883 fadd.s0 Result = Result, Result_lo // Compute complete result
1884 br.ret.sptk b0 // Exit for y=inf
1888 // Here if y not INF, and x=0 or INF
1891 // Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
1892 // Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
1893 // Return +0 when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
1894 // Return -0 when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
1895 // Return +PI when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
1896 // Return -PI when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
1900 fclass.m p7, p9 = ArgX, 0x021 // Test for x=+inf
1907 fclass.m p6, p0 = ArgX, 0x007 // Test for x=0
1913 (p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2
1914 fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
1919 .pred.rel "mutex",p7,p9
1921 (p9) ldfd Result = [table_ptr1], 8 // Load pi or pi/2 hi
1922 (p7) fmerge.s Result = ArgY, f0 // If y not inf, x=+inf, sgn(y)*0
1928 (p9) ldfd Result_lo = [table_ptr1], -8 // Load pi or pi/2 lo
1929 (p7) fnorm.s0 Result = Result // If y not inf, x=+inf normalize
1936 (p9) fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
1943 (p9) fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
1950 (p9) fadd.s0 Result = Result, Result_lo // Compute complete result
1951 br.ret.spnt b0 // Exit for y not inf, x=0,inf
1955 GLOBAL_IEEE754_END(atan2l)
1957 LOCAL_LIBM_ENTRY(__libm_error_region)
1960 add GR_Parameter_Y=-32,sp // Parameter 2 value
1962 .save ar.pfs,GR_SAVE_PFS
1963 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
1967 add sp=-64,sp // Create new stack
1969 mov GR_SAVE_GP=gp // Save gp
1972 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
1973 add GR_Parameter_X = 16,sp // Parameter 1 address
1974 .save b0, GR_SAVE_B0
1975 mov GR_SAVE_B0=b0 // Save b0
1979 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
1980 add GR_Parameter_RESULT = 0,GR_Parameter_Y
1981 nop.b 0 // Parameter 3 address
1984 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
1985 add GR_Parameter_Y = -16,GR_Parameter_Y
1986 br.call.sptk b0=__libm_error_support# // Call error handling function
1991 add GR_Parameter_RESULT = 48,sp
1994 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
1996 add sp = 64,sp // Restore stack pointer
1997 mov b0 = GR_SAVE_B0 // Restore return address
2000 mov gp = GR_SAVE_GP // Restore gp
2001 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
2002 br.ret.sptk b0 // Return
2005 LOCAL_LIBM_END(__libm_error_region#)
2006 .type __libm_error_support#,@function
2007 .global __libm_error_support#