4 // Copyright (c) 2000 - 2003, Intel Corporation
5 // All rights reserved.
7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
41 //*********************************************************************
44 // 02/02/00 (hand-optimized)
45 // 04/04/00 Unwind support added
46 // 08/15/00 Bundle added after call to __libm_error_support to properly
47 // set [the previously overwritten] GR_Parameter_RESULT.
48 // 03/13/01 Fixed flags when denormal raised on intermediate result
49 // 01/08/02 Improved speed.
50 // 02/06/02 Corrected .section statement
51 // 05/20/02 Cleaned up namespace and sf0 syntax
52 // 02/10/03 Reordered header: .section, .global, .proc, .align;
53 // used data8 for long double table values
55 //*********************************************************************
57 // Function: atanl(x) = inverse tangent(x), for double extended x values
58 // Function: atan2l(y,x) = atan(y/x), for double extended y, x values
62 // long double atanl (long double x)
63 // long double atan2l (long double y, long double x)
65 //*********************************************************************
69 // Floating-Point Registers: f8 (Input and Return Value)
70 // f9 (Input for atan2l)
73 // General Purpose Registers:
75 // r49-r52 (Arguments to error support for 0,0 case)
77 // Predicate Registers: p6-p15
79 //*********************************************************************
81 // IEEE Special Conditions:
83 // Denormal fault raised on denormal inputs
84 // Underflow exceptions may occur
85 // Special error handling for the y=0 and x=0 case
86 // Inexact raised when appropriate by algorithm
90 // atanl(+/-0) = +/- 0
91 // atanl(+/-Inf) = +/-pi/2
93 // atan2l(Any NaN for x or y) = QNaN
94 // atan2l(+/-0,x) = +/-0 for x > 0
95 // atan2l(+/-0,x) = +/-pi for x < 0
96 // atan2l(+/-0,+0) = +/-0
97 // atan2l(+/-0,-0) = +/-pi
98 // atan2l(y,+/-0) = pi/2 y > 0
99 // atan2l(y,+/-0) = -pi/2 y < 0
100 // atan2l(+/-y, Inf) = +/-0 for finite y > 0
101 // atan2l(+/-Inf, x) = +/-pi/2 for finite x
102 // atan2l(+/-y, -Inf) = +/-pi for finite y > 0
103 // atan2l(+/-Inf, Inf) = +/-pi/4
104 // atan2l(+/-Inf, -Inf) = +/-3pi/4
106 //*********************************************************************
108 // Mathematical Description
109 // ---------------------------
111 // The function ATANL( Arg_Y, Arg_X ) returns the "argument"
112 // or the "phase" of the complex number
116 // or equivalently, the angle in radians from the positive
117 // x-axis to the line joining the origin and the point
126 // \ angle between is ATANL(Arg_Y,Arg_X)
132 // ------------------> X-axis
136 // Moreover, this angle is reported in the range [-pi,pi] thus
138 // -pi <= ATANL( Arg_Y, Arg_X ) <= pi.
140 // From the geometry, it is easy to define ATANL when one of
141 // Arg_X or Arg_Y is +-0 or +-inf:
145 // X \ | +0 | -0 | +inf | -inf | finite non-zero
147 // ______________________________________________________
149 // +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
151 // --------------------------------------------------------
153 // +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
154 // --------------------------------------------------------
156 // -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
157 // --------------------------------------------------------
158 // finite | X>0? | pi/2 | -pi/2 | normal case
159 // non-zero| sign(Y)*0: | | |
160 // | sign(Y)*pi | | |
163 // One must take note that ATANL is NOT the arctangent of the
164 // value Arg_Y/Arg_X; but rather ATANL and arctan are related
165 // in a slightly more complicated way as follows:
167 // Let U := max(|Arg_X|, |Arg_Y|); V := min(|Arg_X|, |Arg_Y|);
168 // sign_X be the sign bit of Arg_X, i.e., sign_X is 0 or 1;
169 // s_X be the sign of Arg_X, i.e., s_X = (-1)^sign_X;
171 // sign_Y be the sign bit of Arg_Y, i.e., sign_Y is 0 or 1;
172 // s_Y be the sign of Arg_Y, i.e., s_Y = (-1)^sign_Y;
174 // swap be 0 if |Arg_X| >= |Arg_Y| and 1 otherwise.
176 // Then, ATANL(Arg_Y, Arg_X) =
178 // / arctan(V/U) \ sign_X = 0 & swap = 0
179 // | pi/2 - arctan(V/U) | sign_X = 0 & swap = 1
181 // | pi - arctan(V/U) | sign_X = 1 & swap = 0
182 // \ pi/2 + arctan(V/U) / sign_X = 1 & swap = 1
185 // This relationship also suggest that the algorithm's major
186 // task is to calculate arctan(V/U) for 0 < V <= U; and the
187 // final Result is given by
189 // s_Y * { (P_hi + P_lo) + sigma * arctan(V/U) }
193 // (P_hi,P_lo) represents M(sign_X,swap)*(pi/2) accurately
195 // M(sign_X,swap) = 0 for sign_X = 0 and swap = 0
197 // 2 for sign_X = 1 and swap = 0
201 // sigma = { (sign_X XOR swap) : -1.0 : 1.0 }
203 // = (-1) ^ ( sign_X XOR swap )
205 // Both (P_hi,P_lo) and sigma can be stored in a table and fetched
206 // using (sign_X,swap) as an index. (P_hi, P_lo) can be stored as a
207 // double-precision, and single-precision pair; and sigma can
208 // obviously be just a single-precision number.
210 // In the algorithm we propose, arctan(V/U) is calculated to high accuracy
211 // as A_hi + A_lo. Consequently, the Result ATANL( Arg_Y, Arg_X ) is
214 // s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
216 // We now discuss the calculation of arctan(V/U) for 0 < V <= U.
218 // For (V/U) < 2^(-3), we use a simple polynomial of the form
220 // z + z^3*(P_1 + z^2*(P_2 + z^2*(P_3 + ... + P_8)))
224 // For the sake of accuracy, the first term "z" must approximate V/U to
225 // extra precision. For z^3 and higher power, a working precision
226 // approximation to V/U suffices. Thus, we obtain:
228 // z_hi + z_lo = V/U to extra precision and
229 // z = V/U to working precision
231 // The value arctan(V/U) is delivered as two pieces (A_hi, A_lo)
233 // (A_hi,A_lo) = (z_hi, z^3*(P_1 + ... + P_8) + z_lo).
236 // For 2^(-3) <= (V/U) <= 1, we use a table-driven approach.
239 // (V/U) = 2^k * 1.b_1 b_2 .... b_63 b_64 b_65 ....
243 // z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
249 // arctan(V/U) = arctan(z_hi) + acrtan| -------------- |
250 // | 1 + (V/U)*z_hi |
256 // = arctan(z_hi) + acrtan| -------------- |
260 // = arctan(z_hi) + acrtan( V' / U' )
265 // V' = V - U*z_hi; U' = U + V*z_hi.
269 // w_hi + w_lo = V'/U' to extra precision and
270 // w = V'/U' to working precision
272 // then we can approximate arctan(V'/U') by
274 // arctan(V'/U') = w_hi + w_lo
275 // + w^3*(Q_1 + w^2*(Q_2 + w^2*(Q_3 + w^2*Q_4)))
277 // = w_hi + w_lo + poly
279 // Finally, arctan(z_hi) is calculated beforehand and stored in a table
280 // as Tbl_hi, Tbl_lo. Thus,
282 // (A_hi, A_lo) = (Tbl_hi, w_hi+(poly+(w_lo+Tbl_lo)))
284 // This completes the mathematical description.
290 // Step 0. Check for unsupported format.
293 // ( expo(Arg_X) not zero AND msb(Arg_X) = 0 ) OR
294 // ( expo(Arg_Y) not zero AND msb(Arg_Y) = 0 )
296 // then one of the arguments is unsupported. Generate an
297 // invalid and return qNaN.
299 // Step 1. Initialize
301 // Normalize Arg_X and Arg_Y and set the following
303 // sign_X := sign_bit(Arg_X)
304 // s_Y := (sign_bit(Arg_Y)==0? 1.0 : -1.0)
305 // swap := (|Arg_X| >= |Arg_Y|? 0 : 1 )
306 // U := max( |Arg_X|, |Arg_Y| )
307 // V := min( |Arg_X|, |Arg_Y| )
309 // execute: frcpa E, pred, V, U
310 // If pred is 0, go to Step 5 for special cases handling.
312 // Step 2. Decide on branch.
315 // If Q < 2^(-3) go to Step 4 for simple polynomial case.
317 // Step 3. Table-driven algorithm.
319 // Q is represented as
321 // 2^(-k) * 1.b_1 b_2 b_3 ... b_63; k = 0,-1,-2,-3
323 // and that if k = 0, b_1 = b_2 = b_3 = b_4 = 0.
327 // z_hi := 2^(-k) * 1.b_1 b_2 b_3 b_4 1
329 // (note that there are 49 possible values of z_hi).
331 // ...We now calculate V' and U'. While V' is representable
332 // ...as a 64-bit number because of cancellation, U' is
333 // ...not in general a 64-bit number. Obtaining U' accurately
334 // ...requires two working precision numbers
336 // U_prime_hi := U + V * z_hi ...WP approx. to U'
337 // U_prime_lo := ( U - U_prime_hi ) + V*z_hi ...observe order
338 // V_prime := V - U * z_hi ...this is exact
340 // C_hi := frcpa (1.0, U_prime_hi) ...C_hi approx 1/U'_hi
343 // C_hi := C_hi + C_hi*(1.0 - C_hi*U_prime_hi)
345 // ...at this point C_hi is (1/U_prime_hi) to roughly 64 bits
347 // w_hi := V_prime * C_hi ...w_hi is V_prime/U_prime to
348 // ...roughly working precision
350 // ...note that we want w_hi + w_lo to approximate
351 // ...V_prime/(U_prime_hi + U_prime_lo) to extra precision
352 // ...but for now, w_hi is good enough for the polynomial
356 // poly := w_hi*wsq*(Q_1 + wsq*(Q_2 + wsq*(Q_3 + wsq*Q_4)))
359 // (Tbl_hi, Tbl_lo) = atan(z_hi) indexed by (k,b_1,b_2,b_3,b_4)
360 // ...Tbl_hi is a double-precision number
361 // ...Tbl_lo is a single-precision number
363 // (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
364 // ...as discussed previous. Again; the implementation can
365 // ...chose to fetch P_hi and P_lo from a table indexed by
366 // ...(sign_X, swap).
367 // ...P_hi is a double-precision number;
368 // ...P_lo is a single-precision number.
370 // ...calculate w_lo so that w_hi + w_lo is V'/U' accurately
371 // w_lo := ((V_prime - w_hi*U_prime_hi) -
372 // w_hi*U_prime_lo) * C_hi ...observe order
375 // ...Ready to deliver arctan(V'/U') as A_hi, A_lo
377 // A_lo := w_hi + (poly + (Tbl_lo + w_lo)) ...observe order
379 // ...Deliver final Result
380 // ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
382 // sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
383 // ...sigma can be obtained by a table lookup using
384 // ...(sign_X,swap) as index and stored as single precision
385 // ...sigma should be calculated earlier
390 // Res_hi := P_hi + sigma*A_hi ...this is exact because
391 // ...both P_hi and Tbl_hi
392 // ...are double-precision
393 // ...and |Tbl_hi| > 2^(-4)
394 // ...P_hi is either 0 or
397 // Res_lo := sigma*A_lo + P_lo
399 // Return Res_hi + s_Y*Res_lo in user-defined rounding control
401 // Step 4. Simple polynomial case.
403 // ...E and Q are inherited from Step 2.
405 // A_hi := Q ...Q is inherited from Step 2 Q approx V/U
408 // E := E + E2(1.0 - E*U1
409 // ...at this point E approximates 1/U to roughly working precision
411 // z := V * E ...z approximates V/U to roughly working precision
413 // z4 := zsq * zsq; z8 := z4 * z4
415 // poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
416 // poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
418 // poly := poly1 + z8*poly2
420 // z_lo := (V - A_hi*U)*E
422 // A_lo := z*poly + z_lo
423 // ...A_hi, A_lo approximate arctan(V/U) accurately
425 // (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
426 // ...one can store the M(sign_X,swap) as single precision
429 // ...Deliver final Result
430 // ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
432 // sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
433 // ...sigma can be obtained by a table lookup using
434 // ...(sign_X,swap) as index and stored as single precision
435 // ...sigma should be calculated earlier
440 // Res_hi := P_hi + sigma*A_hi ...need to compute
441 // ...P_hi + sigma*A_hi
444 // tmp := (P_hi - Res_hi) + sigma*A_hi
446 // Res_lo := s_Y*(sigma*A_lo + P_lo) + tmp
448 // Return Res_hi + Res_lo in user-defined rounding control
450 // Step 5. Special Cases
452 // These are detected early in the function by fclass instructions.
454 // We are in one of those special cases when X or Y is 0,+-inf or NaN
456 // If one of X and Y is NaN, return X+Y (which will generate
457 // invalid in case one is a signaling NaN). Otherwise,
458 // return the Result as described in the table
463 // X \ | +0 | -0 | +inf | -inf | finite non-zero
465 // ______________________________________________________
467 // +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
469 // --------------------------------------------------------
471 // +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
472 // --------------------------------------------------------
474 // -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
475 // --------------------------------------------------------
476 // finite | X>0? | pi/2 | -pi/2 |
477 // non-zero| sign(Y)*0: | | | N/A
478 // | sign(Y)*pi | | |
569 GR_Parameter_RESULT = r51
570 GR_Parameter_TAG = r52
576 LOCAL_OBJECT_START(Constants_atan)
578 data8 0x3FF921FB54442D18
579 // single lo_pi/2, two**(-3)
580 data4 0x248D3132, 0x3E000000
581 data8 0xAAAAAAAAAAAAAAA3, 0xBFFD // P_1
582 data8 0xCCCCCCCCCCCC54B2, 0x3FFC // P_2
583 data8 0x9249249247E4D0C2, 0xBFFC // P_3
584 data8 0xE38E38E058870889, 0x3FFB // P_4
585 data8 0xBA2E895B290149F8, 0xBFFB // P_5
586 data8 0x9D88E6D4250F733D, 0x3FFB // P_6
587 data8 0x884E51FFFB8745A0, 0xBFFB // P_7
588 data8 0xE1C7412B394396BD, 0x3FFA // P_8
589 data8 0xAAAAAAAAAAAAA52F, 0xBFFD // Q_1
590 data8 0xCCCCCCCCC75B60D3, 0x3FFC // Q_2
591 data8 0x924923AD011F1940, 0xBFFC // Q_3
592 data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
594 // Entries Tbl_hi (double precision)
595 // B = 1+Index/16+1/32 Index = 0
596 // Entries Tbl_lo (single precision)
597 // B = 1+Index/16+1/32 Index = 0
599 data8 0x3FE9A000A935BD8E
600 data4 0x23ACA08F, 0x00000000
602 // Entries Tbl_hi (double precision) Index = 0,1,...,15
603 // B = 2^(-1)*(1+Index/16+1/32)
604 // Entries Tbl_lo (single precision)
605 // Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
607 data8 0x3FDE77EB7F175A34
608 data4 0x238729EE, 0x00000000
609 data8 0x3FE0039C73C1A40B
610 data4 0x249334DB, 0x00000000
611 data8 0x3FE0C6145B5B43DA
612 data4 0x22CBA7D1, 0x00000000
613 data8 0x3FE1835A88BE7C13
614 data4 0x246310E7, 0x00000000
615 data8 0x3FE23B71E2CC9E6A
616 data4 0x236210E5, 0x00000000
617 data8 0x3FE2EE628406CBCA
618 data4 0x2462EAF5, 0x00000000
619 data8 0x3FE39C391CD41719
620 data4 0x24B73EF3, 0x00000000
621 data8 0x3FE445065B795B55
622 data4 0x24C11260, 0x00000000
623 data8 0x3FE4E8DE5BB6EC04
624 data4 0x242519EE, 0x00000000
625 data8 0x3FE587D81F732FBA
626 data4 0x24D4346C, 0x00000000
627 data8 0x3FE6220D115D7B8D
628 data4 0x24ED487B, 0x00000000
629 data8 0x3FE6B798920B3D98
630 data4 0x2495FF1E, 0x00000000
631 data8 0x3FE748978FBA8E0F
632 data4 0x223D9531, 0x00000000
633 data8 0x3FE7D528289FA093
634 data4 0x242B0411, 0x00000000
635 data8 0x3FE85D69576CC2C5
636 data4 0x2335B374, 0x00000000
637 data8 0x3FE8E17AA99CC05D
638 data4 0x24C27CFB, 0x00000000
640 // Entries Tbl_hi (double precision) Index = 0,1,...,15
641 // B = 2^(-2)*(1+Index/16+1/32)
642 // Entries Tbl_lo (single precision)
643 // Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
645 data8 0x3FD025FA510665B5
646 data4 0x24263482, 0x00000000
647 data8 0x3FD1151A362431C9
648 data4 0x242C8DC9, 0x00000000
649 data8 0x3FD2025567E47C95
650 data4 0x245CF9BA, 0x00000000
651 data8 0x3FD2ED987A823CFE
652 data4 0x235C892C, 0x00000000
653 data8 0x3FD3D6D129271134
654 data4 0x2389BE52, 0x00000000
655 data8 0x3FD4BDEE586890E6
656 data4 0x24436471, 0x00000000
657 data8 0x3FD5A2E0175E0F4E
658 data4 0x2389DBD4, 0x00000000
659 data8 0x3FD685979F5FA6FD
660 data4 0x2476D43F, 0x00000000
661 data8 0x3FD7660752817501
662 data4 0x24711774, 0x00000000
663 data8 0x3FD84422B8DF95D7
664 data4 0x23EBB501, 0x00000000
665 data8 0x3FD91FDE7CD0C662
666 data4 0x23883A0C, 0x00000000
667 data8 0x3FD9F93066168001
668 data4 0x240DF63F, 0x00000000
669 data8 0x3FDAD00F5422058B
670 data4 0x23FE261A, 0x00000000
671 data8 0x3FDBA473378624A5
672 data4 0x23A8CD0E, 0x00000000
673 data8 0x3FDC76550AAD71F8
674 data4 0x2422D1D0, 0x00000000
675 data8 0x3FDD45AEC9EC862B
676 data4 0x2344A109, 0x00000000
678 // Entries Tbl_hi (double precision) Index = 0,1,...,15
679 // B = 2^(-3)*(1+Index/16+1/32)
680 // Entries Tbl_lo (single precision)
681 // Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
683 data8 0x3FC068D584212B3D
684 data4 0x239874B6, 0x00000000
685 data8 0x3FC1646541060850
686 data4 0x2335E774, 0x00000000
687 data8 0x3FC25F6E171A535C
688 data4 0x233E36BE, 0x00000000
689 data8 0x3FC359E8EDEB99A3
690 data4 0x239680A3, 0x00000000
691 data8 0x3FC453CEC6092A9E
692 data4 0x230FB29E, 0x00000000
693 data8 0x3FC54D18BA11570A
694 data4 0x230C1418, 0x00000000
695 data8 0x3FC645BFFFB3AA73
696 data4 0x23F0564A, 0x00000000
697 data8 0x3FC73DBDE8A7D201
698 data4 0x23D4A5E1, 0x00000000
699 data8 0x3FC8350BE398EBC7
700 data4 0x23D4ADDA, 0x00000000
701 data8 0x3FC92BA37D050271
702 data4 0x23BCB085, 0x00000000
703 data8 0x3FCA217E601081A5
704 data4 0x23BC841D, 0x00000000
705 data8 0x3FCB1696574D780B
706 data4 0x23CF4A8E, 0x00000000
707 data8 0x3FCC0AE54D768466
708 data4 0x23BECC90, 0x00000000
709 data8 0x3FCCFE654E1D5395
710 data4 0x2323DCD2, 0x00000000
711 data8 0x3FCDF110864C9D9D
712 data4 0x23F53F3A, 0x00000000
713 data8 0x3FCEE2E1451D980C
714 data4 0x23CCB11F, 0x00000000
716 data8 0x400921FB54442D18, 0x3CA1A62633145C07 // PI two doubles
717 data8 0x3FF921FB54442D18, 0x3C91A62633145C07 // PI_by_2 two dbles
718 data8 0x3FE921FB54442D18, 0x3C81A62633145C07 // PI_by_4 two dbles
719 data8 0x4002D97C7F3321D2, 0x3C9A79394C9E8A0A // 3PI_by_4 two dbles
720 LOCAL_OBJECT_END(Constants_atan)
724 GLOBAL_IEEE754_ENTRY(atanl)
726 // Use common code with atan2l after setting x=1.0
728 alloc r32 = ar.pfs, 0, 17, 4, 0
729 fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
733 addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
734 fma.s1 Xsq = f1, f1, f0 // Form x*x
740 ld8 table_ptr1 = [table_ptr1] // Get table pointer
741 fnorm.s1 ArgY = ArgY_orig
752 getf.exp sign_X = f1 // Get signexp of x
753 fmerge.s ArgX_abs = f0, f1 // Form |x|
758 fnorm.s1 ArgX_orig = f1
764 getf.exp sign_Y = ArgY_orig // Get signexp of y
765 fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
766 mov table_base = table_ptr1 // Save base pointer to tables
771 ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
772 fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
778 ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
784 fma.s1 M = f1, f1, f0 // Set M = 1.0
790 // Check for everything - if false, then must be pseudo-zero
791 // or pseudo-nan (IA unsupporteds).
795 fclass.m p0,p12 = f1, 0x1FF // Test x unsupported
796 (p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
800 // U = max(ArgX_abs,ArgY_abs)
801 // V = min(ArgX_abs,ArgY_abs)
804 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
809 fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
810 br.cond.sptk ATANL_COMMON // Branch to common code
814 GLOBAL_IEEE754_END(atanl)
815 GLOBAL_IEEE754_ENTRY(atan2l)
818 alloc r32 = ar.pfs, 0, 17, 4, 0
819 fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
823 addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
824 fma.s1 Xsq = ArgX_orig, ArgX_orig, f0 // Form x*x
830 ld8 table_ptr1 = [table_ptr1] // Get table pointer
831 fnorm.s1 ArgY = ArgY_orig
836 fnorm.s1 ArgX = ArgX_orig
842 getf.exp sign_X = ArgX_orig // Get signexp of x
843 fmerge.s ArgX_abs = f0, ArgX_orig // Form |x|
849 getf.exp sign_Y = ArgY_orig // Get signexp of y
850 fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
851 mov table_base = table_ptr1 // Save base pointer to tables
856 ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
857 fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
863 ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
864 fclass.m p9,p0 = ArgX_orig, 0x1e7 // Test x natval, nan, inf, zero
869 fma.s1 M = f1, f1, f0 // Set M = 1.0
875 // Check for everything - if false, then must be pseudo-zero
876 // or pseudo-nan (IA unsupporteds).
880 fclass.m p0,p12 = ArgX_orig, 0x1FF // Test x unsupported
881 (p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
885 // U = max(ArgX_abs,ArgY_abs)
886 // V = min(ArgX_abs,ArgY_abs)
889 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
894 fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
895 (p9) br.cond.spnt ATANL_X_SPECIAL // Branch if x natval, nan, inf, zero
899 // Now common code for atanl and atan2l
903 fclass.m p0,p13 = ArgY_orig, 0x1FF // Test y unsupported
904 shr sign_X = sign_X, 17 // Get sign bit of x
908 fma.s1 U = ArgY_abs, f1, f0 // Set U assuming |x| < |y|
909 adds table_ptr1 = 176, table_ptr1 // Point to Q4
914 (p6) add swap = r0, r0 // Set swap=0 if |x| >= |y|
915 (p6) frcpa.s1 E, p0 = ArgY_abs, ArgX_abs // Compute E if |x| >= |y|
916 shr sign_Y = sign_Y, 17 // Get sign bit of y
920 (p6) fma.s1 V = ArgY_abs, f1, f0 // Set V if |x| >= |y|
921 (p12) br.cond.spnt ATANL_UNSUPPORTED // Branch if x unsupported
927 // Set p10 if |x| >= |y| and x >=0
928 // Set p11 if |x| >= |y| and x < 0
930 cmp.eq p8, p9 = 0, sign_Y // Test for y >= 0
931 (p7) frcpa.s1 E, p0 = ArgX_abs, ArgY_abs // Compute E if |x| < |y|
932 (p7) add swap = 1, r0 // Set swap=1 if |x| < |y|
935 (p6) cmp.eq.unc p10, p11 = 0, sign_X // If |x| >= |y|, test for x >= 0
936 (p6) fma.s1 U = ArgX_abs, f1, f0 // Set U if |x| >= |y|
937 (p13) br.cond.spnt ATANL_UNSUPPORTED // Branch if y unsupported
945 .pred.rel "mutex",p8,p9
948 (p8) fadd.s1 s_Y = f0, f1 // If y >= 0 set s_Y = 1.0
953 (p9) fsub.s1 s_Y = f0, f1 // If y < 0 set s_Y = -1.0
958 .pred.rel "mutex",p10,p11
961 (p10) fsub.s1 M = M, f1 // If |x| >= |y| and x >=0, set M=0
966 (p11) fadd.s1 M = M, f1 // If |x| >= |y| and x < 0, set M=2.0
973 fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
976 // *************************************************
977 // ********************* STEP2 *********************
978 // *************************************************
991 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (1) if POLY path
996 // Create a single precision representation of the signexp of Q with the
997 // 4 most significant bits of the significand followed by a 1 and then 18 0's
1000 fmpy.s1 P_hi = M, P_hi
1001 dep.z special = 0x1, 18, 1 // Form 0x0000000000040000
1005 fmpy.s1 P_lo = M, P_lo
1006 add table_ptr2 = 32, table_ptr1
1012 fma.s1 A_temp = Q, f1, f0 // Set A_temp if POLY path
1017 fma.s1 E = E, E_hold, E // E = E + E*E_hold (1) if POLY path
1024 // swap = xor(swap,sign_X)
1028 fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3 // Test Q < 2^-3
1029 xor swap = sign_X, swap
1033 // P_hi = s_Y * P_hi
1035 getf.exp exponent_Q = Q // Get signexp of Q
1036 cmp.eq.unc p7, p6 = 0x00000, swap
1037 fmpy.s1 P_hi = s_Y, P_hi
1042 // if (PR_1) sigma = -1.0
1043 // if (PR_2) sigma = 1.0
1046 getf.sig significand_Q = Q // Get significand of Q
1047 (p6) fsub.s1 sigma = f0, f1
1051 (p9) add table_ptr1 = 128, table_base // Point to P8 if POLY path
1052 (p7) fadd.s1 sigma = f0, f1
1053 (p9) br.cond.spnt ATANL_POLY // Branch to POLY if 0 < Q < 2^-3
1058 // *************************************************
1059 // ******************** STEP3 **********************
1060 // *************************************************
1062 // lookup = b_1 b_2 b_3 B_4
1067 andcm k = 0x0003, exponent_Q // k=0,1,2,3 for exp_Q=0,-1,-2,-3
1072 // Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision
1073 // representation. Note sign of Q is always 0.
1076 cmp.eq p8, p9 = 0x0000, k // Test k=0
1078 extr.u lookup = significand_Q, 59, 4 // Extract b_1 b_2 b_3 b_4 for index
1081 sub sp_exp_Q = 0x7f, k // Form single prec biased exp of Q
1083 sub k = k, r0, 1 // Decrement k
1087 // Form pointer to B index table
1089 ldfe Q_4 = [table_ptr1], -16 // Load Q_4
1091 (p9) shl k = k, 8 // k = 0, 256, or 512
1094 (p9) shladd table_ptr2 = lookup, 4, table_ptr2
1096 shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits
1101 (p8) add table_ptr2 = -16, table_ptr2 // Pointer if original k was 0
1102 (p9) add table_ptr2 = k, table_ptr2 // Pointer if k was 1, 2, 3
1103 dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec
1107 // z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
1109 ldfd Tbl_hi = [table_ptr2], 8 // Load Tbl_hi from index table
1111 setf.s z_hi = special // Form z_hi
1115 ldfs Tbl_lo = [table_ptr2], 8 // Load Tbl_lo from index table
1117 ldfe Q_3 = [table_ptr1], -16 // Load Q_3
1123 ldfe Q_2 = [table_ptr1], -16 // Load Q_2
1130 ldfe Q_1 = [table_ptr1], -16 // Load Q_1
1138 fma.s1 U_prime_hi = V, z_hi, U // U_prime_hi = U + V * z_hi
1143 fnma.s1 V_prime = U, z_hi, V // V_prime = V - U * z_hi
1150 mov A_hi = Tbl_hi // Start with A_hi = Tbl_hi
1157 fsub.s1 U_hold = U, U_prime_hi // U_hold = U - U_prime_hi
1164 frcpa.s1 C_hi, p0 = f1, U_prime_hi // C_hi = frcpa(1,U_prime_hi)
1171 fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
1178 fma.s1 U_prime_lo = z_hi, V, U_hold // U_prime_lo = U_hold + V * z_hi
1183 // C_hi_hold = 1 - C_hi * U_prime_hi (1)
1186 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1193 fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
1200 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)
1205 // C_hi_hold = 1 - C_hi * U_prime_hi (2)
1208 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1215 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)
1220 // C_hi_hold = 1 - C_hi * U_prime_hi (3)
1223 fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
1230 fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)
1237 fmpy.s1 w_hi = V_prime, C_hi // w_hi = V_prime * C_hi
1244 fmpy.s1 wsq = w_hi, w_hi // wsq = w_hi * w_hi
1249 fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi
1256 fma.s1 poly = wsq, Q_4, Q_3 // poly = Q_3 + wsq * Q_4
1261 fnma.s1 w_lo = w_hi, U_prime_lo, w_lo // w_lo = w_lo - w_hi * U_prime_lo
1268 fma.s1 poly = wsq, poly, Q_2 // poly = Q_2 + wsq * poly
1273 fmpy.s1 w_lo = C_hi, w_lo // w_lo = = w_lo * C_hi
1280 fma.s1 poly = wsq, poly, Q_1 // poly = Q_1 + wsq * poly
1285 fadd.s1 A_lo = Tbl_lo, w_lo // A_lo = Tbl_lo + w_lo
1292 fmpy.s0 Q_1 = Q_1, Q_1 // Dummy operation to raise inexact
1299 fmpy.s1 poly = wsq, poly // poly = wsq * poly
1306 fmpy.s1 poly = w_hi, poly // poly = w_hi * poly
1313 fadd.s1 A_lo = A_lo, poly // A_lo = A_lo + poly
1320 fadd.s1 A_lo = A_lo, w_hi // A_lo = A_lo + w_hi
1327 fma.s1 Res_lo = sigma, A_lo, P_lo // Res_lo = P_lo + sigma * A_lo
1333 // Result = Res_hi + Res_lo * s_Y (User Supplied Rounding Mode)
1337 fma.s0 Result = Res_lo, s_Y, Res_hi
1338 br.ret.sptk b0 // Exit table path 2^-3 <= V/U < 1
1344 // Here if 0 < V/U < 2^-3
1346 // ***********************************************
1347 // ******************** STEP4 ********************
1348 // ***********************************************
1352 // Iterate 3 times E = E + E*(1.0 - E*U)
1353 // Also load P_8, P_7, P_6, P_5, P_4
1356 ldfe P_8 = [table_ptr1], -16 // Load P_8
1357 fnma.s1 z_lo = A_temp, U, V // z_lo = V - A_temp * U
1362 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (2)
1368 ldfe P_7 = [table_ptr1], -16 // Load P_7
1370 ldfe P_6 = [table_ptr1], -16 // Load P_6
1376 ldfe P_5 = [table_ptr1], -16 // Load P_5
1377 fma.s1 E = E, E_hold, E // E = E + E_hold*E (2)
1383 ldfe P_4 = [table_ptr1], -16 // Load P_4
1385 ldfe P_3 = [table_ptr1], -16 // Load P_3
1391 ldfe P_2 = [table_ptr1], -16 // Load P_2
1392 fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (3)
1397 movl int_temp = 0x24005 // Signexp for small neg number
1402 ldfe P_1 = [table_ptr1], -16 // Load P_1
1403 setf.exp tmp_small = int_temp // Form small neg number
1404 fma.s1 E = E, E_hold, E // E = E + E_hold*E (3)
1410 // At this point E approximates 1/U to roughly working precision
1411 // Z = V*E approximates V/U
1415 fmpy.s1 Z = V, E // Z = V * E
1420 fmpy.s1 z_lo = z_lo, E // z_lo = z_lo * E
1426 // Now what we want to do is
1427 // poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
1428 // poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
1431 // Fixup added to force inexact later -
1432 // A_hi = A_temp + z_lo
1433 // z_lo = (A_temp - A_hi) + z_lo
1437 fmpy.s1 zsq = Z, Z // zsq = Z * Z
1442 fadd.s1 A_hi = A_temp, z_lo // A_hi = A_temp + z_lo
1449 fma.s1 poly1 = zsq, P_8, P_7 // poly1 = P_7 + zsq * P_8
1454 fma.s1 poly2 = zsq, P_3, P_2 // poly2 = P_2 + zsq * P_3
1461 fmpy.s1 z4 = zsq, zsq // z4 = zsq * zsq
1466 fsub.s1 A_temp = A_temp, A_hi // A_temp = A_temp - A_hi
1473 fmerge.s tmp = A_hi, A_hi // Copy tmp = A_hi
1480 fma.s1 poly1 = zsq, poly1, P_6 // poly1 = P_6 + zsq * poly1
1485 fma.s1 poly2 = zsq, poly2, P_1 // poly2 = P_2 + zsq * poly2
1492 fmpy.s1 z8 = z4, z4 // z8 = z4 * z4
1497 fadd.s1 z_lo = A_temp, z_lo // z_lo = (A_temp - A_hi) + z_lo
1504 fma.s1 poly1 = zsq, poly1, P_5 // poly1 = P_5 + zsq * poly1
1509 fmpy.s1 poly2 = poly2, zsq // poly2 = zsq * poly2
1514 // Create small GR double in case need to raise underflow
1517 fma.s1 poly1 = zsq, poly1, P_4 // poly1 = P_4 + zsq * poly1
1518 dep GR_temp = -1,r0,0,53
1522 // Create small double in case need to raise underflow
1524 setf.d FR_temp = GR_temp
1525 fma.s1 poly = z8, poly1, poly2 // poly = poly2 + z8 * poly1
1532 fma.s1 A_lo = Z, poly, z_lo // A_lo = z_lo + Z * poly
1539 fadd.s1 A_hi = tmp, A_lo // A_hi = tmp + A_lo
1546 fsub.s1 tmp = tmp, A_hi // tmp = tmp - A_hi
1551 fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
1558 fadd.s1 A_lo = tmp, A_lo // A_lo = tmp + A_lo
1563 fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
1570 fsub.s1 tmp = P_hi, Res_hi // tmp = P_hi - Res_hi
1576 // Test if A_lo is zero
1580 fclass.m p6,p0 = A_lo, 0x007 // Test A_lo = 0
1587 (p6) mov A_lo = tmp_small // If A_lo zero, make very small
1594 fma.s1 tmp = A_hi, sigma, tmp // tmp = sigma * A_hi + tmp
1599 fma.s1 sigma = A_lo, sigma, P_lo // sigma = A_lo * sigma + P_lo
1606 fma.s1 Res_lo = s_Y, sigma, tmp // Res_lo = s_Y * sigma + tmp
1612 // Test if Res_lo is denormal
1616 fclass.m p14, p15 = Res_lo, 0x0b
1622 // Compute Result = Res_lo + Res_hi. Use s3 if Res_lo is denormal.
1626 (p14) fadd.s3 Result = Res_lo, Res_hi // Result for Res_lo denormal
1631 (p15) fadd.s0 Result = Res_lo, Res_hi // Result for Res_lo normal
1637 // If Res_lo is denormal test if Result equals zero
1641 (p14) fclass.m.unc p14, p0 = Result, 0x07
1647 // If Res_lo is denormal and Result equals zero, raise inexact, underflow
1648 // by squaring small double
1652 (p14) fmpy.d.s0 FR_temp = FR_temp, FR_temp
1653 br.ret.sptk b0 // Exit POLY path, 0 < Q < 2^-3
1661 fmpy.s0 Result = ArgX,ArgY
1666 // Here if y natval, nan, inf, zero
1668 // Here if x natval, nan, inf, zero
1672 fclass.m p13,p12 = ArgY_orig, 0x0c3 // Test y nan
1679 fclass.m p15,p14 = ArgY_orig, 0x103 // Test y natval
1686 (p12) fclass.m p13,p0 = ArgX_orig, 0x0c3 // Test x nan
1693 (p14) fclass.m p15,p0 = ArgX_orig, 0x103 // Test x natval
1700 (p13) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result nan if x or y nan
1701 (p13) br.ret.spnt b0 // Exit if x or y nan
1707 (p15) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result natval if x or y natval
1708 (p15) br.ret.spnt b0 // Exit if x or y natval
1713 // Here if x or y inf or zero
1714 ATANL_SPECIAL_HANDLING:
1717 fclass.m p6, p7 = ArgY_orig, 0x007 // Test y zero
1718 mov special = 992 // Offset to table
1723 add table_ptr1 = table_base, special // Point to 3pi/4
1724 fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
1725 (p7) br.cond.spnt ATANL_ArgY_Not_ZERO // Branch if y not zero
1731 ldfd Result = [table_ptr1], 8 // Get pi high
1733 fclass.m p14, p0 = ArgX, 0x035 // Test for x>=+0
1739 ldfd Result_lo = [table_ptr1], -8 // Get pi lo
1740 fclass.m p15, p0 = ArgX, 0x036 // Test for x<=-0
1745 // Return sign_Y * 0 when ArgX > +0
1749 (p14) fmerge.s Result = ArgY, f0 // If x>=+0, y=0, hi sgn(y)*0
1756 fclass.m p13, p0 = ArgX, 0x007 // Test for x=0
1763 (p14) fmerge.s Result_lo = ArgY, f0 // If x>=+0, y=0, lo sgn(y)*0
1769 (p13) mov GR_Parameter_TAG = 36 // Error tag for x=0, y=0
1776 // Return sign_Y * pi when ArgX < -0
1780 (p15) fmerge.s Result = ArgY, Result // If x<0, y=0, hi=sgn(y)*pi
1787 (p15) fmerge.s Result_lo = ArgY, Result_lo // If x<0, y=0, lo=sgn(y)*pi
1793 // Call error support function for atan(0,0)
1797 fadd.s0 Result = Result, Result_lo
1798 (p13) br.cond.spnt __libm_error_region // Branch if atan(0,0)
1805 br.ret.sptk b0 // Exit for y=0, x not 0
1809 // Here if y not zero
1810 ATANL_ArgY_Not_ZERO:
1813 fclass.m p0, p10 = ArgY, 0x023 // Test y inf
1820 fclass.m p6, p0 = ArgX, 0x017 // Test for 0 <= |x| < inf
1821 (p10) br.cond.spnt ATANL_ArgY_Not_INF // Branch if 0 < |y| < inf
1827 // Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
1828 // Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
1829 // Return +PI/4 when ArgY = +Inf and ArgX = +Inf
1830 // Return -PI/4 when ArgY = -Inf and ArgX = +Inf
1831 // Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
1832 // Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
1836 fclass.m p7, p0 = ArgX, 0x021 // Test for x=+inf
1842 (p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite
1843 fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
1849 (p7) add table_ptr1 = 32, table_ptr1 // Point to pi/4 if x=+inf
1851 (p8) add table_ptr1 = 48, table_ptr1 // Point to 3pi/4 if x=-inf
1858 ldfd Result = [table_ptr1], 8 // Load pi/2, pi/4, or 3pi/4 hi
1860 ldfd Result_lo = [table_ptr1], -8 // Load pi/2, pi/4, or 3pi/4 lo
1867 fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
1874 fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
1881 fadd.s0 Result = Result, Result_lo // Compute complete result
1882 br.ret.sptk b0 // Exit for y=inf
1886 // Here if y not INF, and x=0 or INF
1889 // Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
1890 // Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
1891 // Return +0 when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
1892 // Return -0 when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
1893 // Return +PI when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
1894 // Return -PI when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
1898 fclass.m p7, p9 = ArgX, 0x021 // Test for x=+inf
1905 fclass.m p6, p0 = ArgX, 0x007 // Test for x=0
1911 (p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2
1912 fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
1917 .pred.rel "mutex",p7,p9
1919 (p9) ldfd Result = [table_ptr1], 8 // Load pi or pi/2 hi
1920 (p7) fmerge.s Result = ArgY, f0 // If y not inf, x=+inf, sgn(y)*0
1926 (p9) ldfd Result_lo = [table_ptr1], -8 // Load pi or pi/2 lo
1927 (p7) fnorm.s0 Result = Result // If y not inf, x=+inf normalize
1934 (p9) fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
1941 (p9) fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
1948 (p9) fadd.s0 Result = Result, Result_lo // Compute complete result
1949 br.ret.spnt b0 // Exit for y not inf, x=0,inf
1953 GLOBAL_IEEE754_END(atan2l)
1954 LOCAL_LIBM_ENTRY(__libm_error_region)
1957 add GR_Parameter_Y=-32,sp // Parameter 2 value
1959 .save ar.pfs,GR_SAVE_PFS
1960 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
1964 add sp=-64,sp // Create new stack
1966 mov GR_SAVE_GP=gp // Save gp
1969 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
1970 add GR_Parameter_X = 16,sp // Parameter 1 address
1971 .save b0, GR_SAVE_B0
1972 mov GR_SAVE_B0=b0 // Save b0
1976 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
1977 add GR_Parameter_RESULT = 0,GR_Parameter_Y
1978 nop.b 0 // Parameter 3 address
1981 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
1982 add GR_Parameter_Y = -16,GR_Parameter_Y
1983 br.call.sptk b0=__libm_error_support# // Call error handling function
1988 add GR_Parameter_RESULT = 48,sp
1991 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
1993 add sp = 64,sp // Restore stack pointer
1994 mov b0 = GR_SAVE_B0 // Restore return address
1997 mov gp = GR_SAVE_GP // Restore gp
1998 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
1999 br.ret.sptk b0 // Return
2002 LOCAL_LIBM_END(__libm_error_region#)
2003 .type __libm_error_support#,@function
2004 .global __libm_error_support#