sysdeps/ia64/fpu/e_atan2f.S

   1 .file "atan2f.s"
   2
   3 // Copyright (C) 2000, 2001, Intel Corporation
   4 // All rights reserved.
   5 //
   6 // Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
   7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
   8 //
   9 // Redistribution and use in source and binary forms, with or without
  10 // modification, are permitted provided that the following conditions are
  11 // met:
  12 //
  13 // * Redistributions of source code must retain the above copyright
  14 // notice, this list of conditions and the following disclaimer.
  15 //
  16 // * Redistributions in binary form must reproduce the above copyright
  17 // notice, this list of conditions and the following disclaimer in the
  18 // documentation and/or other materials provided with the distribution.
  19 //
  20 // * The name of Intel Corporation may not be used to endorse or promote
  21 // products derived from this software without specific prior written
  22 // permission.
  23 //
  24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35 //
  36 // Intel Corporation is the author of this code, and requests that all
  37 // problem reports or change requests be submitted to it directly at
  38 // http://developer.intel.com/opensource.
  39
  40 // History
  41 //==============================================================
  42 // 6/01/00  Initial version
  43 // 8/15/00  Bundle added after call to __libm_error_support to properly
  44 //          set [the previously overwritten] GR_Parameter_RESULT.
  45 // 8/17/00  Changed predicate register macro-usage to direct predicate
  46 //          names due to an assembler bug.
  47 // 1/05/01  Fixed flag settings for denormal input.
  48 // 1/19/01  Added documentation
  49 // 1/30/01  Improved speed
  50
  51 // Description
  52 //=========================================
  53 // The atan2 function computes the principal value of the arc tangent of y/x using
  54 // the signs of both arguments to determine the quadrant of the return value.
  55 // A domain error may occur if both arguments are zero.
  56
  57 // The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
  58
  59 //..
  60 //..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
  61 //..v and u can be negative. We state the relationship between atan2(y,x) and
  62 //..atan(v/u).
  63 //..
  64 //..Let swap = false if v = y, and swap = true if v = x.
  65 //..Define C according to the matrix
  66 //..
  67 //..                   TABLE FOR C
  68 //..                              x +ve       x -ve
  69 //..   no swap (swap = false)    sgn(y)*0     sgn(y)*pi
  70 //..   swap    (swap = true )    sgn(y)*pi/2  sgn(y)*pi/2
  71 //..
  72 //..   atan2(y,x) =  C +  atan(v/u)  if no swap
  73 //..   atan2(y,x) =  C -  atan(v/u)  if  swap
  74 //..
  75 //..This relationship is more efficient to compute as we accommodate signs in v and u
  76 //..saving the need to obtain the absolute value before computation can proceed.
  77 //..
  78 //..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
  79 //..A = y * frcpa(x)    (so A = (y/x)(1 - beta))
  80 //..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
  81 //..a correction.
  82 //..atan(A) is approximated by a polynomial
  83 //..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
  84 //..atan(G) is approximated as follows:
  85 //..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
  86 //..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
  87 //..
  88 //..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
  89 //..Z = x * frcpa(y)    (so Z = (x/y)(1 - beta))
  90 //..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
  91 //..a correction.
  92 //..atan(Z) is approximated by a polynomial
  93 //..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
  94 //..atan(T) is approximated as follows:
  95 //..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
  96 //..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
  97 //..
  98 //..
  99 //..A = y * frcpa(x)
 100 //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
 101 //..
 102 //..This polynomial is computed as follows:
 103 //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
 104 //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
 105 //..
 106 //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
 107 //..poly_A1 = poly_A2 + A4 * poly_A1
 108 //..poly_A1 = poly_A3 + A4 * poly_A1
 109 //..
 110 //..poly_A4 = p1 * A
 111 //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
 112 //..poly_A5 = p2 + Asq * poly_A5
 113 //..poly_A4 = poly_A4 + A5 * poly_A5
 114 //..
 115 //..atan_A = poly_A4 + A11 * poly_A1
 116 //..
 117 //..atan(G) is approximated as follows:
 118 //..G_numer = y - A*x, G_denom = x + A*y
 119 //..H1 = frcpa(G_denom)
 120 //..H_beta = 1 - H1 * G_denom
 121 //..H2 = H1 + H1 * H_beta
 122 //..H_beta2 = H_beta*H_beta
 123 //..H3 = H2 + H2*H_beta2
 124 //..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
 125 //..atan_G = G_numer*H3 + atan_G
 126 //..
 127 //..
 128 //..A = y * frcpa(x)
 129 //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
 130 //..
 131 //..This polynomial is computed as follows:
 132 //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
 133 //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
 134 //..
 135 //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
 136 //..poly_A1 = poly_A2 + A4 * poly_A1
 137 //..poly_A1 = poly_A3 + A4 * poly_A1
 138 //..
 139 //..poly_A4 = p1 * A
 140 //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
 141 //..poly_A5 = p2 + Asq * poly_A5
 142 //..poly_A4 = poly_A4 + A5 * poly_A5
 143 //..
 144 //..atan_A = poly_A4 + A11 * poly_A1
 145 //..
 146 //..
 147 //..====================================================================
 148 //..    COEFFICIENTS USED IN THE COMPUTATION
 149 //..====================================================================
 150
 151 //coef_pj, j = 1,2,...,10;  atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
 152 //
 153 //  coef_p1          =      -.3333332707155439167401311806315789E+00
 154 //  coef_p1   in dbl = BFD5 5555 1219 1621
 155 //
 156 //  coef_p2          =       .1999967670926658391827857030875748E+00
 157 //  coef_p2   in dbl = 3FC9 997E 7AFB FF4E
 158 //
 159 //  coef_p3          =      -.1427989384500152360161563301087296E+00
 160 //  coef_p3   in dbl = BFC2 473C 5145 EE38
 161 //
 162 //  coef_p4          =       .1105852823460720770079031213661163E+00
 163 //  coef_p4   in dbl = 3FBC 4F51 2B18 65F5
 164 //
 165 //  coef_p5          =      -.8811839915595312348625710228448363E-01
 166 //  coef_p5   in dbl = BFB6 8EED 6A8C FA32
 167 //
 168 //  coef_p6          =       .6742329836955067042153645159059714E-01
 169 //  coef_p6   in dbl = 3FB1 42A7 3D7C 54E3
 170 //
 171 //  coef_p7          =      -.4468571068774672908561591262231909E-01
 172 //  coef_p7   in dbl = BFA6 E10B A401 393F
 173 //
 174 //  coef_p8          =       .2252333246746511135532726960586493E-01
 175 //  coef_p8   in dbl = 3F97 105B 4160 F86B
 176 //
 177 //  coef_p9          =      -.7303884867007574742501716845542314E-02
 178 //  coef_p9   in dbl = BF7D EAAD AA33 6451
 179 //
 180 //  coef_p10         =       .1109686868355312093949039454619058E-02
 181 //  coef_p10  in dbl = 3F52 2E5D 33BC 9BAA
 182 //
 183
 184 // Special values
 185 //==============================================================
 186 //              Y                 x          Result
 187 //             +number           +inf        +0
 188 //             -number           +inf        -0
 189 //             +number           -inf        +pi
 190 //             -number           -inf        -pi
 191 //
 192 //             +inf              +number     +pi/2
 193 //             -inf              +number     -pi/2
 194 //             +inf              -number     +pi/2
 195 //             -inf              -number     -pi/2
 196 //
 197 //             +inf              +inf        +pi/4
 198 //             -inf              +inf        -pi/4
 199 //             +inf              -inf        +3pi/4
 200 //             -inf              -inf        -3pi/4
 201 //
 202 //             +1                +1          +pi/4
 203 //             -1                +1          -pi/4
 204 //             +1                -1          +3pi/4
 205 //             -1                -1          -3pi/4
 206 //
 207 //             +number           +0          +pi/2    // does not raise DBZ
 208 //             -number           +0          -pi/2    // does not raise DBZ
 209 //             +number           -0          +pi/2    // does not raise DBZ
 210 //             -number           -0          -pi/2    // does not raise DBZ
 211 //
 212 //             +0                +number     +0
 213 //             -0                +number     -0
 214 //             +0                -number     +pi
 215 //             -0                -number     -pi
 216 //
 217 //             +0                +0          +0      // does not raise invalid
 218 //             -0                +0          -0      // does not raise invalid
 219 //             +0                -0          +pi     // does not raise invalid
 220 //             -0                -0          -pi     // does not raise invalid
 221 //
 222 //            NaN             anything      quiet Y
 223 //            anything        NaN           quiet X
 224
 225 // atan2(+-0/+-0) sets double error tag to 37
 226 // atan2f(+-0/+-0) sets single error tag to 38
 227 // These are domain errors.
 228
 229 #include "libm_support.h"
 230
 231 //
 232 // Assembly macros
 233 //=========================================
 234
 235
 236 // integer registers (r33-r37 are the locals and r38-r41 the outputs declared by the alloc in atan2f)
 237 atan2f_GR_Addr_1              = r33 // pointer walking atan2f_coef_table1
 238 atan2f_GR_Addr_2              = r34 // Addr_1 + 0x30: pointer walking atan2f_coef_table2
 239 GR_SAVE_B0                    = r35 // b0 saved across the __libm_error_support call
 240
 241 GR_SAVE_PFS                   = r36 // ar.pfs saved across the __libm_error_support call
 242 GR_SAVE_GP                    = r37 // gp saved across the __libm_error_support call
 243
 244 GR_Parameter_X                = r38 // error path: address of stored parameter 1 (f8)
 245 GR_Parameter_Y                = r39 // error path: address of stored parameter 2 (f9)
 246 GR_Parameter_RESULT           = r40 // error path: address of the stored result
 247 GR_Parameter_TAG              = r41 // error path: error tag (38 for atan2f)
 248
 249 // floating point registers
 250 atan2f_coef_p1         = f32 // p1..p10: polynomial coefficients, loaded pairwise by ldfpd
 251 atan2f_coef_p10        = f33
 252 atan2f_coef_p7         = f34
 253 atan2f_coef_p6         = f35
 254
 255 atan2f_coef_p3         = f36
 256 atan2f_coef_p2         = f37
 257 atan2f_coef_p9         = f38
 258 atan2f_coef_p8         = f39
 259 atan2f_coef_p5         = f40
 260
 261 atan2f_coef_p4         = f41
 262 atan2f_const_piby2     = f42 // pi/2
 263 atan2f_const_pi        = f43 // pi
 264 atan2f_const_piby4     = f44 // pi/4
 265 atan2f_const_3piby4    = f45 // 3pi/4
 266
 267 atan2f_xsq             = f46 // x*x
 268 atan2f_ysq             = f47 // y*y
 269 atan2f_xy              = f48 // x*y
 270 atan2f_const_1         = f49 // sgn(y)*0 when x >= 0, sgn(y)*1 when x < 0
 271 atan2f_sgn_Y           = f50 // 1.0 carrying the sign of y
 272
 273 atan2f_Z0              = f51 // frcpa approximation to 1/y
 274 atan2f_A0              = f52 // frcpa approximation to 1/x
 275 atan2f_Z               = f53 // x * Z0
 276 atan2f_A               = f54 // y * A0
 277 atan2f_C               = f55 // quadrant constant C from the description table
 278
 279 atan2f_U               = f56 // polynomial argument: -Z when |y| > |x|, else A
 280 atan2f_Usq             = f57 // U^2
 281 atan2f_U4              = f58 // U^4
 282 atan2f_U6              = f59 // U^6
 283 atan2f_U8              = f60 // U^8
 284
 285 atan2f_poly_u109       = f61 // p9 + Usq*p10
 286 atan2f_poly_u87        = f62 // p7 + Usq*p8
 287 atan2f_poly_u65        = f63 // p5 + Usq*p6
 288 atan2f_poly_u43        = f64 // p3 + Usq*p4
 289 atan2f_poly_u21        = f65 // p1 + Usq*p2
 290
 291 atan2f_poly_u10to7     = f66 // poly_u87 + U4*poly_u109
 292 atan2f_poly_u6to3      = f67 // poly_u43 + U4*poly_u65
 293 atan2f_poly_u10to3     = f68 // poly_u6to3 + U8*poly_u10to7
 294 atan2f_poly_u10to0     = f69 // poly_u210 + U6*poly_u10to3
 295 atan2f_poly_u210       = f70 // 1 + Usq*poly_u21
 296
 297 atan2f_T_numer         = f71 // x - Z*y
 298 atan2f_T_denom         = f72 // y + Z*x
 299 atan2f_G_numer         = f73 // y - A*x
 300 atan2f_G_denom         = f74 // x + A*y
 301 atan2f_p1rnum          = f75 // p1 * R_numer
 302
 303 atan2f_R_denom         = f76 // T_denom when |y| > |x|, else G_denom
 304 atan2f_R_numer         = f77 // -T_numer when |y| > |x|, else G_numer
 305 atan2f_pR              = f78 // p1rnum * Q1
 306 atan2f_pRC             = f79 // C + rsq*pR
 307 atan2f_pQRC            = f80 // pRC + R_numer*Q3
 308
 309 atan2f_Q1              = f81 // frcpa approximation to 1/R_denom
 310 atan2f_Q_beta          = f82 // 1 - Q1*R_denom
 311 atan2f_Q2              = f83 // Q1 + Q1*Q_beta
 312 atan2f_Q_beta2         = f84 // Q_beta^2
 313 atan2f_Q3              = f85 // Q2 + Q2*Q_beta2
 314
 315 atan2f_r               = f86 // Q1 * R_numer
 316 atan2f_rsq             = f87 // r^2
 317 atan2f_poly_atan_U     = f88 // not referenced by the code visible in this file
 318
 319 // The predicate names below are kept only as documentation; the code uses p6-p9 directly.
 320 // predicate registers
 321 //atan2f_Pred_Swap     = p6 // |y| >  |x|
 322 //atan2f_Pred_noSwap   = p7 // |y| <= |x|
 323 //atan2f_Pred_Xpos     = p8 //  x  >=  0
 324 //atan2f_Pred_Xneg     = p9 //  x  <   0
 325
 326
 327 .data
 328 // Entries are stored in the order the ldfpd pair loads in atan2f consume them (two doubles per load).
 329 .align 16
 330 // table1: read through atan2f_GR_Addr_1.
 331 atan2f_coef_table1:
 332 ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
 333 data8 0xBFD5555512191621 // p1
 334 data8 0x3F522E5D33BC9BAA // p10
 335 data8 0xBFA6E10BA401393F // p7
 336 data8 0x3FB142A73D7C54E3 // p6
 337 data8 0xBFC2473C5145EE38 // p3
 338 data8 0x3FC9997E7AFBFF4E // p2
 339 ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
 340 // table2: atan2f addresses it as table1 + 0x30, so it must directly follow table1's six entries.
 341 atan2f_coef_table2:
 342 ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
 343 data8 0xBF7DEAADAA336451 // p9
 344 data8 0x3F97105B4160F86B // p8
 345 data8 0xBFB68EED6A8CFA32 // p5
 346 data8 0x3FBC4F512B1865F5 // p4
 347 data8 0x3ff921fb54442d18 // pi/2
 348 data8 0x400921fb54442d18 // pi
 349 data8 0x3fe921fb54442d18 // pi/4
 350 data8 0x4002d97c7f3321d2 // 3pi/4
 351 ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
 352
 353
 354
 355 .global atan2f
 356 #ifdef _LIBC
 357 .global __atan2f
 358 .global __ieee754_atan2f
 359 #endif
 360 // atan2f: y is read from f8 and x from f9; the single-precision result is written to f8.
 361 .text
 362 .align 32
 363 // Under _LIBC the same entry point is also labelled __atan2f and __ieee754_atan2f.
 364 atan2f:
 365 .proc  atan2f
 366 #ifdef _LIBC
 367 .proc  __atan2f
 368 __atan2f:
 369 .proc  __ieee754_atan2f
 370 __ieee754_atan2f:
 371 #endif
 372 // Main path: both quotient candidates (A = y/x and Z = x/y) are started before the
 373 // |y| > |x| test; p6 (swap) / p7 (no swap) then select which set feeds the polynomial.
 374 // Inputs that are zero, infinite or NaN branch to ATAN2F_XY_INF_NAN_ZERO.
 375 {     .mfi
 376      alloc      r32           = ar.pfs,1,5,4,0 // 1 input, 5 locals (r33-r37), 4 outputs (r38-r41)
 377      frcpa.s1  atan2f_Z0,p0     =    f1,f8   // Approx to 1/y
 378      nop.i  999
 379 }
 380 {     .mfi
 381      addl      atan2f_GR_Addr_1    =    @ltoff(atan2f_coef_table1),gp // linkage-table slot for table1
 382      fma.s1    atan2f_xsq     =    f9,f9,f0 // xsq = x*x
 383      nop.i  999 ;;
 384 }
 385
 386
 387 {     .mfi
 388      ld8       atan2f_GR_Addr_1    =    [atan2f_GR_Addr_1] // Addr_1 = address of table1
 389      frcpa.s1  atan2f_A0,p0     =    f1,f9   // Approx to 1/x
 390      nop.i  999
 391 }
 392 {     .mfi
 393      nop.m  999
 394      fma.s1    atan2f_ysq     =    f8,f8,f0 // ysq = y*y
 395      nop.i  999 ;;
 396 }
 397
 398 {     .mfi
 399      nop.m  999
 400      fcmp.ge.s1     p8,p9  =    f9,f0  // Set p8 if x>=0, p9 if x<0
 401      nop.i  999
 402 }
 403 {     .mfi
 404      nop.m  999
 405      fma.s1    atan2f_xy     =    f9,f8,f0 // xy = x*y
 406      nop.i  999 ;;
 407 }
 408
 409
 410 {     .mfi
 411      add   atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 // Addr_2 = table1 + 48 bytes = table2
 412      fmerge.s  atan2f_sgn_Y   =    f8,f1 // sgn_Y = 1.0 with the sign of y
 413      nop.i  999 ;;
 414 }
 415
 416 {     .mmf
 417      ldfpd     atan2f_coef_p1,atan2f_coef_p10 =    [atan2f_GR_Addr_1],16
 418      ldfpd     atan2f_coef_p9,atan2f_coef_p8 =    [atan2f_GR_Addr_2],16
 419      fclass.m  p10,p0 =    f9,0xe7      // Test x @inf|@snan|@qnan|@zero
 420 }
 421 ;;
 422
 423 {     .mfi
 424      ldfpd     atan2f_coef_p7,atan2f_coef_p6 =    [atan2f_GR_Addr_1],16
 425      fma.s1    atan2f_T_denom =    atan2f_Z0,atan2f_xsq,f8 // T_denom = y + Z*x
 426      nop.i  999
 427 }
 428 {     .mfi
 429      ldfpd     atan2f_coef_p5,atan2f_coef_p4     =    [atan2f_GR_Addr_2],16
 430      fma.s1    atan2f_Z                      =    atan2f_Z0,f9,f0 // Z = x*Z0 ~ x/y
 431      nop.i  999 ;;
 432 }
 433
 434
 435 {     .mfi
 436      ldfpd     atan2f_coef_p3,atan2f_coef_p2 =    [atan2f_GR_Addr_1],16
 437      fma.s1    atan2f_G_denom =    atan2f_A0,atan2f_ysq,f9 // G_denom = x + A*y
 438      nop.i  999
 439 }
 440 {     .mfi
 441      ldfpd     atan2f_const_piby2,atan2f_const_pi =    [atan2f_GR_Addr_2],16
 442      fma.s1    atan2f_A                           =    atan2f_A0,f8,f0 // A = y*A0 ~ y/x
 443      nop.i  999 ;;
 444 }
 445
 446 {     .mfi
 447      ldfpd     atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
 448      fclass.m  p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
 449      nop.i  999
 450 }
 451 {     .mfb
 452      nop.m  999
 453      fnma.s1   atan2f_T_numer =    atan2f_Z0,atan2f_xy,f9 // T_numer = x - Z*y
 454 (p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;   // Branch on x nan,inf,zero
 455 }
 456
 457
 458 // p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
 459 {     .mfi
 460      nop.m  999
 461      fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
 462      nop.i  999
 463 }
 464 {     .mfb
 465      nop.m  999
 466      fnma.s1   atan2f_G_numer =    atan2f_A0,atan2f_xy,f8 // G_numer = y - A*x
 467 (p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;  // Branch on y nan,inf,zero
 468 }
 469
 470
 471 {     .mfi
 472      nop.m  999
 473 (p8) fma.s1    atan2f_const_1 =    atan2f_sgn_Y,f0,f0 // x >= 0: const_1 = sgn(y)*0
 474      nop.i  999
 475 }
 476 {     .mfi
 477      nop.m  999
 478 (p9) fma.s1    atan2f_const_1 =    atan2f_sgn_Y,f1,f0 // x < 0: const_1 = sgn(y)*1
 479      nop.i  999 ;;
 480 }
 481
 482
 483 {     .mfi
 484      nop.m  999
 485 (p6) fnma.s1    atan2f_U       =    atan2f_Z,f1,f0 // swap: U = -Z
 486      nop.i  999
 487 }
 488 {     .mfi
 489      nop.m  999
 490 (p6) fma.s1    atan2f_Usq     =    atan2f_Z,atan2f_Z,f0 // swap: Usq = Z*Z
 491      nop.i  999 ;;
 492 }
 493
 494
 495 {     .mfi
 496      nop.m  999
 497 (p7) fma.s1    atan2f_U       =    atan2f_A,f1,f0 // no swap: U = A
 498      nop.i  999
 499 }
 500 {     .mfi
 501      nop.m  999
 502 (p7) fma.s1    atan2f_Usq     =    atan2f_A,atan2f_A,f0 // no swap: Usq = A*A
 503      nop.i  999 ;;
 504 }
 505
 506
 507 {     .mfi
 508      nop.m  999
 509 (p6) frcpa.s1  atan2f_Q1,p0    =    f1,atan2f_T_denom // swap: Q1 ~ 1/T_denom
 510      nop.i  999
 511 }
 512 {     .mfi
 513      nop.m  999
 514 (p6) fma.s1    atan2f_R_denom =   atan2f_T_denom,f1,f0 // swap: R_denom = T_denom
 515      nop.i  999 ;;
 516 }
 517
 518
 519 {     .mfi
 520      nop.m  999
 521 (p7) frcpa.s1  atan2f_Q1,p0    =    f1,atan2f_G_denom // no swap: Q1 ~ 1/G_denom
 522      nop.i  999
 523 }
 524 {     .mfi
 525      nop.m  999
 526 (p7) fma.s1    atan2f_R_denom =   atan2f_G_denom,f1,f0 // no swap: R_denom = G_denom
 527      nop.i  999 ;;
 528 }
 529
 530
 531 {     .mfi
 532      nop.m  999
 533 (p6) fnma.s1    atan2f_R_numer =   atan2f_T_numer,f1,f0 // swap: R_numer = -T_numer
 534      nop.i  999
 535 }
 536 {     .mfi
 537      nop.m  999
 538 (p7) fma.s1    atan2f_R_numer =   atan2f_G_numer,f1,f0 // no swap: R_numer = G_numer
 539      nop.i  999 ;;
 540 }
 541
 542
 543 {     .mfi
 544      nop.m  999
 545 (p6) fnma.s1    atan2f_p1rnum =   atan2f_T_numer,atan2f_coef_p1,f0 // swap: p1rnum = -(T_numer*p1)
 546      nop.i  999 ;;
 547 }
 548 {     .mfi
 549      nop.m  999
 550 (p7) fma.s1    atan2f_p1rnum =   atan2f_G_numer,atan2f_coef_p1,f0 // no swap: p1rnum = G_numer*p1
 551      nop.i  999 ;;
 552 }
 553
 554
 555 {     .mfi
 556      nop.m  999
 557      fma.s1    atan2f_U4 =    atan2f_Usq,atan2f_Usq,f0 // U4 = Usq^2
 558      nop.i  999
 559 }
 560 {     .mfi
 561      nop.m  999
 562      fma.s1    atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9 // p9 + Usq*p10
 563      nop.i  999 ;;
 564 }
 565
 566 {     .mfi
 567      nop.m  999
 568      fma.s1    atan2f_poly_u87 =    atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7 // p7 + Usq*p8
 569      nop.i  999
 570 }
 571 {     .mfi
 572      nop.m  999
 573      fma.s1    atan2f_poly_u65 =    atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5 // p5 + Usq*p6
 574      nop.i  999 ;;
 575 }
 576
 577
 578 {     .mfi
 579      nop.m  999
 580      fma.s1    atan2f_poly_u43 =    atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3 // p3 + Usq*p4
 581      nop.i  999
 582 }
 583 {     .mfi
 584      nop.m  999
 585      fnma.s1   atan2f_Q_beta  =    atan2f_Q1,atan2f_R_denom,f1 // Q_beta = 1 - Q1*R_denom
 586      nop.i  999 ;;
 587 }
 588
 589
 590 {     .mfi
 591      nop.m  999
 592      fma.s1    atan2f_poly_u21 =    atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1 // p1 + Usq*p2
 593      nop.i  999
 594 }
 595 {     .mfi
 596      nop.m  999
 597      fma.s1    atan2f_r  =    atan2f_Q1,atan2f_R_numer,f0 // r = Q1*R_numer
 598      nop.i  999 ;;
 599 }
 600
 601 {     .mfi
 602      nop.m  999
 603 (p6) fma.s1    atan2f_C  =    atan2f_sgn_Y,atan2f_const_piby2,f0 // swap: C = sgn(y)*pi/2
 604      nop.i  999
 605 }
 606 {     .mfi
 607      nop.m  999
 608 (p7) fma.s1    atan2f_C  =    atan2f_const_1,atan2f_const_pi,f0 // no swap: C = const_1*pi
 609      nop.i  999 ;;
 610 }
 611
 612 {     .mfi
 613      nop.m  999
 614      fma.s1    atan2f_U6 =    atan2f_U4,atan2f_Usq,f0 // U6 = U4*Usq
 615      nop.i  999
 616 }
 617 {     .mfi
 618      nop.m  999
 619      fma.s1    atan2f_U8 =    atan2f_U4,atan2f_U4,f0 // U8 = U4^2
 620      nop.i  999 ;;
 621 }
 622
 623 {     .mfi
 624      nop.m  999
 625      fma.s1    atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87 // u87 + U4*u109
 626      nop.i  999
 627 }
 628 {     .mfi
 629      nop.m  999
 630      fma.s1    atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 // pR = p1rnum*Q1
 631      nop.i  999 ;;
 632 }
 633
 634 {     .mfi
 635      nop.m  999
 636      fma.s1    atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43 // u43 + U4*u65
 637      nop.i  999
 638 }
 639 {     .mfi
 640      nop.m  999
 641      fma.s1    atan2f_Q2 =    atan2f_Q1,atan2f_Q_beta,atan2f_Q1 // Q2 = Q1 + Q1*Q_beta
 642      nop.i  999 ;;
 643 }
 644
 645 {     .mfi
 646      nop.m  999
 647      fma.s1    atan2f_Q_beta2 =    atan2f_Q_beta,atan2f_Q_beta,f0 // Q_beta2 = Q_beta^2
 648      nop.i  999
 649 }
 650 {     .mfi
 651      nop.m  999
 652      fma.s1    atan2f_rsq     =    atan2f_r,atan2f_r,f0 // rsq = r^2
 653      nop.i  999 ;;
 654 }
 655
 656 {     .mfi
 657      nop.m  999
 658      fma.s1    atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 // 1 + Usq*u21
 659      nop.i  999 ;;
 660 }
 661
 662 {     .mfi
 663      nop.m 999
 664      fcmp.eq.s0 p8,p0 = f8,f9      // Dummy op to set flag on denormal inputs
 665      nop.i 999
 666 }
 667 {     .mfi
 668      nop.m  999
 669      fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3 // u6to3 + U8*u10to7
 670      nop.i  999 ;;
 671 }
 672
 673 {     .mfi
 674      nop.m                 999
 675      fma.s1    atan2f_Q3 =    atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 // Q3 = Q2 + Q2*Q_beta2
 676      nop.i                 999
 677 }
 678 {     .mfi
 679      nop.m  999
 680      fma.s1    atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C // pRC = C + rsq*pR
 681      nop.i  999 ;;
 682 }
 683
 684 {     .mfi
 685      nop.m  999
 686      fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210 // u210 + U6*u10to3
 687      nop.i  999 ;;
 688 }
 689
 690 {     .mfi
 691      nop.m  999
 692      fma.s1    atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC // pQRC = pRC + R_numer*Q3
 693      nop.i  999 ;;
 694 }
 695
 696 {     .mfb
 697      nop.m  999
 698      fma.s.s0    f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC // result = U*u10to0 + pQRC, single precision
 699      br.ret.sptk b0 ;;
 700 }
 701
 702
 703
 704 ATAN2F_XY_INF_NAN_ZERO:
 705 // Special-value path: entered when fclass mask 0xe7 (zero, infinity or NaN) matched x or y.
 706 { .mfi
 707       nop.m 999
 708       fclass.m   p10,p0 = f8,0xc3       // Is y nan
 709       nop.i 999
 710 }
 711 ;;
 712
 713 { .mfi
 714       nop.m 999
 715       fclass.m   p12,p0 = f9,0xc3       // Is x nan
 716       nop.i 999
 717 }
 718 ;;
 719
 720 { .mfi
 721       nop.m 999
 722       fclass.m   p6,p0 = f9,0x21        // Is x +inf
 723       nop.i 999
 724 }
 725 { .mfb
 726       nop.m 999
 727 (p10) fma.s f8  = f9,f8,f0          // Result quietized y if y is nan
 728 (p10) br.ret.spnt b0                // Exit if y is nan
 729 }
 730 ;;
 731
 732 // The .unc compare below clears both p7 and p8 when x is not +inf.
 733 { .mfi
 734       nop.m 999
 735 (p6)  fclass.m.unc   p7,p8 = f8,0x23    // x +inf, is y inf
 736       nop.i 999
 737 }
 738 { .mfb
 739       nop.m 999
 740 (p12) fnorm.s f8 = f9               // Result quietized x if x is nan, y not nan
 741 (p12) br.ret.spnt b0                // Exit if x is nan, y not nan
 742 }
 743 ;;
 744
 745 // Here if x or y inf, or x or y zero
 746 { .mfi
 747       nop.m 999
 748       fcmp.eq.s0 p15,p0 = f8,f9     // Dummy op to set flag on denormal inputs
 749       nop.i 999
 750 }
 751 ;;
 752
 753 { .mfi
 754       nop.m 999
 755       fclass.m   p11,p12 = f9,0x22      // Is x -inf
 756       nop.i 999
 757 }
 758 { .mfb
 759       nop.m 999
 760 (p7)  fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
 761 (p7)  br.ret.spnt b0            // Exit if x +inf and y inf
 762 }
 763 ;;
 764
 765 { .mfb
 766       nop.m 999
 767 (p8)  fmerge.s   f8 = f8,f0     // If x +inf and y not inf, result +-0
 768 (p8)  br.ret.spnt b0            // Exit if x +inf and y not inf
 769 }
 770 ;;
 771
 772 { .mfi
 773       nop.m 999
 774 (p12) fclass.m.unc   p13,p0 = f8,0x23   // x not -inf, is y inf
 775       nop.i 999
 776 }
 777 ;;
 778
 779 { .mfi
 780       nop.m 999
 781 (p11) fclass.m.unc   p14,p15 = f8,0x23  // x -inf, is y inf
 782       nop.i 999
 783 }
 784 ;;
 785
 786 { .mfi
 787       nop.m 999
 788       fclass.m  p6,p7 = f9,0x7  // Is x zero
 789       nop.i 999
 790 }
 791 { .mfb
 792       nop.m 999
 793 (p13) fma.s   f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
 794 (p13) br.ret.spnt b0           // Exit if x not -inf and y inf
 795 }
 796 ;;
 797
 798 { .mfi
 799       nop.m 999
 800 (p14) fma.s   f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
 801       nop.i 999
 802 }
 803 { .mfb
 804       nop.m 999
 805 (p15) fma.s   f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
 806 (p11) br.ret.spnt b0           // Exit if x -inf
 807 }
 808 ;;
 809
 810 // Here if x or y zero
 811 { .mfi
 812       nop.m 999
 813 (p7)  fclass.m.unc   p8,p9 = f9,0x19    // x not zero, y zero, is x > zero
 814       nop.i 999
 815 }
 816 ;;
 817
 818 { .mfi
 819       nop.m 999
 820 (p6)  fclass.m.unc   p10,p11 = f8,0x7   // x zero, is y zero
 821       nop.i 999
 822 }
 823 ;;
 824
 825 { .mfi
 826       nop.m 999
 827 (p8)  fmerge.s  f8 = f8, f0  // x > zero and y zero, result is +-zero
 828       nop.i 999
 829 }
 830 { .mfb
 831       nop.m 999
 832 (p9)  fma.s  f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
 833 (p10) br.cond.spnt   __libm_error_region // Branch if x zero and y zero
 834 }
 835 ;;
 836
 837 { .mfb
 838       nop.m 999
 839 (p11) fma.s  f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
 840       br.ret.sptk b0         // Final special case exit
 841 }
 842 ;;
 843
 844
 845 .endp atan2f
 846 ASM_SIZE_DIRECTIVE(atan2f)
 847
 848
 849 .proc __libm_error_region
 850 __libm_error_region:
 851 .prologue
 852          mov            GR_Parameter_TAG      = 38 // single-precision atan2f(+-0,+-0) error tag
 853          fclass.m       p10,p11               = f9,0x5  // @zero | @pos
 854 ;;
 855 (p10)    fmerge.s       f10                   = f8, f0 // x is +0: default result is zero with the sign of y
 856 (p11)    fma.s          f10                   = atan2f_sgn_Y, atan2f_const_pi,f0 // otherwise: default result is sgn(y)*pi
 857 ;;
 858 // Frame after the 64-byte allocation: sp+16 = parameter 1 (f8), sp+32 = parameter 2 (f9), sp+48 = result (f10).
 859 { .mfi
 860         add   GR_Parameter_Y=-32,sp             // Parameter 2 address (old sp - 32 = new sp + 32)
 861         nop.f 999
 862 .save   ar.pfs,GR_SAVE_PFS
 863         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 864 }
 865
 866 { .mfi
 867 .fframe 64
 868         add sp=-64,sp                           // Create new stack
 869         nop.f 0
 870         mov GR_SAVE_GP=gp                       // Save gp
 871 }
 872 ;;
 873
 874 { .mmi
 875         stfs [GR_Parameter_Y] = f9,16         // Store Parameter 2 on stack
 876         add GR_Parameter_X = 16,sp              // Parameter 1 address
 877 .save   b0, GR_SAVE_B0
 878         mov GR_SAVE_B0=b0                       // Save b0
 879 }
 880 ;;
 881
 882
 883 .body
 884 { .mib
 885         stfs [GR_Parameter_X] = f8            // Store Parameter 1 on stack
 886         add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
 887         nop.b 0
 888 }
 889 { .mib
 890         stfs [GR_Parameter_Y] = f10       // Store Parameter 3 on stack
 891         add   GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2
 892         br.call.sptk b0=__libm_error_support#   // Call error handling function
 893 }
 894 ;;
 895 { .mmi
 896         nop.m 0
 897         nop.m 0
 898         add   GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
 899 };;
 900
 901 { .mmi
 902         ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 903 .restore sp
 904         add   sp = 64,sp                       // Restore stack pointer
 905         mov   b0 = GR_SAVE_B0                  // Restore return address
 906 }
 907 ;;
 908
 909 { .mib
 910         mov   gp = GR_SAVE_GP                  // Restore gp
 911         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
 912         br.ret.sptk     b0                     // Return
 913 }
 914 ;;
 915
 916 .endp __libm_error_region
 917 ASM_SIZE_DIRECTIVE(__libm_error_region)
 918
 919 .type   __libm_error_support#,@function
 920 .global __libm_error_support#