sysdeps/ia64/fpu/s_tanf.S

   1 .file "tancotf.s"
   2
   3
   4 // Copyright (c) 2000 - 2005, Intel Corporation
   5 // All rights reserved.
   6 //
   7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
   8 //
   9 // Redistribution and use in source and binary forms, with or without
  10 // modification, are permitted provided that the following conditions are
  11 // met:
  12 //
  13 // * Redistributions of source code must retain the above copyright
  14 // notice, this list of conditions and the following disclaimer.
  15 //
  16 // * Redistributions in binary form must reproduce the above copyright
  17 // notice, this list of conditions and the following disclaimer in the
  18 // documentation and/or other materials provided with the distribution.
  19 //
  20 // * The name of Intel Corporation may not be used to endorse or promote
  21 // products derived from this software without specific prior written
  22 // permission.
  23
  24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35 //
  36 // Intel Corporation is the author of this code, and requests that all
  37 // problem reports or change requests be submitted to it directly at
  38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
  39 //
  40 // History
  41 //==============================================================
  42 // 02/02/00 Initial version
  43 // 04/04/00 Unwind support added
  44 // 12/27/00 Improved speed
  45 // 02/21/01 Updated to call tanl
  46 // 05/30/02 Improved speed, added cotf.
  47 // 11/25/02 Added explicit completer on fnorm
  48 // 02/10/03 Reordered header: .section, .global, .proc, .align
  49 // 04/17/03 Eliminated redundant stop bits
  50 // 03/31/05 Reformatted delimiters between data tables
  51 //
  52 // APIs
  53 //==============================================================
  54 // float tanf(float)
  55 // float cotf(float)
  56 //
  57 // Algorithm Description for tanf
  58 //==============================================================
  59 // The tanf function computes the principal value of the tangent of x,
  60 // where x is the argument in radians.
  61 //
  62 // There are 5 paths:
  63 // 1. x = +/-0.0
  64 //    Return tanf(x) = +/-0.0
  65 //
  66 // 2. x = [S,Q]NaN
  67 //    Return tanf(x) = QNaN
  68 //
  69 // 3. x = +/-Inf
  70 //    Return tanf(x) = QNaN
  71 //
  72 // 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
  73 //    Return tanf(x) = P19(r) = A1*r + A3*r^3 + A5*r^5 + ... + A19*r^19 =
  74 //    = r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = r*P9(t), where t = r^2
  75 //
  76 // 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
  77 //    Return tanf(x) = -1/r + P11(r) = -1/r + B1*r + B3*r^3 + ... + B11*r^11 =
  78 //    = -1/r + r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = -1/r + r*P5(t),
  79 //    where t = r^2
  80 //
  81 // Algorithm Description for cotf
  82 //==============================================================
  83 // The cotf function computes the principal value of the cotangent of x,
  84 // where x is the argument in radians.
  85 //
  86 // There are 5 paths:
  87 // 1. x = +/-0.0
  88 //    Return cotf(x) = +/-Inf and error handling is called
  89 //
  90 // 2. x = [S,Q]NaN
  91 //    Return cotf(x) = QNaN
  92 //
  93 // 3. x = +/-Inf
  94 //    Return cotf(x) = QNaN
  95 //
  96 // 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
  97 //    Return cotf(x) = P19(-r) = A1*(-r) + A3*(-r^3) + ... + A19*(-r^19) =
  98 //    = -r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = -r*P9(t), where t = r^2
  99 //
 100 // 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
 101 //    Return cotf(x) = 1/r + P11(-r) = 1/r + B1*(-r) + ... + B11*(-r^11) =
 102 //    = 1/r - r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = 1/r - r*P5(t),
 103 //    where t = r^2
 104 //
 105 //    We set p10 and clear p11 if computing tanf, vice versa for cotf.
 106 //
 107 //
 108 // Registers used
 109 //==============================================================
 110 // Floating Point registers used:
 111 // f8, input
 112 // f32 -> f80
 113 //
 114 // General registers used:
 115 // r14 -> r23, r32 -> r39
 116 //
 117 // Predicate registers used:
 118 // p6 -> p13
 119 //
 120 // Assembly macros
 121 //==============================================================
 122 // integer registers
 123 rExp                        = r14   // sign and exponent of x (from getf.exp)
 124 rSignMask                   = r15   // 0x1ffff mask, then exponent of x with sign cleared
 125 rRshf                       = r16   // bit image of the right shifter 1.5*2^63
 126 rScFctrExp                  = r17   // exponent of the scaling factor (0xffff-64)
 127 rIntN                       = r18   // significand of fShiftedN; bit 0 is the parity of N
 128 rSigRcpPiby2                = r19   // significand of 2/Pi
 129 rScRshf                     = r20   // bit image of the scaled shifter 1.5*2^(63+63+1)
 130 rCoeffA                     = r21   // pointer into coeff_A
 131 rCoeffB                     = r22   // pointer into coeff_B
 132 rExpCut                     = r23   // exponent cutoff for the huge-argument path
 133
     // r33-r35 hold b0, ar.pfs and gp across the calls made from the slow paths.
 134 GR_SAVE_B0                  = r33
 135 GR_SAVE_PFS                 = r34
 136 GR_SAVE_GP                  = r35
     // r36-r39 are set up immediately before the call to __libm_error_support.
 137 GR_Parameter_X              = r36   // address of the stored input
 138 GR_Parameter_Y              = r37   // address of the stored second value
 139 GR_Parameter_RESULT         = r38   // address of the stored result
 140 GR_Parameter_Tag            = r39   // error tag (227 is used for cotf)
 141
 142 //==============================================================
 143 // floating point registers
 144 fScRcpPiby2                 = f32   // 2^(63+1)*(2/Pi)
 145 fScRshf                     = f33   // 1.5*2^(63+63+1)
 146 fNormArg                    = f34   // normalized copy of x
 147 fScFctr                     = f35   // scaling factor, exponent 0xffff-64
 148 fRshf                       = f36   // right shifter 1.5*2^63
 149 fShiftedN                   = f37   // fNormArg*fScRcpPiby2 + fScRshf
 150 fN                          = f38   // N as a floating-point value
 151 fR                          = f39   // reduced argument R
     // Polynomial coefficients loaded from coeff_A / coeff_B.
 152 fA01                        = f40
 153 fA03                        = f41
 154 fA05                        = f42
 155 fA07                        = f43
 156 fA09                        = f44
 157 fA11                        = f45
 158 fA13                        = f46
 159 fA15                        = f47
 160 fA17                        = f48
 161 fA19                        = f49
 162 fB01                        = f50
 163 fB03                        = f51
 164 fB05                        = f52
 165 fB07                        = f53
 166 fB09                        = f54
 167 fB11                        = f55
     // Partial sums of the A polynomial (even N path).
 168 fA03_01                     = f56
 169 fA07_05                     = f57
 170 fA11_09                     = f58
 171 fA15_13                     = f59
 172 fA19_17                     = f60
 173 fA11_05                     = f61
 174 fA19_13                     = f62
 175 fA19_05                     = f63
 176 fRbyA03_01                  = f64
     // Partial sums of the B polynomial (odd N path).
 177 fB03_01                     = f65
 178 fB07_05                     = f66
 179 fB11_09                     = f67
 180 fB11_05                     = f68
 181 fRbyB03_01                  = f69
 182 fRbyB11_01                  = f70
     // Powers of R.
 183 fRp2                        = f71   // R^2
 184 fRp4                        = f72   // R^4
 185 fRp8                        = f73   // R^8
 186 fRp5                        = f74   // R^5
     // Reciprocal refinement for 1/R (odd N path).
 187 fY0                         = f75   // frcpa approximation of 1/R
 188 fY1                         = f76   // Y0*D + Y0
 189 fD                          = f77   // 1 - R*Y0
 190 fDp2                        = f78   // D^2
 191 fInvR                       = f79   // Y1*D^2 + Y1
 192 fPiby2                      = f80   // pi/2, loaded from the head of coeff_B
 193 //==============================================================
 194
 195
 196 RODATA
     // Read-only coefficient tables for the two polynomial paths.
     // coeff_A holds A1..A15 as double-precision bit images; the code reads
     // them two at a time with ldfpd through rCoeffA.
 197 .align 16
 198
 199 LOCAL_OBJECT_START(coeff_A)
 200 data8 0x3FF0000000000000 // A1  = 1.00000000000000000000e+00
 201 data8 0x3FD5555556BCE758 // A3  = 3.33333334641442641606e-01
 202 data8 0x3FC111105C2DAE48 // A5  = 1.33333249100689099175e-01
 203 data8 0x3FABA1F876341060 // A7  = 5.39701122561673229739e-02
 204 data8 0x3F965FB86D12A38D // A9  = 2.18495194027670719750e-02
 205 data8 0x3F8265F62415F9D6 // A11 = 8.98353860497717439465e-03
 206 data8 0x3F69E3AE64CCF58D // A13 = 3.16032468108912746342e-03
 207 data8 0x3F63920D09D0E6F6 // A15 = 2.38897844840557235331e-03
 208 LOCAL_OBJECT_END(coeff_A)
 209
     // coeff_B starts with pi/2 as a 16-byte entry (significand, then
     // exponent word) that is read with ldfe, followed by B1..B11.
     // A17 and A19 are stored at the end of this table, not in coeff_A,
     // because the code fetches them through rCoeffB after the B pairs.
 210 LOCAL_OBJECT_START(coeff_B)
 211 data8 0xC90FDAA22168C235, 0x3FFF // pi/2
 212 data8 0x3FD55555555358DB // B1  = 3.33333333326107426583e-01
 213 data8 0x3F96C16C252F643F // B3  = 2.22222230621336129239e-02
 214 data8 0x3F61566243AB3C60 // B5  = 2.11638633968606896785e-03
 215 data8 0x3F2BC1169BD4438B // B7  = 2.11748132564551094391e-04
 216 data8 0x3EF611B4CEA056A1 // B9  = 2.10467959860990200942e-05
 217 data8 0x3EC600F9E32194BF // B11 = 2.62305891234274186608e-06
 218 data8 0xBF42BA7BCC177616 // A17 =-5.71546981685324877205e-04
 219 data8 0x3F4F2614BC6D3BB8 // A19 = 9.50584530849832782542e-04
 220 LOCAL_OBJECT_END(coeff_B)
 221
 222
 223 .section .text
 224
 225 LOCAL_LIBM_ENTRY(cotf)
     // Entry stub for cotf: float cotf(float), argument and result in f8.
     // It performs the same setup as the tanf entry, but sets p11 and clears
     // p10 to select the cotangent variants, then branches to Common_Path.
 226
 227 { .mlx
 228       getf.exp  rExp        = f8                    // ***** Get 2^17 * s + E
 229       movl      rSigRcpPiby2= 0xA2F9836E4E44152A    // significand of 2/Pi
 230 }
 231 { .mlx
 232       addl      rCoeffA     = @ltoff(coeff_A), gp
 233       movl      rScRshf     = 0x47e8000000000000    // 1.5*2^(63+63+1)
 234 }
 235 ;;
 236
 237 { .mfi
 238       alloc     r32         = ar.pfs, 0, 4, 4, 0
 239       fclass.m  p9, p0      = f8, 0xc3              // Test for x=nan
 240       cmp.eq    p11, p10    = r0, r0                // if p11=1 we compute cotf
 241 }
 242 { .mib
 243       ld8       rCoeffA     = [rCoeffA]             // address of coeff_A
 244       mov       rExpCut     = 0x10009               // cutoff for exponent
 245       br.cond.sptk Common_Path                      // join the shared code in tanf
 246 }
 247 ;;
 248
 249 LOCAL_LIBM_END(cotf)
 250
 251
 252 GLOBAL_IEEE754_ENTRY(tanf)
     // float tanf(float): argument and result in f8.
     // The entry sets p10 and clears p11 (tangent), then falls through to
     // Common_Path, which is also the branch target of the cotf entry stub.
     // Common_Path handles NaN, Inf and zero inputs, sends arguments whose
     // exponent is >= rExpCut to Huge_Argument, and otherwise reduces x to
     // R and N and evaluates one of two polynomials depending on the parity
     // of N.
 253
 254 { .mlx
 255       getf.exp  rExp        = f8                    // ***** Get 2^17 * s + E
 256       movl      rSigRcpPiby2= 0xA2F9836E4E44152A    // significand of 2/Pi
 257 }
 258 { .mlx
 259       addl      rCoeffA     = @ltoff(coeff_A), gp
 260       movl      rScRshf     = 0x47e8000000000000    // 1.5*2^(63+63+1)
 261 }
 262 ;;
 263
 264 { .mfi
 265       alloc     r32         = ar.pfs, 0, 4, 4, 0
 266       fclass.m  p9, p0      = f8, 0xc3              // Test for x=nan
 267       cmp.eq    p10, p11    = r0, r0                // if p10=1 we compute tanf
 268 }
 269 { .mib
 270       ld8       rCoeffA     = [rCoeffA]             // address of coeff_A
 271       mov       rExpCut     = 0x10009               // cutoff for exponent
 272       nop.b     0
 273 }
 274 ;;
 275
 276 // Below is common path for both tanf and cotf
 277 Common_Path:
 278 { .mfi
 279       setf.sig  fScRcpPiby2 = rSigRcpPiby2          // 2^(63+1)*(2/Pi)
 280       fclass.m  p8, p0      = f8, 0x23              // Test for x=inf
 281       mov       rSignMask   = 0x1ffff               // mask for sign bit
 282 }
 283 { .mlx
 284       setf.d    fScRshf     = rScRshf               // 1.5*2^(63+63+1)
 285       movl      rRshf       = 0x43e8000000000000    // 1.5 2^63 for right shift
 286 }
 287 ;;
 288
 289 { .mfi
 290       and       rSignMask   = rSignMask, rExp       // clear sign bit
 291 (p10) fclass.m.unc p7, p0   = f8, 0x07              // Test for x=0 (for tanf)
 292       mov       rScFctrExp  = 0xffff-64             // exp of scaling factor
 293 }
 294 { .mfb
 295       adds      rCoeffB     = coeff_B - coeff_A, rCoeffA
 296 (p9)  fma.s.s0  f8          = f8, f1, f8            // Set qnan if x=nan
 297 (p9)  br.ret.spnt b0                                // Exit for x=nan
 298 }
 299 ;;
 300
 301 { .mfi
 302       cmp.ge    p6, p0      = rSignMask, rExpCut    // p6 = (E >= 0x10009)
 303 (p8)  frcpa.s0  f8, p0      = f0, f0                // Set qnan indef if x=inf
 304       mov GR_Parameter_Tag  = 227                   // (cotf)
 305 }
 306 { .mbb
 307       ldfe      fPiby2      = [rCoeffB], 16
 308 (p8)  br.ret.spnt b0                                // Exit for x=inf
 309 (p6)  br.cond.spnt Huge_Argument                    // Branch if |x|>=2^10
 310 }
 311 ;;
 312
     // p6 is reused from here on: it now flags x=0 on the cotf path.
 313 { .mfi
 314       nop.m     0
 315 (p11) fclass.m.unc p6, p0   = f8, 0x07              // Test for x=0 (for cotf)
 316       nop.i     0
 317 }
 318 { .mfb
 319       nop.m     0
 320       fnorm.s0  fNormArg    = f8
 321 (p7)  br.ret.spnt b0                                // Exit for x=0 (for tanf)
 322 }
 323 ;;
 324
 325 { .mmf
 326       ldfpd     fA01, fA03  = [rCoeffA], 16
 327       ldfpd     fB01, fB03  = [rCoeffB], 16
 328       fmerge.s  f10         = f8, f8                // Save input for error call
 329 }
 330 ;;
 331
 332 { .mmf
 333       setf.exp  fScFctr     = rScFctrExp            // get as real
 334       setf.d    fRshf       = rRshf                 // get right shifter as real
 335 (p6)  frcpa.s0  f8, p0      = f1, f8                // cotf(+-0) = +-Inf
 336 }
 337 ;;
 338
 339 { .mmb
 340       ldfpd     fA05, fA07  = [rCoeffA], 16
 341       ldfpd     fB05, fB07  = [rCoeffB], 16
 342 (p6)  br.cond.spnt __libm_error_region    // call error support if cotf(+-0)
 343 }
 344 ;;
 345
 346 { .mmi
 347       ldfpd     fA09, fA11  = [rCoeffA], 16
 348       ldfpd     fB09, fB11  = [rCoeffB], 16
 349       nop.i     0
 350 }
 351 ;;
 352
 353 { .mfi
 354       nop.m     0
 355       fma.s1    fShiftedN = fNormArg,fScRcpPiby2,fScRshf // x*2^64*(2/Pi)+ScRshf
 356       nop.i     0
 357 }
 358 ;;
 359
 360 { .mfi
 361       nop.m     0
 362       fms.s1    fN          = fShiftedN, fScFctr, fRshf // N = Y*2^(-64) - Rshf
 363       nop.i     0
 364 }
 365 ;;
 366
 367 .pred.rel "mutex", p10, p11
 368 { .mfi
 369       getf.sig  rIntN       = fShiftedN             // get N as integer
 370 (p10) fnma.s1   fR          = fN, fPiby2, fNormArg  // R = x - (Pi/2)*N (tanf)
 371       nop.i     0
 372 }
 373 { .mfi
 374       nop.m     0
 375 (p11) fms.s1    fR          = fN, fPiby2, fNormArg  // R = (Pi/2)*N - x (cotf)
 376       nop.i     0
 377 }
 378 ;;
 379
 380 { .mmi
 381       ldfpd     fA13, fA15  = [rCoeffA], 16
 382       ldfpd     fA17, fA19  = [rCoeffB], 16         // A17/A19 follow B11 in coeff_B
 383       nop.i     0
 384 }
 385 ;;
 386
     // NOTE(review): no branch to Return_From_Huges is visible in this file.
 387 Return_From_Huges:
 388 { .mfi
 389       nop.m     0
 390       fma.s1    fRp2        = fR, fR, f0            // R^2
 391 (p11) add       rIntN       = 0x1, rIntN            // N = N + 1 (cotf)
 392 }
 393 ;;
 394
 395 { .mfi
 396       nop.m     0
 397       frcpa.s1  fY0, p0     = f1, fR                // Y0 ~ 1/R
 398       tbit.z    p8, p9      = rIntN, 0              // p8=1 if N is even
 399 }
 400 ;;
 401
 402 // Below are mixed polynomial calculations (mixed for even and odd N)
     // Instructions under p8 build the A polynomial (even N); instructions
     // under p9 build the B polynomial and refine 1/R (odd N).
 403 { .mfi
 404       nop.m     0
 405 (p9)  fma.s1    fB03_01     = fRp2, fB03, fB01      // R^2*B3 + B1
 406       nop.i     0
 407 }
 408 { .mfi
 409       nop.m     0
 410       fma.s1    fRp4        = fRp2, fRp2, f0        // R^4
 411       nop.i     0
 412 }
 413 ;;
 414
 415 { .mfi
 416       nop.m     0
 417 (p8)  fma.s1    fA15_13     = fRp2, fA15, fA13      // R^2*A15 + A13
 418       nop.i     0
 419 }
 420 { .mfi
 421       nop.m     0
 422 (p8)  fma.s1    fA19_17     = fRp2, fA19, fA17      // R^2*A19 + A17
 423       nop.i     0
 424 }
 425 ;;
 426
 427 { .mfi
 428       nop.m     0
 429 (p8)  fma.s1    fA07_05     = fRp2, fA07, fA05      // R^2*A7 + A5
 430       nop.i     0
 431 }
 432 { .mfi
 433       nop.m     0
 434 (p8)  fma.s1    fA11_09     = fRp2, fA11, fA09      // R^2*A11 + A9
 435       nop.i     0
 436 }
 437 ;;
 438
 439 { .mfi
 440       nop.m     0
 441 (p9)  fma.s1    fB07_05     = fRp2, fB07, fB05      // R^2*B7 + B5
 442       nop.i     0
 443 }
 444 { .mfi
 445       nop.m     0
 446 (p9)  fma.s1    fB11_09     = fRp2, fB11, fB09      // R^2*B11 + B9
 447       nop.i     0
 448 }
 449 ;;
 450
 451 { .mfi
 452       nop.m     0
 453 (p9)  fnma.s1   fD          = fR, fY0, f1           // D = 1 - R*Y0
 454       nop.i     0
 455 }
 456 { .mfi
 457       nop.m     0
 458 (p8)  fma.s1    fA03_01     = fRp2, fA03, fA01      // R^2*A3 + A1
 459       nop.i     0
 460 }
 461 ;;
 462
 463 { .mfi
 464       nop.m     0
 465       fma.s1    fRp8        = fRp4, fRp4, f0        // R^8
 466       nop.i     0
 467 }
 468 { .mfi
 469       nop.m     0
 470       fma.s1    fRp5        = fR, fRp4, f0          // R^5
 471       nop.i     0
 472 }
 473 ;;
 474
 475 { .mfi
 476       nop.m     0
 477 (p8)  fma.s1    fA11_05     = fRp4, fA11_09, fA07_05 // R^4*(R^2*A11 + A9) + ...
 478       nop.i     0
 479 }
 480 { .mfi
 481       nop.m     0
 482 (p8)  fma.s1    fA19_13     = fRp4, fA19_17, fA15_13 // R^4*(R^2*A19 + A17) + ..
 483       nop.i     0
 484 }
 485 ;;
 486
 487 { .mfi
 488       nop.m     0
 489 (p9)  fma.s1    fB11_05     = fRp4, fB11_09, fB07_05 // R^4*(R^2*B11 + B9) + ...
 490       nop.i     0
 491 }
 492 { .mfi
 493       nop.m     0
 494 (p9)  fma.s1    fRbyB03_01  = fR, fB03_01, f0       // R*(R^2*B3 + B1)
 495       nop.i     0
 496 }
 497 ;;
 498
 499 { .mfi
 500       nop.m     0
 501 (p9)  fma.s1    fY1         = fY0, fD, fY0          // Y1 = Y0*D + Y0
 502       nop.i     0
 503 }
 504 { .mfi
 505       nop.m     0
 506 (p9)  fma.s1    fDp2        = fD, fD, f0            // D^2
 507       nop.i     0
 508 }
 509 ;;
 510
 511 { .mfi
 512       nop.m     0
 513    // R^8*(R^6*A19 + R^4*A17 + R^2*A15 + A13) + R^6*A11 + R^4*A9 + R^2*A7 + A5
 514 (p8)  fma.d.s1  fA19_05     = fRp8, fA19_13, fA11_05
 515       nop.i     0
 516 }
 517 { .mfi
 518       nop.m     0
 519 (p8)  fma.d.s1  fRbyA03_01  = fR, fA03_01, f0       // R*(R^2*A3 + A1)
 520       nop.i     0
 521 }
 522 ;;
 523
 524 { .mfi
 525       nop.m     0
 526 (p9)  fma.d.s1  fInvR       = fY1, fDp2, fY1        // 1/R = Y1*D^2 + Y1
 527       nop.i     0
 528 }
 529 { .mfi
 530       nop.m     0
 531    // R^5*(R^6*B11 + R^4*B9 + R^2*B7 + B5) + R^3*B3 + R*B1
 532 (p9)  fma.d.s1  fRbyB11_01  = fRp5, fB11_05, fRbyB03_01
 533       nop.i     0
 534 }
 535 ;;
 536
     // Exactly one of the two final operations below writes f8 (p8/p9 mutex).
 537 .pred.rel "mutex", p8, p9
 538 { .mfi
 539       nop.m     0
 540    // Result = R^5*(R^14*A19 + R^12*A17 + R^10*A15 + ...) + R^3*A3 + R*A1
 541 (p8)  fma.s.s0  f8          = fRp5, fA19_05, fRbyA03_01
 542       nop.i 0
 543 }
 544 { .mfb
 545       nop.m     0
 546    // Result = -1/R + R^11*B11 + R^9*B9 + R^7*B7 + R^5*B5 + R^3*B3 + R*B1
 547 (p9)  fnma.s.s0 f8          = f1, fInvR, fRbyB11_01
 548       br.ret.sptk b0                                // exit for main path
 549 }
 550 ;;
 551
 552 GLOBAL_IEEE754_END(tanf)
 553
 554
 555 LOCAL_LIBM_ENTRY(__libm_callout)
     // Slow path for large arguments. Common_Path branches to Huge_Argument
     // when the exponent of x is >= 0x10009. The code saves ar.pfs, gp and
     // b0, sets f9 to zero, calls __libm_tanl (p10 set) or __libm_cotl
     // (otherwise), restores the saved state, rounds f8 to single precision
     // with fnorm.s and returns.
     // NOTE(review): the meaning of f9 to __libm_tanl/__libm_cotl is not
     // visible in this file -- confirm against their definitions.
 556 Huge_Argument:
 557 .prologue
 558
 559 { .mfi
 560       nop.m 0
 561       fmerge.s f9 = f0,f0
 562 .save ar.pfs,GR_SAVE_PFS
 563       mov  GR_SAVE_PFS=ar.pfs
 564 }
 565 ;;
 566
 567 { .mfi
 568       mov GR_SAVE_GP=gp
 569       nop.f 0
 570 .save b0, GR_SAVE_B0
 571       mov GR_SAVE_B0=b0
 572 }
 573
 574 .body
 575 { .mmb
 576       nop.m 999
 577       nop.m 999
 578 (p10) br.cond.sptk.many  call_tanl ;;
 579 }
 580
 581 // Here if we should call cotl (p10=0, p11=1)
 582 { .mmb
 583       nop.m 999
 584       nop.m 999
 585       br.call.sptk.many  b0=__libm_cotl# ;;
 586 }
 587
 588 { .mfi
 589       mov gp        = GR_SAVE_GP
 590       fnorm.s.s0 f8 = f8                              // round result to single
 591       mov b0        = GR_SAVE_B0
 592 }
 593 ;;
 594
 595 { .mib
 596       nop.m 999
 597       mov ar.pfs    = GR_SAVE_PFS
 598       br.ret.sptk     b0
 599 ;;
 600 }
 601
 602 // Here if we should call tanl (p10=1, p11=0)
 603 call_tanl:
 604 { .mmb
 605       nop.m 999
 606       nop.m 999
 607       br.call.sptk.many  b0=__libm_tanl# ;;
 608 }
 609
 610 { .mfi
 611       mov gp        = GR_SAVE_GP
 612       fnorm.s.s0 f8 = f8                              // round result to single
 613       mov b0        = GR_SAVE_B0
 614 }
 615 ;;
 616
 617 { .mib
 618       nop.m 999
 619       mov ar.pfs    = GR_SAVE_PFS
 620       br.ret.sptk     b0
 621 ;;
 622 }
 623
 624 LOCAL_LIBM_END(__libm_callout)
 625
     // External routines called from the huge-argument path above.
 626 .type __libm_tanl#,@function
 627 .global __libm_tanl#
 628 .type __libm_cotl#,@function
 629 .global __libm_cotl#
 630
 631
 632 LOCAL_LIBM_ENTRY(__libm_error_region)
     // Error path, reached from Common_Path for cotf(+-0). On entry f8 holds
     // the result computed so far, f10 the saved input, and GR_Parameter_Tag
     // the error tag. The code builds a 64-byte frame and stores three
     // single-precision values in it:
     //   sp+16 : f10 (input),  address in GR_Parameter_X
     //   sp+32 : f1,           address in GR_Parameter_Y
     //   sp+48 : f8 (result),  address in GR_Parameter_RESULT
     // It then calls __libm_error_support, reloads f8 from sp+48, restores
     // sp, b0, gp and ar.pfs, and returns.
 633 .prologue
 634
 635 // (1)
 636 { .mfi
 637       add           GR_Parameter_Y=-32,sp        // Parameter 2 value
 638       nop.f         0
 639 .save   ar.pfs,GR_SAVE_PFS
 640       mov           GR_SAVE_PFS=ar.pfs           // Save ar.pfs
 641 }
 642 { .mfi
 643 .fframe 64
 644       add sp=-64,sp                              // Create new stack
 645       nop.f 0
 646       mov GR_SAVE_GP=gp                          // Save gp
 647 };;
 648
 649 // (2)
 650 { .mmi
 651       stfs [GR_Parameter_Y] = f1,16              // STORE Parameter 2 on stack
 652       add GR_Parameter_X = 16,sp                 // Parameter 1 address
 653 .save   b0, GR_SAVE_B0
 654       mov GR_SAVE_B0=b0                          // Save b0
 655 };;
 656
 657 .body
 658 // (3)
     // GR_Parameter_Y was post-incremented to sp+48 by the store above, so
     // it is copied as the result address and then moved back by 16.
 659 { .mib
 660       stfs [GR_Parameter_X] = f10                // STORE Parameter 1 on stack
 661       add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
 662       nop.b 0
 663 }
 664 { .mib
 665       stfs [GR_Parameter_Y] = f8                 // STORE Parameter 3 on stack
 666       add   GR_Parameter_Y = -16,GR_Parameter_Y
 667       br.call.sptk b0=__libm_error_support#      // Call error handling function
 668 };;
 669 { .mmi
 670       nop.m 0
 671       nop.m 0
 672       add   GR_Parameter_RESULT = 48,sp          // recompute result address after the call
 673 };;
 674
 675 // (4)
 676 { .mmi
 677       ldfs  f8 = [GR_Parameter_RESULT]           // Get return result off stack
 678 .restore sp
 679       add   sp = 64,sp                           // Restore stack pointer
 680       mov   b0 = GR_SAVE_B0                      // Restore return address
 681 };;
 682 { .mib
 683       mov   gp = GR_SAVE_GP                      // Restore gp
 684       mov   ar.pfs = GR_SAVE_PFS                 // Restore ar.pfs
 685       br.ret.sptk     b0                         // Return
 686 };;
 687
 688 LOCAL_LIBM_END(__libm_error_region)
 689
 690 .type   __libm_error_support#,@function
 691 .global __libm_error_support#
 692