sysdeps/ia64/fpu/s_tanhf.S

   1 .file "tanhf.s"
   2
   3
   4 // Copyright (c) 2001 - 2005, Intel Corporation
   5 // All rights reserved.
   6 //
   7 // Contributed 2001 by the Intel Numerics Group, Intel Corporation
   8 //
   9 // Redistribution and use in source and binary forms, with or without
  10 // modification, are permitted provided that the following conditions are
  11 // met:
  12 //
  13 // * Redistributions of source code must retain the above copyright
  14 // notice, this list of conditions and the following disclaimer.
  15 //
  16 // * Redistributions in binary form must reproduce the above copyright
  17 // notice, this list of conditions and the following disclaimer in the
  18 // documentation and/or other materials provided with the distribution.
  19 //
  20 // * The name of Intel Corporation may not be used to endorse or promote
  21 // products derived from this software without specific prior written
  22 // permission.
  23
  24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35 //
  36 // Intel Corporation is the author of this code, and requests that all
  37 // problem reports or change requests be submitted to it directly at
  38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
  39 //
  40 // History
  41 //==============================================================
  42 // 05/30/01 Initial version
  43 // 05/20/02 Cleaned up namespace and sf0 syntax
  44 // 02/10/03 Reordered header: .section, .global, .proc, .align
  45 // 03/31/05 Reformatted delimiters between data tables
  46 //
  47 // API
  48 //==============================================================
  49 // float tanhf(float)
  50 //
  51 // Overview of operation
  52 //==============================================================
  53 // Background
  54 //
  55 //
  56 // There are 9 paths:
  57 // 1. x = +/-0.0
  58 //    Return tanhf(x) = +/-0.0
  59 //
  60 // 2. 0.0 < |x| < 0.3125
  61 //    Return tanhf(x) = x + x^3*Pol3(x^2),
  62 //    where Pol3(x^2) = C3*x^6 + C2*x^4 + C1*x^2 + C0
  63 //
  64 // 3. 0.3125 <= |x| < 8.0
  65 //    Return tanhf(x) = sign(x)*PolD(x)*PolC(|x|) + sign(x)*PolA(|x|),
  66 //    where sign(x)*PolD(x) = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4),
  67 //          PolC(|x|) = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0,
  68 //          PolA(|x|) = A3|x|^3 + A2*x^2 + A1*|x| + A0
  69 //
  70 //    The range 0.3125 <= |x| < 8.0 is actually split into 5 subranges.
  71 //    Each subrange has its own set of coefficients.
  72 //    Below is the list of subranges:
  73 //    3.1 0.3125 <= |x| < 0.5
  74 //    3.2 0.5 <= |x| < 1.0
  75 //    3.3 1.0 <= |x| < 2.0
  76 //    3.4 2.0 <= |x| < 4.0
  77 //    3.5 4.0 <= |x| < 8.0
  78 //
  79 // 4. 8.0 <= |x| < 9.125
  80 //    Return tanhf(x) = sign(x)*(A3|x|^3 + A2*x^2 + A1*|x| + A0)
  81 //
  82 // 5. 9.125 <= |x| < +INF
  83 //    Return tanhf(x) = sign(x)*(1.0d - 2^(-53))
  84 //
  85 // 6. |x| = INF
  86 //    Return tanhf(x) = sign(x) * 1.0
  87 //
  88 // 7. x = [S,Q]NaN
  89 //    Return tanhf(x) = QNaN
  90 //
  91 // 8. x is positive denormal
  92 //    Return tanhf(x) = x - x^2
  93 //
  94 // 9. x is negative denormal
  95 //    Return tanhf(x) = x + x^2
  96 //
  97 // Registers used
  98 //==============================================================
  99 // Floating Point registers used:
 100 // f8, input
 101 // f32 -> f59
 102
 103 // General registers used:
 104 // r32 -> r46, r2, r3
 105
 106 // Predicate registers used:
 107 // p0, p6 -> p15
 108
 109 // p6           to filter out case when x = [Q,S]NaN or +/-0
 110 // p7           to filter out case when x = denormal
 111 // p8           set if |x| >= 0.3125, used also to process denormal input
 112 // p9           to filter out case when |x| = inf
 113 // p10          to filter out case when |x| < 0.3125
 114 // p11          set if |x| < 9.125 (also true for |x| < 0.3125); gates coefficient loads
 115 // p12          to filter out case when |x| >= 9.125
 116 // p13          to filter out case when 8.0 <= |x| < 9.125
 117 // p14          set to 1 for positive x
 118 // p15          set to 1 for negative x
 119
 120 // Assembly macros
 121 //==============================================================
 122 rDataPtr           = r2    // address of tanhf_data (later tanhf_data + 544 on the saturation path)
 123 rDataPtr1          = r3    // apparently unused: no tanhf instruction references it
 124
 125 rBias              = r33   // coefficient-set selector: 4*(biased exponent of x - 125), or 0x14 if |x| < 0.3125
 126 rCoeffAddr3        = r34   // address of the C2,C3 pair, then of the D0,D1 pair
 127 rNearSaturation    = r35   // 0x14: rBias value compared against to detect 8.0 <= |x|
 128 rCoeffAddr1        = r36   // address of the C0,C1 pair, then of the D2,B0 pair
 129 rCoeffAddr2        = r37   // address of the A0,A1 pair, then of the A2,A3 pair
 130 rOffset2           = r38   // bits of x with rMask cleared, then shifted to 4 * biased exponent
 131 rBias2             = r39   // 0x1F4 = 4*125, subtracted from rOffset2 to form rBias
 132 rMask              = r40   // 0x806 << 20: sign bit and top 2 significand bits
 133 rArg               = r41   // single-precision bit pattern of x
 134 rBound             = r42   // 0x3EA << 20: bit pattern of 0.3125f
 135 rSignBit           = r43   // 1 << 31: sign-bit mask
 136 rAbsArg            = r44   // bit pattern of |x|
 137 rDataPtr2          = r45   // apparently unused: no tanhf instruction references it
 138 rSaturation        = r46   // 0x4112 << 16: bit pattern of 9.125f
 139
 140 //==============================================================
 141 fA0                = f32   // PolA coefficient A0; holds 1.0 - 2^(-53) on the saturation path
 142 fA1                = f33   // PolA coefficient A1
 143 fA2                = f34   // PolA coefficient A2
 144 fA3                = f35   // PolA coefficient A3
 145 fC0                = f36   // PolC coefficient C0 (near-zero path: first Pol3 coefficient)
 146 fC1                = f37   // PolC coefficient C1
 147 fC2                = f38   // PolC coefficient C2
 148 fC3                = f39   // PolC coefficient C3
 149 fD0                = f40   // PolD coefficient D0
 150 fD1                = f41   // PolD coefficient D1
 151 fD2                = f42   // PolD coefficient D2
 152 fB0                = f43   // coefficient B0, later overwritten with B0*x^4
 153 fArgSqr            = f44   // x^2
 154 fAbsArg            = f45   // |x|
 155 fSignumX           = f46   // 1.0 with the sign of x
 156 fArg4              = f47   // x^4
 157 fArg4Sgn           = f48   // sign(x)*x^4
 158 fArg3              = f49   // |x|^3
 159 fArg3Sgn           = f50   // sign(x)*|x|^3
 160 fArg7Sgn           = f51   // sign(x)*|x|^7
 161 fArg6Sgn           = f52   // sign(x)*x^6
 162 fPolC              = f53   // PolC accumulator
 163 fPolCTmp           = f54   // low-order part of PolC: C1*|x| + C0
 164 fPolA              = f55   // PolA accumulator
 165 fPolATmp           = f56   // partial PolA term
 166 fPolD              = f57   // PolD accumulator (carries sign(x))
 167 fPolDTmp           = f58   // high-order part of PolD: sign(x)*(|x|^7 + D2*x^6)
 168 fArgSqrSgn         = f59   // sign(x)*x^2
 169
 170 // Data tables
 171 //==============================================================
 172
 173 RODATA
 174 // tanhf_data layout: five 64-byte C/D/B sets (bytes 0..319), 32-byte A sets from byte 320, one constant at byte 544.
 175 .align 16
 176
 177 LOCAL_OBJECT_START(tanhf_data)
 178 // Polynomial coefficients for the tanh(x), 0.3125 <= |x| < 0.5 (byte offset 0)
 179 data8 0x3F9BEEDFDD177D7B // C0
 180 data8 0x3F970D10C7F32458 // C1
 181 data8 0x3F766D6B051F3A38 // C2
 182 data8 0xBF732F2001B23402 // C3
 183 data8 0xBF854BE1CE1ED499 // D0
 184 data8 0x4013C944F3999A16 // D1
 185 data8 0xC01106C6975222C0 // D2
 186 data8 0x3F783D5ACCF9EBE8 // B0
 187 // Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 (byte offset 64)
 188 data8 0xBF5D631440786869 // C0
 189 data8 0xBF575D79A0D52069 // C1
 190 data8 0xBF7E2237B7EFC705 // C2
 191 data8 0x3F6A7ACBC273041F // C3
 192 data8 0xC040E32EA52D91EB // D0
 193 data8 0x403D19463E5DB4D7 // D1
 194 data8 0xC02216F61F759F39 // D2
 195 data8 0xBF55B4EA0B844BE7 // B0
 196 // Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 (byte offset 128)
 197 data8 0x3F8637DBE5B3E690 // C0
 198 data8 0xBF7F7FEC158C07F5 // C1
 199 data8 0x3F711C586706838A // C2
 200 data8 0xBF50EF7EF605554E // C3
 201 data8 0xC054D45448354E25 // D0
 202 data8 0x404ADFEEA282E730 // D1
 203 data8 0xC028AEE456D59549 // D2
 204 data8 0x3F25232D1BED59A8 // B0
 205 // Polynomial coefficients for the tanh(x), 2.0 <= |x| < 4.0 (byte offset 192)
 206 data8 0xBF52602285F2D06C // C0
 207 data8 0x3F2E57C298FFE1E0 // C1
 208 data8 0xBF15ED575DB3C811 // C2
 209 data8 0x3EE428878A08525C // C3
 210 data8 0xC0895A26849039C1 // D0
 211 data8 0x406E3C60BBFBB575 // D1
 212 data8 0xC03A06F62867C75A // D2
 213 data8 0xBEB114C70F1C723E // B0
 214 // Polynomial coefficients for the tanh(x), 4.0 <= |x| < 8.0 (byte offset 256)
 215 data8 0x3EF4B22BD17039A3 // C0
 216 data8 0xBEB704ADC040C57F // C1
 217 data8 0x3E937A98288AFE1A // C2
 218 data8 0xBE4F33B2C9FFE7E7 // C3
 219 data8 0xC0BE48CFADE2431E // D0
 220 data8 0x4090E74249760FDD // D1
 221 data8 0xC04B6F537FCF2F1E // D2
 222 data8 0x3E0DCD879C91ADEA // B0
 223 // Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125 (byte offset 320; tanhf loads these into fC0..fC3)
 224 data8 0xBFD555551E8245B7 // A0
 225 data8 0x3FC110E63F52E689 // A1
 226 data8 0xBFAB8CD6A5B7BAFA // A2
 227 data8 0x3F945D467FCEB553 // A3
 228 // Polynomial coefficients for the tanh(x), 0.3125 <= |x| < 0.5 (byte offset 352)
 229 data8 0xBE3DCC92FCAECBB6 // A0
 230 data8 0x3FF0000043B7D267 // A1
 231 data8 0xBED18BF28ACFC4B1 // A2
 232 data8 0xBFD554A56F82837E // A3
 233 // Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 (byte offset 384)
 234 data8 0x3EFD6054758539F9 // A0
 235 data8 0x3FEFFBFC77198EBE // A1
 236 data8 0x3F700327CA98D237 // A2
 237 data8 0xBFD68955F5BB2FA1 // A3
 238 // Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 (byte offset 416)
 239 data8 0xBF71A53F229DF01B // A0
 240 data8 0x3FF0AECFD730DE50 // A1
 241 data8 0xBFC882F88E5DF3BA // A2
 242 data8 0x3FC6EDF212CA2A8D // A3
 243 // Polynomial coefficients for the tanh(x), 2.0 <= |x| < 4.0 (byte offset 448)
 244 data8 0xBFAF0B712E9EDA47 // A0
 245 data8 0x3FF1C208080BEA64 // A1
 246 data8 0x3FC3D29B20C8946E // A2
 247 data8 0xBFF04514ED900A6A // A3
 248 // Polynomial coefficients for the tanh(x), 4.0 <= |x| < 8.0 (byte offset 480)
 249 data8 0xBFB1DEA49A831CBC // A0
 250 data8 0x3FFA729FC7085674 // A1
 251 data8 0xBFF2F44D923A8FA4 // A2
 252 data8 0x3FE092FC5712227E // A3
 253 // Polynomial coefficients for the tanh(x), 8.0 <= |x| < 9.125 (byte offset 512)
 254 data8 0x3FEFFF5769EE3041 // A0
 255 data8 0x3EFBBF148D850891 // A1
 256 data8 0xBEC86BCEF0F5C2FE // A2
 257 data8 0x3E7CBA4F3A885A5C // A3
 258 // Magnitude used for 9.125 <= |x| < +INF (byte offset 544)
 259 data8 0x3FEFFFFFFFFFFFFF // 1.0 - 2^(-53), the largest double below 1.0
 260 LOCAL_OBJECT_END(tanhf_data)
 261
 262 .section .text
 263 GLOBAL_LIBM_ENTRY(tanhf)
 264 // float tanhf(float x): x arrives in f8 and every exit path writes the result to f8.
 265 { .mfi
 266       alloc          r32 = ar.pfs, 1, 14, 0, 0    // 1 input + 14 locals: r32..r46
 267       fmerge.s       fAbsArg = f1, f8             // |x|
 268       addl           rMask = 0x806, r0            // shifted left by 20 below
 269 }
 270 { .mfi
 271       addl           rDataPtr = @ltoff(tanhf_data), gp  // linkage-table slot for tanhf_data
 272       fma.s1         fArgSqr = f8, f8, f0         // x^2
 273       adds           rSignBit = 0x1, r0           // shifted left by 31 below
 274 }
 275 ;;
 276
 277 { .mfi
 278       getf.s         rArg = f8                    // x in GR
 279       fclass.m       p7,p0 = f8, 0x0b             // is x denormal ?
 280       // sign bit and 2 most bits in significand
 281       shl            rMask = rMask, 20
 282 }
 283 { .mfi
 284       ld8            rDataPtr = [rDataPtr]        // address of tanhf_data
 285       nop.f          0
 286       adds           rBias2 = 0x1F4, r0           // 500 = 4 * biased exponent of 0.3125f
 287 }
 288 ;;
 289
 290 { .mfi
 291       adds           rNearSaturation = 0x14, r0   // rBias value when 8.0 <= |x| < 16.0
 292       fmerge.s       fSignumX = f8, f1            // signum(x)
 293       shl            rSignBit = rSignBit, 31      // mask for sign bit
 294 }
 295 { .mfi
 296       adds           rBound = 0x3EA, r0           // shifted left by 20 below
 297       nop.f          0
 298       addl           rSaturation = 0x4112, r0     // shifted left by 16 below
 299 }
 300 ;;
 301
 302 { .mfi
 303       andcm          rOffset2 = rArg, rMask       // clear sign and top 2 significand bits
 304       fclass.m       p6,p0 = f8, 0xc7             // is x [S,Q]NaN or +/-0 ?
 305       shl            rBound = rBound, 20          // 0.3125f (0x3EA00000) in GR
 306 }
 307 { .mfb
 308       andcm          rAbsArg = rArg, rSignBit     // |x| in GR
 309       nop.f          0
 310 (p7)  br.cond.spnt   tanhf_denormal               // branch out if x is denormal
 311 }
 312 ;;
 313
 314 { .mfi
 315       adds           rCoeffAddr2 = 352, rDataPtr  // A set for 0.3125 <= |x| < 0.5
 316       fclass.m       p9,p0 = f8, 0x23            // is x +/- inf?
 317       shr            rOffset2 = rOffset2, 21      // 4 * biased exponent of x
 318 }
 319 { .mfi
 320       cmp.lt         p10, p8 = rAbsArg, rBound   // |x| < 0.3125?
 321       nop.f          0
 322       adds           rCoeffAddr3 = 16, rDataPtr   // +16 bytes: the C2,C3 pair of a set
 323 }
 324 ;;
 325
 326 { .mfi
 327 (p8)  sub            rBias = rOffset2, rBias2     // 4 * (biased exponent of x - 125)
 328       fma.s1         fArg4 = fArgSqr, fArgSqr, f0 // x^4
 329       shl            rSaturation = rSaturation, 16  // 9.125f (0x41120000) in GR
 330 }
 331 { .mfb
 332 (p10) adds           rBias = 0x14, r0             // 16*0x14 = 320: the near-zero set
 333 (p6)  fma.s.s0       f8 = f8,f1,f8                // NaN or +/-0
 334 (p6)  br.ret.spnt    b0                           // exit for x = NaN or +/-0
 335 }
 336 ;;
 337
 338 { .mfi
 339       shladd         rCoeffAddr1 = rBias, 4, rDataPtr  // rDataPtr + 16*rBias
 340       fma.s1         fArg3Sgn = fArgSqr, f8, f0  // sign(x)*|x|^3
 341       // is |x| < 9.125?
 342       cmp.lt         p11, p12 = rAbsArg, rSaturation
 343 }
 344 { .mfi
 345       shladd         rCoeffAddr3 = rBias, 4, rCoeffAddr3  // rDataPtr + 16 + 16*rBias
 346       fma.s1         fArg3 = fArgSqr, fAbsArg, f0 // |x|^3
 347       shladd         rCoeffAddr2 = rBias, 3, rCoeffAddr2  // rDataPtr + 352 + 8*rBias
 348 }
 349 ;;
 350
 351 { .mfi
 352 (p11) ldfpd          fC0, fC1 = [rCoeffAddr1]     // C0, C1 (near-zero set when p10)
 353 (p9)  fmerge.s       f8 = f8,f1                   // +/- inf
 354 (p12) adds           rDataPtr = 544, rDataPtr     // address of the 1.0 - 2^(-53) constant
 355 }
 356 { .mfb
 357 (p11) ldfpd          fC2, fC3 = [rCoeffAddr3], 16 // C2, C3; then advance to D0, D1
 358       nop.f          0
 359 (p9)  br.ret.spnt    b0                           // exit for x = +/- inf
 360 }
 361 ;;
 362
 363 { .mfi
 364 (p11) ldfpd          fA0, fA1 = [rCoeffAddr2], 16 // A0, A1; then advance to A2, A3
 365       nop.f          0
 366 (p8)  cmp.eq.unc     p13, p0 = rBias, rNearSaturation  // p13: |x| >= 0.3125 and rBias == 0x14
 367 }
 368 { .mfi
 369       add            rCoeffAddr1 = 48, rCoeffAddr1  // advance to the D2,B0 pair
 370       nop.f          0
 371       nop.i          0
 372 }
 373 ;;
 374
 375 { .mfi
 376 (p11) ldfpd          fD0, fD1 = [rCoeffAddr3]     // D0, D1
 377       nop.f          0
 378       nop.i          0
 379 }
 380 { .mfb
 381 (p11) ldfpd          fD2, fB0 = [rCoeffAddr1]     // D2, B0
 382       // sign(x)*|x|^2
 383       fma.s1         fArgSqrSgn = fArgSqr, fSignumX, f0
 384 (p10) br.cond.spnt   tanhf_near_zero              // |x| < 0.3125
 385 }
 386 ;;
 387
 388 { .mfi
 389 (p11) ldfpd          fA2, fA3 = [rCoeffAddr2], 16 // A2, A3
 390       fcmp.lt.s1     p15, p14 = f8,f0             // p15: x < 0, p14: otherwise
 391       nop.i          0
 392 }
 393 { .mfb
 394 (p12) ldfd           fA0 = [rDataPtr]             // 1.0 - 2^(-53)
 395       fma.s1         fArg4Sgn = fArg4, fSignumX, f0 // sign(x)*|x|^4
 396 (p12) br.cond.spnt   tanhf_saturation             // |x| >= 9.125
 397 }
 398 ;;
 399 { .mfi
 400       nop.m          0
 401       fma.s1         fArg7Sgn = fArg4, fArg3Sgn, f0  // sign(x)*|x|^7
 402       nop.i          0
 403 }
 404 { .mfb
 405       nop.m          0
 406       fma.s1         fArg6Sgn = fArg3, fArg3Sgn, f0  // sign(x)*|x|^6
 407 (p13) br.cond.spnt   tanhf_close_to_saturation    // 8.0 <= |x| < 9.125
 408 }
 409 ;;
 410
 411 { .mfi
 412       nop.m          0
 413       fma.s1         fPolC = fC3, fAbsArg, fC2    // C3*|x| + C2
 414       nop.i          0
 415 }
 416 { .mfi
 417       nop.m          0
 418       fma.s1         fPolCTmp = fC1, fAbsArg, fC0 // C1*|x| + C0
 419       nop.i          0
 420 };;
 421
 422 { .mfi
 423       nop.m          0
 424       fma.s1         fPolA = fA1, fAbsArg, fA0    // A1*|x| + A0
 425       nop.i          0
 426 }
 427 ;;
 428
 429 { .mfi
 430       nop.m          0
 431       fma.s1         fPolD = fD1, fAbsArg, fD0    // D1*|x| + D0
 432       nop.i          0
 433 }
 434 { .mfi
 435       nop.m          0
 436       // sign(x)*(|x|^7 + D2*x^6)
 437       fma.s1         fPolDTmp = fArg6Sgn, fD2, fArg7Sgn
 438       nop.i          0
 439 };;
 440
 441 { .mfi
 442       nop.m          0
 443       fma.s1         fPolATmp = fA3, fAbsArg, fA2  // A3*|x| + A2
 444       nop.i          0
 445 }
 446 { .mfi
 447       nop.m          0
 448       fma.s1         fB0 = fB0, fArg4, f0          // B0*x^4
 449       nop.i          0
 450 };;
 451
 452 { .mfi
 453       nop.m          0
 454       // C3*|x|^3 + C2*x^2 + C1*|x| + C0
 455       fma.s1         fPolC = fPolC, fArgSqr, fPolCTmp
 456       nop.i          0
 457 }
 458 ;;
 459
 460 { .mfi
 461       nop.m          0
 462       // PolD = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4)
 463       fma.d.s1       fPolD = fPolD, fArg4Sgn, fPolDTmp
 464       nop.i          0
 465 }
 466 ;;
 467
 468 { .mfi
 469       nop.m          0
 470       // PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0
 471       fma.d.s1       fPolA = fPolATmp, fArgSqr, fPolA
 472       nop.i          0
 473 }
 474 ;;
 475
 476 { .mfi
 477       nop.m          0
 478       // PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0
 479       fma.d.s1       fPolC = fPolC, f1, fB0
 480       nop.i          0
 481 }
 482 ;;
 483
 484 { .mfi
 485       nop.m          0
 486 (p14) fma.s.s0       f8 = fPolC, fPolD, fPolA     // for positive x: PolC*PolD + PolA
 487       nop.i          0
 488 }
 489 { .mfb
 490       nop.m          0
 491 (p15) fms.s.s0       f8 = fPolC, fPolD, fPolA     // for negative x: PolC*PolD - PolA (PolD carries the sign)
 492       br.ret.sptk    b0                           // Exit for 0.3125 <=|x|< 8.0
 493 };;
 494
 495
 496 // Here if |x| < 0.3125; fC0..fC3 hold the set the data table labels A0..A3 for -0.3125 < x < 0.3125
 497 tanhf_near_zero:
 498 { .mfi
 499       nop.m          0
 500       fma.s1         fPolC = fC3, fArgSqr, fC2    // C3*x^2 + C2
 501       nop.i          0
 502 }
 503 { .mfi
 504       nop.m          0
 505       fma.s1         fPolCTmp = fC1, fArgSqr, fC0  // C1*x^2 + C0
 506       nop.i          0
 507 };;
 508
 509 { .mfi
 510       nop.m          0
 511       fma.s1         fPolC = fPolC, fArg4, fPolCTmp // C3*x^6 + C2*x^4 + C1*x^2 + C0
 512       nop.i          0
 513 };;
 514
 515 { .mfb
 516       nop.m          0
 517       // x + x^3*(C3*x^6 + C2*x^4 + C1*x^2 + C0); fArg3Sgn = sign(x)*|x|^3 = x^3
 518       fma.s.s0       f8 = fPolC, fArg3Sgn, f8
 519       br.ret.sptk    b0                           // Exit for |x| < 0.3125
 520 };;
 521
 522 // Here if 9.125 <= |x| < +inf
 523 tanhf_saturation:
 524 { .mfb
 525       nop.m          0
 526       fma.s.s0       f8 = fA0, fSignumX, f0       // sign(x)*(1.0d - 2^(-53))
 527       // fA0 holds the double 0x3FEFFFFFFFFFFFFF; the .s completer produces a single-precision result
 528       br.ret.sptk    b0                           // Exit for 9.125 <=|x|< +inf
 529 }
 530 ;;
 531
 532 // Here if  8.0 <= |x| < 9.125
 533 tanhf_close_to_saturation:
 534 { .mfi
 535       nop.m          0
 536       fma.s1         fPolATmp = fA1, fAbsArg, fA0 // A1*|x| + A0
 537       nop.i          0
 538 }
 539 { .mfi
 540       nop.m          0
 541       fma.s1         fPolA = fA3, fAbsArg, fA2    // A3*|x| + A2
 542       nop.i          0
 543 }
 544 ;;
 545
 546 .pred.rel "mutex", p14, p15
 547 { .mfi
 548       nop.m          0
 549       // for positive x: (A3*|x| + A2)*x^2 + (A1*|x| + A0)
 550 (p14) fma.s.s0       f8 = fPolA, fArgSqr, fPolATmp
 551       nop.i          0
 552 }
 553 { .mfb
 554       nop.m          0
 555       // for negative x: (A3*|x| + A2)*(sign(x)*x^2) - (A1*|x| + A0)
 556 (p15) fms.s.s0       f8 = fPolA, fArgSqrSgn, fPolATmp
 557       br.ret.sptk    b0                           // Exit for 8.0 <=|x|< 9.125
 558 };;
 559
 560 // Here if x is single precision denormal
 561 tanhf_denormal:
 562 { .mfi
 563       nop.m          0
 564       fclass.m       p7,p8 = f8, 0x0a             // is x -denormal ? (p8 is the complement)
 565       nop.i          0
 566 }
 567 ;;
 568
 569 { .mfi
 570       nop.m          0
 571 (p7)  fma.s.s0       f8 = f8,f8,f8                // -denormal: x + x^2
 572       nop.i          0
 573 }
 574 { .mfb
 575       nop.m          0
 576 (p8)  fnma.s.s0      f8 = f8,f8,f8                // +denormal: x - x^2
 577       br.ret.sptk    b0                           // Exit for denormal
 578 }
 579 ;;
 580
 581 GLOBAL_LIBM_END(tanhf)