sysdeps/ia64/fpu/e_hypotl.S

   1 .file "hypotl.asm"
   2
   3 // Copyright (C) 2000, 2001, Intel Corporation
   4 // All rights reserved.
   5 //
   6 // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
   7 // Bob Norin, Shane Story, and Ping Tak Peter Tang of the
   8 // Computational Software Lab, Intel Corporation.
   9 //
  10 // Redistribution and use in source and binary forms, with or without
  11 // modification, are permitted provided that the following conditions are
  12 // met:
  13 //
  14 // * Redistributions of source code must retain the above copyright
  15 // notice, this list of conditions and the following disclaimer.
  16 //
  17 // * Redistributions in binary form must reproduce the above copyright
  18 // notice, this list of conditions and the following disclaimer in the
  19 // documentation and/or other materials provided with the distribution.
  20 //
  21 // * The name of Intel Corporation may not be used to endorse or promote
  22 // products derived from this software without specific prior written
  23 // permission.
  24 //
  25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  33 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 //
  37 // Intel Corporation is the author of this code, and requests that all
  38 // problem reports or change requests be submitted to it directly at
  39 // http://developer.intel.com/opensource.
  40 //
  41 // *********************************************************************
  42 //
  43 // History:
  44 // 2/02/00  hand-optimized
  45 // 4/04/00  Unwind support added
  46 // 6/20/00  new version
  47 // 8/15/00  Bundle added after call to __libm_error_support to properly
  48 //          set [the previously overwritten] GR_Parameter_RESULT.
  49 //
  50 // *********************************************************************
  51 //                           ___________
  52 // Function:   hypotl(x,y) = |(x^2 + y^2) = for double extended values
  53 //             x and y
  54 //             Also provides cabsl functionality.
  55 //
  56 // *********************************************************************
  57 //
  58 // Resources Used:
  59 //
  60 //    Floating-Point Registers: f8  (Input and Return Value)
  61 //                              f9  (Input)
  62 //                              f6 -f15, f32-f34
  63 //
  64 //    General Purpose Registers:
  65 //      r2-r3   (Scratch)
  66 //      r32-r36 (Locals)
  67 //      r37-r40 (Used to pass arguments to error handling routine)
  68 //
  69 //    Predicate Registers:      p6 - p10
  70 //
  71 // *********************************************************************
  72 //
  73 // IEEE Special Conditions:
  74 //
  75 //    All faults and exceptions should be raised correctly.
  76 //    Overflow can occur.
  77 //    hypotl(Infinity and anything) = +Infinity
  78 //    hypotl(QNaN and anything) = QNaN
  79 //    hypotl(SNaN and anything ) = QNaN
  80 //
  81 // *********************************************************************
  82 //
  83 // Implementation:
  84 //  x2 = x * x   in double-extended
  85 //  y2 = y * y   in double-extended
  86 //  temp = x2 + y2   in double-extended
  87 //  sqrt(temp) rounded to double extended
  88 //
  89 // *********************************************************************
  90
  91 #include "libm_support.h"
  92
  93 GR_SAVE_PFS         = r33
  94 GR_SAVE_B0          = r34
  95 GR_SAVE_GP          = r35
  96 GR_Parameter_X      = r36
  97 GR_Parameter_Y      = r37
  98 GR_Parameter_RESULT = r38
  99 GR_Parameter_TAG    = r39
 100
 101 FR_X                = f32
 102 FR_Y                = f33
 103 FR_RESULT           = f8
 104
 105 .section .text
 106 #ifndef _LIBC
 107 .proc cabsl#
 108 .global cabsl#
 109 cabsl:
 110 .endp cabsl
 111 #endif
 112 .proc hypotl#
 113 .global hypotl#
 114 .align 64
 115
 116 hypotl:
 117 #ifdef _LIBC
 118 .global __hypotl
 119 __hypotl:
 120 .global __ieee754_hypotl
 121 __ieee754_hypotl:
 122 #endif
 123 {.mfi
 124   alloc r32= ar.pfs,0,4,4,0
 125   // Compute x*x
 126   fma.s1 f10=f8,f8,f0
 127   // r2=bias-1
 128   mov r2=0xfffe
 129 }
 130 {.mfi
 131   nop.m 0
 132   // y*y
 133   fma.s1 f11=f9,f9,f0
 134   nop.i 0;;
 135 }
 136
 137 { .mfi
 138      nop.m 0
 139 //   Check if x is an Inf - if so return Inf even
 140 //   if y is a NaN (C9X)
 141      fclass.m.unc p7, p6 = f8, 0x023
 142      nop.i 0
 143 }
 144 {.mfi
 145         nop.m 0
 146         // if possible overflow, copy f8 to f32
 147         // set Denormal, if necessary
 148         // (p8)
 149         fma.s0 f32=f8,f1,f0
 150     nop.i 0;;
 151 }
 152 { .mfi
 153      nop.m 0
 154 //   Check if y is an Inf - if so return Inf even
 155 //   if x is a NaN (C9X)
 156      fclass.m.unc p8, p9 = f9, 0x023
 157          nop.i 0
 158 }
 159 { .mfi
 160      nop.m 999
 161 //   For x=inf, multiply y by 1 to raise invalid on y an SNaN
 162 //   (p7) fma.s0 f9=f9,f1,f0
 163      // copy f9 to f33; set Denormal, if necessary
 164          fma.s0 f33=f9,f1,f0
 165      nop.i 0;;
 166 }
 167 {.mfi
 168   nop.m 0
 169   // is y Zero ?
 170   (p6) fclass.m p6,p0=f9,0x7
 171   nop.i 0;;
 172 }
 173
 174 {.mfi
 175   // f7=0.5
 176   setf.exp f7=r2
 177   // a=x2+y2
 178   fma.s1 f12=f10,f1,f11
 179   nop.i 0
 180 }
 181 {.mfi
 182   mov r2=0x408c  //0000
 183   // dx=x*x-x2
 184   fms.s1 f13=f8,f8,f10
 185   nop.i 0;;
 186 }
 187 {.mfi
 188   nop.m 0
 189   // is x Zero ?
 190   (p9) fclass.m p9,p0=f8,0x7
 191   shl r2=r2,16
 192 }
 193 {.mfi
 194   nop.m 0
 195   // dy=y*y-y2
 196   fms.s1 f14=f9,f9,f11
 197   nop.i 0;;
 198 }
 199
 200 {.mfi
 201   nop.m 0
 202   // x not NaN ?
 203   (p6) fclass.m p7,p0=f8,0x3f
 204   nop.i 0
 205 }
 206 {.mfi
 207   nop.m 0
 208   // f6=2
 209   fma.s1 f6=f1,f1,f1
 210   nop.i 0;;
 211 }
 212
 213 {.mfi
 214   nop.m 0
 215   // f34=min(x2,y2)
 216   famin.s1 f34=f10,f11
 217   nop.i 0
 218 }
 219 {.mfb
 220   nop.m 0
 221   // f10=max(x2,y2)
 222   famax.s1 f10=f11,f10
 223   nop.b 0;; //
 224 }
 225
 226 {.mfi
 227   nop.m 0
 228   // y not NaN ?
 229   (p9) fclass.m p8,p0=f9,0x3f
 230   nop.i 0;;
 231 }
 232 {.mfb
 233   // f9=35/8
 234   setf.s f9=r2
 235   // if f8=Infinity or f9=Zero, return |f8|
 236   (p7) fmerge.s f8=f0,f32
 237   (p7) br.ret.spnt b0;;
 238 }
 239
 240
 241 {.mfi
 242   nop.m 0
 243   // z0=frsqrta(a)
 244   frsqrta.s1 f8,p6=f12
 245   nop.i 0;;
 246 }
 247 { .mfi
 248      nop.m 0
 249 //   Identify Natvals, Infs, NaNs, and Zeros
 250 //   and return result
 251      fclass.m.unc p7, p0 = f12, 0x1E7
 252      nop.i 0
 253 }
 254 {.mfi
 255   // get exponent of x^2+y^2
 256   getf.exp r3=f12
 257   // dxy=dx+dy
 258   fma.s1 f13=f13,f1,f14
 259   nop.i 0;;
 260 }
 261
 262 {.mfb
 263   // 2*emax-2
 264   mov r2=0x17ffb
 265   // if f9=Infinity or f8=Zero, return |f9|
 266   (p8) fmerge.s f8=f0,f33
 267   (p8) br.ret.spnt b0
 268 }
 269 {.mfi
 270   nop.m 0
 271   // dd=a-max(x2,y2)
 272   fnma.s1 f10=f10,f1,f12
 273   nop.i 0;;
 274 }
 275
 276 {.mfi
 277   nop.m 0
 278   // S0=a*z0
 279   (p6) fma.s1 f14=f12,f8,f0
 280   nop.i 0
 281 }
 282 {.mfi
 283   nop.m 0
 284   // H0=0.5*z0
 285   (p6) fma.s1 f15=f8,f7,f0
 286   nop.i 0;;
 287 }
 288
 289 {.mfb
 290   nop.m 0
 291   // if special case, set f8
 292   (p7) mov f8=f12
 293   (p7) br.ret.spnt b0
 294 }
 295 {.mfi
 296   nop.m 0
 297   // da=min(x2,y2)-dd
 298   fnma.s1 f10=f10,f1,f34
 299   nop.i 0;;
 300 }
 301 {.mfi
 302   nop.m 0
 303   // f6=5/2
 304   fma.s1 f6=f7,f1,f6
 305   nop.i 0
 306 }
 307 {.mfi
 308   nop.m 0
 309   // f11=3/2
 310   fma.s1 f11=f7,f1,f1
 311   nop.i 0;;
 312 }
 313
 314 {.mfi
 315   nop.m 0
 316   // d=0.5-S0*H0
 317   (p6) fnma.s1 f7=f14,f15,f7
 318   nop.i 0;;
 319 }
 320
 321 {.mfi
 322   nop.m 0
 323   // P1=3/2*d+1
 324   (p6) fma.s1 f11=f11,f7,f1
 325   nop.i 0
 326 }
 327 {.mfi
 328   nop.m 0
 329   // P2=35/8*d+5/2
 330   (p6) fma.s1 f9=f9,f7,f6
 331   nop.i 0;;
 332 }
 333 {.mfi
 334   nop.m 0
 335   // d2=d*d
 336   (p6) fma.s1 f34=f7,f7,f0
 337   nop.i 0;;
 338 }
 339
 340 {.mfi
 341   nop.m 0
 342   // T0=d*S0
 343   (p6) fma.s1 f6=f7,f14,f0
 344   nop.i 0
 345 }
 346 {.mfi
 347   nop.m 0
 348   // G0=d*H0
 349   (p6) fma.s1 f7=f7,f15,f0
 350   nop.i 0;;
 351 }
 352 {.mfi
 353   nop.m 0
 354   // P=d2*P2+P1
 355   (p6) fma.s1 f11=f34,f9,f11
 356   nop.i 0;;
 357 }
 358
 359 {.mfi
 360   nop.m 0
 361   // S1=p*T0+S0
 362   (p6) fma.s1 f14=f11,f6,f14
 363   nop.i 0
 364 }
 365 {.mfi
 366   nop.m 0
 367   // H1=p*G0+H0
 368   (p6) fma.s1 f15=f11,f7,f15
 369   nop.i 0;;
 370 }
 371
 372
 373 {.mfi
 374   nop.m 0
 375   // e1=a-S1*S1
 376   (p6) fnma.s1 f7=f14,f14,f12
 377   nop.i 0
 378 }
 379 {.mfi
 380   // Is x^2 + y^2 well less than the overflow
 381   // threshold?
 382   (p6) cmp.lt.unc p7, p8 =  r3,r2
 383   // c=dxy+da
 384   (p6) fma.s1 f13=f13,f1,f10
 385   nop.i 0;;
 386 }
 387
 388 {.mfi
 389   nop.m 0
 390   // e=e1+c
 391   (p6) fma.s1 f13=f7,f1,f13
 392   nop.i 0;;
 393 }
 394
 395 {.mfb
 396   nop.m 0
 397   // S=e*H1+S1
 398   fma.s0 f8=f13,f15,f14
 399   // No overflow in this case
 400   (p7) br.ret.sptk b0;;
 401 }
 402
 403 { .mfi
 404      nop.m 0
 405 (p8) fsetc.s2 0x7F,0x42
 406      // Possible overflow path, must detect by
 407      // Setting widest range exponent with prevailing
 408      // rounding mode.
 409      nop.i 0 ;;
 410 }
 411
 412
 413 { .mfi
 414    // bias+0x4000 (bias+EMAX+1)
 415    (p8) mov r2=0x13fff
 416    // S=e*H1+S1
 417    (p8) fma.s2 f12=f13,f15,f14
 418    nop.i 0 ;;
 419 }
 420 { .mfi
 421 (p8) setf.exp f11 = r2
 422 (p8) fsetc.s2 0x7F,0x40
 423 //   Restore Original Mode in S2
 424      nop.i 0 ;;
 425 }
 426 { .mfi
 427      nop.m 0
 428 (p8) fcmp.lt.unc.s1 p9, p10 =  f12, f11
 429      nop.i 0 ;;
 430 }
 431 { .mib
 432      nop.m 0
 433      mov GR_Parameter_TAG = 45;
 434          // No overflow
 435 (p9) br.ret.sptk b0;;
 436 }
 437 .endp hypotl
 438 ASM_SIZE_DIRECTIVE(hypotl)
 439
 440 .proc __libm_error_region
 441 __libm_error_region:
 442 .prologue
 443 { .mfi
 444         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
 445         nop.f 0
 446 .save   ar.pfs,GR_SAVE_PFS
 447         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 448 }
 449 { .mfi
 450 .fframe 64
 451         add sp=-64,sp                           // Create new stack
 452         nop.f 0
 453         mov GR_SAVE_GP=gp                       // Save gp
 454 };;
 455 { .mmi
 456         stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
 457         add GR_Parameter_X = 16,sp              // Parameter 1 address
 458 .save   b0, GR_SAVE_B0
 459         mov GR_SAVE_B0=b0                       // Save b0
 460 };;
 461 .body
 462 { .mib
 463         stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
 464         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
 465         nop.b 0                                 // Parameter 3 address
 466 }
 467 { .mib
 468         stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
 469         add   GR_Parameter_Y = -16,GR_Parameter_Y
 470         br.call.sptk b0=__libm_error_support#  // Call error handling function
 471 };;
 472 { .mmi
 473         nop.m 0
 474         nop.m 0
 475         add   GR_Parameter_RESULT = 48,sp
 476 };;
 477 { .mmi
 478         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 479 .restore sp
 480         add   sp = 64,sp                       // Restore stack pointer
 481         mov   b0 = GR_SAVE_B0                  // Restore return address
 482 };;
 483 { .mib
 484         mov   gp = GR_SAVE_GP                  // Restore gp
 485         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
 486         br.ret.sptk     b0                     // Return
 487 };;
 488 .endp __libm_error_region
 489 ASM_SIZE_DIRECTIVE(__libm_error_region)
 490 .type   __libm_error_support#,@function
 491 .global __libm_error_support#