sysdeps/ia64/fpu/e_hypot.S

   1 .file "hypot.asm"
   2
   3 // Copyright (C) 2000, 2001, Intel Corporation
   4 // All rights reserved.
   5 //
   6 // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
   7 // Bob Norin, Shane Story, and Ping Tak Peter Tang of the
   8 // Computational Software Lab, Intel Corporation.
   9 //
  10 // Redistribution and use in source and binary forms, with or without
  11 // modification, are permitted provided that the following conditions are
  12 // met:
  13 //
  14 // * Redistributions of source code must retain the above copyright
  15 // notice, this list of conditions and the following disclaimer.
  16 //
  17 // * Redistributions in binary form must reproduce the above copyright
  18 // notice, this list of conditions and the following disclaimer in the
  19 // documentation and/or other materials provided with the distribution.
  20 //
  21 // * The name of Intel Corporation may not be used to endorse or promote
  22 // products derived from this software without specific prior written
  23 // permission.
  24 //
  25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  33 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 //
  37 // Intel Corporation is the author of this code, and requests that all
  38 // problem reports or change requests be submitted to it directly at
  39 // http://developer.intel.com/opensource.
  40 //
  41 // *********************************************************************
  42 //
  43 // History:
  44 // 2/02/00  hand-optimized
  45 // 4/04/00  Unwind support added
  46 // 6/20/00  new version
  47 // 8/15/00  Bundle added after call to __libm_error_support to properly
  48 //          set [the previously overwritten] GR_Parameter_RESULT.
  49 //
  50 // *********************************************************************
  51 //
  52 // Function:   hypot(x,y) = sqrt(x^2 + y^2) for double precision values
  53 //             x and y
  54 //             Also provides cabs functionality.
  55 //
  56 // *********************************************************************
  57 //
  58 // Resources Used:
  59 //
  60 //    Floating-Point Registers: f8  (Input and Return Value)
  61 //                              f9  (Input)
  62 //                              f6 -f15, f32-f34
  63 //
  64 //    General Purpose Registers:
  65 //      r2,r3,r29 (Scratch)
  66 //      r32-r35 (Locals; r32 receives ar.pfs from alloc)
  67 //      r36-r39 (Used to pass arguments to error handling routine)
  68 //
  69 //    Predicate Registers:      p6 - p10
  70 //
  71 // *********************************************************************
  72 //
  73 // IEEE Special Conditions:
  74 //
  75 //    All faults and exceptions should be raised correctly.
  76 //    Overflow can occur.
  77 //    hypot(Infinity and anything) = +Infinity
  78 //    hypot(QNaN and anything) = QNaN
  79 //    hypot(SNaN and anything ) = QNaN
  80 //
  81 // *********************************************************************
  82 //
  83 // Implementation:
  84 //  x2 = x * x   in double-extended
  85 //  y2 = y * y   in double-extended
  86 //  temp = x2 + y2   in double-extended
  87 //  sqrt(temp) rounded to double
  88 //
  89 // *********************************************************************
  90
  91 #include "libm_support.h"
  92
  93 GR_SAVE_PFS         = r33   // holds ar.pfs across the error-support call
  94 GR_SAVE_B0          = r34   // holds the return address (b0) across the call
  95 GR_SAVE_GP          = r35   // holds gp across the call
  96 GR_Parameter_X      = r36   // address of the stored x argument
  97 GR_Parameter_Y      = r37   // address of the stored y argument
  98 GR_Parameter_RESULT = r38   // address of the stored result
  99 GR_Parameter_TAG    = r39   // error tag (hypot sets it to 46)
 100 // Floating-point register aliases used by the error path
 101 FR_X                = f32   // copy of the x input (made from f8 in hypot)
 102 FR_Y                = f33   // copy of the y input (made from f9 in hypot)
 103 FR_RESULT           = f8    // function result / return register
 104
 105 .section .text
 106 #ifndef _LIBC
 107 .proc cabs#      // cabs is only exported when not building inside libc
 108 .global cabs#
 109 cabs:            // no instructions of its own: control continues into hypot below
 110 .endp cabs       // NOTE(review): relies on the .align 64 before hypot adding no non-executable padding - confirm
 111 #endif
 112 .proc hypot#
 113 .global hypot#
 114 .align 64
 115 // hypot(x,y): x is passed in f8 and y in f9; the double result is returned in f8.
 116 hypot:
 117 #ifdef _LIBC
 118 .global __hypot
 119 __hypot:
 120 .global __ieee754_hypot
 121 __ieee754_hypot:
 122 #endif
 123 {.mfi
 124   alloc r32= ar.pfs,0,4,4,0
 125   // f10 = x*x, computed in status field s1
 126   fma.s1 f10=f8,f8,f0
 127   // r2 = exponent bias - 1 (0xfffe); used below to build 0.5 in f7
 128   mov r2=0xfffe
 129 }
 130 {.mfi
 131   // upper 16 bits of single-precision 63/8 (shifted into place below)
 132   mov r3=0x40fc //0000
 133   // f11 = y*y
 134   fma.s1 f11=f9,f9,f0
 135   // upper 20 bits of single-precision 429/16 (shifted into place below)
 136   mov r29=0x41d68;; //000
 137 }
 138 // ---- Classify the inputs while the polynomial constants are assembled ----
 139 { .mfi
 140      nop.m 0
 141 //   Check if x is an Inf - if so return Inf even
 142 //   if y is a NaN (C9X).  p7 = x is +/-Inf, p6 = x is not Inf
 143      fclass.m.unc p7, p6 = f8, 0x023
 144      shl r3=r3,16
 145 }
 146 {.mfi
 147         nop.m 0
 148   // copy f8 to f32 (FR_X) so the error path can still store the original x
 149   // set Denormal, if necessary
 150   // (executed unconditionally - the instruction has no qualifying predicate)
 151   fma.d.s0 f32=f8,f1,f0
 152   nop.i 0;;
 153 }
 154 { .mfi
 155      nop.m 0
 156 //   Check if y is an Inf - if so return Inf even
 157 //   if x is a NaN (C9X).  p8 = y is +/-Inf, p9 = y is not Inf
 158      fclass.m.unc p8, p9 = f9, 0x023
 159      shl r29=r29,12
 160 }
 161 { .mfb
 162          // f7=0.5
 163          setf.exp f7=r2
 164 //   For x=inf, multiply y by 1 to raise invalid on y an SNaN
 165 //   (p7) fma.s0 f9=f9,f1,f0
 166      // copy f9 to f33 (FR_Y); set Denormal, if necessary
 167          fma.d.s0 f33=f9,f1,f0
 168      nop.b 0;;
 169 }
 170 {.mfb
 171   // f13=63/8
 172   setf.s f13=r3
 173   // if x is not Inf (p6): p6 = (y is +/-0)
 174   (p6) fclass.m p6,p0=f9,0x7
 175   nop.b 0
 176 }
 177 {.mlx
 178   nop.m 0
 179   movl r2=0x408c0000;; // single-precision 35/8, moved into f9 further down
 180 }
 181
 182 {.mfi
 183   // f34=429/16
 184   setf.s f34=r29
 185   // if y is not Inf (p9): p9 = (x is +/-0)
 186   (p9) fclass.m p9,p0=f8,0x7
 187   // upper 16 bits of single-precision 231/16 (shifted into place below)
 188   mov r3=0x4167;; //0000
 189 }
 190 {.mfi
 191   nop.m 0
 192   // a=x2+y2
 193   fma.s1 f12=f10,f1,f11
 194   nop.i 0;;
 195 }
 196 {.mfi
 197   nop.m 0
 198   // if x is zero (p9): p8 = (y is neither NaN nor NaTVal)
 199   (p9) fclass.m p8,p0=f9,0x3f
 200   shl r3=r3,16
 201 }
 202 {.mfi
 203   nop.m 0
 204   // f6=2
 205   fma.s1 f6=f1,f1,f1
 206   nop.i 0;;
 207 }
 208
 209
 210 {.mfi
 211   nop.m 0
 212   // if y is zero (p6): p7 = (x is neither NaN nor NaTVal)
 213   (p6) fclass.m p7,p0=f8,0x3f
 214   nop.i 0;;
 215 }
 216 {.mfi
 217   // f9=35/8 (the y input was already copied to f33 above)
 218   setf.s f9=r2
 219   nop.f 0
 220   // r2 = exponent threshold 2*emax-2 for the overflow screen further down
 221   mov r2=0x107fb;;
 222 }
 223 // ---- Early exits: one argument is Inf, or one is zero and the other is not a NaN ----
 224 {.mfb
 225   nop.m 0
 226   // if f8=Infinity or f9=Zero, return |f8| (sign taken from f0, rest from the copy in f32)
 227   (p7) fmerge.s f8=f0,f32
 228   (p7) br.ret.spnt b0
 229 }
 230 {.mfb
 231   nop.m 0
 232   // if f9=Infinity or f8=Zero, return |f9| (sign taken from f0, rest from the copy in f33)
 233   (p8) fmerge.s f8=f0,f33
 234   (p8) br.ret.spnt b0;;
 235 }
 236
 237 // ---- General case: sqrt(a) from a reciprocal-square-root seed plus a polynomial in d ----
 238 {.mfi
 239   // f10 =231/16 (x*x in f10 is no longer needed once a is in f12)
 240   setf.s f10=r3
 241   // z0=frsqrta(a); p6 qualifies all refinement steps below
 242   frsqrta.s1 f8,p6=f12
 243   nop.i 0;;
 244 }
 245
 246 { .mfi
 247          nop.m 0
 248 //   Identify Natvals, Infs, NaNs, and Zeros
 249 //   and return result (p7 = a is one of those classes)
 250      fclass.m.unc p7, p0 = f12, 0x1E7
 251      nop.i 0;;
 252 }
 253 {.mfb
 254   // get exponent of x^2+y^2 (compared against r2 further down)
 255   getf.exp r3=f12
 256   // if special case, set f8
 257   (p7) mov f8=f12
 258   (p7) br.ret.spnt b0;;
 259 }
 260
 261
 262 {.mfi
 263   nop.m 0
 264   // S0=a*z0
 265   (p6) fma.s1 f14=f12,f8,f0
 266   nop.i 0
 267 }
 268 {.mfi
 269   nop.m 0
 270   // H0=0.5*z0
 271   (p6) fma.s1 f15=f8,f7,f0
 272   nop.i 0;;
 273 }
 274
 275
 276 {.mfi
 277   nop.m 0
 278   // f6=5/2  (0.5 + 2)
 279   fma.s1 f6=f7,f1,f6
 280   nop.i 0
 281 }
 282 {.mfi
 283   nop.m 0
 284   // f11=3/2  (0.5 + 1; y*y in f11 is no longer needed)
 285   fma.s1 f11=f7,f1,f1
 286   nop.i 0;;
 287 }
 288
 289 {.mfi
 290   nop.m 0
 291   // d=0.5-S0*H0  (f7 is reused: it held 0.5 until here)
 292   (p6) fnma.s1 f7=f14,f15,f7
 293   nop.i 0;;
 294 }
 295 // Polynomial P = 1 + d*(3/2 + 5/2*d) + d^3*(35/8 + 63/8*d + d^2*(231/16 + 429/16*d))
 296 {.mfi
 297   nop.m 0
 298   // P67=231/16+429/16*d
 299   (p6) fma.s1 f10=f34,f7,f10
 300   nop.i 0
 301 }
 302 {.mfi
 303   nop.m 0
 304   // P45=63/8*d+35/8
 305   (p6) fma.s1 f9=f13,f7,f9
 306   nop.i 0;;
 307 }
 308 {.mfi
 309   nop.m 0
 310   // P23=5/2*d+3/2
 311   (p6) fma.s1 f11=f6,f7,f11
 312   nop.i 0
 313 }
 314 {.mfi
 315   nop.m 0
 316   // d2=d*d  (f13 is reused: 63/8 was consumed by P45 above)
 317   (p6) fma.s1 f13=f7,f7,f0
 318   nop.i 0;;
 319 }
 320
 321 {.mfi
 322   nop.m 0
 323   // P47=d2*P67+P45
 324   (p6) fma.s1 f10=f10,f13,f9
 325   nop.i 0
 326 }
 327 {.mfi
 328   nop.m 0
 329   // P13=d*P23+1
 330   (p6) fma.s1 f11=f11,f7,f1
 331   nop.i 0;;
 332 }
 333 {.mfi
 334   nop.m 0
 335   // d3=d2*d
 336   (p6) fma.s1 f13=f13,f7,f0
 337   nop.i 0;;
 338 }
 339
 340 {.mfi
 341   nop.m 0
 342   // T0=d*S0
 343   (p6) fma.s1 f15=f7,f14,f0
 344   nop.i 0
 345 }
 346 {.mfi
 347   // Is x^2 + y^2 well less than the overflow
 348   // threshold?  p7 = yes (result cannot overflow), p8 = overflow is possible
 349   (p6) cmp.lt.unc p7, p8 =  r3,r2
 350   // P=P13+d3*P47
 351   (p6) fma.s1 f10=f13,f10,f11
 352   nop.i 0;;
 353 }
 354
 355 {.mfb
 356   nop.m 0
 357   // S=P*T0+S0, the final result rounded to double in f8
 358   fma.d.s0 f8=f10,f15,f14
 359   // No overflow in this case
 360   (p7) br.ret.sptk b0;;
 361 }
 362 // ---- Possible-overflow path (p8): recompute the result with a wider exponent range ----
 363 { .mfi
 364      nop.m 0
 365 (p8) fsetc.s2 0x7F,0x42
 366      // Possible overflow path, must detect by
 367      // Setting widest range exponent with prevailing
 368      // rounding mode.
 369      nop.i 0 ;;
 370 }
 371
 372
 373 { .mfi
 374    // bias+0x400 (bias+EMAX+1)
 375    (p8) mov r2=0x103ff
 376    // S=P*T0+S0 again, this time under status field s2, into f12
 377    (p8) fma.d.s2 f12=f10,f15,f14
 378    nop.i 0 ;;
 379 }
 380 { .mfi
 381 (p8) setf.exp f11 = r2
 382 (p8) fsetc.s2 0x7F,0x40
 383 //   Restore Original Mode in S2
 384      nop.i 0 ;;
 385 }
 386 { .mfi
 387      nop.m 0
 388 (p8) fcmp.lt.unc.s1 p9, p10 =  f12, f11
 389      nop.i 0 ;;
 390 }
 391 { .mib
 392      nop.m 0
 393      mov GR_Parameter_TAG = 46
 394          // No overflow: return.  Otherwise fall through into __libm_error_region
 395 (p9) br.ret.sptk b0;;
 396 }
 397 .endp hypot
 398 ASM_SIZE_DIRECTIVE(hypot)
 399 // Overflow handler: hypot falls through to here with GR_Parameter_TAG already set.
 400 .proc __libm_error_region
 401 __libm_error_region:
 402 .prologue
 403 { .mfi
 404         add   GR_Parameter_Y=-32,sp             // Parameter 2 address: old sp-32 (= new sp+32)
 405         nop.f 0
 406 .save   ar.pfs,GR_SAVE_PFS
 407         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 408 }
 409 { .mfi
 410 .fframe 64
 411         add sp=-64,sp                           // Allocate a 64-byte stack frame
 412         nop.f 0
 413         mov GR_SAVE_GP=gp                       // Save gp
 414 };;
 415 { .mmi
 416         stfd [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack, then advance the pointer to sp+48
 417         add GR_Parameter_X = 16,sp              // Parameter 1 address (sp+16)
 418 .save   b0, GR_SAVE_B0
 419         mov GR_SAVE_B0=b0                       // Save b0
 420 };;
 421 .body
 422 { .mib
 423         stfd [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
 424         add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address (sp+48)
 425         nop.b 0
 426 }
 427 { .mib
 428         stfd [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
 429         add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point back at Parameter 2 (sp+32)
 430         br.call.sptk b0=__libm_error_support#  // Call error handling function
 431 };;
 432 { .mmi
 433         nop.m 0
 434         nop.m 0
 435         add   GR_Parameter_RESULT = 48,sp      // Recompute the result address after the call
 436 };;
 437 { .mmi
 438         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 439 .restore sp
 440         add   sp = 64,sp                       // Restore stack pointer
 441         mov   b0 = GR_SAVE_B0                  // Restore return address
 442 };;
 443 { .mib
 444         mov   gp = GR_SAVE_GP                  // Restore gp
 445         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
 446         br.ret.sptk     b0                     // Return to hypot's caller
 447 };;
 448 .endp __libm_error_region
 449 ASM_SIZE_DIRECTIVE(__libm_error_region)
 450 .type   __libm_error_support#,@function     // external error handler called above
 451 .global __libm_error_support#