sysdeps/ia64/fpu/e_remainderf.S

   1   .file "remainderf.asm"
   2 // Copyright (C) 2000, 2001, Intel Corporation
   3 // All rights reserved.
   4 //
   5 // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
   6 // Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
   7 // Software Lab,
   8 // Intel Corporation.
   9 //
  10 // Redistribution and use in source and binary forms, with or without
  11 // modification, are permitted provided that the following conditions are
  12 // met:
  13 //
  14 // * Redistributions of source code must retain the above copyright
  15 // notice, this list of conditions and the following disclaimer.
  16 //
  17 // * Redistributions in binary form must reproduce the above copyright
  18 // notice, this list of conditions and the following disclaimer in the
  19 // documentation and/or other materials provided with the distribution.
  20 //
  21 // * The name of Intel Corporation may not be used to endorse or promote
  22 // products derived from this software without specific prior written
  23 // permission.
  24 //
  25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  33 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 //
  37 // Intel Corporation is the author of this code, and requests that all
  38 // problem reports or change requests be submitted to it directly at
  39 // http://developer.intel.com/opensource.
  40 //
  41 // History
  42 //====================================================================
  43 // 2/02/00 Initial version
  44 // 3/02/00 New algorithm
  45 // 4/04/00 Unwind support added
  46 // 7/21/00 Fixed quotient=2^{24*m+23} bug
  47 // 8/15/00  Bundle added after call to __libm_error_support to properly
  48 //          set [the previously overwritten] GR_Parameter_RESULT.
  49 //11/29/00  Set FR_Y to f9
  50 //
  51 // API
  52 //====================================================================
  53 // float remainderf(float,float);
  54 //
  55 // Overview of operation
  56 //====================================================================
  57 //  remainder(a,b)=a-i*b,
  58 //  where i is an integer such that, if b!=0 and a is finite,
  59 //  |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
  60 //
  61 // Algorithm
  62 //====================================================================
  63 // a). eliminate special cases
  64 // b). if |a/b|<0.25 (first quotient estimate), return a
  65 // c). use single precision divide algorithm to get quotient q
  66 //     rounded to 24 bits of precision
  67 // d). calculate partial remainders (using both q and q-ulp);
  68 //     select one and RZ(a/b) based on the sign of |a|-|b|*q
  69 // e). if the exponent difference (exponent(a)-exponent(b))
  70 //     is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
  71 //     and sticky bits to round to integer; exit loop and
  72 //     calculate final remainder
  73 // f). if exponent(a)-exponent(b)>=24, select new value of a as
  74 //     the partial remainder calculated using RZ(a/b);
  75 //     repeat from c).
  76 //
  77 // Special cases
  78 //====================================================================
  79 // a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
  80 // a=NaN or b=NaN: return NaN
  81
  82 #include "libm_support.h"
  83
  84 //
  85 // Registers used
  86 //====================================================================
  87 // Predicate registers: p6-p12
  88 // General registers:   r2,r3,r28,r29,r32 (ar.pfs), r33-r40
  89 // Floating point registers: f6-f15
  90 //
  91
  92 .section .text
  93
     // Symbolic register names.  The entry bundle of remainderf executes
     // "alloc r32=ar.pfs,1,4,4,0", i.e. 1 input + 4 locals (r32-r36) and
     // 4 outputs (r37-r40).
     //
     // Locals used by __libm_error_region to preserve b0, ar.pfs and gp
     // across the call to __libm_error_support.  GR_SAVE_SP is defined but
     // not referenced in the code visible in this file.
  94 GR_SAVE_B0                    = r33
  95 GR_SAVE_PFS                   = r34
  96 GR_SAVE_GP                    = r35
  97 GR_SAVE_SP                    = r36
  98
     // Output registers handed to __libm_error_support: addresses of the
     // stack slots holding x, y and the result, plus the error tag.
  99 GR_Parameter_X                = r37
 100 GR_Parameter_Y                = r38
 101 GR_Parameter_RESULT           = r39
 102 GR_Parameter_TAG              = r40
 103
     // Floating-point registers whose values __libm_error_region stores to
     // the stack as parameter 1 (x), parameter 2 (y) and parameter 3 (result).
 104 FR_X             = f10
 105 FR_Y             = f9
 106 FR_RESULT        = f8
 107
 108
 109   .proc  remainderf#
 110   .align 32
 111   .global remainderf#
 112   .align 32
 113
 114 remainderf:
 115 #ifdef _LIBC
 116 .global __remainderf
 117 .type __remainderf,@function
 118 __remainderf:
 119 #endif
 120 // float remainderf(float a, float b): inputs a in f8, b in f9
 121 // result in f8
     //
     // Entry sequence: form |a| (f13) and |b| (f14), classify both operands,
     // start the reciprocal approximation, and branch to the special-case
     // handlers when a is NaN/Inf or b is NaN/Inf/zero.  Otherwise execution
     // falls through into the quotient loop that follows.
 122
 123 { .mfi
 124   alloc r32=ar.pfs,1,4,4,0
 125   // f13=|a|
 126   fmerge.s f13=f0,f8
 127   nop.i 0
 128 }
 129   {.mfi
 130   nop.m 0
 131   // f14=|b|
 132   fmerge.s f14=f0,f9
 133   nop.i 0;;
 134 }
 135  {.mlx
 136   nop.m 0
 137   // r3=2^{24}-2 (single precision bit pattern; moved to f12 in the loop)
 138   movl r3=0x4b7ffffe;;
 139 }
 140
 141 // Y +-NAN, +-inf, +-0?     p11
 142 { .mfi
 143       nop.m 999
 144 (p0)  fclass.m.unc  p11,p0 = f9, 0xe7
 145       nop.i 999
 146 }
     // fclass mask bits, shown for 0xe3 (the X test below); the Y test above
     // uses 0xe7, which additionally sets the "0" (zero) bit:
 147 // qnan snan inf norm     unorm 0 -+
 148 // 1    1    1   0        0     0 11
 149 // e                      3
 150 // X +-NAN, +-inf ?         p9
 151 { .mfi
 152       nop.m 999
 153 (p0)  fclass.m.unc  p9,p0 = f8, 0xe3
 154       nop.i 999;;
 155 }
 156
 157 {.mfi
 158   nop.m 0
     // f15=0: f15 later holds the quotient estimate q0, which is only
     // written when p6 is set
 159   mov f15=f0
 160   nop.i 0
 161 }
 162 { .mfi
 163   // set p7=1 (p7 qualifies the first-pass "|a/b|<1/4" early-exit test)
 164   cmp.eq.unc p7,p0=r0,r0
 165   // Step (1)
 166   // y0 = approximation of 1 / |b| in f10 (operands are f13=|a|, f14=|b|);
     // p6 qualifies the refinement Steps (2)-(9) in the loop
 167   frcpa.s1 f10,p6=f13,f14
 168   nop.i 0;;
 169 }
 170 {.bbb
     // dispatch special operands: p9 = a is NaN/Inf, p11 = b is NaN/Inf/zero
 171   (p9) br.cond.spnt L(FREM_X_NAN_INF)
 172   (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
 173   nop.b 0
 174 }  {.mfi
 175    nop.m 0
 176    // set D flag if a (f8) is denormal
 177    fnma.s0 f6=f8,f1,f8
 178    nop.i 0;;
 179 }
 180
 181 .align 32
 182 L(remloop24):
     // Main loop.  Live on entry to each pass:
     //   f13 = current |a| (dividend), f14 = |b|, f10 = y0 (reciprocal
     //   approximation), f8 = carries the sign of a, r3 = bit pattern of
     //   2^{24}-2, p6 = predicate produced by frcpa, p7 = 1 on the first
     //   pass only.
     // Each pass computes a quotient q of f13/f14 rounded to 24 bits, forms
     // the partial remainders for q and q'=q-ulp, and either branches back
     // (p12) with the selected partial remainder as the new dividend or
     // falls through to the final rounding and remainder computation.
 183   { .mfi
 184   // f12=2^{24}-2
 185   setf.s f12=r3
 186   // Step (2)
 187   // q0 = a * y0 in f15
 188   (p6) fma.s1 f15=f13,f10,f0
 189   nop.i 0
 190 }
 191 { .mfi
 192   nop.m 0
 193   // Step (3)
 194   // e0 = 1 - b * y0 in f7
 195   (p6) fnma.s1 f7=f14,f10,f1
 196   nop.i 0;;
 197 }
 198 {.mlx
 199   nop.m 0
 200   // r2=1.25*2^{-24} (single precision bit pattern)
 201   movl r2=0x33a00000;;
 202 }
 203   { .mfi
 204   nop.m 0
 205   // Step (4)
 206   // q1 = q0 + e0 * q0 in f6
 207   (p6) fma.s1 f6=f7,f15,f15
 208   nop.i 0
 209 }
 210 { .mfi
 211   nop.m 0
 212   // Step (5)
 213   // e1 = e0 * e0 in f7
 214   (p6) fma.s1 f7=f7,f7,f0
 215   nop.i 0;;
 216 }
 217  {.mii
     // first pass only (p7): r29 = exponent field of q0,
     // r28 = 0xfffd = bias-2 (bias is 0xffff)
 218   (p7) getf.exp r29=f15
 219   (p7) mov r28=0xfffd
 220   nop.i 0;;
 221 }
 222
 223  { .mfi
 224   // f15=1.25*2^{-24}
 225   setf.s f15=r2
 226   // Step (6)
 227   // q2 = q1 + e1 * q1 in f6
 228   (p6) fma.s1 f6=f7,f6,f6
 229   nop.i 0
 230 }
 231 { .mfi
     // r2=0x3e7: start building the double precision pattern of
     // 2^{-24}+2^{-48} (completed by the shl/add/shl sequence below)
 232   mov r2=0x3e7
 233   // Step (7)
 234   // e2 = e1 * e1 in f7
 235   (p6) fma.s1 f7=f7,f7,f0
 236   nop.i 0;;
 237 }
 238
 239  {.mmi
 240   // q<1/4 ? (i.e. expon< -2)
     // p7 stays set only if the test succeeds; otherwise it is cleared, so
     // the early-exit path is not considered again on later passes
 241   (p7) cmp.gt.unc p7,p0=r28,r29
 242   nop.m 0
 243   // r2=0x3e7000000
 244   shl r2=r2,24;;
 245 }
 246
 247 {.mfb
 248   // r2=0x3e7000001
 249   add r2=1,r2
 250  // if |a/b|<1/4, set D flag before returning
     // (f9 = f9*0 + f8, i.e. a rounded to single precision)
 251  (p7) fma.s.s0 f9=f9,f0,f8
 252   nop.b 0;;
 253 }
 254  {.mfb
 255  nop.m 0
 256  // can be combined with bundle above if sign of 0 or
 257  // FTZ enabled are not important
 258  (p7) fmerge.s f8=f8,f9
 259  // return a if 4*|a|<|b| (estimated quotient < 1/4)
 260  (p7) br.ret.spnt b0;;
 261 }
 262   {.mfi
 263   nop.m 0
 264   // set f8 to current a value | sign
     // (sign taken from f8, magnitude from f13)
 265   fmerge.s f8=f8,f13
 266   // r2=2^{-24}+2^{-48} (double prec.)
 267   shl r2=r2,28;;
 268 }
 269
 270
 271 { .mfi
 272   // r29= -32+bias
 273   mov r29=0xffdf
 274   // Step (8)
 275   // q3 = q2 + e2 * q2 in f6
 276   (p6) fma.d.s1 f6=f7,f6,f6
 277   nop.i 0;;
 278 }
 279 { .mfi
 280   nop.m 0
 281   // Step (9)
 282   // q = q3 in f11 (rounded to single precision)
 283   (p6) fma.s.s1 f11=f6,f1,f0
 284   nop.i 0;;
 285 }
 286   {.mfi
 287   // f7=2^{-24}+2^{-48} (double precision pattern built in r2 above)
 288   setf.d f7=r2
 289   // last step ? (q3<2^{24}-2 --> q<2^{24})
     // p12 is set when q3 is NOT below 2^{24}-2, i.e. another pass is needed
 290   fcmp.lt.unc.s1 p0,p12=f6,f12
 291   nop.i 0
 292 } {.mfi
 293   // f12=2^{-32}
 294    setf.exp f12=r29
 295    nop.f 0
 296    nop.i 0;;
 297 }
 298   {.mfi
 299   nop.m 0
 300   // r=a-b*q (in f6)
 301   fnma.s1 f6=f14,f11,f13
 302   nop.i 0
 303 }
 304 {.mfi
 305   nop.m 0
 306   // q'=q-q*(1.25*2^{-24})   (q'=q-ulp), in f15
 307   fnma.s.s1 f15=f11,f15,f11
 308   nop.i 0;;
 309 }
 310
 311   {.mfi
 312   nop.m 0
 313   // r2=a-b*q'  ("r2" here names the second partial remainder, kept in
     // f13; it is not the general register r2)
 314   fnma.s1 f13=f14,f15,f13
 315   nop.i 0;;
 316 }
 317   {.mfi
 318   nop.m 0
 319   // r>0 iff q=RZ(a/b) and inexact
 320   fcmp.gt.unc.s1 p8,p0=f6,f0
 321   nop.i 0
 322 }
 323 {.mfi
 324   nop.m 0
 325   // r<0 iff q'=RZ(a/b) and inexact
     // p9 = (r<0), p10 = complement
 326   fcmp.lt.unc.s1 p9,p10=f6,f0
 327   nop.i 0;;
 328 }
 329 .pred.rel "mutex",p8,p9
 330   {.mfi
 331   nop.m 0
 332   // (p8) Q=q+(last iteration ? sticky bits:0)
 333   // i.e. Q=q+q*x  (x=2^{-32} or 0)
 334   (p8) fma.s1 f11=f11,f12,f11
 335   nop.i 0
 336 }
 337 {.mfi
 338   nop.m 0
 339   // (p9) Q=q'+(last iteration ? sticky bits:0)
 340   // i.e. Q=q'+q'*x  (x=2^{-24} or 0: if expon. difference=23, want to round back to q)
 341   (p9) fma.s1 f11=f15,f7,f15
 342   nop.i 0;;
 343 }
 344
 345   {.mfb
 346   nop.m 0
 347   // (p9) new a is r2, which is already in f13 (if not last iteration)
 348   // (p10) new a =r
 349   (p10) mov f13=f6
     // repeat while the quotient estimate was not below 2^{24}-2
 350   (p12) br.cond.sptk L(remloop24);;
 351 }
 352
 353 // last iteration: round Q to an integer and compute a - Q*b
 354   {.mfi
 355   nop.m 0
 356   // set f9=|b|*sgn(a)
 357   fmerge.s f9=f8,f9
 358   nop.i 0
 359 }
 360   {.mfi
 361   nop.m 0
 362   // round to integer
 363   fcvt.fx.s1 f11=f11
 364   nop.i 0;;
 365 }
 366   {.mfi
 367   nop.m 0
 368   // save sign of a
 369   fmerge.s f7=f8,f8
 370   nop.i 0
 371 }
 372 {.mfi
 373   nop.m 0
 374   // normalize (convert the integer back to floating-point format)
 375   fcvt.xf f11=f11
 376   nop.i 0;;
 377 }
 378   {.mfi
 379   nop.m 0
 380   // This can be removed if sign of 0 is not important
 381   // get remainder using sf1 (copy in f12, used only for the zero test)
 382   fnma.s.s1 f12=f9,f11,f8
 383   nop.i 0
 384 }
 385   {.mfi
 386   nop.m 0
 387   // get remainder (final result in f8, computed with status field 0)
 388   fnma.s.s0 f8=f9,f11,f8
 389   nop.i 0;;
 390 }
 391
 392
 393
 394   {.mfi
 395   nop.m 0
 396   // f12=0?
 397   // This can be removed if sign of 0 is not important
 398   fcmp.eq.unc.s1 p8,p0=f12,f0
 399   nop.i 0;;
 400 }
 401   {.mfb
 402   nop.m 0
 403   // if f8=0, set sign correctly (sign of a, saved in f7)
 404   // This can be removed if sign of 0 is not important
 405   (p8) fmerge.s f8=f7,f8
 406   // return
 407   br.ret.sptk b0;;
 408 }
 409
 410
 411 L(FREM_X_NAN_INF):
     // Reached when X (f8) is NaN or +-Inf (p9 from the fclass 0xe3 test in
     // the entry sequence).  Handles Y==0 by branching to FREM_Y_ZERO; for
     // X infinite it produces a NaN result and goes to the error-reporting
     // path; otherwise it returns the result of frcpa on the NaN operand(s).
 412
 413 // Y zero ?
 414 {.mfi
 415   nop.m 0
     // f10 = Y*1+0, a copy of Y used for the compare below
 416   fma.s1 f10=f9,f1,f0
 417   nop.i 0;;
 418 }
 419 {.mfi
 420  nop.m 0
     // p11 = (Y == 0)
 421  fcmp.eq.unc.s1 p11,p0=f10,f0
 422  nop.i 0;;
 423 }
 424 {.mib
 425   nop.m 0
 426   nop.i 0
 427   // if Y zero
 428   (p11) br.cond.spnt L(FREM_Y_ZERO);;
 429 }
 430
 431 // X infinity? Return QNAN indefinite
     // (p8 is narrowed below to "X infinite and Y not NaN")
 432 { .mfi
 433       nop.m 999
 434 (p0)  fclass.m.unc  p8,p0 = f8, 0x23
 435       nop.i 999
 436 }
 437 // X infinity? (same test again; p11 keeps "X is infinite" and later
     // selects the error-return branch)
 438 { .mfi
 439       nop.m 999
 440 (p0)  fclass.m.unc  p11,p0 = f8, 0x23
 441       nop.i 999;;
 442 }
 443 // Y NaN ?  p8 remains set only if X is infinite and Y is not a NaN
 444 {.mfi
 445          nop.m 999
 446 (p8) fclass.m.unc p0,p8=f9,0xc3
 447          nop.i 0;;
 448 }
 449 {.mfi
 450         nop.m 999
 451         // also set Denormal flag if necessary
 452 (p8) fma.s0 f9=f9,f1,f0
 453     nop.i 0
 454 }
 455 { .mfi
 456       nop.m 999
     // X infinite, Y not NaN: f8 = frcpa(f8,f8), i.e. Inf/Inf
     // (presumably yields the QNaN indefinite and raises invalid -- confirm
     // against the frcpa special-case table)
 457 (p8)  frcpa.s0 f8,p7 = f8,f8
 458       nop.i 999 ;;
 459 }
 460
 461 {.mfi
 462       nop.m 999
     // X infinite: copy f8 into f10 (FR_X) for the error handler.
     // NOTE(review): when p8 was set, f8 has already been overwritten by
     // the frcpa above, so f10 does not hold the original X -- confirm
     // this is intended.
 463 (p11) mov f10=f8
 464           nop.i 0
 465 }
 466 { .mfi
 467       nop.m 999
     // round the result to single precision
 468 (p8) fma.s f8=f8,f1,f0
 469           nop.i 0 ;;
 470 }
 471
 472 { .mfb
 473       nop.m 999
     // f8 = frcpa(f8,f9) (executed unconditionally); if X was infinite
     // (p11), report the error through __libm_error_support
 474       frcpa.s0 f8,p7=f8,f9
 475           (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
 476 }
 477 { .mib
 478         nop.m 0
 479         nop.i 0
     // X was a NaN: return f8
 480         br.ret.spnt    b0 ;;
 481 }
 482
 483
 484 L(FREM_Y_NAN_INF_ZERO):
     // Reached when Y (f9) is NaN, +-Inf or +-0 (p11 from the fclass 0xe7
     // test in the entry sequence) and the X NaN/Inf branch was not taken.
     // Y infinite: return X.  Y NaN: return Y.  Otherwise Y is zero and
     // execution continues below this block.
 485
 486 // Y INF
 487 { .mfi
 488       nop.m 999
 489 (p0)  fclass.m.unc  p7,p0 = f9, 0x23
 490       nop.i 999 ;;
 491 }
 492
 493 { .mfb
 494       nop.m 999
     // result = X (rounded to single precision), then return
 495 (p7)  fma.s f8=f8,f1,f0
 496 (p7)  br.ret.spnt    b0 ;;
 497 }
 498
 499 // Y NAN?
 500 { .mfi
 501       nop.m 999
 502 (p0)  fclass.m.unc  p9,p0 = f9, 0xc3
 503       nop.i 999 ;;
 504 }
 505
 506 { .mfb
 507       nop.m 999
     // result = Y*1+0 (the NaN operand passed through fma.s), then return
 508 (p9)  fma.s f8=f9,f1,f0
 509 (p9)  br.ret.spnt    b0 ;;
 510 }
 511
 512 L(FREM_Y_ZERO):
 513 // Y zero? Must be zero at this point
 514 // because it is the only choice left.
 515 // Return QNAN indefinite
     // Also reached by a direct branch from FREM_X_NAN_INF when Y == 0.
     // Leaves X in f10 (FR_X) and the result in f8, then falls through to
     // EXP_ERROR_RETURN.
 516
 517 // X NAN?  p9 = X is a NaN, p10 = X is not a NaN
 518 { .mfi
 519       nop.m 999
 520 (p0)  fclass.m.unc  p9,p10 = f8, 0xc3
 521       nop.i 999 ;;
 522 }
 523 { .mfi
 524       nop.m 999
     // only when X is not a NaN: p9 = f8 matches none of the classes in
     // mask 0xff, p10 = f8 matches at least one of them
 525 (p10)  fclass.nm  p9,p10 = f8, 0xff
 526       nop.i 999 ;;
 527 }
 528
 529 {.mfi
 530  nop.m 999
     // (p9) f11 = frcpa(X, 0)
 531  (p9) frcpa f11,p7=f8,f0
 532  nop.i 0;;
 533 }
 534
 535 { .mfi
 536       nop.m 999
     // (p10) f11 = frcpa(0, 0)
 537 (p10)  frcpa         f11,p7 = f0,f0
 538 nop.i 999;;
 539 }
 540
 541 { .mfi
 542       nop.m 999
     // f10 (FR_X) = X, saved for the error handler
 543 (p0)  fmerge.s      f10 = f8, f8
 544       nop.i 999
 545 }
 546
 547 { .mfi
 548       nop.m 999
     // result (f8) = f11 rounded to single precision
 549 (p0)  fma.s f8=f11,f1,f0
 550       nop.i 999
 551 }
 552
 553
 554 L(EXP_ERROR_RETURN):
     // Common error exit: load the error tag and transfer to
     // __libm_error_region, which calls __libm_error_support.
 555
 556 { .mib
     // 125 is the tag passed to __libm_error_support; its meaning is
     // defined outside this file (presumably the remainderf domain error
     // code -- confirm in libm_support.h)
 557 (p0)  mov   GR_Parameter_TAG = 125
 558           nop.i 999
 559 (p0)  br.sptk __libm_error_region;;
 560 }
 561
 562 .endp remainderf
 563 ASM_SIZE_DIRECTIVE(remainderf)
 564 #ifdef _LIBC
 565 ASM_SIZE_DIRECTIVE(__remainderf)
 566 #endif
 567
 568
 569
 570 .proc __libm_error_region
 571 __libm_error_region:
     // Error stub entered by a plain branch from remainderf with
     // GR_Parameter_TAG already set.  It creates a 64-byte stack frame,
     // stores FR_Y, FR_X and FR_RESULT into it, passes the addresses of
     // those slots (plus the tag) to __libm_error_support, then reloads the
     // result into f8, restores sp/b0/gp/ar.pfs and returns to the caller
     // of remainderf.
     // Slot layout relative to sp after the frame is created:
     //   sp+16 = x, sp+32 = y, sp+48 = result.
 572 .prologue
 573 { .mfi
 574         add   GR_Parameter_Y=-32,sp             // Parameter 2 (y) address: old sp-32
 575         nop.f 0
 576 .save   ar.pfs,GR_SAVE_PFS
 577         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 578 }
 579 { .mfi
 580 .fframe 64
 581         add sp=-64,sp                           // Create new 64-byte stack frame
 582         nop.f 0
 583         mov GR_SAVE_GP=gp                       // Save gp
 584 };;
 585 { .mmi
 586         stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 (y); pointer += 16
 587         add GR_Parameter_X = 16,sp              // Parameter 1 (x) address: sp+16
 588 .save   b0, GR_SAVE_B0
 589         mov GR_SAVE_B0=b0                       // Save b0
 590 };;
 591 .body
 592 { .mib
 593         stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
 594         add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 (result) address
 595         nop.b 0
 596 }
 597 { .mib
 598         stfs [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack (pointer was advanced above)
 599         add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point back at the y slot
 600         br.call.sptk b0=__libm_error_support#;;  // Call error handling function
 601 }
 602 { .mmi
 603         nop.m 0
 604         nop.m 0
 605         add   GR_Parameter_RESULT = 48,sp      // Recompute result address after the call
 606 };;
 607 { .mmi
 608         ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 609 .restore sp
 610         add   sp = 64,sp                       // Restore stack pointer
 611         mov   b0 = GR_SAVE_B0                  // Restore return address
 612 };;
 613 { .mib
 614         mov   gp = GR_SAVE_GP                  // Restore gp
 615         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
 616         br.ret.sptk     b0                     // Return
 617 };;
 618
 619 .endp __libm_error_region
 620 ASM_SIZE_DIRECTIVE(__libm_error_region)
 621
 622
 623 .type   __libm_error_support#,@function
     // External error handler called from __libm_error_region; it is not
     // defined in this file.
 624 .global __libm_error_support#