sysdeps/ia64/fpu/s_libm_ldexpf.S

   1 .file "libm_ldexpf.s"
   2
   3
   4 // Copyright (c) 2000 - 2003, Intel Corporation
   5 // All rights reserved.
   6 //
   7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
   8 //
   9 // Redistribution and use in source and binary forms, with or without
  10 // modification, are permitted provided that the following conditions are
  11 // met:
  12 //
  13 // * Redistributions of source code must retain the above copyright
  14 // notice, this list of conditions and the following disclaimer.
  15 //
  16 // * Redistributions in binary form must reproduce the above copyright
  17 // notice, this list of conditions and the following disclaimer in the
  18 // documentation and/or other materials provided with the distribution.
  19 //
  20 // * The name of Intel Corporation may not be used to endorse or promote
  21 // products derived from this software without specific prior written
  22 // permission.
  23
  24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35 //
  36 // Intel Corporation is the author of this code, and requests that all
  37 // problem reports or change requests be submitted to it directly at
  38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
  39 //
  40 // History
  41 //==============================================================
  42 // 02/02/00 Initial version
  43 // 01/26/01 ldexpf completely reworked and now standalone version
  44 // 01/04/02 Added handling for int 32 or 64 bits
  45 // 05/20/02 Cleaned up namespace and sf0 syntax
  46 // 02/10/03 Reordered header: .section, .global, .proc, .align
  47 // 08/04/03 Improved performance
  48 //
  49 // API
  50 //==============================================================
  51 // float __libm_ldexpf  (float x, int n, int int_type)
  52 // input  floating point f8 and int n (r33), int int_type (r34)
  53 // output floating point f8
  54 //
  55 // int_type = 0 if int is 32 bits
  56 // int_type = 1 if int is 64 bits
  57 //
  58 // Returns x* 2**n using an fma and detects overflow
  59 // and underflow.
  60 //
  61 //
  62 // Strategy:
  63 //  Compute biased exponent of result exp_Result = N + exp_X
  64 //  Break into ranges:
  65 //   exp_Result > 0x1007e                 -> Certain overflow
  66 //   exp_Result = 0x1007e                 -> Possible overflow
  67 //   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
  68 //   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
  69 //   exp_Result < 0x0ff81 - 23            -> Certain underflow
  70
  71 FR_Big         = f6   // Positive overflow threshold (built from GR_pos_ov_limit)
  72 FR_NBig        = f7   // Negative overflow threshold (built from GR_neg_ov_limit)
  73 FR_Floating_X  = f8   // Input x
  74 FR_Result      = f8   // Returned result (same register as the input x)
  75 FR_Result2     = f9   // Result recomputed under status field s2
  76 FR_Result3     = f10  // Result recomputed under status field s3
  77 FR_Norm_X      = f11  // Normalized x
  78 FR_Two_N       = f12  // 2**N
  79
  80 GR_neg_ov_limit= r14  // 0x3007f, exponent for negative overflow
  81 GR_N_Biased    = r15  // GR_Bias + N, biased exponent of 2**N
  82 GR_Big         = r16  // 35000, N at or above this is certain overflow
  83 GR_NBig        = r17  // -35000, N at or below this is certain underflow
  84 GR_exp_Result  = r18  // GR_exp_X + N
  85 GR_pos_ov_limit= r19  // 0x1007f, exponent for positive overflow
  86 GR_Bias        = r20  // 0x0ffff
  87 GR_N_as_int    = r21  // N as a 64-bit value (sign-extended when int is 32 bits)
  88 GR_signexp_X   = r22  // Sign and exponent of x as returned by getf.exp
  89 GR_exp_X       = r23  // GR_signexp_X masked with GR_exp_mask
  90 GR_exp_mask    = r24  // 0x1ffff
  91 GR_max_exp     = r25  // 0x1007e, exponent of maximum float
  92 GR_min_exp     = r26  // 0x0ff81, exponent of minimum float
  93 GR_min_den_exp = r27  // 0x0ff81 - 23, exponent of min denorm float
  94
  95 GR_SAVE_B0          = r32  // Error path only: holds the caller's b0
  96 GR_SAVE_GP          = r33  // Error path only: holds gp (r33 carried n on entry)
  97 GR_SAVE_PFS         = r34  // Error path only: holds ar.pfs (r34 carried int_type on entry)
  98 GR_Parameter_X      = r35  // Error path: address of x on the stack
  99 GR_Parameter_Y      = r36  // Error path: address of N on the stack
 100 GR_Parameter_RESULT = r37  // Error path: address of the result on the stack
 101 GR_Tag              = r38  // Error path: error tag (148 overflow, 149 underflow)
 102
 103 .section .text
 104 GLOBAL_LIBM_ENTRY(__libm_ldexpf)
 105
 106 // In: x in f8, n in r33, int_type in r34.  Out: f8 = x * 2**n.
 107 //   Is x NaN, Inf or Zero (either sign)?
 108 //   Build the exponent Bias
 109 //
 110 {    .mfi
 111      getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
 112      fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
 113      mov           GR_Bias = 0x0ffff            // Exponent bias
 114 }
 115 //
 116 //   Normalize x
 117 //   Is integer type 32 bits?
 118 //
 119 {    .mfi
 120      mov           GR_Big = 35000      // If N this big then certain overflow
 121      fnorm.s1      FR_Norm_X = FR_Floating_X
 122      cmp.eq        p8,p9 = r34,r0      // p8: int_type == 0 (32-bit int), p9: otherwise
 123 }
 124 ;;
 125
 126 //   Sign extend N if int is 32 bits
 127 {    .mfi
 128 (p9) mov           GR_N_as_int = r33     // Copy N if int is 64 bits
 129      fclass.m      p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p9 is reused from here on)
 130 (p8) sxt4          GR_N_as_int = r33     // Sign extend N if int is 32 bits
 131 }
 132 { .mfi
 133      mov           GR_NBig = -35000    // If N this small then certain underflow
 134      nop.f         0
 135      mov           GR_max_exp = 0x1007e      // Exponent of maximum float
 136 }
 137 ;;
 138
 139 //   Create biased exponent for 2**N
 140 {    .mfi
 141      add           GR_N_Biased = GR_Bias,GR_N_as_int
 142      nop.f         0
 143      cmp.ge        p7, p0 = GR_N_as_int, GR_Big  // Certain overflow?
 144 }
 145 {    .mib
 146      cmp.le        p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
 147      mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
 148 (p9) br.cond.spnt  LDEXPF_UNORM              // Branch if x=unorm
 149 }
 150 ;;
 151
 152 LDEXPF_COMMON:
 153 // Main path continues.  Also return here from x=unorm path.
 154 //   Create 2**N
 155 .pred.rel "mutex",p7,p8   // p7 (N >= GR_Big) and p8 (N <= GR_NBig) are never both set
 156 {    .mfi
 157      setf.exp      FR_Two_N = GR_N_Biased
 158      nop.f         0
 159 (p7) mov           GR_N_as_int = GR_Big      // Limit max N
 160 }
 161 {    .mfi
 162 (p8) mov           GR_N_as_int = GR_NBig     // Limit min N
 163      nop.f         0
 164 (p8) cmp.eq        p7,p0 = r0,r0             // Set p7 if |N| big
 165 }
 166 ;;
 167
 168 //
 169 //   Create biased exponent for 2**N for N big
 170 //   Is N zero?
 171 //   NOTE(review): the zero test reads raw r33, not the sign-extended GR_N_as_int - confirm intended
 172 {    .mfi
 173 (p7) add           GR_N_Biased = GR_Bias,GR_N_as_int
 174      nop.f         0
 175      cmp.eq.or     p6,p0 = r33,r0            // p6 |= (r33 == 0)
 176 }
 177 {    .mfi
 178      mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
 179      nop.f         0
 180      mov           GR_exp_mask = 0x1ffff     // Exponent mask
 181 }
 182 ;;
 183
 184 //
 185 //   Create 2**N for N big
 186 //   Return x when N = 0 or X = Nan, Inf, Zero
 187 //
 188 {    .mfi
 189 (p7) setf.exp      FR_Two_N = GR_N_Biased    // Rebuild 2**N from the clamped N
 190      nop.f         0
 191      mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
 192 }
 193 {    .mfb
 194      and           GR_exp_X = GR_exp_mask, GR_signexp_X // Keep only the exponent bits of x
 195 (p6) fma.s.s0      FR_Result = FR_Floating_X, f1, f0    // Result = x * f1 + f0
 196 (p6) br.ret.spnt   b0                        // Early return for the p6 cases
 197 }
 198 ;;
 199
 200 //
 201 //   Raise Denormal operand flag with compare
 202 //   Compute biased result exponent
 203 //
 204 {    .mfi
 205      add           GR_exp_Result = GR_exp_X, GR_N_as_int
 206      fcmp.ge.s0    p0,p11 = FR_Floating_X,f0 // Done for the flag side effect (see above)
 207      mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
 208 }
 209 ;;
 210
 211 //
 212 //   Do final operation
 213 //
 214 {    .mfi
 215      cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
 216      fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
 217      cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 218 }
 219 {    .mfb
 220      nop.m         0
 221      nop.f         0
 222 (p9) br.cond.spnt  LDEXPF_UNDERFLOW           // Branch if certain underflow
 223 }
 224 ;;
 225
 226 {    .mib
 227 (p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
 228 (p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
 229 (p7) br.ret.sptk   b0                         // Return from main path
 230 }
 231 ;;
 232
 233 {    .bbb
 234 (p6) br.cond.spnt  LDEXPF_OVERFLOW            // Branch if certain overflow
 235 (p8) br.cond.spnt  LDEXPF_POSSIBLE_OVERFLOW   // Branch if possible overflow
 236 (p9) br.cond.spnt  LDEXPF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
 237 }
 238 ;;
 239
 240 // Here if possible underflow.
 241 // Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
 242 LDEXPF_POSSIBLE_UNDERFLOW:
 243 //
 244 // Here if possible overflow.
 245 // Resulting exponent: 0x1007e = exp_Result
 246 LDEXPF_POSSIBLE_OVERFLOW:
 247
 248 //   Set up necessary status fields (both labels above share this code)
 249 //
 250 //   S0 user supplied status
 251 //   S2 user supplied status + WRE + TD  (Overflows)
 252 //   S3 user supplied status + FZ + TD   (Underflows)
 253 //
 254 {    .mfi
 255      nop.m         0
 256      fsetc.s3      0x7F,0x41                  // Configure s3 for the underflow check
 257      nop.i         0
 258 }
 259 {    .mfi
 260      nop.m         0
 261      fsetc.s2      0x7F,0x42                  // Configure s2 for the overflow check
 262      nop.i         0
 263 }
 264 ;;
 265
 266 //
 267 //   Do final operation with s2 and s3
 268 //
 269 {    .mfi
 270      setf.exp      FR_NBig = GR_neg_ov_limit  // Negative overflow threshold
 271      fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
 272      nop.i         0
 273 }
 274 {    .mfi
 275      setf.exp      FR_Big = GR_pos_ov_limit   // Positive overflow threshold
 276      fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
 277      nop.i         0
 278 }
 279 ;;
 280
 281 //   Check for overflow or underflow.
 282 //   Restore s3
 283 //   Restore s2
 284 //
 285 {    .mfi
 286      nop.m         0
 287      fsetc.s3      0x7F,0x40
 288      nop.i         0
 289 }
 290 {    .mfi
 291      nop.m         0
 292      fsetc.s2      0x7F,0x40
 293      nop.i         0
 294 }
 295 ;;
 296
 297 //
 298 //   Is the result zero?
 299 //
 300 {    .mfi
 301      nop.m         0
 302      fclass.m      p6, p0 =  FR_Result3, 0x007 // p6: result computed under s3 is zero
 303      nop.i         0
 304 }
 305 {    .mfi
 306      nop.m         0
 307      fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big // p7: s2 result >= positive limit, p8: otherwise
 308      nop.i         0
 309 }
 310 ;;
 311
 312 //
 313 //   Detect masked underflow - Tiny + Inexact Only
 314 //
 315 {    .mfi
 316      nop.m         0
 317 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // Keep p6 only if s0 and s2 results differ
 318      nop.i         0
 319 }
 320 ;;
 321
 322 //
 323 //   Is result bigger than the allowed range?
 324 //   Branch out for underflow
 325 //
 326 {    .mfb
 327      nop.m          0
 328 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9: s2 result <= negative limit
 329 (p6) br.cond.spnt   LDEXPF_UNDERFLOW
 330 }
 331 ;;
 332
 333 //
 334 //   Branch out for overflow
 335 //
 336 { .bbb
 337 (p7) br.cond.spnt   LDEXPF_OVERFLOW
 338 (p9) br.cond.spnt   LDEXPF_OVERFLOW
 339      br.ret.sptk    b0             //   Return from main path.
 340 }
 341 ;;
 342
 343 // Here if result overflows
 344 LDEXPF_OVERFLOW:
 345 { .mib
 346      alloc         r32=ar.pfs,3,0,4,0  // 3 inputs, 0 locals, 4 outputs, 0 rotating
 347      addl          GR_Tag = 148, r0    // Set error tag for overflow
 348      br.cond.sptk  __libm_error_region // Call error support for overflow
 349 }
 350 ;;
 351
 352 // Here if result underflows
 353 LDEXPF_UNDERFLOW:
 354 { .mib
 355      alloc         r32=ar.pfs,3,0,4,0  // 3 inputs, 0 locals, 4 outputs, 0 rotating
 356      addl          GR_Tag = 149, r0    // Set error tag for underflow
 357      br.cond.sptk  __libm_error_region // Call error support for underflow
 358 }
 359 ;;
 360
 361 // Here if x=unorm
 362 LDEXPF_UNORM:
 363 { .mib
 364      getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
 365      nop.i         0
 366      br.cond.sptk  LDEXPF_COMMON            // Return to main path
 367 }
 368 ;;
 369
 370
 371 GLOBAL_LIBM_END(__libm_ldexpf)
 372 LOCAL_LIBM_ENTRY(__libm_error_region)
 373 // Error path, reached from LDEXPF_OVERFLOW / LDEXPF_UNDERFLOW with GR_Tag already set.
 374 // Stores N, x and the result in a 64-byte frame, calls __libm_error_support, reloads the result.
 375 // Get stack address of N
 376 //
 377 .prologue
 378 { .mfi
 379     add   GR_Parameter_Y=-32,sp   // &N = entry sp - 32, i.e. new sp + 32 after the adjust below
 380     nop.f 0
 381 .save   ar.pfs,GR_SAVE_PFS
 382     mov  GR_SAVE_PFS=ar.pfs
 383 }
 384 //
 385 // Adjust sp
 386 //
 387 { .mfi
 388 .fframe 64
 389    add sp=-64,sp
 390    nop.f 0
 391    mov GR_SAVE_GP=gp
 392 };;
 393
 394 //
 395 //  Store N on stack in correct position
 396 //  Locate the address of x on stack
 397 //
 398 { .mmi
 399    st8 [GR_Parameter_Y] =  GR_N_as_int,16   // Store N at sp+32; post-increment leaves sp+48
 400    add GR_Parameter_X = 16,sp               // &x = sp+16
 401 .save   b0, GR_SAVE_B0
 402    mov GR_SAVE_B0=b0
 403 };;
 404
 405 //
 406 // Store x on the stack.
 407 // Get address for result on stack.
 408 //
 409 .body
 410 { .mib
 411    stfs [GR_Parameter_X] = FR_Norm_X              // Normalized x, stored as single
 412    add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // &result = sp+48
 413    nop.b 0
 414 }
 415 { .mib
 416    stfs [GR_Parameter_Y] = FR_Result              // Store the computed result at sp+48
 417    add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point back at N (sp+32)
 418    br.call.sptk b0=__libm_error_support#
 419 };;
 420
 421 //
 422 //  Get location of result on stack
 423 //
 424 { .mmi
 425    add   GR_Parameter_RESULT = 48,sp              // Recompute &result after the call
 426    nop.m 0
 427    nop.i 0
 428 };;
 429
 430 //
 431 //  Get the new result
 432 //
 433 { .mmi
 434    ldfs  FR_Result = [GR_Parameter_RESULT]
 435 .restore sp
 436    add   sp = 64,sp                               // Release the 64-byte frame
 437    mov   b0 = GR_SAVE_B0
 438 };;
 439
 440 //
 441 //  Restore gp, ar.pfs and return
 442 //
 443 { .mib
 444    mov   gp = GR_SAVE_GP
 445    mov   ar.pfs = GR_SAVE_PFS
 446    br.ret.sptk     b0
 447 };;
 448
 449 LOCAL_LIBM_END(__libm_error_region)
 450
 451 .type   __libm_error_support#,@function   // Routine called by __libm_error_region; not defined in this file
 452 .global __libm_error_support#