sysdeps/ia64/fpu/s_libm_ldexpf.S

   1 .file "libm_ldexpf.s"
   2
   3
   4 // Copyright (c) 2000 - 2003, Intel Corporation
   5 // All rights reserved.
   6 //
   7 //
   8 // Redistribution and use in source and binary forms, with or without
   9 // modification, are permitted provided that the following conditions are
  10 // met:
  11 //
  12 // * Redistributions of source code must retain the above copyright
  13 // notice, this list of conditions and the following disclaimer.
  14 //
  15 // * Redistributions in binary form must reproduce the above copyright
  16 // notice, this list of conditions and the following disclaimer in the
  17 // documentation and/or other materials provided with the distribution.
  18 //
  19 // * The name of Intel Corporation may not be used to endorse or promote
  20 // products derived from this software without specific prior written
  21 // permission.
  22
  23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  28 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  31 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34 //
  35 // Intel Corporation is the author of this code, and requests that all
  36 // problem reports or change requests be submitted to it directly at
  37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
  38 //
  39 // History
  40 //==============================================================
  41 // 02/02/00 Initial version
  42 // 01/26/01 ldexpf completely reworked and now standalone version
  43 // 01/04/02 Added handling for int 32 or 64 bits
  44 // 05/20/02 Cleaned up namespace and sf0 syntax
  45 // 02/10/03 Reordered header: .section, .global, .proc, .align
  46 // 08/04/03 Improved performance
  47 //
  48 // API
  49 //==============================================================
  50 // float __libm_ldexpf  (float x, int n, int int_type)
  51 // input  floating point f8 and int n (r33), int int_type (r34)
  52 // output floating point f8
  53 //
  54 // int_type = 0 if int is 32 bits
  55 // int_type = 1 if int is 64 bits
  56 //
  57 // Returns x* 2**n using an fma and detects overflow
  58 // and underflow.
  59 //
  60 //
  61 // Strategy:
  62 //  Compute biased exponent of result exp_Result = N + exp_X
  63 //  Break into ranges:
  64 //   exp_Result > 0x1007e                 -> Certain overflow
  65 //   exp_Result = 0x1007e                 -> Possible overflow
  66 //   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
  67 //   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
  68 //   exp_Result < 0x0ff81 - 23            -> Certain underflow
  69
  70 FR_Big         = f6
  71 FR_NBig        = f7
  72 FR_Floating_X  = f8
  73 FR_Result      = f8
  74 FR_Result2     = f9
  75 FR_Result3     = f10
  76 FR_Norm_X      = f11
  77 FR_Two_N       = f12
  78
  79 GR_neg_ov_limit= r14
  80 GR_N_Biased    = r15
  81 GR_Big         = r16
  82 GR_NBig        = r17
  83 GR_exp_Result  = r18
  84 GR_pos_ov_limit= r19
  85 GR_Bias        = r20
  86 GR_N_as_int    = r21
  87 GR_signexp_X   = r22
  88 GR_exp_X       = r23
  89 GR_exp_mask    = r24
  90 GR_max_exp     = r25
  91 GR_min_exp     = r26
  92 GR_min_den_exp = r27
  93
  94 GR_SAVE_B0          = r32
  95 GR_SAVE_GP          = r33
  96 GR_SAVE_PFS         = r34
  97 GR_Parameter_X      = r35
  98 GR_Parameter_Y      = r36
  99 GR_Parameter_RESULT = r37
 100 GR_Tag              = r38
 101
 102 .section .text
 103 GLOBAL_LIBM_ENTRY(__libm_ldexpf)
 104
 105 //
 106 //   Is x NAN, INF, ZERO, +-?
 107 //   Build the exponent Bias
 108 //
 109 {    .mfi
 110      getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
 111      fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
 112      mov           GR_Bias = 0x0ffff
 113 }
 114 //
 115 //   Normalize x
 116 //   Is integer type 32 bits?
 117 //
 118 {    .mfi
 119      mov           GR_Big = 35000      // If N this big then certain overflow
 120      fnorm.s1      FR_Norm_X = FR_Floating_X
 121      cmp.eq        p8,p9 = r34,r0
 122 }
 123 ;;
 124
 125 //   Sign extend N if int is 32 bits
 126 {    .mfi
 127 (p9) mov           GR_N_as_int = r33     // Copy N if int is 64 bits
 128      fclass.m      p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
 129 (p8) sxt4          GR_N_as_int = r33     // Sign extend N if int is 32 bits
 130 }
 131 { .mfi
 132      mov           GR_NBig = -35000    // If N this small then certain underflow
 133      nop.f         0
 134      mov           GR_max_exp = 0x1007e      // Exponent of maximum float
 135 }
 136 ;;
 137
 138 //   Create biased exponent for 2**N
 139 {    .mfi
 140      add           GR_N_Biased = GR_Bias,GR_N_as_int
 141      nop.f         0
 142      cmp.ge        p7, p0 = GR_N_as_int, GR_Big  // Certain overflow?
 143 }
 144 {    .mib
 145      cmp.le        p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
 146      mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
 147 (p9) br.cond.spnt  LDEXPF_UNORM              // Branch if x=unorm
 148 }
 149 ;;
 150
 151 LDEXPF_COMMON:
 152 // Main path continues.  Also return here from x=unorm path.
 153 //   Create 2**N
 154 .pred.rel "mutex",p7,p8
 155 {    .mfi
 156      setf.exp      FR_Two_N = GR_N_Biased
 157      nop.f         0
 158 (p7) mov           GR_N_as_int = GR_Big      // Limit max N
 159 }
 160 {    .mfi
 161 (p8) mov           GR_N_as_int = GR_NBig     // Limit min N
 162      nop.f         0
 163 (p8) cmp.eq        p7,p0 = r0,r0             // Set p7 if |N| big
 164 }
 165 ;;
 166
 167 //
 168 //   Create biased exponent for 2**N for N big
 169 //   Is N zero?
 170 //
 171 {    .mfi
 172 (p7) add           GR_N_Biased = GR_Bias,GR_N_as_int
 173      nop.f         0
 174      cmp.eq.or     p6,p0 = r33,r0
 175 }
 176 {    .mfi
 177      mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
 178      nop.f         0
 179      mov           GR_exp_mask = 0x1ffff     // Exponent mask
 180 }
 181 ;;
 182
 183 //
 184 //   Create 2**N for N big
 185 //   Return x when N = 0 or X = Nan, Inf, Zero
 186 //
 187 {    .mfi
 188 (p7) setf.exp      FR_Two_N = GR_N_Biased
 189      nop.f         0
 190      mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
 191 }
 192 {    .mfb
 193      and           GR_exp_X = GR_exp_mask, GR_signexp_X
 194 (p6) fma.s.s0      FR_Result = FR_Floating_X, f1, f0
 195 (p6) br.ret.spnt   b0
 196 }
 197 ;;
 198
 199 //
 200 //   Raise Denormal operand flag with compare
 201 //   Compute biased result exponent
 202 //
 203 {    .mfi
 204      add           GR_exp_Result = GR_exp_X, GR_N_as_int
 205      fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
 206      mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
 207 }
 208 ;;
 209
 210 //
 211 //   Do final operation
 212 //
 213 {    .mfi
 214      cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
 215      fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
 216      cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 217 }
 218 {    .mfb
 219      nop.m         0
 220      nop.f         0
 221 (p9) br.cond.spnt  LDEXPF_UNDERFLOW           // Branch if certain underflow
 222 }
 223 ;;
 224
 225 {    .mib
 226 (p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
 227 (p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
 228 (p7) br.ret.sptk   b0                         // Return from main path
 229 }
 230 ;;
 231
 232 {    .bbb
 233 (p6) br.cond.spnt  LDEXPF_OVERFLOW            // Branch if certain overflow
 234 (p8) br.cond.spnt  LDEXPF_POSSIBLE_OVERFLOW   // Branch if possible overflow
 235 (p9) br.cond.spnt  LDEXPF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
 236 }
 237 ;;
 238
 239 // Here if possible underflow.
 240 // Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
 241 LDEXPF_POSSIBLE_UNDERFLOW:
 242 //
 243 // Here if possible overflow.
 244 // Resulting exponent: 0x1007e = exp_Result
 245 LDEXPF_POSSIBLE_OVERFLOW:
 246
 247 //   Set up necessary status fields
 248 //
 249 //   S0 user supplied status
 250 //   S2 user supplied status + WRE + TD  (Overflows)
 251 //   S3 user supplied status + FZ + TD   (Underflows)
 252 //
 253 {    .mfi
 254      nop.m         0
 255      fsetc.s3      0x7F,0x41
 256      nop.i         0
 257 }
 258 {    .mfi
 259      nop.m         0
 260      fsetc.s2      0x7F,0x42
 261      nop.i         0
 262 }
 263 ;;
 264
 265 //
 266 //   Do final operation with s2 and s3
 267 //
 268 {    .mfi
 269      setf.exp      FR_NBig = GR_neg_ov_limit
 270      fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
 271      nop.i         0
 272 }
 273 {    .mfi
 274      setf.exp      FR_Big = GR_pos_ov_limit
 275      fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
 276      nop.i         0
 277 }
 278 ;;
 279
 280 //   Check for overflow or underflow.
 281 //   Restore s3
 282 //   Restore s2
 283 //
 284 {    .mfi
 285      nop.m         0
 286      fsetc.s3      0x7F,0x40
 287      nop.i         0
 288 }
 289 {    .mfi
 290      nop.m         0
 291      fsetc.s2      0x7F,0x40
 292      nop.i         0
 293 }
 294 ;;
 295
 296 //
 297 //   Is the result zero?
 298 //
 299 {    .mfi
 300      nop.m         0
 301      fclass.m      p6, p0 =  FR_Result3, 0x007
 302      nop.i         0
 303 }
 304 {    .mfi
 305      nop.m         0
 306      fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
 307      nop.i         0
 308 }
 309 ;;
 310
 311 //
 312 //   Detect masked underflow - Tiny + Inexact Only
 313 //
 314 {    .mfi
 315      nop.m         0
 316 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
 317      nop.i         0
 318 }
 319 ;;
 320
 321 //
 322 //   Is result bigger the allowed range?
 323 //   Branch out for underflow
 324 //
 325 {    .mfb
 326      nop.m          0
 327 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
 328 (p6) br.cond.spnt   LDEXPF_UNDERFLOW
 329 }
 330 ;;
 331
 332 //
 333 //   Branch out for overflow
 334 //
 335 { .bbb
 336 (p7) br.cond.spnt   LDEXPF_OVERFLOW
 337 (p9) br.cond.spnt   LDEXPF_OVERFLOW
 338      br.ret.sptk    b0             //   Return from main path.
 339 }
 340 ;;
 341
 342 // Here if result overflows
 343 LDEXPF_OVERFLOW:
 344 { .mib
 345      alloc         r32=ar.pfs,3,0,4,0
 346      addl          GR_Tag = 148, r0    // Set error tag for overflow
 347      br.cond.sptk  __libm_error_region // Call error support for overflow
 348 }
 349 ;;
 350
 351 // Here if result underflows
 352 LDEXPF_UNDERFLOW:
 353 { .mib
 354      alloc         r32=ar.pfs,3,0,4,0
 355      addl          GR_Tag = 149, r0    // Set error tag for underflow
 356      br.cond.sptk  __libm_error_region // Call error support for underflow
 357 }
 358 ;;
 359
 360 // Here if x=unorm
 361 LDEXPF_UNORM:
 362 { .mib
 363      getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
 364      nop.i         0
 365      br.cond.sptk  LDEXPF_COMMON            // Return to main path
 366 }
 367 ;;
 368
 369
 370 GLOBAL_LIBM_END(__libm_ldexpf)
 371 LOCAL_LIBM_ENTRY(__libm_error_region)
 372
 373 //
 374 // Get stack address of N
 375 //
 376 .prologue
 377 { .mfi
 378     add   GR_Parameter_Y=-32,sp
 379     nop.f 0
 380 .save   ar.pfs,GR_SAVE_PFS
 381     mov  GR_SAVE_PFS=ar.pfs
 382 }
 383 //
 384 // Adjust sp
 385 //
 386 { .mfi
 387 .fframe 64
 388    add sp=-64,sp
 389    nop.f 0
 390    mov GR_SAVE_GP=gp
 391 };;
 392
 393 //
 394 //  Store N on stack in correct position
 395 //  Locate the address of x on stack
 396 //
 397 { .mmi
 398    st8 [GR_Parameter_Y] =  GR_N_as_int,16
 399    add GR_Parameter_X = 16,sp
 400 .save   b0, GR_SAVE_B0
 401    mov GR_SAVE_B0=b0
 402 };;
 403
 404 //
 405 // Store x on the stack.
 406 // Get address for result on stack.
 407 //
 408 .body
 409 { .mib
 410    stfs [GR_Parameter_X] = FR_Norm_X
 411    add   GR_Parameter_RESULT = 0,GR_Parameter_Y
 412    nop.b 0
 413 }
 414 { .mib
 415    stfs [GR_Parameter_Y] = FR_Result
 416    add   GR_Parameter_Y = -16,GR_Parameter_Y
 417    br.call.sptk b0=__libm_error_support#
 418 };;
 419
 420 //
 421 //  Get location of result on stack
 422 //
 423 { .mmi
 424    add   GR_Parameter_RESULT = 48,sp
 425    nop.m 0
 426    nop.i 0
 427 };;
 428
 429 //
 430 //  Get the new result
 431 //
 432 { .mmi
 433    ldfs  FR_Result = [GR_Parameter_RESULT]
 434 .restore sp
 435    add   sp = 64,sp
 436    mov   b0 = GR_SAVE_B0
 437 };;
 438
 439 //
 440 //  Restore gp, ar.pfs and return
 441 //
 442 { .mib
 443    mov   gp = GR_SAVE_GP
 444    mov   ar.pfs = GR_SAVE_PFS
 445    br.ret.sptk     b0
 446 };;
 447
 448 LOCAL_LIBM_END(__libm_error_region)
 449
 450 .type   __libm_error_support#,@function
 451 .global __libm_error_support#