sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S

   1 /* nearbyint long double.
   2    IBM extended format long double version.
   3    Copyright (C) 2004, 2006 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <sysdep.h>
  22 #include <math_ldbl_opt.h>
  23
  24         .section        ".toc","aw"
  25 .LC0:   /* TOC entry holding 2**52 as a double (0x4330000000000000); loaded into fp13 below as TWO52.  */
  26         .tc FD_43300000_0[TC],0x4330000000000000
  27         .section        ".text"
  28
  29 /* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2])
  30    IEEE 1003.1 nearbyintl function.  nearbyintl is similar to rintl
  31    but does not raise the "inexact" exception.  This implementation is
  32    based on rintl but explicitly masks the inexact exception on entry
  33    and clears any pending inexact before restoring the exception mask
  34    on exit.
  35
  36    PowerPC64 long double uses the IBM extended format, which is
  37    represented as two 64-bit floating point double values.  The values
  38    are non-overlapping, giving an effective precision of 106 bits.  The
  39    first double contains the high order bits of the mantissa and is always
  40    rounded to represent a normal rounding of long double to double.  Since
  41    the long double value is the sum of the high and low values, the low
  42    double normally has the opposite sign to compensate for this rounding.
  43
  44    For long double there are two cases:
  45    1) |x| < 2**52, all the integer bits are in the high double.
  46       Round the high double to an integer and set the low double to 0.0.
  47    2) |x| >= 2**52, rounding involves both doubles.
  48       See the comment before label .L2 for details.
  49    */
  50 ENTRY (__nearbyintl)     /* In/out: x as [fp1 = high double, fp2 = low double].  */
  51         mffs    fp11            /* Save current FPSCR in fp11; part of it is restored at .L9.  */
  52         lfd     fp13,.LC0@toc(2)        /* fp13 = TWO52 (2**52), loaded from the TOC entry .LC0.  */
  53         fabs    fp0,fp1         /* fp0 = |x_high|  */
  54         mtfsb0  28              /* Disable "inexact" exceptions (clears FPSCR bit 28).  */
  55         fsub    fp12,fp13,fp13  /* fp12 = TWO52 - TWO52, used as 0.0.  NOTE(review): may be -0.0 when rounding toward minus infinity -- confirm.  */
  56         fabs    fp9,fp2         /* fp9 = |x_low|, only used on the .L2 path.  */
  57         fcmpu   cr7,fp0,fp13    /* cr7 = compare (|x_high|, TWO52)  */
  58         fcmpu   cr6,fp1,fp12    /* cr6 = compare (x_high, 0.0)  */
  59         bnl-    cr7,.L2         /* |x_high| >= TWO52 (or unordered): take the two-double path.  */
  60         fmr     fp2,fp12        /* |x_high| < TWO52: low double of the result is the generated 0.0.  */
  61         bng-    cr6,.L4         /* x_high <= 0.0 (or unordered): skip the positive case.  */
  62         fadd    fp1,fp1,fp13    /* x+= TWO52; the addition rounds away the fraction bits.  */
  63         fsub    fp1,fp1,fp13    /* x-= TWO52;  */
  64         b       .L9
  65 .L4:
  66         bnl-    cr6,.L9         /* if (x < 0.0); otherwise x_high is returned unchanged.  */
  67         fsub    fp1,fp13,fp1    /* x = TWO52 - x;  (i.e. TWO52 + |x|)  */
  68         fsub    fp0,fp1,fp13    /* x = - (x - TWO52);  (subtraction here, negation on the next line)  */
  69         fneg    fp1,fp0         /* Negate to undo the sign flip of the TWO52 - x step.  */
  70 .L9:                     /* Common exit for every path.  */
  71         mtfsb0  6               /* Clear any pending "inexact" exceptions.  NOTE(review): unconditional, so an inexact flag already set on entry appears to be cleared too -- confirm intended.  */
  72         mtfsf   0x01,fp11       /* Restore exception mask: only the FPSCR field selected by mask 0x01 is reloaded from fp11.  */
  73         blr
  74
  75 /* The high double is >= TWO52 so we need to round the low double and
  76    perhaps the high double.  This gets a bit tricky so we use the
  77    following algorithm:
  78
  79    tau = trunc(x_high/TWO52);  (fctidz rounds toward zero; equals floor only for x_high >= 0)
  80    x0 = x_high - tau;
  81    x1 = x_low + tau;
  82    r1 = nearbyint(x1);  (same TWO52 add/subtract rounding as the small case above)
  83    y_high = x0 + r1;
  84    y_low = r1 - tau;
  85    return y;  */
  86 .L2:
  87         fcmpu   cr7,fp9,fp13    /* if (|x_low| >= TWO52)  */
  88         fcmpu   cr0,fp9,fp12    /* || (|x_low| == 0.0)  */
  89         bge-    cr7,.L9         /*   return x;  (also taken when the compare is unordered)  */
  90         beq-  cr0,.L9           /* x_low == 0.0: return x unchanged.  */
  91         fdiv    fp8,fp1,fp13    /* x_high/TWO52  */
  92         fctidz  fp0,fp8         /* Convert to a 64-bit integer, rounding toward zero.  NOTE(review): looks like this can overflow for very large |x_high| -- confirm.  */
  93         fcfid   fp8,fp0         /* tau = trunc(x_high/TWO52), converted back to double.  */
  94         fsub    fp3,fp1,fp8     /* x0 = x_high - tau;  */
  95         fadd    fp4,fp2,fp8     /* x1 = x_low + tau;  */
  96
  97         fcmpu   cr6,fp4,fp12    /* cr6 = compare (x1, 0.0)  */
  98         bng-    cr6,.L8         /* x1 <= 0.0 (or unordered): skip the positive case.  */
  99         fadd    fp5,fp4,fp13    /* r1 = x1 + TWO52;  */
 100         fsub    fp5,fp5,fp13    /* r1 = r1 - TWO52;  */
 101         b       .L6
 102 .L8:
 103         fmr     fp5,fp4         /* r1 = x1; kept as is when x1 is not below 0.0.  */
 104         bge-    cr6,.L6         /* if (x1 < 0.0); otherwise keep r1 = x1.  */
 105         fsub    fp5,fp13,fp4    /* r1 = TWO52 - x1;  */
 106         fsub    fp0,fp5,fp13    /* r1 = - (r1 - TWO52);  (subtraction here, negation on the next line)  */
 107         fneg    fp5,fp0         /* Negate to undo the sign flip of the TWO52 - x1 step.  */
 108 .L6:
 109         fadd    fp1,fp3,fp5     /* y_high = x0 + r1;  */
 110         fsub    fp2,fp5,fp8     /* y_low = r1 - tau;  */
 111         b       .L9             /* Shared exit: clear inexact, restore the mask, return.  */
 112 END (__nearbyintl)
 113
 114 long_double_symbol (libm, __nearbyintl, nearbyintl)      /* NOTE(review): macro is not defined in this file (presumably from math_ldbl_opt.h); appears to publish __nearbyintl as nearbyintl in libm -- confirm.  */