sysdeps/generic/math_private.h

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /*
  13  * from: @(#)fdlibm.h 5.1 93/09/24
  14  */
  15
  16 #ifndef _MATH_PRIVATE_H_
  17 #define _MATH_PRIVATE_H_
  18
  19 #include <endian.h>
  20 #include <stdint.h>
  21 #include <sys/types.h>
  22 #include <fenv.h>
  23 #include <float.h>
  24 #include <get-rounding-mode.h>
  25
  26 /* The original fdlibm code used statements like:
  27         n0 = ((*(int*)&one)>>29)^1;             * index of high word *
  28         ix0 = *(n0+(int*)&x);                   * high word of x *
  29         ix1 = *((1-n0)+(int*)&x);               * low word of x *
  30    to dig two 32 bit words out of the 64 bit IEEE floating point
  31    value.  That is non-ANSI, and, moreover, the gcc instruction
  32    scheduler gets it wrong.  We instead use the following macros.
  33    Unlike the original code, we determine the endianness at compile
  34    time, not at run time; I don't see much benefit to selecting
  35    endianness at run time.  */
  36
  37 /* A union which permits us to convert between a double and two 32 bit
  38    ints.  */
  39
  40 #if __FLOAT_WORD_ORDER == BIG_ENDIAN
  41
  42 typedef union
  43 {
  44   double value;
  45   struct
  46   {
  47     u_int32_t msw;
  48     u_int32_t lsw;
  49   } parts;
  50   uint64_t word;
  51 } ieee_double_shape_type;
  52
  53 #endif
  54
  55 #if __FLOAT_WORD_ORDER == LITTLE_ENDIAN
  56
  57 typedef union
  58 {
  59   double value;
  60   struct
  61   {
  62     u_int32_t lsw;
  63     u_int32_t msw;
  64   } parts;
  65   uint64_t word;
  66 } ieee_double_shape_type;
  67
  68 #endif
  69
  70 /* Get two 32 bit ints from a double.  */
  71
  72 #define EXTRACT_WORDS(ix0,ix1,d)                                \
  73 do {                                                            \
  74   ieee_double_shape_type ew_u;                                  \
  75   ew_u.value = (d);                                             \
  76   (ix0) = ew_u.parts.msw;                                       \
  77   (ix1) = ew_u.parts.lsw;                                       \
  78 } while (0)
  79
  80 /* Get the more significant 32 bit int from a double.  */
  81
  82 #ifndef GET_HIGH_WORD
  83 # define GET_HIGH_WORD(i,d)                                     \
  84 do {                                                            \
  85   ieee_double_shape_type gh_u;                                  \
  86   gh_u.value = (d);                                             \
  87   (i) = gh_u.parts.msw;                                         \
  88 } while (0)
  89 #endif
  90
  91 /* Get the less significant 32 bit int from a double.  */
  92
  93 #ifndef GET_LOW_WORD
  94 # define GET_LOW_WORD(i,d)                                      \
  95 do {                                                            \
  96   ieee_double_shape_type gl_u;                                  \
  97   gl_u.value = (d);                                             \
  98   (i) = gl_u.parts.lsw;                                         \
  99 } while (0)
 100 #endif
 101
 102 /* Get all in one, efficient on 64-bit machines.  */
 103 #ifndef EXTRACT_WORDS64
 104 # define EXTRACT_WORDS64(i,d)                                   \
 105 do {                                                            \
 106   ieee_double_shape_type gh_u;                                  \
 107   gh_u.value = (d);                                             \
 108   (i) = gh_u.word;                                              \
 109 } while (0)
 110 #endif
 111
 112 /* Set a double from two 32 bit ints.  */
 113 #ifndef INSERT_WORDS
 114 # define INSERT_WORDS(d,ix0,ix1)                                \
 115 do {                                                            \
 116   ieee_double_shape_type iw_u;                                  \
 117   iw_u.parts.msw = (ix0);                                       \
 118   iw_u.parts.lsw = (ix1);                                       \
 119   (d) = iw_u.value;                                             \
 120 } while (0)
 121 #endif
 122
 123 /* Get all in one, efficient on 64-bit machines.  */
 124 #ifndef INSERT_WORDS64
 125 # define INSERT_WORDS64(d,i)                                    \
 126 do {                                                            \
 127   ieee_double_shape_type iw_u;                                  \
 128   iw_u.word = (i);                                              \
 129   (d) = iw_u.value;                                             \
 130 } while (0)
 131 #endif
 132
 133 /* Set the more significant 32 bits of a double from an int.  */
 134 #ifndef SET_HIGH_WORD
 135 #define SET_HIGH_WORD(d,v)                                      \
 136 do {                                                            \
 137   ieee_double_shape_type sh_u;                                  \
 138   sh_u.value = (d);                                             \
 139   sh_u.parts.msw = (v);                                         \
 140   (d) = sh_u.value;                                             \
 141 } while (0)
 142 #endif
 143
 144 /* Set the less significant 32 bits of a double from an int.  */
 145 #ifndef SET_LOW_WORD
 146 # define SET_LOW_WORD(d,v)                                      \
 147 do {                                                            \
 148   ieee_double_shape_type sl_u;                                  \
 149   sl_u.value = (d);                                             \
 150   sl_u.parts.lsw = (v);                                         \
 151   (d) = sl_u.value;                                             \
 152 } while (0)
 153 #endif
 154
 155 /* A union which permits us to convert between a float and a 32 bit
 156    int.  */
 157
 158 typedef union
 159 {
 160   float value;
 161   u_int32_t word;
 162 } ieee_float_shape_type;
 163
 164 /* Get a 32 bit int from a float.  */
 165 #ifndef GET_FLOAT_WORD
 166 # define GET_FLOAT_WORD(i,d)                                    \
 167 do {                                                            \
 168   ieee_float_shape_type gf_u;                                   \
 169   gf_u.value = (d);                                             \
 170   (i) = gf_u.word;                                              \
 171 } while (0)
 172 #endif
 173
 174 /* Set a float from a 32 bit int.  */
 175 #ifndef SET_FLOAT_WORD
 176 # define SET_FLOAT_WORD(d,i)                                    \
 177 do {                                                            \
 178   ieee_float_shape_type sf_u;                                   \
 179   sf_u.word = (i);                                              \
 180   (d) = sf_u.value;                                             \
 181 } while (0)
 182 #endif
 183
 184 /* Get long double macros from a separate header.  */
 185 #include <math_ldbl.h>
 186
 187 /* ieee style elementary functions */
     /* Double-precision variants.  Every entry is an extern declaration
        only; nothing is defined at this point.  Most take and return
        double; the exceptions visible below are __ieee754_rem_pio2
        (returns int32_t, second argument is a non-const double *),
        __ieee754_ilogb (returns int), the _r gamma functions (extra
        int * argument) and jn/yn (leading int argument).  */
 188 extern double __ieee754_sqrt (double);
 189 extern double __ieee754_acos (double);
 190 extern double __ieee754_acosh (double);
 191 extern double __ieee754_log (double);
 192 extern double __ieee754_atanh (double);
 193 extern double __ieee754_asin (double);
 194 extern double __ieee754_atan2 (double,double);
 195 extern double __ieee754_exp (double);
 196 extern double __ieee754_exp2 (double);
 197 extern double __ieee754_exp10 (double);
 198 extern double __ieee754_cosh (double);
 199 extern double __ieee754_fmod (double,double);
 200 extern double __ieee754_pow (double,double);
 201 extern double __ieee754_lgamma_r (double,int *);
 202 extern double __ieee754_gamma_r (double,int *);
 203 extern double __ieee754_lgamma (double);
 204 extern double __ieee754_gamma (double);
 205 extern double __ieee754_log10 (double);
 206 extern double __ieee754_log2 (double);
 207 extern double __ieee754_sinh (double);
 208 extern double __ieee754_hypot (double,double);
 209 extern double __ieee754_j0 (double);
 210 extern double __ieee754_j1 (double);
 211 extern double __ieee754_y0 (double);
 212 extern double __ieee754_y1 (double);
 213 extern double __ieee754_jn (int,double);
 214 extern double __ieee754_yn (int,double);
 215 extern double __ieee754_remainder (double,double);
 216 extern int32_t __ieee754_rem_pio2 (double,double*);
 217 extern double __ieee754_scalb (double,double);
 218 extern int __ieee754_ilogb (double);
 219
 220 /* fdlibm kernel function */
     /* Extern declarations only.  __kernel_standard is declared in three
        flavours (double, float with an _f suffix, long double with an _l
        suffix), each taking two values of that type plus an int.  The
        remaining entries are the double-precision kernels.  */
 221 extern double __kernel_standard (double,double,int);
 222 extern float __kernel_standard_f (float,float,int);
 223 extern long double __kernel_standard_l (long double,long double,int);
 224 extern double __kernel_sin (double,double,int);
 225 extern double __kernel_cos (double,double);
 226 extern double __kernel_tan (double,double,int);
 227 extern int    __kernel_rem_pio2 (double*,double*,int,int,int, const int32_t*);
 228
 229 /* internal functions.  */
 230 extern double __copysign (double x, double __y);
 231
 232 extern inline double __copysign (double x, double y)
 233 { return __builtin_copysign (x, y); }
 234
 235 /* ieee style elementary float functions */
     /* Float counterparts (f suffix) of the double-precision list: extern
        declarations only.  __ieee754_rem_pio2f returns int32_t and takes
        a non-const float * second argument; __ieee754_ilogbf returns
        int.  */
 236 extern float __ieee754_sqrtf (float);
 237 extern float __ieee754_acosf (float);
 238 extern float __ieee754_acoshf (float);
 239 extern float __ieee754_logf (float);
 240 extern float __ieee754_atanhf (float);
 241 extern float __ieee754_asinf (float);
 242 extern float __ieee754_atan2f (float,float);
 243 extern float __ieee754_expf (float);
 244 extern float __ieee754_exp2f (float);
 245 extern float __ieee754_exp10f (float);
 246 extern float __ieee754_coshf (float);
 247 extern float __ieee754_fmodf (float,float);
 248 extern float __ieee754_powf (float,float);
 249 extern float __ieee754_lgammaf_r (float,int *);
 250 extern float __ieee754_gammaf_r (float,int *);
 251 extern float __ieee754_lgammaf (float);
 252 extern float __ieee754_gammaf (float);
 253 extern float __ieee754_log10f (float);
 254 extern float __ieee754_log2f (float);
 255 extern float __ieee754_sinhf (float);
 256 extern float __ieee754_hypotf (float,float);
 257 extern float __ieee754_j0f (float);
 258 extern float __ieee754_j1f (float);
 259 extern float __ieee754_y0f (float);
 260 extern float __ieee754_y1f (float);
 261 extern float __ieee754_jnf (int,float);
 262 extern float __ieee754_ynf (int,float);
 263 extern float __ieee754_remainderf (float,float);
 264 extern int32_t __ieee754_rem_pio2f (float,float*);
 265 extern float __ieee754_scalbf (float,float);
 266 extern int __ieee754_ilogbf (float);
 267
 268
 269 /* float versions of fdlibm kernel functions */
     /* Extern declarations only; the argument lists mirror the double
        kernels with float in place of double.  */
 270 extern float __kernel_sinf (float,float,int);
 271 extern float __kernel_cosf (float,float);
 272 extern float __kernel_tanf (float,float,int);
 273 extern int   __kernel_rem_pio2f (float*,float*,int,int,int, const int32_t*);
 274
 275 /* internal functions.  */
 276 extern float __copysignf (float x, float __y);
 277
 278 extern inline float __copysignf (float x, float y)
 279 { return __builtin_copysignf (x, y); }
 280
 281 /* ieee style elementary long double functions */
     /* Long double counterparts (l suffix): extern declarations only.
        Unlike the double and float lists, __ieee754_rem_pio2l is declared
        here as returning plain int rather than int32_t.  */
 282 extern long double __ieee754_sqrtl (long double);
 283 extern long double __ieee754_acosl (long double);
 284 extern long double __ieee754_acoshl (long double);
 285 extern long double __ieee754_logl (long double);
 286 extern long double __ieee754_atanhl (long double);
 287 extern long double __ieee754_asinl (long double);
 288 extern long double __ieee754_atan2l (long double,long double);
 289 extern long double __ieee754_expl (long double);
 290 extern long double __ieee754_exp2l (long double);
 291 extern long double __ieee754_exp10l (long double);
 292 extern long double __ieee754_coshl (long double);
 293 extern long double __ieee754_fmodl (long double,long double);
 294 extern long double __ieee754_powl (long double,long double);
 295 extern long double __ieee754_lgammal_r (long double,int *);
 296 extern long double __ieee754_gammal_r (long double,int *);
 297 extern long double __ieee754_lgammal (long double);
 298 extern long double __ieee754_gammal (long double);
 299 extern long double __ieee754_log10l (long double);
 300 extern long double __ieee754_log2l (long double);
 301 extern long double __ieee754_sinhl (long double);
 302 extern long double __ieee754_hypotl (long double,long double);
 303 extern long double __ieee754_j0l (long double);
 304 extern long double __ieee754_j1l (long double);
 305 extern long double __ieee754_y0l (long double);
 306 extern long double __ieee754_y1l (long double);
 307 extern long double __ieee754_jnl (int,long double);
 308 extern long double __ieee754_ynl (int,long double);
 309 extern long double __ieee754_remainderl (long double,long double);
 310 extern int   __ieee754_rem_pio2l (long double,long double*);
 311 extern long double __ieee754_scalbl (long double,long double);
 312 extern int   __ieee754_ilogbl (long double);
 313
 314 /* long double versions of fdlibm kernel functions */
     /* Extern declarations only.  No long double __kernel_rem_pio2 variant
        is declared in this list; __kernel_sincosl, which has no double or
        float counterpart above, takes two long double pointers and
        returns void.  */
 315 extern long double __kernel_sinl (long double,long double,int);
 316 extern long double __kernel_cosl (long double,long double);
 317 extern long double __kernel_tanl (long double,long double,int);
 318 extern void __kernel_sincosl (long double,long double,
 319                               long double *,long double *, int);
 320
 321 #ifndef NO_LONG_DOUBLE
 322 /* prototypes required to compile the ldbl-96 support without warnings */
     /* These declarations are only visible when NO_LONG_DOUBLE is not
        defined (see the enclosing #ifndef).  All names carry a leading
        double underscore except fabsl, which is declared under its
        public name.  */
 323 extern int __finitel (long double);
 324 extern int __ilogbl (long double);
 325 extern int __isinfl (long double);
 326 extern int __isnanl (long double);
 327 extern long double __atanl (long double);
 328 extern long double __copysignl (long double, long double);
 329 extern long double __expm1l (long double);
 330 extern long double __floorl (long double);
 331 extern long double __frexpl (long double, int *);
 332 extern long double __ldexpl (long double, int);
 333 extern long double __log1pl (long double);
 334 extern long double __nanl (const char *);
 335 extern long double __rintl (long double);
 336 extern long double __scalbnl (long double, int);
 337 extern long double __sqrtl (long double x);
 338 extern long double fabsl (long double x);
 339 extern void __sincosl (long double, long double *, long double *);
 340 extern long double __logbl (long double x);
 341 extern long double __significandl (long double x);
 342
 343 extern inline long double __copysignl (long double x, long double y)
 344 { return __builtin_copysignl (x, y); }
 345
 346 #endif
 347
 348 /* Prototypes for functions of the IBM Accurate Mathematical Library.  */
     /* Extern declarations only; all operate on double.  __mpsin and
        __mpcos take a bool parameter.
        NOTE(review): none of the headers this file includes by name is
        <stdbool.h>, so bool is presumably supplied indirectly -- confirm
        before including this header somewhere new.  */
 349 extern double __exp1 (double __x, double __xx, double __error);
 350 extern double __sin (double __x);
 351 extern double __cos (double __x);
 352 extern int __branred (double __x, double *__a, double *__aa);
 353 extern void __doasin (double __x, double __dx, double __v[]);
 354 extern void __dubsin (double __x, double __dx, double __v[]);
 355 extern void __dubcos (double __x, double __dx, double __v[]);
 356 extern double __halfulp (double __x, double __y);
 357 extern double __sin32 (double __x, double __res, double __res1);
 358 extern double __cos32 (double __x, double __res, double __res1);
 359 extern double __mpsin (double __x, double __dx, bool __range_reduce);
 360 extern double __mpcos (double __x, double __dx, bool __range_reduce);
 361 extern double __slowexp (double __x);
 362 extern double __slowpow (double __x, double __y, double __z);
 363 extern void __docos (double __x, double __dx, double __v[]);
 364
 365 /* Return X^2 + Y^2 - 1, computed without large cancellation error.
 366    It is given that 1 > X >= Y >= epsilon / 2, and that X^2 + Y^2 >=
 367    0.5.  */
     /* Declared for float (f suffix), double and long double (l
        suffix).  */
 368 extern float __x2y2m1f (float x, float y);
 369 extern double __x2y2m1 (double x, double y);
 370 extern long double __x2y2m1l (long double x, long double y);
 371
 372 /* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
 373    - 1, in the form R * (1 + *EPS) where the return value R is an
 374    approximation to the product and *EPS is set to indicate the
 375    approximate error in the return value.  X is such that all the
 376    values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
 377    X is small enough that factors quadratic in it can be
 378    neglected.  */
 379 extern float __gamma_productf (float x, float x_eps, int n, float *eps);
 380 extern double __gamma_product (double x, double x_eps, int n, double *eps);
 381 extern long double __gamma_productl (long double x, long double x_eps,
 382                                      int n, long double *eps);
 383
 384 /* Compute lgamma of a negative argument X, if it is in a range
 385    (depending on the floating-point format) for which expansion around
 386    zeros is used, setting *SIGNGAMP accordingly.  */
 387 extern float __lgamma_negf (float x, int *signgamp);
 388 extern double __lgamma_neg (double x, int *signgamp);
 389 extern long double __lgamma_negl (long double x, int *signgamp);
 390
 391 /* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS +
 392    1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1.  X is such that
 393    all the values X + 1, ..., X + N - 1 are exactly representable, and
 394    X_EPS / X is small enough that factors quadratic in it can be
 395    neglected.  */
     /* Only the double and long double variants are declared here; unlike
        the three groups above there is no float (f suffix) variant.  */
 396 extern double __lgamma_product (double t, double x, double x_eps, int n);
 397 extern long double __lgamma_productl (long double t, long double x,
 398                                       long double x_eps, int n);
 399
 400 #ifndef math_opt_barrier
 401 # define math_opt_barrier(x) \
 402 ({ __typeof (x) __x = (x); __asm ("" : "+m" (__x)); __x; })
 403 # define math_force_eval(x) \
 404 ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); })
 405 #endif
 406
 407 /* math_narrow_eval reduces its floating-point argument to the range
 408    and precision of its semantic type.  (The original evaluation may
 409    still occur with excess range and precision, so the result may be
 410    affected by double rounding.)  */
 411 #if FLT_EVAL_METHOD == 0
 412 # define math_narrow_eval(x) (x)
 413 #else
 414 # if FLT_EVAL_METHOD == 1
 415 #  define excess_precision(type) __builtin_types_compatible_p (type, float)
 416 # else
 417 #  define excess_precision(type) (__builtin_types_compatible_p (type, float) \
 418                                   || __builtin_types_compatible_p (type, \
 419                                                                    double))
 420 # endif
 421 # define math_narrow_eval(x)                                    \
 422   ({                                                            \
 423     __typeof (x) math_narrow_eval_tmp = (x);                    \
 424     if (excess_precision (__typeof (math_narrow_eval_tmp)))     \
 425       __asm__ ("" : "+m" (math_narrow_eval_tmp));               \
 426     math_narrow_eval_tmp;                                       \
 427    })
 428 #endif
 429
 430 #define fabs_tg(x) __builtin_choose_expr                        \
 431   (__builtin_types_compatible_p (__typeof (x), float),          \
 432    __builtin_fabsf (x),                                         \
 433    __builtin_choose_expr                                        \
 434    (__builtin_types_compatible_p (__typeof (x), double),        \
 435     __builtin_fabs (x), __builtin_fabsl (x)))
 436 #define min_of_type(type) __builtin_choose_expr         \
 437   (__builtin_types_compatible_p (type, float),          \
 438    FLT_MIN,                                             \
 439    __builtin_choose_expr                                \
 440    (__builtin_types_compatible_p (type, double),        \
 441     DBL_MIN, LDBL_MIN))
 442
 443 /* If X (which is not a NaN) is subnormal, force an underflow
 444    exception.  */
     /* X is evaluated once into a temporary of its own type.  When the
        magnitude of that temporary (fabs_tg) is below min_of_type for
        the same type, the temporary is multiplied by itself and the
        product is handed to math_force_eval.  The comparison is also
        true for zero, in which case the product is zero as well.  */
 445 #define math_check_force_underflow(x)                           \
 446   do                                                            \
 447     {                                                           \
 448       __typeof (x) force_underflow_tmp = (x);                   \
 449       if (fabs_tg (force_underflow_tmp)                         \
 450           < min_of_type (__typeof (force_underflow_tmp)))       \
 451         {                                                       \
 452           __typeof (force_underflow_tmp) force_underflow_tmp2   \
 453             = force_underflow_tmp * force_underflow_tmp;        \
 454           math_force_eval (force_underflow_tmp2);               \
 455         }                                                       \
 456     }                                                           \
 457   while (0)
 458 /* Likewise, but X is also known to be nonnegative.  */
     /* Same sequence as math_check_force_underflow, except that the
        temporary is compared against min_of_type directly, without going
        through fabs_tg.  */
 459 #define math_check_force_underflow_nonneg(x)                    \
 460   do                                                            \
 461     {                                                           \
 462       __typeof (x) force_underflow_tmp = (x);                   \
 463       if (force_underflow_tmp                                   \
 464           < min_of_type (__typeof (force_underflow_tmp)))       \
 465         {                                                       \
 466           __typeof (force_underflow_tmp) force_underflow_tmp2   \
 467             = force_underflow_tmp * force_underflow_tmp;        \
 468           math_force_eval (force_underflow_tmp2);               \
 469         }                                                       \
 470     }                                                           \
 471   while (0)
 472 /* Likewise, for both real and imaginary parts of a complex
 473    result.  */
     /* X is evaluated once into a temporary; math_check_force_underflow
        is then applied to its real part first and to its imaginary part
        second.  */
 474 #define math_check_force_underflow_complex(x)                           \
 475   do                                                                    \
 476     {                                                                   \
 477       __typeof (x) force_underflow_complex_tmp = (x);                   \
 478       math_check_force_underflow (__real__ force_underflow_complex_tmp); \
 479       math_check_force_underflow (__imag__ force_underflow_complex_tmp); \
 480     }                                                                   \
 481   while (0)
 482
 483 /* The standards only specify one variant of the fenv.h interfaces.
 484    But at least for some architectures we can be more efficient if we
 485    know what operations are going to be performed.  Therefore we
 486    define additional interfaces.  By default they refer to the normal
 487    interfaces.  */
 488
     /* Default backing for libc_feholdexcept and its f/l variants: calls
        __feholdexcept on E and explicitly discards the result.  */
 489 static __always_inline void
 490 default_libc_feholdexcept (fenv_t *e)
 491 {
 492   (void) __feholdexcept (e);
 493 }
 494
     /* Each name is defined only when nothing defined it earlier; all
        three fall back to the same default function.  */
 495 #ifndef libc_feholdexcept
 496 # define libc_feholdexcept  default_libc_feholdexcept
 497 #endif
 498 #ifndef libc_feholdexceptf
 499 # define libc_feholdexceptf default_libc_feholdexcept
 500 #endif
 501 #ifndef libc_feholdexceptl
 502 # define libc_feholdexceptl default_libc_feholdexcept
 503 #endif
 504
     /* Default backing for libc_fesetround and its f/l variants: calls
        __fesetround with R and explicitly discards the result.  */
 505 static __always_inline void
 506 default_libc_fesetround (int r)
 507 {
 508   (void) __fesetround (r);
 509 }
 510
     /* Each name is defined only when nothing defined it earlier; all
        three fall back to the same default function.  */
 511 #ifndef libc_fesetround
 512 # define libc_fesetround  default_libc_fesetround
 513 #endif
 514 #ifndef libc_fesetroundf
 515 # define libc_fesetroundf default_libc_fesetround
 516 #endif
 517 #ifndef libc_fesetroundl
 518 # define libc_fesetroundl default_libc_fesetround
 519 #endif
 520
 521 static __always_inline void
 522 default_libc_feholdexcept_setround (fenv_t *e, int r)
 523 {
 524   __feholdexcept (e);
 525   __fesetround (r);
 526 }
 527
 528 #ifndef libc_feholdexcept_setround
 529 # define libc_feholdexcept_setround  default_libc_feholdexcept_setround
 530 #endif
 531 #ifndef libc_feholdexcept_setroundf
 532 # define libc_feholdexcept_setroundf default_libc_feholdexcept_setround
 533 #endif
 534 #ifndef libc_feholdexcept_setroundl
 535 # define libc_feholdexcept_setroundl default_libc_feholdexcept_setround
 536 #endif
 537
     /* Unless defined earlier, the 53-bit variant is simply the plain
        libc_feholdsetround.  The right-hand side is only a macro name, so
        it is resolved where libc_feholdsetround_53bit is eventually
        used.  */
 538 #ifndef libc_feholdsetround_53bit
 539 # define libc_feholdsetround_53bit libc_feholdsetround
 540 #endif
 541
     /* Unless defined earlier, all three precisions of libc_fetestexcept
        map straight to fetestexcept; there is no default_libc_ wrapper
        function for this one.  */
 542 #ifndef libc_fetestexcept
 543 # define libc_fetestexcept  fetestexcept
 544 #endif
 545 #ifndef libc_fetestexceptf
 546 # define libc_fetestexceptf fetestexcept
 547 #endif
 548 #ifndef libc_fetestexceptl
 549 # define libc_fetestexceptl fetestexcept
 550 #endif
 551
     /* Default backing for libc_fesetenv and its f/l variants: calls
        __fesetenv on E and explicitly discards the result.  */
 552 static __always_inline void
 553 default_libc_fesetenv (fenv_t *e)
 554 {
 555   (void) __fesetenv (e);
 556 }
 557
     /* Each name is defined only when nothing defined it earlier; all
        three fall back to the same default function.  */
 558 #ifndef libc_fesetenv
 559 # define libc_fesetenv  default_libc_fesetenv
 560 #endif
 561 #ifndef libc_fesetenvf
 562 # define libc_fesetenvf default_libc_fesetenv
 563 #endif
 564 #ifndef libc_fesetenvl
 565 # define libc_fesetenvl default_libc_fesetenv
 566 #endif
 567
     /* Default backing for libc_feupdateenv and its f/l variants: calls
        __feupdateenv on E and explicitly discards the result.  */
 568 static __always_inline void
 569 default_libc_feupdateenv (fenv_t *e)
 570 {
 571   (void) __feupdateenv (e);
 572 }
 573
     /* Each name is defined only when nothing defined it earlier; all
        three fall back to the same default function.  */
 574 #ifndef libc_feupdateenv
 575 # define libc_feupdateenv  default_libc_feupdateenv
 576 #endif
 577 #ifndef libc_feupdateenvf
 578 # define libc_feupdateenvf default_libc_feupdateenv
 579 #endif
 580 #ifndef libc_feupdateenvl
 581 # define libc_feupdateenvl default_libc_feupdateenv
 582 #endif
 583
     /* Unless defined earlier, the 53-bit variant is simply the plain
        libc_feresetround.  */
 584 #ifndef libc_feresetround_53bit
 585 # define libc_feresetround_53bit libc_feresetround
 586 #endif
 587
 588 static __always_inline int
 589 default_libc_feupdateenv_test (fenv_t *e, int ex)
 590 {
 591   int ret = fetestexcept (ex);
 592   __feupdateenv (e);
 593   return ret;
 594 }
 595
 596 #ifndef libc_feupdateenv_test
 597 # define libc_feupdateenv_test  default_libc_feupdateenv_test
 598 #endif
 599 #ifndef libc_feupdateenv_testf
 600 # define libc_feupdateenv_testf default_libc_feupdateenv_test
 601 #endif
 602 #ifndef libc_feupdateenv_testl
 603 # define libc_feupdateenv_testl default_libc_feupdateenv_test
 604 #endif
 605
 606 /* Save and set the rounding mode.  The use of fenv_t to store the old mode
 607    allows a target-specific version of this function to avoid converting the
 608    rounding mode from the fpu format.  By default we have no choice but to
 609    manipulate the entire env.  */
 610
     /* Defaults: libc_feholdsetround{,f,l} are the matching
        libc_feholdexcept_setround{,f,l}.  */
 611 #ifndef libc_feholdsetround
 612 # define libc_feholdsetround  libc_feholdexcept_setround
 613 #endif
 614 #ifndef libc_feholdsetroundf
 615 # define libc_feholdsetroundf libc_feholdexcept_setroundf
 616 #endif
 617 #ifndef libc_feholdsetroundl
 618 # define libc_feholdsetroundl libc_feholdexcept_setroundl
 619 #endif
 620
 621 /* ... and the reverse.  */
 622
     /* Defaults: libc_feresetround{,f,l} are the matching
        libc_feupdateenv{,f,l}.  */
 623 #ifndef libc_feresetround
 624 # define libc_feresetround  libc_feupdateenv
 625 #endif
 626 #ifndef libc_feresetroundf
 627 # define libc_feresetroundf libc_feupdateenvf
 628 #endif
 629 #ifndef libc_feresetroundl
 630 # define libc_feresetroundl libc_feupdateenvl
 631 #endif
 632
 633 /* ... and a version that may also discard exceptions.  */
 634
     /* Defaults: libc_feresetround_noex{,f,l} are the matching
        libc_fesetenv{,f,l}.  */
 635 #ifndef libc_feresetround_noex
 636 # define libc_feresetround_noex  libc_fesetenv
 637 #endif
 638 #ifndef libc_feresetround_noexf
 639 # define libc_feresetround_noexf libc_fesetenvf
 640 #endif
 641 #ifndef libc_feresetround_noexl
 642 # define libc_feresetround_noexl libc_fesetenvl
 643 #endif
 644
     /* HAVE_RM_CTX defaults to 0 when nothing defined it earlier.  A
        nonzero value selects the alias defaults below; zero selects the
        generic function definitions in the #else branch.  */
 645 #ifndef HAVE_RM_CTX
 646 # define HAVE_RM_CTX 0
 647 #endif
 648
 649 #if HAVE_RM_CTX
 650 /* Set/Restore Rounding Modes only when necessary.  If defined, these functions
 651    set/restore floating point state only if the state needed within the lexical
 652    block is different from the current state.  This saves a lot of time when
 653    the floating point unit is much slower than the fixed point units.  */
 654
     /* In this branch only the _noex context helpers get defaults: the
        hold side falls back to the plain libc_feholdsetround{,f,l}_ctx and
        the reset side to libc_fesetenv{,f,l}_ctx.  None of those fallback
        targets is defined in this branch.  */
 655 # ifndef libc_feholdsetround_noex_ctx
 656 #   define libc_feholdsetround_noex_ctx  libc_feholdsetround_ctx
 657 # endif
 658 # ifndef libc_feholdsetround_noexf_ctx
 659 #   define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx
 660 # endif
 661 # ifndef libc_feholdsetround_noexl_ctx
 662 #   define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx
 663 # endif
 664
 665 # ifndef libc_feresetround_noex_ctx
 666 #   define libc_feresetround_noex_ctx  libc_fesetenv_ctx
 667 # endif
 668 # ifndef libc_feresetround_noexf_ctx
 669 #   define libc_feresetround_noexf_ctx libc_fesetenvf_ctx
 670 # endif
 671 # ifndef libc_feresetround_noexl_ctx
 672 #   define libc_feresetround_noexl_ctx libc_fesetenvl_ctx
 673 # endif
 674
 675 #else
 676
 677 /* Default implementation using standard fenv functions.
 678    Avoid unnecessary rounding mode changes by first checking the
 679    current rounding mode.  Note the use of __glibc_unlikely is
 680    important for performance.  */
 681
 682 static __always_inline void
 683 libc_feholdsetround_ctx (struct rm_ctx *ctx, int round)
 684 {
 685   ctx->updated_status = false;
 686
 687   /* Update rounding mode only if different.  */
 688   if (__glibc_unlikely (round != get_rounding_mode ()))
 689     {
 690       ctx->updated_status = true;
 691       __fegetenv (&ctx->env);
 692       __fesetround (round);
 693     }
 694 }
 695
     /* Counterpart of libc_feholdsetround_ctx.  Calls __feupdateenv on
        CTX->env only when CTX->updated_status is set, which
        libc_feholdsetround_ctx does only after storing an environment
        there; otherwise nothing happens.  */
 696 static __always_inline void
 697 libc_feresetround_ctx (struct rm_ctx *ctx)
 698 {
 699   /* Restore the rounding mode if updated.  */
 700   if (__glibc_unlikely (ctx->updated_status))
 701     __feupdateenv (&ctx->env);
 702 }
 703
     /* Variant used by the _NOEX macros.  Unlike libc_feholdsetround_ctx
        it stores the environment in CTX->env unconditionally and never
        touches CTX->updated_status; the rounding mode is still changed
        only when it differs from ROUND.  */
 704 static __always_inline void
 705 libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round)
 706 {
 707   /* Save exception flags and rounding mode.  */
 708   __fegetenv (&ctx->env);
 709
 710   /* Update rounding mode only if different.  */
 711   if (__glibc_unlikely (round != get_rounding_mode ()))
 712     __fesetround (round);
 713 }
 714
     /* Counterpart of libc_feholdsetround_noex_ctx: unconditionally calls
        __fesetenv on CTX->env, which that function always fills in.  */
 715 static __always_inline void
 716 libc_feresetround_noex_ctx (struct rm_ctx *ctx)
 717 {
 718   /* Restore exception flags and rounding mode.  */
 719   __fesetenv (&ctx->env);
 720 }
 721
     /* In this generic branch the float (f) and long double (l) context
        helpers are plain aliases of the functions defined above.  These
        defines are unconditional (no #ifndef guard).  */
 722 # define libc_feholdsetroundf_ctx libc_feholdsetround_ctx
 723 # define libc_feholdsetroundl_ctx libc_feholdsetround_ctx
 724 # define libc_feresetroundf_ctx   libc_feresetround_ctx
 725 # define libc_feresetroundl_ctx   libc_feresetround_ctx
 726
 727 # define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx
 728 # define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx
 729 # define libc_feresetround_noexf_ctx   libc_feresetround_noex_ctx
 730 # define libc_feresetround_noexl_ctx   libc_feresetround_noex_ctx
 731
 732 #endif
 733
     /* Unless defined earlier, the 53-bit context helpers are the plain
        libc_feholdsetround_ctx / libc_feresetround_ctx.  These defaults
        apply whichever HAVE_RM_CTX branch was taken above.  */
 734 #ifndef libc_feholdsetround_53bit_ctx
 735 #  define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
 736 #endif
 737 #ifndef libc_feresetround_53bit_ctx
 738 #  define libc_feresetround_53bit_ctx libc_feresetround_ctx
 739 #endif
 740
     /* Building block for the SET_RESTORE_ROUND* macros.  Expands to a
        declaration of a local `struct rm_ctx ctx' carrying a cleanup
        attribute that names CLEANUPFUNC with _ctx appended, followed by a
        call to ROUNDFUNC with _ctx appended, passing &ctx and RM.  The
        expansion has no trailing semicolon; the user supplies it.
        Because the variable is always called `ctx', the macro can be used
        at most once per scope and only where a declaration is allowed.  */
 741 #define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
 742   struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \
 743   ROUNDFUNC ## _ctx (&ctx, (RM))
 744
 745 /* Set the rounding mode within a lexical block.  Restore the rounding mode to
 746    the value at the start of the block.  The exception mode must be preserved.
 747    Exceptions raised within the block must be set in the exception flags.
 748    Non-stop mode may be enabled inside the block.  */
 749
     /* One macro per precision (none, F, L).  Each passes the matching
        libc_feholdsetround{,f,l} / libc_feresetround{,f,l} names to
        SET_RESTORE_ROUND_GENERIC, which appends _ctx to them.  */
 750 #define SET_RESTORE_ROUND(RM) \
 751   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround)
 752 #define SET_RESTORE_ROUNDF(RM) \
 753   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetroundf)
 754 #define SET_RESTORE_ROUNDL(RM) \
 755   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl)
 756
 757 /* Set the rounding mode within a lexical block.  Restore the rounding mode to
 758    the value at the start of the block.  The exception mode must be preserved.
 759    Exceptions raised within the block must be discarded, and exception flags
 760    are restored to the value at the start of the block.
 761    Non-stop mode may be enabled inside the block.  */
 762
     /* One macro per precision (none, F, L).  Each passes the matching
        libc_feholdsetround_noex{,f,l} / libc_feresetround_noex{,f,l} names
        to SET_RESTORE_ROUND_GENERIC, which appends _ctx to them.  */
 763 #define SET_RESTORE_ROUND_NOEX(RM) \
 764   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noex, \
 765                              libc_feresetround_noex)
 766 #define SET_RESTORE_ROUND_NOEXF(RM) \
 767   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexf, \
 768                              libc_feresetround_noexf)
 769 #define SET_RESTORE_ROUND_NOEXL(RM) \
 770   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexl, \
 771                              libc_feresetround_noexl)
 772
 773 /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits.  */
     /* Passes the _53bit helper names to SET_RESTORE_ROUND_GENERIC, so the
        expansion uses libc_feholdsetround_53bit_ctx and
        libc_feresetround_53bit_ctx.  Only this one variant exists; there
        are no F or L forms.  */
 774 #define SET_RESTORE_ROUND_53BIT(RM) \
 775   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_53bit,           \
 776                              libc_feresetround_53bit)
 777
 778 #endif /* _MATH_PRIVATE_H_ */