engine/include/support/irrMath.h

   1 // Copyright (C) 2002-2007 Nikolaus Gebhardt
   2 // This file is part of the "Irrlicht Engine".
   3 // For conditions of distribution and use, see copyright notice in irrlicht.h
   4
   5 #ifndef __IRR_MATH_H_INCLUDED__
   6 #define __IRR_MATH_H_INCLUDED__
   7
   8 #include "IrrCompileConfig.h"
   9 #include "irrTypes.h"
  10 #include <math.h>
  11
// Single-precision math fallbacks for the toolchains selected by the condition
// below: each f32 function name is defined as a macro that calls the
// double-precision function on the argument cast to f64 and casts the result
// back to f32.
#if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
        #define sqrtf(X) (f32)sqrt((f64)(X))
        #define sinf(X) (f32)sin((f64)(X))
        #define cosf(X) (f32)cos((f64)(X))
        #define ceilf(X) (f32)ceil((f64)(X))
        #define floorf(X) (f32)floor((f64)(X))
        #define powf(X,Y) (f32)pow((f64)(X),(f64)(Y))
        #define fmodf(X,Y) (f32)fmod((f64)(X),(f64)(Y))
        #define fabsf(X) (f32)fabs((f64)(X))
#endif
  22
  23 namespace irr
  24 {
  25 namespace core
  26 {
  27
  28         //! Rounding error constant often used when comparing f32 values.
  29
  30 #ifdef IRRLICHT_FAST_MATH
  31         const f32 ROUNDING_ERROR_32     = 0.00005f;
  32         const f64 ROUNDING_ERROR_64     = 0.000005f;
  33 #else
  34         const f32 ROUNDING_ERROR_32     = 0.000001f;
  35         const f64 ROUNDING_ERROR_64     = 0.00000001f;
  36 #endif
  37
        //! Constant for PI (32bit).
        const f32 PI                    = 3.14159265359f;

        //! Constant for reciprocal of PI (1/PI, 32bit).
        const f32 RECIPROCAL_PI         = 1.0f/PI;

        //! Constant for half of PI (PI/2, 32bit).
        const f32 HALF_PI               = PI/2.0f;

        //! Constant for 64bit PI.
        const f64 PI64                  = 3.1415926535897932384626433832795028841971693993751;

        //! Constant for 64bit reciprocal of PI (1/PI64).
        const f64 RECIPROCAL_PI64       = 1.0/PI64;

        //! 32bit constant for converting from degrees to radians (multiply degrees by it)
        const f32 DEGTORAD   = PI / 180.0f;

        //! 32bit constant for converting from radians to degrees (formerly known as GRAD_PI)
        const f32 RADTODEG   = 180.0f / PI;

        //! 64bit constant for converting from degrees to radians (formerly known as GRAD_PI2)
        const f64 DEGTORAD64 = PI64 / 180.0;

        //! 64bit constant for converting from radians to degrees
        const f64 RADTODEG64 = 180.0 / PI64;
  64
  65         //! returns minimum of two values. Own implementation to get rid of the STL (VS6 problems)
  66         template<class T>
  67         inline const T& min_(const T& a, const T& b)
  68         {
  69                 return a < b ? a : b;
  70         }
  71
  72         //! returns minimum of three values. Own implementation to get rid of the STL (VS6 problems)
  73         template<class T>
  74         inline const T& min_(const T& a, const T& b, const T& c)
  75         {
  76                 return a < b ? min_(a, c) : min_(b, c);
  77         }
  78
  79         //! returns maximum of two values. Own implementation to get rid of the STL (VS6 problems)
  80         template<class T>
  81         inline const T& max_(const T& a, const T& b)
  82         {
  83                 return a < b ? b : a;
  84         }
  85
  86         //! returns maximum of three values. Own implementation to get rid of the STL (VS6 problems)
  87         template<class T>
  88         inline const T& max_(const T& a, const T& b, const T& c)
  89         {
  90                 return a < b ? max_(b, c) : max_(a, c);
  91         }
  92
  93         //! returns abs of two values. Own implementation to get rid of STL (VS6 problems)
  94         template<class T>
  95         inline T abs_(const T& a)
  96         {
  97                 return a < (T)0 ? -a : a;
  98         }
  99
 100         //! returns linear interpolation of a and b with ratio t
 101         //! \return: a if t==0, b if t==1, and the linear interpolation else
 102         template<class T>
 103         inline T lerp(const T& a, const T& b, const f32 t)
 104         {
 105                 return (a*(1.f-t)) + (b*t);
 106         }
 107
 108         //! clamps a value between low and high
 109         template <class T>
 110         inline const T clamp (const T& value, const T& low, const T& high)
 111         {
 112                 return min_ (max_(value,low), high);
 113         }
 114
 115         //! returns if a equals b, taking possible rounding errors into account
 116         inline bool equals(const f32 a, const f32 b, const f32 tolerance = ROUNDING_ERROR_32)
 117         {
 118                 return (a + tolerance >= b) && (a - tolerance <= b);
 119         }
 120
 121         //! returns if a equals b, taking possible rounding errors into account
 122         inline bool equals(const s32 a, const s32 b, const s32 tolerance = 0)
 123         {
 124                 return (a + tolerance >= b) && (a - tolerance <= b);
 125         }
 126
 127         //! returns if a equals b, taking possible rounding errors into account
 128         inline bool equals(const u32 a, const u32 b, const u32 tolerance = 0)
 129         {
 130                 return (a + tolerance >= b) && (a - tolerance <= b);
 131         }
 132
 133         //! returns if a equals zero, taking rounding errors into account
 134         inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_32)
 135         {
 136                 return fabsf ( a ) <= tolerance;
 137         }
 138
 139         //! returns if a equals zero, taking rounding errors into account
 140         inline bool iszero(const s32 a, const s32 tolerance = 0)
 141         {
 142                 return ( a & 0x7ffffff ) <= tolerance;
 143         }
 144
 145         //! returns if a equals zero, taking rounding errors into account
 146         inline bool iszero(const u32 a, const u32 tolerance = 0)
 147         {
 148                 return a <= tolerance;
 149         }
 150
 151         inline s32 s32_min ( s32 a, s32 b)
 152         {
 153                 s32 mask = (a - b) >> 31;
 154                 return (a & mask) | (b & ~mask);
 155         }
 156
 157         inline s32 s32_max ( s32 a, s32 b)
 158         {
 159                 s32 mask = (a - b) >> 31;
 160                 return (b & mask) | (a & ~mask);
 161         }
 162
 163         inline s32 s32_clamp (s32 value, s32 low, s32 high)
 164         {
 165                 return s32_min (s32_max(value,low), high);
 166         }
 167
        /*

                float IEEE-754 bit representation

                0      0x00000000
                1.0    0x3f800000
                0.5    0x3f000000
                3      0x40400000
                +inf   0x7f800000
                -inf   0xff800000
                +NaN   0x7fc00000 or 0x7ff00000
                in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)
        */

        //! Access the storage of f as s32 / u32 through a pointer cast (no value conversion).
        //! The address of f is taken, so f must be an lvalue.
        //! NOTE(review): this kind of pointer cast may conflict with strict aliasing rules on optimizing compilers - confirm build flags.
        #define F32_AS_S32(f)           (*((s32 *) &(f)))
        #define F32_AS_U32(f)           (*((u32 *) &(f)))
        //! Address of f as u32 pointer.
        #define F32_AS_U32_POINTER(f)   ( ((u32 *) &(f)))

        //! Bit pattern of 0 (see table above).
        #define F32_VALUE_0             0x00000000
        //! Bit pattern of 1.0 (see table above).
        #define F32_VALUE_1             0x3f800000
        //! Mask with only the top bit set.
        #define F32_SIGN_BIT            0x80000000U
        //! Mask with every bit except the top bit set.
        #define F32_EXPON_MANTISSA      0x7FFFFFFFU
 190
        //! code is taken from IceFPU
        //! Integer representation of a floating-point value.
        //! Reference cast: x is reinterpreted in place as u32, no value conversion happens.
        #define IR(x)                                   ((u32&)(x))

        //! Absolute integer representation of a floating-point value
        //! (IR(x) with the top bit masked off).
        #define AIR(x)                                  (IR(x)&0x7fffffff)

        //! Floating-point representation of an integer value.
        //! Reference cast: x is reinterpreted in place as f32, no value conversion happens.
        #define FR(x)                                   ((f32&)(x))

        #define IEEE_1_0                        0x3f800000                                              //!<    integer representation of 1.0
        #define IEEE_255_0                      0x437f0000                                              //!<    integer representation of 255.0
 203
// Sign and equality tests for f32 values.
// With IRRLICHT_FAST_MATH they are integer comparisons on the bit pattern of the
// value (through F32_AS_S32 / F32_AS_U32, which take the address of the argument,
// so it has to be an lvalue). Otherwise they are plain floating point comparisons.
#ifdef IRRLICHT_FAST_MATH
        #define F32_LOWER_0(f)          (F32_AS_U32(f) >  F32_SIGN_BIT)
        #define F32_LOWER_EQUAL_0(f)    (F32_AS_S32(f) <= F32_VALUE_0)
        #define F32_GREATER_0(f)        (F32_AS_S32(f) >  F32_VALUE_0)
        #define F32_GREATER_EQUAL_0(f)  (F32_AS_U32(f) <= F32_SIGN_BIT)
        #define F32_EQUAL_1(f)          (F32_AS_U32(f) == F32_VALUE_1)
        // the top bit is masked off before comparing against the pattern of 0
        #define F32_EQUAL_0(f)          ( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)

        // only valid when a and b have the same sign
        #define F32_A_GREATER_B(a,b)    (F32_AS_S32((a)) >  F32_AS_S32((b)))
#else
        #define F32_LOWER_0(n)          ((n) <  0.0f)
        #define F32_LOWER_EQUAL_0(n)    ((n) <= 0.0f)
        #define F32_GREATER_0(n)        ((n) >  0.0f)
        #define F32_GREATER_EQUAL_0(n)  ((n) >= 0.0f)
        #define F32_EQUAL_1(n)          ((n) == 1.0f)
        #define F32_EQUAL_0(n)          ((n) == 0.0f)
        #define F32_A_GREATER_B(a,b)    ((a) > (b))
#endif
 223
 224
// REALINLINE: inlining specifier used by the helper functions below.
// Expands to __forceinline when _MSC_VER is defined and to plain inline
// otherwise. A definition made before this point is left untouched.
#ifndef REALINLINE
        #ifdef _MSC_VER
                #define REALINLINE __forceinline
        #else
                #define REALINLINE inline
        #endif
#endif
 232
 233
 234         //! conditional set based on mask and arithmetic shift
 235         REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
 236         {
 237                 return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;
 238         }
 239
 240         //! conditional set based on mask and arithmetic shift
 241         REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )
 242         {
 243                 return ( -condition >> 31 ) & a;
 244         }
 245
 246         /*
 247                 if (condition) state |= m; else state &= ~m;
 248         */
 249         REALINLINE void setbit ( u32 &state, s32 condition, u32 mask )
 250         {
 251                 // 0, or any postive to mask
 252                 //s32 conmask = -condition >> 31;
 253                 state ^= ( ( -condition >> 31 ) ^ state ) & mask;
 254         }
 255
 256
 257
 258         inline f32 round_( f32 x )
 259         {
 260                 return floorf( x + 0.5f );
 261         }
 262
 263         REALINLINE void clearFPUException ()
 264         {
 265 #ifdef IRRLICHT_FAST_MATH
 266 #ifdef feclearexcept
 267                 feclearexcept(FE_ALL_EXCEPT);
 268 #elif defined(_MSC_VER)
 269                 __asm fnclex;
 270 #elif defined(__GNUC__) && defined(__x86__)
 271                 __asm__ __volatile__ ("fclex \n\t");
 272 #else
 273 #  warn clearFPUException not supported.
 274 #endif
 275 #endif
 276         }
 277
        //! Returns 1 / sqrt(x); an approximation when IRRLICHT_FAST_MATH is defined.
        /** Without IRRLICHT_FAST_MATH the result is 1.f / sqrtf(x).
        With IRRLICHT_FAST_MATH a first guess is built with integer arithmetic on
        the bit pattern of x and then refined by one multiplication step.
        \param x Input value.
        \return (Approximate) reciprocal square root of x. */
        REALINLINE f32 reciprocal_squareroot(const f32 x)
        {
#ifdef IRRLICHT_FAST_MATH
                // comes from Nvidia
                // "#if 1" always selects this variant; the SSE variant below is never compiled
#if 1
                // initial guess from the bit pattern of x, reinterpreted as f32 again
                u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
                f32 y = *(f32*)&tmp;
                // refinement of the guess
                return y * (1.47f - 0.47f * x * y * y);
#elif defined(_MSC_VER)
                // an sse2 version
                // NOTE(review): this variant writes to the const parameter x; it would need rework before being enabled
                __asm
                {
                        movss   xmm0, x
                        rsqrtss xmm0, xmm0
                        movss   x, xmm0
                }
                return x;
#endif
#else // no fast math
                return 1.f / sqrtf ( x );
#endif
        }
 300
 301
 302
        //! Returns 1 / f.
        /** Both build variants currently perform the plain division; no guard
        against f == 0 is active.
        \param f Divisor.
        \return 1.f / f. */
        REALINLINE f32 reciprocal ( const f32 f )
        {
#ifdef IRRLICHT_FAST_MATH
                // Notes on the disabled variant kept below: it was meant to avoid a
                // division by zero (FPU exception) by replacing a zero f with a
                // large negative value, so that the result is near zero and stays
                // negative (callers such as "plane.normal dot anything" test for <= 0.f).
                return 1.f / f;
                //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
                //return 1.f / FR ( x );
#else // no fast math
                return 1.f / f;
#endif
        }
 317
 318
 319         REALINLINE f32 reciprocal_approxim ( const f32 p )
 320         {
 321 #ifdef IRRLICHT_FAST_MATH
 322                 register u32 x = 0x7F000000 - IR ( p );
 323                 const f32 r = FR ( x );
 324                 return r * (2.0f - p * r);
 325 #else // no fast math
 326                 return 1.f / p;
 327 #endif
 328         }
 329
 330
 331         REALINLINE s32 floor32(f32 x)
 332         {
 333 #ifdef IRRLICHT_FAST_MATH
 334                 const f32 h = 0.5f;
 335
 336                 s32 t;
 337
 338 #if defined(_MSC_VER)
 339                 __asm
 340                 {
 341                         fld     x
 342                         fsub    h
 343                         fistp   t
 344                 }
 345 #elif defined(__GNUC__)
 346                 __asm__ __volatile__ (
 347                         "fsub %2 \n\t"
 348                         "fistpl %0"
 349                         : "=m" (t)
 350                         : "t" (x), "f" (h)
 351                         : "st"
 352                         );
 353 #else
 354 #  warn IRRLICHT_FAST_MATH not supported.
 355                 return (s32) floorf ( x );
 356 #endif
 357                 return t;
 358 #else // no fast math
 359                 return (s32) floorf ( x );
 360 #endif
 361         }
 362
 363
 364         REALINLINE s32 ceil32 ( f32 x )
 365         {
 366 #ifdef IRRLICHT_FAST_MATH
 367                 const f32 h = 0.5f;
 368
 369                 s32 t;
 370
 371 #if defined(_MSC_VER)
 372                 __asm
 373                 {
 374                         fld     x
 375                         fadd    h
 376                         fistp   t
 377                 }
 378 #elif defined(__GNUC__)
 379                 __asm__ __volatile__ (
 380                         "fadd %2 \n\t"
 381                         "fistpl %0 \n\t"
 382                         : "=m"(t)
 383                         : "t"(x), "f"(h)
 384                         : "st"
 385                         );
 386 #else
 387 #  warn IRRLICHT_FAST_MATH not supported.
 388                 return (s32) ceilf ( x );
 389 #endif
 390                 return t;
 391 #else // not fast math
 392                 return (s32) ceilf ( x );
 393 #endif
 394         }
 395
 396
 397
 398         REALINLINE s32 round32(f32 x)
 399         {
 400 #if defined(IRRLICHT_FAST_MATH)
 401                 s32 t;
 402
 403 #if defined(_MSC_VER)
 404                 __asm
 405                 {
 406                         fld   x
 407                         fistp t
 408                 }
 409 #elif defined(__GNUC__)
 410                 __asm__ __volatile__ (
 411                         "fistpl %0 \n\t"
 412                         : "=m"(t)
 413                         : "t"(x)
 414                         : "st"
 415                         );
 416 #else
 417 #  warn IRRLICHT_FAST_MATH not supported.
 418                 return (s32) round_(x);
 419 #endif
 420                 return t;
 421 #else // no fast math
 422                 return (s32) round_(x);
 423 #endif
 424         }
 425
 426         inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
 427         {
 428                 return a > b ? (a > c ? a : c) : (b > c ? b : c);
 429         }
 430
 431         inline f32 f32_min3(const f32 a, const f32 b, const f32 c)
 432         {
 433                 return a < b ? (a < c ? a : c) : (b < c ? b : c);
 434         }
 435
 436         inline f32 fract ( f32 x )
 437         {
 438                 return x - floorf ( x );
 439         }
 440
 441 } // end namespace core
 442 } // end namespace irr
 443
 444 #endif
 445