apps/codecs/libmad/fixed.h

   1 /*
   2  * libmad - MPEG audio decoder library
   3  * Copyright (C) 2000-2004 Underbit Technologies, Inc.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * $Id$
  20  */
  21
  22 # ifndef LIBMAD_FIXED_H
  23 # define LIBMAD_FIXED_H
  24
  25 #include <inttypes.h>
  26
  27 typedef   int32_t mad_fixed_t;
  28
  29 typedef   int32_t mad_fixed64hi_t;
  30 typedef  uint32_t mad_fixed64lo_t;
  31
  32 # if defined(_MSC_VER)
  33 #  define mad_fixed64_t  signed __int64
  34 # elif 1 || defined(__GNUC__)
  35 #  define mad_fixed64_t  signed long long
  36 # endif
  37
  38 # if defined(FPM_FLOAT)
  39 typedef double mad_sample_t;
  40 # else
  41 typedef mad_fixed_t mad_sample_t;
  42 # endif
  43
  44 /*
  45  * Fixed-point format: 0xABBBBBBB
  46  * A == whole part      (sign + 3 bits)
  47  * B == fractional part (28 bits)
  48  *
  49  * Values are signed two's complement, so the effective range is:
  50  * 0x80000000 to 0x7fffffff
  51  *       -8.0 to +7.9999999962747097015380859375
  52  *
  53  * The smallest representable value is:
  54  * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
  55  *
  56  * 28 bits of fractional accuracy represent about
  57  * 8.6 digits of decimal accuracy.
  58  *
  59  * Fixed-point numbers can be added or subtracted as normal
  60  * integers, but multiplication requires shifting the 64-bit result
  61  * from 56 fractional bits back to 28 (and rounding.)
  62  *
  63  * Changing the definition of MAD_F_FRACBITS is only partially
  64  * supported, and must be done with care.
  65  */
  66
  67 # define MAD_F_FRACBITS         28
  68
  69 # if MAD_F_FRACBITS == 28
  70 #  define MAD_F(x)              ((mad_fixed_t) (x##L))
  71 # else
  72 #  if MAD_F_FRACBITS < 28
  73 #   warning "MAD_F_FRACBITS < 28"
  74 #   define MAD_F(x)             ((mad_fixed_t)  \
  75                                  (((x##L) +  \
  76                                    (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
  77                                   (28 - MAD_F_FRACBITS)))
  78 #  elif MAD_F_FRACBITS > 28
  79 #   error "MAD_F_FRACBITS > 28 not currently supported"
  80 #   define MAD_F(x)             ((mad_fixed_t)  \
  81                                  ((x##L) << (MAD_F_FRACBITS - 28)))
  82 #  endif
  83 # endif
  84
  85 # define MAD_F_MIN              ((mad_fixed_t) -0x80000000L)
  86 # define MAD_F_MAX              ((mad_fixed_t) +0x7fffffffL)
  87
  88 # define MAD_F_ONE              MAD_F(0x10000000)
  89
  90 # define mad_f_tofixed(x)       ((mad_fixed_t)  \
  91                                  ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
  92 # define mad_f_todouble(x)      ((double)  \
  93                                  ((x) / (double) (1L << MAD_F_FRACBITS)))
  94
  95 # define mad_f_intpart(x)       ((x) >> MAD_F_FRACBITS)
  96 # define mad_f_fracpart(x)      ((x) & ((1L << MAD_F_FRACBITS) - 1))
  97                                 /* (x should be positive) */
  98
  99 # define mad_f_fromint(x)       ((x) << MAD_F_FRACBITS)
 100
 101 # define mad_f_add(x, y)        ((x) + (y))
 102 # define mad_f_sub(x, y)        ((x) - (y))
 103
 104 # if defined(FPM_FLOAT)
 105 #  error "FPM_FLOAT not yet supported"
 106
 107 #  undef MAD_F
 108 #  define MAD_F(x)              mad_f_todouble(x)
 109
 110 #  define mad_f_mul(x, y)       ((x) * (y))
 111 #  define mad_f_scale64
 112
 113 #  undef ASO_ZEROCHECK
 114
 115 # elif defined(FPM_64BIT)
 116
 117 /*
 118  * This version should be the most accurate if 64-bit types are supported by
 119  * the compiler, although it may not be the most efficient.
 120  */
 121 #  if defined(OPT_ACCURACY)
 122 #   define mad_f_mul(x, y)  \
 123     ((mad_fixed_t)  \
 124      ((((mad_fixed64_t) (x) * (y)) +  \
 125        (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
 126 #  else
 127 #   define mad_f_mul(x, y)  \
 128     ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
 129 #  endif
 130
 131 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 132
 133 /* --- Intel --------------------------------------------------------------- */
 134
 135 # elif defined(FPM_INTEL)
 136
 137 #  if defined(_MSC_VER)
 138 #   pragma warning(push)
 139 #   pragma warning(disable: 4035)  /* no return value */
 140 static __forceinline
 141 mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 142 {
 143   enum {
 144     fracbits = MAD_F_FRACBITS
 145   };
 146
 147   __asm {
 148     mov eax, x
 149     imul y
 150     shrd eax, edx, fracbits
 151   }
 152
 153   /* implicit return of eax */
 154 }
 155 #   pragma warning(pop)
 156
 157 #   define mad_f_mul            mad_f_mul_inline
 158 #   define mad_f_scale64
 159 #  else
 160 /*
 161  * This Intel version is fast and accurate; the disposition of the least
 162  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 163  */
 164 #   define MAD_F_MLX(hi, lo, x, y)  \
 165     asm ("imull %3"  \
 166          : "=a" (lo), "=d" (hi)  \
 167          : "%a" (x), "rm" (y)  \
 168          : "cc")
 169
 170 #   if defined(OPT_ACCURACY)
 171 /*
 172  * This gives best accuracy but is not very fast.
 173  */
 174 #    define MAD_F_MLA(hi, lo, x, y)  \
 175     ({ mad_fixed64hi_t __hi;  \
 176        mad_fixed64lo_t __lo;  \
 177        MAD_F_MLX(__hi, __lo, (x), (y));  \
 178        asm ("addl %2,%0\n\t"  \
 179             "adcl %3,%1"  \
 180             : "=rm" (lo), "=rm" (hi)  \
 181             : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
 182             : "cc");  \
 183     })
 184 #   endif  /* OPT_ACCURACY */
 185
 186 #   if defined(OPT_ACCURACY)
 187 /*
 188  * Surprisingly, this is faster than SHRD followed by ADC.
 189  */
 190 #    define mad_f_scale64(hi, lo)  \
 191     ({ mad_fixed64hi_t __hi_;  \
 192        mad_fixed64lo_t __lo_;  \
 193        mad_fixed_t __result;  \
 194        asm ("addl %4,%2\n\t"  \
 195             "adcl %5,%3"  \
 196             : "=rm" (__lo_), "=rm" (__hi_)  \
 197             : "0" (lo), "1" (hi),  \
 198               "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
 199             : "cc");  \
 200        asm ("shrdl %3,%2,%1"  \
 201             : "=rm" (__result)  \
 202             : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
 203             : "cc");  \
 204        __result;  \
 205     })
 206 #   elif defined(OPT_INTEL)
 207 /*
 208  * Alternate Intel scaling that may or may not perform better.
 209  */
 210 #    define mad_f_scale64(hi, lo)  \
 211     ({ mad_fixed_t __result;  \
 212        asm ("shrl %3,%1\n\t"  \
 213             "shll %4,%2\n\t"  \
 214             "orl %2,%1"  \
 215             : "=rm" (__result)  \
 216             : "0" (lo), "r" (hi),  \
 217               "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
 218             : "cc");  \
 219        __result;  \
 220     })
 221 #   else
 222 #    define mad_f_scale64(hi, lo)  \
 223     ({ mad_fixed_t __result;  \
 224        asm ("shrdl %3,%2,%1"  \
 225             : "=rm" (__result)  \
 226             : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
 227             : "cc");  \
 228        __result;  \
 229     })
 230 #   endif  /* OPT_ACCURACY */
 231
 232 #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
 233 #  endif
 234
 235 /* --- ARM ----------------------------------------------------------------- */
 236
 237 # elif defined(FPM_ARM)
 238
 239 /*
 240  * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
 241  * least significant bit is properly rounded at no CPU cycle cost!
 242  */
 243 # if 1
 244 /*
 245  * This is faster than the default implementation via MAD_F_MLX() and
 246  * mad_f_scale64().
 247  */
 248 #  define mad_f_mul(x, y)  \
 249     ({ mad_fixed64hi_t __hi;  \
 250        mad_fixed64lo_t __lo;  \
 251        mad_fixed_t __result;  \
 252        asm ("smull      %0, %1, %3, %4\n\t"  \
 253             "movs       %0, %0, lsr %5\n\t"  \
 254             "adc        %2, %0, %1, lsl %6"  \
 255             : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
 256             : "%r" (x), "r" (y),  \
 257               "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 258             : "cc");  \
 259        __result;  \
 260     })
 261 # endif
 262
 263 #  define MAD_F_MLX(hi, lo, x, y)  \
 264     asm ("smull %0, %1, %2, %3"  \
 265          : "=&r" (lo), "=&r" (hi)  \
 266          : "%r" (x), "r" (y))
 267
 268 #  define MAD_F_MLA(hi, lo, x, y)  \
 269     asm ("smlal %0, %1, %2, %3"  \
 270          : "+r" (lo), "+r" (hi)  \
 271          : "%r" (x), "r" (y))
 272
 273 #  define MAD_F_MLN(hi, lo)  \
 274     asm ("rsbs  %0, %2, #0\n\t"  \
 275          "rsc   %1, %3, #0"  \
 276          : "=r" (lo), "=r" (hi)  \
 277          : "0" (lo), "1" (hi)  \
 278          : "cc")
 279
 280 #  define mad_f_scale64(hi, lo)  \
 281     ({ mad_fixed_t __result;  \
 282        asm ("movs       %0, %1, lsr %3\n\t"  \
 283             "adc        %0, %0, %2, lsl %4"  \
 284             : "=&r" (__result)  \
 285             : "r" (lo), "r" (hi),  \
 286               "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 287             : "cc");  \
 288        __result;  \
 289     })
 290
 291 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 292
 293 /* --- MIPS ---------------------------------------------------------------- */
 294
 295 # elif defined(FPM_MIPS)
 296
 297 /*
 298  * This MIPS version is fast and accurate; the disposition of the least
 299  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 300  */
 301 #  define MAD_F_MLX(hi, lo, x, y)  \
 302     asm ("mult  %2,%3"  \
 303          : "=l" (lo), "=h" (hi)  \
 304          : "%r" (x), "r" (y))
 305
 306 # if defined(HAVE_MADD_ASM)
 307 #  define MAD_F_MLA(hi, lo, x, y)  \
 308     asm ("madd  %2,%3"  \
 309          : "+l" (lo), "+h" (hi)  \
 310          : "%r" (x), "r" (y))
 311 # elif defined(HAVE_MADD16_ASM)
 312 /*
 313  * This loses significant accuracy due to the 16-bit integer limit in the
 314  * multiply/accumulate instruction.
 315  */
 316 #  define MAD_F_ML0(hi, lo, x, y)  \
 317     asm ("mult  %2,%3"  \
 318          : "=l" (lo), "=h" (hi)  \
 319          : "%r" ((x) >> 12), "r" ((y) >> 16))
 320 #  define MAD_F_MLA(hi, lo, x, y)  \
 321     asm ("madd16        %2,%3"  \
 322          : "+l" (lo), "+h" (hi)  \
 323          : "%r" ((x) >> 12), "r" ((y) >> 16))
 324 #  define MAD_F_MLZ(hi, lo)  ((mad_fixed_t) (lo))
 325 # endif
 326
 327 # if defined(OPT_SPEED)
 328 #  define mad_f_scale64(hi, lo)  \
 329     ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
 330 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 331 # endif
 332
 333 /* --- SPARC --------------------------------------------------------------- */
 334
 335 # elif defined(FPM_SPARC)
 336
 337 /*
 338  * This SPARC V8 version is fast and accurate; the disposition of the least
 339  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 340  */
 341 #  define MAD_F_MLX(hi, lo, x, y)  \
 342     asm ("smul %2, %3, %0\n\t"  \
 343          "rd %%y, %1"  \
 344          : "=r" (lo), "=r" (hi)  \
 345          : "%r" (x), "rI" (y))
 346
 347 /* --- PowerPC ------------------------------------------------------------- */
 348
 349 # elif defined(FPM_PPC)
 350
 351 /*
 352  * This PowerPC version is fast and accurate; the disposition of the least
 353  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 354  */
 355 #  define MAD_F_MLX(hi, lo, x, y)  \
 356     do {  \
 357       asm ("mullw %0,%1,%2"  \
 358            : "=r" (lo)  \
 359            : "%r" (x), "r" (y));  \
 360       asm ("mulhw %0,%1,%2"  \
 361            : "=r" (hi)  \
 362            : "%r" (x), "r" (y));  \
 363     }  \
 364     while (0)
 365
 366 #  if defined(OPT_ACCURACY)
 367 /*
 368  * This gives best accuracy but is not very fast.
 369  */
 370 #   define MAD_F_MLA(hi, lo, x, y)  \
 371     ({ mad_fixed64hi_t __hi;  \
 372        mad_fixed64lo_t __lo;  \
 373        MAD_F_MLX(__hi, __lo, (x), (y));  \
 374        asm ("addc %0,%2,%3\n\t"  \
 375             "adde %1,%4,%5"  \
 376             : "=r" (lo), "=r" (hi)  \
 377             : "%r" (lo), "r" (__lo),  \
 378               "%r" (hi), "r" (__hi)  \
 379             : "xer");  \
 380     })
 381 #  endif
 382
 383 #  if defined(OPT_ACCURACY)
 384 /*
 385  * This is slower than the truncating version below it.
 386  */
 387 #   define mad_f_scale64(hi, lo)  \
 388     ({ mad_fixed_t __result, __round;  \
 389        asm ("rotrwi %0,%1,%2"  \
 390             : "=r" (__result)  \
 391             : "r" (lo), "i" (MAD_F_SCALEBITS));  \
 392        asm ("extrwi %0,%1,1,0"  \
 393             : "=r" (__round)  \
 394             : "r" (__result));  \
 395        asm ("insrwi %0,%1,%2,0"  \
 396             : "+r" (__result)  \
 397             : "r" (hi), "i" (MAD_F_SCALEBITS));  \
 398        asm ("add %0,%1,%2"  \
 399             : "=r" (__result)  \
 400             : "%r" (__result), "r" (__round));  \
 401        __result;  \
 402     })
 403 #  else
 404 #   define mad_f_scale64(hi, lo)  \
 405     ({ mad_fixed_t __result;  \
 406        asm ("rotrwi %0,%1,%2"  \
 407             : "=r" (__result)  \
 408             : "r" (lo), "i" (MAD_F_SCALEBITS));  \
 409        asm ("insrwi %0,%1,%2,0"  \
 410             : "+r" (__result)  \
 411             : "r" (hi), "i" (MAD_F_SCALEBITS));  \
 412        __result;  \
 413     })
 414 #  endif
 415
 416 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 417
 418 # elif defined(FPM_COLDFIRE_EMAC)
 419
 420 /* mad_f_mul using the Coldfire MCF5249 EMAC unit. Loses 3 bits of accuracy.
 421    Note that we don't define any of the libmad accumulator macros, as
 422    any functions that use these should have the relevant sections rewritten
 423    in assembler to utilise the EMAC accumulators properly.
 424    Assumes the default +/- 3.28 fixed point format
 425  */
 426 #define mad_f_mul(x, y) \
 427 ({ \
 428   mad_fixed64hi_t hi; \
 429   asm volatile("mac.l %[a], %[b], %%acc0\n\t" \
 430                "movclr.l %%acc0, %[hi]\n\t" \
 431                "asl.l #3, %[hi]" \
 432                : [hi] "=d" (hi) \
 433                : [a] "r" ((x)), [b] "r" ((y))); \
 434   hi; \
 435 })
 436 /* Define dummy mad_f_scale64 to prevent libmad from defining MAD_F_SCALEBITS
 437    below. Having MAD_F_SCALEBITS defined screws up the PRESHIFT macro in synth.c
 438  */
 439 #define mad_f_scale64(hi, lo) (lo)
 440
 441 /* --- Default ------------------------------------------------------------- */
 442
 443 # elif defined(FPM_DEFAULT)
 444
 445 /*
 446  * This version is the most portable but it loses significant accuracy.
 447  * Furthermore, accuracy is biased against the second argument, so care
 448  * should be taken when ordering operands.
 449  *
 450  * The scale factors are constant as this is not used with SSO.
 451  *
 452  * Pre-rounding is required to stay within the limits of compliance.
 453  */
 454 #  if defined(OPT_SPEED)
 455 #   define mad_f_mul(x, y)      (((x) >> 12) * ((y) >> 16))
 456 #  else
 457 #   define mad_f_mul(x, y)      ((((x) + (1L << 11)) >> 12) *  \
 458                                  (((y) + (1L << 15)) >> 16))
 459 #  endif
 460
 461 /* ------------------------------------------------------------------------- */
 462
 463 # else
 464 #  error "no FPM selected"
 465 # endif
 466
 467 /* default implementations */
 468
 469 # if !defined(mad_f_mul)
 470 #  define mad_f_mul(x, y)  \
 471     ({ register mad_fixed64hi_t __hi;  \
 472        register mad_fixed64lo_t __lo;  \
 473        MAD_F_MLX(__hi, __lo, (x), (y));  \
 474        mad_f_scale64(__hi, __lo);  \
 475     })
 476 # endif
 477
 478 # if !defined(MAD_F_MLA)
 479 #  define MAD_F_ML0(hi, lo, x, y)       ((lo)  = mad_f_mul((x), (y)))
 480 #  define MAD_F_MLA(hi, lo, x, y)       ((lo) += mad_f_mul((x), (y)))
 481 #  define MAD_F_MLN(hi, lo)             ((lo)  = -(lo))
 482 #  define MAD_F_MLZ(hi, lo)             ((void) (hi), (mad_fixed_t) (lo))
 483 # endif
 484
 485 # if !defined(MAD_F_ML0)
 486 #  define MAD_F_ML0(hi, lo, x, y)       MAD_F_MLX((hi), (lo), (x), (y))
 487 # endif
 488
 489 # if !defined(MAD_F_MLN)
 490 #  define MAD_F_MLN(hi, lo)             ((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
 491 # endif
 492
 493 # if !defined(MAD_F_MLZ)
 494 #  define MAD_F_MLZ(hi, lo)             mad_f_scale64((hi), (lo))
 495 # endif
 496
 497 # if !defined(mad_f_scale64)
 498 #  if defined(OPT_ACCURACY)
 499 #   define mad_f_scale64(hi, lo)  \
 500     ((((mad_fixed_t)  \
 501        (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
 502         ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
 503 #  else
 504 #   define mad_f_scale64(hi, lo)  \
 505     ((mad_fixed_t)  \
 506      (((hi) << (32 - MAD_F_SCALEBITS)) |  \
 507       ((lo) >> MAD_F_SCALEBITS)))
 508 #  endif
 509 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 510 # endif
 511
 512 /* C routines */
 513
 514 mad_fixed_t mad_f_abs(mad_fixed_t);
 515 mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
 516
 517 # endif