lib/rbcodec/codecs/libmad/fixed.h

   1 /*
   2  * libmad - MPEG audio decoder library
   3  * Copyright (C) 2000-2004 Underbit Technologies, Inc.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * $Id$
  20  */
  21
  22 # ifndef LIBMAD_FIXED_H
  23 # define LIBMAD_FIXED_H
  24
  25 #include <inttypes.h>
  26
  27 typedef   int32_t mad_fixed_t;
  28
  29 typedef   int32_t mad_fixed64hi_t;
  30 typedef  uint32_t mad_fixed64lo_t;
  31
  32 # if defined(_MSC_VER)
  33 #  define mad_fixed64_t  signed __int64
  34 # elif 1 || defined(__GNUC__)
  35 #  define mad_fixed64_t  signed long long
  36 # endif
  37
  38 # if defined(FPM_FLOAT)
  39 typedef double mad_sample_t;
  40 # else
  41 typedef mad_fixed_t mad_sample_t;
  42 # endif
  43
  44 /*
  45  * Fixed-point format: 0xABBBBBBB
  46  * A == whole part      (sign + 3 bits)
  47  * B == fractional part (28 bits)
  48  *
  49  * Values are signed two's complement, so the effective range is:
  50  * 0x80000000 to 0x7fffffff
  51  *       -8.0 to +7.9999999962747097015380859375
  52  *
  53  * The smallest representable value is:
  54  * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
  55  *
  56  * 28 bits of fractional accuracy represent about
  57  * 8.6 digits of decimal accuracy.
  58  *
  59  * Fixed-point numbers can be added or subtracted as normal
  60  * integers, but multiplication requires shifting the 64-bit result
  61  * from 56 fractional bits back to 28 (and rounding.)
  62  *
  63  * Changing the definition of MAD_F_FRACBITS is only partially
  64  * supported, and must be done with care.
  65  */
  66
  67 # define MAD_F_FRACBITS         28
  68
  69 # if MAD_F_FRACBITS == 28
  70 #  define MAD_F(x)              ((mad_fixed_t) (x##L))
  71 # else
  72 #  if MAD_F_FRACBITS < 28
  73 #   warning "MAD_F_FRACBITS < 28"
  74 #   define MAD_F(x)             ((mad_fixed_t)  \
  75                                  (((x##L) +  \
  76                                    (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
  77                                   (28 - MAD_F_FRACBITS)))
  78 #  elif MAD_F_FRACBITS > 28
  79 #   error "MAD_F_FRACBITS > 28 not currently supported"
  80 #   define MAD_F(x)             ((mad_fixed_t)  \
  81                                  ((x##L) << (MAD_F_FRACBITS - 28)))
  82 #  endif
  83 # endif
  84
  85 # define MAD_F_MIN              ((mad_fixed_t) -0x80000000L)
  86 # define MAD_F_MAX              ((mad_fixed_t) +0x7fffffffL)
  87
  88 # define MAD_F_ONE              MAD_F(0x10000000)
  89
  90 # define mad_f_tofixed(x)       ((mad_fixed_t)  \
  91                                  ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
  92 # define mad_f_todouble(x)      ((double)  \
  93                                  ((x) / (double) (1L << MAD_F_FRACBITS)))
  94
  95 # define mad_f_intpart(x)       ((x) >> MAD_F_FRACBITS)
  96 # define mad_f_fracpart(x)      ((x) & ((1L << MAD_F_FRACBITS) - 1))
  97                                 /* (x should be positive) */
  98
  99 # define mad_f_fromint(x)       ((x) << MAD_F_FRACBITS)
 100
 101 # define mad_f_add(x, y)        ((x) + (y))
 102 # define mad_f_sub(x, y)        ((x) - (y))
 103
 104 # if defined(FPM_FLOAT)
 105 #  error "FPM_FLOAT not yet supported"
 106
 107 #  undef MAD_F
 108 #  define MAD_F(x)              mad_f_todouble(x)
 109
 110 #  define mad_f_mul(x, y)       ((x) * (y))
 111 #  define mad_f_scale64
 112
 113 # elif defined(FPM_64BIT)
 114
 115 /*
 116  * This version should be the most accurate if 64-bit types are supported by
 117  * the compiler, although it may not be the most efficient.
 118  */
 119 #  if defined(OPT_ACCURACY)
 120 #   define mad_f_mul(x, y)  \
 121     ((mad_fixed_t)  \
 122      ((((mad_fixed64_t) (x) * (y)) +  \
 123        (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
 124 #  else
 125 #   define mad_f_mul(x, y)  \
 126     ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
 127 #  endif
 128
 129 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 130
 131 /* --- Intel --------------------------------------------------------------- */
 132
 133 # elif defined(FPM_INTEL)
 134
 135 #  if defined(_MSC_VER)
 136 #   pragma warning(push)
 137 #   pragma warning(disable: 4035)  /* no return value */
 138 static __forceinline
 139 mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 140 {
 141   enum {
 142     fracbits = MAD_F_FRACBITS
 143   };
 144
 145   __asm {
 146     mov eax, x
 147     imul y
 148     shrd eax, edx, fracbits
 149   }
 150
 151   /* implicit return of eax */
 152 }
 153 #   pragma warning(pop)
 154
 155 #   define mad_f_mul            mad_f_mul_inline
 156 #   define mad_f_scale64
 157 #  else
 158 /*
 159  * This Intel version is fast and accurate; the disposition of the least
 160  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 161  */
 162 #   define MAD_F_MLX(hi, lo, x, y)  \
 163     asm ("imull %3"  \
 164          : "=a" (lo), "=d" (hi)  \
 165          : "%a" (x), "rm" (y)  \
 166          : "cc")
 167
 168 #   if defined(OPT_ACCURACY)
 169 /*
 170  * This gives best accuracy but is not very fast.
 171  */
 172 #    define MAD_F_MLA(hi, lo, x, y)  \
 173     ({ mad_fixed64hi_t __hi;  \
 174        mad_fixed64lo_t __lo;  \
 175        MAD_F_MLX(__hi, __lo, (x), (y));  \
 176        asm ("addl %2,%0\n\t"  \
 177             "adcl %3,%1"  \
 178             : "=rm" (lo), "=rm" (hi)  \
 179             : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
 180             : "cc");  \
 181     })
 182 #   endif  /* OPT_ACCURACY */
 183
 184 #   if defined(OPT_ACCURACY)
 185 /*
 186  * Surprisingly, this is faster than SHRD followed by ADC.
 187  */
 188 #    define mad_f_scale64(hi, lo)  \
 189     ({ mad_fixed64hi_t __hi_;  \
 190        mad_fixed64lo_t __lo_;  \
 191        mad_fixed_t __result;  \
 192        asm ("addl %4,%2\n\t"  \
 193             "adcl %5,%3"  \
 194             : "=rm" (__lo_), "=rm" (__hi_)  \
 195             : "0" (lo), "1" (hi),  \
 196               "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
 197             : "cc");  \
 198        asm ("shrdl %3,%2,%1"  \
 199             : "=rm" (__result)  \
 200             : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
 201             : "cc");  \
 202        __result;  \
 203     })
 204 #   elif defined(OPT_INTEL)
 205 /*
 206  * Alternate Intel scaling that may or may not perform better.
 207  */
 208 #    define mad_f_scale64(hi, lo)  \
 209     ({ mad_fixed_t __result;  \
 210        asm ("shrl %3,%1\n\t"  \
 211             "shll %4,%2\n\t"  \
 212             "orl %2,%1"  \
 213             : "=rm" (__result)  \
 214             : "0" (lo), "r" (hi),  \
 215               "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
 216             : "cc");  \
 217        __result;  \
 218     })
 219 #   else
 220 #    define mad_f_scale64(hi, lo)  \
 221     ({ mad_fixed_t __result;  \
 222        asm ("shrdl %3,%2,%1"  \
 223             : "=rm" (__result)  \
 224             : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
 225             : "cc");  \
 226        __result;  \
 227     })
 228 #   endif  /* OPT_ACCURACY */
 229
 230 #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
 231 #  endif
 232
 233 /* --- ARM ----------------------------------------------------------------- */
 234
 235 # elif defined(FPM_ARM)
 236
 237 /*
 238  * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
 239  * least significant bit is properly rounded at no CPU cycle cost!
 240  */
 241 # if 1
 242 /*
 243  * This is faster than the default implementation via MAD_F_MLX() and
 244  * mad_f_scale64().
 245  */
 246 #  define mad_f_mul(x, y)  \
 247     ({ mad_fixed64hi_t __hi;  \
 248        mad_fixed64lo_t __lo;  \
 249        mad_fixed_t __result;  \
 250        asm ("smull      %0, %1, %3, %4\n\t"  \
 251             "movs       %0, %0, lsr %5\n\t"  \
 252             "adc        %2, %0, %1, lsl %6"  \
 253             : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
 254             : "%r" (x), "r" (y),  \
 255               "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 256             : "cc");  \
 257        __result;  \
 258     })
 259 # endif
 260
 261 #  define MAD_F_MLX(hi, lo, x, y)  \
 262     asm ("smull %0, %1, %2, %3"  \
 263          : "=&r" (lo), "=&r" (hi)  \
 264          : "%r" (x), "r" (y))
 265
 266 #  define MAD_F_MLA(hi, lo, x, y)  \
 267     asm ("smlal %0, %1, %2, %3"  \
 268          : "+r" (lo), "+r" (hi)  \
 269          : "%r" (x), "r" (y))
 270
 271 #  define MAD_F_MLN(hi, lo)  \
 272     asm ("rsbs  %0, %2, #0\n\t"  \
 273          "rsc   %1, %3, #0"  \
 274          : "=r" (lo), "=r" (hi)  \
 275          : "0" (lo), "1" (hi)  \
 276          : "cc")
 277
 278 #  define mad_f_scale64(hi, lo)  \
 279     ({ mad_fixed_t __result;  \
 280        asm ("movs       %0, %1, lsr %3\n\t"  \
 281             "adc        %0, %0, %2, lsl %4"  \
 282             : "=&r" (__result)  \
 283             : "r" (lo), "r" (hi),  \
 284               "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 285             : "cc");  \
 286        __result;  \
 287     })
 288
 289 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 290
 291 /* --- MIPS ---------------------------------------------------------------- */
 292
 293 # elif defined(FPM_MIPS)
 294
 295 #if GCCNUM >= 404
 296 typedef unsigned int u64_di_t __attribute__ ((mode (DI)));
 297 # define MAD_F_MLX(hi, lo, x, y) \
 298 do { \
 299    u64_di_t __ll = (u64_di_t) (x) * (y); \
 300    hi = __ll >> 32; \
 301    lo = __ll; \
 302 }while(0)
 303 #else
 304 /*
 305  * This MIPS version is fast and accurate; the disposition of the least
 306  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 307  */
 308 #  define MAD_F_MLX(hi, lo, x, y)  \
 309     asm ("mult  %2,%3"  \
 310          : "=l" (lo), "=h" (hi)  \
 311          : "%r" (x), "r" (y))
 312
 313 # if defined(HAVE_MADD_ASM)
 314 #  define MAD_F_MLA(hi, lo, x, y)  \
 315     asm ("madd  %2,%3"  \
 316          : "+l" (lo), "+h" (hi)  \
 317          : "%r" (x), "r" (y))
 318 # elif defined(HAVE_MADD16_ASM)
 319 /*
 320  * This loses significant accuracy due to the 16-bit integer limit in the
 321  * multiply/accumulate instruction.
 322  */
 323 #  define MAD_F_ML0(hi, lo, x, y)  \
 324     asm ("mult  %2,%3"  \
 325          : "=l" (lo), "=h" (hi)  \
 326          : "%r" ((x) >> 12), "r" ((y) >> 16))
 327 #  define MAD_F_MLA(hi, lo, x, y)  \
 328     asm ("madd16        %2,%3"  \
 329          : "+l" (lo), "+h" (hi)  \
 330          : "%r" ((x) >> 12), "r" ((y) >> 16))
 331 #  define MAD_F_MLZ(hi, lo)  ((mad_fixed_t) (lo))
 332 # endif
 333
 334 #endif /* GCCNUM */
 335
 336 # if defined(OPT_SPEED)
 337 #  define mad_f_scale64(hi, lo)  \
 338     ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
 339 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 340 # endif
 341
 342 /* --- SPARC --------------------------------------------------------------- */
 343
 344 # elif defined(FPM_SPARC)
 345
 346 /*
 347  * This SPARC V8 version is fast and accurate; the disposition of the least
 348  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 349  */
 350 #  define MAD_F_MLX(hi, lo, x, y)  \
 351     asm ("smul %2, %3, %0\n\t"  \
 352          "rd %%y, %1"  \
 353          : "=r" (lo), "=r" (hi)  \
 354          : "%r" (x), "rI" (y))
 355
 356 /* --- PowerPC ------------------------------------------------------------- */
 357
 358 # elif defined(FPM_PPC)
 359
 360 /*
 361  * This PowerPC version is fast and accurate; the disposition of the least
 362  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 363  */
 364 #  define MAD_F_MLX(hi, lo, x, y)  \
 365     do {  \
 366       asm ("mullw %0,%1,%2"  \
 367            : "=r" (lo)  \
 368            : "%r" (x), "r" (y));  \
 369       asm ("mulhw %0,%1,%2"  \
 370            : "=r" (hi)  \
 371            : "%r" (x), "r" (y));  \
 372     }  \
 373     while (0)
 374
 375 #  if defined(OPT_ACCURACY)
 376 /*
 377  * This gives best accuracy but is not very fast.
 378  */
 379 #   define MAD_F_MLA(hi, lo, x, y)  \
 380     ({ mad_fixed64hi_t __hi;  \
 381        mad_fixed64lo_t __lo;  \
 382        MAD_F_MLX(__hi, __lo, (x), (y));  \
 383        asm ("addc %0,%2,%3\n\t"  \
 384             "adde %1,%4,%5"  \
 385             : "=r" (lo), "=r" (hi)  \
 386             : "%r" (lo), "r" (__lo),  \
 387               "%r" (hi), "r" (__hi)  \
 388             : "xer");  \
 389     })
 390 #  endif
 391
 392 #  if defined(OPT_ACCURACY)
 393 /*
 394  * This is slower than the truncating version below it.
 395  */
 396 #   define mad_f_scale64(hi, lo)  \
 397     ({ mad_fixed_t __result, __round;  \
 398        asm ("rotrwi %0,%1,%2"  \
 399             : "=r" (__result)  \
 400             : "r" (lo), "i" (MAD_F_SCALEBITS));  \
 401        asm ("extrwi %0,%1,1,0"  \
 402             : "=r" (__round)  \
 403             : "r" (__result));  \
 404        asm ("insrwi %0,%1,%2,0"  \
 405             : "+r" (__result)  \
 406             : "r" (hi), "i" (MAD_F_SCALEBITS));  \
 407        asm ("add %0,%1,%2"  \
 408             : "=r" (__result)  \
 409             : "%r" (__result), "r" (__round));  \
 410        __result;  \
 411     })
 412 #  else
 413 #   define mad_f_scale64(hi, lo)  \
 414     ({ mad_fixed_t __result;  \
 415        asm ("rotrwi %0,%1,%2"  \
 416             : "=r" (__result)  \
 417             : "r" (lo), "i" (MAD_F_SCALEBITS));  \
 418        asm ("insrwi %0,%1,%2,0"  \
 419             : "+r" (__result)  \
 420             : "r" (hi), "i" (MAD_F_SCALEBITS));  \
 421        __result;  \
 422     })
 423 #  endif
 424
 425 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 426
 427 # elif defined(FPM_COLDFIRE_EMAC)
 428
 429 /* mad_f_mul using the Coldfire MCF5249 EMAC unit. Loses 3 bits of accuracy.
 430    Note that we don't define any of the libmad accumulator macros, as
 431    any functions that use these should have the relevant sections rewritten
 432    in assembler to utilise the EMAC accumulators properly.
 433    Assumes the default +/- 3.28 fixed point format
 434  */
 435 #define mad_f_mul(x, y) \
 436 ({ \
 437   mad_fixed64hi_t hi; \
 438   asm volatile("mac.l %[a], %[b], %%acc0\n\t" \
 439                "movclr.l %%acc0, %[hi]\n\t" \
 440                "asl.l #3, %[hi]" \
 441                : [hi] "=d" (hi) \
 442                : [a] "r" ((x)), [b] "r" ((y))); \
 443   hi; \
 444 })
 445 /* Define dummy mad_f_scale64 to prevent libmad from defining MAD_F_SCALEBITS
 446    below. Having MAD_F_SCALEBITS defined screws up the PRESHIFT macro in synth.c
 447  */
 448 #define mad_f_scale64(hi, lo) (lo)
 449
 450 /* --- Default ------------------------------------------------------------- */
 451
 452 # elif defined(FPM_DEFAULT)
 453
 454 /*
 455  * This version is the most portable but it loses significant accuracy.
 456  * Furthermore, accuracy is biased against the second argument, so care
 457  * should be taken when ordering operands.
 458  *
 459  * The scale factors are constant as this is not used with SSO.
 460  *
 461  * Pre-rounding is required to stay within the limits of compliance.
 462  */
 463 #  if defined(OPT_SPEED)
 464 #   define mad_f_mul(x, y)      (((x) >> 12) * ((y) >> 16))
 465 #  else
 466 #   define mad_f_mul(x, y)      ((((x) + (1L << 11)) >> 12) *  \
 467                                  (((y) + (1L << 15)) >> 16))
 468 #  endif
 469
 470 /* ------------------------------------------------------------------------- */
 471
 472 # else
 473 #  error "no FPM selected"
 474 # endif
 475
 476 /* default implementations */
 477
 478 # if !defined(mad_f_mul)
 479 #  define mad_f_mul(x, y)  \
 480     ({ register mad_fixed64hi_t __hi;  \
 481        register mad_fixed64lo_t __lo;  \
 482        MAD_F_MLX(__hi, __lo, (x), (y));  \
 483        mad_f_scale64(__hi, __lo);  \
 484     })
 485 # endif
 486
 487 # if !defined(MAD_F_MLA)
 488 #  define MAD_F_ML0(hi, lo, x, y)       ((lo)  = mad_f_mul((x), (y)))
 489 #  define MAD_F_MLA(hi, lo, x, y)       ((lo) += mad_f_mul((x), (y)))
 490 #  define MAD_F_MLN(hi, lo)             ((lo)  = -(lo))
 491 #  define MAD_F_MLZ(hi, lo)             ((void) (hi), (mad_fixed_t) (lo))
 492 # endif
 493
 494 # if !defined(MAD_F_ML0)
 495 #  define MAD_F_ML0(hi, lo, x, y)       MAD_F_MLX((hi), (lo), (x), (y))
 496 # endif
 497
 498 # if !defined(MAD_F_MLN)
 499 #  define MAD_F_MLN(hi, lo)             ((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
 500 # endif
 501
 502 # if !defined(MAD_F_MLZ)
 503 #  define MAD_F_MLZ(hi, lo)             mad_f_scale64((hi), (lo))
 504 # endif
 505
 506 # if !defined(mad_f_scale64)
 507 #  if defined(OPT_ACCURACY)
 508 #   define mad_f_scale64(hi, lo)  \
 509     ((((mad_fixed_t)  \
 510        (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
 511         ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
 512 #  else
 513 #   define mad_f_scale64(hi, lo)  \
 514     ((mad_fixed_t)  \
 515      (((hi) << (32 - MAD_F_SCALEBITS)) |  \
 516       ((lo) >> MAD_F_SCALEBITS)))
 517 #  endif
 518 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 519 # endif
 520
 521 # endif