Improved bitrev with approach suggested by Jens Arnold, gives 0.5%-1% speedup for...
[kugel-rb.git] / apps / codecs / libwma / wmafixed.h
blob6b5137e044cdb8126565dd66b088328f57e9c3b4
/****************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2007 Michael Giacomelli
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/*  Fixed precision code.  We use a combination of Sign 15.16 and Sign.31
    precision here.

    The WMA decoder does not always follow this convention, and occasionally
    renormalizes values to other formats in order to maximize precision.
    However, only the two precisions above are provided in this file.
*/
30 #include "types.h"
/* Number of fractional bits used by the conversion macros below. */
#define PRECISION       16
#define PRECISION64     16

/*
 * Conversion helpers between fixed-point, integer and float values.
 *
 *  fixtof64(x)  divide x by 2^PRECISION64 as a float.  The argument is cast
 *               to float first; does not work on int64_t!
 *  ftofix32(x)  scale a float by 2^PRECISION and add +/-0.5 before the cast
 *               to fixed32, so the value is rounded to nearest instead of
 *               truncated.  NOTE: x is evaluated twice - do not pass an
 *               expression with side effects.
 *  itofix64(x)  wrapper around IntTo64().
 *  itofix32(x)  shift an integer left by PRECISION bits.
 *  fixtoi32(x)  shift right by PRECISION bits, discarding the fraction.
 *  fixtoi64(x)  wrapper around IntFrom64().
 */
#define fixtof64(x)     ((float)((float)(x) / (float)(1 << PRECISION64)))
#define ftofix32(x)     ((fixed32)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
#define itofix64(x)     (IntTo64(x))
#define itofix32(x)     ((x) << PRECISION)
#define fixtoi32(x)     ((x) >> PRECISION)
#define fixtoi64(x)     (IntFrom64(x))
44 /*fixed functions*/
46 fixed64 IntTo64(int x);
47 int IntFrom64(fixed64 x);
48 fixed32 Fixed32From64(fixed64 x);
49 fixed64 Fixed32To64(fixed32 x);
50 fixed32 fixdiv32(fixed32 x, fixed32 y);
51 fixed64 fixdiv64(fixed64 x, fixed64 y);
52 fixed32 fixsqrt32(fixed32 x);
53 long fsincos(unsigned long phase, fixed32 *cos);
55 #ifdef CPU_ARM
57 /*Sign-15.16 format */
59 #define fixmul32(x, y) \
60 ({ int32_t __hi; \
61 uint32_t __lo; \
62 int32_t __result; \
63 asm ("smull %0, %1, %3, %4\n\t" \
64 "movs %0, %0, lsr %5\n\t" \
65 "adc %2, %0, %1, lsl %6" \
66 : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
67 : "%r" (x), "r" (y), \
68 "M" (PRECISION), "M" (32 - PRECISION) \
69 : "cc"); \
70 __result; \
73 #define fixmul32b(x, y) \
74 ({ int32_t __hi; \
75 uint32_t __lo; \
76 int32_t __result; \
77 asm ("smull %0, %1, %3, %4\n\t" \
78 "movs %2, %1, lsl #1" \
79 : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
80 : "%r" (x), "r" (y) \
81 : "cc"); \
82 __result; \
85 #elif defined(CPU_COLDFIRE)
87 static inline int32_t fixmul32(int32_t x, int32_t y)
89 #if PRECISION != 16
90 #warning Coldfire fixmul32() only works for PRECISION == 16
91 #endif
92 int32_t t1;
93 asm (
94 "mac.l %[x], %[y], %%acc0 \n" /* multiply */
95 "mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */
96 "movclr.l %%acc0, %[t1] \n" /* get higher half */
97 "lsr.l #1, %[t1] \n"
98 "move.w %[t1], %[x] \n"
99 "swap %[x] \n"
100 : [t1] "=&d" (t1), [x] "+d" (x)
101 : [y] "d" (y)
103 return x;
106 static inline int32_t fixmul32b(int32_t x, int32_t y)
108 asm (
109 "mac.l %[x], %[y], %%acc0 \n" /* multiply */
110 "movclr.l %%acc0, %[x] \n" /* get higher half */
111 : [x] "+d" (x)
112 : [y] "d" (y)
114 return x;
117 #else
119 static inline fixed32 fixmul32(fixed32 x, fixed32 y)
121 fixed64 temp;
122 temp = x;
123 temp *= y;
125 temp >>= PRECISION;
127 return (fixed32)temp;
130 static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
132 fixed64 temp;
134 temp = x;
135 temp *= y;
137 temp >>= 31; //16+31-16 = 31 bits
139 return (fixed32)temp;
142 #endif