/*
 * Make repeat mode/shuffle work in the sbs also
 * [maemo-rb.git] / apps / codecs / libwmapro / wmapro_math.h
 * blob c78d6b627f1c4ff4890199bef224d68dc51bc188
 */
1 #ifndef _WMAPRO_MATH_H_
2 #define _WMAPRO_MATH_H_
4 #include <inttypes.h>
/* rockbox: not used
#define fixtof16(x)       (float)((float)(x) / (float)(1 << 16))
#define fixtof31(x)       (float)((float)(x) / (float)(1 << 31))
#define ftofix16(x)       ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
#define ftofix31(x)       ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
*/
13 #if defined(CPU_ARM)
/* Calculates: result = (X*Y)>>Z
 *
 * smull leaves the 64-bit product in the register pair hi:lo. The low word
 * is shifted right by Z (logical shift) and the bits of the high word that
 * cross the word boundary are OR-ed in, shifted left by 32-Z.
 * Both shift counts are passed in registers, so Z may be a run-time value.
 * Z is evaluated twice; do not pass an expression with side effects.
 * NOTE(review): presumably only meaningful for 0 <= Z <= 32 - confirm.
 */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
        "mov %[lo], %[lo], lsr %[shr] \n\t" /* lo >>= Z */ \
        "orr %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"((X)), [y]"r"((Y)), [shr]"r"((Z)), \
          [shl]"r"(32-(Z))); /* Z parenthesised: it may be an expression */ \
    lo; \
})
/* Calculates: result = (X*Y)>>16
 *
 * smull leaves the 64-bit product in hi:lo; the result is assembled from
 * the upper 16 bits of lo and the lower 16 bits of hi.
 */
#define fixmul16(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
        "mov %[lo], %[lo], lsr #16 \n\t" /* lo >>= 16 */ \
        "orr %[lo], %[lo], %[hi], lsl #16" /* lo |= (hi << 16) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"((X)), [y]"r"((Y))); \
    lo; \
})
/* Calculates: result = (X*Y)>>24
 *
 * smull leaves the 64-bit product in hi:lo; the result is assembled from
 * the upper 8 bits of lo and the lower 24 bits of hi.
 */
#define fixmul24(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
        "mov %[lo], %[lo], lsr #24 \n\t" /* lo >>= 24 */ \
        "orr %[lo], %[lo], %[hi], lsl #8" /* lo |= (hi << 8) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"((X)), [y]"r"((Y))); \
    lo; \
})
/* Calculates: result = (X*Y)>>31, lose 1 bit precision
 *
 * Only the high word of the 64-bit product is used: it is shifted left by
 * one and the top bit of the low word is not carried in, so the least
 * significant bit of the result is always zero.
 */
#define fixmul31(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
        "mov %[hi], %[hi], lsl #1" /* hi <<= 1 */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"((X)), [y]"r"((Y))); \
    hi; \
})
68 #elif defined(CPU_COLDFIRE)
/* Calculates: result = (X*Y)>>Z
 *
 * mac.l/movclr.l deliver the upper part of the product and mulu.l the lower
 * 32 bits. For Z < 31 the upper part is shifted left by 31-Z, the lower part
 * is shifted right by Z and both are OR-ed together. For Z >= 31 only the
 * upper part, shifted right arithmetically by Z-31, forms the result.
 *
 * X is copied into a local bound as an early-clobber read-write operand:
 * mulu.l and lsr.l overwrite that register while %[sh] is still needed, so
 * it must be neither an input-only operand nor share a register with one.
 * NOTE(review): relies on the EMAC handing back the product pre-shifted by
 * one bit (fractional mode presumably configured elsewhere) - confirm.
 */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t t1; \
    int32_t t2; \
    int32_t fixmulshift_x = (X); \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
        "moveq.l #31,%[t2] \n\t" \
        "sub.l %[sh],%[t2] \n\t" /* t2 = 31 - shift */ \
        "ble.s 1f \n\t" \
        "asl.l %[t2],%[t1] \n\t" /* hi <<= 31 - shift */ \
        "lsr.l %[sh],%[x] \n\t" /* (unsigned)lo >>= shift */ \
        "or.l %[x],%[t1] \n\t" /* combine result */ \
        "bra.s 2f \n\t" \
        "1: \n\t" \
        "neg.l %[t2] \n\t" /* t2 = shift - 31 */ \
        "asr.l %[t2],%[t1] \n\t" /* hi >>= t2 */ \
        "2: \n" \
        : [t1]"=&d"(t1), [t2]"=&d"(t2), [x]"+&d"(fixmulshift_x) \
        : [y] "d"((Y)), [sh]"d"((Z))); \
    t1; \
})
/* Calculates: result = (X*Y)>>16
 *
 * The upper part of the product comes from the EMAC accumulator, the lower
 * 32 bits from mulu.l. After halving the upper part, its low halfword is
 * moved into the low halfword of x and the two halfwords of x are swapped,
 * which yields the middle 32 bits of the 64-bit product.
 * X is copied into the local x because the asm overwrites that register.
 * NOTE(review): the ARM and generic branches of this header also provide
 * fixmul24; no ColdFire variant is visible here - confirm it is not needed.
 */
#define fixmul16(X,Y) \
({ \
    int32_t t, x = (X); \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t] \n\t" /* get higher half */ \
        "lsr.l #1,%[t] \n\t" /* hi >>= 1 to compensate emac shift */ \
        "move.w %[t],%[x] \n\t" /* combine halfwords */\
        "swap %[x] \n\t" \
        : [t]"=&d"(t), [x] "+d" (x) \
        : [y] "d" ((Y))); \
    x; \
})
/* Calculates: result = (X*Y)>>31 (may lose msb to overflow)
 *
 * A single multiply-accumulate into acc0; movclr.l reads the accumulator
 * into t and clears it, and that value is the result.
 * NOTE(review): assumes acc0 is zero on entry and the EMAC is in the mode
 * that returns the product shifted this way - confirm.
 */
#define fixmul31(X,Y) \
({ \
    int32_t t; \
    asm volatile ( \
        "mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \
        "movclr.l %%acc0, %[t]\n\t" /* get higher half as result */ \
        : [t] "=d" (t) \
        : [x] "r" ((X)), [y] "r" ((Y))); \
    t; \
})
121 #else
/**
 * Portable fixed-point multiply: (x * y) >> shamt.
 *
 * The product is formed in 64 bits so it cannot overflow, shifted right by
 * shamt and then truncated to 32 bits.
 *
 * @param x      first factor
 * @param y      second factor
 * @param shamt  right shift applied to the 64-bit product; must be a valid
 *               shift count for a 64-bit value (0..63)
 * @return the low 32 bits of the shifted product
 *
 * Right-shifting a negative value is implementation-defined in C; the usual
 * arithmetic (sign-preserving) shift is assumed.
 */
static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
{
    int64_t product = (int64_t)x * y;

    return (int32_t)(product >> shamt);
}
/**
 * Portable fixed-point multiply: (x * y) >> 31.
 *
 * The product is formed in 64 bits, shifted right by 31 and truncated to
 * 32 bits.
 *
 * @param x  first factor
 * @param y  second factor
 * @return the low 32 bits of the shifted product
 *
 * Right-shifting a negative value is implementation-defined in C; the usual
 * arithmetic (sign-preserving) shift is assumed.
 */
static inline int32_t fixmul31(int32_t x, int32_t y)
{
    int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 31);
}
/**
 * Portable fixed-point multiply: (x * y) >> 24.
 *
 * The product is formed in 64 bits, shifted right by 24 and truncated to
 * 32 bits.
 *
 * @param x  first factor
 * @param y  second factor
 * @return the low 32 bits of the shifted product
 *
 * Right-shifting a negative value is implementation-defined in C; the usual
 * arithmetic (sign-preserving) shift is assumed.
 */
static inline int32_t fixmul24(int32_t x, int32_t y)
{
    int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 24);
}
/**
 * Portable fixed-point multiply: (x * y) >> 16.
 *
 * The product is formed in 64 bits, shifted right by 16 and truncated to
 * 32 bits.
 *
 * @param x  first factor
 * @param y  second factor
 * @return the low 32 bits of the shifted product
 *
 * Right-shifting a negative value is implementation-defined in C; the usual
 * arithmetic (sign-preserving) shift is assumed.
 */
static inline int32_t fixmul16(int32_t x, int32_t y)
{
    int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 16);
}
165 #endif /* CPU_COLDFIRE, CPU_ARM */
/* One step of the windowing loop. Expects an int32_t pointer named dst in
 * the calling scope and stores
 *     dst[I] = WJ*S0 - WI*S1
 *     dst[J] = WI*S0 + WJ*S1
 * where each product is a fixmul31-style fixed-point multiply.
 *
 * Both variants expand to a single statement, so the macro is safe in an
 * unbraced if/else. The parameter names are upper case on purpose: they must
 * differ from the asm operand names ([s0], [wi], ...), which would otherwise
 * be rewritten by macro substitution.
 */
#if defined(CPU_COLDFIRE)
/* S0 must be a modifiable lvalue: its register is reused as scratch. */
#define VECT_MUL_WIN_KERNEL(I, J, S0, S1, WI, WJ) \
    do { \
        asm volatile ( \
            "mac.l %[s0], %[wj], %%acc0 \n\t" \
            "msac.l %[s1], %[wi], %%acc0 \n\t" \
            "mac.l %[s0], %[wi], %%acc1 \n\t" \
            "mac.l %[s1], %[wj], %%acc1 \n\t" \
            "movclr.l %%acc0, %[s0] \n\t" \
            "move.l %[s0], (%[dst_i]) \n\t" \
            "movclr.l %%acc1, %[s0] \n\t" \
            "move.l %[s0], (%[dst_j]) \n\t" \
            : [s0] "+r" (S0) /* overwritten, hence a read-write operand */ \
            : [dst_i] "a" (&dst[(I)]), [dst_j] "a" (&dst[(J)]), \
              [s1] "r" ((S1)), [wi] "r" ((WI)), [wj] "r" ((WJ)) \
            : "cc", "memory"); \
    } while (0)
#else
#define VECT_MUL_WIN_KERNEL(I, J, S0, S1, WI, WJ) \
    do { \
        dst[(I)] = fixmul31((WJ), (S0)) - fixmul31((WI), (S1)); \
        dst[(J)] = fixmul31((WI), (S0)) + fixmul31((WJ), (S1)); \
    } while (0)
#endif /* CPU_COLDFIRE */
/**
 * Fixed-point windowing of two input vectors into one output vector.
 *
 * For every k in 0 ... len-1 the loop combines src0[k] with
 * src1[len-1-k], using the negated window values -win[k] and
 * -win[2*len-1-k], and stores one result at dst[k] and one at
 * dst[2*len-1-k]. The arithmetic itself is done by VECT_MUL_WIN_KERNEL.
 *
 * @param dst   output, 2*len elements are written
 * @param src0  first input, len elements are read
 * @param src1  second input, len elements are read (in reverse order)
 * @param win   window, 2*len elements are read
 * @param len   half of the output length
 */
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                        const int32_t *src1, const int32_t *win,
                                        int len)
{
    int i, j;

    /* Rebase dst, win and src0 to their midpoint so that the negative index
     * i walks the first half while j walks the second half. src1 is not
     * rebased. */
    dst  += len;
    win  += len;
    src0 += len;

    for (i = -len, j = len - 1; i < 0; i++, j--) {
        int32_t s0 = src0[i];  /* s0 =  src0[      0 ... len-1] */
        int32_t s1 = src1[j];  /* s1 =  src1[  len-1 ... 0    ] */
        int32_t wi = -win[i];  /* wi = -win [      0 ... len-1] */
        int32_t wj = -win[j];  /* wj = -win [2*len-1 ... len  ] */
        VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
    }
}
/* Multiplies four consecutive int32_t values from SRC by MUL (fixmul16-style,
 * i.e. (a*b)>>16) and stores them to DST.
 *
 * The two variants address memory differently:
 *  - ARM: DST and SRC must be pointer lvalues; the load/store-multiple
 *    instructions advance both by four elements.
 *  - generic: DST and SRC are left untouched and indexed with a variable
 *    named i that must exist in the calling scope.
 *
 * Both variants expand to a single statement. The parameter names are upper
 * case on purpose: they must differ from the asm operand names ([dst],
 * [src], [mul]), which would otherwise be rewritten by macro substitution.
 */
#if defined(CPU_ARM)
#define VECT_MUL_SCALAR_KERNEL(DST, SRC, MUL) \
    do { \
        asm volatile ( \
            "ldmia %[src]!, {r1-r4} \n\t" \
            "smull r0, r5, r1, %[mul] \n\t" \
            "mov r0, r0, lsr #16 \n\t" \
            "orr r0, r0, r5, lsl #16\n\t" \
            "smull r1, r5, r2, %[mul] \n\t" \
            "mov r1, r1, lsr #16 \n\t" \
            "orr r1, r1, r5, lsl #16\n\t" \
            "smull r2, r5, r3, %[mul] \n\t" \
            "mov r2, r2, lsr #16 \n\t" \
            "orr r2, r2, r5, lsl #16\n\t" \
            "smull r3, r5, r4, %[mul] \n\t" \
            "mov r3, r3, lsr #16 \n\t" \
            "orr r3, r3, r5, lsl #16\n\t" \
            "stmia %[dst]!, {r0-r3} \n" \
            : [dst]"+r"(DST), [src]"+r"(SRC) \
            : [mul]"r"((MUL)) \
            : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); \
    } while (0)
#else
#define VECT_MUL_SCALAR_KERNEL(DST, SRC, MUL) \
    do { \
        (DST)[i  ] = fixmul16((SRC)[i  ], (MUL)); \
        (DST)[i+1] = fixmul16((SRC)[i+1], (MUL)); \
        (DST)[i+2] = fixmul16((SRC)[i+2], (MUL)); \
        (DST)[i+3] = fixmul16((SRC)[i+3], (MUL)); \
    } while (0)
#endif /* CPU_ARM */
/**
 * Multiplies len elements of src by the scalar mul and stores them in dst.
 * The elements are processed four at a time by VECT_MUL_SCALAR_KERNEL.
 *
 * @param dst  output vector
 * @param src  input vector
 * @param mul  scalar factor
 * @param len  number of elements; must be a multiple of 4, otherwise the
 *             last kernel invocation runs past the end of both vectors
 */
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                        int32_t mul, int len)
{
    /* len is _always_ a multiple of 4, because len is the difference of sfb's
     * which themselves are always a multiple of 4. */
    int i;

    /* The kernel either indexes dst/src with i or advances the two pointers
     * itself, depending on the target; i counts elements in both cases. */
    for (i = 0; i < len; i += 4) {
        VECT_MUL_SCALAR_KERNEL(dst, src, mul);
    }
}
/**
 * Clamps a to the range [amin, amax].
 *
 * @param a     value to clamp
 * @param amin  lower bound
 * @param amax  upper bound
 * @return amin if a < amin, amax if a > amax, otherwise a
 *
 * The lower bound is tested first, so amin wins whenever a < amin, even if
 * the bounds are passed in the wrong order.
 */
static inline int av_clip(int a, int amin, int amax)
{
    if (a < amin)
        return amin;
    if (a > amax)
        return amax;
    return a;
}
250 #endif /* _WMAPRO_MATH_H_ */