1 #ifndef _WMAPRO_MATH_H_
2 #define _WMAPRO_MATH_H_
/* Conversions between float and Q16 / Q31 fixed point.
 *
 * fixtofNN(x): fixed-point value -> float (x / 2^NN).
 * ftofixNN(x): float -> int32_t fixed point (x * 2^NN), rounded half away
 *              from zero.
 *
 * The scale factors are written as float constants: (1 << 31) overflows a
 * 32-bit signed int, which is undefined and in practice produces a negative
 * scale.
 *
 * ftofixNN evaluates its argument twice, so do not pass expressions with
 * side effects. Results that do not fit in int32_t (e.g. ftofix31(1.0f))
 * are undefined.
 */
#define fixtof16(x) ((float)(x) / 65536.0f)
#define fixtof31(x) ((float)(x) / 2147483648.0f)
#define ftofix16(x) ((int32_t)((x) * 65536.0f + ((x) < 0 ? -0.5 : 0.5)))
#define ftofix31(x) ((int32_t)((x) * 2147483648.0f + ((x) < 0 ? -0.5 : 0.5)))
14 /* Calculates: result = (X*Y)>>Z
 *
 * ARM inline-assembly variant. Per the instruction comments below: smull
 * multiplies X by Y into the hi:lo register pair, lo is shifted right by Z
 * and then OR-ed with hi shifted left by 32-Z. Both shift counts are passed
 * in registers.
 *
 * NOTE(review): Z is expanded unparenthesised in "32-Z", so Z should be a
 * simple or already parenthesised expression.
 * NOTE(review): the lines that declare lo/hi, open the asm statement and
 * yield the macro's value are not visible in this chunk -- confirm against
 * the complete file.
 */
15 #define fixmulshift(X,Y,Z) \
20 "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
21 "mov %[lo], %[lo], lsr %[shr] \n\t" /* lo >>= Z */ \
22 "orr %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
23 : [lo]"=&r"(lo), [hi]"=&r"(hi) \
24 : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-Z)); \
28 /* Calculates: result = (X*Y)>>16
 *
 * ARM inline-assembly variant with a fixed shift: smull multiplies X by Y
 * into hi:lo, lo is shifted right by 16 and OR-ed with hi shifted left by 16.
 *
 * NOTE(review): the lines that declare lo/hi, open the asm statement and
 * yield the macro's value are not visible in this chunk -- confirm against
 * the complete file.
 */
29 #define fixmul16(X,Y) \
34 "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
35 "mov %[lo], %[lo], lsr #16 \n\t" /* lo >>= 16 */ \
36 "orr %[lo], %[lo], %[hi], lsl #16" /* lo |= (hi << 16) */ \
37 : [lo]"=&r"(lo), [hi]"=&r"(hi) \
38 : [x]"r"(X), [y]"r"(Y)); \
42 /* Calculates: result = (X*Y)>>24
 *
 * ARM inline-assembly variant with a fixed shift: smull multiplies X by Y
 * into hi:lo, lo is shifted right by 24 and OR-ed with hi shifted left by 8.
 *
 * NOTE(review): the lines that declare lo/hi, open the asm statement and
 * yield the macro's value are not visible in this chunk -- confirm against
 * the complete file.
 */
43 #define fixmul24(X,Y) \
48 "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
49 "mov %[lo], %[lo], lsr #24 \n\t" /* lo >>= 24 */ \
50 "orr %[lo], %[lo], %[hi], lsl #8" /* lo |= (hi << 8) */ \
51 : [lo]"=&r"(lo), [hi]"=&r"(hi) \
52 : [x]"r"(X), [y]"r"(Y)); \
56 /* Calculates: result = (X*Y)>>31
 *
 * ARM inline-assembly variant with a fixed shift: smull multiplies X by Y
 * into hi:lo, lo is shifted right by 31 and OR-ed with hi shifted left by 1.
 *
 * NOTE(review): the lines that declare lo/hi, open the asm statement and
 * yield the macro's value are not visible in this chunk -- confirm against
 * the complete file.
 */
57 #define fixmul31(X,Y) \
62 "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
63 "mov %[lo], %[lo], lsr #31 \n\t" /* lo >>= 31 */ \
64 "orr %[lo], %[lo], %[hi], lsl #1" /* lo |= (hi << 1) */ \
65 : [lo]"=&r"(lo), [hi]"=&r"(hi) \
66 : [x]"r"(X), [y]"r"(Y)); \
69 #elif defined(CPU_COLDFIRE)
70 /* Calculates: result = (X*Y)>>Z
 *
 * ColdFire inline-assembly variant. Per the instruction comments below: the
 * product is accumulated in acc0 and its higher half read into t1 with
 * movclr.l, while mulu.l leaves the lower half in %[x]. t2 is set to
 * 31 - shift. Two shift sequences follow:
 *   - hi <<= 31 - shift, lo >>= shift (unsigned), then the two are OR-ed;
 *   - t2 is negated to shift - 31 and hi is shifted right by it.
 *
 * NOTE(review): the instructions that choose between the two sequences
 * (presumably a branch on the sign of 31 - shift, plus labels) are not
 * visible in this chunk, nor are the declarations of t1/t2 or the
 * expression that yields the macro's value -- confirm against the complete
 * file.
 * NOTE(review): %[x] is listed only as an input operand but is overwritten
 * by mulu.l and lsr.l; confirm this is safe for the compilers in use.
 */
71 #define fixmulshift(X,Y,Z) \
76 "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
77 "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
78 "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
79 "moveq.l #31,%[t2] \n\t" \
80 "sub.l %[sh],%[t2] \n\t" /* t2 = 31 - shift */ \
82 "asl.l %[t2],%[t1] \n\t" /* hi <<= 31 - shift */ \
83 "lsr.l %[sh],%[x] \n\t" /* (unsigned)lo >>= shift */ \
84 "or.l %[x],%[t1] \n\t" /* combine result */ \
87 "neg.l %[t2] \n\t" /* t2 = shift - 31 */ \
88 "asr.l %[t2],%[t1] \n\t" /* hi >>= t2 */ \
90 : [t1]"=&d"(t1), [t2]"=&d"(t2) \
91 : [x] "d"((X)), [y] "d"((Y)), [sh]"d"((Z))); \
95 /* Calculates: result = (X*Y)>>16
 *
 * ColdFire inline-assembly variant. The higher half of the product is read
 * from acc0 into t1 and shifted left by 15 ("plus one free" per the
 * instruction comment -- presumably the accumulator result already carries
 * one extra bit of shift; confirm). The lower half, produced by mulu.l in
 * %[x], is shifted right by 16 as an unsigned value and OR-ed into t1.
 * t2 only holds the shift counts.
 *
 * NOTE(review): the declarations of t1/t2 and the expression that yields the
 * macro's value are not visible in this chunk.
 * NOTE(review): %[x] is listed only as an input operand but is overwritten
 * by mulu.l and lsr.l; confirm this is safe for the compilers in use.
 */
96 #define fixmul16(X,Y) \
100 "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
101 "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
102 "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
103 "moveq.l #15,%[t2] \n\t" \
104 "asl.l %[t2],%[t1] \n\t" /* hi <<= 15, plus one free */ \
105 "moveq.l #16,%[t2] \n\t" \
106 "lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 16 */ \
107 "or.l %[x],%[t1] \n\t" /* combine result */ \
108 : [t1]"=&d"(t1), [t2]"=&d"(t2) \
109 : [x] "d" ((X)), [y] "d" ((Y))); \
113 /* Calculates: result = (X*Y)>>24
 *
 * ColdFire inline-assembly variant. The higher half of the product is read
 * from acc0 into t1 and shifted left by 7 ("plus one free" per the
 * instruction comment -- presumably the accumulator result already carries
 * one extra bit of shift; confirm). The lower half, produced by mulu.l in
 * %[x], is shifted right by 24 as an unsigned value and OR-ed into t1.
 * t2 only holds the shift counts.
 *
 * NOTE(review): the declarations of t1/t2 and the expression that yields the
 * macro's value are not visible in this chunk.
 * NOTE(review): %[x] is listed only as an input operand but is overwritten
 * by mulu.l and lsr.l; confirm this is safe for the compilers in use.
 */
114 #define fixmul24(X,Y) \
118 "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
119 "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
120 "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
121 "moveq.l #7,%[t2] \n\t" \
122 "asl.l %[t2],%[t1] \n\t" /* hi <<= 7, plus one free */ \
123 "moveq.l #24,%[t2] \n\t" \
124 "lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 24 */ \
125 "or.l %[x],%[t1] \n\t" /* combine result */ \
126 : [t1]"=&d"(t1), [t2]"=&d"(t2) \
127 : [x] "d" ((X)), [y] "d" ((Y))); \
131 /* Calculates: result = (X*Y)>>32
 *
 * ColdFire inline-assembly variant: the product is accumulated in acc0 and
 * its higher half is read back with movclr.l as the result; no lower-half
 * correction is applied.
 *
 * NOTE(review): the header says >>32, but the macro is named fixmul31 and
 * the ARM variant of fixmul31 shifts by 31. Which one holds here presumably
 * depends on how the accumulator unit is configured -- confirm and make the
 * comment agree.
 * NOTE(review): the declaration of t, the output-operand line and the
 * expression that yields the macro's value are not visible in this chunk.
 */
132 #define fixmul31(X,Y) \
136 "mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \
137 "movclr.l %%acc0, %[t]\n\t" /* get higher half as result */ \
139 : [x] "r" ((X)), [y] "r" ((Y))); \
/* Calculates: result = (x*y)>>shamt
 *
 * Plain C version: the product is formed in 64 bits so no intermediate bits
 * are lost, shifted right by shamt, and the low 32 bits are returned.
 * No rounding is applied.
 *
 * shamt must be in the range 0..63. Right-shifting a negative product relies
 * on the compiler implementing an arithmetic shift.
 */
static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
{
    int64_t temp = (int64_t)x * y;

    temp >>= shamt;

    return (int32_t)temp;
}
/* Calculates: result = (x*y)>>31
 *
 * Plain C version: 64-bit product shifted right by 31, low 32 bits returned.
 * No rounding is applied. Right-shifting a negative product relies on the
 * compiler implementing an arithmetic shift.
 */
static inline int32_t fixmul31(int32_t x, int32_t y)
{
    int64_t temp = (int64_t)x * y;

    temp >>= 31;

    return (int32_t)temp;
}
/* Calculates: result = (x*y)>>24
 *
 * Plain C version: 64-bit product shifted right by 24, low 32 bits returned.
 * No rounding is applied. Right-shifting a negative product relies on the
 * compiler implementing an arithmetic shift.
 */
static inline int32_t fixmul24(int32_t x, int32_t y)
{
    int64_t temp = (int64_t)x * y;

    temp >>= 24;

    return (int32_t)temp;
}
/* Calculates: result = (x*y)>>16
 *
 * Plain C version: 64-bit product shifted right by 16, low 32 bits returned.
 * No rounding is applied. Right-shifting a negative product relies on the
 * compiler implementing an arithmetic shift.
 */
static inline int32_t fixmul16(int32_t x, int32_t y)
{
    int64_t temp = (int64_t)x * y;

    temp >>= 16;

    return (int32_t)temp;
}
186 #endif /* CPU_COLDFIRE, CPU_ARM */
/* Windowed combination of src0 and src1 into dst, ColdFire assembly version.
 *
 * For i running from -len up to -1 and j running from len-1 down to 0, each
 * iteration loads s0 = src0[i], s1 = src1[j], wi = -win[i], wj = -win[j] and
 * uses the two accumulators to form
 *     acc0 = s0*wj - s1*wi   -> stored to dst[i]
 *     acc1 = s0*wi + s1*wj   -> stored to dst[j]
 * s0 is declared as a read-write operand because it is reused as the scratch
 * register for both movclr.l results.
 *
 * NOTE(review): src0, win and dst are indexed with negative i, so they must
 * point len elements into their buffers by the time the loop runs; that
 * setup, the parameter(s) following win, the local declarations, the asm
 * clobber list and the closing braces are not visible in this chunk --
 * confirm against the complete file.
 */
189 static inline void vector_fixmul_window(int32_t *dst
, const int32_t *src0
,
190 const int32_t *src1
, const int32_t *win
,
197 for(i
=-len
, j
=len
-1; i
<0; i
++, j
--) {
198 int32_t s0
= src0
[i
];
199 int32_t s1
= src1
[j
];
200 int32_t wi
= -win
[i
];
201 int32_t wj
= -win
[j
];
203 asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"
204 "msac.l %[s1], %[wi], %%acc0\n\t"
205 "mac.l %[s0], %[wi], %%acc1\n\t"
206 "mac.l %[s1], %[wj], %%acc1\n\t"
207 "movclr.l %%acc0, %[s0]\n\t"
208 "move.l %[s0], (%[dst_i])\n\t"
209 "movclr.l %%acc1, %[s0]\n\t"
210 "move.l %[s0], (%[dst_j])\n\t"
211 : [s0
] "+r" (s0
) /* this register is clobbered so specify it as an input */
212 : [dst_i
] "a" (&dst
[i
]), [dst_j
] "a" (&dst
[j
]),
213 [s1
] "r" (s1
), [wi
] "r" (wi
), [wj
] "r" (wj
)
/* Windowed combination of src0 and src1 into dst, plain C version.
 *
 * For i running from -len up to -1 and j running from len-1 down to 0, each
 * iteration loads s0 = src0[i], s1 = src1[j], wi = -win[i], wj = -win[j] and
 * writes
 *     dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi)
 *     dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj)
 *
 * NOTE(review): src0, win and dst are indexed with negative i, so they must
 * point len elements into their buffers by the time the loop runs; that
 * setup, the parameter(s) following win, the local declarations and the
 * closing braces are not visible in this chunk -- confirm against the
 * complete file.
 */
218 static inline void vector_fixmul_window(int32_t *dst
, const int32_t *src0
,
219 const int32_t *src1
, const int32_t *win
,
226 for(i
=-len
, j
=len
-1; i
<0; i
++, j
--) {
227 int32_t s0
= src0
[i
];
228 int32_t s1
= src1
[j
];
229 int32_t wi
= -win
[i
];
230 int32_t wj
= -win
[j
];
231 dst
[i
] = fixmul31(s0
, wj
) - fixmul31(s1
, wi
);
232 dst
[j
] = fixmul31(s0
, wi
) + fixmul31(s1
, wj
);
/* Multiplies every element of src by mul and stores the result in dst.
 *
 * dst: output array of at least len elements
 * src: input array of at least len elements
 * mul: multiplier, applied with fixmul24, i.e. (src[i]*mul)>>24
 * len: number of elements to process; nothing is written when len <= 0
 */
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                        int32_t mul, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        dst[i] = fixmul24(src[i], mul);
    }
}
/* Clamps a to the range [amin, amax].
 *
 * Returns amin when a is below amin, amax when a is above amax, and a itself
 * otherwise. The lower bound is tested first, so if amin > amax any a below
 * amin yields amin.
 */
static inline int av_clip(int a, int amin, int amax)
{
    if (a < amin) {
        return amin;
    } else if (a > amax) {
        return amax;
    }
    return a;
}
251 #endif /* _WMAPRO_MATH_H_ */