Remove CLIP_TO_15 from codeclib. Remove tabs.
apps/codecs/lib/asm_mcf5249.h (kugel-rb.git)
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/* asm routines for wide math on the MCF5249 */
#if defined(CPU_COLDFIRE)

/* attribute for 16-byte alignment */
#define LINE_ATTR   __attribute__ ((aligned (16)))
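/* Usage sketch (not in the original file): codecs can tag buffers fed to the
   movem.l-based loops below with this attribute so the data starts on a
   16-byte boundary, e.g. (hypothetical buffer name)

       static int32_t mdct_tmp[1024] LINE_ATTR;
*/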
#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH
static inline int32_t MULT32(int32_t x, int32_t y) {

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply & shift */
                  "movclr.l %%acc0, %[x];"     /* move & clear acc */
                  "asr.l #1, %[x];"            /* no overflow test */
                  : [x] "+&d" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
static inline int32_t MULT31(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "movclr.l %%acc0, %[x];"     /* move and clear */
                  : [x] "+&r" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "asl.l #8, %[r];"            /* hi<<16, plus one free */
                  "asl.l #8, %[r];"
                  "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
                  "lsr.l #7, %[x];"
                  "or.l %[x], %[r];"           /* logical-or results */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return r;
}
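#if 0 /* For reference only (not part of the original file, never compiled):
         portable C that the three EMAC helpers above are assumed to match,
         following the usual Tremor fixed-point conventions. */
static inline int32_t MULT32(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 32);
}
static inline int32_t MULT31(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 31);
}
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 15);
}
#endif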
static inline
void XPROD31(int32_t a, int32_t b,
             int32_t t, int32_t v,
             int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "mac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "msac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
static inline
void XNPROD31(int32_t a, int32_t b,
              int32_t t, int32_t v,
              int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "msac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "mac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
#if 0 /* canonical Tremor definition */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
  { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#endif
/* This could lose the LSB by overflow, but I don't think it will ever happen.
   If anyone thinks they can hear a bug caused by this, please try the
   canonical version above. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "asr.l #1, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  "asr.l #1, %[y];" \
                  : [x] "=&d" (_x), [y] "=&d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
#define XPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=&d" (_x), [y] "=&d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
#define XNPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "msac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "mac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=&d" (_x), [y] "=&d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ += *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
    /* add final elements */
    while (n>0) {
        *x++ += *y++;
        n--;
    }
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ = *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"  /* store */
                  "lea.l (4*4, %[x]), %[x];"    /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
    /* copy final elements */
    while (n>0) {
        *x++ = *y++;
        n--;
    }
}
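#if 0 /* For reference only (not part of the original file, never compiled):
         plain-C behaviour the two vector helpers above are assumed to have. */
static inline void vect_add(int32_t *x, const int32_t *y, int n)
{
    while (n-- > 0)
        *x++ += *y++;
}
static inline void vect_copy(int32_t *x, int32_t *y, int n)
{
    while (n-- > 0)
        *x++ = *y++;
}
#endif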
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
    /* ensure data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
    asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch data values */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch window values */
                  "lea.l (4*4, %[w]), %[w];"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window values */
                  "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                  "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                  "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                  "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a0, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
    /* ensure at least the data pointer is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
    asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* rewind to the lowest of the next 4 window values */
                  "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch data values */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch window values */
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window value */
                  "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                  "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                  "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                  "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a3, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
#endif

#endif

/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

* this is portable C and simple; why not use this as default?
static inline int32_t CLIP_TO_15(register int32_t x) {
    register int32_t hi=32767, lo=-32768;
    return (x>=hi ? hi : (x<=lo ? lo : x));
}

#endif
*/

#else
#define LINE_ATTR
#endif