1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
9 * Copyright (C) 2005 by Pedro Vasconcelos
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
16 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
17 * KIND, either express or implied.
19 ****************************************************************************/
20 /* asm routines for wide math on the MCF5249 */
24 #if defined(CPU_COLDFIRE)
26 /* LINE_ATTR: GCC attribute that requests 16-byte alignment for the
   declaration it is attached to */
27 #define LINE_ATTR __attribute__ ((aligned (16)))
/* MULT32: multiply x by y through EMAC accumulator acc0.
 *   x, y - 32-bit signed operands.
 * The product is accumulated into acc0, read back into x with movclr.l
 * (which also clears the accumulator) and then arithmetically shifted
 * right by one bit.
 * NOTE(review): presumably this yields the upper 32 bits of the 64-bit
 * product when the EMAC is in fractional mode -- the MAC setup is not
 * part of this section, confirm against it. */
34 static inline ogg_int32_t
MULT32(ogg_int32_t x
, ogg_int32_t y
) {
36 asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply-accumulate x*y into acc0 */
37 "movclr.l %%acc0, %[x];" /* read acc0 into x and clear acc0 */
38 "asr.l #1, %[x];" /* arithmetic shift right by 1; no overflow test */
/* MULT31: multiply x by y through EMAC accumulator acc0.
 *   x, y - 32-bit signed operands.
 * Uses the same mac.l / movclr.l pair as MULT32; no post-shift follows
 * the movclr.l in the instructions shown here.
 * NOTE(review): the exact scaling of the result depends on the EMAC
 * mode, which is configured elsewhere -- confirm. */
45 static inline ogg_int32_t
MULT31(ogg_int32_t x
, ogg_int32_t y
) {
47 asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply-accumulate x*y into acc0 */
48 "movclr.l %%acc0, %[x];" /* read acc0 into x and clear acc0 */
/* MULT31_SHIFT15: multiply x by y and assemble the result from two
 * partial products.
 *   x, y - 32-bit signed operands.
 * The EMAC (mac.l / movclr.l) supplies the upper part of the product in
 * r, while an ordinary mulu.l supplies the lower 32 bits in x; issuing
 * mulu.l between mac.l and movclr.l keeps the core busy instead of
 * stalling on the accumulator read.  Both parts are shifted into place
 * and merged with a logical OR into r.
 * NOTE(review): the inline comments speak of hi<<16 and lo>>15 while the
 * shifts shown here are by 8 -- verify the full shift sequence. */
56 static inline ogg_int32_t
MULT31_SHIFT15(ogg_int32_t x
, ogg_int32_t y
) {
59 asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
60 "mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
61 "movclr.l %%acc0, %[r];" /* get higher half */
62 "asl.l #8, %[r];" /* hi<<16, plus one free */
64 "lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
66 "or.l %[x], %[r];" /* logical-or results */
/* outputs: r is a write-only, early-clobber data register ("=&d");
   x is a read-write data register ("+d") */
67 : [r
] "=&d" (r
), [x
] "+d" (x
)
76 /* asm versions of vector operations for block.c, window.c */
77 /* assumes MAC is initialized & accumulators cleared */
/* vect_add_right_left: block-wise combination of vector y into vector x.
 *   x - vector that is both read and written (loaded, then stored back)
 *   y - read-only second operand
 *   n - number of 32-bit elements; reduced by 4 per unrolled block
 * A scalar loop first runs while x is not 16-byte aligned, then the asm
 * loop handles four elements per pass: it loads four words from x into
 * d0-d3 and four from y into a0-a3, writes d0-d3 back to x, advances
 * both pointers by 16 bytes and subtracts 4 from n.
 * NOTE(review): the function name and the "add final elements" comment
 * indicate x[i] += y[i]; the add instructions themselves are not among
 * the lines shown here -- confirm.
 * NOTE(review): (int)x assumes a pointer fits in an int. */
79 void vect_add_right_left(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
81 /* runs while elements remain and x is not yet 16-byte aligned */
82 while(n
>0 && (int)x
&15) {
86 asm volatile ("bra 1f;" /* enter at the count check (label 1) */
88 "movem.l (%[x]), %%d0-%%d3;" /* fetch four values from x */
89 "movem.l (%[y]), %%a0-%%a3;" /* fetch four values from y */
95 /* store and advance */
96 "movem.l %%d0-%%d3, (%[x]);"
97 "lea.l (4*4, %[x]), %[x];"
98 "lea.l (4*4, %[y]), %[y];"
99 "subq.l #4, %[n];" /* done 4 elements */
100 "1: cmpi.l #4, %[n];" /* compare remaining count against 4 */
/* n lives in a data register, x and y in address registers; all three
   are read-write operands */
102 : [n
] "+d" (n
), [x
] "+a" (x
), [y
] "+a" (y
)
/* scratch registers used by the movem.l transfers */
103 : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
105 /* add final elements */
/* vect_add_left_right: forwards x, y and n unchanged to
 * vect_add_right_left(); on this target both entry points share one
 * implementation.
 *   x - vector that is read and written by the callee
 *   y - read-only second operand
 *   n - number of 32-bit elements */
112 void vect_add_left_right(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
114 /* coldfire asm has symmetrical versions of vect_add_right_left
115 and vect_add_left_right (since symmetrical versions of
116 vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
117 vect_add_right_left(x
, y
, n
);
/* vect_copy: copy 32-bit elements from y to x.
 *   x - destination vector
 *   y - read-only source vector
 *   n - number of 32-bit elements; reduced by 4 per unrolled block
 * A scalar loop first runs while x is not 16-byte aligned, then the asm
 * loop moves four words per pass through d0-d3 using movem.l, advancing
 * both pointers by 16 bytes each time.  The asm declares "memory" and
 * "cc" clobbers because it writes through x and changes the condition
 * codes.
 * NOTE(review): (int)x assumes a pointer fits in an int. */
121 void vect_copy(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
123 /* runs while elements remain and x is not yet 16-byte aligned */
124 while(n
>0 && (int)x
&15) {
128 asm volatile ("bra 1f;" /* enter at the count check (label 1) */
129 "0:" /* loop start */
130 "movem.l (%[y]), %%d0-%%d3;" /* fetch values */
131 "movem.l %%d0-%%d3, (%[x]);" /* store */
132 "lea.l (4*4, %[x]), %[x];" /* advance */
133 "lea.l (4*4, %[y]), %[y];"
134 "subq.l #4, %[n];" /* done 4 elements */
135 "1: cmpi.l #4, %[n];" /* compare remaining count against 4 */
/* n lives in a data register, x and y in address registers; all three
   are read-write operands */
137 : [n
] "+d" (n
), [x
] "+a" (x
), [y
] "+a" (y
)
138 : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
139 /* copy final elements */
/* vect_mult_fw: multiply data in place by window, walking both vectors
 * in increasing address order.
 *   data   - vector that is read, multiplied and written back
 *   window - multiplier table, read with post-increment
 *   n      - number of elements; reduced by 4 per unrolled block
 * While data is not 16-byte aligned, single elements are handled with
 * MULT31().  The asm part then pre-fetches four data words (d0-d3) and
 * four window words (a0-a3); each loop pass issues four mac.l
 * instructions that also load the next window word into the same
 * address register, collects the four products with movclr.l from
 * acc0-acc3, stores them over the data block and pre-fetches the next
 * four data words.  The leftover elements are multiplied one at a time
 * through acc0 and stored with post-increment.
 * NOTE(review): relies on the EMAC being initialised and the
 * accumulators being clear on entry, as stated in the section comment.
 * NOTE(review): (int)data assumes a pointer fits in an int. */
147 void vect_mult_fw(ogg_int32_t
*data
, LOOKUP_T
*window
, int n
)
149 /* ensure data is aligned to 16-bytes */
150 while(n
>0 && (int)data
&15) {
151 *data
= MULT31(*data
, *window
);
156 asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* prologue: pre-fetch four data words */
157 "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch four window words */
158 "lea.l (4*4, %[w]), %[w];" /* step window past the pre-fetched words */
159 "bra 1f;" /* jump to loop condition */
161 /* multiply and load next window values */
162 "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
163 "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
164 "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
165 "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
166 "movclr.l %%acc0, %%d0;" /* get the products */
167 "movclr.l %%acc1, %%d1;"
168 "movclr.l %%acc2, %%d2;"
169 "movclr.l %%acc3, %%d3;"
170 /* store and advance */
171 "movem.l %%d0-%%d3, (%[d]);"
172 "lea.l (4*4, %[d]), %[d];"
173 "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch the next four data words */
174 "subq.l #4, %[n];" /* done 4 elements */
175 "1: cmpi.l #4, %[n];" /* compare remaining count against 4 */
177 /* multiply final elements */
180 "mac.l %%d0, %%a0, %%acc0;"
181 "movclr.l %%acc0, %%d0;"
182 "move.l %%d0, (%[d])+;"
185 "mac.l %%d1, %%a1, %%acc0;"
186 "movclr.l %%acc0, %%d1;"
187 "move.l %%d1, (%[d])+;"
190 /* otherwise n = 3 */
191 "mac.l %%d2, %%a2, %%acc0;"
192 "movclr.l %%acc0, %%d2;"
193 "move.l %%d2, (%[d])+;"
/* n lives in a data register; data (as d) and window (as w) live in
   address registers; all three are read-write operands */
195 : [n
] "+d" (n
), [d
] "+a" (data
), [w
] "+a" (window
)
/* scratch registers holding the data and window words */
196 : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
/* vect_mult_bw: multiply data in place by window, walking data forwards
 * and window backwards.
 *   data   - vector that is read, multiplied and written back
 *   window - multiplier table, read towards lower addresses
 *   n      - number of elements; reduced by 4 per unrolled block
 * While data is not 16-byte aligned, single elements are handled with
 * MULT31().  The asm part first moves the window pointer back by three
 * words so that one movem.l loads the current word and the three before
 * it into a0-a3; the data words d0..d3 are therefore paired with a3..a0
 * (d0 with the highest-addressed window word).  Each loop pass issues
 * four mac.l instructions that also load the next window word with
 * pre-decrement, collects the products with movclr.l from acc0-acc3,
 * stores them over the data block and pre-fetches the next four data
 * words.  The leftover elements are multiplied one at a time through
 * acc0 and stored with post-increment.
 * NOTE(review): relies on the EMAC being initialised and the
 * accumulators being clear on entry, as stated in the section comment.
 * NOTE(review): (int)data assumes a pointer fits in an int. */
201 void vect_mult_bw(ogg_int32_t
*data
, LOOKUP_T
*window
, int n
)
203 /* ensure at least data is aligned to 16-bytes */
204 while(n
>0 && (int)data
&15) {
205 *data
= MULT31(*data
, *window
);
210 asm volatile ("lea.l (-3*4, %[w]), %[w];" /* prologue: back window up by three words */
211 "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
212 "movem.l (%[w]), %%a0-%%a3;"
213 "bra 1f;" /* jump to loop condition */
215 /* multiply and load next window value */
216 "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
217 "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
218 "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
219 "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
220 "movclr.l %%acc0, %%d0;" /* get the products */
221 "movclr.l %%acc1, %%d1;"
222 "movclr.l %%acc2, %%d2;"
223 "movclr.l %%acc3, %%d3;"
224 /* store and advance */
225 "movem.l %%d0-%%d3, (%[d]);"
226 "lea.l (4*4, %[d]), %[d];"
227 "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch the next four data words */
228 "subq.l #4, %[n];" /* done 4 elements */
229 "1: cmpi.l #4, %[n];" /* compare remaining count against 4 */
231 /* multiply final elements */
234 "mac.l %%d0, %%a3, %%acc0;"
235 "movclr.l %%acc0, %%d0;"
236 "move.l %%d0, (%[d])+;"
239 "mac.l %%d1, %%a2, %%acc0;"
240 "movclr.l %%acc0, %%d1;"
241 "move.l %%d1, (%[d])+;"
244 /* otherwise n = 3 */
245 "mac.l %%d2, %%a1, %%acc0;"
246 "movclr.l %%acc0, %%d2;"
247 "move.l %%d2, (%[d])+;"
/* n lives in a data register; data (as d) and window (as w) live in
   address registers; all three are read-write operands */
249 : [n
] "+d" (n
), [d
] "+a" (data
), [w
] "+a" (window
)
/* scratch registers holding the data and window words */
250 : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
261 /* this is portable C and simple; why not use this as default? */
262 static inline ogg_int32_t
CLIP_TO_15(register ogg_int32_t x
) {
263 register ogg_int32_t hi
=32767, lo
=-32768;
264 return (x
>=hi
? hi
: (x
<=lo
? lo
: x
));