apps/codecs/libtremor/asm_mcf5249.h
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/* asm routines for wide math on the MCF5249 */

#include "os_types.h"

#if defined(CPU_COLDFIRE)

/* attribute for 16-byte alignment */
#define LINE_ATTR __attribute__ ((aligned (16)))

#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH

#define MB()
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {

  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
                "movclr.l %%acc0, %[x];"    /* move & clear acc */
                "asr.l #1, %[x];"           /* no overflow test */
                : [x] "+&d" (x)
                : [y] "r" (y)
                : "cc");
  return x;
}
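
/* Illustrative sketch, not part of the original file: assuming the EMAC is
   in signed fractional mode (so mac.l leaves (x*y)<<1 in the accumulator)
   and that ogg_int64_t is available from os_types.h, the asr.l #1 above
   makes MULT32 return the upper 32 bits of the signed 64-bit product: */
static inline ogg_int32_t MULT32_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);
}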

static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {

  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                "movclr.l %%acc0, %[x];"    /* move and clear */
                : [x] "+&r" (x)
                : [y] "r" (y)
                : "cc");
  return x;
}
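
/* Illustrative sketch (same assumptions as above): with no final shift,
   the fractional-mode extra <<1 means MULT31 is the Q31 product: */
static inline ogg_int32_t MULT31_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 31);
}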

static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  ogg_int32_t r;

  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                "mulu.l %[y], %[x];"        /* get lower half, avoid emac stall */
                "movclr.l %%acc0, %[r];"    /* get higher half */
                "asl.l #8, %[r];"           /* hi<<16, plus one free */
                "asl.l #8, %[r];"
                "lsr.l #8, %[x];"           /* (unsigned)lo >> 15 */
                "lsr.l #7, %[x];"
                "or.l %[x], %[r];"          /* logical-or results */
                : [r] "=&d" (r), [x] "+d" (x)
                : [y] "d" (y)
                : "cc");
  return r;
}
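
/* Illustrative sketch: OR-ing the shifted high half (hi<<16, plus the free
   fractional-mode shift) with lo>>15 reassembles the low 32 bits of the
   product shifted right by 15, i.e.: */
static inline ogg_int32_t MULT31_SHIFT15_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 15);
}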

#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ += *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
    /* add final elements */
    while (n>0) {
        *x++ += *y++;
        n--;
    }
}
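
/* Reference semantics (illustrative helper, not in the original file):
   the movem-based loop above is a four-at-a-time unrolling of a plain
   element-wise add: */
static inline void vect_add_ref(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
    while (n-- > 0)
        *x++ += *y++;
}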

static inline
void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
    /* coldfire asm has symmetrical versions of vect_add_right_left
       and vect_add_left_right (since symmetrical versions of
       vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
    vect_add_right_left(x, y, n);
}

static inline
void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ = *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"  /* store */
                  "lea.l (4*4, %[x]), %[x];"    /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
    /* copy final elements */
    while (n>0) {
        *x++ = *y++;
        n--;
    }
}
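
/* Illustrative helper, not in the original file: vect_copy moves four
   words per movem.l pair; for non-overlapping buffers it matches
   memcpy(x, y, n * sizeof(ogg_int32_t)) or this plain loop: */
static inline void vect_copy_ref(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
    while (n-- > 0)
        *x++ = *y++;
}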

static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
    /* ensure data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
    asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
                  "lea.l (4*4, %[w]), %[w];"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window values */
                  "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                  "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                  "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                  "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a0, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}

static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
    /* ensure at least data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
    asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* loop start */
                  "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch registers */
                  "movem.l (%[w]), %%a0-%%a3;"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window value */
                  "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                  "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                  "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                  "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a3, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}

#endif

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

/* this is portable C and simple; why not use this as default? */
static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
  register ogg_int32_t hi=32767, lo=-32768;
  return (x>=hi ? hi : (x<=lo ? lo : x));
}
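
/* Example (added note): CLIP_TO_15(40000) == 32767 and
   CLIP_TO_15(-40000) == -32768; values already within the signed
   16-bit range pass through unchanged. */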

#endif

#else

#define LINE_ATTR

#endif