rockpaint: steal the big buffer from audiobuffer
[kugel-rb.git] / apps / codecs / lib / asm_arm.h
blob 627f4afd78bcdf34b586905a10c9792728c1fe64
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/
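
/* This header provides ARM inline-assembly versions of Tremor's fixed-point
   primitives: smull/smlal based wide multiplies and cross products, plus
   ldm/stm based vector helpers used by block.c and window.c. */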
#ifdef CPU_ARM

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH
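
/* MULT32: 32x32->64 bit multiply, returning only the upper 32 bits of the
   product, i.e. ((int64_t)x * y) >> 32. */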
static inline int32_t MULT32(int32_t x, int32_t y) {
  int lo, hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo), "=&r"(hi)
               : "r"(x), "r"(y) );
  return(hi);
}
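
/* MULT31: multiply two Q31 fractions; MULT32 with the redundant sign bit
   shifted out, i.e. ((int64_t)x * y) >> 31. */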
static inline int32_t MULT31(int32_t x, int32_t y) {
  return MULT32(x, y) << 1;
}
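
/* MULT31_SHIFT15: ((int64_t)x * y) >> 15, rounded via the carry from the
   last bit shifted out (the movs/adc pair) and truncated to 32 bits. */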
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
  int lo, hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs  %0, %0, lsr #15\n\t"
               "adc   %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo), "=&r"(hi)
               : "r"(x), "r"(y)
               : "cc" );
  return(hi);
}
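
/* XPROD32: cross product on the upper halves of the 64-bit products:
   x = hi32(a*t + b*v), y = hi32(b*t - a*v). */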
#define XPROD32(a, b, t, v, x, y) \
{ \
  long l; \
  asm( "smull %0, %1, %3, %5\n\t" \
       "rsb   %2, %6, #0\n\t" \
       "smlal %0, %1, %4, %6\n\t" \
       "smull %0, %2, %3, %2\n\t" \
       "smlal %0, %2, %4, %5" \
       : "=&r" (l), "=&r" (x), "=&r" (y) \
       : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
}
static inline void XPROD31(int32_t  a, int32_t  b,
                           int32_t  t, int32_t  v,
                           int32_t *x, int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %3, %5\n\t"
       "rsb   %2, %6, #0\n\t"
       "smlal %0, %1, %4, %6\n\t"
       "smull %0, %2, %3, %2\n\t"
       "smlal %0, %2, %4, %5"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v) );
  *x = x1 << 1;
  *y = y1 << 1;
}
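
/* XNPROD31: negated cross product:
   *x = hi32(a*t - b*v) << 1, *y = hi32(b*t + a*v) << 1. */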
static inline void XNPROD31(int32_t  a, int32_t  b,
                            int32_t  t, int32_t  v,
                            int32_t *x, int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %3, %5\n\t"
       "rsb   %2, %4, #0\n\t"
       "smlal %0, %1, %2, %6\n\t"
       "smull %0, %2, %4, %5\n\t"
       "smlal %0, %2, %3, %6"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v) );
  *x = x1 << 1;
  *y = y1 << 1;
}
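
/* XPROD31_R / XNPROD31_R: the same computations as XPROD31 / XNPROD31,
   written as statement macros that assign directly to the lvalues _x and _y,
   e.g. (hypothetical usage)  XPROD31_R(s0, s1, T[0], T[1], d0, d1);  */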
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int x1, y1, l;\
  asm( "smull %0, %1, %5, %3\n\t"\
       "rsb   %2, %3, #0\n\t"\
       "smlal %0, %1, %6, %4\n\t"\
       "smull %0, %2, %6, %2\n\t"\
       "smlal %0, %2, %5, %4"\
       : "=&r" (l), "=&r" (x1), "=&r" (y1)\
       : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int x1, y1, l;\
  asm( "smull %0, %1, %5, %3\n\t"\
       "rsb   %2, %4, #0\n\t"\
       "smlal %0, %1, %6, %2\n\t"\
       "smull %0, %2, %5, %4\n\t"\
       "smlal %0, %2, %6, %3"\
       : "=&r" (l), "=&r" (x1), "=&r" (y1)\
       : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
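
/* vect_add: x[i] += y[i] for n elements; the asm loop handles four elements
   per iteration with ldm/stm, the C tail loop finishes the remainder. */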
static inline
void vect_add(int32_t *x, int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
                  "ldmia %[y]!, {r4, r5, r6, r7};"
                  "add r0, r0, r4;"
                  "add r1, r1, r5;"
                  "add r2, r2, r6;"
                  "add r3, r3, r7;"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7",
                      "memory");
    n -= 4;
  }
  /* add final elements */
  while (n>0) {
    *x++ += *y++;
    n--;
  }
}
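
/* vect_copy: copy n elements from y to x, four per ldm/stm iteration, then
   one at a time. */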
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "memory");
    n -= 4;
  }
  /* copy final elements */
  while (n>0) {
    *x++ = *y++;
    n--;
  }
}
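
/* vect_mult_fw: data[i] = MULT31(data[i], window[i]) with the window read
   forwards; each smull/mov pair below is one inlined MULT31. */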
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmia %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r4;"
                  "mov   r0, r9, lsl #1;"
                  "smull r8, r9, r1, r5;"
                  "mov   r1, r9, lsl #1;"
                  "smull r8, r9, r2, r6;"
                  "mov   r2, r9, lsl #1;"
                  "smull r8, r9, r3, r7;"
                  "mov   r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
}
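
/* vect_mult_bw: as vect_mult_fw, but the window is traversed backwards
   (ldmda in the asm loop, window-- in the tail loop). */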
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmda %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r7;"
                  "mov   r0, r9, lsl #1;"
                  "smull r8, r9, r1, r6;"
                  "mov   r1, r9, lsl #1;"
                  "smull r8, r9, r2, r5;"
                  "mov   r2, r9, lsl #1;"
                  "smull r8, r9, r3, r4;"
                  "mov   r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}
#endif

#endif
/* not used anymore */

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
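
/* CLIP_TO_15: saturate x to the signed 16-bit range using conditionally
   executed instructions (this block is flagged as unused above). */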
static inline int32_t CLIP_TO_15(int32_t x) {
  int tmp;
  asm volatile("subs  %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds  %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x), "=r"(tmp)
               :
               : "cc");
  return(x);
}

#endif
#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

#endif

#endif