apps/codecs/lib/asm_arm.h

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/
#ifdef CPU_ARM

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

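/* MULT32(x, y): high 32 bits of the signed 64-bit product x*y,
   i.e. (x*y) >> 32.  ARMv6+ uses SMMUL; older cores use SMULL and
   discard the low word. */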
#if ARM_ARCH >= 6
static inline int32_t MULT32(int32_t x, int32_t y) {
  int32_t hi;
  asm volatile("smmul %[hi], %[x], %[y] \n\t"
               : [hi] "=&r" (hi)
               : [x] "r" (x), [y] "r" (y) );
  return(hi);
}
#else
static inline int32_t MULT32(int32_t x, int32_t y) {
  int32_t lo, hi;
  asm volatile("smull\t%0, %1, %2, %3 \n\t"
               : "=&r"(lo),"=&r"(hi)
               : "r"(x),"r"(y) );
  return(hi);
}
#endif

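/* MULT31(x, y): Q31 fixed-point multiply, (x*y) >> 31. */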
static inline int32_t MULT31(int32_t x, int32_t y) {
  return MULT32(x,y)<<1;
}

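/* MULT31_SHIFT15(x, y): low 32 bits of (x*y) >> 15; the MOVS/ADC pair
   rounds the result using bit 14 of the low word. */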
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
  int32_t lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs %0, %0, lsr #15\n\t"
               "adc %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "r"(x),"r"(y)
               : "cc" );
  return(hi);
}

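/* XPROD32(a, b, t, v, x, y): x = (a*t + b*v) >> 32,
                              y = (b*t - a*v) >> 32. */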
#define XPROD32(a, b, t, v, x, y) \
{ \
  int32_t l; \
  asm("smull %0, %1, %3, %5\n\t" \
      "rsb %2, %6, #0\n\t" \
      "smlal %0, %1, %4, %6\n\t" \
      "smull %0, %2, %3, %2\n\t" \
      "smlal %0, %2, %4, %5" \
      : "=&r" (l), "=&r" (x), "=&r" (y) \
      : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
}

#if ARM_ARCH >= 6
/* These may yield slightly different results from the macros below,
   because only the high 32 bits of each multiplication are accumulated,
   while the macros below use a 64-bit accumulator that is truncated to
   32 bits. */
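/* In plain C the two macros compute, roughly:
 *   XPROD31_R:  _x = ((int64_t)_a*_t + (int64_t)_b*_v) >> 31;
 *               _y = ((int64_t)_b*_t - (int64_t)_a*_v) >> 31;
 *   XNPROD31_R: _x = ((int64_t)_a*_t - (int64_t)_b*_v) >> 31;
 *               _y = ((int64_t)_b*_t + (int64_t)_a*_v) >> 31;
 * "Roughly" because both implementations take the high 32 bits of the
 * 64-bit result and shift left by one, so the lowest bit can differ
 * from the expressions above. */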
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1;\
  asm("smmul %[x1], %[t], %[a] \n\t"\
      "smmul %[y1], %[t], %[b] \n\t"\
      "smmla %[x1], %[v], %[b], %[x1] \n\t"\
      "smmls %[y1], %[v], %[a], %[y1] \n\t"\
      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}

#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1;\
  asm("smmul %[x1], %[t], %[a] \n\t"\
      "smmul %[y1], %[t], %[b] \n\t"\
      "smmls %[x1], %[v], %[b], %[x1] \n\t"\
      "smmla %[y1], %[v], %[a], %[y1] \n\t"\
      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#else
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1, l;\
  asm("smull %0, %1, %5, %3\n\t"\
      "rsb %2, %3, #0\n\t"\
      "smlal %0, %1, %6, %4\n\t"\
      "smull %0, %2, %6, %2\n\t"\
      "smlal %0, %2, %5, %4"\
      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}

#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1, l;\
  asm("smull %0, %1, %5, %3\n\t"\
      "rsb %2, %4, #0\n\t"\
      "smlal %0, %1, %6, %2\n\t"\
      "smull %0, %2, %5, %4\n\t"\
      "smlal %0, %2, %6, %3"\
      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#endif

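/* Function wrappers around the _R macros, for callers that want the
   results written through pointers. */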
static inline void XPROD31(int32_t  a, int32_t  b,
                           int32_t  t, int32_t  v,
                           int32_t *x, int32_t *y)
{
  int32_t _x1, _y1;
  XPROD31_R(a, b, t, v, _x1, _y1);
  *x = _x1;
  *y = _y1;
}

static inline void XNPROD31(int32_t  a, int32_t  b,
                            int32_t  t, int32_t  v,
                            int32_t *x, int32_t *y)
{
  int32_t _x1, _y1;
  XNPROD31_R(a, b, t, v, _x1, _y1);
  *x = _x1;
  *y = _y1;
}

#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
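/* vect_add(x, y, n): x[i] += y[i] for i = 0..n-1; four elements per
   LDM/STM iteration, then a scalar tail loop. */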
static inline
void vect_add(int32_t *x, int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
                  "ldmia %[y]!, {r4, r5, r6, r7};"
                  "add r0, r0, r4;"
                  "add r1, r1, r5;"
                  "add r2, r2, r6;"
                  "add r3, r3, r7;"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7",
                      "memory");
    n -= 4;
  }
  /* add final elements */
  while (n>0) {
    *x++ += *y++;
    n--;
  }
}

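/* vect_copy(x, y, n): copy n 32-bit words from y to x, four at a time. */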
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "memory");
    n -= 4;
  }
  /* copy final elements */
  while (n>0) {
    *x++ = *y++;
    n--;
  }
}

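/* vect_mult_fw(data, window, n): data[i] = MULT31(data[i], window[i]),
   stepping forwards through the window. */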
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmia %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r4;"
                  "mov r0, r9, lsl #1;"
                  "smull r8, r9, r1, r5;"
                  "mov r1, r9, lsl #1;"
                  "smull r8, r9, r2, r6;"
                  "mov r2, r9, lsl #1;"
                  "smull r8, r9, r3, r7;"
                  "mov r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
}

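/* vect_mult_bw(data, window, n): data[i] = MULT31(data[i], window[-i]);
   the window pointer walks backwards while data walks forwards. */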
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmda %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r7;"
                  "mov r0, r9, lsl #1;"
                  "smull r8, r9, r1, r6;"
                  "mov r1, r9, lsl #1;"
                  "smull r8, r9, r2, r5;"
                  "mov r2, r9, lsl #1;"
                  "smull r8, r9, r3, r4;"
                  "mov r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}

#endif

#endif
/* not used anymore */

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

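/* CLIP_TO_15(x): clamp to the 16-bit output range using conditional
   execution; x >= 32768 becomes 0x7fff, x < -32768 becomes 0x8000. */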
static inline int32_t CLIP_TO_15(int32_t x) {
  int tmp;
  asm volatile("subs %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

#endif
#endif