/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/
20 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
23 static inline ogg_int32_t
MULT32(ogg_int32_t x
, ogg_int32_t y
) {
25 asm volatile("smull\t%0, %1, %2, %3"
31 static inline ogg_int32_t
MULT31(ogg_int32_t x
, ogg_int32_t y
) {
32 return MULT32(x
,y
)<<1;
35 static inline ogg_int32_t
MULT31_SHIFT15(ogg_int32_t x
, ogg_int32_t y
) {
37 asm volatile("smull %0, %1, %2, %3\n\t"
38 "movs %0, %0, lsr #15\n\t"
39 "adc %1, %0, %1, lsl #17\n\t"
/* Cross products of two complex pairs (high-word results):
     x = hi(a*t + b*v),  y = hi(b*t - a*v)
   The rsb negates a into the spare output register so the second pair
   can be accumulated with smlal. */
#define XPROD32(a, b, t, v, x, y) \
{ \
  long l; \
  asm( "smull %0, %1, %4, %6\n\t" \
       "rsb   %3, %4, #0\n\t" \
       "smlal %0, %1, %5, %7\n\t" \
       "smull %0, %2, %5, %6\n\t" \
       "smlal %0, %2, %3, %7" \
       : "=&r" (l), "=&r" (x), "=&r" (y), "=r" ((a)) \
       : "3" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
}
58 static inline void XPROD31(ogg_int32_t a
, ogg_int32_t b
,
59 ogg_int32_t t
, ogg_int32_t v
,
60 ogg_int32_t
*x
, ogg_int32_t
*y
)
63 asm( "smull %0, %1, %4, %6\n\t"
65 "smlal %0, %1, %5, %7\n\t"
66 "smull %0, %2, %5, %6\n\t"
67 "smlal %0, %2, %3, %7"
68 : "=&r" (l
), "=&r" (x1
), "=&r" (y1
), "=r" (a
)
69 : "3" (a
), "r" (b
), "r" (t
), "r" (v
) );
74 static inline void XNPROD31(ogg_int32_t a
, ogg_int32_t b
,
75 ogg_int32_t t
, ogg_int32_t v
,
76 ogg_int32_t
*x
, ogg_int32_t
*y
)
79 asm( "smull %0, %1, %3, %5\n\t"
81 "smlal %0, %1, %2, %6\n\t"
82 "smull %0, %2, %4, %5\n\t"
83 "smlal %0, %2, %3, %6"
84 : "=&r" (l
), "=&r" (x1
), "=&r" (y1
)
85 : "r" (a
), "r" (b
), "r" (t
), "r" (v
) );
/* asm versions of vector operations for block.c, window.c */
/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
   NOT do a final shift, meaning that the result of vect_mult_bw is
   only 31 bits not 32.  This is so that we can do the shift in-place
   in vect_add_xxxx instead to save one instruction for each mult on arm */
99 void vect_add_right_left(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
101 /* first arg is right subframe of previous frame and second arg
102 is left subframe of current frame. overlap left onto right overwriting
103 the right subframe */
107 "ldmia %[x], {r0, r1, r2, r3};"
108 "ldmia %[y]!, {r4, r5, r6, r7};"
109 "add r0, r4, r0, lsl #1;"
110 "add r1, r5, r1, lsl #1;"
111 "add r2, r6, r2, lsl #1;"
112 "add r3, r7, r3, lsl #1;"
113 "stmia %[x]!, {r0, r1, r2, r3};"
114 "ldmia %[x], {r0, r1, r2, r3};"
115 "ldmia %[y]!, {r4, r5, r6, r7};"
116 "add r0, r4, r0, lsl #1;"
117 "add r1, r5, r1, lsl #1;"
118 "add r2, r6, r2, lsl #1;"
119 "add r3, r7, r3, lsl #1;"
120 "stmia %[x]!, {r0, r1, r2, r3};"
121 : [x
] "+r" (x
), [y
] "+r" (y
)
122 : : "r0", "r1", "r2", "r3",
123 "r4", "r5", "r6", "r7",
130 void vect_add_left_right(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
132 /* first arg is left subframe of current frame and second arg
133 is right subframe of previous frame. overlap right onto left overwriting
137 "ldmia %[x], {r0, r1, r2, r3};"
138 "ldmia %[y]!, {r4, r5, r6, r7};"
139 "add r0, r0, r4, lsl #1;"
140 "add r1, r1, r5, lsl #1;"
141 "add r2, r2, r6, lsl #1;"
142 "add r3, r3, r7, lsl #1;"
143 "stmia %[x]!, {r0, r1, r2, r3};"
144 "ldmia %[x], {r0, r1, r2, r3};"
145 "ldmia %[y]!, {r4, r5, r6, r7};"
146 "add r0, r0, r4, lsl #1;"
147 "add r1, r1, r5, lsl #1;"
148 "add r2, r2, r6, lsl #1;"
149 "add r3, r3, r7, lsl #1;"
150 "stmia %[x]!, {r0, r1, r2, r3};"
151 : [x
] "+r" (x
), [y
] "+r" (y
)
152 : : "r0", "r1", "r2", "r3",
153 "r4", "r5", "r6", "r7",
160 void vect_mult_fw(ogg_int32_t
*data
, LOOKUP_T
*window
, int n
)
162 /* Note, mult_fw uses MULT31 */
165 "ldmia %[d], {r0, r1, r2, r3};"
166 "ldmia %[w]!, {r4, r5, r6, r7};"
167 "smull r8, r0, r4, r0;"
168 "mov r0, r0, lsl #1;"
169 "smull r8, r1, r5, r1;"
170 "mov r1, r1, lsl #1;"
171 "smull r8, r2, r6, r2;"
172 "mov r2, r2, lsl #1;"
173 "smull r8, r3, r7, r3;"
174 "mov r3, r3, lsl #1;"
175 "stmia %[d]!, {r0, r1, r2, r3};"
176 : [d
] "+r" (data
), [w
] "+r" (window
)
177 : : "r0", "r1", "r2", "r3",
178 "r4", "r5", "r6", "r7", "r8",
185 void vect_mult_bw(ogg_int32_t
*data
, LOOKUP_T
*window
, int n
)
187 /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
188 /* On ARM, we can do the shift at the same time as the overlap-add */
190 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
191 "ldmda %[w]!, {r4, r5, r6, r7};"
192 "smull r8, r0, r7, r0;"
193 "smull r7, r1, r6, r1;"
194 "smull r6, r2, r5, r2;"
195 "smull r5, r3, r4, r3;"
196 "stmia %[d]!, {r0, r1, r2, r3};"
197 : [d
] "+r" (data
), [w
] "+r" (window
)
198 : : "r0", "r1", "r2", "r3",
199 "r4", "r5", "r6", "r7", "r8",
205 static inline void vect_copy(ogg_int32_t
*x
, const ogg_int32_t
*y
, int n
)
207 memcpy(x
,y
,n
*sizeof(ogg_int32_t
));
217 static inline ogg_int32_t
CLIP_TO_15(ogg_int32_t x
) {
219 asm volatile("subs %1, %0, #32768\n\t"
220 "movpl %0, #0x7f00\n\t"
221 "orrpl %0, %0, #0xff\n"
222 "adds %1, %0, #32768\n\t"
232 #ifndef _V_LSP_MATH_ASM
233 #define _V_LSP_MATH_ASM
235 static inline void lsp_loop_asm(ogg_uint32_t
*qip
,ogg_uint32_t
*pip
,
237 ogg_int32_t
*ilsp
,ogg_int32_t wi
,
240 ogg_uint32_t qi
=*qip
,pi
=*pip
;
241 ogg_int32_t qexp
=*qexpp
;
245 "add r0,r0,r1,lsl#3;"
249 "subs r1,r1,%4;" //ilsp[j]-wi
250 "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi)
251 "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi)
253 "subs r1,r3,%4;" //ilsp[j+1]-wi
254 "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi)
255 "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi)
257 "cmn r2,r3;" // shift down 16?
261 "orr %0,%0,r2,lsl #16;"
263 "orr %1,%1,r3,lsl #16;"
268 // odd filter assymetry
271 "add r0,%3,%5,lsl#2;\n"
276 "subs r1,r1,%4;\n" //ilsp[j]-wi
277 "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi)
278 "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi)
279 "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi)
281 "cmn r2,r3;\n" // shift down 16?
284 "mov %0,%0,lsr #16;\n"
285 "orr %0,%0,r2,lsl #16;\n"
286 "mov %1,%1,lsr #16;\n"
287 "orr %1,%1,r3,lsl #16;\n"
289 //qi=(pi>>shift)*labs(ilsp[j]-wi);
290 //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
295 /* normalize to max 16 sig figs */
299 "tst r1,#0xff000000;"
301 "movne r1,r1,lsr #8;"
302 "tst r1,#0x00f00000;"
304 "movne r1,r1,lsr #4;"
305 "tst r1,#0x000c0000;"
307 "movne r1,r1,lsr #2;"
308 "tst r1,#0x00020000;"
310 "movne r1,r1,lsr #1;"
311 "tst r1,#0x00010000;"
317 : "+r"(qi
),"+r"(pi
),"+r"(qexp
)
318 : "r"(ilsp
),"r"(wi
),"r"(m
)
319 : "r0","r1","r2","r3","cc");
326 static inline void lsp_norm_asm(ogg_uint32_t
*qip
,ogg_int32_t
*qexpp
){
328 ogg_uint32_t qi
=*qip
;
329 ogg_int32_t qexp
=*qexpp
;
331 asm("tst %0,#0x0000ff00;"
332 "moveq %0,%0,lsl #8;"
334 "tst %0,#0x0000f000;"
335 "moveq %0,%0,lsl #4;"
337 "tst %0,#0x0000c000;"
338 "moveq %0,%0,lsl #2;"
340 "tst %0,#0x00008000;"
341 "moveq %0,%0,lsl #1;"
343 : "+r"(qi
),"+r"(qexp
)