add SMAF codec (.mmf extension)(FS#10432)
[kugel-rb.git] / apps / codecs / libfaad / filtbank.c
blob41c8291d9a6180e546149e838f22d8e3aa2f4403
/*
** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** Any non-GPL usage of this software or parts of this software is strictly
** forbidden.
**
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
** $Id$
**/
28 #include "common.h"
29 #include "structs.h"
31 #include <stdlib.h>
32 #include <string.h>
33 #ifdef _WIN32_WCE
34 #define assert(x)
35 #else
36 #include <assert.h>
37 #endif
39 #include "filtbank.h"
40 #include "decoder.h"
41 #include "syntax.h"
42 #include "kbd_win.h"
43 #include "sine_win.h"
46 /*Windowing functions borrowed from libwmai*/
48 #ifdef CPU_ARM
49 static inline
50 void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
52 /* Block sizes are always power of two */
53 asm volatile (
54 "0:"
55 "ldmia %[d]!, {r0, r1};"
56 "ldmia %[w]!, {r4, r5};"
57 /* consume the first data and window value so we can use those
58 * registers again */
59 "smull r8, r9, r0, r4;"
60 "ldmia %[src2]!, {r0, r4};"
61 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
62 "smull r8, r9, r1, r5;"
63 "add r1, r4, r9, lsl #1;"
64 "stmia %[dst]!, {r0, r1};"
65 "subs %[n], %[n], #2;"
66 "bne 0b;"
67 : [d] "+r" (src0), [w] "+r" (src1), [src2] "+r" (src2), [dst] "+r" (dst), [n] "+r" (len)
69 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
71 static inline
72 void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
73 int len)
75 /* Block sizes are always power of two */
76 asm volatile (
77 "add %[s1], %[s1], %[n], lsl #2;"
78 "0:"
79 "ldmia %[s0]!, {r0, r1};"
80 "ldmdb %[s1]!, {r4, r5};"
81 "smull r8, r9, r0, r5;"
82 "mov r0, r9, lsl #1;"
83 "smull r8, r9, r1, r4;"
84 "mov r1, r9, lsl #1;"
85 "stmia %[dst]!, {r0, r1};"
86 "subs %[n], %[n], #2;"
87 "bne 0b;"
88 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
90 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
93 #elif defined(CPU_COLDFIRE)
94 static inline
95 void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
97 /* Block sizes are always power of two. Smallest block is always way bigger
98 * than four too.*/
99 asm volatile (
100 "0:"
101 "movem.l (%[src0]), %%d0-%%d3;"
102 "movem.l (%[src1]), %%d4-%%d5/%%a0-%%a1;"
103 "mac.l %%d0, %%d4, %%acc0;"
104 "mac.l %%d1, %%d5, %%acc1;"
105 "mac.l %%d2, %%a0, %%acc2;"
106 "mac.l %%d3, %%a1, %%acc3;"
107 "lea.l (16, %[src0]), %[src0];"
108 "lea.l (16, %[src1]), %[src1];"
109 "movclr.l %%acc0, %%d0;"
110 "movclr.l %%acc1, %%d1;"
111 "movclr.l %%acc2, %%d2;"
112 "movclr.l %%acc3, %%d3;"
113 "movem.l (%[src2]), %%d4-%%d5/%%a0-%%a1;"
114 "lea.l (16, %[src2]), %[src2];"
115 "add.l %%d4, %%d0;"
116 "add.l %%d5, %%d1;"
117 "add.l %%a0, %%d2;"
118 "add.l %%a1, %%d3;"
119 "movem.l %%d0-%%d3, (%[dst]);"
120 "lea.l (16, %[dst]), %[dst];"
121 "subq.l #4, %[n];"
122 "jne 0b;"
123 : [src0] "+a" (src0), [src1] "+a" (src1), [src2] "+a" (src2), [dst] "+a" (dst), [n] "+d" (len)
125 : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
128 static inline
129 void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
130 int len)
132 /* Block sizes are always power of two. Smallest block is always way bigger
133 * than four too.*/
134 asm volatile (
135 "lea.l (-16, %[s1], %[n]*4), %[s1];"
136 "0:"
137 "movem.l (%[s0]), %%d0-%%d3;"
138 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
139 "mac.l %%d0, %%a1, %%acc0;"
140 "mac.l %%d1, %%a0, %%acc1;"
141 "mac.l %%d2, %%d5, %%acc2;"
142 "mac.l %%d3, %%d4, %%acc3;"
143 "lea.l (16, %[s0]), %[s0];"
144 "lea.l (-16, %[s1]), %[s1];"
145 "movclr.l %%acc0, %%d0;"
146 "movclr.l %%acc1, %%d1;"
147 "movclr.l %%acc2, %%d2;"
148 "movclr.l %%acc3, %%d3;"
149 "movem.l %%d0-%%d3, (%[dst]);"
150 "lea.l (16, %[dst]), %[dst];"
151 "subq.l #4, %[n];"
152 "jne 0b;"
153 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
154 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
157 #else
158 static inline void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len){
159 int i;
160 for(i=0; i<len; i++)
161 dst[i] = MUL_F(src0[i], src1[i]) + src2[i];
164 static inline void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1, int len){
165 int i;
166 src1 += len-1;
167 for(i=0; i<len; i++)
168 dst[i] = MUL_F(src0[i], src1[-i]);
170 #endif
172 #ifdef LTP_DEC
173 static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
175 mdct_info *mdct = NULL;
177 switch (len)
179 case 2048:
180 case 1920:
181 mdct = fb->mdct2048;
182 break;
183 case 256:
184 case 240:
185 mdct = fb->mdct256;
186 break;
187 #ifdef LD_DEC
188 case 1024:
189 case 960:
190 mdct = fb->mdct1024;
191 break;
192 #endif
195 faad_mdct(mdct, in_data, out_data);
197 #endif
199 ALIGN real_t transf_buf[2*1024] IBSS_ATTR;
201 void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
202 uint8_t window_shape_prev, real_t *freq_in,
203 real_t *time_out, real_t *overlap,
204 uint8_t object_type, uint16_t frame_len)
206 int16_t i;
208 const real_t *window_long = NULL;
209 const real_t *window_long_prev = NULL;
210 const real_t *window_short = NULL;
211 const real_t *window_short_prev = NULL;
213 uint16_t nlong = frame_len;
214 uint16_t nshort = frame_len/8;
215 uint16_t trans = nshort/2;
217 uint16_t nflat_ls = (nlong-nshort)/2;
219 #ifdef PROFILE
220 int64_t count = faad_get_ts();
221 #endif
223 memset(transf_buf,0,sizeof(transf_buf));
224 /* select windows of current frame and previous frame (Sine or KBD) */
225 #ifdef LD_DEC
226 if (object_type == LD)
228 window_long = fb->ld_window[window_shape];
229 window_long_prev = fb->ld_window[window_shape_prev];
230 } else {
231 #else
232 (void) object_type;
233 #endif
235 /*AAC uses two different window shapes depending on spectal features*/
236 if(window_shape == 0){
237 window_long = sine_long_1024;
238 window_short = sine_short_128;
239 } else {
240 window_long = kbd_long_1024;
241 window_short = kbd_short_128;
244 if(window_shape_prev == 0){
245 window_long_prev = sine_long_1024;
246 window_short_prev = sine_short_128;
247 } else {
248 window_long_prev = kbd_long_1024;
249 window_short_prev = kbd_short_128;
252 #ifdef LD_DEC
254 #endif
256 #if 0
257 for (i = 0; i < 1024; i++)
259 printf("%d\n", freq_in[i]);
261 #endif
263 #if 0
264 printf("%d %d\n", window_sequence, window_shape);
265 #endif
266 switch (window_sequence)
268 case ONLY_LONG_SEQUENCE:
269 /* perform iMDCT */
270 ff_imdct_calc(11, transf_buf, freq_in);
272 /* add second half output of previous frame to windowed output of current frame */
273 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong);
275 /* window the second half and save as overlap for next frame */
276 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
278 break;
280 case LONG_START_SEQUENCE:
281 /* perform iMDCT */
282 ff_imdct_calc(11, transf_buf, freq_in);
284 /* add second half output of previous frame to windowed output of current frame */
285 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong);
287 /* window the second half and save as overlap for next frame */
288 /* construct second half window using padding with 1's and 0's */
290 memcpy(overlap, transf_buf+nlong, nflat_ls*sizeof(real_t));
292 vector_fmul_reverse(overlap+nflat_ls, transf_buf+nlong+nflat_ls, window_short, nshort);
294 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
295 break;
297 case EIGHT_SHORT_SEQUENCE:
298 /*this could be assemblerized too, but this case is extremely uncommon*/
300 /* perform iMDCT for each short block */
301 ff_imdct_calc(8, transf_buf+2*nshort*0, freq_in+0*nshort);
302 ff_imdct_calc(8, transf_buf+2*nshort*1, freq_in+1*nshort);
303 ff_imdct_calc(8, transf_buf+2*nshort*2, freq_in+2*nshort);
304 ff_imdct_calc(8, transf_buf+2*nshort*3, freq_in+3*nshort);
305 ff_imdct_calc(8, transf_buf+2*nshort*4, freq_in+4*nshort);
306 ff_imdct_calc(8, transf_buf+2*nshort*5, freq_in+5*nshort);
307 ff_imdct_calc(8, transf_buf+2*nshort*6, freq_in+6*nshort);
308 ff_imdct_calc(8, transf_buf+2*nshort*7, freq_in+7*nshort);
310 /* add second half output of previous frame to windowed output of current frame */
311 for (i = 0; i < nflat_ls; i++)
312 time_out[i] = overlap[i];
313 for(i = 0; i < nshort; i++)
315 time_out[nflat_ls+ i] = overlap[nflat_ls+ i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]);
316 time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]);
317 time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]);
318 time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]);
319 if (i < trans)
320 time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
323 /* window the second half and save as overlap for next frame */
324 for(i = 0; i < nshort; i++)
326 if (i >= trans)
327 overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
328 overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]);
329 overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]);
330 overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]);
331 overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]);
333 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
335 break;
337 case LONG_STOP_SEQUENCE:
338 /* perform iMDCT */
339 ff_imdct_calc(11, transf_buf, freq_in);
341 /* add second half output of previous frame to windowed output of current frame */
342 /* construct first half window using padding with 1's and 0's */
343 memcpy(time_out, overlap, nflat_ls*sizeof(real_t));
345 vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort);
347 for (i = 0; i < nflat_ls; i++)
348 time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i];
350 /* window the second half and save as overlap for next frame */
351 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
352 break;
355 #if 0
356 for (i = 0; i < 1024; i++)
358 printf("%d\n", time_out[i]);
359 //printf("0x%.8X\n", time_out[i]);
361 #endif
364 #ifdef PROFILE
365 count = faad_get_ts() - count;
366 fb->cycles += count;
367 #endif
371 #ifdef LTP_DEC
372 ALIGN real_t windowed_buf[2*1024] = {0};
373 /* only works for LTP -> no overlapping, no short blocks */
374 void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
375 uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct,
376 uint8_t object_type, uint16_t frame_len)
378 int16_t i;
380 const real_t *window_long = NULL;
381 const real_t *window_long_prev = NULL;
382 const real_t *window_short = NULL;
383 const real_t *window_short_prev = NULL;
385 uint16_t nlong = frame_len;
386 uint16_t nshort = frame_len/8;
387 uint16_t nflat_ls = (nlong-nshort)/2;
389 //assert(window_sequence != EIGHT_SHORT_SEQUENCE);
391 memset(windowed_buf,0,sizeof(windowed_buf));
392 #ifdef LD_DEC
393 if (object_type == LD)
395 window_long = fb->ld_window[window_shape];
396 window_long_prev = fb->ld_window[window_shape_prev];
397 } else {
398 #else
399 (void) object_type;
400 #endif
401 window_long = fb->long_window[window_shape];
402 window_long_prev = fb->long_window[window_shape_prev];
403 window_short = fb->short_window[window_shape];
404 window_short_prev = fb->short_window[window_shape_prev];
405 #ifdef LD_DEC
407 #endif
409 switch(window_sequence)
411 case ONLY_LONG_SEQUENCE:
412 for (i = nlong-1; i >= 0; i--)
414 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
415 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
417 mdct(fb, windowed_buf, out_mdct, 2*nlong);
418 break;
420 case LONG_START_SEQUENCE:
421 for (i = 0; i < nlong; i++)
422 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
423 for (i = 0; i < nflat_ls; i++)
424 windowed_buf[i+nlong] = in_data[i+nlong];
425 for (i = 0; i < nshort; i++)
426 windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]);
427 for (i = 0; i < nflat_ls; i++)
428 windowed_buf[i+nlong+nflat_ls+nshort] = 0;
429 mdct(fb, windowed_buf, out_mdct, 2*nlong);
430 break;
432 case LONG_STOP_SEQUENCE:
433 for (i = 0; i < nflat_ls; i++)
434 windowed_buf[i] = 0;
435 for (i = 0; i < nshort; i++)
436 windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]);
437 for (i = 0; i < nflat_ls; i++)
438 windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort];
439 for (i = 0; i < nlong; i++)
440 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
441 mdct(fb, windowed_buf, out_mdct, 2*nlong);
442 break;
445 #endif