2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ** Any non-GPL usage of this software or parts of this software is strictly
22 ** Commercial non-GPL licensing of this software is possible.
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
46 /* static variables */
/*
 * File-scope scratch buffers, each holding 2*FRAME_LEN real_t values.
 *
 * transf_buf   - cleared by ifilter_bank() on every call and then used as the
 *                destination of ff_imdct_calc().  Carries IBSS_ATTR in addition
 *                to MEM_ALIGN_ATTR (both macros are defined outside this file
 *                section; presumably a fast-RAM placement and an alignment
 *                request - confirm against their definitions).
 * windowed_buf - cleared by filter_bank_ltp() and used as the windowed input
 *                handed to mdct().  Explicitly zero-initialised.
 */
47 static real_t transf_buf
[2*FRAME_LEN
] IBSS_ATTR MEM_ALIGN_ATTR
;
49 static real_t windowed_buf
[2*FRAME_LEN
] MEM_ALIGN_ATTR
= {0};
53 /*Windowing functions borrowed from libwmai*/
/*
 * vector_fmul_add_add (ARM inline-assembly variant)
 *
 * Visible per-iteration work, two elements at a time:
 *   - load two words from src0 (operand d) and two from src1 (operand w),
 *     post-incrementing both pointers;
 *   - smull forms the 64-bit product in r8:r9 (r9 = high word);
 *   - load two words from src2, then result = src2 word + (r9 << 1);
 *   - store the two results to dst and subtract 2 from len.
 * In effect dst[i] = src2[i] + (high32(src0[i] * src1[i]) << 1).
 *
 * All four pointers and len are read-write operands, so they are advanced /
 * consumed by the asm.  r0, r1, r4, r5, r8, r9, memory and the condition
 * flags are declared clobbered.
 *
 * NOTE(review): the loop label and the conditional branch that follows the
 * "subs" are not visible in this excerpt; len is presumably required to be a
 * non-zero multiple of 2 - confirm.
 * NOTE(review): the inline remark on the first "add" speaks of *dst, but the
 * value being added to was loaded from src2.
 */
56 void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
)
58 /* Block sizes are always power of two */
61 "ldmia %[d]!, {r0, r1};"
62 "ldmia %[w]!, {r4, r5};"
63 /* consume the first data and window value so we can use those
65 "smull r8, r9, r0, r4;"
66 "ldmia %[src2]!, {r0, r4};"
67 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
68 "smull r8, r9, r1, r5;"
69 "add r1, r4, r9, lsl #1;"
70 "stmia %[dst]!, {r0, r1};"
71 "subs %[n], %[n], #2;"
73 : [d
] "+r" (src0
), [w
] "+r" (src1
), [src2
] "+r" (src2
), [dst
] "+r" (dst
), [n
] "+r" (len
)
75 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
/*
 * vector_fmul_reverse (ARM inline-assembly variant)
 *
 * Multiplies src0, read front to back, by src1, read back to front.
 * Visible steps:
 *   - s1 is first advanced by n*4 bytes, i.e. to one past its last element;
 *   - each iteration loads two words from s0 (ascending, post-increment) and
 *     two from s1 with ldmdb (descending, pre-decrement), so r5 holds the
 *     higher-addressed src1 word;
 *   - smull pairs r0 with r5 and r1 with r4, giving the reversed pairing;
 *   - two results are stored to dst and n is reduced by 2.
 *
 * r0, r1, r4, r5, r8, r9, memory and the condition flags are clobbered.
 *
 * NOTE(review): the end of the parameter list, the instructions that move
 * each product into r0/r1 before the store, and the loop label/branch are
 * not visible in this excerpt - confirm against the full file.
 */
78 void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
,
81 /* Block sizes are always power of two */
83 "add %[s1], %[s1], %[n], lsl #2;"
85 "ldmia %[s0]!, {r0, r1};"
86 "ldmdb %[s1]!, {r4, r5};"
87 "smull r8, r9, r0, r5;"
89 "smull r8, r9, r1, r4;"
91 "stmia %[dst]!, {r0, r1};"
92 "subs %[n], %[n], #2;"
94 : [s0
] "+r" (src0
), [s1
] "+r" (src1
), [dst
] "+r" (dst
), [n
] "+r" (len
)
96 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
99 #elif defined(CPU_COLDFIRE)
/*
 * vector_fmul_add_add (ColdFire variant, uses the MAC accumulators)
 *
 * Visible per-iteration work, four elements at a time:
 *   - movem.l loads four words of src0 into d0-d3 and four words of src1
 *     into d4-d5/a0-a1;
 *   - four mac.l instructions form src0[k] * src1[k] in acc0..acc3;
 *   - src0 and src1 are advanced by 16 bytes;
 *   - movclr.l moves each accumulator into d0-d3 and clears it;
 *   - four words of src2 are loaded and src2 is advanced by 16 bytes;
 *   - d0-d3 are stored to dst, which is advanced by 16 bytes.
 *
 * Pointers are address-register read-write operands, len is a data-register
 * read-write operand.  d0-d5, a0, a1, memory and cc are clobbered.
 *
 * NOTE(review): the instructions that add the src2 words to d0-d3, the loop
 * label, the counter update and the branch are not visible in this excerpt;
 * len is presumably a non-zero multiple of 4 - confirm.
 */
101 void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
)
103 /* Block sizes are always power of two. Smallest block is always way bigger
107 "movem.l (%[src0]), %%d0-%%d3;"
108 "movem.l (%[src1]), %%d4-%%d5/%%a0-%%a1;"
109 "mac.l %%d0, %%d4, %%acc0;"
110 "mac.l %%d1, %%d5, %%acc1;"
111 "mac.l %%d2, %%a0, %%acc2;"
112 "mac.l %%d3, %%a1, %%acc3;"
113 "lea.l (16, %[src0]), %[src0];"
114 "lea.l (16, %[src1]), %[src1];"
115 "movclr.l %%acc0, %%d0;"
116 "movclr.l %%acc1, %%d1;"
117 "movclr.l %%acc2, %%d2;"
118 "movclr.l %%acc3, %%d3;"
119 "movem.l (%[src2]), %%d4-%%d5/%%a0-%%a1;"
120 "lea.l (16, %[src2]), %[src2];"
125 "movem.l %%d0-%%d3, (%[dst]);"
126 "lea.l (16, %[dst]), %[dst];"
129 : [src0
] "+a" (src0
), [src1
] "+a" (src1
), [src2
] "+a" (src2
), [dst
] "+a" (dst
), [n
] "+d" (len
)
131 : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
/*
 * vector_fmul_reverse (ColdFire variant, uses the MAC accumulators)
 *
 * Multiplies src0, read front to back, by src1, read back to front.
 * Visible steps:
 *   - s1 is first moved to s1 + n*4 - 16, i.e. onto its last four words;
 *   - each iteration loads four words from s0 into d0-d3 and four from s1
 *     into d4-d5/a0-a1;
 *   - mac.l pairs d0 with a1, d1 with a0, d2 with d5 and d3 with d4, so the
 *     src1 words are consumed in descending address order;
 *   - s0 advances by 16 bytes while s1 steps back by 16 bytes;
 *   - movclr.l moves acc0..acc3 into d0-d3, which are stored to dst; dst
 *     then advances by 16 bytes.
 *
 * d0-d5, a0, a1, memory and cc are clobbered; there are no input-only
 * operands.
 *
 * NOTE(review): the end of the parameter list, the loop label, the counter
 * update and the branch are not visible in this excerpt; len is presumably a
 * non-zero multiple of 4 - confirm.
 */
135 void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
,
138 /* Block sizes are always power of two. Smallest block is always way bigger
141 "lea.l (-16, %[s1], %[n]*4), %[s1];"
143 "movem.l (%[s0]), %%d0-%%d3;"
144 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
145 "mac.l %%d0, %%a1, %%acc0;"
146 "mac.l %%d1, %%a0, %%acc1;"
147 "mac.l %%d2, %%d5, %%acc2;"
148 "mac.l %%d3, %%d4, %%acc3;"
149 "lea.l (16, %[s0]), %[s0];"
150 "lea.l (-16, %[s1]), %[s1];"
151 "movclr.l %%acc0, %%d0;"
152 "movclr.l %%acc1, %%d1;"
153 "movclr.l %%acc2, %%d2;"
154 "movclr.l %%acc3, %%d3;"
155 "movem.l %%d0-%%d3, (%[dst]);"
156 "lea.l (16, %[dst]), %[dst];"
159 : [s0
] "+a" (src0
), [s1
] "+a" (src1
), [dst
] "+a" (dst
), [n
] "+d" (len
)
160 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
/*
 * vector_fmul_add_add (portable C variant)
 *
 * Element-wise multiply-add: dst[i] = MUL_F(src0[i], src1[i]) + src2[i].
 *
 * dst   output vector
 * src0  first factor
 * src1  second factor
 * src2  addend
 * len   element count
 *
 * NOTE(review): the declaration of i and the loop header are not visible in
 * this excerpt; i presumably runs from 0 to len-1 - confirm.
 */
164 static inline void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
){
167 dst
[i
] = MUL_F(src0
[i
], src1
[i
]) + src2
[i
];
/*
 * vector_fmul_reverse (portable C variant)
 *
 * Multiplies src0 by src1 with src1 traversed backwards:
 * dst[i] = MUL_F(src0[i], src1[-i]).
 *
 * dst   output vector
 * src0  factor read in ascending order
 * src1  factor read in descending order
 * len   element count
 *
 * NOTE(review): the negative index implies src1 has been moved onto its last
 * element before the loop; that adjustment, the declaration of i and the loop
 * header are not visible in this excerpt - confirm.
 */
170 static inline void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, int len
){
174 dst
[i
] = MUL_F(src0
[i
], src1
[-i
]);
/*
 * mdct - forward transform helper: hands in_data/out_data to faad_mdct().
 *
 * fb        filter bank state
 * in_data   input samples passed straight through to faad_mdct()
 * out_data  output buffer passed straight through to faad_mdct()
 * len       transform length
 *
 * NOTE(review): the local mdct pointer starts as NULL and the code that
 * assigns it is not visible in this excerpt; presumably it is chosen from fb
 * according to len - confirm, and confirm that an unsupported len cannot
 * reach faad_mdct() with the pointer still NULL.
 */
179 static INLINE
void mdct(fb_info
*fb
, real_t
*in_data
, real_t
*out_data
, uint16_t len
)
181 mdct_info
*mdct
= NULL
;
201 faad_mdct(mdct
, in_data
, out_data
);
/*
 * ifilter_bank - inverse filter bank: inverse transform, windowing and
 * overlap-add for one frame.
 *
 * window_sequence    selects the case taken in the switch below
 *                    (ONLY_LONG, LONG_START, EIGHT_SHORT or LONG_STOP).
 * window_shape       0 selects the sine_* window tables for the current
 *                    frame; the kbd_* tables are the alternative.
 * window_shape_prev  same selection for the previous frame's windows.
 * freq_in            spectral input handed to ff_imdct_calc().
 * time_out           output: overlap[] plus the (windowed) first half of the
 *                    transform result.
 * overlap            read as the tail of the previous frame, then overwritten
 *                    with the windowed second half for the next call.
 * object_type        LD selects fb->ld_window[] for the long windows.
 * frame_len          long block length; nshort = frame_len/8 and
 *                    nflat_ls = (nlong - nshort)/2 are derived from it.
 *
 * The transform result is staged in the file-scope transf_buf, which is
 * cleared on entry.  Long sequences call ff_imdct_calc(11, ...) once; the
 * eight-short sequence calls ff_imdct_calc(8, ...) eight times, each writing
 * at transf_buf + 2*idx0 from freq_in + idx0.
 *
 * NOTE(review): several original lines are absent from this excerpt (braces,
 * break statements, preprocessor guards, some idx0/idx1 assignments and the
 * else branches of the window selection).  fb is used in the LD branch but is
 * not among the visible parameters, so that branch is presumably compiled
 * conditionally - confirm against the full file.
 */
205 void ifilter_bank(uint8_t window_sequence
, uint8_t window_shape
,
206 uint8_t window_shape_prev
, real_t
*freq_in
,
207 real_t
*time_out
, real_t
*overlap
,
208 uint8_t object_type
, uint16_t frame_len
)
210 int32_t i
, idx0
, idx1
;
211 real_t win0
, win1
, win2
;
213 const real_t
*window_long
= NULL
;
214 const real_t
*window_long_prev
= NULL
;
215 const real_t
*window_short
= NULL
;
216 const real_t
*window_short_prev
= NULL
;
218 int32_t nlong
= frame_len
;
219 int32_t nshort
= frame_len
/8;
220 int32_t nflat_ls
= (nlong
-nshort
)/2;
223 int64_t count
= faad_get_ts();
226 memset(transf_buf
,0,sizeof(transf_buf
));
227 /* select windows of current frame and previous frame (Sine or KBD) */
229 if (object_type
== LD
)
231 window_long
= fb
->ld_window
[window_shape
];
232 window_long_prev
= fb
->ld_window
[window_shape_prev
];
238 /* AAC uses two different window shapes depending on spectral features */
239 if (window_shape
== 0) {
240 window_long
= sine_long_1024
;
241 window_short
= sine_short_128
;
243 window_long
= kbd_long_1024
;
244 window_short
= kbd_short_128
;
247 if (window_shape_prev
== 0) {
248 window_long_prev
= sine_long_1024
;
249 window_short_prev
= sine_short_128
;
251 window_long_prev
= kbd_long_1024
;
252 window_short_prev
= kbd_short_128
;
/* NOTE(review): debug dump of the input spectrum and of the window
 * parameters.  The loop bound is the literal 1024 rather than nlong and the
 * "%d" conversion assumes real_t is an int-compatible type; presumably this
 * is compiled only in debug builds - confirm. */
260 for (i
= 0; i
< 1024; i
++)
262 printf("%d\n", freq_in
[i
]);
267 printf("%d %d\n", window_sequence
, window_shape
);
269 switch (window_sequence
)
271 case ONLY_LONG_SEQUENCE
:
273 ff_imdct_calc(11, transf_buf
, freq_in
);
275 /* add second half output of previous frame to windowed output of current frame */
276 vector_fmul_add_add(time_out
, transf_buf
, window_long_prev
, overlap
, nlong
);
278 /* window the second half and save as overlap for next frame */
279 vector_fmul_reverse(overlap
, transf_buf
+nlong
, window_long
, nlong
);
283 case LONG_START_SEQUENCE
:
285 ff_imdct_calc(11, transf_buf
, freq_in
);
287 /* add second half output of previous frame to windowed output of current frame */
288 vector_fmul_add_add(time_out
, transf_buf
, window_long_prev
, overlap
, nlong
);
290 /* window the second half and save as overlap for next frame */
291 /* construct second half window using padding with 1's and 0's */
293 memcpy(overlap
, transf_buf
+nlong
, nflat_ls
*sizeof(real_t
));
295 vector_fmul_reverse(overlap
+nflat_ls
, transf_buf
+nlong
+nflat_ls
, window_short
, nshort
);
297 memset(overlap
+nflat_ls
+nshort
, 0, nflat_ls
*sizeof(real_t
));
300 case EIGHT_SHORT_SEQUENCE
:
301 /* this could be assemblerized too, but this case is extremely uncommon */
303 /* perform iMDCT for each short block */
304 idx0
= 0; ff_imdct_calc(8, transf_buf
, freq_in
);
305 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
306 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
307 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
308 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
309 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
310 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
311 idx0
+= nshort
; ff_imdct_calc(8, transf_buf
+ (idx0
<<1), freq_in
+ idx0
);
313 /* Add second half output of previous frame to windowed output of current
316 memcpy(time_out
, overlap
, nflat_ls
*sizeof(real_t
));
317 /* Step 2: First window half, first half of nshort */
318 for (i
= 0; i
< nshort
/2; i
++) {
319 win0
= window_short
[nshort
-1-i
];
320 win1
= window_short
[i
];
321 win2
= window_short_prev
[i
];
324 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
], win2
); idx0
+= nshort
; idx1
+= (nshort
<<1);
325 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
326 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
327 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
328 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
);
330 /* Step 3: First window half, second half of nshort */
331 for (; i
< nshort
; i
++) {
332 win0
= window_short
[nshort
-1-i
];
333 win1
= window_short
[i
];
336 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
337 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
338 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
339 time_out
[idx0
] = overlap
[idx0
] + MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
);
342 /* Window the second half and save as overlap for next frame */
343 /* Step 1: Second window half, first half of nshort */
344 for (i
= 0; i
< nshort
/2; i
++) {
345 win0
= window_short
[nshort
-1-i
];
346 win1
= window_short
[i
];
347 idx0
= nflat_ls
+ 5*nshort
+ i
- nlong
;
348 idx1
= nshort
*10 + i
;
349 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
350 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
351 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
352 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
);
354 /* Step 2: Second window half, second half of nshort */
355 for (; i
< nshort
; i
++) {
356 win0
= window_short
[nshort
-1-i
];
357 win1
= window_short
[i
];
358 idx0
= nflat_ls
+ 4*nshort
+ i
- nlong
;
360 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
361 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
362 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
363 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
) + MUL_F(transf_buf
[idx1
], win1
); idx0
+= nshort
; idx1
+= (nshort
<<1);
364 overlap
[idx0
] = MUL_F(transf_buf
[idx1
-nshort
], win0
);
366 /* Step 3: Set to zero */
367 memset(overlap
+nflat_ls
+nshort
, 0, nflat_ls
*sizeof(real_t
));
371 case LONG_STOP_SEQUENCE
:
373 ff_imdct_calc(11, transf_buf
, freq_in
);
375 /* add second half output of previous frame to windowed output of current frame */
376 /* construct first half window using padding with 1's and 0's */
377 memcpy(time_out
, overlap
, nflat_ls
*sizeof(real_t
));
379 vector_fmul_add_add(time_out
+nflat_ls
, transf_buf
+nflat_ls
, window_short_prev
, overlap
+nflat_ls
, nshort
);
381 /* nflat_ls can be divided by 2. */
382 idx0
= nflat_ls
+ nshort
;
383 for (i
= 0; i
< nflat_ls
; i
+=2) {
384 time_out
[idx0
] = overlap
[idx0
] + transf_buf
[idx0
]; idx0
++;
385 time_out
[idx0
] = overlap
[idx0
] + transf_buf
[idx0
]; idx0
++;
388 /* window the second half and save as overlap for next frame */
389 vector_fmul_reverse(overlap
, transf_buf
+nlong
, window_long
, nlong
);
/* NOTE(review): debug dump of the time-domain output followed by the elapsed
 * faad_get_ts() delta; same literal 1024 bound and "%d" assumption as the
 * input dump - presumably debug/profiling builds only, confirm. */
394 for (i
= 0; i
< 1024; i
++)
396 printf("%d\n", time_out
[i
]);
397 //printf("0x%.8X\n", time_out[i]);
403 count
= faad_get_ts() - count
;
410 /* only works for LTP -> no overlapping, no short blocks */
411 void filter_bank_ltp(fb_info
*fb
, uint8_t window_sequence
, uint8_t window_shape
,
412 uint8_t window_shape_prev
, real_t
*in_data
, real_t
*out_mdct
,
413 uint8_t object_type
, uint16_t frame_len
)
417 const real_t
*window_long
= NULL
;
418 const real_t
*window_long_prev
= NULL
;
419 const real_t
*window_short
= NULL
;
420 const real_t
*window_short_prev
= NULL
;
422 uint16_t nlong
= frame_len
;
423 uint16_t nshort
= frame_len
/8;
424 uint16_t nflat_ls
= (nlong
-nshort
)/2;
426 //assert(window_sequence != EIGHT_SHORT_SEQUENCE);
428 memset(windowed_buf
,0,sizeof(windowed_buf
));
430 if (object_type
== LD
)
432 window_long
= fb
->ld_window
[window_shape
];
433 window_long_prev
= fb
->ld_window
[window_shape_prev
];
438 window_long
= fb
->long_window
[window_shape
];
439 window_long_prev
= fb
->long_window
[window_shape_prev
];
440 window_short
= fb
->short_window
[window_shape
];
441 window_short_prev
= fb
->short_window
[window_shape_prev
];
446 switch(window_sequence
)
448 case ONLY_LONG_SEQUENCE
:
449 for (i
= nlong
-1; i
>= 0; i
--)
451 windowed_buf
[i
] = MUL_F(in_data
[i
], window_long_prev
[i
]);
452 windowed_buf
[i
+nlong
] = MUL_F(in_data
[i
+nlong
], window_long
[nlong
-1-i
]);
454 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);
457 case LONG_START_SEQUENCE
:
458 for (i
= 0; i
< nlong
; i
++)
459 windowed_buf
[i
] = MUL_F(in_data
[i
], window_long_prev
[i
]);
460 for (i
= 0; i
< nflat_ls
; i
++)
461 windowed_buf
[i
+nlong
] = in_data
[i
+nlong
];
462 for (i
= 0; i
< nshort
; i
++)
463 windowed_buf
[i
+nlong
+nflat_ls
] = MUL_F(in_data
[i
+nlong
+nflat_ls
], window_short
[nshort
-1-i
]);
464 for (i
= 0; i
< nflat_ls
; i
++)
465 windowed_buf
[i
+nlong
+nflat_ls
+nshort
] = 0;
466 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);
469 case LONG_STOP_SEQUENCE
:
470 for (i
= 0; i
< nflat_ls
; i
++)
472 for (i
= 0; i
< nshort
; i
++)
473 windowed_buf
[i
+nflat_ls
] = MUL_F(in_data
[i
+nflat_ls
], window_short_prev
[i
]);
474 for (i
= 0; i
< nflat_ls
; i
++)
475 windowed_buf
[i
+nflat_ls
+nshort
] = in_data
[i
+nflat_ls
+nshort
];
476 for (i
= 0; i
< nlong
; i
++)
477 windowed_buf
[i
+nlong
] = MUL_F(in_data
[i
+nlong
], window_long
[nlong
-1-i
]);
478 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);