apps/codecs/libfaad/filtbank.c

   1 /*
   2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
   3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
   4 **
   5 ** This program is free software; you can redistribute it and/or modify
   6 ** it under the terms of the GNU General Public License as published by
   7 ** the Free Software Foundation; either version 2 of the License, or
   8 ** (at your option) any later version.
   9 **
  10 ** This program is distributed in the hope that it will be useful,
  11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 ** GNU General Public License for more details.
  14 **
  15 ** You should have received a copy of the GNU General Public License
  16 ** along with this program; if not, write to the Free Software
  17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18 **
  19 ** Any non-GPL usage of this software or parts of this software is strictly
  20 ** forbidden.
  21 **
  22 ** Commercial non-GPL licensing of this software is possible.
  23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
  24 **
  25 ** $Id$
  26 **/
  27
  28 #include "common.h"
  29 #include "structs.h"
  30
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #ifdef _WIN32_WCE
  34 #define assert(x)
  35 #else
  36 #include <assert.h>
  37 #endif
  38
  39 #include "filtbank.h"
  40 #include "decoder.h"
  41 #include "syntax.h"
  42 #include "kbd_win.h"
  43 #include "sine_win.h"
  44
  45
  46 /*Windowing functions borrowed from libwmai*/
  47
  48 #ifdef CPU_ARM
/*
 * ARM version: dst[i] = (high word of src0[i]*src1[i], shifted left by one)
 *                       + src2[i]
 *
 * Elements are handled as 32-bit words, two per loop iteration. The loop
 * only stops when the counter reaches exactly zero, so len must be a
 * non-zero multiple of two. All four pointers and len are consumed
 * (post-incremented / counted down) by the asm; they are by-value
 * parameters, so the caller's copies are unaffected.
 */
static inline
void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
{
    /* Block sizes are always power of two */
    asm volatile (
        "0:"
        /* r0, r1 = next two src0 values; r4, r5 = next two src1 values */
        "ldmia %[d]!, {r0, r1};"
        "ldmia %[w]!, {r4, r5};"
        /* consume the first data and window value so we can use those
         * registers again */
        "smull r8, r9, r0, r4;"      /* r9:r8 = signed 64-bit product, r9 is the high word */
        "ldmia %[src2]!, {r0, r4};"  /* r0, r4 = next two src2 values */
        "add   r0, r0, r9, lsl #1;"  /* r0 = src2[0] + (r9 << 1) */
        "smull r8, r9, r1, r5;"
        "add   r1, r4, r9, lsl #1;"  /* r1 = src2[1] + (r9 << 1) */
        "stmia %[dst]!, {r0, r1};"   /* store both results and advance dst */
        "subs  %[n], %[n], #2;"
        "bne   0b;"
        : [d] "+r" (src0), [w] "+r" (src1), [src2] "+r" (src2), [dst] "+r" (dst), [n] "+r" (len)
        :
        : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
/*
 * ARM version: dst[i] = high word of src0[i]*src1[len-1-i], shifted left
 * by one.
 *
 * src0 is walked forwards and src1 backwards. Elements are handled as
 * 32-bit words, two per loop iteration; the loop only stops when the
 * counter reaches exactly zero, so len must be a non-zero multiple of two.
 */
static inline
void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
                         int len)
{
    /* Block sizes are always power of two */
    asm volatile (
        /* point s1 one element past the end of src1 (len * 4 bytes) */
        "add   %[s1], %[s1], %[n], lsl #2;"
        "0:"
        "ldmia %[s0]!, {r0, r1};"
        /* pre-decrementing load: r4 gets the lower address, r5 the higher */
        "ldmdb %[s1]!, {r4, r5};"
        "smull r8, r9, r0, r5;"      /* first src0 value times the last unread src1 value */
        "mov   r0, r9, lsl #1;"      /* keep the high word, shifted left by one */
        "smull r8, r9, r1, r4;"
        "mov   r1, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"
        "subs  %[n], %[n], #2;"
        "bne   0b;"
        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
        :
        : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
  92
  93 #elif defined(CPU_COLDFIRE)
/*
 * ColdFire version: dst[i] = (MAC-unit product of src0[i] and src1[i])
 *                            + src2[i]
 *
 * Four 32-bit elements are handled per loop iteration, one per accumulator
 * (acc0..acc3). The loop only stops when the counter reaches exactly zero,
 * so len must be a non-zero multiple of four.
 *
 * NOTE(review): mac.l adds into the accumulators and nothing here clears
 * them before the first iteration, so this presumably relies on acc0..acc3
 * being zero on entry -- confirm. The scaling of the product depends on
 * the MAC unit mode, which is configured elsewhere.
 */
static inline
void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        "0:"
        /* d0-d3 = four src0 values; d4, d5, a0, a1 = four src1 values */
        "movem.l (%[src0]), %%d0-%%d3;"
        "movem.l (%[src1]), %%d4-%%d5/%%a0-%%a1;"
        "mac.l %%d0, %%d4, %%acc0;"
        "mac.l %%d1, %%d5, %%acc1;"
        "mac.l %%d2, %%a0, %%acc2;"
        "mac.l %%d3, %%a1, %%acc3;"
        /* advance both inputs by four elements (16 bytes) */
        "lea.l (16, %[src0]), %[src0];"
        "lea.l (16, %[src1]), %[src1];"
        /* fetch the four products and clear the accumulators for the next round */
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        /* d4, d5, a0, a1 are free again: reuse them for four src2 values */
        "movem.l (%[src2]), %%d4-%%d5/%%a0-%%a1;"
        "lea.l (16, %[src2]), %[src2];"
        "add.l %%d4, %%d0;"
        "add.l %%d5, %%d1;"
        "add.l %%a0, %%d2;"
        "add.l %%a1, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [src0] "+a" (src0), [src1] "+a" (src1), [src2] "+a" (src2), [dst] "+a" (dst), [n] "+d" (len)
        :
        : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
 127
/*
 * ColdFire version: dst[i] = MAC-unit product of src0[i] and
 * src1[len-1-i].
 *
 * src0 is walked forwards and src1 backwards, four 32-bit elements per
 * loop iteration. The loop only stops when the counter reaches exactly
 * zero, so len must be a non-zero multiple of four.
 *
 * NOTE(review): mac.l adds into the accumulators and nothing here clears
 * them before the first iteration, so this presumably relies on acc0..acc3
 * being zero on entry -- confirm. The scaling of the product depends on
 * the MAC unit mode, which is configured elsewhere.
 */
static inline
void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
                         int len)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        /* point s1 at the last group of four elements: src1 + len*4 - 16 bytes */
        "lea.l (-16, %[s1], %[n]*4), %[s1];"
        "0:"
        "movem.l (%[s0]), %%d0-%%d3;"
        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
        /* pair the src0 group with the src1 group in reverse order */
        "mac.l %%d0, %%a1, %%acc0;"
        "mac.l %%d1, %%a0, %%acc1;"
        "mac.l %%d2, %%d5, %%acc2;"
        "mac.l %%d3, %%d4, %%acc3;"
        /* s0 moves forward, s1 moves backward, four elements each */
        "lea.l (16, %[s0]), %[s0];"
        "lea.l (-16, %[s1]), %[s1];"
        /* fetch the four products and clear the accumulators for the next round */
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
 156
 157 #else
 158 static inline void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len){
 159     int i;
 160     for(i=0; i<len; i++)
 161         dst[i] = MUL_F(src0[i], src1[i]) + src2[i];
 162 }
 163
 164 static inline void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1, int len){
 165     int i;
 166     src1 += len-1;
 167     for(i=0; i<len; i++)
 168         dst[i] = MUL_F(src0[i], src1[-i]);
 169 }
 170 #endif
 171
 172 #ifdef LTP_DEC
 173 static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
 174 {
 175     mdct_info *mdct = NULL;
 176
 177     switch (len)
 178     {
 179     case 2048:
 180     case 1920:
 181         mdct = fb->mdct2048;
 182         break;
 183     case 256:
 184     case 240:
 185         mdct = fb->mdct256;
 186         break;
 187 #ifdef LD_DEC
 188     case 1024:
 189     case 960:
 190         mdct = fb->mdct1024;
 191         break;
 192 #endif
 193     }
 194
 195     faad_mdct(mdct, in_data, out_data);
 196 }
 197 #endif
 198
/* Scratch buffer for ifilter_bank(): cleared on entry to every call and then
 * filled by mdct_backward().
 * NOTE(review): file-scope and shared between calls, so ifilter_bank() is
 * presumably not reentrant -- confirm. */
ALIGN real_t transf_buf[2*1024] IBSS_ATTR;
 200
 201 void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
 202                   uint8_t window_shape_prev, real_t *freq_in,
 203                   real_t *time_out, real_t *overlap,
 204                   uint8_t object_type, uint16_t frame_len)
 205 {
 206     int16_t i;
 207
 208     const real_t *window_long = NULL;
 209     const real_t *window_long_prev = NULL;
 210     const real_t *window_short = NULL;
 211     const real_t *window_short_prev = NULL;
 212
 213     uint16_t nlong = frame_len;
 214     uint16_t nshort = frame_len/8;
 215     uint16_t trans = nshort/2;
 216
 217     uint16_t nflat_ls = (nlong-nshort)/2;
 218
 219 #ifdef PROFILE
 220     int64_t count = faad_get_ts();
 221 #endif
 222
 223     memset(transf_buf,0,sizeof(transf_buf));
 224     /* select windows of current frame and previous frame (Sine or KBD) */
 225 #ifdef LD_DEC
 226     if (object_type == LD)
 227     {
 228         window_long       = fb->ld_window[window_shape];
 229         window_long_prev  = fb->ld_window[window_shape_prev];
 230     } else {
 231 #else
 232         (void) object_type;
 233 #endif
 234
 235     /*AAC uses two different window shapes depending on spectal features*/
 236     if(window_shape == 0){
 237         window_long  = sine_long_1024;
 238         window_short = sine_short_128;
 239     } else {
 240         window_long  = kbd_long_1024;
 241         window_short = kbd_short_128;
 242     }
 243
 244     if(window_shape_prev == 0){
 245         window_long_prev  = sine_long_1024;
 246         window_short_prev = sine_short_128;
 247     } else {
 248         window_long_prev  = kbd_long_1024;
 249         window_short_prev = kbd_short_128;
 250     }
 251
 252 #ifdef LD_DEC
 253     }
 254 #endif
 255
 256 #if 0
 257     for (i = 0; i < 1024; i++)
 258     {
 259         printf("%d\n", freq_in[i]);
 260     }
 261 #endif
 262
 263 #if 0
 264     printf("%d %d\n", window_sequence, window_shape);
 265 #endif
 266     switch (window_sequence)
 267     {
 268     case ONLY_LONG_SEQUENCE:
 269         /* perform iMDCT */
 270         mdct_backward(2048, freq_in, transf_buf);
 271
 272         /* add second half output of previous frame to windowed output of current frame */
 273         vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap,  nlong);
 274
 275         /* window the second half and save as overlap for next frame */
 276         vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
 277
 278         break;
 279
 280     case LONG_START_SEQUENCE:
 281         /* perform iMDCT */
 282         mdct_backward(2048, freq_in, transf_buf);
 283
 284         /* add second half output of previous frame to windowed output of current frame */
 285         vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap,  nlong);
 286
 287         /* window the second half and save as overlap for next frame */
 288         /* construct second half window using padding with 1's and 0's */
 289
 290         memcpy(overlap, transf_buf+nlong, nflat_ls*sizeof(real_t));
 291
 292         vector_fmul_reverse(overlap+nflat_ls, transf_buf+nlong+nflat_ls, window_short, nshort);
 293
 294         memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
 295         break;
 296
 297     case EIGHT_SHORT_SEQUENCE:
 298          /*this could be assemblerized too, but this case is extremely uncommon*/
 299
 300         /* perform iMDCT for each short block */
 301         mdct_backward(256, freq_in+0*nshort,  transf_buf+2*nshort*0);
 302         mdct_backward(256, freq_in+1*nshort, transf_buf+2*nshort*1);
 303         mdct_backward(256, freq_in+2*nshort, transf_buf+2*nshort*2);
 304         mdct_backward(256, freq_in+3*nshort, transf_buf+2*nshort*3);
 305         mdct_backward(256, freq_in+4*nshort, transf_buf+2*nshort*4);
 306         mdct_backward(256, freq_in+5*nshort, transf_buf+2*nshort*5);
 307         mdct_backward(256, freq_in+6*nshort, transf_buf+2*nshort*6);
 308         mdct_backward(256, freq_in+7*nshort, transf_buf+2*nshort*7);
 309
 310         /* add second half output of previous frame to windowed output of current frame */
 311         for (i = 0; i < nflat_ls; i++)
 312             time_out[i] = overlap[i];
 313         for(i = 0; i < nshort; i++)
 314         {
 315             time_out[nflat_ls+         i] = overlap[nflat_ls+         i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]);
 316             time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]);
 317             time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]);
 318             time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]);
 319             if (i < trans)
 320                 time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
 321         }
 322
 323         /* window the second half and save as overlap for next frame */
 324         for(i = 0; i < nshort; i++)
 325         {
 326             if (i >= trans)
 327                 overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
 328             overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]);
 329             overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]);
 330             overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]);
 331             overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]);
 332         }
 333         memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
 334
 335         break;
 336
 337     case LONG_STOP_SEQUENCE:
 338         /* perform iMDCT */
 339         mdct_backward(2048, freq_in, transf_buf);
 340
 341         /* add second half output of previous frame to windowed output of current frame */
 342         /* construct first half window using padding with 1's and 0's */
 343         memcpy(time_out, overlap, nflat_ls*sizeof(real_t));
 344
 345         vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls,  nshort);
 346
 347         for (i = 0; i < nflat_ls; i++)
 348             time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i];
 349
 350         /* window the second half and save as overlap for next frame */
 351         vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
 352         break;
 353     }
 354
 355 #if 0
 356     for (i = 0; i < 1024; i++)
 357     {
 358         printf("%d\n", time_out[i]);
 359         //printf("0x%.8X\n", time_out[i]);
 360     }
 361 #endif
 362
 363
 364 #ifdef PROFILE
 365     count = faad_get_ts() - count;
 366     fb->cycles += count;
 367 #endif
 368 }
 369
 370
 371 #ifdef LTP_DEC
/* Work buffer for filter_bank_ltp(): cleared at the start of every call,
 * filled with the windowed input and then handed to mdct(). */
ALIGN real_t windowed_buf[2*1024] = {0};
 373 /* only works for LTP -> no overlapping, no short blocks */
 374 void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
 375                      uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct,
 376                      uint8_t object_type, uint16_t frame_len)
 377 {
 378     int16_t i;
 379
 380     const real_t *window_long = NULL;
 381     const real_t *window_long_prev = NULL;
 382     const real_t *window_short = NULL;
 383     const real_t *window_short_prev = NULL;
 384
 385     uint16_t nlong = frame_len;
 386     uint16_t nshort = frame_len/8;
 387     uint16_t nflat_ls = (nlong-nshort)/2;
 388
 389     //assert(window_sequence != EIGHT_SHORT_SEQUENCE);
 390
 391     memset(windowed_buf,0,sizeof(windowed_buf));
 392 #ifdef LD_DEC
 393     if (object_type == LD)
 394     {
 395         window_long       = fb->ld_window[window_shape];
 396         window_long_prev  = fb->ld_window[window_shape_prev];
 397     } else {
 398 #else
 399         (void) object_type;
 400 #endif
 401         window_long       = fb->long_window[window_shape];
 402         window_long_prev  = fb->long_window[window_shape_prev];
 403         window_short      = fb->short_window[window_shape];
 404         window_short_prev = fb->short_window[window_shape_prev];
 405 #ifdef LD_DEC
 406     }
 407 #endif
 408
 409     switch(window_sequence)
 410     {
 411     case ONLY_LONG_SEQUENCE:
 412         for (i = nlong-1; i >= 0; i--)
 413         {
 414             windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
 415             windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
 416         }
 417         mdct(fb, windowed_buf, out_mdct, 2*nlong);
 418         break;
 419
 420     case LONG_START_SEQUENCE:
 421         for (i = 0; i < nlong; i++)
 422             windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
 423         for (i = 0; i < nflat_ls; i++)
 424             windowed_buf[i+nlong] = in_data[i+nlong];
 425         for (i = 0; i < nshort; i++)
 426             windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]);
 427         for (i = 0; i < nflat_ls; i++)
 428             windowed_buf[i+nlong+nflat_ls+nshort] = 0;
 429         mdct(fb, windowed_buf, out_mdct, 2*nlong);
 430         break;
 431
 432     case LONG_STOP_SEQUENCE:
 433         for (i = 0; i < nflat_ls; i++)
 434             windowed_buf[i] = 0;
 435         for (i = 0; i < nshort; i++)
 436             windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]);
 437         for (i = 0; i < nflat_ls; i++)
 438             windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort];
 439         for (i = 0; i < nlong; i++)
 440             windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
 441         mdct(fb, windowed_buf, out_mdct, 2*nlong);
 442         break;
 443     }
 444 }
 445 #endif