apps/codecs/libwmapro/wmaprodec.c

   1 /*
   2  * Wmapro compatible decoder
   3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
   4  * Copyright (c) 2008 - 2009 Sascha Sommer, Benjamin Larsson
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file  libavcodec/wmaprodec.c
  25  * @brief wmapro decoder implementation
  26  * Wmapro is an MDCT based codec comparable to wma standard or AAC.
  27  * The decoding therefore consists of the following steps:
  28  * - bitstream decoding
  29  * - reconstruction of per-channel data
  30  * - rescaling and inverse quantization
  31  * - IMDCT
  32  * - windowing and overlap-add
  33  *
  34  * The compressed wmapro bitstream is split into individual packets.
  35  * Every such packet contains one or more wma frames.
  36  * The compressed frames may have a variable length and frames may
  37  * cross packet boundaries.
  38  * Common to all wmapro frames is the number of samples that are stored in
  39  * a frame.
  40  * The number of samples and a few other decode flags are stored
  41  * as extradata that has to be passed to the decoder.
  42  *
  43  * The wmapro frames themselves are again split into a variable number of
  44  * subframes. Every subframe contains the data for 2^N time domain samples
  45  * where N varies between 7 and 12.
  46  *
  47  * Example wmapro bitstream (in samples):
  48  *
  49  * ||   packet 0           || packet 1 || packet 2      packets
  50  * ---------------------------------------------------
  51  * || frame 0      || frame 1       || frame 2    ||    frames
  52  * ---------------------------------------------------
  53  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
  54  * ---------------------------------------------------
  55  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
  56  * ---------------------------------------------------
  57  *
  58  * The frame layouts for the individual channels of a wma frame do not need
  59  * to be the same.
  60  *
  61  * However, if the offsets and lengths of several subframes of a frame are the
  62  * same, the subframes of the channels can be grouped.
  63  * Every group may then use special coding techniques like M/S stereo coding
  64  * to improve the compression ratio. These channel transformations do not
  65  * need to be applied to a whole subframe. Instead, they can also work on
  66  * individual scale factor bands (see below).
  67  * The coefficients that carry the audio signal in the frequency domain
  68  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
  69  * In addition to that, the encoder can switch to a runlevel coding scheme
  70  * by transmitting subframe_length / 128 zero coefficients.
  71  *
  72  * Before the audio signal can be converted to the time domain, the
  73  * coefficients have to be rescaled and inverse quantized.
  74  * A subframe is therefore split into several scale factor bands that get
  75  * scaled individually.
  76  * Scale factors are submitted for every frame but they might be shared
  77  * between the subframes of a channel. Scale factors are initially DPCM-coded.
  78  * Once scale factors are shared, the differences are transmitted as runlevel
  79  * codes.
  80  * Every subframe length and offset combination in the frame layout shares a
  81  * common quantization factor that can be adjusted for every channel by a
  82  * modifier.
  83  * After the inverse quantization, the coefficients get processed by an IMDCT.
  84  * The resulting values are then windowed with a sine window and the first half
  85  * of the values are added to the second half of the output from the previous
  86  * subframe in order to reconstruct the output samples.
  87  */
  88
  89 #include "ffmpeg_get_bits.h"
  90 #include "ffmpeg_put_bits.h"
  91 #include "wmaprodata.h"
  92 #include "wma.h"
  93 #include "wmaprodec.h"
  94 //#include "wmapro_mdct.h"
  95 #include "mdct_tables.h"
  96 #include "quant.h"
  97 #include "wmapro_math.h"
  98 #include "codecs.h"
  99 #include "codeclib.h"
 100 #include "../libasf/asf.h"
 101
 102 /* Uncomment the following line to enable some debug output */
 103 //#define WMAPRO_DUMP_CTX_EN
 104
 105 #undef DEBUGF
 106 #ifdef WMAPRO_DUMP_CTX_EN
 107 #   define DEBUGF printf
 108 #else
 109 #   define DEBUGF(...)
 110 #endif
 111
 112 /* Some defines to make it compile */
 113 #define AVERROR_INVALIDDATA  -1
 114 #define AVERROR_PATCHWELCOME -2
 115 #define av_log_ask_for_sample(...)
 116
 117 /* Taken from avcodec.h */
 118 #define FF_INPUT_BUFFER_PADDING_SIZE 8
 119
 120 /* Taken from libavutil/mem.h */
 121 #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
 122
 123 /* Taken from libavutil/common.h */
 124 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 125 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 126
 127 /* Define some multiple used constants */
 128 #define SQRT2_FRACT16   0x00016A0A /* 0x00016A0A = (sqrt(2)*(1<<16)) */
 129 #define COS_PI4_FRACT16 0x0000B505 /* 0x0000B505 = (cos(pi/4)<<16) */
 130 #define ONE_FRACT16     0x00010000 /* 0x00010000 = (1<<16) */
 131
 132 /* Enable multichannel for large-memory targets only */
 133 #if (MEMORYSIZE > 2)
 134 #define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
 135 #else
 136 #define WMAPRO_MAX_CHANNELS    2                             ///< max number of handled channels
 137 #endif
 138
 139 /* Current decoder limitations */
 140 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
 141 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 142 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
 143
 144 #define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 145 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
 146 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
 147 #define WMAPRO_OUT_BUF_SIZE   (WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2)
 148
 149
 150 #define VLCBITS            9
 151 #define SCALEVLCBITS       8
 152 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
 153 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
 154 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
 155 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
 156 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
 157
 158 static VLC              sf_vlc;           ///< scale factor DPCM vlc
 159 static VLC              sf_rl_vlc;        ///< scale factor run length vlc
 160 static VLC              vec4_vlc;         ///< 4 coefficients per symbol
 161 static VLC              vec2_vlc;         ///< 2 coefficients per symbol
 162 static VLC              vec1_vlc;         ///< 1 coefficient per symbol
 163 static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
 164 //static float            sin64[33];        ///< sinus table for decorrelation
 165
 166 /* Global defined arrays to allow IRAM usage for some models. */
 167 static int32_t g_tmp[WMAPRO_BLOCK_MAX_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
 168 static int32_t g_out_ch0[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR;
 169 static int32_t g_out_ch1[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
 170 #if (WMAPRO_MAX_CHANNELS > 2)
 171     static int32_t g_out_multichannel[WMAPRO_MAX_CHANNELS-2][WMAPRO_OUT_BUF_SIZE];
 172 #endif
 173
 174 /**
 175  * @brief frame specific decoder context for a single channel
 176  */
 177 typedef struct {
 178     int16_t  prev_block_len;                          ///< length of the previous block
 179     uint8_t  transmit_coefs;
 180     uint8_t  num_subframes;
 181     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
 182     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
 183     uint8_t  cur_subframe;                            ///< current subframe number
 184     uint16_t decoded_samples;                         ///< number of already processed samples
 185     uint8_t  grouped;                                 ///< channel is part of a group
 186     int      quant_step;                              ///< quantization step for the current subframe
 187     int8_t   reuse_sf;                                ///< share scale factors between subframes
 188     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
 189     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
 190     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
 191     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
 192     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
 193     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
 194     int32_t* coeffs;                                  ///< pointer to the subframe decode buffer
 195     int32_t* out;                                     ///< output buffer
 196 } WMAProChannelCtx;
 197
 198 /**
 199  * @brief channel group for channel transformations
 200  */
 201 typedef struct {
 202     uint8_t num_channels;                                     ///< number of channels in the group
 203     int8_t  transform;                                        ///< transform on / off
 204     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
 205     //float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
 206     int32_t*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients
 207     int32_t   fixdecorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
 208 } WMAProChannelGrp;
 209
 210 /**
 211  * @brief main decoder context
 212  */
 213 typedef struct WMAProDecodeCtx {
 214     /* generic decoder variables */
 215     uint8_t          frame_data[MAX_FRAMESIZE +
 216                       FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 217     PutBitContext    pb;                            ///< context for filling the frame_data buffer
 218     int32_t*         tmp;                           ///< IMDCT input buffer
 219
 220     /* frame size dependent frame information (set during initialization) */
 221     uint32_t         decode_flags;                  ///< used compression features
 222     uint8_t          len_prefix;                    ///< frame is prefixed with its length
 223     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
 224     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
 225     uint16_t         samples_per_frame;             ///< number of samples to output
 226     uint16_t         log2_frame_size;
 227     int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.num_channels)
 228     int8_t           lfe_channel;                   ///< lfe channel index
 229     uint8_t          max_num_subframes;
 230     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
 231     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
 232     uint16_t         min_samples_per_subframe;
 233     int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
 234     int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
 235     int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
 236     int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
 237
 238     /* packet decode state */
 239     GetBitContext    pgb;                           ///< bitstream reader context for the packet
 240     uint8_t          packet_offset;                 ///< frame offset in the packet
 241     uint8_t          packet_sequence_number;        ///< current packet number
 242     int              num_saved_bits;                ///< saved number of bits
 243     int              frame_offset;                  ///< frame offset in the bit reservoir
 244     int              subframe_offset;               ///< subframe offset in the bit reservoir
 245     uint8_t          packet_loss;                   ///< set in case of bitstream error
 246     uint8_t          packet_done;                   ///< set when a packet is fully decoded
 247
 248     /* frame decode state */
 249     uint32_t         frame_num;                     ///< current frame number
 250     GetBitContext    gb;                            ///< bitstream reader context
 251     int              buf_bit_size;                  ///< buffer size in bits
 252     int32_t          samples;
 253     int32_t*         samples_end;                   ///< maximum samplebuffer pointer
 254     uint8_t          drc_gain;                      ///< gain for the DRC tool
 255     int8_t           skip_frame;                    ///< skip output step
 256     int8_t           parsed_all_subframes;          ///< all subframes decoded?
 257
 258     /* subframe/block decode state */
 259     int16_t          subframe_len;                  ///< current subframe length
 260     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
 261     int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS];
 262     int8_t           num_bands;                     ///< number of scale factor bands
 263     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
 264     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
 265     int8_t           esc_len;                       ///< length of escaped coefficients
 266
 267     uint8_t          num_chgroups;                  ///< number of channel groups
 268     WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
 269
 270     WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
 271 } WMAProDecodeCtx;
 272
 273 /* static decode context, to avoid malloc */
 274 static WMAProDecodeCtx globWMAProDecCtx;
 275
 276 /**
 277  *@brief helper function to print the most important members of the context
 278  *@param s context
 279  */
 280 #ifdef WMAPRO_DUMP_CTX_EN
 281 static void  dump_context(WMAProDecodeCtx *s)
 282 {
 283 #define PRINT(a, b)     printf(" %s = %d\n", a, b);
 284 #define PRINT_HEX(a, b) printf(" %s = %x\n", a, b);
 285
 286     PRINT("ed sample bit depth", s->bits_per_sample);
 287     PRINT_HEX("ed decode flags", s->decode_flags);
 288     PRINT("samples per frame",   s->samples_per_frame);
 289     PRINT("log2 frame size",     s->log2_frame_size);
 290     PRINT("max num subframes",   s->max_num_subframes);
 291     PRINT("len prefix",          s->len_prefix);
 292     PRINT("num channels",        s->num_channels);
 293 }
 294 #endif
 295
 296 /**
 297  *@brief Initialize the decoder.
 298  *@param avctx codec context
 299  *@return 0 on success, -1 otherwise
 300  */
 301 int decode_init(asf_waveformatex_t *wfx)
 302 {
 303     memset(&globWMAProDecCtx, 0, sizeof(WMAProDecodeCtx));
 304     WMAProDecodeCtx *s = &globWMAProDecCtx;
 305     uint8_t *edata_ptr = wfx->data;
 306     unsigned int channel_mask;
 307     int i;
 308     int log2_max_num_subframes;
 309     int num_possible_block_sizes;
 310
 311     /* Use globally defined array. Allows IRAM usage for models with large IRAM. */
 312     s->tmp = g_tmp;
 313
 314     /* Use globally defined arrays. Allows IRAM usage for up to 2 channels. */
 315     s->channel[0].out = g_out_ch0;
 316     s->channel[1].out = g_out_ch1;
 317 #if (WMAPRO_MAX_CHANNELS > 2)
 318     for (i=2; i<WMAPRO_MAX_CHANNELS; ++i)
 319         s->channel[i].out = g_out_multichannel[i-2];
 320 #endif
 321
 322 #if defined(CPU_COLDFIRE)
 323     coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
 324 #endif
 325
 326     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 327
 328     if (wfx->datalen >= 18) {
 329         s->decode_flags    = AV_RL16(edata_ptr+14);
 330         channel_mask       = AV_RL32(edata_ptr+2);
 331         s->bits_per_sample = AV_RL16(edata_ptr);
 332         /** dump the extradata */
 333         for (i = 0; i < wfx->datalen; i++)
 334             DEBUGF("[%x] ", wfx->data[i]);
 335         DEBUGF("\n");
 336
 337     } else {
 338         DEBUGF("Unknown extradata size\n");
 339         return AVERROR_INVALIDDATA;
 340     }
 341
 342     /** generic init */
 343     s->log2_frame_size = av_log2(wfx->blockalign) + 4;
 344
 345     /** frame info */
 346     s->skip_frame  = 1; /** skip first frame */
 347     s->packet_loss = 1;
 348     s->len_prefix  = (s->decode_flags & 0x40);
 349
 350     if (!s->len_prefix) {
 351         DEBUGF("no length prefix\n");
 352         return AVERROR_INVALIDDATA;
 353     }
 354
 355     /** get frame len */
 356     s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(wfx->rate,
 357                                                           3, s->decode_flags);
 358
 359     /** init previous block len */
 360     for (i = 0; i < wfx->channels; i++)
 361         s->channel[i].prev_block_len = s->samples_per_frame;
 362
 363     /** subframe info */
 364     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
 365     s->max_num_subframes         = 1 << log2_max_num_subframes;
 366     if (s->max_num_subframes == 16)
 367         s->max_subframe_len_bit = 1;
 368     s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
 369
 370     num_possible_block_sizes     = log2_max_num_subframes + 1;
 371     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
 372     s->dynamic_range_compression = (s->decode_flags & 0x80);
 373
 374     if (s->max_num_subframes > MAX_SUBFRAMES) {
 375         DEBUGF("invalid number of subframes %i\n",
 376                s->max_num_subframes);
 377         return AVERROR_INVALIDDATA;
 378     }
 379
 380     s->num_channels = wfx->channels;
 381
 382     /** extract lfe channel position */
 383     s->lfe_channel = -1;
 384
 385     if (channel_mask & 8) {
 386         unsigned int mask;
 387         for (mask = 1; mask < 16; mask <<= 1) {
 388             if (channel_mask & mask)
 389                 ++s->lfe_channel;
 390         }
 391     }
 392
 393     if (s->num_channels < 0) {
 394         DEBUGF("invalid number of channels %d\n", s->num_channels);
 395         return AVERROR_INVALIDDATA;
 396     } else if (s->num_channels > WMAPRO_MAX_CHANNELS) {
 397         DEBUGF("unsupported number of channels\n");
 398         return AVERROR_PATCHWELCOME;
 399     }
 400
 401     INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
 402                     scale_huffbits, 1, 1,
 403                     scale_huffcodes, 2, 2, 616);
 404
 405     INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
 406                     scale_rl_huffbits, 1, 1,
 407                     scale_rl_huffcodes, 4, 4, 1406);
 408
 409     INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
 410                     coef0_huffbits, 1, 1,
 411                     coef0_huffcodes, 4, 4, 2108);
 412
 413     INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
 414                     coef1_huffbits, 1, 1,
 415                     coef1_huffcodes, 4, 4, 3912);
 416
 417     INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
 418                     vec4_huffbits, 1, 1,
 419                     vec4_huffcodes, 2, 2, 604);
 420
 421     INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
 422                     vec2_huffbits, 1, 1,
 423                     vec2_huffcodes, 2, 2, 562);
 424
 425     INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
 426                     vec1_huffbits, 1, 1,
 427                     vec1_huffcodes, 2, 2, 562);
 428
 429     /** calculate number of scale factor bands and their offsets
 430         for every possible block size */
 431     for (i = 0; i < num_possible_block_sizes; i++) {
 432         int subframe_len = s->samples_per_frame >> i;
 433         int x;
 434         int band = 1;
 435
 436         s->sfb_offsets[i][0] = 0;
 437
 438         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
 439             int offset = (subframe_len * 2 * critical_freq[x])
 440                           / wfx->rate + 2;
 441             offset &= ~3;
 442             if (offset > s->sfb_offsets[i][band - 1])
 443                 s->sfb_offsets[i][band++] = offset;
 444         }
 445         s->sfb_offsets[i][band - 1] = subframe_len;
 446         s->num_sfb[i]               = band - 1;
 447     }
 448
 449
 450     /** Scale factors can be shared between blocks of different size
 451         as every block has a different scale factor band layout.
 452         The matrix sf_offsets is needed to find the correct scale factor.
 453      */
 454
 455     for (i = 0; i < num_possible_block_sizes; i++) {
 456         int b;
 457         for (b = 0; b < s->num_sfb[i]; b++) {
 458             int x;
 459             int offset = ((s->sfb_offsets[i][b]
 460                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
 461             for (x = 0; x < num_possible_block_sizes; x++) {
 462                 int v = 0;
 463                 while (s->sfb_offsets[x][v + 1] << x < offset)
 464                     ++v;
 465                 s->sf_offsets[i][x][b] = v;
 466             }
 467         }
 468     }
 469
 470     /** calculate subwoofer cutoff values */
 471     for (i = 0; i < num_possible_block_sizes; i++) {
 472         int block_size = s->samples_per_frame >> i;
 473         int cutoff = (440*block_size + 3 * (wfx->rate >> 1) - 1)
 474                      / wfx->rate;
 475         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
 476     }
 477
 478 #if 0
 479     /** calculate sine values for the decorrelation matrix */
 480     for (i = 0; i < 33; i++)
 481         sin64[i] = sin(i*M_PI / 64.0);
 482 #endif
 483
 484 #ifdef WMAPRO_DUMP_CTX_EN
 485     dump_context(s);
 486 #endif
 487     return 0;
 488 }
 489
 490 /**
 491  *@brief Decode the subframe length.
 492  *@param s context
 493  *@param offset sample offset in the frame
 494  *@return decoded subframe length on success, < 0 in case of an error
 495  */
 496 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
 497 {
 498     int frame_len_shift = 0;
 499     int subframe_len;
 500
 501     /** no need to read from the bitstream when only one length is possible */
 502     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
 503         return s->min_samples_per_subframe;
 504
 505     /** 1 bit indicates if the subframe is of maximum length */
 506     if (s->max_subframe_len_bit) {
 507         if (get_bits1(&s->gb))
 508             frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
 509     } else
 510         frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
 511
 512     subframe_len = s->samples_per_frame >> frame_len_shift;
 513
 514     /** sanity check the length */
 515     if (subframe_len < s->min_samples_per_subframe ||
 516         subframe_len > s->samples_per_frame) {
 517         DEBUGF("broken frame: subframe_len %i\n",
 518                subframe_len);
 519         return AVERROR_INVALIDDATA;
 520     }
 521     return subframe_len;
 522 }
 523
 524 /**
 525  *@brief Decode how the data in the frame is split into subframes.
 526  *       Every WMA frame contains the encoded data for a fixed number of
 527  *       samples per channel. The data for every channel might be split
 528  *       into several subframes. This function will reconstruct the list of
 529  *       subframes for every channel.
 530  *
 531  *       If the subframes are not evenly split, the algorithm estimates the
 532  *       channels with the lowest number of total samples.
 533  *       Afterwards, for each of these channels a bit is read from the
 534  *       bitstream that indicates if the channel contains a subframe with the
 535  *       next subframe size that is going to be read from the bitstream or not.
 536  *       If a channel contains such a subframe, the subframe size gets added to
 537  *       the channel's subframe list.
 538  *       The algorithm repeats these steps until the frame is properly divided
 539  *       between the individual channels.
 540  *
 541  *@param s context
 542  *@return 0 on success, < 0 in case of an error
 543  */
 544 static int decode_tilehdr(WMAProDecodeCtx *s)
 545 {
 546     uint16_t num_samples[WMAPRO_MAX_CHANNELS];        /** sum of samples for all currently known subframes of a channel */
 547     uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /** flag indicating if a channel contains the current subframe */
 548     int channels_for_cur_subframe = s->num_channels;  /** number of channels that contain the current subframe */
 549     int fixed_channel_layout = 0;                     /** flag indicating that all channels use the same subframe offsets and sizes */
 550     int min_channel_len = 0;                          /** smallest sum of samples (channels with this length will be processed first) */
 551     int c;
 552
 553     /* Should never consume more than 3073 bits (256 iterations for the
 554      * while loop when always the minimum amount of 128 samples is substracted
 555      * from missing samples in the 8 channel case).
 556      * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
 557      */
 558
 559     /** reset tiling information */
 560     for (c = 0; c < s->num_channels; c++)
 561         s->channel[c].num_subframes = 0;
 562
 563     memset(num_samples, 0, sizeof(num_samples));
 564
 565     if (s->max_num_subframes == 1 || get_bits1(&s->gb))
 566         fixed_channel_layout = 1;
 567
 568     /** loop until the frame data is split between the subframes */
 569     do {
 570         int subframe_len;
 571
 572         /** check which channels contain the subframe */
 573         for (c = 0; c < s->num_channels; c++) {
 574             if (num_samples[c] == min_channel_len) {
 575                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
 576                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
 577                     contains_subframe[c] = 1;
 578                 else
 579                     contains_subframe[c] = get_bits1(&s->gb);
 580             } else
 581                 contains_subframe[c] = 0;
 582         }
 583
 584         /** get subframe length, subframe_len == 0 is not allowed */
 585         if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
 586             return AVERROR_INVALIDDATA;
 587
 588         /** add subframes to the individual channels and find new min_channel_len */
 589         min_channel_len += subframe_len;
 590         for (c = 0; c < s->num_channels; c++) {
 591             WMAProChannelCtx* chan = &s->channel[c];
 592
 593             if (contains_subframe[c]) {
 594                 if (chan->num_subframes >= MAX_SUBFRAMES) {
 595                     DEBUGF("broken frame: num subframes > 31\n");
 596                     return AVERROR_INVALIDDATA;
 597                 }
 598                 chan->subframe_len[chan->num_subframes] = subframe_len;
 599                 num_samples[c] += subframe_len;
 600                 ++chan->num_subframes;
 601                 if (num_samples[c] > s->samples_per_frame) {
 602                     DEBUGF("broken frame: "
 603                            "channel len > samples_per_frame\n");
 604                     return AVERROR_INVALIDDATA;
 605                 }
 606             } else if (num_samples[c] <= min_channel_len) {
 607                 if (num_samples[c] < min_channel_len) {
 608                     channels_for_cur_subframe = 0;
 609                     min_channel_len = num_samples[c];
 610                 }
 611                 ++channels_for_cur_subframe;
 612             }
 613         }
 614     } while (min_channel_len < s->samples_per_frame);
 615
 616     for (c = 0; c < s->num_channels; c++) {
 617         int i;
 618         int offset = 0;
 619         for (i = 0; i < s->channel[c].num_subframes; i++) {
 620             DEBUGF("frame[%i] channel[%i] subframe[%i]"
 621                     " len %i\n", s->frame_num, c, i,
 622                     s->channel[c].subframe_len[i]);
 623             s->channel[c].subframe_offset[i] = offset;
 624             offset += s->channel[c].subframe_len[i];
 625         }
 626     }
 627
 628     return 0;
 629 }
 630
 631 #if 0
 632 /**
 633  *@brief Calculate a decorrelation matrix from the bitstream parameters.
 634  *@param s codec context
 635  *@param chgroup channel group for which the matrix needs to be calculated
 636  */
 637 static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
 638                                         WMAProChannelGrp *chgroup)
 639 {
 640     int i;
 641     int offset = 0;
 642     int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
 643     memset(chgroup->decorrelation_matrix, 0, s->num_channels *
 644            s->num_channels * sizeof(*chgroup->decorrelation_matrix));
 645
 646     for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
 647         rotation_offset[i] = get_bits(&s->gb, 6);
 648
 649     for (i = 0; i < chgroup->num_channels; i++) {
 650         chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
 651             get_bits1(&s->gb) ? 1.0 : -1.0;
 652
 653         if(chgroup->decorrelation_matrix[chgroup->num_channels * i + i] > 0)
 654             chgroup->fixdecorrelation_matrix[chgroup->num_channels * i + i] =  ONE_FRACT16;
 655         else
 656             chgroup->fixdecorrelation_matrix[chgroup->num_channels * i + i] = -ONE_FRACT16;
 657     }
 658
 659     for (i = 1; i < chgroup->num_channels; i++) {
 660         int x;
 661         for (x = 0; x < i; x++) {
 662             int y;
 663             for (y = 0; y < i + 1; y++) {
 664                 float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
 665                 float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
 666                 int32_t f1 = chgroup->fixdecorrelation_matrix[x * chgroup->num_channels + y];
 667                 int32_t f2 = chgroup->fixdecorrelation_matrix[i * chgroup->num_channels + y];
 668                 int n = rotation_offset[offset + x];
 669                 float sinv;
 670                 float cosv;
 671                 int32_t fixsinv;
 672                 int32_t fixcosv;
 673
 674                 if (n < 32) {
 675                     sinv = sin64[n];
 676                     cosv = sin64[32 - n];
 677                     fixsinv = fixed_sin64[n];
 678                     fixcosv = fixed_sin64[32-n];
 679                 } else {
 680                     sinv =  sin64[64 -  n];
 681                     cosv = -sin64[n  - 32];
 682                     fixsinv = fixed_sin64[64-n];
 683                     fixcosv = -fixed_sin64[n-32];
 684                 }
 685
 686                 chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
 687                                                (v1 * sinv) - (v2 * cosv);
 688                 chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
 689                                                (v1 * cosv) + (v2 * sinv);
 690                 chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =
 691                                                fixmul31(f1, fixsinv) - fixmul31(f2, fixcosv);
 692                 chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =
 693                                                fixmul31(f1, fixcosv) + fixmul31(f2, fixsinv);
 694
 695             }
 696         }
 697         offset += i;
 698     }
 699 }
 700 #endif
 701
 702 /**
 703  *@brief Decode channel transformation parameters
 704  *@param s codec context
 705  *@return 0 in case of success, < 0 in case of bitstream errors
 706  */
 707 static int decode_channel_transform(WMAProDecodeCtx* s)
 708 {
 709     int i;
 710     /* should never consume more than 1921 bits for the 8 channel case
 711      * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
 712      * + MAX_CHANNELS + MAX_BANDS + 1)
 713      */
 714
 715     /** in the one channel case channel transforms are pointless */
 716     s->num_chgroups = 0;
 717     if (s->num_channels > 1) {
 718         int remaining_channels = s->channels_for_cur_subframe;
 719
 720         if (get_bits1(&s->gb)) {
 721                                  DEBUGF("unsupported channel transform bit\n");
 722             return AVERROR_INVALIDDATA;
 723         }
 724
 725         for (s->num_chgroups = 0; remaining_channels &&
 726              s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
 727             WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
 728             int32_t** channel_data = chgroup->channel_data;
 729             chgroup->num_channels = 0;
 730             chgroup->transform = 0;
 731
 732             /** decode channel mask */
 733             if (remaining_channels > 2) {
 734                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 735                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 736                     if (!s->channel[channel_idx].grouped
 737                         && get_bits1(&s->gb)) {
 738                         ++chgroup->num_channels;
 739                         s->channel[channel_idx].grouped = 1;
 740                         *channel_data++    = s->channel[channel_idx].coeffs;
 741                     }
 742                 }
 743             } else {
 744                 chgroup->num_channels = remaining_channels;
 745                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 746                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 747                     if (!s->channel[channel_idx].grouped)
 748                         *channel_data++    = s->channel[channel_idx].coeffs;
 749                     s->channel[channel_idx].grouped = 1;
 750                 }
 751             }
 752
 753             /** decode transform type */
 754             if (chgroup->num_channels == 2) {
 755                 if (get_bits1(&s->gb)) {
 756                     if (get_bits1(&s->gb)) {
 757                         DEBUGF("unsupported channel transform type\n");
 758                     }
 759                 } else {
 760                     chgroup->transform = 1;
 761                     if (s->num_channels == 2) {
 762                         chgroup->fixdecorrelation_matrix[0] =  ONE_FRACT16;
 763                         chgroup->fixdecorrelation_matrix[1] = -ONE_FRACT16;
 764                         chgroup->fixdecorrelation_matrix[2] =  ONE_FRACT16;
 765                         chgroup->fixdecorrelation_matrix[3] =  ONE_FRACT16;
 766                     } else {
 767                         /** cos(pi/4) */
 768                         chgroup->fixdecorrelation_matrix[0] =  COS_PI4_FRACT16;
 769                         chgroup->fixdecorrelation_matrix[1] = -COS_PI4_FRACT16;
 770                         chgroup->fixdecorrelation_matrix[2] =  COS_PI4_FRACT16;
 771                         chgroup->fixdecorrelation_matrix[3] =  COS_PI4_FRACT16;
 772                     }
 773                 }
 774             } else if (chgroup->num_channels > 2) {
 775                 DEBUGF("in wmaprodec.c: Multichannel streams still not supported\n");
 776                 return -1;
 777 #if 0
 778                 if (get_bits1(&s->gb)) {
 779                     chgroup->transform = 1;
 780                     if (get_bits1(&s->gb)) {
 781                         decode_decorrelation_matrix(s, chgroup);
 782                     } else {
 783                         /** FIXME: more than 6 coupled channels not supported */
 784                         if (chgroup->num_channels > 6) {
 785                             av_log_ask_for_sample(s->avctx,
 786                                                   "coupled channels > 6\n");
 787                         } else {
 788                             memcpy(chgroup->decorrelation_matrix,
 789                                    default_decorrelation[chgroup->num_channels],
 790                                    chgroup->num_channels * chgroup->num_channels *
 791                                    sizeof(*chgroup->decorrelation_matrix));
 792                         }
 793                     }
 794                 }
 795 #endif
 796             }
 797
 798             /** decode transform on / off */
 799             if (chgroup->transform) {
 800                 if (!get_bits1(&s->gb)) {
 801                     int i;
 802                     /** transform can be enabled for individual bands */
 803                     for (i = 0; i < s->num_bands; i++) {
 804                         chgroup->transform_band[i] = get_bits1(&s->gb);
 805                     }
 806                 } else {
 807                     memset(chgroup->transform_band, 1, s->num_bands);
 808                 }
 809             }
 810             remaining_channels -= chgroup->num_channels;
 811         }
 812     }
 813     return 0;
 814 }
 815
 816 /**
 817  *@brief Extract the coefficients from the bitstream.
 818  *@param s codec context
 819  *@param c current channel number
 820  *@return 0 on success, < 0 in case of bitstream errors
 821  */
 822 static int decode_coeffs(WMAProDecodeCtx *s, int c)
 823 {
 824     int vlctable;
 825     VLC* vlc;
 826     WMAProChannelCtx* ci = &s->channel[c];
 827     int rl_mode = 0;
 828     int cur_coeff = 0;
 829     int num_zeros = 0;
 830     const uint16_t* run;
 831     const int32_t* level;
 832
 833     DEBUGF("decode coefficients for channel %i\n", c);
 834
 835     vlctable = get_bits1(&s->gb);
 836     vlc = &coef_vlc[vlctable];
 837
 838     if (vlctable) {
 839         run = coef1_run;
 840         level = coef1_level;
 841     } else {
 842         run = coef0_run;
 843         level = coef0_level;
 844     }
 845
 846     /** decode vector coefficients (consumes up to 167 bits per iteration for
 847       4 vector coded large values) */
 848     while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
 849         int32_t vals[4];
 850         int i;
 851         unsigned int idx;
 852
 853         idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
 854
 855         if (idx == HUFF_VEC4_SIZE - 1) {
 856             for (i = 0; i < 4; i += 2) {
 857                 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
 858                 if (idx == HUFF_VEC2_SIZE - 1) {
 859                     int v0, v1;
 860                     v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 861                     if (v0 == HUFF_VEC1_SIZE - 1)
 862                         v0 += ff_wma_get_large_val(&s->gb);
 863                     v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 864                     if (v1 == HUFF_VEC1_SIZE - 1)
 865                         v1 += ff_wma_get_large_val(&s->gb);
 866
 867                     vals[i  ] = v0;
 868                     vals[i+1] = v1;
 869                 } else {
 870                     vals[i  ] = symbol_to_vec2[idx] >> 4;
 871                     vals[i+1] = symbol_to_vec2[idx] & 0xF;
 872                 }
 873             }
 874         } else {
 875             vals[0] = (symbol_to_vec4[idx] >> 12);
 876             vals[1] = (symbol_to_vec4[idx] >>  8) & 0xF;
 877             vals[2] = (symbol_to_vec4[idx] >>  4) & 0xF;
 878             vals[3] = (symbol_to_vec4[idx]      ) & 0xF;
 879         }
 880
 881         /* Rockbox: To be able to use rockbox' optimized mdct we need to
 882          * pre-shift the values by >>(nbits-3). */
 883         const int nbits = av_log2(s->subframe_len)+1;
 884         const int shift = WMAPRO_FRACT-(nbits-3);
 885
 886         /** decode sign */
 887         for (i = 0; i < 4; i++) {
 888             if (vals[i]) {
 889                 int sign = get_bits1(&s->gb) - 1;
 890                 /* Rockbox: To be able to use rockbox' optimized mdct we need
 891                  * invert the sign. */
 892                 ci->coeffs[cur_coeff] = (sign == -1)? vals[i]<<shift : -vals[i]<<shift;
 893                 num_zeros = 0;
 894             } else {
 895                 ci->coeffs[cur_coeff] = 0;
 896                 /** switch to run level mode when subframe_len / 128 zeros
 897                     were found in a row */
 898                 rl_mode |= (++num_zeros > s->subframe_len >> 8);
 899             }
 900             ++cur_coeff;
 901         }
 902     }
 903
 904     /** decode run level coded coefficients */
 905     if (rl_mode) {
 906         memset(&ci->coeffs[cur_coeff], 0,
 907                sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
 908
 909         if (ff_wma_run_level_decode(&s->gb, vlc,
 910                                     level, run, 1, ci->coeffs,
 911                                     cur_coeff, s->subframe_len,
 912                                     s->subframe_len, s->esc_len, 0))
 913            return AVERROR_INVALIDDATA;
 914
 915     }
 916     return 0;
 917 }
 918
 919 /**
 920  *@brief Extract scale factors from the bitstream.
 921  *@param s codec context
 922  *@return 0 on success, < 0 in case of bitstream errors
 923  */
 924 static int decode_scale_factors(WMAProDecodeCtx* s)
 925 {
 926     int i;
 927
 928     /** should never consume more than 5344 bits
 929      *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
 930      */
 931
 932     for (i = 0; i < s->channels_for_cur_subframe; i++) {
 933         int c = s->channel_indexes_for_cur_subframe[i];
 934         int* sf;
 935         int* sf_end;
 936         s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
 937         sf_end = s->channel[c].scale_factors + s->num_bands;
 938
 939         /** resample scale factors for the new block size
 940          *  as the scale factors might need to be resampled several times
 941          *  before some  new values are transmitted, a backup of the last
 942          *  transmitted scale factors is kept in saved_scale_factors
 943          */
 944         if (s->channel[c].reuse_sf) {
 945             const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
 946             int b;
 947             for (b = 0; b < s->num_bands; b++)
 948                 s->channel[c].scale_factors[b] =
 949                     s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
 950         }
 951
 952         if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
 953
 954             if (!s->channel[c].reuse_sf) {
 955                 int val;
 956                 /** decode DPCM coded scale factors */
 957                 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
 958                 val = 45 / s->channel[c].scale_factor_step;
 959                 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
 960                     val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
 961                     *sf = val;
 962                 }
 963             } else {
 964                 int i;
 965                 /** run level decode differences to the resampled factors */
 966                 for (i = 0; i < s->num_bands; i++) {
 967                     int idx;
 968                     int skip;
 969                     int val;
 970                     int sign;
 971
 972                     idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
 973
 974                     if (!idx) {
 975                         uint32_t code = get_bits(&s->gb, 14);
 976                         val  =  code >> 6;
 977                         sign = (code & 1) - 1;
 978                         skip = (code & 0x3f) >> 1;
 979                     } else if (idx == 1) {
 980                         break;
 981                     } else {
 982                         skip = scale_rl_run[idx];
 983                         val  = scale_rl_level[idx];
 984                         sign = get_bits1(&s->gb)-1;
 985                     }
 986
 987                     i += skip;
 988                     if (i >= s->num_bands) {
 989                            DEBUGF("invalid scale factor coding\n");
 990                         return AVERROR_INVALIDDATA;
 991                     }
 992                     s->channel[c].scale_factors[i] += (val ^ sign) - sign;
 993                 }
 994             }
 995
 996             /** swap buffers */
 997             s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
 998             s->channel[c].table_idx = s->table_idx;
 999             s->channel[c].reuse_sf  = 1;
1000         }
1001
1002         /** calculate new scale factor maximum */
1003         s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
1004         for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
1005             s->channel[c].max_scale_factor =
1006                 FFMAX(s->channel[c].max_scale_factor, *sf);
1007         }
1008
1009     }
1010     return 0;
1011 }
1012
1013 /**
1014  *@brief Reconstruct the individual channel data.
1015  *@param s codec context
1016  */
1017 static void inverse_channel_transform(WMAProDecodeCtx *s)
1018 {
1019     int i;
1020
1021     for (i = 0; i < s->num_chgroups; i++) {
1022         if (s->chgroup[i].transform) {
1023             const int num_channels = s->chgroup[i].num_channels;
1024             int32_t data[WMAPRO_MAX_CHANNELS];
1025             int32_t** ch_data = s->chgroup[i].channel_data;
1026             int32_t** ch_end = ch_data + num_channels;
1027             const int8_t* tb = s->chgroup[i].transform_band;
1028             int16_t* sfb;
1029
1030             /** multichannel decorrelation */
1031             for (sfb = s->cur_sfb_offsets;
1032                  sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
1033                 int y;
1034                 if (*tb++ == 1) {
1035                     /** multiply values with the decorrelation_matrix */
1036                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
1037                         const int32_t* mat = s->chgroup[i].fixdecorrelation_matrix;
1038                         const int32_t* data_end = data + num_channels;
1039                         int32_t* data_ptr = data;
1040                         int32_t** ch;
1041
1042                         for (ch = ch_data; ch < ch_end; ch++)
1043                             *data_ptr++ = (*ch)[y];
1044
1045                         for (ch = ch_data; ch < ch_end; ch++) {
1046                             int32_t sum = 0;
1047                             data_ptr = data;
1048
1049                             while (data_ptr < data_end)
1050                                 sum += fixmul16(*mat++, *data_ptr++);
1051
1052                             (*ch)[y] = sum;
1053                         }
1054                     }
1055                 } else if (s->num_channels == 2) {
1056
1057                     /* Scale with sqrt(2) */
1058                     int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1059                     vector_fixmul_scalar(ch_data[0] + sfb[0],
1060                                          ch_data[0] + sfb[0],
1061                                          SQRT2_FRACT16, len);
1062                     vector_fixmul_scalar(ch_data[1] + sfb[0],
1063                                          ch_data[1] + sfb[0],
1064                                          SQRT2_FRACT16, len);
1065
1066                 }
1067             }
1068         }
1069     }
1070 }
1071
1072 /**
1073  *@brief Apply sine window and reconstruct the output buffer.
1074  *@param s codec context
1075  */
1076 static void wmapro_window(WMAProDecodeCtx *s)
1077 {
1078     int i;
1079
1080     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1081         int c = s->channel_indexes_for_cur_subframe[i];
1082         const int32_t* window;
1083         int winlen = s->channel[c].prev_block_len;
1084         int32_t *xstart= s->channel[c].coeffs - (winlen >> 1);
1085
1086         if (s->subframe_len < winlen) {
1087             xstart += (winlen - s->subframe_len) >> 1;
1088             winlen = s->subframe_len;
1089         }
1090
1091         window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
1092
1093         winlen >>= 1;
1094
1095         vector_fixmul_window(xstart, xstart, xstart + winlen,
1096                                   window, winlen);
1097
1098         s->channel[c].prev_block_len = s->subframe_len;
1099
1100     }
1101 }
1102
1103 /**
1104  *@brief Decode a single subframe (block).
1105  *@param s codec context
1106  *@return 0 on success, < 0 when decoding failed
1107  */
1108 static int decode_subframe(WMAProDecodeCtx *s)
1109 {
1110     int offset = s->samples_per_frame;
1111     int subframe_len = s->samples_per_frame;
1112     int i;
1113     int total_samples   = s->samples_per_frame * s->num_channels;
1114     int transmit_coeffs = 0;
1115     int cur_subwoofer_cutoff;
1116
1117     s->subframe_offset = get_bits_count(&s->gb);
1118
1119     /** reset channel context and find the next block offset and size
1120         == the next block of the channel with the smallest number of
1121         decoded samples
1122     */
1123     for (i = 0; i < s->num_channels; i++) {
1124         s->channel[i].grouped = 0;
1125         if (offset > s->channel[i].decoded_samples) {
1126             offset = s->channel[i].decoded_samples;
1127             subframe_len =
1128                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1129         }
1130     }
1131
1132     DEBUGF("processing subframe with offset %i len %i\n", offset, subframe_len);
1133
1134     /** get a list of all channels that contain the estimated block */
1135     s->channels_for_cur_subframe = 0;
1136     for (i = 0; i < s->num_channels; i++) {
1137         const int cur_subframe = s->channel[i].cur_subframe;
1138         /** substract already processed samples */
1139         total_samples -= s->channel[i].decoded_samples;
1140
1141         /** and count if there are multiple subframes that match our profile */
1142         if (offset == s->channel[i].decoded_samples &&
1143             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1144             total_samples -= s->channel[i].subframe_len[cur_subframe];
1145             s->channel[i].decoded_samples +=
1146                 s->channel[i].subframe_len[cur_subframe];
1147             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1148             ++s->channels_for_cur_subframe;
1149         }
1150     }
1151
1152     /** check if the frame will be complete after processing the
1153         estimated block */
1154     if (!total_samples)
1155         s->parsed_all_subframes = 1;
1156
1157
1158     DEBUGF("subframe is part of %i channels\n", s->channels_for_cur_subframe);
1159
1160     /** calculate number of scale factor bands and their offsets */
1161     s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
1162     s->num_bands         = s->num_sfb[s->table_idx];
1163     s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
1164     cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1165
1166     /** configure the decoder for the current subframe */
1167     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1168         int c = s->channel_indexes_for_cur_subframe[i];
1169
1170         s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
1171                                                   + offset];
1172     }
1173
1174     s->subframe_len = subframe_len;
1175     s->esc_len = av_log2(s->subframe_len - 1) + 1;
1176
1177     /** skip extended header if any */
1178     if (get_bits1(&s->gb)) {
1179         int num_fill_bits;
1180         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1181             int len = get_bits(&s->gb, 4);
1182             num_fill_bits = get_bits(&s->gb, len) + 1;
1183         }
1184
1185         if (num_fill_bits >= 0) {
1186             if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1187                 DEBUGF("invalid number of fill bits\n");
1188                 return AVERROR_INVALIDDATA;
1189             }
1190
1191             skip_bits_long(&s->gb, num_fill_bits);
1192         }
1193     }
1194
1195     /** no idea for what the following bit is used */
1196     if (get_bits1(&s->gb)) {
1197         DEBUGF("reserved bit set\n");
1198         return AVERROR_INVALIDDATA;
1199     }
1200
1201     if (decode_channel_transform(s) < 0)
1202         return AVERROR_INVALIDDATA;
1203
1204     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1205         int c = s->channel_indexes_for_cur_subframe[i];
1206         if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1207             transmit_coeffs = 1;
1208     }
1209
1210     if (transmit_coeffs) {
1211         int step;
1212         int quant_step = 90 * s->bits_per_sample >> 4;
1213         if ((get_bits1(&s->gb))) {
1214             /** FIXME: might change run level mode decision */
1215             DEBUGF("unsupported quant step coding\n");
1216             return AVERROR_INVALIDDATA;
1217         }
1218         /** decode quantization step */
1219         step = get_sbits(&s->gb, 6);
1220         quant_step += step;
1221         if (step == -32 || step == 31) {
1222             const int sign = (step == 31) - 1;
1223             int quant = 0;
1224             while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1225                    (step = get_bits(&s->gb, 5)) == 31) {
1226                 quant += 31;
1227             }
1228             quant_step += ((quant + step) ^ sign) - sign;
1229         }
1230         if (quant_step < 0) {
1231             DEBUGF("negative quant step\n");
1232         }
1233
1234         /** decode quantization step modifiers for every channel */
1235
1236         if (s->channels_for_cur_subframe == 1) {
1237             s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1238         } else {
1239             int modifier_len = get_bits(&s->gb, 3);
1240             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1241                 int c = s->channel_indexes_for_cur_subframe[i];
1242                 s->channel[c].quant_step = quant_step;
1243                 if (get_bits1(&s->gb)) {
1244                     if (modifier_len) {
1245                         s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1246                     } else
1247                         ++s->channel[c].quant_step;
1248                 }
1249             }
1250         }
1251
1252         /** decode scale factors */
1253         if (decode_scale_factors(s) < 0)
1254             return AVERROR_INVALIDDATA;
1255     }
1256
1257     DEBUGF("BITSTREAM: subframe header length was %i\n",
1258             get_bits_count(&s->gb) - s->subframe_offset);
1259
1260     /** parse coefficients */
1261     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1262         int c = s->channel_indexes_for_cur_subframe[i];
1263         if (s->channel[c].transmit_coefs &&
1264             get_bits_count(&s->gb) < s->num_saved_bits) {
1265             decode_coeffs(s, c);
1266         } else {
1267             memset(s->channel[c].coeffs, 0,
1268                    sizeof(*s->channel[c].coeffs) * subframe_len);
1269         }
1270     }
1271
1272     DEBUGF("BITSTREAM: subframe length was %i\n",
1273             get_bits_count(&s->gb) - s->subframe_offset);
1274
1275     if (transmit_coeffs) {
1276         int nbits = av_log2(subframe_len)+1;
1277         /** reconstruct the per channel data */
1278         inverse_channel_transform(s);
1279         for (i = 0; i < s->channels_for_cur_subframe; i++) {
1280             int c = s->channel_indexes_for_cur_subframe[i];
1281             const int* sf = s->channel[c].scale_factors;
1282             int b;
1283
1284             if (c == s->lfe_channel)
1285                 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1286                        (subframe_len - cur_subwoofer_cutoff));
1287
1288             /** inverse quantization and rescaling */
1289             for (b = 0; b < s->num_bands; b++) {
1290                 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1291                 const int exp = s->channel[c].quant_step -
1292                             (s->channel[c].max_scale_factor - *sf++) *
1293                             s->channel[c].scale_factor_step;
1294
1295                 if(exp < EXP_MIN || exp > EXP_MAX) {
1296                     DEBUGF("in wmaprodec.c : unhandled value for exp (%d), please report sample.\n", exp);
1297                     return -1;
1298                 }
1299                 const int32_t quant = QUANT(exp);
1300                 int start = s->cur_sfb_offsets[b];
1301
1302                 vector_fixmul_scalar(s->tmp+start,
1303                                      s->channel[c].coeffs + start,
1304                                      quant, end-start);
1305
1306
1307             }
1308
1309             /** apply imdct (ff_imdct_half == DCTIV with reverse) */
1310             ff_imdct_half(nbits,s->channel[c].coeffs, s->tmp);
1311
1312         }
1313     }
1314
1315     /** window and overlapp-add */
1316     wmapro_window(s);
1317
1318     /** handled one subframe */
1319     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1320         int c = s->channel_indexes_for_cur_subframe[i];
1321         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1322             DEBUGF("broken subframe\n");
1323             return AVERROR_INVALIDDATA;
1324         }
1325         ++s->channel[c].cur_subframe;
1326     }
1327
1328     return 0;
1329 }
1330
1331 /**
1332  *@brief Decode one WMA frame.
1333  *@param s codec context
1334  *@return 0 if the trailer bit indicates that this is the last frame,
1335  *        1 if there are additional frames
1336  */
1337 static int decode_frame(WMAProDecodeCtx *s)
1338 {
1339     GetBitContext* gb = &s->gb;
1340     int more_frames = 0;
1341     int len = 0;
1342     int i;
1343
1344
1345 #if 0
1346     /** check for potential output buffer overflow */
1347     /* Rockbox : No need to check that anymore since we work directly on the
1348        buffers in the WMAProDecCtx */
1349     if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
1350         /** return an error if no frame could be decoded at all */
1351            DEBUGF("not enough space for the output samples\n");
1352         s->packet_loss = 1;
1353         return 0;
1354     }
1355 #endif
1356
1357     /** get frame length */
1358     if (s->len_prefix)
1359         len = get_bits(gb, s->log2_frame_size);
1360
1361     DEBUGF("decoding frame with length %x\n", len);
1362
1363     /** decode tile information */
1364     if (decode_tilehdr(s)) {
1365         s->packet_loss = 1;
1366         return 0;
1367     }
1368
1369     /** read postproc transform */
1370     if (s->num_channels > 1 && get_bits1(gb)) {
1371         DEBUGF("Unsupported postproc transform found\n");
1372         s->packet_loss = 1;
1373         return 0;
1374     }
1375
1376     /** read drc info */
1377     if (s->dynamic_range_compression) {
1378         s->drc_gain = get_bits(gb, 8);
1379         DEBUGF("drc_gain %i\n", s->drc_gain);
1380     }
1381
1382     /** no idea what these are for, might be the number of samples
1383         that need to be skipped at the beginning or end of a stream */
1384     if (get_bits1(gb)) {
1385         int skip;
1386
1387         /** usually true for the first frame */
1388         if (get_bits1(gb)) {
1389             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1390             DEBUGF("start skip: %i\n", skip);
1391         }
1392
1393         /** sometimes true for the last frame */
1394         if (get_bits1(gb)) {
1395             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1396             DEBUGF("end skip: %i\n", skip);
1397         }
1398
1399     }
1400
1401     DEBUGF("BITSTREAM: frame header length was %i\n",
1402             get_bits_count(gb) - s->frame_offset);
1403
1404     /** reset subframe states */
1405     s->parsed_all_subframes = 0;
1406     for (i = 0; i < s->num_channels; i++) {
1407         s->channel[i].decoded_samples = 0;
1408         s->channel[i].cur_subframe    = 0;
1409         s->channel[i].reuse_sf        = 0;
1410     }
1411
1412     /** decode all subframes */
1413     while (!s->parsed_all_subframes) {
1414         if (decode_subframe(s) < 0) {
1415             s->packet_loss = 1;
1416             return 0;
1417         }
1418     }
1419
1420     if (s->skip_frame) {
1421         s->skip_frame = 0;
1422     } else
1423         s->samples += s->num_channels * s->samples_per_frame;
1424
1425     if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1426         /** FIXME: not sure if this is always an error */
1427         DEBUGF("frame[%i] would have to skip %i bits\n",
1428                (int)s->frame_num, len - (get_bits_count(gb) - s->frame_offset) - 1);
1429         s->packet_loss = 1;
1430         return 0;
1431     }
1432
1433     /** skip the rest of the frame data */
1434     skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1435
1436     /** decode trailer bit */
1437     more_frames = get_bits1(gb);
1438
1439     ++s->frame_num;
1440     return more_frames;
1441 }
1442
1443 /**
1444  *@brief Calculate remaining input buffer length.
1445  *@param s codec context
1446  *@param gb bitstream reader context
1447  *@return remaining size in bits
1448  */
1449 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1450 {
1451     return s->buf_bit_size - get_bits_count(gb);
1452 }
1453
1454 /**
1455  *@brief Fill the bit reservoir with a (partial) frame.
1456  *@param s codec context
1457  *@param gb bitstream reader context
1458  *@param len length of the partial frame
1459  *@param append decides wether to reset the buffer or not
1460  */
1461 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1462                       int append)
1463 {
1464     int buflen;
1465
1466     /** when the frame data does not need to be concatenated, the input buffer
1467         is resetted and additional bits from the previous frame are copyed
1468         and skipped later so that a fast byte copy is possible */
1469
1470     if (!append) {
1471         s->frame_offset = get_bits_count(gb) & 7;
1472         s->num_saved_bits = s->frame_offset;
1473         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1474     }
1475
1476     buflen = (s->num_saved_bits + len + 8) >> 3;
1477
1478     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1479         DEBUGF("input buffer too small\n");
1480         s->packet_loss = 1;
1481         return;
1482     }
1483
1484     s->num_saved_bits += len;
1485     if (!append) {
1486         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1487                      s->num_saved_bits);
1488     } else {
1489         int align = 8 - (get_bits_count(gb) & 7);
1490         align = FFMIN(align, len);
1491         put_bits(&s->pb, align, get_bits(gb, align));
1492         len -= align;
1493         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1494     }
1495     skip_bits_long(gb, len);
1496
1497     {
1498         PutBitContext tmp = s->pb;
1499         flush_put_bits(&tmp);
1500     }
1501
1502     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1503     skip_bits(&s->gb, s->frame_offset);
1504 }
1505
1506 /**
1507  *@brief Decode a single WMA packet.
1508  *@param avctx codec context
1509  *@param data the output buffer
1510  *@param data_size number of bytes that were written to the output buffer
1511  *@param avpkt input packet
1512  *@return number of bytes that were read from the input buffer
1513  */
1514 int decode_packet(asf_waveformatex_t *wfx, int32_t *dec[2], int *data_size,
1515                   void* pktdata, int size)
1516 {
1517     WMAProDecodeCtx *s = &globWMAProDecCtx;
1518     GetBitContext* gb  = &s->pgb;
1519     const uint8_t* buf = pktdata;
1520     int buf_size       = size;
1521     int num_bits_prev_frame;
1522     int packet_sequence_number;\
1523     int i;
1524
1525     /** reuse second half of the IMDCT output for the next frame */
1526     /* NOTE : Relies on the WMAProDecCtx being static */
1527     for(i = 0; i < s->num_channels; i++)
1528         memcpy(&s->channel[i].out[0],
1529                &s->channel[i].out[s->samples_per_frame],
1530                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1531
1532
1533     s->samples = 0;
1534     *data_size = 0;
1535
1536     if (s->packet_done || s->packet_loss) {
1537         s->packet_done = 0;
1538         s->buf_bit_size = buf_size << 3;
1539
1540         /** sanity check for the buffer length */
1541         if (buf_size < wfx->blockalign)
1542             return 0;
1543
1544         buf_size = wfx->blockalign;
1545
1546         /** parse packet header */
1547         init_get_bits(gb, buf, s->buf_bit_size);
1548         packet_sequence_number = get_bits(gb, 4);
1549         skip_bits(gb, 2);
1550
1551         /** get number of bits that need to be added to the previous frame */
1552         num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1553         DEBUGF("packet[%d]: nbpf %x\n", s->frame_num,
1554                 num_bits_prev_frame);
1555
1556         /** check for packet loss */
1557         if (!s->packet_loss &&
1558             ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1559             s->packet_loss = 1;
1560             DEBUGF("Packet loss detected! seq %x vs %x\n",
1561                    s->packet_sequence_number, packet_sequence_number);
1562         }
1563         s->packet_sequence_number = packet_sequence_number;
1564
1565         if (num_bits_prev_frame > 0) {
1566             /** append the previous frame data to the remaining data from the
1567                 previous packet to create a full frame */
1568             save_bits(s, gb, num_bits_prev_frame, 1);
1569             DEBUGF("accumulated %x bits of frame data\n",
1570                     s->num_saved_bits - s->frame_offset);
1571
1572             /** decode the cross packet frame if it is valid */
1573             if (!s->packet_loss)
1574                 decode_frame(s);
1575         } else if (s->num_saved_bits - s->frame_offset) {
1576             DEBUGF("ignoring %x previously saved bits\n",
1577                     s->num_saved_bits - s->frame_offset);
1578         }
1579
1580         s->packet_loss = 0;
1581
1582     } else {
1583         int frame_size;
1584         s->buf_bit_size = size << 3;
1585         init_get_bits(gb, pktdata, s->buf_bit_size);
1586         skip_bits(gb, s->packet_offset);
1587         if (remaining_bits(s, gb) > s->log2_frame_size &&
1588             (frame_size = show_bits(gb, s->log2_frame_size)) &&
1589             frame_size <= remaining_bits(s, gb)) {
1590             save_bits(s, gb, frame_size, 0);
1591             s->packet_done = !decode_frame(s);
1592         } else
1593             s->packet_done = 1;
1594     }
1595
1596     if (s->packet_done && !s->packet_loss &&
1597         remaining_bits(s, gb) > 0) {
1598         /** save the rest of the data so that it can be decoded
1599             with the next packet */
1600         save_bits(s, gb, remaining_bits(s, gb), 0);
1601     }
1602
1603     dec[0] = s->channel[0].out;
1604     dec[1] = s->channel[1].out;
1605
1606     *data_size = s->samples;
1607     s->packet_offset = get_bits_count(gb) & 7;
1608
1609     s->frame_num++;
1610     return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
1611 }
1612
1613 #if 0
1614 /**
1615  *@brief wmapro decoder
 *
 * Codec description table for the decoder. It is excluded from the build by
 * the enclosing preprocessor conditional, so none of the entries below are
 * referenced through it.
 *
 * NOTE(review): the table lists decode_packet as a callback, while
 * decode_packet in this file takes (wfx, dec, data_size, pktdata, size);
 * confirm the expected callback signature before re-enabling this table.
1616  */
1617 AVCodec wmapro_decoder = {
1618     "wmapro",
1619     AVMEDIA_TYPE_AUDIO,
1620     CODEC_ID_WMAPRO,
1621     sizeof(WMAProDecodeCtx),
1622     decode_init,
1623     NULL,
1624     decode_end,
1625     decode_packet,
1626     .capabilities = CODEC_CAP_SUBFRAMES,
1627     .flush= flush,
1628     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
1629 };
1630 #endif