libavcodec/wmaprodec.c

   1 /*
   2  * Wmapro compatible decoder
   3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
   4  * Copyright (c) 2008 - 2009 Sascha Sommer, Benjamin Larsson
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file  libavcodec/wmaprodec.c
  25  * @brief wmapro decoder implementation
  26  * Wmapro is an MDCT based codec comparable to wma standard or AAC.
  27  * The decoding therefore consists of the following steps:
  28  * - bitstream decoding
  29  * - reconstruction of per-channel data
  30  * - rescaling and inverse quantization
  31  * - IMDCT
   32  * - windowing and overlap-add
  33  *
  34  * The compressed wmapro bitstream is split into individual packets.
  35  * Every such packet contains one or more wma frames.
  36  * The compressed frames may have a variable length and frames may
  37  * cross packet boundaries.
  38  * Common to all wmapro frames is the number of samples that are stored in
  39  * a frame.
  40  * The number of samples and a few other decode flags are stored
  41  * as extradata that has to be passed to the decoder.
  42  *
  43  * The wmapro frames themselves are again split into a variable number of
  44  * subframes. Every subframe contains the data for 2^N time domain samples
  45  * where N varies between 7 and 12.
  46  *
  47  * Example wmapro bitstream (in samples):
  48  *
  49  * ||   packet 0           || packet 1 || packet 2      packets
  50  * ---------------------------------------------------
  51  * || frame 0      || frame 1       || frame 2    ||    frames
  52  * ---------------------------------------------------
  53  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
  54  * ---------------------------------------------------
  55  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
  56  * ---------------------------------------------------
  57  *
   58  * The frame layouts for the individual channels of a wma frame do not need
   59  * to be the same.
  60  *
  61  * However, if the offsets and lengths of several subframes of a frame are the
  62  * same, the subframes of the channels can be grouped.
  63  * Every group may then use special coding techniques like M/S stereo coding
  64  * to improve the compression ratio. These channel transformations do not
  65  * need to be applied to a whole subframe. Instead, they can also work on
  66  * individual scale factor bands (see below).
  67  * The coefficients that carry the audio signal in the frequency domain
  68  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
  69  * In addition to that, the encoder can switch to a runlevel coding scheme
  70  * by transmitting subframe_length / 128 zero coefficients.
  71  *
  72  * Before the audio signal can be converted to the time domain, the
  73  * coefficients have to be rescaled and inverse quantized.
  74  * A subframe is therefore split into several scale factor bands that get
  75  * scaled individually.
  76  * Scale factors are submitted for every frame but they might be shared
  77  * between the subframes of a channel. Scale factors are initially DPCM-coded.
  78  * Once scale factors are shared, the differences are transmitted as runlevel
  79  * codes.
  80  * Every subframe length and offset combination in the frame layout shares a
  81  * common quantization factor that can be adjusted for every channel by a
  82  * modifier.
  83  * After the inverse quantization, the coefficients get processed by an IMDCT.
  84  * The resulting values are then windowed with a sine window and the first half
  85  * of the values are added to the second half of the output from the previous
  86  * subframe in order to reconstruct the output samples.
  87  */
  88
  89 #include "avcodec.h"
  90 #include "internal.h"
  91 #include "get_bits.h"
  92 #include "put_bits.h"
  93 #include "wmaprodata.h"
  94 #include "dsputil.h"
  95 #include "wma.h"
  96
  97 /** current decoder limitations */
  98 #define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
  99 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
 100 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 101 #define MAX_FRAMESIZE  16384                                 ///< maximum compressed frame size
 102
 103 #define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 104 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
 105 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
 106
 107
 108 #define VLCBITS            9
 109 #define SCALEVLCBITS       8
 110 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
 111 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
 112 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
 113 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
 114 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
 115
/* Tables shared by all decoder instances; they are filled in decode_init(). */
static VLC              sf_vlc;           ///< scale factor DPCM vlc
static VLC              sf_rl_vlc;        ///< scale factor run length vlc
static VLC              vec4_vlc;         ///< 4 coefficients per symbol
static VLC              vec2_vlc;         ///< 2 coefficients per symbol
static VLC              vec1_vlc;         ///< 1 coefficient per symbol
static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
static float            sin64[33];        ///< sine table for decorrelation: sin64[i] = sin(i * M_PI / 64)
 123
/**
 * @brief frame specific decoder context for a single channel
 *
 * The subframe layout (num_subframes, subframe_len, subframe_offset) is
 * rebuilt for every frame by decode_tilehdr().
 */
typedef struct {
    int16_t  prev_block_len;                          ///< length of the previous block (decode_init() starts with samples_per_frame)
    uint8_t  transmit_coefs;                          ///< NOTE(review): presumably set when coefficients are transmitted for the current subframe - confirm against the subframe decoder
    uint8_t  num_subframes;                           ///< number of subframes of this channel in the current frame
    uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
    uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame (running sum of the preceding subframe lengths)
    uint8_t  cur_subframe;                            ///< current subframe number
    uint16_t decoded_samples;                         ///< number of already processed samples
    uint8_t  grouped;                                 ///< channel is part of a group
    int      quant_step;                              ///< quantization step for the current subframe
    int8_t   reuse_sf;                                ///< share scale factors between subframes
    int8_t   scale_factor_step;                       ///< scaling step for the current subframe
    int      max_scale_factor;                        ///< maximum scale factor for the current subframe
    int      scale_factors[MAX_BANDS];                ///< scale factor values for the current subframe
    int      saved_scale_factors[MAX_BANDS];          ///< scale factors from a previous subframe
    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
    float*   coeffs;                                  ///< pointer to the subframe decode buffer
    DECLARE_ALIGNED_16(float, out[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]); ///< output buffer
} WMAProChannelCtx;
 146
/**
 * @brief channel group for channel transformations
 */
typedef struct {
    uint8_t num_channels;                                     ///< number of channels in the group
    int8_t  transform;                                        ///< transform on / off
    int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
    float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS]; ///< num_channels x num_channels matrix stored row by row with a stride of num_channels (see decode_decorrelation_matrix())
    float*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients (one pointer per grouped channel, taken from WMAProChannelCtx.coeffs)
} WMAProChannelGrp;
 157
/**
 * @brief main decoder context
 *
 * The members of the "frame size dependent" section are computed once in
 * decode_init(); the per block size tables there are indexed by i, where
 * entry i describes blocks of samples_per_frame >> i samples.
 */
typedef struct WMAProDecodeCtx {
    /* generic decoder variables */
    AVCodecContext*  avctx;                         ///< codec context for av_log
    DSPContext       dsp;                           ///< accelerated DSP functions
    uint8_t          frame_data[MAX_FRAMESIZE +
                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
    PutBitContext    pb;                            ///< context for filling the frame_data buffer
    MDCTContext      mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
    DECLARE_ALIGNED_16(float, tmp[WMAPRO_BLOCK_MAX_SIZE]); ///< IMDCT output buffer
    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes (the last entry belongs to the largest block size)

    /* frame size dependent frame information (set during initialization) */
    uint32_t         decode_flags;                  ///< used compression features
    uint8_t          len_prefix;                    ///< frame is prefixed with its length (decode_flags & 0x40)
    uint8_t          dynamic_range_compression;     ///< frame contains DRC data (decode_flags & 0x80)
    uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
    uint16_t         samples_per_frame;             ///< number of samples to output
    uint16_t         log2_frame_size;               ///< av_log2(block_align) + 4
    int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.channels)
    int8_t           lfe_channel;                   ///< lfe channel index (-1 if the channel mask contains no lfe channel)
    uint8_t          max_num_subframes;             ///< maximum number of subframes per frame (a power of two taken from decode_flags)
    uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
    uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe length is coded with a leading bit; the subframe is of maximum size when that bit is 0 (see decode_subframe_length())
    uint16_t         min_samples_per_subframe;      ///< samples_per_frame / max_num_subframes
    int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
    int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
    int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
    int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values

    /* packet decode state */
    uint8_t          packet_sequence_number;        ///< current packet number
    int              num_saved_bits;                ///< saved number of bits
    int              frame_offset;                  ///< frame offset in the bit reservoir
    int              subframe_offset;               ///< subframe offset in the bit reservoir
    uint8_t          packet_loss;                   ///< set in case of bitstream error (also set by decode_init())

    /* frame decode state */
    uint32_t         frame_num;                     ///< current frame number (not used for decoding)
    GetBitContext    gb;                            ///< bitstream reader context
    int              buf_bit_size;                  ///< buffer size in bits
    float*           samples;                       ///< current samplebuffer pointer
    float*           samples_end;                   ///< maximum samplebuffer pointer
    uint8_t          drc_gain;                      ///< gain for the DRC tool
    int8_t           skip_frame;                    ///< skip output step (decode_init() sets it so that the first frame is skipped)
    int8_t           parsed_all_subframes;          ///< all subframes decoded?

    /* subframe/block decode state */
    int16_t          subframe_len;                  ///< current subframe length
    int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
    int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS]; ///< indexes into channel[] of the channels that contain the subframe
    int8_t           num_bands;                     ///< number of scale factor bands
    int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
    uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
    int8_t           esc_len;                       ///< length of escaped coefficients

    uint8_t          num_chgroups;                  ///< number of channel groups
    WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information

    WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
} WMAProDecodeCtx;
 221
 222
 223 /**
 224  *@brief helper function to print the most important members of the context
 225  *@param s context
 226  */
 227 static void av_cold dump_context(WMAProDecodeCtx *s)
 228 {
 229 #define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
 230 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %x\n", a, b);
 231
 232     PRINT("ed sample bit depth", s->bits_per_sample);
 233     PRINT_HEX("ed decode flags", s->decode_flags);
 234     PRINT("samples per frame",   s->samples_per_frame);
 235     PRINT("log2 frame size",     s->log2_frame_size);
 236     PRINT("max num subframes",   s->max_num_subframes);
 237     PRINT("len prefix",          s->len_prefix);
 238     PRINT("num channels",        s->num_channels);
 239 }
 240
 241 /**
 242  *@brief Uninitialize the decoder and free all resources.
 243  *@param avctx codec context
 244  *@return 0 on success, < 0 otherwise
 245  */
 246 static av_cold int decode_end(AVCodecContext *avctx)
 247 {
 248     WMAProDecodeCtx *s = avctx->priv_data;
 249     int i;
 250
 251     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
 252         ff_mdct_end(&s->mdct_ctx[i]);
 253
 254     return 0;
 255 }
 256
 257 /**
 258  *@brief Initialize the decoder.
 259  *@param avctx codec context
 260  *@return 0 on success, -1 otherwise
 261  */
 262 static av_cold int decode_init(AVCodecContext *avctx)
 263 {
 264     WMAProDecodeCtx *s = avctx->priv_data;
 265     uint8_t *edata_ptr = avctx->extradata;
 266     unsigned int channel_mask;
 267     int i;
 268     int log2_max_num_subframes;
 269     int num_possible_block_sizes;
 270
 271     s->avctx = avctx;
 272     dsputil_init(&s->dsp, avctx);
 273     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 274
 275     avctx->sample_fmt = SAMPLE_FMT_FLT;
 276
 277     if (avctx->extradata_size >= 18) {
 278         s->decode_flags    = AV_RL16(edata_ptr+14);
 279         channel_mask       = AV_RL32(edata_ptr+2);
 280         s->bits_per_sample = AV_RL16(edata_ptr);
 281         /** dump the extradata */
 282         for (i = 0; i < avctx->extradata_size; i++)
 283             dprintf(avctx, "[%x] ", avctx->extradata[i]);
 284         dprintf(avctx, "\n");
 285
 286     } else {
 287         av_log_ask_for_sample(avctx, "Unknown extradata size\n");
 288         return AVERROR_INVALIDDATA;
 289     }
 290
 291     /** generic init */
 292     s->log2_frame_size = av_log2(avctx->block_align) + 4;
 293
 294     /** frame info */
 295     s->skip_frame  = 1; /** skip first frame */
 296     s->packet_loss = 1;
 297     s->len_prefix  = (s->decode_flags & 0x40);
 298
 299     if (!s->len_prefix) {
 300         av_log_ask_for_sample(avctx, "no length prefix\n");
 301         return AVERROR_INVALIDDATA;
 302     }
 303
 304     /** get frame len */
 305     s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
 306                                                           3, s->decode_flags);
 307
 308     /** init previous block len */
 309     for (i = 0; i < avctx->channels; i++)
 310         s->channel[i].prev_block_len = s->samples_per_frame;
 311
 312     /** subframe info */
 313     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
 314     s->max_num_subframes         = 1 << log2_max_num_subframes;
 315     if (s->max_num_subframes == 16)
 316         s->max_subframe_len_bit = 1;
 317     s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
 318
 319     num_possible_block_sizes     = log2_max_num_subframes + 1;
 320     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
 321     s->dynamic_range_compression = (s->decode_flags & 0x80);
 322
 323     if (s->max_num_subframes > MAX_SUBFRAMES) {
 324         av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
 325                s->max_num_subframes);
 326         return AVERROR_INVALIDDATA;
 327     }
 328
 329     s->num_channels = avctx->channels;
 330
 331     /** extract lfe channel position */
 332     s->lfe_channel = -1;
 333
 334     if (channel_mask & 8) {
 335         unsigned int mask;
 336         for (mask = 1; mask < 16; mask <<= 1) {
 337             if (channel_mask & mask)
 338                 ++s->lfe_channel;
 339         }
 340     }
 341
 342     if (s->num_channels < 0 || s->num_channels > WMAPRO_MAX_CHANNELS) {
 343         av_log_ask_for_sample(avctx, "invalid number of channels\n");
 344         return AVERROR_NOTSUPP;
 345     }
 346
 347     INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
 348                     scale_huffbits, 1, 1,
 349                     scale_huffcodes, 2, 2, 616);
 350
 351     INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
 352                     scale_rl_huffbits, 1, 1,
 353                     scale_rl_huffcodes, 4, 4, 1406);
 354
 355     INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
 356                     coef0_huffbits, 1, 1,
 357                     coef0_huffcodes, 4, 4, 2108);
 358
 359     INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
 360                     coef1_huffbits, 1, 1,
 361                     coef1_huffcodes, 4, 4, 3912);
 362
 363     INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
 364                     vec4_huffbits, 1, 1,
 365                     vec4_huffcodes, 2, 2, 604);
 366
 367     INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
 368                     vec2_huffbits, 1, 1,
 369                     vec2_huffcodes, 2, 2, 562);
 370
 371     INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
 372                     vec1_huffbits, 1, 1,
 373                     vec1_huffcodes, 2, 2, 562);
 374
 375     /** calculate number of scale factor bands and their offsets
 376         for every possible block size */
 377     for (i = 0; i < num_possible_block_sizes; i++) {
 378         int subframe_len = s->samples_per_frame >> i;
 379         int x;
 380         int band = 1;
 381
 382         s->sfb_offsets[i][0] = 0;
 383
 384         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
 385             int offset = (subframe_len * 2 * critical_freq[x])
 386                           / s->avctx->sample_rate + 2;
 387             offset &= ~3;
 388             if (offset > s->sfb_offsets[i][band - 1])
 389                 s->sfb_offsets[i][band++] = offset;
 390         }
 391         s->sfb_offsets[i][band - 1] = subframe_len;
 392         s->num_sfb[i]               = band - 1;
 393     }
 394
 395
 396     /** Scale factors can be shared between blocks of different size
 397         as every block has a different scale factor band layout.
 398         The matrix sf_offsets is needed to find the correct scale factor.
 399      */
 400
 401     for (i = 0; i < num_possible_block_sizes; i++) {
 402         int b;
 403         for (b = 0; b < s->num_sfb[i]; b++) {
 404             int x;
 405             int offset = ((s->sfb_offsets[i][b]
 406                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
 407             for (x = 0; x < num_possible_block_sizes; x++) {
 408                 int v = 0;
 409                 while (s->sfb_offsets[x][v + 1] << x < offset)
 410                     ++v;
 411                 s->sf_offsets[i][x][b] = v;
 412             }
 413         }
 414     }
 415
 416     /** init MDCT, FIXME: only init needed sizes */
 417     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
 418         ff_mdct_init(&s->mdct_ctx[i], BLOCK_MIN_BITS+1+i, 1,
 419                      1.0 / (1 << (BLOCK_MIN_BITS + i - 1))
 420                      / (1 << (s->bits_per_sample - 1)));
 421
 422     /** init MDCT windows: simple sinus window */
 423     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
 424         const int n       = 1 << (WMAPRO_BLOCK_MAX_BITS - i);
 425         const int win_idx = WMAPRO_BLOCK_MAX_BITS - i - 7;
 426         ff_sine_window_init(ff_sine_windows[win_idx], n);
 427         s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
 428     }
 429
 430     /** calculate subwoofer cutoff values */
 431     for (i = 0; i < num_possible_block_sizes; i++) {
 432         int block_size = s->samples_per_frame >> i;
 433         int cutoff = (440*block_size + 3 * (s->avctx->sample_rate >> 1) - 1)
 434                      / s->avctx->sample_rate;
 435         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
 436     }
 437
 438     /** calculate sine values for the decorrelation matrix */
 439     for (i = 0; i < 33; i++)
 440         sin64[i] = sin(i*M_PI / 64.0);
 441
 442     if (avctx->debug & FF_DEBUG_BITSTREAM)
 443         dump_context(s);
 444
 445     avctx->channel_layout = channel_mask;
 446     return 0;
 447 }
 448
 449 /**
 450  *@brief Decode the subframe length.
 451  *@param s context
 452  *@param offset sample offset in the frame
 453  *@return decoded subframe length on success, < 0 in case of an error
 454  */
 455 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
 456 {
 457     int frame_len_shift = 0;
 458     int subframe_len;
 459
 460     /** no need to read from the bitstream when only one length is possible */
 461     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
 462         return s->min_samples_per_subframe;
 463
 464     /** 1 bit indicates if the subframe is of maximum length */
 465     if (s->max_subframe_len_bit) {
 466         if (get_bits1(&s->gb))
 467             frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
 468     } else
 469         frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
 470
 471     subframe_len = s->samples_per_frame >> frame_len_shift;
 472
 473     /** sanity check the length */
 474     if (subframe_len < s->min_samples_per_subframe ||
 475         subframe_len > s->samples_per_frame) {
 476         av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
 477                subframe_len);
 478         return AVERROR_INVALIDDATA;
 479     }
 480     return subframe_len;
 481 }
 482
/**
 *@brief Decode how the data in the frame is split into subframes.
 *       Every WMA frame contains the encoded data for a fixed number of
 *       samples per channel. The data for every channel might be split
 *       into several subframes. This function will reconstruct the list of
 *       subframes for every channel.
 *
 *       If the subframes are not evenly split, the algorithm determines the
 *       channels with the lowest number of total samples.
 *       Afterwards, for each of these channels a bit is read from the
 *       bitstream that indicates if the channel contains a subframe with the
 *       next subframe size that is going to be read from the bitstream or not.
 *       If a channel contains such a subframe, the subframe size gets added to
 *       the channel's subframe list.
 *       The algorithm repeats these steps until the frame is properly divided
 *       between the individual channels.
 *
 *       On success the num_subframes, subframe_len and subframe_offset
 *       members of every channel describe the layout of the frame.
 *
 *@param s context
 *@return 0 on success, < 0 in case of an error
 */
static int decode_tilehdr(WMAProDecodeCtx *s)
{
    uint16_t num_samples[WMAPRO_MAX_CHANNELS];        /** sum of samples for all currently known subframes of a channel */
    uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /** flag indicating if a channel contains the current subframe */
    int channels_for_cur_subframe = s->num_channels;  /** number of channels that contain the current subframe */
    int fixed_channel_layout = 0;                     /** flag indicating that all channels use the same subframe offsets and sizes */
    int min_channel_len = 0;                          /** smallest sum of samples (channels with this length will be processed first) */
    int c;

    /* Should never consume more than 3073 bits (256 iterations for the
     * while loop when always the minimum amount of 128 samples is subtracted
     * from missing samples in the 8 channel case).
     * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
     */

    /** reset tiling information */
    for (c = 0; c < s->num_channels; c++)
        s->channel[c].num_subframes = 0;

    memset(num_samples, 0, sizeof(num_samples));

    /** the layout flag is only coded if more than one subframe is possible */
    if (s->max_num_subframes == 1 || get_bits1(&s->gb))
        fixed_channel_layout = 1;

    /** loop until the frame data is split between the subframes */
    do {
        int subframe_len;

        /** check which channels contain the subframe */
        for (c = 0; c < s->num_channels; c++) {
            if (num_samples[c] == min_channel_len) {
                /** no bit is coded when the decision is implied: fixed
                    layout, only one candidate channel, or only a subframe
                    of minimum size is left */
                if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
                   (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
                    contains_subframe[c] = 1;
                else
                    contains_subframe[c] = get_bits1(&s->gb);
            } else
                contains_subframe[c] = 0;
        }

        /** get subframe length, subframe_len == 0 is not allowed */
        if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
            return AVERROR_INVALIDDATA;

        /** add subframes to the individual channels and find new min_channel_len */
        /** start with the length reached by the channels that got the
            subframe; the loop below lowers it if another channel is shorter */
        min_channel_len += subframe_len;
        for (c = 0; c < s->num_channels; c++) {
            WMAProChannelCtx* chan = &s->channel[c];

            if (contains_subframe[c]) {
                if (chan->num_subframes >= MAX_SUBFRAMES) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "broken frame: num subframes > 31\n");
                    return AVERROR_INVALIDDATA;
                }
                chan->subframe_len[chan->num_subframes] = subframe_len;
                num_samples[c] += subframe_len;
                ++chan->num_subframes;
                if (num_samples[c] > s->samples_per_frame) {
                    av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
                           "channel len > samples_per_frame\n");
                    return AVERROR_INVALIDDATA;
                }
            } else if (num_samples[c] <= min_channel_len) {
                /** a strictly shorter channel becomes the new minimum and
                    restarts the count of channels for the next subframe */
                if (num_samples[c] < min_channel_len) {
                    channels_for_cur_subframe = 0;
                    min_channel_len = num_samples[c];
                }
                ++channels_for_cur_subframe;
            }
        }
    } while (min_channel_len < s->samples_per_frame);

    /** the offset of a subframe is the sum of the preceding subframe lengths */
    for (c = 0; c < s->num_channels; c++) {
        int i;
        int offset = 0;
        for (i = 0; i < s->channel[c].num_subframes; i++) {
            dprintf(s->avctx, "frame[%i] channel[%i] subframe[%i]"
                    " len %i\n", s->frame_num, c, i,
                    s->channel[c].subframe_len[i]);
            s->channel[c].subframe_offset[i] = offset;
            offset += s->channel[c].subframe_len[i];
        }
    }

    return 0;
}
 590
 591 /**
 592  *@brief Calculate a decorrelation matrix from the bitstream parameters.
 593  *@param s codec context
 594  *@param chgroup channel group for which the matrix needs to be calculated
 595  */
 596 static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
 597                                         WMAProChannelGrp *chgroup)
 598 {
 599     int i;
 600     int offset = 0;
 601     int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
 602     memset(chgroup->decorrelation_matrix, 0, s->num_channels *
 603            s->num_channels * sizeof(*chgroup->decorrelation_matrix));
 604
 605     for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
 606         rotation_offset[i] = get_bits(&s->gb, 6);
 607
 608     for (i = 0; i < chgroup->num_channels; i++)
 609         chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
 610             get_bits1(&s->gb) ? 1.0 : -1.0;
 611
 612     for (i = 1; i < chgroup->num_channels; i++) {
 613         int x;
 614         for (x = 0; x < i; x++) {
 615             int y;
 616             for (y = 0; y < i + 1; y++) {
 617                 float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
 618                 float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
 619                 int n = rotation_offset[offset + x];
 620                 float sinv;
 621                 float cosv;
 622
 623                 if (n < 32) {
 624                     sinv = sin64[n];
 625                     cosv = sin64[32 - n];
 626                 } else {
 627                     sinv =  sin64[64 -  n];
 628                     cosv = -sin64[n  - 32];
 629                 }
 630
 631                 chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
 632                                                (v1 * sinv) - (v2 * cosv);
 633                 chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
 634                                                (v1 * cosv) + (v2 * sinv);
 635             }
 636         }
 637         offset += i;
 638     }
 639 }
 640
 641 /**
 642  *@brief Decode channel transformation parameters
 643  *@param s codec context
 644  *@return 0 in case of success, < 0 in case of bitstream errors
 645  */
 646 static int decode_channel_transform(WMAProDecodeCtx* s)
 647 {
 648     int i;
 649     /* should never consume more than 1921 bits for the 8 channel case
 650      * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
 651      * + MAX_CHANNELS + MAX_BANDS + 1)
 652      */
 653
 654     /** in the one channel case channel transforms are pointless */
 655     s->num_chgroups = 0;
 656     if (s->num_channels > 1) {
 657         int remaining_channels = s->channels_for_cur_subframe;
 658
 659         if (get_bits1(&s->gb)) {
 660             av_log_ask_for_sample(s->avctx,
 661                                   "unsupported channel transform bit\n");
 662             return AVERROR_INVALIDDATA;
 663         }
 664
 665         for (s->num_chgroups = 0; remaining_channels &&
 666              s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
 667             WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
 668             float** channel_data = chgroup->channel_data;
 669             chgroup->num_channels = 0;
 670             chgroup->transform = 0;
 671
 672             /** decode channel mask */
 673             if (remaining_channels > 2) {
 674                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 675                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 676                     if (!s->channel[channel_idx].grouped
 677                         && get_bits1(&s->gb)) {
 678                         ++chgroup->num_channels;
 679                         s->channel[channel_idx].grouped = 1;
 680                         *channel_data++ = s->channel[channel_idx].coeffs;
 681                     }
 682                 }
 683             } else {
 684                 chgroup->num_channels = remaining_channels;
 685                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 686                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 687                     if (!s->channel[channel_idx].grouped)
 688                         *channel_data++ = s->channel[channel_idx].coeffs;
 689                     s->channel[channel_idx].grouped = 1;
 690                 }
 691             }
 692
 693             /** decode transform type */
 694             if (chgroup->num_channels == 2) {
 695                 if (get_bits1(&s->gb)) {
 696                     if (get_bits1(&s->gb)) {
 697                         av_log_ask_for_sample(s->avctx,
 698                                               "unsupported channel transform type\n");
 699                     }
 700                 } else {
 701                     chgroup->transform = 1;
 702                     if (s->num_channels == 2) {
 703                         chgroup->decorrelation_matrix[0] =  1.0;
 704                         chgroup->decorrelation_matrix[1] = -1.0;
 705                         chgroup->decorrelation_matrix[2] =  1.0;
 706                         chgroup->decorrelation_matrix[3] =  1.0;
 707                     } else {
 708                         /** cos(pi/4) */
 709                         chgroup->decorrelation_matrix[0] =  0.70703125;
 710                         chgroup->decorrelation_matrix[1] = -0.70703125;
 711                         chgroup->decorrelation_matrix[2] =  0.70703125;
 712                         chgroup->decorrelation_matrix[3] =  0.70703125;
 713                     }
 714                 }
 715             } else if (chgroup->num_channels > 2) {
 716                 if (get_bits1(&s->gb)) {
 717                     chgroup->transform = 1;
 718                     if (get_bits1(&s->gb)) {
 719                         decode_decorrelation_matrix(s, chgroup);
 720                     } else {
 721                         /** FIXME: more than 6 coupled channels not supported */
 722                         if (chgroup->num_channels > 6) {
 723                             av_log_ask_for_sample(s->avctx,
 724                                                   "coupled channels > 6\n");
 725                         } else {
 726                             memcpy(chgroup->decorrelation_matrix,
 727                                    default_decorrelation[chgroup->num_channels],
 728                                    chgroup->num_channels * chgroup->num_channels *
 729                                    sizeof(*chgroup->decorrelation_matrix));
 730                         }
 731                     }
 732                 }
 733             }
 734
 735             /** decode transform on / off */
 736             if (chgroup->transform) {
 737                 if (!get_bits1(&s->gb)) {
 738                     int i;
 739                     /** transform can be enabled for individual bands */
 740                     for (i = 0; i < s->num_bands; i++) {
 741                         chgroup->transform_band[i] = get_bits1(&s->gb);
 742                     }
 743                 } else {
 744                     memset(chgroup->transform_band, 1, s->num_bands);
 745                 }
 746             }
 747             remaining_channels -= chgroup->num_channels;
 748         }
 749     }
 750     return 0;
 751 }
 752
 753 /**
 754  *@brief Extract the coefficients from the bitstream.
 755  *@param s codec context
 756  *@param c current channel number
 757  *@return 0 on success, < 0 in case of bitstream errors
 758  */
 759 static int decode_coeffs(WMAProDecodeCtx *s, int c)
 760 {
 761     int vlctable;
 762     VLC* vlc;
 763     WMAProChannelCtx* ci = &s->channel[c];
 764     int rl_mode = 0;
 765     int cur_coeff = 0;
 766     int num_zeros = 0;
 767     const uint16_t* run;
 768     const uint16_t* level;
 769
 770     dprintf(s->avctx, "decode coefficients for channel %i\n", c);
 771
 772     vlctable = get_bits1(&s->gb);
 773     vlc = &coef_vlc[vlctable];
 774
 775     if (vlctable) {
 776         run = coef1_run;
 777         level = coef1_level;
 778     } else {
 779         run = coef0_run;
 780         level = coef0_level;
 781     }
 782
 783     /** decode vector coefficients (consumes up to 167 bits per iteration for
 784       4 vector coded large values) */
 785     while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
 786         int vals[4];
 787         int i;
 788         unsigned int idx;
 789
 790         idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
 791
 792         if (idx == HUFF_VEC4_SIZE - 1) {
 793             for (i = 0; i < 4; i += 2) {
 794                 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
 795                 if (idx == HUFF_VEC2_SIZE - 1) {
 796                     vals[i] = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 797                     if (vals[i] == HUFF_VEC1_SIZE - 1)
 798                         vals[i] += ff_wma_get_large_val(&s->gb);
 799                     vals[i+1] = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 800                     if (vals[i+1] == HUFF_VEC1_SIZE - 1)
 801                         vals[i+1] += ff_wma_get_large_val(&s->gb);
 802                 } else {
 803                     vals[i]   = symbol_to_vec2[idx] >> 4;
 804                     vals[i+1] = symbol_to_vec2[idx] & 0xF;
 805                 }
 806             }
 807         } else {
 808             vals[0] =  symbol_to_vec4[idx] >> 12;
 809             vals[1] = (symbol_to_vec4[idx] >> 8) & 0xF;
 810             vals[2] = (symbol_to_vec4[idx] >> 4) & 0xF;
 811             vals[3] =  symbol_to_vec4[idx]       & 0xF;
 812         }
 813
 814         /** decode sign */
 815         for (i = 0; i < 4; i++) {
 816             if (vals[i]) {
 817                 int sign = get_bits1(&s->gb) - 1;
 818                 ci->coeffs[cur_coeff] = (vals[i] ^ sign) - sign;
 819                 num_zeros = 0;
 820             } else {
 821                 ci->coeffs[cur_coeff] = 0;
 822                 /** switch to run level mode when subframe_len / 128 zeros
 823                     were found in a row */
 824                 rl_mode |= (++num_zeros > s->subframe_len >> 8);
 825             }
 826             ++cur_coeff;
 827         }
 828     }
 829
 830     /** decode run level coded coefficients */
 831     if (rl_mode) {
 832         memset(&ci->coeffs[cur_coeff], 0,
 833                sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
 834         if (ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
 835                                     level, run, 1, ci->coeffs,
 836                                     cur_coeff, s->subframe_len,
 837                                     s->subframe_len, s->esc_len, 0))
 838             return AVERROR_INVALIDDATA;
 839     }
 840
 841     return 0;
 842 }
 843
 844 /**
 845  *@brief Extract scale factors from the bitstream.
 846  *@param s codec context
 847  *@return 0 on success, < 0 in case of bitstream errors
 848  */
 849 static int decode_scale_factors(WMAProDecodeCtx* s)
 850 {
 851     int i;
 852
 853     /** should never consume more than 5344 bits
 854      *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
 855      */
 856
 857     for (i = 0; i < s->channels_for_cur_subframe; i++) {
 858         int c = s->channel_indexes_for_cur_subframe[i];
 859         int* sf;
 860         int* sf_end = s->channel[c].scale_factors + s->num_bands;
 861
 862         /** resample scale factors for the new block size
 863          *  as the scale factors might need to be resampled several times
 864          *  before some  new values are transmitted, a backup of the last
 865          *  transmitted scale factors is kept in saved_scale_factors
 866          */
 867         if (s->channel[c].reuse_sf) {
 868             const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
 869             int b;
 870             for (b = 0; b < s->num_bands; b++)
 871                 s->channel[c].scale_factors[b] =
 872                                    s->channel[c].saved_scale_factors[*sf_offsets++];
 873         }
 874
 875         if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
 876
 877             if (!s->channel[c].reuse_sf) {
 878                 int val;
 879                 /** decode DPCM coded scale factors */
 880                 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
 881                 val = 45 / s->channel[c].scale_factor_step;
 882                 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
 883                     val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
 884                     *sf = val;
 885                 }
 886             } else {
 887                 int i;
 888                 /** run level decode differences to the resampled factors */
 889                 for (i = 0; i < s->num_bands; i++) {
 890                     int idx;
 891                     int skip;
 892                     int val;
 893                     int sign;
 894
 895                     idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
 896
 897                     if (!idx) {
 898                         uint32_t code = get_bits(&s->gb, 14);
 899                         val  =  code >> 6;
 900                         sign = (code & 1) - 1;
 901                         skip = (code & 0x3f) >> 1;
 902                     } else if (idx == 1) {
 903                         break;
 904                     } else {
 905                         skip = scale_rl_run[idx];
 906                         val  = scale_rl_level[idx];
 907                         sign = get_bits1(&s->gb)-1;
 908                     }
 909
 910                     i += skip;
 911                     if (i >= s->num_bands) {
 912                         av_log(s->avctx, AV_LOG_ERROR,
 913                                "invalid scale factor coding\n");
 914                         return AVERROR_INVALIDDATA;
 915                     }
 916                     s->channel[c].scale_factors[i] += (val ^ sign) - sign;
 917                 }
 918             }
 919
 920             /** save transmitted scale factors so that they can be reused for
 921                 the next subframe */
 922             memcpy(s->channel[c].saved_scale_factors,
 923                    s->channel[c].scale_factors, s->num_bands *
 924                    sizeof(*s->channel[c].saved_scale_factors));
 925             s->channel[c].table_idx = s->table_idx;
 926             s->channel[c].reuse_sf  = 1;
 927         }
 928
 929         /** calculate new scale factor maximum */
 930         s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
 931         for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
 932             s->channel[c].max_scale_factor =
 933                 FFMAX(s->channel[c].max_scale_factor, *sf);
 934         }
 935
 936     }
 937     return 0;
 938 }
 939
 940 /**
 941  *@brief Reconstruct the individual channel data.
 942  *@param s codec context
 943  */
 944 static void inverse_channel_transform(WMAProDecodeCtx *s)
 945 {
 946     int i;
 947
 948     for (i = 0; i < s->num_chgroups; i++) {
 949         if (s->chgroup[i].transform) {
 950             float data[WMAPRO_MAX_CHANNELS];
 951             const int num_channels = s->chgroup[i].num_channels;
 952             float** ch_data = s->chgroup[i].channel_data;
 953             float** ch_end = ch_data + num_channels;
 954             const int8_t* tb = s->chgroup[i].transform_band;
 955             int16_t* sfb;
 956
 957             /** multichannel decorrelation */
 958             for (sfb = s->cur_sfb_offsets;
 959                  sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
 960                 int y;
 961                 if (*tb++ == 1) {
 962                     /** multiply values with the decorrelation_matrix */
 963                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
 964                         const float* mat = s->chgroup[i].decorrelation_matrix;
 965                         const float* data_end = data + num_channels;
 966                         float* data_ptr = data;
 967                         float** ch;
 968
 969                         for (ch = ch_data; ch < ch_end; ch++)
 970                             *data_ptr++ = (*ch)[y];
 971
 972                         for (ch = ch_data; ch < ch_end; ch++) {
 973                             float sum = 0;
 974                             data_ptr = data;
 975                             while (data_ptr < data_end)
 976                                 sum += *data_ptr++ * *mat++;
 977
 978                             (*ch)[y] = sum;
 979                         }
 980                     }
 981                 } else if (s->num_channels == 2) {
 982                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
 983                         ch_data[0][y] *= 181.0 / 128;
 984                         ch_data[1][y] *= 181.0 / 128;
 985                     }
 986                 }
 987             }
 988         }
 989     }
 990 }
 991
 992 /**
 993  *@brief Apply sine window and reconstruct the output buffer.
 994  *@param s codec context
 995  */
 996 static void wmapro_window(WMAProDecodeCtx *s)
 997 {
 998     int i;
 999     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1000         int c = s->channel_indexes_for_cur_subframe[i];
1001         float* window;
1002         int winlen = s->channel[c].prev_block_len;
1003         float* start = s->channel[c].coeffs - (winlen >> 1);
1004
1005         if (s->subframe_len < winlen) {
1006             start += (winlen - s->subframe_len) >> 1;
1007             winlen = s->subframe_len;
1008         }
1009
1010         window = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
1011
1012         winlen >>= 1;
1013
1014         s->dsp.vector_fmul_window(start, start, start + winlen,
1015                                   window, 0, winlen);
1016
1017         s->channel[c].prev_block_len = s->subframe_len;
1018     }
1019 }
1020
1021 /**
1022  *@brief Decode a single subframe (block).
1023  *@param s codec context
1024  *@return 0 on success, < 0 when decoding failed
1025  */
1026 static int decode_subframe(WMAProDecodeCtx *s)
1027 {
1028     int offset = s->samples_per_frame;
1029     int subframe_len = s->samples_per_frame;
1030     int i;
1031     int total_samples   = s->samples_per_frame * s->num_channels;
1032     int transmit_coeffs = 0;
1033     int cur_subwoofer_cutoff;
1034
1035     s->subframe_offset = get_bits_count(&s->gb);
1036
1037     /** reset channel context and find the next block offset and size
1038         == the next block of the channel with the smallest number of
1039         decoded samples
1040     */
1041     for (i = 0; i < s->num_channels; i++) {
1042         s->channel[i].grouped = 0;
1043         if (offset > s->channel[i].decoded_samples) {
1044             offset = s->channel[i].decoded_samples;
1045             subframe_len =
1046                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1047         }
1048     }
1049
1050     dprintf(s->avctx,
1051             "processing subframe with offset %i len %i\n", offset, subframe_len);
1052
1053     /** get a list of all channels that contain the estimated block */
1054     s->channels_for_cur_subframe = 0;
1055     for (i = 0; i < s->num_channels; i++) {
1056         const int cur_subframe = s->channel[i].cur_subframe;
1057         /** substract already processed samples */
1058         total_samples -= s->channel[i].decoded_samples;
1059
1060         /** and count if there are multiple subframes that match our profile */
1061         if (offset == s->channel[i].decoded_samples &&
1062             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1063             total_samples -= s->channel[i].subframe_len[cur_subframe];
1064             s->channel[i].decoded_samples +=
1065                 s->channel[i].subframe_len[cur_subframe];
1066             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1067             ++s->channels_for_cur_subframe;
1068         }
1069     }
1070
1071     /** check if the frame will be complete after processing the
1072         estimated block */
1073     if (!total_samples)
1074         s->parsed_all_subframes = 1;
1075
1076
1077     dprintf(s->avctx, "subframe is part of %i channels\n",
1078             s->channels_for_cur_subframe);
1079
1080     /** calculate number of scale factor bands and their offsets */
1081     s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
1082     s->num_bands         = s->num_sfb[s->table_idx];
1083     s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
1084     cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1085
1086     /** configure the decoder for the current subframe */
1087     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1088         int c = s->channel_indexes_for_cur_subframe[i];
1089
1090         s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
1091                                                   + offset];
1092     }
1093
1094     s->subframe_len = subframe_len;
1095     s->esc_len = av_log2(s->subframe_len - 1) + 1;
1096
1097     /** skip extended header if any */
1098     if (get_bits1(&s->gb)) {
1099         int num_fill_bits;
1100         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1101             int len = get_bits(&s->gb, 4);
1102             num_fill_bits = get_bits(&s->gb, len) + 1;
1103         }
1104
1105         if (num_fill_bits >= 0) {
1106             if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1107                 av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
1108                 return AVERROR_INVALIDDATA;
1109             }
1110
1111             skip_bits_long(&s->gb, num_fill_bits);
1112         }
1113     }
1114
1115     /** no idea for what the following bit is used */
1116     if (get_bits1(&s->gb)) {
1117         av_log_ask_for_sample(s->avctx, "reserved bit set\n");
1118         return AVERROR_INVALIDDATA;
1119     }
1120
1121
1122     if (decode_channel_transform(s) < 0)
1123         return AVERROR_INVALIDDATA;
1124
1125
1126     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1127         int c = s->channel_indexes_for_cur_subframe[i];
1128         if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1129             transmit_coeffs = 1;
1130     }
1131
1132     if (transmit_coeffs) {
1133         int step;
1134         int quant_step = 90 * s->bits_per_sample >> 4;
1135         if ((get_bits1(&s->gb))) {
1136             /** FIXME: might change run level mode decision */
1137             av_log_ask_for_sample(s->avctx, "unsupported quant step coding\n");
1138             return AVERROR_INVALIDDATA;
1139         }
1140         /** decode quantization step */
1141         step = get_sbits(&s->gb, 6);
1142         quant_step += step;
1143         if (step == -32 || step == 31) {
1144             const int sign = (step == 31) - 1;
1145             int quant = 0;
1146             while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1147                    (step = get_bits(&s->gb, 5)) == 31) {
1148                 quant += 31;
1149             }
1150             quant_step += ((quant + step) ^ sign) - sign;
1151         }
1152         if (quant_step < 0) {
1153             av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
1154         }
1155
1156         /** decode quantization step modifiers for every channel */
1157
1158         if (s->channels_for_cur_subframe == 1) {
1159             s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1160         } else {
1161             int modifier_len = get_bits(&s->gb, 3);
1162             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1163                 int c = s->channel_indexes_for_cur_subframe[i];
1164                 s->channel[c].quant_step = quant_step;
1165                 if (get_bits1(&s->gb)) {
1166                     if (modifier_len) {
1167                         s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1168                     } else
1169                         ++s->channel[c].quant_step;
1170                 }
1171             }
1172         }
1173
1174         /** decode scale factors */
1175         if (decode_scale_factors(s) < 0)
1176             return AVERROR_INVALIDDATA;
1177     }
1178
1179     dprintf(s->avctx, "BITSTREAM: subframe header length was %i\n",
1180             get_bits_count(&s->gb) - s->subframe_offset);
1181
1182     /** parse coefficients */
1183     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1184         int c = s->channel_indexes_for_cur_subframe[i];
1185         if (s->channel[c].transmit_coefs &&
1186             get_bits_count(&s->gb) < s->num_saved_bits) {
1187             decode_coeffs(s, c);
1188         } else
1189             memset(s->channel[c].coeffs, 0,
1190                    sizeof(*s->channel[c].coeffs) * subframe_len);
1191     }
1192
1193     dprintf(s->avctx, "BITSTREAM: subframe length was %i\n",
1194             get_bits_count(&s->gb) - s->subframe_offset);
1195
1196     if (transmit_coeffs) {
1197         /** reconstruct the per channel data */
1198         inverse_channel_transform(s);
1199         for (i = 0; i < s->channels_for_cur_subframe; i++) {
1200             int c = s->channel_indexes_for_cur_subframe[i];
1201             const int* sf = s->channel[c].scale_factors;
1202             int b;
1203
1204             if (c == s->lfe_channel)
1205                 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1206                        (subframe_len - cur_subwoofer_cutoff));
1207
1208             /** inverse quantization and rescaling */
1209             for (b = 0; b < s->num_bands; b++) {
1210                 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1211                 const int exp = s->channel[c].quant_step -
1212                             (s->channel[c].max_scale_factor - *sf++) *
1213                             s->channel[c].scale_factor_step;
1214                 const float quant = pow(10.0, exp / 20.0);
1215                 int start;
1216
1217                 for (start = s->cur_sfb_offsets[b]; start < end; start++)
1218                     s->tmp[start] = s->channel[c].coeffs[start] * quant;
1219             }
1220
1221             /** apply imdct (ff_imdct_half == DCTIV with reverse) */
1222             ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
1223                           s->channel[c].coeffs, s->tmp);
1224         }
1225     }
1226
1227     /** window and overlapp-add */
1228     wmapro_window(s);
1229
1230     /** handled one subframe */
1231     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1232         int c = s->channel_indexes_for_cur_subframe[i];
1233         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1234             av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1235             return AVERROR_INVALIDDATA;
1236         }
1237         ++s->channel[c].cur_subframe;
1238     }
1239
1240     return 0;
1241 }
1242
1243 /**
1244  *@brief Decode one WMA frame.
1245  *@param s codec context
1246  *@return 0 if the trailer bit indicates that this is the last frame,
1247  *        1 if there are additional frames
1248  */
1249 static int decode_frame(WMAProDecodeCtx *s)
1250 {
1251     GetBitContext* gb = &s->gb;
1252     int more_frames = 0;
1253     int len = 0;
1254     int i;
1255
1256     /** check for potential output buffer overflow */
1257     if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
1258         av_log(s->avctx, AV_LOG_ERROR,
1259                "not enough space for the output samples\n");
1260         s->packet_loss = 1;
1261         return 0;
1262     }
1263
1264     /** get frame length */
1265     if (s->len_prefix)
1266         len = get_bits(gb, s->log2_frame_size);
1267
1268     dprintf(s->avctx, "decoding frame with length %x\n", len);
1269
1270     /** decode tile information */
1271     if (decode_tilehdr(s)) {
1272         s->packet_loss = 1;
1273         return 0;
1274     }
1275
1276     /** read postproc transform */
1277     if (s->num_channels > 1 && get_bits1(gb)) {
1278         av_log_ask_for_sample(s->avctx, "Unsupported postproc transform found\n");
1279         s->packet_loss = 1;
1280         return 0;
1281     }
1282
1283     /** read drc info */
1284     if (s->dynamic_range_compression) {
1285         s->drc_gain = get_bits(gb, 8);
1286         dprintf(s->avctx, "drc_gain %i\n", s->drc_gain);
1287     }
1288
1289     /** no idea what these are for, might be the number of samples
1290         that need to be skipped at the beginning or end of a stream */
1291     if (get_bits1(gb)) {
1292         int skip;
1293
1294         /** usually true for the first frame */
1295         if (get_bits1(gb)) {
1296             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1297             dprintf(s->avctx, "start skip: %i\n", skip);
1298         }
1299
1300         /** sometimes true for the last frame */
1301         if (get_bits1(gb)) {
1302             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1303             dprintf(s->avctx, "end skip: %i\n", skip);
1304         }
1305
1306     }
1307
1308     dprintf(s->avctx, "BITSTREAM: frame header length was %i\n",
1309             get_bits_count(gb) - s->frame_offset);
1310
1311     /** reset subframe states */
1312     s->parsed_all_subframes = 0;
1313     for (i = 0; i < s->num_channels; i++) {
1314         s->channel[i].decoded_samples = 0;
1315         s->channel[i].cur_subframe    = 0;
1316         s->channel[i].reuse_sf        = 0;
1317     }
1318
1319     /** decode all subframes */
1320     while (!s->parsed_all_subframes) {
1321         if (decode_subframe(s) < 0) {
1322             s->packet_loss = 1;
1323             return 0;
1324         }
1325     }
1326
1327     /** interleave samples and write them to the output buffer */
1328     for (i = 0; i < s->num_channels; i++) {
1329         float* ptr;
1330         int incr = s->num_channels;
1331         float* iptr = s->channel[i].out;
1332         int x;
1333
1334         ptr = s->samples + i;
1335
1336         for (x = 0; x < s->samples_per_frame; x++) {
1337             *ptr = av_clipf(*iptr++, -1.0, 32767.0 / 32768.0);
1338             ptr += incr;
1339         }
1340
1341         /** reuse second half of the IMDCT output for the next frame */
1342         memcpy(&s->channel[i].out[0],
1343                &s->channel[i].out[s->samples_per_frame],
1344                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1345     }
1346
1347     if (s->skip_frame) {
1348         s->skip_frame = 0;
1349     } else
1350         s->samples += s->num_channels * s->samples_per_frame;
1351
1352     if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1353         /** FIXME: not sure if this is always an error */
1354         av_log(s->avctx, AV_LOG_ERROR, "frame[%i] would have to skip %i bits\n",
1355                s->frame_num, len - (get_bits_count(gb) - s->frame_offset) - 1);
1356         s->packet_loss = 1;
1357         return 0;
1358     }
1359
1360     /** skip the rest of the frame data */
1361     skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1362
1363     /** decode trailer bit */
1364     more_frames = get_bits1(gb);
1365
1366     ++s->frame_num;
1367     return more_frames;
1368 }
1369
1370 /**
1371  *@brief Calculate remaining input buffer length.
1372  *@param s codec context
1373  *@param gb bitstream reader context
1374  *@return remaining size in bits
1375  */
1376 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1377 {
1378     return s->buf_bit_size - get_bits_count(gb);
1379 }
1380
1381 /**
1382  *@brief Fill the bit reservoir with a (partial) frame.
1383  *@param s codec context
1384  *@param gb bitstream reader context
1385  *@param len length of the partial frame
1386  *@param append decides wether to reset the buffer or not
1387  */
1388 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1389                       int append)
1390 {
1391     int buflen;
1392
1393     /** when the frame data does not need to be concatenated, the input buffer
1394         is resetted and additional bits from the previous frame are copyed
1395         and skipped later so that a fast byte copy is possible */
1396
1397     if (!append) {
1398         s->frame_offset = get_bits_count(gb) & 7;
1399         s->num_saved_bits = s->frame_offset;
1400         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1401     }
1402
1403     buflen = (s->num_saved_bits + len + 8) >> 3;
1404
1405     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1406         av_log_ask_for_sample(s->avctx, "input buffer too small\n");
1407         s->packet_loss = 1;
1408         return;
1409     }
1410
1411     s->num_saved_bits += len;
1412     if (!append) {
1413         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1414                      s->num_saved_bits);
1415     } else {
1416         int align = 8 - (get_bits_count(gb) & 7);
1417         align = FFMIN(align, len);
1418         put_bits(&s->pb, align, get_bits(gb, align));
1419         len -= align;
1420         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1421     }
1422     skip_bits_long(gb, len);
1423
1424     {
1425         PutBitContext tmp = s->pb;
1426         flush_put_bits(&tmp);
1427     }
1428
1429     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1430     skip_bits(&s->gb, s->frame_offset);
1431 }
1432
1433 /**
1434  *@brief Decode a single WMA packet.
1435  *@param avctx codec context
1436  *@param data the output buffer
1437  *@param data_size number of bytes that were written to the output buffer
1438  *@param avpkt input packet
1439  *@return number of bytes that were read from the input buffer
1440  */
1441 static int decode_packet(AVCodecContext *avctx,
1442                          void *data, int *data_size, AVPacket* avpkt)
1443 {
1444     GetBitContext gb;
1445     WMAProDecodeCtx *s = avctx->priv_data;
1446     const uint8_t* buf   = avpkt->data;
1447     int buf_size         = avpkt->size;
1448     int more_frames      = 1;
1449     int num_bits_prev_frame;
1450     int packet_sequence_number;
1451
1452     s->samples      = data;
1453     s->samples_end  = (float*)((int8_t*)data + *data_size);
1454     s->buf_bit_size = buf_size << 3;
1455
1456
1457     *data_size = 0;
1458
1459     /** sanity check for the buffer length */
1460     if (buf_size < avctx->block_align)
1461         return 0;
1462
1463     buf_size = avctx->block_align;
1464
1465     /** parse packet header */
1466     init_get_bits(&gb, buf, s->buf_bit_size);
1467     packet_sequence_number = get_bits(&gb, 4);
1468     skip_bits(&gb, 2);
1469
1470     /** get number of bits that need to be added to the previous frame */
1471     num_bits_prev_frame = get_bits(&gb, s->log2_frame_size);
1472     dprintf(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
1473             num_bits_prev_frame);
1474
1475     /** check for packet loss */
1476     if (!s->packet_loss &&
1477         ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1478         s->packet_loss = 1;
1479         av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
1480                s->packet_sequence_number, packet_sequence_number);
1481     }
1482     s->packet_sequence_number = packet_sequence_number;
1483
1484     if (num_bits_prev_frame > 0) {
1485         /** append the previous frame data to the remaining data from the
1486             previous packet to create a full frame */
1487         save_bits(s, &gb, num_bits_prev_frame, 1);
1488         dprintf(avctx, "accumulated %x bits of frame data\n",
1489                 s->num_saved_bits - s->frame_offset);
1490
1491         /** decode the cross packet frame if it is valid */
1492         if (!s->packet_loss)
1493             decode_frame(s);
1494     } else if (s->num_saved_bits - s->frame_offset) {
1495         dprintf(avctx, "ignoring %x previously saved bits\n",
1496                 s->num_saved_bits - s->frame_offset);
1497     }
1498
1499     s->packet_loss = 0;
1500     /** decode the rest of the packet */
1501     while (!s->packet_loss && more_frames &&
1502            remaining_bits(s, &gb) > s->log2_frame_size) {
1503         int frame_size = show_bits(&gb, s->log2_frame_size);
1504
1505         /** there is enough data for a full frame */
1506         if (remaining_bits(s, &gb) >= frame_size && frame_size > 0) {
1507             save_bits(s, &gb, frame_size, 0);
1508
1509             /** decode the frame */
1510             more_frames = decode_frame(s);
1511
1512             if (!more_frames) {
1513                 dprintf(avctx, "no more frames\n");
1514             }
1515         } else
1516             more_frames = 0;
1517     }
1518
1519     if (!s->packet_loss && remaining_bits(s, &gb) > 0) {
1520         /** save the rest of the data so that it can be decoded
1521             with the next packet */
1522         save_bits(s, &gb, remaining_bits(s, &gb), 0);
1523     }
1524
1525     *data_size = (int8_t *)s->samples - (int8_t *)data;
1526
1527     return avctx->block_align;
1528 }
1529
1530 /**
1531  *@brief Clear decoder buffers (for seeking).
1532  *@param avctx codec context
1533  */
1534 static void flush(AVCodecContext *avctx)
1535 {
1536     WMAProDecodeCtx *s = avctx->priv_data;
1537     int i;
1538     /** reset output buffer as a part of it is used during the windowing of a
1539         new frame */
1540     for (i = 0; i < s->num_channels; i++)
1541         memset(s->channel[i].out, 0, s->samples_per_frame *
1542                sizeof(*s->channel[i].out));
1543     s->packet_loss = 1;
1544 }
1545
1546
1547 /**
1548  *@brief wmapro decoder
1549  */
1550 AVCodec wmapro_decoder = {
1551     "wmapro",
1552     CODEC_TYPE_AUDIO,
1553     CODEC_ID_WMAPRO,
1554     sizeof(WMAProDecodeCtx),
1555     decode_init,
1556     NULL,
1557     decode_end,
1558     decode_packet,
1559     .flush= flush,
1560     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
1561 };