apps/codecs/libwmapro/wmaprodec.c

   1 /*
   2  * Wmapro compatible decoder
   3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
   4  * Copyright (c) 2008 - 2009 Sascha Sommer, Benjamin Larsson
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file  libavcodec/wmaprodec.c
  25  * @brief wmapro decoder implementation
  26  * Wmapro is an MDCT based codec comparable to wma standard or AAC.
  27  * The decoding therefore consists of the following steps:
  28  * - bitstream decoding
  29  * - reconstruction of per-channel data
  30  * - rescaling and inverse quantization
  31  * - IMDCT
   32  * - windowing and overlap-add
  33  *
  34  * The compressed wmapro bitstream is split into individual packets.
  35  * Every such packet contains one or more wma frames.
  36  * The compressed frames may have a variable length and frames may
  37  * cross packet boundaries.
  38  * Common to all wmapro frames is the number of samples that are stored in
  39  * a frame.
  40  * The number of samples and a few other decode flags are stored
  41  * as extradata that has to be passed to the decoder.
  42  *
  43  * The wmapro frames themselves are again split into a variable number of
  44  * subframes. Every subframe contains the data for 2^N time domain samples
  45  * where N varies between 7 and 12.
  46  *
  47  * Example wmapro bitstream (in samples):
  48  *
  49  * ||   packet 0           || packet 1 || packet 2      packets
  50  * ---------------------------------------------------
  51  * || frame 0      || frame 1       || frame 2    ||    frames
  52  * ---------------------------------------------------
  53  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
  54  * ---------------------------------------------------
  55  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
  56  * ---------------------------------------------------
  57  *
   58  * The frame layouts for the individual channels of a wma frame do not need
   59  * to be the same.
  60  *
  61  * However, if the offsets and lengths of several subframes of a frame are the
  62  * same, the subframes of the channels can be grouped.
  63  * Every group may then use special coding techniques like M/S stereo coding
  64  * to improve the compression ratio. These channel transformations do not
  65  * need to be applied to a whole subframe. Instead, they can also work on
  66  * individual scale factor bands (see below).
  67  * The coefficients that carry the audio signal in the frequency domain
  68  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
  69  * In addition to that, the encoder can switch to a runlevel coding scheme
  70  * by transmitting subframe_length / 128 zero coefficients.
  71  *
  72  * Before the audio signal can be converted to the time domain, the
  73  * coefficients have to be rescaled and inverse quantized.
  74  * A subframe is therefore split into several scale factor bands that get
  75  * scaled individually.
  76  * Scale factors are submitted for every frame but they might be shared
  77  * between the subframes of a channel. Scale factors are initially DPCM-coded.
  78  * Once scale factors are shared, the differences are transmitted as runlevel
  79  * codes.
  80  * Every subframe length and offset combination in the frame layout shares a
  81  * common quantization factor that can be adjusted for every channel by a
  82  * modifier.
  83  * After the inverse quantization, the coefficients get processed by an IMDCT.
  84  * The resulting values are then windowed with a sine window and the first half
  85  * of the values are added to the second half of the output from the previous
  86  * subframe in order to reconstruct the output samples.
  87  */
  88
  89 #include "ffmpeg_get_bits.h"
  90 #include "ffmpeg_put_bits.h"
  91 #include "wmaprodata.h"
  92 #include "wma.h"
  93 #include "wmaprodec.h"
  94 #include "wmapro_mdct.h"
  95 #include "mdct_tables.h"
  96 #include "quant.h"
  97 #include "wmapro_math.h"
  98 #include "codecs.h"
  99 #include "codeclib.h"
 100 #include "../libasf/asf.h"
 101
 102 /* Uncomment the following line to enable some debug output */
 103 //#define WMAPRO_DUMP_CTX_EN
 104
 105 #undef DEBUGF
 106 #ifdef WMAPRO_DUMP_CTX_EN
 107 #       define DEBUGF printf
 108 #else
 109 #       define DEBUGF(...)
 110 #endif
 111
 112 /* Some defines to make it compile */
 113 #define AVERROR_INVALIDDATA  -1
 114 #define AVERROR_PATCHWELCOME -2
 115 #define av_log_ask_for_sample(...)
 116
 117 /* Taken from avcodec.h */
 118 #define FF_INPUT_BUFFER_PADDING_SIZE 8
 119
 120 /* Taken from libavutil/mem.h */
 121 #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
 122
 123 /* Taken from libavutil/common.h */
 124 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 125 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 126
 127 /** current decoder limitations */
 128 #define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
 129 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
 130 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 131 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
 132
 133 #define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 134 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
 135 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
 136 #define WMAPRO_OUT_BUF_SIZE   (WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2)
 137
 138
 139 #define VLCBITS            9
 140 #define SCALEVLCBITS       8
 141 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
 142 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
 143 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
 144 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
 145 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
 146
 147 static VLC              sf_vlc;           ///< scale factor DPCM vlc
 148 static VLC              sf_rl_vlc;        ///< scale factor run length vlc
 149 static VLC              vec4_vlc;         ///< 4 coefficients per symbol
 150 static VLC              vec2_vlc;         ///< 2 coefficients per symbol
 151 static VLC              vec1_vlc;         ///< 1 coefficient per symbol
 152 static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
 153 //static float            sin64[33];        ///< sinus table for decorrelation
 154
 155 /* Global defined arrays to allow IRAM usage for some models. */
 156 static int32_t g_tmp[WMAPRO_BLOCK_MAX_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
 157 static int32_t g_out_ch0[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR;
 158 static int32_t g_out_ch1[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
 159 static int32_t g_out_multichannel[WMAPRO_MAX_CHANNELS-2][WMAPRO_OUT_BUF_SIZE];
 160
 161 /**
 162  * @brief frame specific decoder context for a single channel
 163  */
 164 typedef struct {
 165     int16_t  prev_block_len;                          ///< length of the previous block
 166     uint8_t  transmit_coefs;
 167     uint8_t  num_subframes;
 168     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
 169     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
 170     uint8_t  cur_subframe;                            ///< current subframe number
 171     uint16_t decoded_samples;                         ///< number of already processed samples
 172     uint8_t  grouped;                                 ///< channel is part of a group
 173     int      quant_step;                              ///< quantization step for the current subframe
 174     int8_t   reuse_sf;                                ///< share scale factors between subframes
 175     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
 176     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
 177     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
 178     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
 179     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
 180     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
 181     int32_t* coeffs;                                  ///< pointer to the subframe decode buffer
 182     int32_t* out;                                     ///< output buffer
 183 } WMAProChannelCtx;
 184
 185 /**
 186  * @brief channel group for channel transformations
 187  */
 188 typedef struct {
 189     uint8_t num_channels;                                     ///< number of channels in the group
 190     int8_t  transform;                                        ///< transform on / off
 191     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
 192     //float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
 193     int32_t*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients
 194     int32_t   fixdecorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
 195 } WMAProChannelGrp;
 196
 197 /**
 198  * @brief main decoder context
 199  */
 200 typedef struct WMAProDecodeCtx {
 201     /* generic decoder variables */
 202     uint8_t          frame_data[MAX_FRAMESIZE +
 203                       FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 204     PutBitContext    pb;                            ///< context for filling the frame_data buffer
 205     int32_t*         tmp;                           ///< IMDCT input buffer
 206
 207     /* frame size dependent frame information (set during initialization) */
 208     uint32_t         decode_flags;                  ///< used compression features
 209     uint8_t          len_prefix;                    ///< frame is prefixed with its length
 210     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
 211     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
 212     uint16_t         samples_per_frame;             ///< number of samples to output
 213     uint16_t         log2_frame_size;
 214     int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.num_channels)
 215     int8_t           lfe_channel;                   ///< lfe channel index
 216     uint8_t          max_num_subframes;
 217     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
 218     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
 219     uint16_t         min_samples_per_subframe;
 220     int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
 221     int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
 222     int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
 223     int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
 224
 225     /* packet decode state */
 226     GetBitContext    pgb;                           ///< bitstream reader context for the packet
 227     uint8_t          packet_offset;                 ///< frame offset in the packet
 228     uint8_t          packet_sequence_number;        ///< current packet number
 229     int              num_saved_bits;                ///< saved number of bits
 230     int              frame_offset;                  ///< frame offset in the bit reservoir
 231     int              subframe_offset;               ///< subframe offset in the bit reservoir
 232     uint8_t          packet_loss;                   ///< set in case of bitstream error
 233     uint8_t          packet_done;                   ///< set when a packet is fully decoded
 234
 235     /* frame decode state */
 236     uint32_t         frame_num;                     ///< current frame number
 237     GetBitContext    gb;                            ///< bitstream reader context
 238     int              buf_bit_size;                  ///< buffer size in bits
 239     int32_t*         samples;
 240     int32_t*         samples_end;                   ///< maximum samplebuffer pointer
 241     uint8_t          drc_gain;                      ///< gain for the DRC tool
 242     int8_t           skip_frame;                    ///< skip output step
 243     int8_t           parsed_all_subframes;          ///< all subframes decoded?
 244
 245     /* subframe/block decode state */
 246     int16_t          subframe_len;                  ///< current subframe length
 247     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
 248     int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS];
 249     int8_t           num_bands;                     ///< number of scale factor bands
 250     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
 251     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
 252     int8_t           esc_len;                       ///< length of escaped coefficients
 253
 254     uint8_t          num_chgroups;                  ///< number of channel groups
 255     WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
 256
 257     WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
 258 } WMAProDecodeCtx;
 259
 260 /* static decode context, to avoid malloc */
 261 static WMAProDecodeCtx globWMAProDecCtx;
 262
 263 /**
 264  *@brief helper function to print the most important members of the context
 265  *@param s context
 266  */
 267 #ifdef WMAPRO_DUMP_CTX_EN
 268 static void  dump_context(WMAProDecodeCtx *s)
 269 {
 270 #define PRINT(a, b)     printf(" %s = %d\n", a, b);
 271 #define PRINT_HEX(a, b) printf(" %s = %x\n", a, b);
 272
 273     PRINT("ed sample bit depth", s->bits_per_sample);
 274     PRINT_HEX("ed decode flags", s->decode_flags);
 275     PRINT("samples per frame",   s->samples_per_frame);
 276     PRINT("log2 frame size",     s->log2_frame_size);
 277     PRINT("max num subframes",   s->max_num_subframes);
 278     PRINT("len prefix",          s->len_prefix);
 279     PRINT("num channels",        s->num_channels);
 280 }
 281 #endif
 282
 283 /**
 284  *@brief Initialize the decoder.
 285  *@param avctx codec context
 286  *@return 0 on success, -1 otherwise
 287  */
 288 int decode_init(asf_waveformatex_t *wfx)
 289 {
 290     memset(&globWMAProDecCtx, 0, sizeof(WMAProDecodeCtx));
 291     WMAProDecodeCtx *s = &globWMAProDecCtx;
 292     uint8_t *edata_ptr = wfx->data;
 293     unsigned int channel_mask;
 294     int i;
 295     int log2_max_num_subframes;
 296     int num_possible_block_sizes;
 297
 298     /* Use globally defined array. Allows IRAM usage for models with large IRAM. */
 299     s->tmp = g_tmp;
 300
 301     /* Use globally defined arrays. Allows IRAM usage for up to 2 channels. */
 302     s->channel[0].out = g_out_ch0;
 303     s->channel[1].out = g_out_ch1;
 304     for (i=2; i<WMAPRO_MAX_CHANNELS; ++i)
 305         s->channel[i].out = g_out_multichannel[i-2];
 306
 307 #if defined(CPU_COLDFIRE)
 308     coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
 309 #endif
 310
 311     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 312
 313     if (wfx->datalen >= 18) {
 314         s->decode_flags    = AV_RL16(edata_ptr+14);
 315         channel_mask       = AV_RL32(edata_ptr+2);
 316         s->bits_per_sample = AV_RL16(edata_ptr);
 317         /** dump the extradata */
 318         for (i = 0; i < wfx->datalen; i++)
 319             DEBUGF("[%x] ", wfx->data[i]);
 320         DEBUGF("\n");
 321
 322     } else {
 323         DEBUGF("Unknown extradata size\n");
 324         return AVERROR_INVALIDDATA;
 325     }
 326
 327     /** generic init */
 328     s->log2_frame_size = av_log2(wfx->blockalign) + 4;
 329
 330     /** frame info */
 331     s->skip_frame  = 1; /** skip first frame */
 332     s->packet_loss = 1;
 333     s->len_prefix  = (s->decode_flags & 0x40);
 334
 335     if (!s->len_prefix) {
 336         DEBUGF("no length prefix\n");
 337         return AVERROR_INVALIDDATA;
 338     }
 339
 340     /** get frame len */
 341     s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(wfx->rate,
 342                                                           3, s->decode_flags);
 343
 344     /** init previous block len */
 345     for (i = 0; i < wfx->channels; i++)
 346         s->channel[i].prev_block_len = s->samples_per_frame;
 347
 348     /** subframe info */
 349     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
 350     s->max_num_subframes         = 1 << log2_max_num_subframes;
 351     if (s->max_num_subframes == 16)
 352         s->max_subframe_len_bit = 1;
 353     s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
 354
 355     num_possible_block_sizes     = log2_max_num_subframes + 1;
 356     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
 357     s->dynamic_range_compression = (s->decode_flags & 0x80);
 358
 359     if (s->max_num_subframes > MAX_SUBFRAMES) {
 360         DEBUGF("invalid number of subframes %i\n",
 361                s->max_num_subframes);
 362         return AVERROR_INVALIDDATA;
 363     }
 364
 365     s->num_channels = wfx->channels;
 366
 367     /** extract lfe channel position */
 368     s->lfe_channel = -1;
 369
 370     if (channel_mask & 8) {
 371         unsigned int mask;
 372         for (mask = 1; mask < 16; mask <<= 1) {
 373             if (channel_mask & mask)
 374                 ++s->lfe_channel;
 375         }
 376     }
 377
 378     if (s->num_channels < 0) {
 379         DEBUGF("invalid number of channels %d\n", s->num_channels);
 380         return AVERROR_INVALIDDATA;
 381     } else if (s->num_channels > WMAPRO_MAX_CHANNELS) {
 382         DEBUGF("unsupported number of channels\n");
 383         return AVERROR_PATCHWELCOME;
 384     }
 385
 386     INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
 387                     scale_huffbits, 1, 1,
 388                     scale_huffcodes, 2, 2, 616);
 389
 390     INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
 391                     scale_rl_huffbits, 1, 1,
 392                     scale_rl_huffcodes, 4, 4, 1406);
 393
 394     INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
 395                     coef0_huffbits, 1, 1,
 396                     coef0_huffcodes, 4, 4, 2108);
 397
 398     INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
 399                     coef1_huffbits, 1, 1,
 400                     coef1_huffcodes, 4, 4, 3912);
 401
 402     INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
 403                     vec4_huffbits, 1, 1,
 404                     vec4_huffcodes, 2, 2, 604);
 405
 406     INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
 407                     vec2_huffbits, 1, 1,
 408                     vec2_huffcodes, 2, 2, 562);
 409
 410     INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
 411                     vec1_huffbits, 1, 1,
 412                     vec1_huffcodes, 2, 2, 562);
 413
 414     /** calculate number of scale factor bands and their offsets
 415         for every possible block size */
 416     for (i = 0; i < num_possible_block_sizes; i++) {
 417         int subframe_len = s->samples_per_frame >> i;
 418         int x;
 419         int band = 1;
 420
 421         s->sfb_offsets[i][0] = 0;
 422
 423         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
 424             int offset = (subframe_len * 2 * critical_freq[x])
 425                           / wfx->rate + 2;
 426             offset &= ~3;
 427             if (offset > s->sfb_offsets[i][band - 1])
 428                 s->sfb_offsets[i][band++] = offset;
 429         }
 430         s->sfb_offsets[i][band - 1] = subframe_len;
 431         s->num_sfb[i]               = band - 1;
 432     }
 433
 434
 435     /** Scale factors can be shared between blocks of different size
 436         as every block has a different scale factor band layout.
 437         The matrix sf_offsets is needed to find the correct scale factor.
 438      */
 439
 440     for (i = 0; i < num_possible_block_sizes; i++) {
 441         int b;
 442         for (b = 0; b < s->num_sfb[i]; b++) {
 443             int x;
 444             int offset = ((s->sfb_offsets[i][b]
 445                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
 446             for (x = 0; x < num_possible_block_sizes; x++) {
 447                 int v = 0;
 448                 while (s->sfb_offsets[x][v + 1] << x < offset)
 449                     ++v;
 450                 s->sf_offsets[i][x][b] = v;
 451             }
 452         }
 453     }
 454
 455     /** calculate subwoofer cutoff values */
 456     for (i = 0; i < num_possible_block_sizes; i++) {
 457         int block_size = s->samples_per_frame >> i;
 458         int cutoff = (440*block_size + 3 * (wfx->rate >> 1) - 1)
 459                      / wfx->rate;
 460         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
 461     }
 462
 463 #if 0
 464     /** calculate sine values for the decorrelation matrix */
 465     for (i = 0; i < 33; i++)
 466         sin64[i] = sin(i*M_PI / 64.0);
 467 #endif
 468
 469 #ifdef WMAPRO_DUMP_CTX_EN
 470     dump_context(s);
 471 #endif
 472     return 0;
 473 }
 474
 475 /**
 476  *@brief Decode the subframe length.
 477  *@param s context
 478  *@param offset sample offset in the frame
 479  *@return decoded subframe length on success, < 0 in case of an error
 480  */
 481 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
 482 {
 483     int frame_len_shift = 0;
 484     int subframe_len;
 485
 486     /** no need to read from the bitstream when only one length is possible */
 487     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
 488         return s->min_samples_per_subframe;
 489
 490     /** 1 bit indicates if the subframe is of maximum length */
 491     if (s->max_subframe_len_bit) {
 492         if (get_bits1(&s->gb))
 493             frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
 494     } else
 495         frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
 496
 497     subframe_len = s->samples_per_frame >> frame_len_shift;
 498
 499     /** sanity check the length */
 500     if (subframe_len < s->min_samples_per_subframe ||
 501         subframe_len > s->samples_per_frame) {
 502         DEBUGF("broken frame: subframe_len %i\n",
 503                subframe_len);
 504         return AVERROR_INVALIDDATA;
 505     }
 506     return subframe_len;
 507 }
 508
 509 /**
 510  *@brief Decode how the data in the frame is split into subframes.
 511  *       Every WMA frame contains the encoded data for a fixed number of
 512  *       samples per channel. The data for every channel might be split
 513  *       into several subframes. This function will reconstruct the list of
 514  *       subframes for every channel.
 515  *
 516  *       If the subframes are not evenly split, the algorithm estimates the
 517  *       channels with the lowest number of total samples.
 518  *       Afterwards, for each of these channels a bit is read from the
 519  *       bitstream that indicates if the channel contains a subframe with the
 520  *       next subframe size that is going to be read from the bitstream or not.
 521  *       If a channel contains such a subframe, the subframe size gets added to
 522  *       the channel's subframe list.
 523  *       The algorithm repeats these steps until the frame is properly divided
 524  *       between the individual channels.
 525  *
 526  *@param s context
 527  *@return 0 on success, < 0 in case of an error
 528  */
 529 static int decode_tilehdr(WMAProDecodeCtx *s)
 530 {
 531     uint16_t num_samples[WMAPRO_MAX_CHANNELS];        /** sum of samples for all currently known subframes of a channel */
 532     uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /** flag indicating if a channel contains the current subframe */
 533     int channels_for_cur_subframe = s->num_channels;  /** number of channels that contain the current subframe */
 534     int fixed_channel_layout = 0;                     /** flag indicating that all channels use the same subframe offsets and sizes */
 535     int min_channel_len = 0;                          /** smallest sum of samples (channels with this length will be processed first) */
 536     int c;
 537
 538     /* Should never consume more than 3073 bits (256 iterations for the
 539      * while loop when always the minimum amount of 128 samples is substracted
 540      * from missing samples in the 8 channel case).
 541      * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
 542      */
 543
 544     /** reset tiling information */
 545     for (c = 0; c < s->num_channels; c++)
 546         s->channel[c].num_subframes = 0;
 547
 548     memset(num_samples, 0, sizeof(num_samples));
 549
 550     if (s->max_num_subframes == 1 || get_bits1(&s->gb))
 551         fixed_channel_layout = 1;
 552
 553     /** loop until the frame data is split between the subframes */
 554     do {
 555         int subframe_len;
 556
 557         /** check which channels contain the subframe */
 558         for (c = 0; c < s->num_channels; c++) {
 559             if (num_samples[c] == min_channel_len) {
 560                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
 561                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
 562                     contains_subframe[c] = 1;
 563                 else
 564                     contains_subframe[c] = get_bits1(&s->gb);
 565             } else
 566                 contains_subframe[c] = 0;
 567         }
 568
 569         /** get subframe length, subframe_len == 0 is not allowed */
 570         if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
 571             return AVERROR_INVALIDDATA;
 572
 573         /** add subframes to the individual channels and find new min_channel_len */
 574         min_channel_len += subframe_len;
 575         for (c = 0; c < s->num_channels; c++) {
 576             WMAProChannelCtx* chan = &s->channel[c];
 577
 578             if (contains_subframe[c]) {
 579                 if (chan->num_subframes >= MAX_SUBFRAMES) {
 580                     DEBUGF("broken frame: num subframes > 31\n");
 581                     return AVERROR_INVALIDDATA;
 582                 }
 583                 chan->subframe_len[chan->num_subframes] = subframe_len;
 584                 num_samples[c] += subframe_len;
 585                 ++chan->num_subframes;
 586                 if (num_samples[c] > s->samples_per_frame) {
 587                     DEBUGF("broken frame: "
 588                            "channel len > samples_per_frame\n");
 589                     return AVERROR_INVALIDDATA;
 590                 }
 591             } else if (num_samples[c] <= min_channel_len) {
 592                 if (num_samples[c] < min_channel_len) {
 593                     channels_for_cur_subframe = 0;
 594                     min_channel_len = num_samples[c];
 595                 }
 596                 ++channels_for_cur_subframe;
 597             }
 598         }
 599     } while (min_channel_len < s->samples_per_frame);
 600
 601     for (c = 0; c < s->num_channels; c++) {
 602         int i;
 603         int offset = 0;
 604         for (i = 0; i < s->channel[c].num_subframes; i++) {
 605             DEBUGF("frame[%i] channel[%i] subframe[%i]"
 606                     " len %i\n", s->frame_num, c, i,
 607                     s->channel[c].subframe_len[i]);
 608             s->channel[c].subframe_offset[i] = offset;
 609             offset += s->channel[c].subframe_len[i];
 610         }
 611     }
 612
 613     return 0;
 614 }
 615
#if 0
/* Disabled reference code: it is excluded from the build by the surrounding
 * #if 0 and reads the float table sin64, whose declaration is commented out
 * in this file. It computes the float matrix and its integer counterpart
 * (fixdecorrelation_matrix) side by side.
 *
 * NOTE(review): only the float matrix is cleared by the memset below; the
 * off-diagonal entries of fixdecorrelation_matrix are read (f1 / f2) without
 * being zeroed in this function. Confirm before re-enabling.
 */
/**
 *@brief Calculate a decorrelation matrix from the bitstream parameters.
 *       Reads one 6 bit rotation offset per channel pair and one sign bit
 *       per channel, then applies the rotations to the matrix rows.
 *@param s codec context
 *@param chgroup channel group for which the matrix needs to be calculated
 */
static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
                                        WMAProChannelGrp *chgroup)
{
    int i;
    int offset = 0;
    int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
    memset(chgroup->decorrelation_matrix, 0, s->num_channels *
           s->num_channels * sizeof(*chgroup->decorrelation_matrix));

    /* one rotation offset for each of the n * (n - 1) / 2 channel pairs */
    for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
        rotation_offset[i] = get_bits(&s->gb, 6);

    /* the diagonal starts as +1 or -1 (0x10000 / -0x10000 in the integer matrix) */
    for (i = 0; i < chgroup->num_channels; i++) {
        chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
            get_bits1(&s->gb) ? 1.0 : -1.0;

        if(chgroup->decorrelation_matrix[chgroup->num_channels * i + i] > 0)
            chgroup->fixdecorrelation_matrix[chgroup->num_channels * i + i] =  0x10000;
        else
            chgroup->fixdecorrelation_matrix[chgroup->num_channels * i + i] = -0x10000;
    }

    /* rotate row i against every earlier row x, one column y at a time */
    for (i = 1; i < chgroup->num_channels; i++) {
        int x;
        for (x = 0; x < i; x++) {
            int y;
            for (y = 0; y < i + 1; y++) {
                float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
                float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
                int32_t f1 = chgroup->fixdecorrelation_matrix[x * chgroup->num_channels + y];
                int32_t f2 = chgroup->fixdecorrelation_matrix[i * chgroup->num_channels + y];
                int n = rotation_offset[offset + x];
                float sinv;
                float cosv;
                int32_t fixsinv;
                int32_t fixcosv;

                /* sine and cosine of the rotation are both looked up in the
                   sine tables; for n >= 32 the cosine is negated */
                if (n < 32) {
                    sinv = sin64[n];
                    cosv = sin64[32 - n];
                    fixsinv = fixed_sin64[n];
                    fixcosv = fixed_sin64[32-n];
                } else {
                    sinv =  sin64[64 -  n];
                    cosv = -sin64[n  - 32];
                    fixsinv = fixed_sin64[64-n];
                    fixcosv = -fixed_sin64[n-32];
                }

                chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
                                               (v1 * sinv) - (v2 * cosv);
                chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
                                               (v1 * cosv) + (v2 * sinv);
                chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =
                                               fixmul31(f1, fixsinv) - fixmul31(f2, fixcosv);
                chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =
                                               fixmul31(f1, fixcosv) + fixmul31(f2, fixsinv);

            }
        }
        /* row i used i rotation offsets; advance to those of the next row */
        offset += i;
    }
}
#endif
 686
 687 /**
 688  *@brief Decode channel transformation parameters
 689  *@param s codec context
 690  *@return 0 in case of success, < 0 in case of bitstream errors
 691  */
 692 static int decode_channel_transform(WMAProDecodeCtx* s)
 693 {
 694     int i;
 695     /* should never consume more than 1921 bits for the 8 channel case
 696      * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
 697      * + MAX_CHANNELS + MAX_BANDS + 1)
 698      */
 699
 700     /** in the one channel case channel transforms are pointless */
 701     s->num_chgroups = 0;
 702     if (s->num_channels > 1) {
 703         int remaining_channels = s->channels_for_cur_subframe;
 704
 705         if (get_bits1(&s->gb)) {
 706                                  DEBUGF("unsupported channel transform bit\n");
 707             return AVERROR_INVALIDDATA;
 708         }
 709
 710         for (s->num_chgroups = 0; remaining_channels &&
 711              s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
 712             WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
 713             int32_t** channel_data = chgroup->channel_data;
 714             chgroup->num_channels = 0;
 715             chgroup->transform = 0;
 716
 717             /** decode channel mask */
 718             if (remaining_channels > 2) {
 719                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 720                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 721                     if (!s->channel[channel_idx].grouped
 722                         && get_bits1(&s->gb)) {
 723                         ++chgroup->num_channels;
 724                         s->channel[channel_idx].grouped = 1;
 725                         *channel_data++    = s->channel[channel_idx].coeffs;
 726                     }
 727                 }
 728             } else {
 729                 chgroup->num_channels = remaining_channels;
 730                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
 731                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
 732                     if (!s->channel[channel_idx].grouped)
 733                         *channel_data++    = s->channel[channel_idx].coeffs;
 734                     s->channel[channel_idx].grouped = 1;
 735                 }
 736             }
 737
 738             /** decode transform type */
 739             if (chgroup->num_channels == 2) {
 740                 if (get_bits1(&s->gb)) {
 741                     if (get_bits1(&s->gb)) {
 742                         DEBUGF("unsupported channel transform type\n");
 743                     }
 744                 } else {
 745                     chgroup->transform = 1;
 746                     if (s->num_channels == 2) {
 747                         chgroup->fixdecorrelation_matrix[0] =  0x10000;
 748                         chgroup->fixdecorrelation_matrix[1] = -0x10000;
 749                         chgroup->fixdecorrelation_matrix[2] =  0x10000;
 750                         chgroup->fixdecorrelation_matrix[3] =  0x10000;
 751                     } else {
 752                         /** cos(pi/4) */
 753                         chgroup->fixdecorrelation_matrix[0] =  0xB500;
 754                         chgroup->fixdecorrelation_matrix[1] = -0xB500;
 755                         chgroup->fixdecorrelation_matrix[2] =  0xB500;
 756                         chgroup->fixdecorrelation_matrix[3] =  0xB500;
 757                     }
 758                 }
 759             } else if (chgroup->num_channels > 2) {
 760                 DEBUGF("in wmaprodec.c: Multichannel streams still not supported\n");
 761                 return -1;
 762 #if 0
 763                 if (get_bits1(&s->gb)) {
 764                     chgroup->transform = 1;
 765                     if (get_bits1(&s->gb)) {
 766                         decode_decorrelation_matrix(s, chgroup);
 767                     } else {
 768                         /** FIXME: more than 6 coupled channels not supported */
 769                         if (chgroup->num_channels > 6) {
 770                             av_log_ask_for_sample(s->avctx,
 771                                                   "coupled channels > 6\n");
 772                         } else {
 773                             memcpy(chgroup->decorrelation_matrix,
 774                                    default_decorrelation[chgroup->num_channels],
 775                                    chgroup->num_channels * chgroup->num_channels *
 776                                    sizeof(*chgroup->decorrelation_matrix));
 777                         }
 778                     }
 779                 }
 780 #endif
 781             }
 782
 783             /** decode transform on / off */
 784             if (chgroup->transform) {
 785                 if (!get_bits1(&s->gb)) {
 786                     int i;
 787                     /** transform can be enabled for individual bands */
 788                     for (i = 0; i < s->num_bands; i++) {
 789                         chgroup->transform_band[i] = get_bits1(&s->gb);
 790                     }
 791                 } else {
 792                     memset(chgroup->transform_band, 1, s->num_bands);
 793                 }
 794             }
 795             remaining_channels -= chgroup->num_channels;
 796         }
 797     }
 798     return 0;
 799 }
 800
 801 /**
 802  *@brief Extract the coefficients from the bitstream.
 803  *@param s codec context
 804  *@param c current channel number
 805  *@return 0 on success, < 0 in case of bitstream errors
 806  */
 807 static int decode_coeffs(WMAProDecodeCtx *s, int c)
 808 {
 809     int vlctable;
 810     VLC* vlc;
 811     WMAProChannelCtx* ci = &s->channel[c];
 812     int rl_mode = 0;
 813     int cur_coeff = 0;
 814     int num_zeros = 0;
 815     const uint16_t* run;
 816     const int32_t* level;
 817
 818     DEBUGF("decode coefficients for channel %i\n", c);
 819
 820     vlctable = get_bits1(&s->gb);
 821     vlc = &coef_vlc[vlctable];
 822
 823     if (vlctable) {
 824         run = coef1_run;
 825         level = coef1_level;
 826     } else {
 827         run = coef0_run;
 828         level = coef0_level;
 829     }
 830
 831     /** decode vector coefficients (consumes up to 167 bits per iteration for
 832       4 vector coded large values) */
 833     while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
 834         int32_t vals[4];
 835         int i;
 836         unsigned int idx;
 837
 838         idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
 839
 840         if (idx == HUFF_VEC4_SIZE - 1) {
 841             for (i = 0; i < 4; i += 2) {
 842                 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
 843                 if (idx == HUFF_VEC2_SIZE - 1) {
 844                     int v0, v1;
 845                     v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 846                     if (v0 == HUFF_VEC1_SIZE - 1)
 847                         v0 += ff_wma_get_large_val(&s->gb);
 848                     v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
 849                     if (v1 == HUFF_VEC1_SIZE - 1)
 850                         v1 += ff_wma_get_large_val(&s->gb);
 851
 852                     vals[i] = v0;
 853                     vals[i+1] = v1;
 854                 } else {
 855                     vals[i] = symbol_to_vec2[idx] >> 4;
 856                     vals[i+1] = symbol_to_vec2[idx] & 0xF;
 857                 }
 858             }
 859         } else {
 860             vals[0] = symbol_to_vec4[idx] >> 12;
 861             vals[1] = (symbol_to_vec4[idx] >> 8) & 0xF;
 862             vals[2] = (symbol_to_vec4[idx] >> 4) & 0xF;
 863             vals[3] = symbol_to_vec4[idx] & 0xF;
 864         }
 865
 866         /** decode sign */
 867         for (i = 0; i < 4; i++) {
 868             if (vals[i]) {
 869                 int sign = get_bits1(&s->gb) - 1;
 870                 ci->coeffs[cur_coeff] = (sign == -1)? -vals[i]<<16 : vals[i]<<16;
 871                 num_zeros = 0;
 872             } else {
 873                 ci->coeffs[cur_coeff] = 0;
 874                 /** switch to run level mode when subframe_len / 128 zeros
 875                     were found in a row */
 876                 rl_mode |= (++num_zeros > s->subframe_len >> 8);
 877             }
 878             ++cur_coeff;
 879         }
 880     }
 881
 882     /** decode run level coded coefficients */
 883     if (rl_mode) {
 884         memset(&ci->coeffs[cur_coeff], 0,
 885                sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
 886
 887         if (ff_wma_run_level_decode(&s->gb, vlc,
 888                                     level, run, 1, ci->coeffs,
 889                                     cur_coeff, s->subframe_len,
 890                                     s->subframe_len, s->esc_len, 0))
 891            return AVERROR_INVALIDDATA;
 892
 893     }
 894     return 0;
 895 }
 896
 897 /**
 898  *@brief Extract scale factors from the bitstream.
 899  *@param s codec context
 900  *@return 0 on success, < 0 in case of bitstream errors
 901  */
 902 static int decode_scale_factors(WMAProDecodeCtx* s)
 903 {
 904     int i;
 905
 906     /** should never consume more than 5344 bits
 907      *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
 908      */
 909
 910     for (i = 0; i < s->channels_for_cur_subframe; i++) {
 911         int c = s->channel_indexes_for_cur_subframe[i];
 912         int* sf;
 913         int* sf_end;
 914         s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
 915         sf_end = s->channel[c].scale_factors + s->num_bands;
 916
 917         /** resample scale factors for the new block size
 918          *  as the scale factors might need to be resampled several times
 919          *  before some  new values are transmitted, a backup of the last
 920          *  transmitted scale factors is kept in saved_scale_factors
 921          */
 922         if (s->channel[c].reuse_sf) {
 923             const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
 924             int b;
 925             for (b = 0; b < s->num_bands; b++)
 926                 s->channel[c].scale_factors[b] =
 927                     s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
 928         }
 929
 930         if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
 931
 932             if (!s->channel[c].reuse_sf) {
 933                 int val;
 934                 /** decode DPCM coded scale factors */
 935                 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
 936                 val = 45 / s->channel[c].scale_factor_step;
 937                 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
 938                     val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
 939                     *sf = val;
 940                 }
 941             } else {
 942                 int i;
 943                 /** run level decode differences to the resampled factors */
 944                 for (i = 0; i < s->num_bands; i++) {
 945                     int idx;
 946                     int skip;
 947                     int val;
 948                     int sign;
 949
 950                     idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
 951
 952                     if (!idx) {
 953                         uint32_t code = get_bits(&s->gb, 14);
 954                         val  =  code >> 6;
 955                         sign = (code & 1) - 1;
 956                         skip = (code & 0x3f) >> 1;
 957                     } else if (idx == 1) {
 958                         break;
 959                     } else {
 960                         skip = scale_rl_run[idx];
 961                         val  = scale_rl_level[idx];
 962                         sign = get_bits1(&s->gb)-1;
 963                     }
 964
 965                     i += skip;
 966                     if (i >= s->num_bands) {
 967                            DEBUGF("invalid scale factor coding\n");
 968                         return AVERROR_INVALIDDATA;
 969                     }
 970                     s->channel[c].scale_factors[i] += (val ^ sign) - sign;
 971                 }
 972             }
 973
 974             /** swap buffers */
 975             s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
 976             s->channel[c].table_idx = s->table_idx;
 977             s->channel[c].reuse_sf  = 1;
 978         }
 979
 980         /** calculate new scale factor maximum */
 981         s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
 982         for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
 983             s->channel[c].max_scale_factor =
 984                 FFMAX(s->channel[c].max_scale_factor, *sf);
 985         }
 986
 987     }
 988     return 0;
 989 }
 990
 991 /**
 992  *@brief Reconstruct the individual channel data.
 993  *@param s codec context
 994  */
 995 static void inverse_channel_transform(WMAProDecodeCtx *s)
 996 {
 997     int i;
 998
 999     for (i = 0; i < s->num_chgroups; i++) {
1000         if (s->chgroup[i].transform) {
1001             const int num_channels = s->chgroup[i].num_channels;
1002             int32_t data[WMAPRO_MAX_CHANNELS];
1003             int32_t** ch_data = s->chgroup[i].channel_data;
1004             int32_t** ch_end = ch_data + num_channels;
1005             const int8_t* tb = s->chgroup[i].transform_band;
1006             int16_t* sfb;
1007
1008             /** multichannel decorrelation */
1009             for (sfb = s->cur_sfb_offsets;
1010                  sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
1011                 int y;
1012                 if (*tb++ == 1) {
1013                     /** multiply values with the decorrelation_matrix */
1014                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
1015                         const int32_t* mat = s->chgroup[i].fixdecorrelation_matrix;
1016                         const int32_t* data_end = data + num_channels;
1017                         int32_t* data_ptr = data;
1018                         int32_t** ch;
1019
1020                         for (ch = ch_data; ch < ch_end; ch++)
1021                             *data_ptr++ = (*ch)[y];
1022
1023                         for (ch = ch_data; ch < ch_end; ch++) {
1024                             int32_t sum = 0;
1025                             data_ptr = data;
1026
1027                             while (data_ptr < data_end)
1028                                 sum += fixmul16(*data_ptr++, *mat++);
1029
1030                             (*ch)[y] = sum;
1031                         }
1032                     }
1033                 } else if (s->num_channels == 2) {
1034
1035                     /* Scale with sqrt(2). 0x016A09E6 = (sqrt(2)*(1<<24)) */
1036                     int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1037                     vector_fixmul_scalar(ch_data[0] + sfb[0],
1038                                          ch_data[0] + sfb[0],
1039                                          0x016A09E6, len);
1040                     vector_fixmul_scalar(ch_data[1] + sfb[0],
1041                                          ch_data[1] + sfb[0],
1042                                          0x016A09E6, len);
1043
1044                 }
1045             }
1046         }
1047     }
1048 }
1049
1050 /**
1051  *@brief Apply sine window and reconstruct the output buffer.
1052  *@param s codec context
1053  */
1054 static void wmapro_window(WMAProDecodeCtx *s)
1055 {
1056     int i;
1057
1058     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1059         int c = s->channel_indexes_for_cur_subframe[i];
1060         const int32_t* window;
1061         int winlen = s->channel[c].prev_block_len;
1062         int32_t *xstart= s->channel[c].coeffs - (winlen >> 1);
1063
1064         if (s->subframe_len < winlen) {
1065             xstart += (winlen - s->subframe_len) >> 1;
1066             winlen = s->subframe_len;
1067         }
1068
1069         window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
1070
1071         winlen >>= 1;
1072
1073         vector_fixmul_window(xstart, xstart, xstart + winlen,
1074                                   window, winlen);
1075
1076         s->channel[c].prev_block_len = s->subframe_len;
1077
1078     }
1079 }
1080
1081 /**
1082  *@brief Decode a single subframe (block).
1083  *@param s codec context
1084  *@return 0 on success, < 0 when decoding failed
1085  */
1086 static int decode_subframe(WMAProDecodeCtx *s)
1087 {
1088     int offset = s->samples_per_frame;
1089     int subframe_len = s->samples_per_frame;
1090     int i;
1091     int total_samples   = s->samples_per_frame * s->num_channels;
1092     int transmit_coeffs = 0;
1093     int cur_subwoofer_cutoff;
1094
1095     s->subframe_offset = get_bits_count(&s->gb);
1096
1097     /** reset channel context and find the next block offset and size
1098         == the next block of the channel with the smallest number of
1099         decoded samples
1100     */
1101     for (i = 0; i < s->num_channels; i++) {
1102         s->channel[i].grouped = 0;
1103         if (offset > s->channel[i].decoded_samples) {
1104             offset = s->channel[i].decoded_samples;
1105             subframe_len =
1106                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1107         }
1108     }
1109
1110     DEBUGF("processing subframe with offset %i len %i\n", offset, subframe_len);
1111
1112     /** get a list of all channels that contain the estimated block */
1113     s->channels_for_cur_subframe = 0;
1114     for (i = 0; i < s->num_channels; i++) {
1115         const int cur_subframe = s->channel[i].cur_subframe;
1116         /** substract already processed samples */
1117         total_samples -= s->channel[i].decoded_samples;
1118
1119         /** and count if there are multiple subframes that match our profile */
1120         if (offset == s->channel[i].decoded_samples &&
1121             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1122             total_samples -= s->channel[i].subframe_len[cur_subframe];
1123             s->channel[i].decoded_samples +=
1124                 s->channel[i].subframe_len[cur_subframe];
1125             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1126             ++s->channels_for_cur_subframe;
1127         }
1128     }
1129
1130     /** check if the frame will be complete after processing the
1131         estimated block */
1132     if (!total_samples)
1133         s->parsed_all_subframes = 1;
1134
1135
1136     DEBUGF("subframe is part of %i channels\n", s->channels_for_cur_subframe);
1137
1138     /** calculate number of scale factor bands and their offsets */
1139     s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
1140     s->num_bands         = s->num_sfb[s->table_idx];
1141     s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
1142     cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1143
1144     /** configure the decoder for the current subframe */
1145     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1146         int c = s->channel_indexes_for_cur_subframe[i];
1147
1148         s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
1149                                                   + offset];
1150     }
1151
1152     s->subframe_len = subframe_len;
1153     s->esc_len = av_log2(s->subframe_len - 1) + 1;
1154
1155     /** skip extended header if any */
1156     if (get_bits1(&s->gb)) {
1157         int num_fill_bits;
1158         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1159             int len = get_bits(&s->gb, 4);
1160             num_fill_bits = get_bits(&s->gb, len) + 1;
1161         }
1162
1163         if (num_fill_bits >= 0) {
1164             if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1165                 DEBUGF("invalid number of fill bits\n");
1166                 return AVERROR_INVALIDDATA;
1167             }
1168
1169             skip_bits_long(&s->gb, num_fill_bits);
1170         }
1171     }
1172
1173     /** no idea for what the following bit is used */
1174     if (get_bits1(&s->gb)) {
1175         DEBUGF("reserved bit set\n");
1176         return AVERROR_INVALIDDATA;
1177     }
1178
1179     if (decode_channel_transform(s) < 0)
1180         return AVERROR_INVALIDDATA;
1181
1182     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1183         int c = s->channel_indexes_for_cur_subframe[i];
1184         if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1185             transmit_coeffs = 1;
1186     }
1187
1188     if (transmit_coeffs) {
1189         int step;
1190         int quant_step = 90 * s->bits_per_sample >> 4;
1191         if ((get_bits1(&s->gb))) {
1192             /** FIXME: might change run level mode decision */
1193             DEBUGF("unsupported quant step coding\n");
1194             return AVERROR_INVALIDDATA;
1195         }
1196         /** decode quantization step */
1197         step = get_sbits(&s->gb, 6);
1198         quant_step += step;
1199         if (step == -32 || step == 31) {
1200             const int sign = (step == 31) - 1;
1201             int quant = 0;
1202             while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1203                    (step = get_bits(&s->gb, 5)) == 31) {
1204                 quant += 31;
1205             }
1206             quant_step += ((quant + step) ^ sign) - sign;
1207         }
1208         if (quant_step < 0) {
1209             DEBUGF("negative quant step\n");
1210         }
1211
1212         /** decode quantization step modifiers for every channel */
1213
1214         if (s->channels_for_cur_subframe == 1) {
1215             s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1216         } else {
1217             int modifier_len = get_bits(&s->gb, 3);
1218             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1219                 int c = s->channel_indexes_for_cur_subframe[i];
1220                 s->channel[c].quant_step = quant_step;
1221                 if (get_bits1(&s->gb)) {
1222                     if (modifier_len) {
1223                         s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1224                     } else
1225                         ++s->channel[c].quant_step;
1226                 }
1227             }
1228         }
1229
1230         /** decode scale factors */
1231         if (decode_scale_factors(s) < 0)
1232             return AVERROR_INVALIDDATA;
1233     }
1234
1235     DEBUGF("BITSTREAM: subframe header length was %i\n",
1236             get_bits_count(&s->gb) - s->subframe_offset);
1237
1238     /** parse coefficients */
1239     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1240         int c = s->channel_indexes_for_cur_subframe[i];
1241         if (s->channel[c].transmit_coefs &&
1242             get_bits_count(&s->gb) < s->num_saved_bits) {
1243             decode_coeffs(s, c);
1244         } else {
1245             memset(s->channel[c].coeffs, 0,
1246                    sizeof(*s->channel[c].coeffs) * subframe_len);
1247         }
1248     }
1249
1250     DEBUGF("BITSTREAM: subframe length was %i\n",
1251             get_bits_count(&s->gb) - s->subframe_offset);
1252
1253     if (transmit_coeffs) {
1254         /** reconstruct the per channel data */
1255         inverse_channel_transform(s);
1256         for (i = 0; i < s->channels_for_cur_subframe; i++) {
1257             int c = s->channel_indexes_for_cur_subframe[i];
1258             const int* sf = s->channel[c].scale_factors;
1259             int b;
1260
1261             if (c == s->lfe_channel)
1262                 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1263                        (subframe_len - cur_subwoofer_cutoff));
1264
1265             /** inverse quantization and rescaling */
1266             for (b = 0; b < s->num_bands; b++) {
1267                 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1268                 const int exp = s->channel[c].quant_step -
1269                             (s->channel[c].max_scale_factor - *sf++) *
1270                             s->channel[c].scale_factor_step;
1271
1272                 if(exp < EXP_MIN || exp > EXP_MAX) {
1273                     DEBUGF("in wmaprodec.c : unhandled value for exp (%d), please report sample.\n", exp);
1274                     return -1;
1275                 }
1276                 const int32_t quant = QUANT(exp);
1277                 int start = s->cur_sfb_offsets[b];
1278
1279                 vector_fixmul_scalar(s->tmp+start,
1280                                      s->channel[c].coeffs + start,
1281                                      quant, end-start);
1282
1283
1284             }
1285
1286             /** apply imdct (ff_imdct_half == DCTIV with reverse) */
1287             imdct_half(av_log2(subframe_len)+1,
1288                           s->channel[c].coeffs, s->tmp);
1289
1290         }
1291     }
1292
1293     /** window and overlapp-add */
1294     wmapro_window(s);
1295
1296     /** handled one subframe */
1297     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1298         int c = s->channel_indexes_for_cur_subframe[i];
1299         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1300             DEBUGF("broken subframe\n");
1301             return AVERROR_INVALIDDATA;
1302         }
1303         ++s->channel[c].cur_subframe;
1304     }
1305
1306     return 0;
1307 }
1308
1309 /**
1310  *@brief Decode one WMA frame.
1311  *@param s codec context
1312  *@return 0 if the trailer bit indicates that this is the last frame,
1313  *        1 if there are additional frames
1314  */
1315 static int decode_frame(WMAProDecodeCtx *s)
1316 {
1317     GetBitContext* gb = &s->gb;
1318     int more_frames = 0;
1319     int len = 0;
1320     int i;
1321
1322     /** check for potential output buffer overflow */
1323     if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
1324         /** return an error if no frame could be decoded at all */
1325            DEBUGF("not enough space for the output samples\n");
1326         s->packet_loss = 1;
1327         return 0;
1328     }
1329
1330     /** get frame length */
1331     if (s->len_prefix)
1332         len = get_bits(gb, s->log2_frame_size);
1333
1334     DEBUGF("decoding frame with length %x\n", len);
1335
1336     /** decode tile information */
1337     if (decode_tilehdr(s)) {
1338         s->packet_loss = 1;
1339         return 0;
1340     }
1341
1342     /** read postproc transform */
1343     if (s->num_channels > 1 && get_bits1(gb)) {
1344         DEBUGF("Unsupported postproc transform found\n");
1345         s->packet_loss = 1;
1346         return 0;
1347     }
1348
1349     /** read drc info */
1350     if (s->dynamic_range_compression) {
1351         s->drc_gain = get_bits(gb, 8);
1352         DEBUGF("drc_gain %i\n", s->drc_gain);
1353     }
1354
1355     /** no idea what these are for, might be the number of samples
1356         that need to be skipped at the beginning or end of a stream */
1357     if (get_bits1(gb)) {
1358         int skip;
1359
1360         /** usually true for the first frame */
1361         if (get_bits1(gb)) {
1362             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1363             DEBUGF("start skip: %i\n", skip);
1364         }
1365
1366         /** sometimes true for the last frame */
1367         if (get_bits1(gb)) {
1368             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1369             DEBUGF("end skip: %i\n", skip);
1370         }
1371
1372     }
1373
1374     DEBUGF("BITSTREAM: frame header length was %i\n",
1375             get_bits_count(gb) - s->frame_offset);
1376
1377     /** reset subframe states */
1378     s->parsed_all_subframes = 0;
1379     for (i = 0; i < s->num_channels; i++) {
1380         s->channel[i].decoded_samples = 0;
1381         s->channel[i].cur_subframe    = 0;
1382         s->channel[i].reuse_sf        = 0;
1383     }
1384
1385     /** decode all subframes */
1386     while (!s->parsed_all_subframes) {
1387         if (decode_subframe(s) < 0) {
1388             s->packet_loss = 1;
1389             return 0;
1390         }
1391     }
1392
1393     /** interleave samples and write them to the output buffer */
1394     for (i = 0; i < s->num_channels; i++) {
1395         int32_t* ptr  = s->samples + i;
1396         int incr = s->num_channels;
1397         int32_t* iptr = s->channel[i].out;
1398         int32_t* iend = iptr + s->samples_per_frame;
1399
1400         while (iptr < iend) {
1401             *ptr = *iptr++ << 1;
1402             ptr += incr;
1403         }
1404
1405         /** reuse second half of the IMDCT output for the next frame */
1406         memcpy(&s->channel[i].out[0],
1407                &s->channel[i].out[s->samples_per_frame],
1408                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1409     }
1410
1411     if (s->skip_frame) {
1412         s->skip_frame = 0;
1413     } else
1414         s->samples += s->num_channels * s->samples_per_frame;
1415
1416     if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1417         /** FIXME: not sure if this is always an error */
1418         DEBUGF("frame[%i] would have to skip %i bits\n",
1419                (int)s->frame_num, len - (get_bits_count(gb) - s->frame_offset) - 1);
1420         s->packet_loss = 1;
1421         return 0;
1422     }
1423
1424     /** skip the rest of the frame data */
1425     skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1426
1427     /** decode trailer bit */
1428     more_frames = get_bits1(gb);
1429
1430     ++s->frame_num;
1431     return more_frames;
1432 }
1433
1434 /**
1435  *@brief Calculate remaining input buffer length.
1436  *@param s codec context
1437  *@param gb bitstream reader context
1438  *@return remaining size in bits
1439  */
1440 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1441 {
1442     return s->buf_bit_size - get_bits_count(gb);
1443 }
1444
1445 /**
1446  *@brief Fill the bit reservoir with a (partial) frame.
1447  *@param s codec context
1448  *@param gb bitstream reader context
1449  *@param len length of the partial frame
1450  *@param append decides wether to reset the buffer or not
1451  */
1452 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1453                       int append)
1454 {
1455     int buflen;
1456
1457     /** when the frame data does not need to be concatenated, the input buffer
1458         is resetted and additional bits from the previous frame are copyed
1459         and skipped later so that a fast byte copy is possible */
1460
1461     if (!append) {
1462         s->frame_offset = get_bits_count(gb) & 7;
1463         s->num_saved_bits = s->frame_offset;
1464         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1465     }
1466
1467     buflen = (s->num_saved_bits + len + 8) >> 3;
1468
1469     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1470         DEBUGF("input buffer too small\n");
1471         s->packet_loss = 1;
1472         return;
1473     }
1474
1475     s->num_saved_bits += len;
1476     if (!append) {
1477         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1478                      s->num_saved_bits);
1479     } else {
1480         int align = 8 - (get_bits_count(gb) & 7);
1481         align = FFMIN(align, len);
1482         put_bits(&s->pb, align, get_bits(gb, align));
1483         len -= align;
1484         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1485     }
1486     skip_bits_long(gb, len);
1487
1488     {
1489         PutBitContext tmp = s->pb;
1490         flush_put_bits(&tmp);
1491     }
1492
1493     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1494     skip_bits(&s->gb, s->frame_offset);
1495 }
1496
1497 /**
1498  *@brief Decode a single WMA packet.
1499  *@param avctx codec context
1500  *@param data the output buffer
1501  *@param data_size number of bytes that were written to the output buffer
1502  *@param avpkt input packet
1503  *@return number of bytes that were read from the input buffer
1504  */
1505 int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
1506                                   void* pktdata, int size)
1507 {
1508     WMAProDecodeCtx *s = &globWMAProDecCtx;
1509     GetBitContext* gb  = &s->pgb;
1510     const uint8_t* buf = pktdata;
1511     int buf_size       = size;
1512     int num_bits_prev_frame;
1513     int packet_sequence_number;
1514
1515     s->samples       = data;
1516     s->samples_end   = (int32_t*)((int8_t*)data + *data_size);
1517     *data_size = 0;
1518
1519     if (s->packet_done || s->packet_loss) {
1520         s->packet_done = 0;
1521         s->buf_bit_size = buf_size << 3;
1522
1523         /** sanity check for the buffer length */
1524         if (buf_size < wfx->blockalign)
1525             return 0;
1526
1527         buf_size = wfx->blockalign;
1528
1529         /** parse packet header */
1530         init_get_bits(gb, buf, s->buf_bit_size);
1531         packet_sequence_number = get_bits(gb, 4);
1532         skip_bits(gb, 2);
1533
1534         /** get number of bits that need to be added to the previous frame */
1535         num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1536         DEBUGF("packet[%d]: nbpf %x\n", s->frame_num,
1537                 num_bits_prev_frame);
1538
1539         /** check for packet loss */
1540         if (!s->packet_loss &&
1541             ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1542             s->packet_loss = 1;
1543             DEBUGF("Packet loss detected! seq %x vs %x\n",
1544                    s->packet_sequence_number, packet_sequence_number);
1545         }
1546         s->packet_sequence_number = packet_sequence_number;
1547
1548         if (num_bits_prev_frame > 0) {
1549             /** append the previous frame data to the remaining data from the
1550                 previous packet to create a full frame */
1551             save_bits(s, gb, num_bits_prev_frame, 1);
1552             DEBUGF("accumulated %x bits of frame data\n",
1553                     s->num_saved_bits - s->frame_offset);
1554
1555             /** decode the cross packet frame if it is valid */
1556             if (!s->packet_loss)
1557                 decode_frame(s);
1558         } else if (s->num_saved_bits - s->frame_offset) {
1559             DEBUGF("ignoring %x previously saved bits\n",
1560                     s->num_saved_bits - s->frame_offset);
1561         }
1562
1563         s->packet_loss = 0;
1564
1565     } else {
1566         int frame_size;
1567         s->buf_bit_size = size << 3;
1568         init_get_bits(gb, pktdata, s->buf_bit_size);
1569         skip_bits(gb, s->packet_offset);
1570         if (remaining_bits(s, gb) > s->log2_frame_size &&
1571             (frame_size = show_bits(gb, s->log2_frame_size)) &&
1572             frame_size <= remaining_bits(s, gb)) {
1573             save_bits(s, gb, frame_size, 0);
1574             s->packet_done = !decode_frame(s);
1575         } else
1576             s->packet_done = 1;
1577     }
1578
1579     if (s->packet_done && !s->packet_loss &&
1580         remaining_bits(s, gb) > 0) {
1581         /** save the rest of the data so that it can be decoded
1582             with the next packet */
1583         save_bits(s, gb, remaining_bits(s, gb), 0);
1584     }
1585
1586     *data_size = (int8_t *)s->samples - (int8_t *)data;
1587     s->packet_offset = get_bits_count(gb) & 7;
1588
1589         s->frame_num++;
1590     return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
1591 }
1592
#if 0
/**
 *@brief wmapro decoder
 *
 * Codec registration table. The enclosing "#if 0" removes it from the
 * build, so none of the names in this initializer are referenced here.
 * NOTE(review): decode_packet in this file takes an asf_waveformatex_t
 * pointer plus raw packet data and size; this entry would presumably need
 * adapting before it could be re-enabled - confirm against the AVCodec
 * definition.
 */
AVCodec wmapro_decoder = {
    "wmapro",
    AVMEDIA_TYPE_AUDIO,
    CODEC_ID_WMAPRO,
    sizeof(WMAProDecodeCtx),
    decode_init,
    NULL,
    decode_end,
    decode_packet,
    .capabilities = CODEC_CAP_SUBFRAMES,
    .flush= flush,
    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
};
#endif