libhb/declpcm.c

   1 /* declpcm.c
   2
   3    Copyright (c) 2003-2015 HandBrake Team
   4    This file is part of the HandBrake source code
   5    Homepage: <http://handbrake.fr/>.
   6    It may be used under the terms of the GNU General Public License v2.
   7    For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
   8  */
   9
  10 #include "hb.h"
  11 #include "hbffmpeg.h"
  12 #include "audio_resample.h"
  13
  14 struct hb_work_private_s
  15 {
  16     hb_job_t    *job;
  17     uint32_t    size;       /* frame size in bytes */
  18     uint32_t    nchunks;     /* number of samples pairs if paired */
  19     uint32_t    nsamples;   /* frame size in samples */
  20     uint32_t    pos;        /* buffer offset for next input data */
  21
  22     int64_t     next_pts;   /* pts for next output frame */
  23     int64_t     sequence;
  24
  25     /* the following is frame info for the frame we're currently accumulating */
  26     uint64_t    duration;   /* frame duratin (in 90KHz ticks) */
  27     uint32_t    offset;     /* where in buf frame starts */
  28     uint32_t    samplerate; /* sample rate in bits/sec */
  29     uint8_t     nchannels;
  30     uint8_t     sample_size; /* bits per sample */
  31
  32     uint8_t     frame[HB_DVD_READ_BUFFER_SIZE*2];
  33     uint8_t   * data;
  34     uint32_t    alloc_size;
  35
  36     hb_audio_resample_t *resample;
  37 };
  38
  39 static hb_buffer_t * Decode( hb_work_object_t * w );
  40 static int  declpcmInit( hb_work_object_t *, hb_job_t * );
  41 static int  declpcmWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  42 static void declpcmClose( hb_work_object_t * );
  43 static int  declpcmBSInfo( hb_work_object_t *, const hb_buffer_t *,
  44                            hb_work_info_t * );
  45
  46 hb_work_object_t hb_declpcm =
  47 {
  48     WORK_DECLPCM,
  49     "LPCM decoder",
  50     declpcmInit,
  51     declpcmWork,
  52     declpcmClose,
  53     0,
  54     declpcmBSInfo
  55 };
  56
  57 static const int hdr2samplerate[] = { 48000, 96000, 44100, 32000 };
  58 static const int hdr2samplesize[] = { 16, 20, 24, 16 };
  59 static const uint64_t hdr2layout[] =
  60 {
  61     AV_CH_LAYOUT_MONO,         AV_CH_LAYOUT_STEREO,
  62     AV_CH_LAYOUT_2_1,          AV_CH_LAYOUT_QUAD,
  63     AV_CH_LAYOUT_5POINT0_BACK, AV_CH_LAYOUT_6POINT0_FRONT,
  64     AV_CH_LAYOUT_6POINT1,      AV_CH_LAYOUT_7POINT1,
  65 };
  66
  67 static void lpcmInfo( hb_work_object_t *w, hb_buffer_t *in )
  68 {
  69     hb_work_private_t * pv = w->private_data;
  70
  71     /*
  72      * LPCM packets have a 7 byte header (the substream id is stripped off
  73      * before we get here so it's numbered -1 below)::
  74      * byte -1  Substream id
  75      * byte 0   Number of frames that begin in this packet
  76      *          (last frame may finish in next packet)
  77      * byte 1,2 offset to first frame that begins in this packet (not including hdr)
  78      * byte 3:
  79      *   bits 0-4  continuity counter (increments modulo 20)
  80      *   bit   5   reserved
  81      *   bit   6   audio mute on/off
  82      *   bit   7   audio emphasis on/off
  83      * byte 4:
  84      *   bits 0-2  #channels - 1 (e.g., stereo = 1)
  85      *   bit   3   reserved
  86      *   bits 4-5  sample rate (0=48K,1=96K,2=44.1K,3=32K)
  87      *   bits 6-7  bits per sample (0=16 bit, 1=20 bit, 2=24 bit)
  88      * byte 5   Dynamic range control (0x80 = off)
  89      *
  90      * The audio is viewed as "frames" of 150 90KHz ticks each (80 samples @ 48KHz).
  91      * The frames are laid down continuously without regard to MPEG packet
  92      * boundaries. E.g., for 48KHz stereo, the first packet will contain 6
  93      * frames plus the start of the 7th, the second packet will contain the
  94      * end of the 7th, 8-13 & the start of 14, etc. The frame structure is
  95      * important because the PTS on the packet gives the time of the first
  96      * frame that starts in the packet *NOT* the time of the first sample
  97      * in the packet. Also samples get split across packet boundaries
  98      * so we can't assume that we can consume all the data in one packet
  99      * on every call to the work routine.
 100      */
 101     pv->offset = ( ( in->data[1] << 8 ) | in->data[2] ) + 2;
 102     if ( pv->offset >= HB_DVD_READ_BUFFER_SIZE )
 103     {
 104         hb_log( "declpcm: illegal frame offset %d", pv->offset );
 105         pv->offset = 2; /*XXX*/
 106     }
 107     pv->nchannels   = ( in->data[4] & 7 ) + 1;
 108     pv->samplerate  = hdr2samplerate[ ( in->data[4] >> 4 ) & 0x3 ];
 109     pv->sample_size = hdr2samplesize[in->data[4] >> 6];
 110
 111     // 20 and 24 bit lpcm is always encoded in sample pairs.  So take this
 112     // into account when computing sizes.
 113     int chunk_size = pv->sample_size / 8;
 114     int samples_per_chunk = 1;
 115
 116     switch( pv->sample_size )
 117     {
 118         case 20:
 119             chunk_size = 5;
 120             samples_per_chunk = 2;
 121             break;
 122         case 24:
 123             chunk_size = 6;
 124             samples_per_chunk = 2;
 125             break;
 126     }
 127
 128     /*
 129      * PCM frames have a constant duration (150 90KHz ticks).
 130      * We need to convert that to the amount of data expected.  It's the
 131      * duration divided by the sample rate (to get #samples) times the number
 132      * of channels times the bits per sample divided by 8 to get bytes.
 133      * (we have to compute in bits because 20 bit samples are not an integral
 134      * number of bytes). We do all the multiplies first then the divides to
 135      * avoid truncation errors.
 136      */
 137     /*
 138      * Don't trust the number of frames given in the header.  We've seen
 139      * streams for which this is incorrect, and it can be computed.
 140      * pv->duration = in->data[0] * 150;
 141      */
 142     int chunks = ( in->size - pv->offset ) / chunk_size;
 143     int samples = chunks * samples_per_chunk;
 144
 145     // Calculate number of frames that start in this packet
 146     int frames = ( 90000 * samples / ( pv->samplerate * pv->nchannels ) +
 147                    149 ) / 150;
 148
 149     pv->duration = frames * 150;
 150     pv->nchunks =  ( pv->duration * pv->nchannels * pv->samplerate +
 151                     samples_per_chunk - 1 ) / ( 90000 * samples_per_chunk );
 152     pv->nsamples = ( pv->duration * pv->samplerate ) / 90000;
 153     pv->size = pv->nchunks * chunk_size;
 154
 155     pv->next_pts = in->s.start;
 156 }
 157
 158 static int declpcmInit( hb_work_object_t * w, hb_job_t * job )
 159 {
 160     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 161     w->private_data = pv;
 162     pv->job = job;
 163
 164     pv->resample =
 165         hb_audio_resample_init(AV_SAMPLE_FMT_FLT,
 166                                w->audio->config.out.mixdown,
 167                                w->audio->config.out.normalize_mix_level);
 168     if (pv->resample == NULL)
 169     {
 170         hb_error("declpcmInit: hb_audio_resample_init() failed");
 171         return 1;
 172     }
 173
 174     return 0;
 175 }
 176
 177 /*
 178  * Convert DVD encapsulated LPCM to floating point PCM audio buffers.
 179  * The amount of audio in a PCM frame is always <= the amount that will fit
 180  * in a DVD block (2048 bytes) but the standard doesn't require that the audio
 181  * frames line up with the DVD frames. Since audio frame boundaries are unrelated
 182  * to DVD PES boundaries, this routine has to reconstruct then extract the audio
 183  * frames. Because of the arbitrary alignment, it can output zero, one or two buf's.
 184  */
 185 static int declpcmWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 186                  hb_buffer_t ** buf_out )
 187 {
 188     hb_work_private_t * pv = w->private_data;
 189     hb_buffer_t *in = *buf_in;
 190     hb_buffer_t *buf = NULL;
 191
 192     if (in->s.flags & HB_BUF_FLAG_EOF)
 193     {
 194         /* EOF on input stream - send it downstream & say that we're done */
 195         *buf_out = in;
 196         *buf_in = NULL;
 197         return HB_WORK_DONE;
 198     }
 199
 200     pv->sequence = in->sequence;
 201
 202     /* if we have a frame to finish, add enough data from this buf to finish it */
 203     if ( pv->size )
 204     {
 205         memcpy( pv->frame + pv->pos, in->data + 6, pv->size - pv->pos );
 206         buf = Decode( w );
 207     }
 208     *buf_out = buf;
 209
 210     /* save the (rest of) data from this buf in our frame buffer */
 211     lpcmInfo( w, in );
 212     int off = pv->offset;
 213     int amt = in->size - off;
 214     pv->pos = amt;
 215     memcpy( pv->frame, in->data + off, amt );
 216     if ( amt >= pv->size )
 217     {
 218         if ( buf )
 219         {
 220             buf->next = Decode( w );
 221         }
 222         else
 223         {
 224             *buf_out = Decode( w );
 225         }
 226         pv->size = 0;
 227     }
 228     return HB_WORK_OK;
 229 }
 230
 231 static hb_buffer_t *Decode( hb_work_object_t *w )
 232 {
 233     hb_work_private_t *pv = w->private_data;
 234     hb_buffer_t *out;
 235
 236     if (pv->nsamples == 0)
 237         return NULL;
 238
 239     int size = pv->nsamples * pv->nchannels * sizeof( float );
 240     if (pv->alloc_size != size)
 241     {
 242         pv->data = realloc( pv->data, size );
 243         pv->alloc_size = size;
 244     }
 245
 246     float *odat = (float *)pv->data;
 247     int count = pv->nchunks / pv->nchannels;
 248
 249     switch( pv->sample_size )
 250     {
 251         case 16: // 2 byte, big endian, signed (the right shift sign extends)
 252         {
 253             uint8_t *frm = pv->frame;
 254             while ( count-- )
 255             {
 256                 int cc;
 257                 for( cc = 0; cc < pv->nchannels; cc++ )
 258                 {
 259                     // Shifts below result in sign extension which gives
 260                     // us proper signed values. The final division adjusts
 261                     // the range to [-1.0 ... 1.0]
 262                     *odat++ = (float)( ( (int)( frm[0] << 24 ) >> 16 ) |
 263                                        frm[1] ) / 32768.0;
 264                     frm += 2;
 265                 }
 266             }
 267         } break;
 268         case 20:
 269         {
 270             // There will always be 2 groups of samples.  A group is
 271             // a collection of samples that spans all channels.
 272             // The data for the samples is split.  The first 2 msb
 273             // bytes for all samples is encoded first, then the remaining
 274             // lsb bits are encoded.
 275             uint8_t *frm = pv->frame;
 276             while ( count-- )
 277             {
 278                 int gg, cc;
 279                 int shift = 4;
 280                 uint8_t *lsb = frm + 4 * pv->nchannels;
 281                 for( gg = 0; gg < 2; gg++ )
 282                 {
 283                     for( cc = 0; cc < pv->nchannels; cc++ )
 284                     {
 285                         // Shifts below result in sign extension which gives
 286                         // us proper signed values. The final division adjusts
 287                         // the range to [-1.0 ... 1.0]
 288                         *odat = (float)( ( (int)( frm[0] << 24 ) >> 12 ) |
 289                                          ( frm[1] << 4 ) |
 290                                          ( ( ( lsb[0] >> shift ) & 0x0f ) ) ) /
 291                                        (16. * 32768.0);
 292                         odat++;
 293                         lsb += !shift;
 294                         shift ^= 4;
 295                         frm += 2;
 296                     }
 297                 }
 298                 frm = lsb;
 299             }
 300         } break;
 301         case 24:
 302         {
 303             // There will always be 2 groups of samples.  A group is
 304             // a collection of samples that spans all channels.
 305             // The data for the samples is split.  The first 2 msb
 306             // bytes for all samples is encoded first, then the remaining
 307             // lsb bits are encoded.
 308             uint8_t *frm = pv->frame;
 309             while ( count-- )
 310             {
 311                 int gg, cc;
 312                 uint8_t *lsb = frm + 4 * pv->nchannels;
 313                 for( gg = 0; gg < 2; gg++ )
 314                 {
 315                     for( cc = 0; cc < pv->nchannels; cc++ )
 316                     {
 317                         // Shifts below result in sign extension which gives
 318                         // us proper signed values. The final division adjusts
 319                         // the range to [-1.0 ... 1.0]
 320                         *odat++ = (float)( ( (int)( frm[0] << 24 ) >> 8 ) |
 321                                            ( frm[1] << 8 ) | lsb[0] ) /
 322                                   (256. * 32768.0);
 323                         frm += 2;
 324                         lsb++;
 325                     }
 326                 }
 327                 frm = lsb;
 328             }
 329         } break;
 330     }
 331
 332     hb_audio_resample_set_channel_layout(pv->resample,
 333                                          hdr2layout[pv->nchannels - 1]);
 334     if (hb_audio_resample_update(pv->resample))
 335     {
 336         hb_log("declpcm: hb_audio_resample_update() failed");
 337         return NULL;
 338     }
 339     out = hb_audio_resample(pv->resample, &pv->data, pv->nsamples);
 340
 341     if (out != NULL)
 342     {
 343         out->s.start    = pv->next_pts;
 344         out->s.duration = pv->duration;
 345         pv->next_pts   += pv->duration;
 346         out->s.stop     = pv->next_pts;
 347     }
 348     return out;
 349 }
 350
 351 static void declpcmClose( hb_work_object_t * w )
 352 {
 353     hb_work_private_t * pv = w->private_data;
 354
 355     if ( pv )
 356     {
 357         hb_audio_resample_free(pv->resample);
 358         free( pv->data );
 359         free( pv );
 360         w->private_data = 0;
 361     }
 362 }
 363
 364 static int declpcmBSInfo( hb_work_object_t *w, const hb_buffer_t *b,
 365                           hb_work_info_t *info )
 366 {
 367     int nchannels  = ( b->data[4] & 7 ) + 1;
 368     int sample_size = hdr2samplesize[b->data[4] >> 6];
 369
 370     int rate = hdr2samplerate[ ( b->data[4] >> 4 ) & 0x3 ];
 371     int bitrate = rate * sample_size * nchannels;
 372     int64_t duration = b->data[0] * 150;
 373
 374     memset( info, 0, sizeof(*info) );
 375
 376     info->name = "LPCM";
 377     info->rate.num = rate;
 378     info->rate.den = 1;
 379     info->bitrate = bitrate;
 380     info->flags = ( b->data[3] << 16 ) | ( b->data[4] << 8 ) | b->data[5];
 381     info->matrix_encoding = AV_MATRIX_ENCODING_NONE;
 382     info->channel_layout = hdr2layout[nchannels - 1];
 383     info->channel_map = &hb_libav_chan_map;
 384     info->sample_bit_depth = sample_size;
 385     info->samples_per_frame = ( duration * rate ) / 90000;
 386
 387     return 1;
 388 }