libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file nellymoserenc.c
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://www1.mplayerhq.hu/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "nellymoser.h"
  39 #include "avcodec.h"
  40 #include "dsputil.h"
  41
  42 #define BITSTREAM_WRITER_LE
  43 #include "bitstream.h"
  44
/* Number of entries in pow_table: encode_init() fills one entry per 1/2048
 * step of the exponent, and encode_block() selects an entry with the low
 * 11 bits of the accumulated exponent value. */
#define POW_TABLE_SIZE (1<<11)
/* Bias added to the exponent when pow_table is filled in encode_init();
 * encode_block() compensates for it with an equally larger divisor when it
 * uses a table entry. */
#define POW_TABLE_OFFSET 3
  47
/**
 * Private encoder state, stored in AVCodecContext.priv_data.
 */
typedef struct NellyMoserEncodeContext {
    AVCodecContext  *avctx;         ///< owning codec context, set in encode_init()
    int             last_frame;     ///< set to 1 once encode_frame() has been called with NULL data
    int             bufsel;         ///< index (0 or 1) into buf[]; toggled by encode_frame() on every call that stores or flushes samples
    int             have_saved;     ///< set to 1 after the first frame of samples has been stored
    DSPContext      dsp;            ///< provides vector_fmul / vector_fmul_reverse used for windowing
    MDCTContext     mdct_ctx;       ///< MDCT state, initialised in encode_init() and released in encode_end()
    DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);       ///< output of the two MDCTs run by apply_mdct()
    DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]);     ///< sample buffer; the last NELLY_BUF_LEN entries of each row are scratch space for apply_mdct()
} NellyMoserEncodeContext;
  58
static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled in encode_init()
  60
/**
 * Starting-index lookup for the first band's exponent.
 * get_exponent_greedy() indexes it with (value >> 8) - 20, clipped to
 * [0, 95], to obtain an initial guess into ff_nelly_init_table which
 * find_best() then refines by at most one step.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
  69
/**
 * Starting-index lookup for the exponent deltas of bands 1 and up.
 * get_exponent_greedy() indexes it with (value >> 8) + 37, clipped to
 * [0, 77], to obtain an initial guess into ff_nelly_delta_table which
 * find_best() then refines by at most one step.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
  77
/**
 * Starting-index lookup for coefficient quantisation.
 * Six sub-tables are stored back to back, one per bit count 1..6, holding
 * 1, 3, 7, 21, 49 and 149 entries. The sub-table for `bits` occupies the
 * index range [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 * encode_block() computes coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clips it to that range and uses the entry found there as the first guess
 * for the quantisation index.
 */
static const uint8_t quant_lut[230] = {
     /* 1 bit */
     0,

     /* 2 bits */
     0,  1,  2,

     /* 3 bits */
     0,  1,  2,  3,  4,  5,  6,

     /* 4 bits */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

     /* 5 bits */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

     /* 6 bits */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
 104
/* Per-bit-count scale applied to a coefficient before it is used as an index
 * into quant_lut (indexed by the number of bits, 0..6). */
static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
/* Per-bit-count offset added after scaling; see quant_lut_mul. */
static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
/* First quant_lut index of the sub-table for each bit count; entry [bits + 1]
 * is one past the last index of the sub-table for `bits`. */
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 108
 109 void apply_mdct(NellyMoserEncodeContext *s)
 110 {
 111     DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
 112
 113     memcpy(in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
 114     s->dsp.vector_fmul(in_buff, ff_sine_128, NELLY_BUF_LEN);
 115     s->dsp.vector_fmul_reverse(in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
 116                                NELLY_BUF_LEN);
 117     ff_mdct_calc(&s->mdct_ctx, s->mdct_out, in_buff);
 118
 119     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
 120     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
 121                                NELLY_BUF_LEN);
 122     ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
 123 }
 124
 125 static av_cold int encode_init(AVCodecContext *avctx)
 126 {
 127     NellyMoserEncodeContext *s = avctx->priv_data;
 128     int i;
 129
 130     if (avctx->channels != 1) {
 131         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 132         return -1;
 133     }
 134
 135     if (avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
 136         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 137         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 138         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
 139         return -1;
 140     }
 141
 142     avctx->frame_size = NELLY_SAMPLES;
 143     s->avctx = avctx;
 144     ff_mdct_init(&s->mdct_ctx, 8, 0);
 145     dsputil_init(&s->dsp, avctx);
 146
 147     /* Generate overlap window */
 148     ff_sine_window_init(ff_sine_128, 128);
 149     for (i = 0; i < POW_TABLE_SIZE; i++)
 150         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 151
 152     return 0;
 153 }
 154
 155 static av_cold int encode_end(AVCodecContext *avctx)
 156 {
 157     NellyMoserEncodeContext *s = avctx->priv_data;
 158
 159     ff_mdct_end(&s->mdct_ctx);
 160     return 0;
 161 }
 162
 163 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 164     best_idx = \
 165         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 166     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 167         best_idx++;
 168
 169 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 170 {
 171     int band, best_idx, power_idx = 0;
 172     float power_candidate;
 173
 174     //base exponent
 175     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 176     idx_table[0] = best_idx;
 177     power_idx = ff_nelly_init_table[best_idx];
 178
 179     for (band = 1; band < NELLY_BANDS; band++) {
 180         power_candidate = cand[band] - power_idx;
 181         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 182         idx_table[band] = best_idx;
 183         power_idx += ff_nelly_delta_table[best_idx];
 184     }
 185 }
 186
/* Number of distinct accumulated exponent values tracked per band by
 * get_exponent_dynamic(); valid values are 0 .. OPT_SIZE - 1. */
#define OPT_SIZE ((1<<15) + 3000)
 188
 189 static inline float distance(float x, float y, int band)
 190 {
 191     //return pow(fabs(x-y), 2.0);
 192     float tmp = x - y;
 193     return tmp * tmp;
 194 }
 195
 196 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 197 {
 198     int i, j, band, best_idx;
 199     float power_candidate, best_val;
 200
 201     float opt[NELLY_BANDS][OPT_SIZE];
 202     int path[NELLY_BANDS][OPT_SIZE];
 203
 204     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 205         opt[0][i] = INFINITY;
 206     }
 207
 208     for (i = 0; i < 64; i++) {
 209         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 210         path[0][ff_nelly_init_table[i]] = i;
 211     }
 212
 213     for (band = 1; band < NELLY_BANDS; band++) {
 214         int q, c = 0;
 215         float tmp;
 216         int idx_min, idx_max, idx;
 217         power_candidate = cand[band];
 218         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 219             idx_min = FFMAX(0, cand[band] - q);
 220             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 221             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 222                 if ( isinf(opt[band - 1][i]) )
 223                     continue;
 224                 for (j = 0; j < 32; j++) {
 225                     idx = i + ff_nelly_delta_table[j];
 226                     if (idx > idx_max)
 227                         break;
 228                     if (idx >= idx_min) {
 229                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 230                         if (opt[band][idx] > tmp) {
 231                             opt[band][idx] = tmp;
 232                             path[band][idx] = j;
 233                             c = 1;
 234                         }
 235                     }
 236                 }
 237             }
 238         }
 239         assert(c); //FIXME
 240     }
 241
 242     best_val = INFINITY;
 243     best_idx = -1;
 244     band = NELLY_BANDS - 1;
 245     for (i = 0; i < OPT_SIZE; i++) {
 246         if (best_val > opt[band][i]) {
 247             best_val = opt[band][i];
 248             best_idx = i;
 249         }
 250     }
 251     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 252         idx_table[band] = path[band][best_idx];
 253         if (band) {
 254             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 255         }
 256     }
 257 }
 258
 259 /**
 260  * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 261  *  @param s               encoder context
 262  *  @param output          output buffer
 263  *  @param output_size     size of output buffer
 264  */
 265 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 266 {
 267     PutBitContext pb;
 268     int i, j, band, block, best_idx, power_idx = 0;
 269     float power_val, coeff, coeff_sum;
 270     float pows[NELLY_FILL_LEN];
 271     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 272     float cand[NELLY_BANDS];
 273
 274     apply_mdct(s);
 275
 276     init_put_bits(&pb, output, output_size * 8);
 277
 278     i = 0;
 279     for (band = 0; band < NELLY_BANDS; band++) {
 280         coeff_sum = 0;
 281         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 282             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 283                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 284         }
 285         cand[band] =
 286             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 287     }
 288
 289     if (s->avctx->trellis) {
 290         get_exponent_dynamic(s, cand, idx_table);
 291     } else {
 292         get_exponent_greedy(s, cand, idx_table);
 293     }
 294
 295     i = 0;
 296     for (band = 0; band < NELLY_BANDS; band++) {
 297         if (band) {
 298             power_idx += ff_nelly_delta_table[idx_table[band]];
 299             put_bits(&pb, 5, idx_table[band]);
 300         } else {
 301             power_idx = ff_nelly_init_table[idx_table[0]];
 302             put_bits(&pb, 6, idx_table[0]);
 303         }
 304         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 305         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 306             s->mdct_out[i] *= power_val;
 307             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 308             pows[i] = power_idx;
 309         }
 310     }
 311
 312     ff_nelly_get_sample_bits(pows, bits);
 313
 314     for (block = 0; block < 2; block++) {
 315         for (i = 0; i < NELLY_FILL_LEN; i++) {
 316             if (bits[i] > 0) {
 317                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 318                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 319                 best_idx =
 320                     quant_lut[av_clip (
 321                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 322                             quant_lut_offset[bits[i]],
 323                             quant_lut_offset[bits[i]+1] - 1
 324                             )];
 325                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 326                     best_idx++;
 327
 328                 put_bits(&pb, bits[i], best_idx);
 329             }
 330         }
 331         if (!block)
 332             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 333     }
 334 }
 335
 336 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
 337 {
 338     NellyMoserEncodeContext *s = avctx->priv_data;
 339     int16_t *samples = data;
 340     int i;
 341
 342     if (s->last_frame)
 343         return 0;
 344
 345     if (data) {
 346         for (i = 0; i < avctx->frame_size; i++) {
 347             s->buf[s->bufsel][i] = samples[i];
 348         }
 349         for (; i < NELLY_SAMPLES; i++) {
 350             s->buf[s->bufsel][i] = 0;
 351         }
 352         s->bufsel = 1 - s->bufsel;
 353         if (!s->have_saved) {
 354             s->have_saved = 1;
 355             return 0;
 356         }
 357     } else {
 358         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
 359         s->bufsel = 1 - s->bufsel;
 360         s->last_frame = 1;
 361     }
 362
 363     if (s->have_saved) {
 364         encode_block(s, frame, buf_size);
 365         return NELLY_BLOCK_LEN;
 366     }
 367     return 0;
 368 }
 369
/**
 * Codec registration entry for the Nellymoser encoder.
 */
AVCodec nellymoser_encoder = {
    .name = "nellymoser",
    .type = CODEC_TYPE_AUDIO,
    .id = CODEC_ID_NELLYMOSER,
    .priv_data_size = sizeof(NellyMoserEncodeContext),
    .init = encode_init,
    .encode = encode_frame,
    .close = encode_end,
    /* SMALL_LAST_FRAME: encode_frame() zero-pads a frame shorter than
     * NELLY_SAMPLES. DELAY: encode_frame() returns 0 for the first frame and
     * emits the final block when it is called with NULL data. */
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao Codec"),
};