Replace 5 with AOT_SBR when referring to the MPEG-4 audio object type.
[FFMpeg-mirror/lagarith.git] / libavcodec / nellymoserenc.c
blobc9f93dbcb88656464a88f83479a36989b4cdbff4
1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 /**
25 * @file libavcodec/nellymoserenc.c
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "nellymoser.h"
39 #include "avcodec.h"
40 #include "dsputil.h"
42 #define BITSTREAM_WRITER_LE
43 #include "put_bits.h"
45 #define POW_TABLE_SIZE (1<<11)
46 #define POW_TABLE_OFFSET 3
47 #define OPT_SIZE ((1<<15) + 3000)
49 typedef struct NellyMoserEncodeContext {
50 AVCodecContext *avctx;
51 int last_frame;
52 int bufsel;
53 int have_saved;
54 DSPContext dsp;
55 MDCTContext mdct_ctx;
56 DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
57 DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
58 DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]); ///< sample buffer
59 float (*opt )[NELLY_BANDS];
60 uint8_t (*path)[NELLY_BANDS];
61 } NellyMoserEncodeContext;
63 static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0);
65 static const uint8_t sf_lut[96] = {
66 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
67 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
68 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
69 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
70 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
71 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
74 static const uint8_t sf_delta_lut[78] = {
75 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
76 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
77 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
78 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
79 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
82 static const uint8_t quant_lut[230] = {
85 0, 1, 2,
87 0, 1, 2, 3, 4, 5, 6,
89 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
90 12, 13, 13, 13, 14,
92 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
93 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
94 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
95 30,
97 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
98 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
99 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
100 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
101 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
102 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
103 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
104 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
105 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
106 61, 61, 61, 61, 62,
109 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
110 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
111 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
113 void apply_mdct(NellyMoserEncodeContext *s)
115 memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
116 s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN);
117 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
118 NELLY_BUF_LEN);
119 ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
121 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
122 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
123 NELLY_BUF_LEN);
124 ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
127 static av_cold int encode_init(AVCodecContext *avctx)
129 NellyMoserEncodeContext *s = avctx->priv_data;
130 int i;
132 if (avctx->channels != 1) {
133 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
134 return -1;
137 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
138 avctx->sample_rate != 11025 &&
139 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
140 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
141 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
142 return -1;
145 avctx->frame_size = NELLY_SAMPLES;
146 s->avctx = avctx;
147 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
148 dsputil_init(&s->dsp, avctx);
150 /* Generate overlap window */
151 ff_sine_window_init(ff_sine_128, 128);
152 for (i = 0; i < POW_TABLE_SIZE; i++)
153 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
155 if (s->avctx->trellis) {
156 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
157 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
160 return 0;
163 static av_cold int encode_end(AVCodecContext *avctx)
165 NellyMoserEncodeContext *s = avctx->priv_data;
167 ff_mdct_end(&s->mdct_ctx);
169 if (s->avctx->trellis) {
170 av_free(s->opt);
171 av_free(s->path);
174 return 0;
177 #define find_best(val, table, LUT, LUT_add, LUT_size) \
178 best_idx = \
179 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
180 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
181 best_idx++;
183 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
185 int band, best_idx, power_idx = 0;
186 float power_candidate;
188 //base exponent
189 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
190 idx_table[0] = best_idx;
191 power_idx = ff_nelly_init_table[best_idx];
193 for (band = 1; band < NELLY_BANDS; band++) {
194 power_candidate = cand[band] - power_idx;
195 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
196 idx_table[band] = best_idx;
197 power_idx += ff_nelly_delta_table[best_idx];
201 static inline float distance(float x, float y, int band)
203 //return pow(fabs(x-y), 2.0);
204 float tmp = x - y;
205 return tmp * tmp;
208 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
210 int i, j, band, best_idx;
211 float power_candidate, best_val;
213 float (*opt )[NELLY_BANDS] = s->opt ;
214 uint8_t(*path)[NELLY_BANDS] = s->path;
216 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
217 opt[0][i] = INFINITY;
220 for (i = 0; i < 64; i++) {
221 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
222 path[0][ff_nelly_init_table[i]] = i;
225 for (band = 1; band < NELLY_BANDS; band++) {
226 int q, c = 0;
227 float tmp;
228 int idx_min, idx_max, idx;
229 power_candidate = cand[band];
230 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
231 idx_min = FFMAX(0, cand[band] - q);
232 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
233 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
234 if ( isinf(opt[band - 1][i]) )
235 continue;
236 for (j = 0; j < 32; j++) {
237 idx = i + ff_nelly_delta_table[j];
238 if (idx > idx_max)
239 break;
240 if (idx >= idx_min) {
241 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
242 if (opt[band][idx] > tmp) {
243 opt[band][idx] = tmp;
244 path[band][idx] = j;
245 c = 1;
251 assert(c); //FIXME
254 best_val = INFINITY;
255 best_idx = -1;
256 band = NELLY_BANDS - 1;
257 for (i = 0; i < OPT_SIZE; i++) {
258 if (best_val > opt[band][i]) {
259 best_val = opt[band][i];
260 best_idx = i;
263 for (band = NELLY_BANDS - 1; band >= 0; band--) {
264 idx_table[band] = path[band][best_idx];
265 if (band) {
266 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
272 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
273 * @param s encoder context
274 * @param output output buffer
275 * @param output_size size of output buffer
277 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
279 PutBitContext pb;
280 int i, j, band, block, best_idx, power_idx = 0;
281 float power_val, coeff, coeff_sum;
282 float pows[NELLY_FILL_LEN];
283 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
284 float cand[NELLY_BANDS];
286 apply_mdct(s);
288 init_put_bits(&pb, output, output_size * 8);
290 i = 0;
291 for (band = 0; band < NELLY_BANDS; band++) {
292 coeff_sum = 0;
293 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
294 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
295 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
297 cand[band] =
298 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
301 if (s->avctx->trellis) {
302 get_exponent_dynamic(s, cand, idx_table);
303 } else {
304 get_exponent_greedy(s, cand, idx_table);
307 i = 0;
308 for (band = 0; band < NELLY_BANDS; band++) {
309 if (band) {
310 power_idx += ff_nelly_delta_table[idx_table[band]];
311 put_bits(&pb, 5, idx_table[band]);
312 } else {
313 power_idx = ff_nelly_init_table[idx_table[0]];
314 put_bits(&pb, 6, idx_table[0]);
316 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
317 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
318 s->mdct_out[i] *= power_val;
319 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
320 pows[i] = power_idx;
324 ff_nelly_get_sample_bits(pows, bits);
326 for (block = 0; block < 2; block++) {
327 for (i = 0; i < NELLY_FILL_LEN; i++) {
328 if (bits[i] > 0) {
329 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
330 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
331 best_idx =
332 quant_lut[av_clip (
333 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
334 quant_lut_offset[bits[i]],
335 quant_lut_offset[bits[i]+1] - 1
337 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
338 best_idx++;
340 put_bits(&pb, bits[i], best_idx);
343 if (!block)
344 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
347 flush_put_bits(&pb);
350 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
352 NellyMoserEncodeContext *s = avctx->priv_data;
353 int16_t *samples = data;
354 int i;
356 if (s->last_frame)
357 return 0;
359 if (data) {
360 for (i = 0; i < avctx->frame_size; i++) {
361 s->buf[s->bufsel][i] = samples[i];
363 for (; i < NELLY_SAMPLES; i++) {
364 s->buf[s->bufsel][i] = 0;
366 s->bufsel = 1 - s->bufsel;
367 if (!s->have_saved) {
368 s->have_saved = 1;
369 return 0;
371 } else {
372 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
373 s->bufsel = 1 - s->bufsel;
374 s->last_frame = 1;
377 if (s->have_saved) {
378 encode_block(s, frame, buf_size);
379 return NELLY_BLOCK_LEN;
381 return 0;
384 AVCodec nellymoser_encoder = {
385 .name = "nellymoser",
386 .type = CODEC_TYPE_AUDIO,
387 .id = CODEC_ID_NELLYMOSER,
388 .priv_data_size = sizeof(NellyMoserEncodeContext),
389 .init = encode_init,
390 .encode = encode_frame,
391 .close = encode_end,
392 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
393 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),