Convert two tables to signed decimal
[ffmpeg-lucabe.git] / libavcodec / nellymoserenc.c
blobe1fc6f02ba9915383fb918c15e06d2827395d250
1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 /**
25 * @file nellymoserenc.c
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://www1.mplayerhq.hu/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "nellymoser.h"
39 #include "avcodec.h"
40 #include "dsputil.h"
42 #define BITSTREAM_WRITER_LE
43 #include "bitstream.h"
/* Number of entries in pow_table; encode_block() indexes the table with
 * (power_idx & 0x7FF), i.e. the low 11 bits of the power index. */
45 #define POW_TABLE_SIZE (1<<11)
/* Added to the exponent when pow_table is filled in encode_init() and added
 * to the shift count when a table value is scaled in encode_block(). */
46 #define POW_TABLE_OFFSET 3
/**
 * Private encoder state (stored in AVCodecContext.priv_data).
 *
 * avctx       back-pointer set in encode_init(); encode_block() reads
 *             avctx->trellis through it to choose the exponent search.
 * last_frame  set by encode_frame() when it is called without input data;
 *             every later call returns 0.
 * bufsel      index (0 or 1) of the half of buf used next; toggled by
 *             encode_frame() on every call that stores or flushes samples.
 * have_saved  set once the first frame of samples has been stored; no block
 *             is emitted before that.
 * dsp         DSP function table; its vector_fmul()/vector_fmul_reverse()
 *             are used in apply_mdct().
 * mdct_ctx    MDCT context created with ff_mdct_init(..., 8, 0).
 * mdct_out    MDCT output; apply_mdct() fills it at offsets 0 and
 *             NELLY_BUF_LEN.
 * buf         two sample buffers of 3 * NELLY_BUF_LEN floats each.
 */
48 typedef struct NellyMoserEncodeContext {
49 AVCodecContext *avctx;
50 int last_frame;
51 int bufsel;
52 int have_saved;
53 DSPContext dsp;
54 MDCTContext mdct_ctx;
55 DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
56 DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]); ///< sample buffer
57 } NellyMoserEncodeContext;
59 static float pow_table[POW_TABLE_SIZE]; ///< pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled in encode_init()
/* First-guess table for the band-0 exponent search: get_exponent_greedy()
 * passes it to find_best(), which reads
 * sf_lut[av_clip((lrintf(val) >> 8) - 20, 0, 95)] as a starting index into
 * ff_nelly_init_table and then also tries the following entry. */
61 static const uint8_t sf_lut[96] = {
62 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
63 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
64 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
65 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
66 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
67 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
/* First-guess table for the per-band exponent deltas (bands 1 and up):
 * get_exponent_greedy() passes it to find_best(), which reads
 * sf_delta_lut[av_clip((lrintf(val) >> 8) + 37, 0, 77)] as a starting index
 * into ff_nelly_delta_table and then also tries the following entry. */
70 static const uint8_t sf_delta_lut[78] = {
71 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
72 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
73 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
74 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
75 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
/* First-guess quantizer indices, stored as consecutive groups, one group per
 * bit count. encode_block() reads
 * quant_lut[av_clip(coeff * quant_lut_mul[b] + quant_lut_add[b],
 *                   quant_lut_offset[b], quant_lut_offset[b + 1] - 1)]
 * for b = bits[i] and then also tries the next dequantization-table entry.
 *
 * NOTE(review): the initializer as listed here holds 229 values for a
 * 230-entry array, while quant_lut_offset implies group sizes 1, 3, 7, 21,
 * 49 and 149. The one-entry group for a bit count of 1 appears to be missing
 * from this listing -- confirm against the upstream file. */
78 static const uint8_t quant_lut[230] = {
81 0, 1, 2,
83 0, 1, 2, 3, 4, 5, 6,
85 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
86 12, 13, 13, 13, 14,
88 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
89 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
90 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
91 30,
93 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
94 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
95 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
96 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
97 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
98 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
99 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
100 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
101 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
102 61, 61, 61, 61, 62,
/* Per-bit-count parameters for the quant_lut lookup in encode_block(), all
 * indexed by bits[i]:
 *   quant_lut_mul / quant_lut_add  map a scaled coefficient to a position in
 *                                  quant_lut (coeff * mul + add);
 *   quant_lut_offset               start of each group inside quant_lut;
 *                                  entry [b + 1] - 1 is the last index of
 *                                  group b, and the final entry (230) equals
 *                                  the size of quant_lut. */
105 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
106 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
107 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
109 void apply_mdct(NellyMoserEncodeContext *s)
111 DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
113 memcpy(in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
114 s->dsp.vector_fmul(in_buff, ff_sine_128, NELLY_BUF_LEN);
115 s->dsp.vector_fmul_reverse(in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
116 NELLY_BUF_LEN);
117 ff_mdct_calc(&s->mdct_ctx, s->mdct_out, in_buff);
119 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
120 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
121 NELLY_BUF_LEN);
122 ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
125 static av_cold int encode_init(AVCodecContext *avctx)
127 NellyMoserEncodeContext *s = avctx->priv_data;
128 int i;
130 if (avctx->channels != 1) {
131 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
132 return -1;
135 if (avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
136 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
137 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
138 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
139 return -1;
142 avctx->frame_size = NELLY_SAMPLES;
143 s->avctx = avctx;
144 ff_mdct_init(&s->mdct_ctx, 8, 0);
145 dsputil_init(&s->dsp, avctx);
147 /* Generate overlap window */
148 ff_sine_window_init(ff_sine_128, 128);
149 for (i = 0; i < POW_TABLE_SIZE; i++)
150 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
152 return 0;
155 static av_cold int encode_end(AVCodecContext *avctx)
157 NellyMoserEncodeContext *s = avctx->priv_data;
159 ff_mdct_end(&s->mdct_ctx);
160 return 0;
163 #define find_best(val, table, LUT, LUT_add, LUT_size) \
164 best_idx = \
165 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
166 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
167 best_idx++;
169 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
171 int band, best_idx, power_idx = 0;
172 float power_candidate;
174 //base exponent
175 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
176 idx_table[0] = best_idx;
177 power_idx = ff_nelly_init_table[best_idx];
179 for (band = 1; band < NELLY_BANDS; band++) {
180 power_candidate = cand[band] - power_idx;
181 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
182 idx_table[band] = best_idx;
183 power_idx += ff_nelly_delta_table[best_idx];
/* Number of power values tracked per band by get_exponent_dynamic(); it is
 * the second dimension of that function's opt and path tables. */
187 #define OPT_SIZE ((1<<15) + 3000)
/**
 * Cost of representing y by x: the squared difference.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently not used in the cost)
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
196 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
198 int i, j, band, best_idx;
199 float power_candidate, best_val;
201 float opt[NELLY_BANDS][OPT_SIZE];
202 int path[NELLY_BANDS][OPT_SIZE];
204 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
205 opt[0][i] = INFINITY;
208 for (i = 0; i < 64; i++) {
209 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
210 path[0][ff_nelly_init_table[i]] = i;
213 for (band = 1; band < NELLY_BANDS; band++) {
214 int q, c = 0;
215 float tmp;
216 int idx_min, idx_max, idx;
217 power_candidate = cand[band];
218 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
219 idx_min = FFMAX(0, cand[band] - q);
220 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
221 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
222 if ( isinf(opt[band - 1][i]) )
223 continue;
224 for (j = 0; j < 32; j++) {
225 idx = i + ff_nelly_delta_table[j];
226 if (idx > idx_max)
227 break;
228 if (idx >= idx_min) {
229 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
230 if (opt[band][idx] > tmp) {
231 opt[band][idx] = tmp;
232 path[band][idx] = j;
233 c = 1;
239 assert(c); //FIXME
242 best_val = INFINITY;
243 best_idx = -1;
244 band = NELLY_BANDS - 1;
245 for (i = 0; i < OPT_SIZE; i++) {
246 if (best_val > opt[band][i]) {
247 best_val = opt[band][i];
248 best_idx = i;
251 for (band = NELLY_BANDS - 1; band >= 0; band--) {
252 idx_table[band] = path[band][best_idx];
253 if (band) {
254 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
260 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
261 * @param s encoder context
262 * @param output output buffer
263 * @param output_size size of output buffer
265 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
267 PutBitContext pb;
268 int i, j, band, block, best_idx, power_idx = 0;
269 float power_val, coeff, coeff_sum;
270 float pows[NELLY_FILL_LEN];
271 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
272 float cand[NELLY_BANDS];
274 apply_mdct(s);
276 init_put_bits(&pb, output, output_size * 8);
278 i = 0;
279 for (band = 0; band < NELLY_BANDS; band++) {
280 coeff_sum = 0;
281 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
282 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
283 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
285 cand[band] =
286 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
289 if (s->avctx->trellis) {
290 get_exponent_dynamic(s, cand, idx_table);
291 } else {
292 get_exponent_greedy(s, cand, idx_table);
295 i = 0;
296 for (band = 0; band < NELLY_BANDS; band++) {
297 if (band) {
298 power_idx += ff_nelly_delta_table[idx_table[band]];
299 put_bits(&pb, 5, idx_table[band]);
300 } else {
301 power_idx = ff_nelly_init_table[idx_table[0]];
302 put_bits(&pb, 6, idx_table[0]);
304 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
305 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
306 s->mdct_out[i] *= power_val;
307 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
308 pows[i] = power_idx;
312 ff_nelly_get_sample_bits(pows, bits);
314 for (block = 0; block < 2; block++) {
315 for (i = 0; i < NELLY_FILL_LEN; i++) {
316 if (bits[i] > 0) {
317 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
318 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
319 best_idx =
320 quant_lut[av_clip (
321 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
322 quant_lut_offset[bits[i]],
323 quant_lut_offset[bits[i]+1] - 1
325 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
326 best_idx++;
328 put_bits(&pb, bits[i], best_idx);
331 if (!block)
332 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
336 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
338 NellyMoserEncodeContext *s = avctx->priv_data;
339 int16_t *samples = data;
340 int i;
342 if (s->last_frame)
343 return 0;
345 if (data) {
346 for (i = 0; i < avctx->frame_size; i++) {
347 s->buf[s->bufsel][i] = samples[i];
349 for (; i < NELLY_SAMPLES; i++) {
350 s->buf[s->bufsel][i] = 0;
352 s->bufsel = 1 - s->bufsel;
353 if (!s->have_saved) {
354 s->have_saved = 1;
355 return 0;
357 } else {
358 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
359 s->bufsel = 1 - s->bufsel;
360 s->last_frame = 1;
363 if (s->have_saved) {
364 encode_block(s, frame, buf_size);
365 return NELLY_BLOCK_LEN;
367 return 0;
370 AVCodec nellymoser_encoder = {
371 .name = "nellymoser",
372 .type = CODEC_TYPE_AUDIO,
373 .id = CODEC_ID_NELLYMOSER,
374 .priv_data_size = sizeof(NellyMoserEncodeContext),
375 .init = encode_init,
376 .encode = encode_frame,
377 .close = encode_end,
378 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
379 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao Codec"),