asfdec: also read Metadata Library Object
[FFMpeg-mirror/mplayer-patches.git] / libavcodec / g722enc.c
blob11d3f209337e0d596bd21a6fe5e714d171dce1fc
1 /*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
 * @file
 * G.722 ADPCM audio encoder
 */
30 #include "avcodec.h"
31 #include "internal.h"
32 #include "g722.h"
33 #include "libavutil/common.h"
/* Number of encoded bytes (sample pairs) the trellis search processes before
 * it commits the best path found so far to the output. The per-band path
 * buffers are sized as (1 << trellis) * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* Upper bound accepted for avctx->frame_size. The value itself is arbitrary;
 * insanely large frame sizes lead to strange problems, so they are capped. */
#define MAX_FRAME_SIZE 32768

/* Range avctx->trellis is clipped to, in order to prevent data type
 * overflows and undefined behavior. Larger values would be insanely slow
 * anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
46 static av_cold int g722_encode_close(AVCodecContext *avctx)
48 G722Context *c = avctx->priv_data;
49 int i;
50 for (i = 0; i < 2; i++) {
51 av_freep(&c->paths[i]);
52 av_freep(&c->node_buf[i]);
53 av_freep(&c->nodep_buf[i]);
55 #if FF_API_OLD_ENCODE_AUDIO
56 av_freep(&avctx->coded_frame);
57 #endif
58 return 0;
61 static av_cold int g722_encode_init(AVCodecContext * avctx)
63 G722Context *c = avctx->priv_data;
64 int ret;
66 if (avctx->channels != 1) {
67 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
68 return AVERROR_INVALIDDATA;
71 c->band[0].scale_factor = 8;
72 c->band[1].scale_factor = 2;
73 c->prev_samples_pos = 22;
75 if (avctx->trellis) {
76 int frontier = 1 << avctx->trellis;
77 int max_paths = frontier * FREEZE_INTERVAL;
78 int i;
79 for (i = 0; i < 2; i++) {
80 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
81 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
82 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
83 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
84 ret = AVERROR(ENOMEM);
85 goto error;
90 if (avctx->frame_size) {
91 /* validate frame size */
92 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
93 int new_frame_size;
95 if (avctx->frame_size == 1)
96 new_frame_size = 2;
97 else if (avctx->frame_size > MAX_FRAME_SIZE)
98 new_frame_size = MAX_FRAME_SIZE;
99 else
100 new_frame_size = avctx->frame_size - 1;
102 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
103 "allowed. Using %d instead of %d\n", new_frame_size,
104 avctx->frame_size);
105 avctx->frame_size = new_frame_size;
107 } else {
108 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
109 a common packet size for VoIP applications */
110 avctx->frame_size = 320;
112 avctx->delay = 22;
114 if (avctx->trellis) {
115 /* validate trellis */
116 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
117 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
118 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
119 "allowed. Using %d instead of %d\n", new_trellis,
120 avctx->trellis);
121 avctx->trellis = new_trellis;
125 #if FF_API_OLD_ENCODE_AUDIO
126 avctx->coded_frame = avcodec_alloc_frame();
127 if (!avctx->coded_frame) {
128 ret = AVERROR(ENOMEM);
129 goto error;
131 #endif
133 return 0;
134 error:
135 g722_encode_close(avctx);
136 return ret;
/**
 * Ascending decision thresholds of the low-band quantizer.
 *
 * encode_low() compares them, multiplied by the band's scale_factor, against
 * the scaled prediction error. Only the first 29 entries are given; the
 * remaining ones are zero-initialized and encode_low() never reads them.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,
     276,  323,  370,  422,  473,  530,
     587,  650,  714,  786,  858,  940,
    1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
146 static inline void filter_samples(G722Context *c, const int16_t *samples,
147 int *xlow, int *xhigh)
149 int xout1, xout2;
150 c->prev_samples[c->prev_samples_pos++] = samples[0];
151 c->prev_samples[c->prev_samples_pos++] = samples[1];
152 ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
153 *xlow = xout1 + xout2 >> 14;
154 *xhigh = xout1 - xout2 >> 14;
155 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
156 memmove(c->prev_samples,
157 c->prev_samples + c->prev_samples_pos - 22,
158 22 * sizeof(c->prev_samples[0]));
159 c->prev_samples_pos = 22;
163 static inline int encode_high(const struct G722Band *state, int xhigh)
165 int diff = av_clip_int16(xhigh - state->s_predictor);
166 int pred = 141 * state->scale_factor >> 8;
167 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
168 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
171 static inline int encode_low(const struct G722Band* state, int xlow)
173 int diff = av_clip_int16(xlow - state->s_predictor);
174 /* = diff >= 0 ? diff : -(diff + 1) */
175 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
176 int i = 0;
177 limit = limit + 1 << 10;
178 if (limit > low_quant[8] * state->scale_factor)
179 i = 9;
180 while (i < 29 && limit > low_quant[i] * state->scale_factor)
181 i++;
182 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
185 static void g722_encode_trellis(G722Context *c, int trellis,
186 uint8_t *dst, int nb_samples,
187 const int16_t *samples)
189 int i, j, k;
190 int frontier = 1 << trellis;
191 struct TrellisNode **nodes[2];
192 struct TrellisNode **nodes_next[2];
193 int pathn[2] = {0, 0}, froze = -1;
194 struct TrellisPath *p[2];
196 for (i = 0; i < 2; i++) {
197 nodes[i] = c->nodep_buf[i];
198 nodes_next[i] = c->nodep_buf[i] + frontier;
199 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
200 nodes[i][0] = c->node_buf[i] + frontier;
201 nodes[i][0]->ssd = 0;
202 nodes[i][0]->path = 0;
203 nodes[i][0]->state = c->band[i];
206 for (i = 0; i < nb_samples >> 1; i++) {
207 int xlow, xhigh;
208 struct TrellisNode *next[2];
209 int heap_pos[2] = {0, 0};
211 for (j = 0; j < 2; j++) {
212 next[j] = c->node_buf[j] + frontier*(i & 1);
213 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
216 filter_samples(c, &samples[2*i], &xlow, &xhigh);
218 for (j = 0; j < frontier && nodes[0][j]; j++) {
219 /* Only k >> 2 affects the future adaptive state, therefore testing
220 * small steps that don't change k >> 2 is useless, the original
221 * value from encode_low is better than them. Since we step k
222 * in steps of 4, make sure range is a multiple of 4, so that
223 * we don't miss the original value from encode_low. */
224 int range = j < frontier/2 ? 4 : 0;
225 struct TrellisNode *cur_node = nodes[0][j];
227 int ilow = encode_low(&cur_node->state, xlow);
229 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
230 int decoded, dec_diff, pos;
231 uint32_t ssd;
232 struct TrellisNode* node;
234 if (k < 0)
235 continue;
237 decoded = av_clip((cur_node->state.scale_factor *
238 ff_g722_low_inv_quant6[k] >> 10)
239 + cur_node->state.s_predictor, -16384, 16383);
240 dec_diff = xlow - decoded;
242 #define STORE_NODE(index, UPDATE, VALUE)\
243 ssd = cur_node->ssd + dec_diff*dec_diff;\
244 /* Check for wraparound. Using 64 bit ssd counters would \
245 * be simpler, but is slower on x86 32 bit. */\
246 if (ssd < cur_node->ssd)\
247 continue;\
248 if (heap_pos[index] < frontier) {\
249 pos = heap_pos[index]++;\
250 assert(pathn[index] < FREEZE_INTERVAL * frontier);\
251 node = nodes_next[index][pos] = next[index]++;\
252 node->path = pathn[index]++;\
253 } else {\
254 /* Try to replace one of the leaf nodes with the new \
255 * one, but not always testing the same leaf position */\
256 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
257 if (ssd >= nodes_next[index][pos]->ssd)\
258 continue;\
259 heap_pos[index]++;\
260 node = nodes_next[index][pos];\
262 node->ssd = ssd;\
263 node->state = cur_node->state;\
264 UPDATE;\
265 c->paths[index][node->path].value = VALUE;\
266 c->paths[index][node->path].prev = cur_node->path;\
267 /* Sift the newly inserted node up in the heap to restore \
268 * the heap property */\
269 while (pos > 0) {\
270 int parent = (pos - 1) >> 1;\
271 if (nodes_next[index][parent]->ssd <= ssd)\
272 break;\
273 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
274 nodes_next[index][pos]);\
275 pos = parent;\
277 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
281 for (j = 0; j < frontier && nodes[1][j]; j++) {
282 int ihigh;
283 struct TrellisNode *cur_node = nodes[1][j];
285 /* We don't try to get any initial guess for ihigh via
286 * encode_high - since there's only 4 possible values, test
287 * them all. Testing all of these gives a much, much larger
288 * gain than testing a larger range around ilow. */
289 for (ihigh = 0; ihigh < 4; ihigh++) {
290 int dhigh, decoded, dec_diff, pos;
291 uint32_t ssd;
292 struct TrellisNode* node;
294 dhigh = cur_node->state.scale_factor *
295 ff_g722_high_inv_quant[ihigh] >> 10;
296 decoded = av_clip(dhigh + cur_node->state.s_predictor,
297 -16384, 16383);
298 dec_diff = xhigh - decoded;
300 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
304 for (j = 0; j < 2; j++) {
305 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
307 if (nodes[j][0]->ssd > (1 << 16)) {
308 for (k = 1; k < frontier && nodes[j][k]; k++)
309 nodes[j][k]->ssd -= nodes[j][0]->ssd;
310 nodes[j][0]->ssd = 0;
314 if (i == froze + FREEZE_INTERVAL) {
315 p[0] = &c->paths[0][nodes[0][0]->path];
316 p[1] = &c->paths[1][nodes[1][0]->path];
317 for (j = i; j > froze; j--) {
318 dst[j] = p[1]->value << 6 | p[0]->value;
319 p[0] = &c->paths[0][p[0]->prev];
320 p[1] = &c->paths[1][p[1]->prev];
322 froze = i;
323 pathn[0] = pathn[1] = 0;
324 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
325 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
329 p[0] = &c->paths[0][nodes[0][0]->path];
330 p[1] = &c->paths[1][nodes[1][0]->path];
331 for (j = i; j > froze; j--) {
332 dst[j] = p[1]->value << 6 | p[0]->value;
333 p[0] = &c->paths[0][p[0]->prev];
334 p[1] = &c->paths[1][p[1]->prev];
336 c->band[0] = nodes[0][0]->state;
337 c->band[1] = nodes[1][0]->state;
340 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
341 const int16_t *samples)
343 int xlow, xhigh, ilow, ihigh;
344 filter_samples(c, samples, &xlow, &xhigh);
345 ihigh = encode_high(&c->band[1], xhigh);
346 ilow = encode_low (&c->band[0], xlow);
347 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
348 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
349 ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
350 *dst = ihigh << 6 | ilow;
353 static void g722_encode_no_trellis(G722Context *c,
354 uint8_t *dst, int nb_samples,
355 const int16_t *samples)
357 int i;
358 for (i = 0; i < nb_samples; i += 2)
359 encode_byte(c, dst++, &samples[i]);
362 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
363 const AVFrame *frame, int *got_packet_ptr)
365 G722Context *c = avctx->priv_data;
366 const int16_t *samples = (const int16_t *)frame->data[0];
367 int nb_samples, out_size, ret;
369 out_size = (frame->nb_samples + 1) / 2;
370 if ((ret = ff_alloc_packet(avpkt, out_size))) {
371 av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
372 return ret;
375 nb_samples = frame->nb_samples - (frame->nb_samples & 1);
377 if (avctx->trellis)
378 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
379 else
380 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
382 /* handle last frame with odd frame_size */
383 if (nb_samples < frame->nb_samples) {
384 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
385 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
388 if (frame->pts != AV_NOPTS_VALUE)
389 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
390 *got_packet_ptr = 1;
391 return 0;
394 AVCodec ff_adpcm_g722_encoder = {
395 .name = "g722",
396 .type = AVMEDIA_TYPE_AUDIO,
397 .id = AV_CODEC_ID_ADPCM_G722,
398 .priv_data_size = sizeof(G722Context),
399 .init = g722_encode_init,
400 .close = g722_encode_close,
401 .encode2 = g722_encode_frame,
402 .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
403 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
404 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
405 AV_SAMPLE_FMT_NONE },