libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "internal.h"
  32 #include "g722.h"
  33 #include "libavutil/common.h"
  34
  35 #define FREEZE_INTERVAL 128
  36
  37 /* This is an arbitrary value. Allowing insanely large values leads to strange
  38    problems, so we limit it to a reasonable value */
  39 #define MAX_FRAME_SIZE 32768
  40
  41 /* We clip the value of avctx->trellis to prevent data type overflows and
  42    undefined behavior. Using larger values is insanely slow anyway. */
  43 #define MIN_TRELLIS 0
  44 #define MAX_TRELLIS 16
  45
  46 static av_cold int g722_encode_close(AVCodecContext *avctx)
  47 {
  48     G722Context *c = avctx->priv_data;
  49     int i;
  50     for (i = 0; i < 2; i++) {
  51         av_freep(&c->paths[i]);
  52         av_freep(&c->node_buf[i]);
  53         av_freep(&c->nodep_buf[i]);
  54     }
  55 #if FF_API_OLD_ENCODE_AUDIO
  56     av_freep(&avctx->coded_frame);
  57 #endif
  58     return 0;
  59 }
  60
  61 static av_cold int g722_encode_init(AVCodecContext * avctx)
  62 {
  63     G722Context *c = avctx->priv_data;
  64     int ret;
  65
  66     if (avctx->channels != 1) {
  67         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  68         return AVERROR_INVALIDDATA;
  69     }
  70
  71     c->band[0].scale_factor = 8;
  72     c->band[1].scale_factor = 2;
  73     c->prev_samples_pos = 22;
  74
  75     if (avctx->trellis) {
  76         int frontier = 1 << avctx->trellis;
  77         int max_paths = frontier * FREEZE_INTERVAL;
  78         int i;
  79         for (i = 0; i < 2; i++) {
  80             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  81             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  82             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  83             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
  84                 ret = AVERROR(ENOMEM);
  85                 goto error;
  86             }
  87         }
  88     }
  89
  90     if (avctx->frame_size) {
  91         /* validate frame size */
  92         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  93             int new_frame_size;
  94
  95             if (avctx->frame_size == 1)
  96                 new_frame_size = 2;
  97             else if (avctx->frame_size > MAX_FRAME_SIZE)
  98                 new_frame_size = MAX_FRAME_SIZE;
  99             else
 100                 new_frame_size = avctx->frame_size - 1;
 101
 102             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
 103                    "allowed. Using %d instead of %d\n", new_frame_size,
 104                    avctx->frame_size);
 105             avctx->frame_size = new_frame_size;
 106         }
 107     } else {
 108         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
 109            a common packet size for VoIP applications */
 110         avctx->frame_size = 320;
 111     }
 112     avctx->delay = 22;
 113
 114     if (avctx->trellis) {
 115         /* validate trellis */
 116         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 117             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 118             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 119                    "allowed. Using %d instead of %d\n", new_trellis,
 120                    avctx->trellis);
 121             avctx->trellis = new_trellis;
 122         }
 123     }
 124
 125 #if FF_API_OLD_ENCODE_AUDIO
 126     avctx->coded_frame = avcodec_alloc_frame();
 127     if (!avctx->coded_frame) {
 128         ret = AVERROR(ENOMEM);
 129         goto error;
 130     }
 131 #endif
 132
 133     return 0;
 134 error:
 135     g722_encode_close(avctx);
 136     return ret;
 137 }
 138
 139 static const int16_t low_quant[33] = {
 140       35,   72,  110,  150,  190,  233,  276,  323,
 141      370,  422,  473,  530,  587,  650,  714,  786,
 142      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 143     1765, 1980, 2195, 2557, 2919
 144 };
 145
 146 static inline void filter_samples(G722Context *c, const int16_t *samples,
 147                                   int *xlow, int *xhigh)
 148 {
 149     int xout1, xout2;
 150     c->prev_samples[c->prev_samples_pos++] = samples[0];
 151     c->prev_samples[c->prev_samples_pos++] = samples[1];
 152     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
 153     *xlow  = xout1 + xout2 >> 14;
 154     *xhigh = xout1 - xout2 >> 14;
 155     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 156         memmove(c->prev_samples,
 157                 c->prev_samples + c->prev_samples_pos - 22,
 158                 22 * sizeof(c->prev_samples[0]));
 159         c->prev_samples_pos = 22;
 160     }
 161 }
 162
 163 static inline int encode_high(const struct G722Band *state, int xhigh)
 164 {
 165     int diff = av_clip_int16(xhigh - state->s_predictor);
 166     int pred = 141 * state->scale_factor >> 8;
 167            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 168     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 169 }
 170
 171 static inline int encode_low(const struct G722Band* state, int xlow)
 172 {
 173     int diff  = av_clip_int16(xlow - state->s_predictor);
 174            /* = diff >= 0 ? diff : -(diff + 1) */
 175     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 176     int i = 0;
 177     limit = limit + 1 << 10;
 178     if (limit > low_quant[8] * state->scale_factor)
 179         i = 9;
 180     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 181         i++;
 182     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 183 }
 184
 185 static void g722_encode_trellis(G722Context *c, int trellis,
 186                                 uint8_t *dst, int nb_samples,
 187                                 const int16_t *samples)
 188 {
 189     int i, j, k;
 190     int frontier = 1 << trellis;
 191     struct TrellisNode **nodes[2];
 192     struct TrellisNode **nodes_next[2];
 193     int pathn[2] = {0, 0}, froze = -1;
 194     struct TrellisPath *p[2];
 195
 196     for (i = 0; i < 2; i++) {
 197         nodes[i] = c->nodep_buf[i];
 198         nodes_next[i] = c->nodep_buf[i] + frontier;
 199         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
 200         nodes[i][0] = c->node_buf[i] + frontier;
 201         nodes[i][0]->ssd = 0;
 202         nodes[i][0]->path = 0;
 203         nodes[i][0]->state = c->band[i];
 204     }
 205
 206     for (i = 0; i < nb_samples >> 1; i++) {
 207         int xlow, xhigh;
 208         struct TrellisNode *next[2];
 209         int heap_pos[2] = {0, 0};
 210
 211         for (j = 0; j < 2; j++) {
 212             next[j] = c->node_buf[j] + frontier*(i & 1);
 213             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 214         }
 215
 216         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 217
 218         for (j = 0; j < frontier && nodes[0][j]; j++) {
 219             /* Only k >> 2 affects the future adaptive state, therefore testing
 220              * small steps that don't change k >> 2 is useless, the original
 221              * value from encode_low is better than them. Since we step k
 222              * in steps of 4, make sure range is a multiple of 4, so that
 223              * we don't miss the original value from encode_low. */
 224             int range = j < frontier/2 ? 4 : 0;
 225             struct TrellisNode *cur_node = nodes[0][j];
 226
 227             int ilow = encode_low(&cur_node->state, xlow);
 228
 229             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 230                 int decoded, dec_diff, pos;
 231                 uint32_t ssd;
 232                 struct TrellisNode* node;
 233
 234                 if (k < 0)
 235                     continue;
 236
 237                 decoded = av_clip((cur_node->state.scale_factor *
 238                                   ff_g722_low_inv_quant6[k] >> 10)
 239                                 + cur_node->state.s_predictor, -16384, 16383);
 240                 dec_diff = xlow - decoded;
 241
 242 #define STORE_NODE(index, UPDATE, VALUE)\
 243                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 244                 /* Check for wraparound. Using 64 bit ssd counters would \
 245                  * be simpler, but is slower on x86 32 bit. */\
 246                 if (ssd < cur_node->ssd)\
 247                     continue;\
 248                 if (heap_pos[index] < frontier) {\
 249                     pos = heap_pos[index]++;\
 250                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 251                     node = nodes_next[index][pos] = next[index]++;\
 252                     node->path = pathn[index]++;\
 253                 } else {\
 254                     /* Try to replace one of the leaf nodes with the new \
 255                      * one, but not always testing the same leaf position */\
 256                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 257                     if (ssd >= nodes_next[index][pos]->ssd)\
 258                         continue;\
 259                     heap_pos[index]++;\
 260                     node = nodes_next[index][pos];\
 261                 }\
 262                 node->ssd = ssd;\
 263                 node->state = cur_node->state;\
 264                 UPDATE;\
 265                 c->paths[index][node->path].value = VALUE;\
 266                 c->paths[index][node->path].prev = cur_node->path;\
 267                 /* Sift the newly inserted node up in the heap to restore \
 268                  * the heap property */\
 269                 while (pos > 0) {\
 270                     int parent = (pos - 1) >> 1;\
 271                     if (nodes_next[index][parent]->ssd <= ssd)\
 272                         break;\
 273                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 274                                                 nodes_next[index][pos]);\
 275                     pos = parent;\
 276                 }
 277                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 278             }
 279         }
 280
 281         for (j = 0; j < frontier && nodes[1][j]; j++) {
 282             int ihigh;
 283             struct TrellisNode *cur_node = nodes[1][j];
 284
 285             /* We don't try to get any initial guess for ihigh via
 286              * encode_high - since there's only 4 possible values, test
 287              * them all. Testing all of these gives a much, much larger
 288              * gain than testing a larger range around ilow. */
 289             for (ihigh = 0; ihigh < 4; ihigh++) {
 290                 int dhigh, decoded, dec_diff, pos;
 291                 uint32_t ssd;
 292                 struct TrellisNode* node;
 293
 294                 dhigh = cur_node->state.scale_factor *
 295                         ff_g722_high_inv_quant[ihigh] >> 10;
 296                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 297                                   -16384, 16383);
 298                 dec_diff = xhigh - decoded;
 299
 300                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 301             }
 302         }
 303
 304         for (j = 0; j < 2; j++) {
 305             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 306
 307             if (nodes[j][0]->ssd > (1 << 16)) {
 308                 for (k = 1; k < frontier && nodes[j][k]; k++)
 309                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 310                 nodes[j][0]->ssd = 0;
 311             }
 312         }
 313
 314         if (i == froze + FREEZE_INTERVAL) {
 315             p[0] = &c->paths[0][nodes[0][0]->path];
 316             p[1] = &c->paths[1][nodes[1][0]->path];
 317             for (j = i; j > froze; j--) {
 318                 dst[j] = p[1]->value << 6 | p[0]->value;
 319                 p[0] = &c->paths[0][p[0]->prev];
 320                 p[1] = &c->paths[1][p[1]->prev];
 321             }
 322             froze = i;
 323             pathn[0] = pathn[1] = 0;
 324             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 325             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 326         }
 327     }
 328
 329     p[0] = &c->paths[0][nodes[0][0]->path];
 330     p[1] = &c->paths[1][nodes[1][0]->path];
 331     for (j = i; j > froze; j--) {
 332         dst[j] = p[1]->value << 6 | p[0]->value;
 333         p[0] = &c->paths[0][p[0]->prev];
 334         p[1] = &c->paths[1][p[1]->prev];
 335     }
 336     c->band[0] = nodes[0][0]->state;
 337     c->band[1] = nodes[1][0]->state;
 338 }
 339
 340 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 341                                          const int16_t *samples)
 342 {
 343     int xlow, xhigh, ilow, ihigh;
 344     filter_samples(c, samples, &xlow, &xhigh);
 345     ihigh = encode_high(&c->band[1], xhigh);
 346     ilow  = encode_low (&c->band[0], xlow);
 347     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 348                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 349     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 350     *dst = ihigh << 6 | ilow;
 351 }
 352
 353 static void g722_encode_no_trellis(G722Context *c,
 354                                    uint8_t *dst, int nb_samples,
 355                                    const int16_t *samples)
 356 {
 357     int i;
 358     for (i = 0; i < nb_samples; i += 2)
 359         encode_byte(c, dst++, &samples[i]);
 360 }
 361
 362 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 363                              const AVFrame *frame, int *got_packet_ptr)
 364 {
 365     G722Context *c = avctx->priv_data;
 366     const int16_t *samples = (const int16_t *)frame->data[0];
 367     int nb_samples, out_size, ret;
 368
 369     out_size = (frame->nb_samples + 1) / 2;
 370     if ((ret = ff_alloc_packet(avpkt, out_size))) {
 371         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
 372         return ret;
 373     }
 374
 375     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 376
 377     if (avctx->trellis)
 378         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 379     else
 380         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 381
 382     /* handle last frame with odd frame_size */
 383     if (nb_samples < frame->nb_samples) {
 384         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 385         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 386     }
 387
 388     if (frame->pts != AV_NOPTS_VALUE)
 389         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
 390     *got_packet_ptr = 1;
 391     return 0;
 392 }
 393
 394 AVCodec ff_adpcm_g722_encoder = {
 395     .name           = "g722",
 396     .type           = AVMEDIA_TYPE_AUDIO,
 397     .id             = AV_CODEC_ID_ADPCM_G722,
 398     .priv_data_size = sizeof(G722Context),
 399     .init           = g722_encode_init,
 400     .close          = g722_encode_close,
 401     .encode2        = g722_encode_frame,
 402     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
 403     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 404     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
 405                                                      AV_SAMPLE_FMT_NONE },
 406 };