libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
   31 #define CFACTOR_Y422 2 /* chroma_factor value; presumably the 4:2:2 case (any value other than CFACTOR_Y444 takes the half-width chroma path) */
   32 #define CFACTOR_Y444 3 /* chroma_factor value for which chroma planes are sliced at full luma width, 4 blocks per macroblock */
   33
   34 #define MAX_MBS_PER_SLICE 8 /* sizes the coefficient buffers: 64 * 4 coefficients per macroblock, per plane */
   35
   36 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
  37
   38 enum {
          /* Encoder profile identifiers. The values follow the entry order of
           * prores_profile_info[] (proxy, LT, standard, high quality). */
   39     PRORES_PROFILE_PROXY = 0,
   40     PRORES_PROFILE_LT,
   41     PRORES_PROFILE_STANDARD,
   42     PRORES_PROFILE_HQ,
   43 };
  44
   45 enum {
          /* Quantisation matrix selectors. The values follow the row order of
           * prores_quant_matrices[] and are stored in prores_profile::quant.
           * encode_frame() writes explicit matrices into the frame header unless
           * the selection is QUANT_MAT_DEFAULT. */
   46     QUANT_MAT_PROXY = 0,
   47     QUANT_MAT_LT,
   48     QUANT_MAT_STANDARD,
   49     QUANT_MAT_HQ,
   50     QUANT_MAT_DEFAULT,
   51 };
  52
   53 static const uint8_t prores_quant_matrices[][64] = {
          /* One 64-entry (8x8) base quantisation matrix per QUANT_MAT_* value,
           * in enum order. The encoder multiplies a base matrix by the slice
           * quantiser to get the effective divisors (see the custom_q setup in
           * encode_slice() / find_slice_quant()).
           * NOTE(review): presumably ctx->quant_mat points at one of these rows;
           * that assignment is outside this part of the file. */
   54     { // proxy (QUANT_MAT_PROXY)
   55          4,  7,  9, 11, 13, 14, 15, 63,
   56          7,  7, 11, 12, 14, 15, 63, 63,
   57          9, 11, 13, 14, 15, 63, 63, 63,
   58         11, 11, 13, 14, 63, 63, 63, 63,
   59         11, 13, 14, 63, 63, 63, 63, 63,
   60         13, 14, 63, 63, 63, 63, 63, 63,
   61         13, 63, 63, 63, 63, 63, 63, 63,
   62         63, 63, 63, 63, 63, 63, 63, 63,
   63     },
   64     { // LT (QUANT_MAT_LT)
   65          4,  5,  6,  7,  9, 11, 13, 15,
   66          5,  5,  7,  8, 11, 13, 15, 17,
   67          6,  7,  9, 11, 13, 15, 15, 17,
   68          7,  7,  9, 11, 13, 15, 17, 19,
   69          7,  9, 11, 13, 14, 16, 19, 23,
   70          9, 11, 13, 14, 16, 19, 23, 29,
   71          9, 11, 13, 15, 17, 21, 28, 35,
   72         11, 13, 16, 17, 21, 28, 35, 41,
   73     },
   74     { // standard (QUANT_MAT_STANDARD)
   75          4,  4,  5,  5,  6,  7,  7,  9,
   76          4,  4,  5,  6,  7,  7,  9,  9,
   77          5,  5,  6,  7,  7,  9,  9, 10,
   78          5,  5,  6,  7,  7,  9,  9, 10,
   79          5,  6,  7,  7,  8,  9, 10, 12,
   80          6,  7,  7,  8,  9, 10, 12, 15,
   81          6,  7,  7,  9, 10, 11, 14, 17,
   82          7,  7,  9, 10, 11, 14, 17, 21,
   83     },
   84     { // high quality (QUANT_MAT_HQ)
   85          4,  4,  4,  4,  4,  4,  4,  4,
   86          4,  4,  4,  4,  4,  4,  4,  4,
   87          4,  4,  4,  4,  4,  4,  4,  4,
   88          4,  4,  4,  4,  4,  4,  4,  5,
   89          4,  4,  4,  4,  4,  4,  5,  5,
   90          4,  4,  4,  4,  4,  5,  5,  6,
   91          4,  4,  4,  4,  5,  5,  6,  7,
   92          4,  4,  4,  4,  5,  6,  7,  7,
   93     },
   94     { // codec default (QUANT_MAT_DEFAULT): flat matrix, not written to the frame header
   95          4,  4,  4,  4,  4,  4,  4,  4,
   96          4,  4,  4,  4,  4,  4,  4,  4,
   97          4,  4,  4,  4,  4,  4,  4,  4,
   98          4,  4,  4,  4,  4,  4,  4,  4,
   99          4,  4,  4,  4,  4,  4,  4,  4,
  100          4,  4,  4,  4,  4,  4,  4,  4,
  101          4,  4,  4,  4,  4,  4,  4,  4,
  102          4,  4,  4,  4,  4,  4,  4,  4,
  103     },
  104 };
 105
  106 #define NUM_MB_LIMITS 4 /* number of frame-size classes; also the length of prores_profile::br_tab */
  107 static const int prores_mb_limits[NUM_MB_LIMITS] = {
          /* Upper bounds of the frame-size classes, expressed in 16x16
           * macroblocks (e.g. 720x576 -> 45 * 36 = 1620).
           * NOTE(review): presumably used to pick the matching br_tab[] entry;
           * the lookup is outside this part of the file. */
  108     1620, // up to 720x576
  109     2700, // up to 960x720
  110     6075, // up to 1440x1080
  111     9216, // up to 2048x1152
  112 };
 113
  114 static const struct prores_profile {
  115     const char *full_name;          /* human-readable profile name */
  116     uint32_t    tag;                /* four-character code built with MKTAG() */
  117     int         min_quant;          /* lowest quantiser tried by find_slice_quant() */
  118     int         max_quant;          /* highest regular quantiser tried by find_slice_quant() */
  119     int         br_tab[NUM_MB_LIMITS]; /* one rate figure per prores_mb_limits[] class; units not visible here — TODO confirm */
  120     int         quant;              /* QUANT_MAT_* selector for this profile */
  121 } prores_profile_info[4] = {
          /* Entries are listed in PRORES_PROFILE_* order. */
  122     {
  123         .full_name = "proxy",
  124         .tag       = MKTAG('a', 'p', 'c', 'o'),
  125         .min_quant = 4,
  126         .max_quant = 8,
  127         .br_tab    = { 300, 242, 220, 194 },
  128         .quant     = QUANT_MAT_PROXY,
  129     },
  130     {
  131         .full_name = "LT",
  132         .tag       = MKTAG('a', 'p', 'c', 's'),
  133         .min_quant = 1,
  134         .max_quant = 9,
  135         .br_tab    = { 720, 560, 490, 440 },
  136         .quant     = QUANT_MAT_LT,
  137     },
  138     {
  139         .full_name = "standard",
  140         .tag       = MKTAG('a', 'p', 'c', 'n'),
  141         .min_quant = 1,
  142         .max_quant = 6,
  143         .br_tab    = { 1050, 808, 710, 632 },
  144         .quant     = QUANT_MAT_STANDARD,
  145     },
  146     {
  147         .full_name = "high quality",
  148         .tag       = MKTAG('a', 'p', 'c', 'h'),
  149         .min_quant = 1,
  150         .max_quant = 6,
  151         .br_tab    = { 1566, 1216, 1070, 950 },
  152         .quant     = QUANT_MAT_HQ,
  153     }
  154 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
  155 };
 156
 157 #define TRELLIS_WIDTH 16
 158 #define SCORE_LIMIT   INT_MAX / 2
 159
 160 struct TrellisNode {
 161     int prev_node;
 162     int quant;
 163     int bits;
 164     int score;
 165 };
 166
 167 #define MAX_STORED_Q 16
 168
  169 typedef struct ProresThreadData {
          /* Per-thread scratch state used by the quantiser search
           * (find_quant_thread() picks the entry by thread number). */
  170     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; /* DCT coefficients of the current slice, one buffer per plane */
  171     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16]; /* 16x16 sample buffer for macroblocks crossing the picture edge */
  172     int16_t custom_q[64];       /* matrix built on the fly for quantisers >= MAX_STORED_Q */
  173     struct TrellisNode *nodes;  /* trellis nodes for one macroblock row; allocated outside this part of the file */
  174 } ProresThreadData;
 175
  176 typedef struct ProresContext {
          /* Private encoder state (avctx->priv_data). */
  177     AVClass *class;
  178     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; /* coefficient scratch; encode_slice() uses blocks[0] for every plane */
  179     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; /* 16x16 edge-emulation buffer used by encode_slice() */
  180     int16_t quants[MAX_STORED_Q][64]; /* precomputed matrices indexed by quantiser; entry 0 is used when force_quant is set */
  181     int16_t custom_q[64];             /* matrix built by encode_slice() for quantisers >= MAX_STORED_Q */
  182     const uint8_t *quant_mat;         /* base matrix: scaled by the quantiser and written to the frame header */
  183
  184     ProresDSPContext dsp;             /* provides the forward DCT (dsp.fdct) */
  185     ScanTable  scantable;             /* scantable.permutated gives the coefficient coding order */
  186
  187     int mb_width, mb_height;          /* picture size in macroblocks, as iterated by encode_frame() */
  188     int mbs_per_slice;                /* nominal slice width in macroblocks; halved as needed at the right edge */
  189     int num_chroma_blocks, chroma_factor; /* chroma_factor is compared against CFACTOR_Y444 and written to the frame flags */
  190     int slices_width;                 /* slices per macroblock row (row stride of slice_q) */
  191     int slices_per_picture;           /* written to the picture header; sizes the slice size table */
  192     int pictures_per_frame; // 1 for progressive, 2 for interlaced
  193     int cur_picture_idx;              /* index of the picture currently being encoded within the frame */
  194     int num_planes;                   /* number of planes encoded per slice */
  195     int bits_per_mb;                  /* bit budget per macroblock used by the quantiser search */
  196     int force_quant;                  /* non-zero: use this quantiser for every slice and skip the search */
  197
  198     char *vendor;                     /* 4 bytes of it are copied into the frame header */
  199     int quant_sel;                    /* QUANT_MAT_* selection; anything but QUANT_MAT_DEFAULT writes matrices to the header */
  200
  201     int frame_size_upper_bound;       /* used to size the output packet */
  202
  203     int profile;
  204     const struct prores_profile *profile_info; /* supplies min_quant / max_quant to the quantiser search */
  205
  206     int *slice_q;                     /* chosen quantiser per slice, indexed slice + row * slices_width */
  207
  208     ProresThreadData *tdata;          /* per-thread scratch data, indexed by thread number */
  209 } ProresContext;
 210
 211 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 212                            int linesize, int x, int y, int w, int h,
 213                            DCTELEM *blocks, uint16_t *emu_buf,
 214                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 215 {
 216     const uint16_t *esrc;
 217     const int mb_width = 4 * blocks_per_mb;
 218     int elinesize;
 219     int i, j, k;
 220
 221     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 222         if (x >= w) {
 223             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 224                               * sizeof(*blocks));
 225             return;
 226         }
 227         if (x + mb_width <= w && y + 16 <= h) {
 228             esrc      = src;
 229             elinesize = linesize;
 230         } else {
 231             int bw, bh, pix;
 232
 233             esrc      = emu_buf;
 234             elinesize = 16 * sizeof(*emu_buf);
 235
 236             bw = FFMIN(w - x, mb_width);
 237             bh = FFMIN(h - y, 16);
 238
 239             for (j = 0; j < bh; j++) {
 240                 memcpy(emu_buf + j * 16,
 241                        (const uint8_t*)src + j * linesize,
 242                        bw * sizeof(*src));
 243                 pix = emu_buf[j * 16 + bw - 1];
 244                 for (k = bw; k < mb_width; k++)
 245                     emu_buf[j * 16 + k] = pix;
 246             }
 247             for (; j < 16; j++)
 248                 memcpy(emu_buf + j * 16,
 249                        emu_buf + (bh - 1) * 16,
 250                        mb_width * sizeof(*emu_buf));
 251         }
 252         if (!is_chroma) {
 253             ctx->dsp.fdct(esrc, elinesize, blocks);
 254             blocks += 64;
 255             if (blocks_per_mb > 2) {
 256                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 257                 blocks += 64;
 258             }
 259             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 260             blocks += 64;
 261             if (blocks_per_mb > 2) {
 262                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 263                 blocks += 64;
 264             }
 265         } else {
 266             ctx->dsp.fdct(esrc, elinesize, blocks);
 267             blocks += 64;
 268             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 269             blocks += 64;
 270             if (blocks_per_mb > 2) {
 271                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 272                 blocks += 64;
 273                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 274                 blocks += 64;
 275             }
 276         }
 277
 278         x += mb_width;
 279     }
 280 }
 281
 282 /**
 283  * Write an unsigned rice/exp golomb codeword.
 284  */
 285 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 286 {
 287     unsigned int rice_order, exp_order, switch_bits, switch_val;
 288     int exponent;
 289
 290     /* number of prefix bits to switch between Rice and expGolomb */
 291     switch_bits = (codebook & 3) + 1;
 292     rice_order  =  codebook >> 5;       /* rice code order */
 293     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 294
 295     switch_val  = switch_bits << rice_order;
 296
 297     if (val >= switch_val) {
 298         val -= switch_val - (1 << exp_order);
 299         exponent = av_log2(val);
 300
 301         put_bits(pb, exponent - exp_order + switch_bits, 0);
 302         put_bits(pb, exponent + 1, val);
 303     } else {
 304         exponent = val >> rice_order;
 305
 306         if (exponent)
 307             put_bits(pb, exponent, 0);
 308         put_bits(pb, 1, 1);
 309         if (rice_order)
 310             put_sbits(pb, rice_order, val);
 311     }
 312 }
 313
  314 #define GET_SIGN(x)  ((x) >> 31) /* sign mask: 0 for non-negative x, -1 for negative x — assumes 32-bit int and an arithmetic right shift */
  315 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x)) /* fold a signed value into a non-negative code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... (same assumptions) */
 316
 317 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 318                        int blocks_per_slice, int scale)
 319 {
 320     int i;
 321     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 322
 323     prev_dc = (blocks[0] - 0x4000) / scale;
 324     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 325     sign     = 0;
 326     codebook = 3;
 327     blocks  += 64;
 328
 329     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 330         dc       = (blocks[0] - 0x4000) / scale;
 331         delta    = dc - prev_dc;
 332         new_sign = GET_SIGN(delta);
 333         delta    = (delta ^ sign) - sign;
 334         code     = MAKE_CODE(delta);
 335         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 336         codebook = (code + (code & 1)) >> 1;
 337         codebook = FFMIN(codebook, 3);
 338         sign     = new_sign;
 339         prev_dc  = dc;
 340     }
 341 }
 342
 343 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 344                        int blocks_per_slice,
 345                        int plane_size_factor,
 346                        const uint8_t *scan, const int16_t *qmat)
 347 {
 348     int idx, i;
 349     int run, level, run_cb, lev_cb;
 350     int max_coeffs, abs_level;
 351
 352     max_coeffs = blocks_per_slice << 6;
 353     run_cb     = ff_prores_run_to_cb_index[4];
 354     lev_cb     = ff_prores_lev_to_cb_index[2];
 355     run        = 0;
 356
 357     for (i = 1; i < 64; i++) {
 358         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 359             level = blocks[idx] / qmat[scan[i]];
 360             if (level) {
 361                 abs_level = FFABS(level);
 362                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 363                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 364                                     abs_level - 1);
 365                 put_sbits(pb, 1, GET_SIGN(level));
 366
 367                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 368                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 369                 run    = 0;
 370             } else {
 371                 run++;
 372             }
 373         }
 374     }
 375 }
 376
 377 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 378                               const uint16_t *src, int linesize,
 379                               int mbs_per_slice, DCTELEM *blocks,
 380                               int blocks_per_mb, int plane_size_factor,
 381                               const int16_t *qmat)
 382 {
 383     int blocks_per_slice, saved_pos;
 384
 385     saved_pos = put_bits_count(pb);
 386     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 387
 388     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 389     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 390                ctx->scantable.permutated, qmat);
 391     flush_put_bits(pb);
 392
 393     return (put_bits_count(pb) - saved_pos) >> 3;
 394 }
 395
 396 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 397                         PutBitContext *pb,
 398                         int sizes[4], int x, int y, int quant,
 399                         int mbs_per_slice)
 400 {
 401     ProresContext *ctx = avctx->priv_data;
 402     int i, xp, yp;
 403     int total_size = 0;
 404     const uint16_t *src;
 405     int slice_width_factor = av_log2(mbs_per_slice);
 406     int num_cblocks, pwidth, linesize, line_add;
 407     int plane_factor, is_chroma;
 408     uint16_t *qmat;
 409
 410     if (ctx->pictures_per_frame == 1)
 411         line_add = 0;
 412     else
 413         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 414
 415     if (ctx->force_quant) {
 416         qmat = ctx->quants[0];
 417     } else if (quant < MAX_STORED_Q) {
 418         qmat = ctx->quants[quant];
 419     } else {
 420         qmat = ctx->custom_q;
 421         for (i = 0; i < 64; i++)
 422             qmat[i] = ctx->quant_mat[i] * quant;
 423     }
 424
 425     for (i = 0; i < ctx->num_planes; i++) {
 426         is_chroma    = (i == 1 || i == 2);
 427         plane_factor = slice_width_factor + 2;
 428         if (is_chroma)
 429             plane_factor += ctx->chroma_factor - 3;
 430         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 431             xp          = x << 4;
 432             yp          = y << 4;
 433             num_cblocks = 4;
 434             pwidth      = avctx->width;
 435         } else {
 436             xp          = x << 3;
 437             yp          = y << 4;
 438             num_cblocks = 2;
 439             pwidth      = avctx->width >> 1;
 440         }
 441
 442         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 443         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 444                                 line_add * pic->linesize[i]) + xp;
 445
 446         get_slice_data(ctx, src, linesize, xp, yp,
 447                        pwidth, avctx->height / ctx->pictures_per_frame,
 448                        ctx->blocks[0], ctx->emu_buf,
 449                        mbs_per_slice, num_cblocks, is_chroma);
 450         sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 451                                       mbs_per_slice, ctx->blocks[0],
 452                                       num_cblocks, plane_factor,
 453                                       qmat);
 454         total_size += sizes[i];
 455     }
 456     return total_size;
 457 }
 458
 459 static inline int estimate_vlc(unsigned codebook, int val)
 460 {
 461     unsigned int rice_order, exp_order, switch_bits, switch_val;
 462     int exponent;
 463
 464     /* number of prefix bits to switch between Rice and expGolomb */
 465     switch_bits = (codebook & 3) + 1;
 466     rice_order  =  codebook >> 5;       /* rice code order */
 467     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 468
 469     switch_val  = switch_bits << rice_order;
 470
 471     if (val >= switch_val) {
 472         val -= switch_val - (1 << exp_order);
 473         exponent = av_log2(val);
 474
 475         return exponent * 2 - exp_order + switch_bits + 1;
 476     } else {
 477         return (val >> rice_order) + rice_order + 1;
 478     }
 479 }
 480
 481 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 482                         int scale)
 483 {
 484     int i;
 485     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 486     int bits;
 487
 488     prev_dc  = (blocks[0] - 0x4000) / scale;
 489     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 490     sign     = 0;
 491     codebook = 3;
 492     blocks  += 64;
 493     *error  += FFABS(blocks[0] - 0x4000) % scale;
 494
 495     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 496         dc       = (blocks[0] - 0x4000) / scale;
 497         *error  += FFABS(blocks[0] - 0x4000) % scale;
 498         delta    = dc - prev_dc;
 499         new_sign = GET_SIGN(delta);
 500         delta    = (delta ^ sign) - sign;
 501         code     = MAKE_CODE(delta);
 502         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 503         codebook = (code + (code & 1)) >> 1;
 504         codebook = FFMIN(codebook, 3);
 505         sign     = new_sign;
 506         prev_dc  = dc;
 507     }
 508
 509     return bits;
 510 }
 511
 512 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 513                         int plane_size_factor,
 514                         const uint8_t *scan, const int16_t *qmat)
 515 {
 516     int idx, i;
 517     int run, level, run_cb, lev_cb;
 518     int max_coeffs, abs_level;
 519     int bits = 0;
 520
 521     max_coeffs = blocks_per_slice << 6;
 522     run_cb     = ff_prores_run_to_cb_index[4];
 523     lev_cb     = ff_prores_lev_to_cb_index[2];
 524     run        = 0;
 525
 526     for (i = 1; i < 64; i++) {
 527         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 528             level   = blocks[idx] / qmat[scan[i]];
 529             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 530             if (level) {
 531                 abs_level = FFABS(level);
 532                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 533                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 534                                      abs_level - 1) + 1;
 535
 536                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 537                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 538                 run    = 0;
 539             } else {
 540                 run++;
 541             }
 542         }
 543     }
 544
 545     return bits;
 546 }
 547
 548 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 549                                 const uint16_t *src, int linesize,
 550                                 int mbs_per_slice,
 551                                 int blocks_per_mb, int plane_size_factor,
 552                                 const int16_t *qmat, ProresThreadData *td)
 553 {
 554     int blocks_per_slice;
 555     int bits;
 556
 557     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 558
 559     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 560     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 561                          plane_size_factor, ctx->scantable.permutated, qmat);
 562
 563     return FFALIGN(bits, 8);
 564 }
 565
 566 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 567                             int trellis_node, int x, int y, int mbs_per_slice,
 568                             ProresThreadData *td)
 569 {
 570     ProresContext *ctx = avctx->priv_data;
 571     int i, q, pq, xp, yp;
 572     const uint16_t *src;
 573     int slice_width_factor = av_log2(mbs_per_slice);
 574     int num_cblocks[MAX_PLANES], pwidth;
 575     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 576     const int min_quant = ctx->profile_info->min_quant;
 577     const int max_quant = ctx->profile_info->max_quant;
 578     int error, bits, bits_limit;
 579     int mbs, prev, cur, new_score;
 580     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 581     int overquant;
 582     uint16_t *qmat;
 583     int linesize[4], line_add;
 584
 585     if (ctx->pictures_per_frame == 1)
 586         line_add = 0;
 587     else
 588         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 589     mbs = x + mbs_per_slice;
 590
 591     for (i = 0; i < ctx->num_planes; i++) {
 592         is_chroma[i]    = (i == 1 || i == 2);
 593         plane_factor[i] = slice_width_factor + 2;
 594         if (is_chroma[i])
 595             plane_factor[i] += ctx->chroma_factor - 3;
 596         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 597             xp             = x << 4;
 598             yp             = y << 4;
 599             num_cblocks[i] = 4;
 600             pwidth         = avctx->width;
 601         } else {
 602             xp             = x << 3;
 603             yp             = y << 4;
 604             num_cblocks[i] = 2;
 605             pwidth         = avctx->width >> 1;
 606         }
 607
 608         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 609         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 610                                 line_add * pic->linesize[i]) + xp;
 611
 612         get_slice_data(ctx, src, linesize[i], xp, yp,
 613                        pwidth, avctx->height / ctx->pictures_per_frame,
 614                        td->blocks[i], td->emu_buf,
 615                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 616     }
 617
 618     for (q = min_quant; q < max_quant + 2; q++) {
 619         td->nodes[trellis_node + q].prev_node = -1;
 620         td->nodes[trellis_node + q].quant     = q;
 621     }
 622
 623     // todo: maybe perform coarser quantising to fit into frame size when needed
 624     for (q = min_quant; q <= max_quant; q++) {
 625         bits  = 0;
 626         error = 0;
 627         for (i = 0; i < ctx->num_planes; i++) {
 628             bits += estimate_slice_plane(ctx, &error, i,
 629                                          src, linesize[i],
 630                                          mbs_per_slice,
 631                                          num_cblocks[i], plane_factor[i],
 632                                          ctx->quants[q], td);
 633         }
 634         if (bits > 65000 * 8) {
 635             error = SCORE_LIMIT;
 636             break;
 637         }
 638         slice_bits[q]  = bits;
 639         slice_score[q] = error;
 640     }
 641     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 642         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 643         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 644         overquant = max_quant;
 645     } else {
 646         for (q = max_quant + 1; q < 128; q++) {
 647             bits  = 0;
 648             error = 0;
 649             if (q < MAX_STORED_Q) {
 650                 qmat = ctx->quants[q];
 651             } else {
 652                 qmat = td->custom_q;
 653                 for (i = 0; i < 64; i++)
 654                     qmat[i] = ctx->quant_mat[i] * q;
 655             }
 656             for (i = 0; i < ctx->num_planes; i++) {
 657                 bits += estimate_slice_plane(ctx, &error, i,
 658                                              src, linesize[i],
 659                                              mbs_per_slice,
 660                                              num_cblocks[i], plane_factor[i],
 661                                              qmat, td);
 662             }
 663             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 664                 break;
 665         }
 666
 667         slice_bits[max_quant + 1]  = bits;
 668         slice_score[max_quant + 1] = error;
 669         overquant = q;
 670     }
 671     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 672
 673     bits_limit = mbs * ctx->bits_per_mb;
 674     for (pq = min_quant; pq < max_quant + 2; pq++) {
 675         prev = trellis_node - TRELLIS_WIDTH + pq;
 676
 677         for (q = min_quant; q < max_quant + 2; q++) {
 678             cur = trellis_node + q;
 679
 680             bits  = td->nodes[prev].bits + slice_bits[q];
 681             error = slice_score[q];
 682             if (bits > bits_limit)
 683                 error = SCORE_LIMIT;
 684
 685             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 686                 new_score = td->nodes[prev].score + error;
 687             else
 688                 new_score = SCORE_LIMIT;
 689             if (td->nodes[cur].prev_node == -1 ||
 690                 td->nodes[cur].score >= new_score) {
 691
 692                 td->nodes[cur].bits      = bits;
 693                 td->nodes[cur].score     = new_score;
 694                 td->nodes[cur].prev_node = prev;
 695             }
 696         }
 697     }
 698
 699     error = td->nodes[trellis_node + min_quant].score;
 700     pq    = trellis_node + min_quant;
 701     for (q = min_quant + 1; q < max_quant + 2; q++) {
 702         if (td->nodes[trellis_node + q].score <= error) {
 703             error = td->nodes[trellis_node + q].score;
 704             pq    = trellis_node + q;
 705         }
 706     }
 707
 708     return pq;
 709 }
 710
 711 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 712                              int jobnr, int threadnr)
 713 {
 714     ProresContext *ctx = avctx->priv_data;
 715     ProresThreadData *td = ctx->tdata + threadnr;
 716     int mbs_per_slice = ctx->mbs_per_slice;
 717     int x, y = jobnr, mb, q = 0;
 718
 719     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 720         while (ctx->mb_width - x < mbs_per_slice)
 721             mbs_per_slice >>= 1;
 722         q = find_slice_quant(avctx, avctx->coded_frame,
 723                              (mb + 1) * TRELLIS_WIDTH, x, y,
 724                              mbs_per_slice, td);
 725     }
 726
 727     for (x = ctx->slices_width - 1; x >= 0; x--) {
 728         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 729         q = td->nodes[q].prev_node;
 730     }
 731
 732     return 0;
 733 }
 734
 735 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 736                         const AVFrame *pic, int *got_packet)
 737 {
 738     ProresContext *ctx = avctx->priv_data;
 739     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 740     uint8_t *picture_size_pos;
 741     PutBitContext pb;
 742     int x, y, i, mb, q = 0;
 743     int sizes[4] = { 0 };
 744     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 745     int frame_size, picture_size, slice_size;
 746     int pkt_size, ret;
 747     uint8_t frame_flags;
 748
 749     *avctx->coded_frame           = *pic;
 750     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 751     avctx->coded_frame->key_frame = 1;
 752
 753     pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
 754
 755     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 756         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 757         return ret;
 758     }
 759
 760     orig_buf = pkt->data;
 761
 762     // frame atom
 763     orig_buf += 4;                              // frame size
 764     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 765     buf = orig_buf;
 766
 767     // frame header
 768     tmp = buf;
 769     buf += 2;                                   // frame header size will be stored here
 770     bytestream_put_be16  (&buf, 0);             // version 1
 771     bytestream_put_buffer(&buf, ctx->vendor, 4);
 772     bytestream_put_be16  (&buf, avctx->width);
 773     bytestream_put_be16  (&buf, avctx->height);
 774
 775     frame_flags = ctx->chroma_factor << 6;
 776     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 777         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 778     bytestream_put_byte  (&buf, frame_flags);
 779
 780     bytestream_put_byte  (&buf, 0);             // reserved
 781     bytestream_put_byte  (&buf, avctx->color_primaries);
 782     bytestream_put_byte  (&buf, avctx->color_trc);
 783     bytestream_put_byte  (&buf, avctx->colorspace);
 784     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 785     bytestream_put_byte  (&buf, 0);             // reserved
 786     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 787         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 788         // luma quantisation matrix
 789         for (i = 0; i < 64; i++)
 790             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 791         // chroma quantisation matrix
 792         for (i = 0; i < 64; i++)
 793             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 794     } else {
 795         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 796     }
 797     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 798
 799     for (ctx->cur_picture_idx = 0;
 800          ctx->cur_picture_idx < ctx->pictures_per_frame;
 801          ctx->cur_picture_idx++) {
 802         // picture header
 803         picture_size_pos = buf + 1;
 804         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 805         buf += 4;                                   // picture data size will be stored here
 806         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 807         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 808
 809         // seek table - will be filled during slice encoding
 810         slice_sizes = buf;
 811         buf += ctx->slices_per_picture * 2;
 812
 813         // slices
 814         if (!ctx->force_quant) {
 815             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
 816                                   ctx->mb_height);
 817             if (ret)
 818                 return ret;
 819         }
 820
 821         for (y = 0; y < ctx->mb_height; y++) {
 822             int mbs_per_slice = ctx->mbs_per_slice;
 823             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 824                 q = ctx->force_quant ? ctx->force_quant
 825                                      : ctx->slice_q[mb + y * ctx->slices_width];
 826
 827                 while (ctx->mb_width - x < mbs_per_slice)
 828                     mbs_per_slice >>= 1;
 829
 830                 bytestream_put_byte(&buf, slice_hdr_size << 3);
 831                 slice_hdr = buf;
 832                 buf += slice_hdr_size - 1;
 833                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 834                 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 835
 836                 bytestream_put_byte(&slice_hdr, q);
 837                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 838                 for (i = 0; i < ctx->num_planes - 1; i++) {
 839                     bytestream_put_be16(&slice_hdr, sizes[i]);
 840                     slice_size += sizes[i];
 841                 }
 842                 bytestream_put_be16(&slice_sizes, slice_size);
 843                 buf += slice_size - slice_hdr_size;
 844             }
 845         }
 846
 847         if (ctx->pictures_per_frame == 1)
 848             picture_size = buf - picture_size_pos - 6;
 849         else
 850             picture_size = buf - picture_size_pos + 1;
 851         bytestream_put_be32(&picture_size_pos, picture_size);
 852     }
 853
 854     orig_buf -= 8;
 855     frame_size = buf - orig_buf;
 856     bytestream_put_be32(&orig_buf, frame_size);
 857
 858     pkt->size   = frame_size;
 859     pkt->flags |= AV_PKT_FLAG_KEY;
 860     *got_packet = 1;
 861
 862     return 0;
 863 }
 864
 865 static av_cold int encode_close(AVCodecContext *avctx)
 866 {
 867     ProresContext *ctx = avctx->priv_data;
 868     int i;
 869
 870     av_freep(&avctx->coded_frame);
 871
 872     if (ctx->tdata) {
 873         for (i = 0; i < avctx->thread_count; i++)
 874             av_free(ctx->tdata[i].nodes);
 875     }
 876     av_freep(&ctx->tdata);
 877     av_freep(&ctx->slice_q);
 878
 879     return 0;
 880 }
 881
 882 static av_cold int encode_init(AVCodecContext *avctx)
 883 {
 884     ProresContext *ctx = avctx->priv_data;
 885     int mps;
 886     int i, j;
 887     int min_quant, max_quant;
 888     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 889
 890     avctx->bits_per_raw_sample = 10;
 891     avctx->coded_frame = avcodec_alloc_frame();
 892     if (!avctx->coded_frame)
 893         return AVERROR(ENOMEM);
 894
 895     ff_proresdsp_init(&ctx->dsp);
 896     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 897                       interlaced ? ff_prores_interlaced_scan
 898                                  : ff_prores_progressive_scan);
 899
 900     mps = ctx->mbs_per_slice;
 901     if (mps & (mps - 1)) {
 902         av_log(avctx, AV_LOG_ERROR,
 903                "there should be an integer power of two MBs per slice\n");
 904         return AVERROR(EINVAL);
 905     }
 906
 907     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
 908                          ? CFACTOR_Y422
 909                          : CFACTOR_Y444;
 910     ctx->profile_info  = prores_profile_info + ctx->profile;
 911     ctx->num_planes    = 3;
 912
 913     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 914
 915     if (interlaced)
 916         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
 917     else
 918         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
 919
 920     ctx->slices_width  = ctx->mb_width / mps;
 921     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 922     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
 923     ctx->pictures_per_frame = 1 + interlaced;
 924
 925     if (ctx->quant_sel == -1)
 926         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 927     else
 928         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 929
 930     if (strlen(ctx->vendor) != 4) {
 931         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 932         return AVERROR_INVALIDDATA;
 933     }
 934
 935     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
 936     if (!ctx->force_quant) {
 937         if (!ctx->bits_per_mb) {
 938             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 939                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
 940                                            ctx->pictures_per_frame)
 941                     break;
 942             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 943         } else if (ctx->bits_per_mb < 128) {
 944             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 945             return AVERROR_INVALIDDATA;
 946         }
 947
 948         min_quant = ctx->profile_info->min_quant;
 949         max_quant = ctx->profile_info->max_quant;
 950         for (i = min_quant; i < MAX_STORED_Q; i++) {
 951             for (j = 0; j < 64; j++)
 952                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
 953         }
 954
 955         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
 956         if (!ctx->slice_q) {
 957             encode_close(avctx);
 958             return AVERROR(ENOMEM);
 959         }
 960
 961         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
 962         if (!ctx->tdata) {
 963             encode_close(avctx);
 964             return AVERROR(ENOMEM);
 965         }
 966
 967         for (j = 0; j < avctx->thread_count; j++) {
 968             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
 969                                             * TRELLIS_WIDTH
 970                                             * sizeof(*ctx->tdata->nodes));
 971             if (!ctx->tdata[j].nodes) {
 972                 encode_close(avctx);
 973                 return AVERROR(ENOMEM);
 974             }
 975             for (i = min_quant; i < max_quant + 2; i++) {
 976                 ctx->tdata[j].nodes[i].prev_node = -1;
 977                 ctx->tdata[j].nodes[i].bits      = 0;
 978                 ctx->tdata[j].nodes[i].score     = 0;
 979             }
 980         }
 981     } else {
 982         int ls = 0;
 983
 984         if (ctx->force_quant > 64) {
 985             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
 986             return AVERROR_INVALIDDATA;
 987         }
 988
 989         for (j = 0; j < 64; j++) {
 990             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
 991             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
 992         }
 993
 994         ctx->bits_per_mb = ls * 8;
 995         if (ctx->chroma_factor == CFACTOR_Y444)
 996             ctx->bits_per_mb += ls * 4;
 997         if (ctx->num_planes == 4)
 998             ctx->bits_per_mb += ls * 4;
 999     }
1000
1001     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1002                                   ctx->slices_per_picture *
1003                                   (2 + 2 * ctx->num_planes +
1004                                    (mps * ctx->bits_per_mb) / 8)
1005                                   + 200;
1006
1007     avctx->codec_tag   = ctx->profile_info->tag;
1008
1009     av_log(avctx, AV_LOG_DEBUG,
1010            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1011            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1012            interlaced ? "yes" : "no", ctx->bits_per_mb);
1013     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1014            ctx->frame_size_upper_bound);
1015
1016     return 0;
1017 }
1018
// byte offset of a ProresContext member, used as the AVOption offset field
#define OFFSET(x) offsetof(ProresContext, x)
// flags shared by every option below: video + encoding parameter;
// parenthesised so the expansion stays a single operand wherever it is used
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1021
1022 static const AVOption options[] = {
1023     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1024         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1025     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1026         { .i64 = PRORES_PROFILE_STANDARD },
1027         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1028     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1029         0, 0, VE, "profile" },
1030     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1031         0, 0, VE, "profile" },
1032     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1033         0, 0, VE, "profile" },
1034     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1035         0, 0, VE, "profile" },
1036     { "vendor", "vendor ID", OFFSET(vendor),
1037         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1038     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1039         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1040     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1041         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1042     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1043         0, 0, VE, "quant_mat" },
1044     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1045         0, 0, VE, "quant_mat" },
1046     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1047         0, 0, VE, "quant_mat" },
1048     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1049         0, 0, VE, "quant_mat" },
1050     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1051         0, 0, VE, "quant_mat" },
1052     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1053         0, 0, VE, "quant_mat" },
1054     { NULL }
1055 };
1056
1057 static const AVClass proresenc_class = {
1058     .class_name = "ProRes encoder",
1059     .item_name  = av_default_item_name,
1060     .option     = options,
1061     .version    = LIBAVUTIL_VERSION_INT,
1062 };
1063
1064 AVCodec ff_prores_encoder = {
1065     .name           = "prores",
1066     .type           = AVMEDIA_TYPE_VIDEO,
1067     .id             = AV_CODEC_ID_PRORES,
1068     .priv_data_size = sizeof(ProresContext),
1069     .init           = encode_init,
1070     .close          = encode_close,
1071     .encode2        = encode_frame,
1072     .capabilities   = CODEC_CAP_SLICE_THREADS,
1073     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1074     .pix_fmts       = (const enum AVPixelFormat[]) {
1075                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE
1076                       },
1077     .priv_class     = &proresenc_class,
1078 };