4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
5 * Copyright (C) 2004-2007 Eric Lasota
6 * Based on RoQ specs (C) 2001 Tim Ferguson
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @file libavcodec/roqvideoenc.c
27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
28 * Switchblade3 FFmpeg glue by Eric Lasota.
36 * CODEBOOK - 2 + 8 bits
37 * SUBDIVIDE - 2 + combined subcel cost
42 * CODEBOOK - 2 + 8 bits
43 * SUBDIVIDE - 2 + 4*8 bits
45 * Maximum cost: 138 bits per cel
47 * Proper evaluation requires LCD fraction comparison, which requires
48 * Squared Error (SE) loss * savings increase
50 * Maximum savings increase: 136 bits
51 * Maximum SE loss without overflow: 31580641
52 * Components in 8x8 supercel: 192
53 * Maximum SE precision per component: 164482
54 * >65025, so no truncation is needed (phew)
60 #include "bytestream.h"
67 * Maximum number of generated 4x4 codebooks. Can't be 256, to work around a
70 #define MAX_CBS_4x4 255
72 #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.
74 /* The cast is useful when multiplying it by INT_MAX */
75 #define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
77 /* Macroblock support functions */
78 static void unpack_roq_cell(roq_cell
*cell
, uint8_t u
[4*3])
80 memcpy(u
, cell
->y
, 4);
81 memset(u
+4, cell
->u
, 4);
82 memset(u
+8, cell
->v
, 4);
85 static void unpack_roq_qcell(uint8_t cb2
[], roq_qcell
*qcell
, uint8_t u
[4*4*3])
88 static const int offsets
[4] = {0, 2, 8, 10};
90 for (cp
=0; cp
<3; cp
++)
92 u
[4*4*cp
+ offsets
[i
] ] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
];
93 u
[4*4*cp
+ offsets
[i
]+1] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+1];
94 u
[4*4*cp
+ offsets
[i
]+4] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+2];
95 u
[4*4*cp
+ offsets
[i
]+5] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+3];
/**
 * Upscale an unpacked 4x4 block to 8x8 by repeating every sample twice
 * horizontally and twice vertically.
 *
 * @param base source, three consecutive 4x4 planes
 * @param u    destination, three consecutive 8x8 planes
 */
static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
{
    int plane, row, col;

    for (plane = 0; plane < 3; plane++) {
        const uint8_t *src_plane = base + 16*plane;

        for (row = 0; row < 8; row++) {
            const uint8_t *src_row = src_plane + (row/2)*4;

            for (col = 0; col < 8; col++)
                *u++ = src_row[col/2];
        }
    }
}
/** Return x multiplied by itself. */
static inline int square(int x)
{
    const int squared = x * x;

    return squared;
}
/**
 * Sum of squared differences between two runs of bytes.
 *
 * @param a     first run
 * @param b     second run
 * @param count number of bytes compared in each run
 * @return the accumulated squared error
 */
static inline int eval_sse(uint8_t *a, uint8_t *b, int count)
{
    int total = 0;

    for (; count; count--) {
        const int delta = *b++ - *a++;

        total += delta * delta;
    }

    return total;
}
125 // FIXME Could use DSPContext.sse, but it is not so speed critical (used
126 // just for motion estimation).
127 static int block_sse(uint8_t **buf1
, uint8_t **buf2
, int x1
, int y1
, int x2
,
128 int y2
, int *stride1
, int *stride2
, int size
)
133 for (k
=0; k
<3; k
++) {
134 int bias
= (k
? CHROMA_BIAS
: 4);
135 for (i
=0; i
<size
; i
++)
136 sse
+= bias
*eval_sse(buf1
[k
] + (y1
+i
)*stride1
[k
] + x1
,
137 buf2
[k
] + (y2
+i
)*stride2
[k
] + x2
, size
);
143 static int eval_motion_dist(RoqContext
*enc
, int x
, int y
, motion_vect vect
,
149 if (mx
< -7 || mx
> 7)
152 if (my
< -7 || my
> 7)
158 if ((unsigned) mx
> enc
->width
-size
|| (unsigned) my
> enc
->height
-size
)
161 return block_sse(enc
->frame_to_enc
->data
, enc
->last_frame
->data
, x
, y
,
163 enc
->frame_to_enc
->linesize
, enc
->last_frame
->linesize
,
168 * Returns distortion between two macroblocks
170 static inline int squared_diff_macroblock(uint8_t a
[], uint8_t b
[], int size
)
174 for(cp
=0;cp
<3;cp
++) {
175 int bias
= (cp
? CHROMA_BIAS
: 4);
176 sdiff
+= bias
*eval_sse(a
, b
, size
*size
);
200 SubcelEvaluation subCels
[4];
205 int sourceX
, sourceY
;
212 int usedCB2
[MAX_CBS_2x2
];
213 int usedCB4
[MAX_CBS_4x4
];
214 uint8_t unpacked_cb2
[MAX_CBS_2x2
*2*2*3];
215 uint8_t unpacked_cb4
[MAX_CBS_4x4
*4*4*3];
216 uint8_t unpacked_cb4_enlarged
[MAX_CBS_4x4
*8*8*3];
222 typedef struct RoqTempData
224 CelEvaluation
*cel_evals
;
226 int f2i4
[MAX_CBS_4x4
];
227 int i2f4
[MAX_CBS_4x4
];
228 int f2i2
[MAX_CBS_2x2
];
229 int i2f2
[MAX_CBS_2x2
];
236 RoqCodebooks codebooks
;
243 * Initializes cel evaluators and sets their source coordinates
245 static void create_cel_evals(RoqContext
*enc
, RoqTempdata
*tempData
)
249 tempData
->cel_evals
= av_malloc(enc
->width
*enc
->height
/64 * sizeof(CelEvaluation
));
251 /* Map to the ROQ quadtree order */
252 for (y
=0; y
<enc
->height
; y
+=16)
253 for (x
=0; x
<enc
->width
; x
+=16)
255 tempData
->cel_evals
[n
].sourceX
= x
+ (i
&1)*8;
256 tempData
->cel_evals
[n
++].sourceY
= y
+ (i
&2)*4;
261 * Get macroblocks from parts of the image
263 static void get_frame_mb(AVFrame
*frame
, int x
, int y
, uint8_t mb
[], int dim
)
267 for (cp
=0; cp
<3; cp
++) {
268 int stride
= frame
->linesize
[cp
];
269 for (i
=0; i
<dim
; i
++)
270 for (j
=0; j
<dim
; j
++)
271 *mb
++ = frame
->data
[cp
][(y
+i
)*stride
+ x
+ j
];
/**
 * Find the codebook with the lowest distortion from an image.
 *
 * @param cluster  macroblock to match, dim*dim*3 bytes
 * @param cb       unpacked codebook, dim*dim*3 bytes per entry
 * @param numCB    number of entries in cb
 * @param outIndex receives the index of the best entry (0 when numCB <= 0)
 * @param dim      macroblock width and height
 * @return distortion of the best entry, INT_MAX when no entry was examined
 */
static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
                    int *outIndex, int dim)
{
    int best_diff = INT_MAX;
    int best_idx  = 0;
    int n;

    /* Diff against the others; the first of several equal minima wins. */
    for (n = 0; n < numCB; n++) {
        const int diff = squared_diff_macroblock(cluster, cb + n*dim*dim*3, dim);

        if (diff >= best_diff)
            continue;

        best_diff = diff;
        best_idx  = n;
    }

    *outIndex = best_idx;
    return best_diff;
}
296 #define EVAL_MOTION(MOTION) \
298 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
300 if (diff < lowestdiff) { \
306 static void motion_search(RoqContext
*enc
, int blocksize
)
308 static const motion_vect offsets
[8] = {
319 int diff
, lowestdiff
, oldbest
;
321 motion_vect bestpick
= {{0,0}};
324 motion_vect
*last_motion
;
325 motion_vect
*this_motion
;
326 motion_vect vect
, vect2
;
328 int max
=(enc
->width
/blocksize
)*enc
->height
/blocksize
;
330 if (blocksize
== 4) {
331 last_motion
= enc
->last_motion4
;
332 this_motion
= enc
->this_motion4
;
334 last_motion
= enc
->last_motion8
;
335 this_motion
= enc
->this_motion8
;
338 for (i
=0; i
<enc
->height
; i
+=blocksize
)
339 for (j
=0; j
<enc
->width
; j
+=blocksize
) {
340 lowestdiff
= eval_motion_dist(enc
, j
, i
, (motion_vect
) {{0,0}},
346 EVAL_MOTION(enc
->this_motion8
[(i
/8)*(enc
->width
/8) + j
/8]);
348 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
349 if (offset
< max
&& offset
>= 0)
350 EVAL_MOTION(last_motion
[offset
]);
353 if (offset
< max
&& offset
>= 0)
354 EVAL_MOTION(last_motion
[offset
]);
356 offset
= (i
/blocksize
+ 1)*enc
->width
/blocksize
+ j
/blocksize
;
357 if (offset
< max
&& offset
>= 0)
358 EVAL_MOTION(last_motion
[offset
]);
360 off
[0]= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
- 1;
361 off
[1]= off
[0] - enc
->width
/blocksize
+ 1;
367 vect
.d
[k
]= mid_pred(this_motion
[off
[0]].d
[k
],
368 this_motion
[off
[1]].d
[k
],
369 this_motion
[off
[2]].d
[k
]);
373 EVAL_MOTION(this_motion
[off
[k
]]);
375 EVAL_MOTION(this_motion
[off
[0]]);
380 while (oldbest
!= lowestdiff
) {
381 oldbest
= lowestdiff
;
382 for (k
=0; k
<8; k
++) {
384 vect2
.d
[0] += offsets
[k
].d
[0];
385 vect2
.d
[1] += offsets
[k
].d
[1];
390 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
391 this_motion
[offset
] = bestpick
;
396 * Gets distortion for all options available to a subcel
398 static void gather_data_for_subcel(SubcelEvaluation
*subcel
, int x
,
399 int y
, RoqContext
*enc
, RoqTempdata
*tempData
)
406 static const int bitsUsed
[4] = {2, 10, 10, 34};
408 if (enc
->framesSinceKeyframe
>= 1) {
409 subcel
->motion
= enc
->this_motion4
[y
*enc
->width
/16 + x
/4];
411 subcel
->eval_dist
[RoQ_ID_FCC
] =
412 eval_motion_dist(enc
, x
, y
,
413 enc
->this_motion4
[y
*enc
->width
/16 + x
/4], 4);
415 subcel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
417 if (enc
->framesSinceKeyframe
>= 2)
418 subcel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
419 enc
->current_frame
->data
, x
,
421 enc
->frame_to_enc
->linesize
,
422 enc
->current_frame
->linesize
,
425 subcel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
427 cluster_index
= y
*enc
->width
/16 + x
/4;
429 get_frame_mb(enc
->frame_to_enc
, x
, y
, mb4
, 4);
431 subcel
->eval_dist
[RoQ_ID_SLD
] = index_mb(mb4
,
432 tempData
->codebooks
.unpacked_cb4
,
433 tempData
->codebooks
.numCB4
,
434 &subcel
->cbEntry
, 4);
436 subcel
->eval_dist
[RoQ_ID_CCC
] = 0;
439 subcel
->subCels
[i
] = tempData
->closest_cb2
[cluster_index
*4+i
];
441 get_frame_mb(enc
->frame_to_enc
, x
+2*(i
&1),
444 subcel
->eval_dist
[RoQ_ID_CCC
] +=
445 squared_diff_macroblock(tempData
->codebooks
.unpacked_cb2
+ subcel
->subCels
[i
]*2*2*3, mb2
, 2);
450 if (ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
452 subcel
->best_coding
= i
;
453 subcel
->best_bit_use
= bitsUsed
[i
];
454 best_dist
= ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] +
455 enc
->lambda
*bitsUsed
[i
];
460 * Gets distortion for all options available to a cel
462 static void gather_data_for_cel(CelEvaluation
*cel
, RoqContext
*enc
,
463 RoqTempdata
*tempData
)
466 int index
= cel
->sourceY
*enc
->width
/64 + cel
->sourceX
/8;
467 int i
, j
, best_dist
, divide_bit_use
;
469 int bitsUsed
[4] = {2, 10, 10, 0};
471 if (enc
->framesSinceKeyframe
>= 1) {
472 cel
->motion
= enc
->this_motion8
[index
];
474 cel
->eval_dist
[RoQ_ID_FCC
] =
475 eval_motion_dist(enc
, cel
->sourceX
, cel
->sourceY
,
476 enc
->this_motion8
[index
], 8);
478 cel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
480 if (enc
->framesSinceKeyframe
>= 2)
481 cel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
482 enc
->current_frame
->data
,
483 cel
->sourceX
, cel
->sourceY
,
484 cel
->sourceX
, cel
->sourceY
,
485 enc
->frame_to_enc
->linesize
,
486 enc
->current_frame
->linesize
,8);
488 cel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
490 get_frame_mb(enc
->frame_to_enc
, cel
->sourceX
, cel
->sourceY
, mb8
, 8);
492 cel
->eval_dist
[RoQ_ID_SLD
] =
493 index_mb(mb8
, tempData
->codebooks
.unpacked_cb4_enlarged
,
494 tempData
->codebooks
.numCB4
, &cel
->cbEntry
, 8);
496 gather_data_for_subcel(cel
->subCels
+ 0, cel
->sourceX
+0, cel
->sourceY
+0, enc
, tempData
);
497 gather_data_for_subcel(cel
->subCels
+ 1, cel
->sourceX
+4, cel
->sourceY
+0, enc
, tempData
);
498 gather_data_for_subcel(cel
->subCels
+ 2, cel
->sourceX
+0, cel
->sourceY
+4, enc
, tempData
);
499 gather_data_for_subcel(cel
->subCels
+ 3, cel
->sourceX
+4, cel
->sourceY
+4, enc
, tempData
);
501 cel
->eval_dist
[RoQ_ID_CCC
] = 0;
503 for (i
=0; i
<4; i
++) {
504 cel
->eval_dist
[RoQ_ID_CCC
] +=
505 cel
->subCels
[i
].eval_dist
[cel
->subCels
[i
].best_coding
];
506 divide_bit_use
+= cel
->subCels
[i
].best_bit_use
;
510 bitsUsed
[3] = 2 + divide_bit_use
;
513 if (ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
515 cel
->best_coding
= i
;
516 best_dist
= ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] +
517 enc
->lambda
*bitsUsed
[i
];
520 tempData
->used_option
[cel
->best_coding
]++;
521 tempData
->mainChunkSize
+= bitsUsed
[cel
->best_coding
];
523 if (cel
->best_coding
== RoQ_ID_SLD
)
524 tempData
->codebooks
.usedCB4
[cel
->cbEntry
]++;
526 if (cel
->best_coding
== RoQ_ID_CCC
)
527 for (i
=0; i
<4; i
++) {
528 if (cel
->subCels
[i
].best_coding
== RoQ_ID_SLD
)
529 tempData
->codebooks
.usedCB4
[cel
->subCels
[i
].cbEntry
]++;
530 else if (cel
->subCels
[i
].best_coding
== RoQ_ID_CCC
)
532 tempData
->codebooks
.usedCB2
[cel
->subCels
[i
].subCels
[j
]]++;
536 static void remap_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
540 /* Make remaps for the final codebook usage */
541 for (i
=0; i
<MAX_CBS_4x4
; i
++) {
542 if (tempData
->codebooks
.usedCB4
[i
]) {
543 tempData
->i2f4
[i
] = idx
;
544 tempData
->f2i4
[idx
] = i
;
546 tempData
->codebooks
.usedCB2
[enc
->cb4x4
[i
].idx
[j
]]++;
551 tempData
->numCB4
= idx
;
554 for (i
=0; i
<MAX_CBS_2x2
; i
++) {
555 if (tempData
->codebooks
.usedCB2
[i
]) {
556 tempData
->i2f2
[i
] = idx
;
557 tempData
->f2i2
[idx
] = i
;
561 tempData
->numCB2
= idx
;
566 * Write codebook chunk
568 static void write_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
571 uint8_t **outp
= &enc
->out_buf
;
573 if (tempData
->numCB2
) {
574 bytestream_put_le16(outp
, RoQ_QUAD_CODEBOOK
);
575 bytestream_put_le32(outp
, tempData
->numCB2
*6 + tempData
->numCB4
*4);
576 bytestream_put_byte(outp
, tempData
->numCB4
);
577 bytestream_put_byte(outp
, tempData
->numCB2
);
579 for (i
=0; i
<tempData
->numCB2
; i
++) {
580 bytestream_put_buffer(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].y
, 4);
581 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].u
);
582 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].v
);
585 for (i
=0; i
<tempData
->numCB4
; i
++)
587 bytestream_put_byte(outp
, tempData
->i2f2
[enc
->cb4x4
[tempData
->f2i4
[i
]].idx
[j
]]);
592 static inline uint8_t motion_arg(motion_vect mot
)
594 uint8_t ax
= 8 - ((uint8_t) mot
.d
[0]);
595 uint8_t ay
= 8 - ((uint8_t) mot
.d
[1]);
596 return ((ax
&15)<<4) | (ay
&15);
603 uint8_t argumentSpool
[64];
608 /* NOTE: Typecodes must be spooled AFTER arguments!! */
609 static void write_typecode(CodingSpool
*s
, uint8_t type
)
611 s
->typeSpool
|= (type
& 3) << (14 - s
->typeSpoolLength
);
612 s
->typeSpoolLength
+= 2;
613 if (s
->typeSpoolLength
== 16) {
614 bytestream_put_le16(s
->pout
, s
->typeSpool
);
615 bytestream_put_buffer(s
->pout
, s
->argumentSpool
,
616 s
->args
- s
->argumentSpool
);
617 s
->typeSpoolLength
= 0;
619 s
->args
= s
->argumentSpool
;
623 static void reconstruct_and_encode_image(RoqContext
*enc
, RoqTempdata
*tempData
, int w
, int h
, int numBlocks
)
636 spool
.typeSpoolLength
=0;
637 spool
.args
= spool
.argumentSpool
;
638 spool
.pout
= &enc
->out_buf
;
640 if (tempData
->used_option
[RoQ_ID_CCC
]%2)
641 tempData
->mainChunkSize
+=8; //FIXME
643 /* Write the video chunk header */
644 bytestream_put_le16(&enc
->out_buf
, RoQ_QUAD_VQ
);
645 bytestream_put_le32(&enc
->out_buf
, tempData
->mainChunkSize
/8);
646 bytestream_put_byte(&enc
->out_buf
, 0x0);
647 bytestream_put_byte(&enc
->out_buf
, 0x0);
649 for (i
=0; i
<numBlocks
; i
++) {
650 eval
= tempData
->cel_evals
+ i
;
654 dist
+= eval
->eval_dist
[eval
->best_coding
];
656 switch (eval
->best_coding
) {
658 write_typecode(&spool
, RoQ_ID_MOT
);
662 bytestream_put_byte(&spool
.args
, motion_arg(eval
->motion
));
664 write_typecode(&spool
, RoQ_ID_FCC
);
665 ff_apply_motion_8x8(enc
, x
, y
,
666 eval
->motion
.d
[0], eval
->motion
.d
[1]);
670 bytestream_put_byte(&spool
.args
, tempData
->i2f4
[eval
->cbEntry
]);
671 write_typecode(&spool
, RoQ_ID_SLD
);
673 qcell
= enc
->cb4x4
+ eval
->cbEntry
;
674 ff_apply_vector_4x4(enc
, x
, y
, enc
->cb2x2
+ qcell
->idx
[0]);
675 ff_apply_vector_4x4(enc
, x
+4, y
, enc
->cb2x2
+ qcell
->idx
[1]);
676 ff_apply_vector_4x4(enc
, x
, y
+4, enc
->cb2x2
+ qcell
->idx
[2]);
677 ff_apply_vector_4x4(enc
, x
+4, y
+4, enc
->cb2x2
+ qcell
->idx
[3]);
681 write_typecode(&spool
, RoQ_ID_CCC
);
683 for (j
=0; j
<4; j
++) {
687 switch(eval
->subCels
[j
].best_coding
) {
692 bytestream_put_byte(&spool
.args
,
693 motion_arg(eval
->subCels
[j
].motion
));
695 ff_apply_motion_4x4(enc
, subX
, subY
,
696 eval
->subCels
[j
].motion
.d
[0],
697 eval
->subCels
[j
].motion
.d
[1]);
701 bytestream_put_byte(&spool
.args
,
702 tempData
->i2f4
[eval
->subCels
[j
].cbEntry
]);
704 qcell
= enc
->cb4x4
+ eval
->subCels
[j
].cbEntry
;
706 ff_apply_vector_2x2(enc
, subX
, subY
,
707 enc
->cb2x2
+ qcell
->idx
[0]);
708 ff_apply_vector_2x2(enc
, subX
+2, subY
,
709 enc
->cb2x2
+ qcell
->idx
[1]);
710 ff_apply_vector_2x2(enc
, subX
, subY
+2,
711 enc
->cb2x2
+ qcell
->idx
[2]);
712 ff_apply_vector_2x2(enc
, subX
+2, subY
+2,
713 enc
->cb2x2
+ qcell
->idx
[3]);
717 for (k
=0; k
<4; k
++) {
718 int cb_idx
= eval
->subCels
[j
].subCels
[k
];
719 bytestream_put_byte(&spool
.args
,
720 tempData
->i2f2
[cb_idx
]);
722 ff_apply_vector_2x2(enc
, subX
+ 2*(k
&1), subY
+ (k
&2),
723 enc
->cb2x2
+ cb_idx
);
727 write_typecode(&spool
, eval
->subCels
[j
].best_coding
);
733 /* Flush the remainder of the argument/type spool */
734 while (spool
.typeSpoolLength
)
735 write_typecode(&spool
, 0x0);
738 uint8_t *fdata
[3] = {enc
->frame_to_enc
->data
[0],
739 enc
->frame_to_enc
->data
[1],
740 enc
->frame_to_enc
->data
[2]};
741 uint8_t *cdata
[3] = {enc
->current_frame
->data
[0],
742 enc
->current_frame
->data
[1],
743 enc
->current_frame
->data
[2]};
744 av_log(enc
->avctx
, AV_LOG_ERROR
, "Expected distortion: %i Actual: %i\n",
746 block_sse(fdata
, cdata
, 0, 0, 0, 0,
747 enc
->frame_to_enc
->linesize
,
748 enc
->current_frame
->linesize
,
749 enc
->width
)); //WARNING: Square dimensions implied...
755 * Create a single YUV cell from a 2x2 section of the image
757 static inline void frame_block_to_cell(uint8_t *block
, uint8_t **data
,
758 int top
, int left
, int *stride
)
763 for (j
=0; j
<2; j
++) {
764 int x
= (top
+i
)*stride
[0] + left
+ j
;
765 *block
++ = data
[0][x
];
766 x
= (top
+i
)*stride
[1] + left
+ j
;
776 * Creates YUV clusters for the entire image
778 static void create_clusters(AVFrame
*frame
, int w
, int h
, uint8_t *yuvClusters
)
783 for (j
=0; j
<w
; j
+=4) {
784 for (k
=0; k
< 2; k
++)
785 for (l
=0; l
< 2; l
++)
786 frame_block_to_cell(yuvClusters
+ (l
+ 2*k
)*6, frame
->data
,
787 i
+2*k
, j
+2*l
, frame
->linesize
);
792 static void generate_codebook(RoqContext
*enc
, RoqTempdata
*tempdata
,
793 int *points
, int inputCount
, roq_cell
*results
,
794 int size
, int cbsize
)
797 int c_size
= size
*size
/4;
799 int *codebook
= av_malloc(6*c_size
*cbsize
*sizeof(int));
803 closest_cb
= av_malloc(6*c_size
*inputCount
*sizeof(int));
805 closest_cb
= tempdata
->closest_cb2
;
807 ff_init_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
808 ff_do_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
814 for (i
=0; i
<cbsize
; i
++)
815 for (k
=0; k
<c_size
; k
++) {
817 results
->y
[j
] = *buf
++;
819 results
->u
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
820 results
->v
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
827 static void generate_new_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
830 RoqCodebooks
*codebooks
= &tempData
->codebooks
;
831 int max
= enc
->width
*enc
->height
/16;
833 roq_cell
*results4
= av_malloc(sizeof(roq_cell
)*MAX_CBS_4x4
*4);
834 uint8_t *yuvClusters
=av_malloc(sizeof(int)*max
*6*4);
835 int *points
= av_malloc(max
*6*4*sizeof(int));
838 /* Subsample YUV data */
839 create_clusters(enc
->frame_to_enc
, enc
->width
, enc
->height
, yuvClusters
);
841 /* Cast to integer and apply chroma bias */
842 for (i
=0; i
<max
*24; i
++) {
843 bias
= ((i
%6)<4) ? 1 : CHROMA_BIAS
;
844 points
[i
] = bias
*yuvClusters
[i
];
847 /* Create 4x4 codebooks */
848 generate_codebook(enc
, tempData
, points
, max
, results4
, 4, MAX_CBS_4x4
);
850 codebooks
->numCB4
= MAX_CBS_4x4
;
852 tempData
->closest_cb2
= av_malloc(max
*4*sizeof(int));
854 /* Create 2x2 codebooks */
855 generate_codebook(enc
, tempData
, points
, max
*4, enc
->cb2x2
, 2, MAX_CBS_2x2
);
857 codebooks
->numCB2
= MAX_CBS_2x2
;
859 /* Unpack 2x2 codebook clusters */
860 for (i
=0; i
<codebooks
->numCB2
; i
++)
861 unpack_roq_cell(enc
->cb2x2
+ i
, codebooks
->unpacked_cb2
+ i
*2*2*3);
863 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
864 for (i
=0; i
<codebooks
->numCB4
; i
++) {
865 for (j
=0; j
<4; j
++) {
866 unpack_roq_cell(&results4
[4*i
+ j
], mb2
);
867 index_mb(mb2
, codebooks
->unpacked_cb2
, codebooks
->numCB2
,
868 &enc
->cb4x4
[i
].idx
[j
], 2);
870 unpack_roq_qcell(codebooks
->unpacked_cb2
, enc
->cb4x4
+ i
,
871 codebooks
->unpacked_cb4
+ i
*4*4*3);
872 enlarge_roq_mb4(codebooks
->unpacked_cb4
+ i
*4*4*3,
873 codebooks
->unpacked_cb4_enlarged
+ i
*8*8*3);
876 av_free(yuvClusters
);
881 static void roq_encode_video(RoqContext
*enc
)
883 RoqTempdata
*tempData
= enc
->tmpData
;
886 memset(tempData
, 0, sizeof(*tempData
));
888 create_cel_evals(enc
, tempData
);
890 generate_new_codebooks(enc
, tempData
);
892 if (enc
->framesSinceKeyframe
>= 1) {
893 motion_search(enc
, 8);
894 motion_search(enc
, 4);
898 for (i
=0; i
<enc
->width
*enc
->height
/64; i
++)
899 gather_data_for_cel(tempData
->cel_evals
+ i
, enc
, tempData
);
901 /* Quake 3 can't handle chunks bigger than 65536 bytes */
902 if (tempData
->mainChunkSize
/8 > 65536) {
907 remap_codebooks(enc
, tempData
);
909 write_codebooks(enc
, tempData
);
911 reconstruct_and_encode_image(enc
, tempData
, enc
->width
, enc
->height
,
912 enc
->width
*enc
->height
/64);
914 enc
->avctx
->coded_frame
= enc
->current_frame
;
916 /* Rotate frame history */
917 FFSWAP(AVFrame
*, enc
->current_frame
, enc
->last_frame
);
918 FFSWAP(motion_vect
*, enc
->last_motion4
, enc
->this_motion4
);
919 FFSWAP(motion_vect
*, enc
->last_motion8
, enc
->this_motion8
);
921 av_free(tempData
->cel_evals
);
922 av_free(tempData
->closest_cb2
);
924 enc
->framesSinceKeyframe
++;
927 static int roq_encode_init(AVCodecContext
*avctx
)
929 RoqContext
*enc
= avctx
->priv_data
;
931 av_lfg_init(&enc
->randctx
, 1);
933 enc
->framesSinceKeyframe
= 0;
934 if ((avctx
->width
& 0xf) || (avctx
->height
& 0xf)) {
935 av_log(avctx
, AV_LOG_ERROR
, "Dimensions must be divisible by 16\n");
939 if (((avctx
->width
)&(avctx
->width
-1))||((avctx
->height
)&(avctx
->height
-1)))
940 av_log(avctx
, AV_LOG_ERROR
, "Warning: dimensions not power of two\n");
942 if (avcodec_check_dimensions(avctx
, avctx
->width
, avctx
->height
)) {
943 av_log(avctx
, AV_LOG_ERROR
, "Invalid dimensions (%dx%d)\n",
944 avctx
->width
, avctx
->height
);
948 enc
->width
= avctx
->width
;
949 enc
->height
= avctx
->height
;
951 enc
->framesSinceKeyframe
= 0;
952 enc
->first_frame
= 1;
954 enc
->last_frame
= &enc
->frames
[0];
955 enc
->current_frame
= &enc
->frames
[1];
957 enc
->tmpData
= av_malloc(sizeof(RoqTempdata
));
960 av_mallocz((enc
->width
*enc
->height
/16)*sizeof(motion_vect
));
963 av_malloc ((enc
->width
*enc
->height
/16)*sizeof(motion_vect
));
966 av_mallocz((enc
->width
*enc
->height
/64)*sizeof(motion_vect
));
969 av_malloc ((enc
->width
*enc
->height
/64)*sizeof(motion_vect
));
974 static void roq_write_video_info_chunk(RoqContext
*enc
)
977 bytestream_put_le16(&enc
->out_buf
, RoQ_INFO
);
980 bytestream_put_le32(&enc
->out_buf
, 8);
982 /* Unused argument */
983 bytestream_put_byte(&enc
->out_buf
, 0x00);
984 bytestream_put_byte(&enc
->out_buf
, 0x00);
987 bytestream_put_le16(&enc
->out_buf
, enc
->width
);
990 bytestream_put_le16(&enc
->out_buf
, enc
->height
);
992 /* Unused in Quake 3, mimics the output of the real encoder */
993 bytestream_put_byte(&enc
->out_buf
, 0x08);
994 bytestream_put_byte(&enc
->out_buf
, 0x00);
995 bytestream_put_byte(&enc
->out_buf
, 0x04);
996 bytestream_put_byte(&enc
->out_buf
, 0x00);
999 static int roq_encode_frame(AVCodecContext
*avctx
, unsigned char *buf
, int buf_size
, void *data
)
1001 RoqContext
*enc
= avctx
->priv_data
;
1002 AVFrame
*frame
= data
;
1003 uint8_t *buf_start
= buf
;
1008 enc
->frame_to_enc
= frame
;
1011 enc
->lambda
= frame
->quality
- 1;
1013 enc
->lambda
= 2*ROQ_LAMBDA_SCALE
;
1015 /* 138 bits max per 8x8 block +
1016 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
1017 if (((enc
->width
*enc
->height
/64)*138+7)/8 + 256*(6+4) + 8 > buf_size
) {
1018 av_log(avctx
, AV_LOG_ERROR
, " RoQ: Output buffer too small!\n");
1022 /* Check for I frame */
1023 if (enc
->framesSinceKeyframe
== avctx
->gop_size
)
1024 enc
->framesSinceKeyframe
= 0;
1026 if (enc
->first_frame
) {
1027 /* Alloc memory for the reconstruction data (we must know the stride
1029 if (avctx
->get_buffer(avctx
, enc
->current_frame
) ||
1030 avctx
->get_buffer(avctx
, enc
->last_frame
)) {
1031 av_log(avctx
, AV_LOG_ERROR
, " RoQ: get_buffer() failed\n");
1035 /* Before the first video frame, write a "video info" chunk */
1036 roq_write_video_info_chunk(enc
);
1038 enc
->first_frame
= 0;
1041 /* Encode the actual frame */
1042 roq_encode_video(enc
);
1044 return enc
->out_buf
- buf_start
;
1047 static int roq_encode_end(AVCodecContext
*avctx
)
1049 RoqContext
*enc
= avctx
->priv_data
;
1051 avctx
->release_buffer(avctx
, enc
->last_frame
);
1052 avctx
->release_buffer(avctx
, enc
->current_frame
);
1054 av_free(enc
->tmpData
);
1055 av_free(enc
->this_motion4
);
1056 av_free(enc
->last_motion4
);
1057 av_free(enc
->this_motion8
);
1058 av_free(enc
->last_motion8
);
1063 AVCodec roq_encoder
=
1072 .supported_framerates
= (const AVRational
[]){{30,1}, {0,0}},
1073 .pix_fmts
= (const enum PixelFormat
[]){PIX_FMT_YUV444P
, PIX_FMT_NONE
},
1074 .long_name
= NULL_IF_CONFIG_SMALL("id RoQ video"),