4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
5 * Copyright (C) 2004-2007 Eric Lasota
6 * Based on RoQ specs (C) 2001 Tim Ferguson
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
28 * Switchblade3 Libav glue by Eric Lasota.
36 * CODEBOOK - 2 + 8 bits
37 * SUBDIVIDE - 2 + combined subcel cost
42 * CODEBOOK - 2 + 8 bits
43 * SUBDIVIDE - 2 + 4*8 bits
45 * Maximum cost: 138 bits per cel
47 * Proper evaluation requires LCD fraction comparison, which requires
48 * Squared Error (SE) loss * savings increase
50 * Maximum savings increase: 136 bits
51 * Maximum SE loss without overflow: 31580641
52 * Components in 8x8 supercel: 192
53 * Maximum SE precision per component: 164482
54 * >65025, so no truncation is needed (phew)
60 #include "bytestream.h"
68 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
71 #define MAX_CBS_4x4 255
73 #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.
75 /* The cast is useful when multiplying it by INT_MAX */
76 #define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
78 /* Macroblock support functions */
79 static void unpack_roq_cell(roq_cell
*cell
, uint8_t u
[4*3])
81 memcpy(u
, cell
->y
, 4);
82 memset(u
+4, cell
->u
, 4);
83 memset(u
+8, cell
->v
, 4);
86 static void unpack_roq_qcell(uint8_t cb2
[], roq_qcell
*qcell
, uint8_t u
[4*4*3])
89 static const int offsets
[4] = {0, 2, 8, 10};
91 for (cp
=0; cp
<3; cp
++)
93 u
[4*4*cp
+ offsets
[i
] ] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
];
94 u
[4*4*cp
+ offsets
[i
]+1] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+1];
95 u
[4*4*cp
+ offsets
[i
]+4] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+2];
96 u
[4*4*cp
+ offsets
[i
]+5] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+3];
101 static void enlarge_roq_mb4(uint8_t base
[3*16], uint8_t u
[3*64])
105 for(cp
=0; cp
<3; cp
++)
108 *u
++ = base
[(y
/2)*4 + (x
/2) + 16*cp
];
111 static inline int square(int x
)
116 static inline int eval_sse(const uint8_t *a
, const uint8_t *b
, int count
)
121 diff
+= square(*b
++ - *a
++);
126 // FIXME Could use DSPContext.sse, but it is not so speed critical (used
127 // just for motion estimation).
128 static int block_sse(uint8_t * const *buf1
, uint8_t * const *buf2
, int x1
, int y1
,
129 int x2
, int y2
, const int *stride1
, const int *stride2
, int size
)
134 for (k
=0; k
<3; k
++) {
135 int bias
= (k
? CHROMA_BIAS
: 4);
136 for (i
=0; i
<size
; i
++)
137 sse
+= bias
*eval_sse(buf1
[k
] + (y1
+i
)*stride1
[k
] + x1
,
138 buf2
[k
] + (y2
+i
)*stride2
[k
] + x2
, size
);
144 static int eval_motion_dist(RoqContext
*enc
, int x
, int y
, motion_vect vect
,
150 if (mx
< -7 || mx
> 7)
153 if (my
< -7 || my
> 7)
159 if ((unsigned) mx
> enc
->width
-size
|| (unsigned) my
> enc
->height
-size
)
162 return block_sse(enc
->frame_to_enc
->data
, enc
->last_frame
->data
, x
, y
,
164 enc
->frame_to_enc
->linesize
, enc
->last_frame
->linesize
,
169 * @return distortion between two macroblocks
171 static inline int squared_diff_macroblock(uint8_t a
[], uint8_t b
[], int size
)
175 for(cp
=0;cp
<3;cp
++) {
176 int bias
= (cp
? CHROMA_BIAS
: 4);
177 sdiff
+= bias
*eval_sse(a
, b
, size
*size
);
201 SubcelEvaluation subCels
[4];
206 int sourceX
, sourceY
;
213 int usedCB2
[MAX_CBS_2x2
];
214 int usedCB4
[MAX_CBS_4x4
];
215 uint8_t unpacked_cb2
[MAX_CBS_2x2
*2*2*3];
216 uint8_t unpacked_cb4
[MAX_CBS_4x4
*4*4*3];
217 uint8_t unpacked_cb4_enlarged
[MAX_CBS_4x4
*8*8*3];
223 typedef struct RoqTempData
225 CelEvaluation
*cel_evals
;
227 int f2i4
[MAX_CBS_4x4
];
228 int i2f4
[MAX_CBS_4x4
];
229 int f2i2
[MAX_CBS_2x2
];
230 int i2f2
[MAX_CBS_2x2
];
237 RoqCodebooks codebooks
;
244 * Initialize cel evaluators and set their source coordinates
246 static void create_cel_evals(RoqContext
*enc
, RoqTempdata
*tempData
)
250 tempData
->cel_evals
= av_malloc(enc
->width
*enc
->height
/64 * sizeof(CelEvaluation
));
252 /* Map to the ROQ quadtree order */
253 for (y
=0; y
<enc
->height
; y
+=16)
254 for (x
=0; x
<enc
->width
; x
+=16)
256 tempData
->cel_evals
[n
].sourceX
= x
+ (i
&1)*8;
257 tempData
->cel_evals
[n
++].sourceY
= y
+ (i
&2)*4;
262 * Get macroblocks from parts of the image
264 static void get_frame_mb(const AVFrame
*frame
, int x
, int y
, uint8_t mb
[], int dim
)
268 for (cp
=0; cp
<3; cp
++) {
269 int stride
= frame
->linesize
[cp
];
270 for (i
=0; i
<dim
; i
++)
271 for (j
=0; j
<dim
; j
++)
272 *mb
++ = frame
->data
[cp
][(y
+i
)*stride
+ x
+ j
];
277 * Find the codebook with the lowest distortion from an image
279 static int index_mb(uint8_t cluster
[], uint8_t cb
[], int numCB
,
280 int *outIndex
, int dim
)
282 int i
, lDiff
= INT_MAX
, pick
=0;
284 /* Diff against the others */
285 for (i
=0; i
<numCB
; i
++) {
286 int diff
= squared_diff_macroblock(cluster
, cb
+ i
*dim
*dim
*3, dim
);
297 #define EVAL_MOTION(MOTION) \
299 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
301 if (diff < lowestdiff) { \
307 static void motion_search(RoqContext
*enc
, int blocksize
)
309 static const motion_vect offsets
[8] = {
320 int diff
, lowestdiff
, oldbest
;
322 motion_vect bestpick
= {{0,0}};
325 motion_vect
*last_motion
;
326 motion_vect
*this_motion
;
327 motion_vect vect
, vect2
;
329 int max
=(enc
->width
/blocksize
)*enc
->height
/blocksize
;
331 if (blocksize
== 4) {
332 last_motion
= enc
->last_motion4
;
333 this_motion
= enc
->this_motion4
;
335 last_motion
= enc
->last_motion8
;
336 this_motion
= enc
->this_motion8
;
339 for (i
=0; i
<enc
->height
; i
+=blocksize
)
340 for (j
=0; j
<enc
->width
; j
+=blocksize
) {
341 lowestdiff
= eval_motion_dist(enc
, j
, i
, (motion_vect
) {{0,0}},
347 EVAL_MOTION(enc
->this_motion8
[(i
/8)*(enc
->width
/8) + j
/8]);
349 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
350 if (offset
< max
&& offset
>= 0)
351 EVAL_MOTION(last_motion
[offset
]);
354 if (offset
< max
&& offset
>= 0)
355 EVAL_MOTION(last_motion
[offset
]);
357 offset
= (i
/blocksize
+ 1)*enc
->width
/blocksize
+ j
/blocksize
;
358 if (offset
< max
&& offset
>= 0)
359 EVAL_MOTION(last_motion
[offset
]);
361 off
[0]= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
- 1;
362 off
[1]= off
[0] - enc
->width
/blocksize
+ 1;
368 vect
.d
[k
]= mid_pred(this_motion
[off
[0]].d
[k
],
369 this_motion
[off
[1]].d
[k
],
370 this_motion
[off
[2]].d
[k
]);
374 EVAL_MOTION(this_motion
[off
[k
]]);
376 EVAL_MOTION(this_motion
[off
[0]]);
381 while (oldbest
!= lowestdiff
) {
382 oldbest
= lowestdiff
;
383 for (k
=0; k
<8; k
++) {
385 vect2
.d
[0] += offsets
[k
].d
[0];
386 vect2
.d
[1] += offsets
[k
].d
[1];
391 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
392 this_motion
[offset
] = bestpick
;
397 * Get distortion for all options available to a subcel
399 static void gather_data_for_subcel(SubcelEvaluation
*subcel
, int x
,
400 int y
, RoqContext
*enc
, RoqTempdata
*tempData
)
407 static const int bitsUsed
[4] = {2, 10, 10, 34};
409 if (enc
->framesSinceKeyframe
>= 1) {
410 subcel
->motion
= enc
->this_motion4
[y
*enc
->width
/16 + x
/4];
412 subcel
->eval_dist
[RoQ_ID_FCC
] =
413 eval_motion_dist(enc
, x
, y
,
414 enc
->this_motion4
[y
*enc
->width
/16 + x
/4], 4);
416 subcel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
418 if (enc
->framesSinceKeyframe
>= 2)
419 subcel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
420 enc
->current_frame
->data
, x
,
422 enc
->frame_to_enc
->linesize
,
423 enc
->current_frame
->linesize
,
426 subcel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
428 cluster_index
= y
*enc
->width
/16 + x
/4;
430 get_frame_mb(enc
->frame_to_enc
, x
, y
, mb4
, 4);
432 subcel
->eval_dist
[RoQ_ID_SLD
] = index_mb(mb4
,
433 tempData
->codebooks
.unpacked_cb4
,
434 tempData
->codebooks
.numCB4
,
435 &subcel
->cbEntry
, 4);
437 subcel
->eval_dist
[RoQ_ID_CCC
] = 0;
440 subcel
->subCels
[i
] = tempData
->closest_cb2
[cluster_index
*4+i
];
442 get_frame_mb(enc
->frame_to_enc
, x
+2*(i
&1),
445 subcel
->eval_dist
[RoQ_ID_CCC
] +=
446 squared_diff_macroblock(tempData
->codebooks
.unpacked_cb2
+ subcel
->subCels
[i
]*2*2*3, mb2
, 2);
451 if (ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
453 subcel
->best_coding
= i
;
454 subcel
->best_bit_use
= bitsUsed
[i
];
455 best_dist
= ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] +
456 enc
->lambda
*bitsUsed
[i
];
461 * Get distortion for all options available to a cel
463 static void gather_data_for_cel(CelEvaluation
*cel
, RoqContext
*enc
,
464 RoqTempdata
*tempData
)
467 int index
= cel
->sourceY
*enc
->width
/64 + cel
->sourceX
/8;
468 int i
, j
, best_dist
, divide_bit_use
;
470 int bitsUsed
[4] = {2, 10, 10, 0};
472 if (enc
->framesSinceKeyframe
>= 1) {
473 cel
->motion
= enc
->this_motion8
[index
];
475 cel
->eval_dist
[RoQ_ID_FCC
] =
476 eval_motion_dist(enc
, cel
->sourceX
, cel
->sourceY
,
477 enc
->this_motion8
[index
], 8);
479 cel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
481 if (enc
->framesSinceKeyframe
>= 2)
482 cel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
483 enc
->current_frame
->data
,
484 cel
->sourceX
, cel
->sourceY
,
485 cel
->sourceX
, cel
->sourceY
,
486 enc
->frame_to_enc
->linesize
,
487 enc
->current_frame
->linesize
,8);
489 cel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
491 get_frame_mb(enc
->frame_to_enc
, cel
->sourceX
, cel
->sourceY
, mb8
, 8);
493 cel
->eval_dist
[RoQ_ID_SLD
] =
494 index_mb(mb8
, tempData
->codebooks
.unpacked_cb4_enlarged
,
495 tempData
->codebooks
.numCB4
, &cel
->cbEntry
, 8);
497 gather_data_for_subcel(cel
->subCels
+ 0, cel
->sourceX
+0, cel
->sourceY
+0, enc
, tempData
);
498 gather_data_for_subcel(cel
->subCels
+ 1, cel
->sourceX
+4, cel
->sourceY
+0, enc
, tempData
);
499 gather_data_for_subcel(cel
->subCels
+ 2, cel
->sourceX
+0, cel
->sourceY
+4, enc
, tempData
);
500 gather_data_for_subcel(cel
->subCels
+ 3, cel
->sourceX
+4, cel
->sourceY
+4, enc
, tempData
);
502 cel
->eval_dist
[RoQ_ID_CCC
] = 0;
504 for (i
=0; i
<4; i
++) {
505 cel
->eval_dist
[RoQ_ID_CCC
] +=
506 cel
->subCels
[i
].eval_dist
[cel
->subCels
[i
].best_coding
];
507 divide_bit_use
+= cel
->subCels
[i
].best_bit_use
;
511 bitsUsed
[3] = 2 + divide_bit_use
;
514 if (ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
516 cel
->best_coding
= i
;
517 best_dist
= ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] +
518 enc
->lambda
*bitsUsed
[i
];
521 tempData
->used_option
[cel
->best_coding
]++;
522 tempData
->mainChunkSize
+= bitsUsed
[cel
->best_coding
];
524 if (cel
->best_coding
== RoQ_ID_SLD
)
525 tempData
->codebooks
.usedCB4
[cel
->cbEntry
]++;
527 if (cel
->best_coding
== RoQ_ID_CCC
)
528 for (i
=0; i
<4; i
++) {
529 if (cel
->subCels
[i
].best_coding
== RoQ_ID_SLD
)
530 tempData
->codebooks
.usedCB4
[cel
->subCels
[i
].cbEntry
]++;
531 else if (cel
->subCels
[i
].best_coding
== RoQ_ID_CCC
)
533 tempData
->codebooks
.usedCB2
[cel
->subCels
[i
].subCels
[j
]]++;
537 static void remap_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
541 /* Make remaps for the final codebook usage */
542 for (i
=0; i
<MAX_CBS_4x4
; i
++) {
543 if (tempData
->codebooks
.usedCB4
[i
]) {
544 tempData
->i2f4
[i
] = idx
;
545 tempData
->f2i4
[idx
] = i
;
547 tempData
->codebooks
.usedCB2
[enc
->cb4x4
[i
].idx
[j
]]++;
552 tempData
->numCB4
= idx
;
555 for (i
=0; i
<MAX_CBS_2x2
; i
++) {
556 if (tempData
->codebooks
.usedCB2
[i
]) {
557 tempData
->i2f2
[i
] = idx
;
558 tempData
->f2i2
[idx
] = i
;
562 tempData
->numCB2
= idx
;
567 * Write codebook chunk
569 static void write_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
572 uint8_t **outp
= &enc
->out_buf
;
574 if (tempData
->numCB2
) {
575 bytestream_put_le16(outp
, RoQ_QUAD_CODEBOOK
);
576 bytestream_put_le32(outp
, tempData
->numCB2
*6 + tempData
->numCB4
*4);
577 bytestream_put_byte(outp
, tempData
->numCB4
);
578 bytestream_put_byte(outp
, tempData
->numCB2
);
580 for (i
=0; i
<tempData
->numCB2
; i
++) {
581 bytestream_put_buffer(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].y
, 4);
582 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].u
);
583 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].v
);
586 for (i
=0; i
<tempData
->numCB4
; i
++)
588 bytestream_put_byte(outp
, tempData
->i2f2
[enc
->cb4x4
[tempData
->f2i4
[i
]].idx
[j
]]);
593 static inline uint8_t motion_arg(motion_vect mot
)
595 uint8_t ax
= 8 - ((uint8_t) mot
.d
[0]);
596 uint8_t ay
= 8 - ((uint8_t) mot
.d
[1]);
597 return ((ax
&15)<<4) | (ay
&15);
604 uint8_t argumentSpool
[64];
609 /* NOTE: Typecodes must be spooled AFTER arguments!! */
610 static void write_typecode(CodingSpool
*s
, uint8_t type
)
612 s
->typeSpool
|= (type
& 3) << (14 - s
->typeSpoolLength
);
613 s
->typeSpoolLength
+= 2;
614 if (s
->typeSpoolLength
== 16) {
615 bytestream_put_le16(s
->pout
, s
->typeSpool
);
616 bytestream_put_buffer(s
->pout
, s
->argumentSpool
,
617 s
->args
- s
->argumentSpool
);
618 s
->typeSpoolLength
= 0;
620 s
->args
= s
->argumentSpool
;
624 static void reconstruct_and_encode_image(RoqContext
*enc
, RoqTempdata
*tempData
, int w
, int h
, int numBlocks
)
637 spool
.typeSpoolLength
=0;
638 spool
.args
= spool
.argumentSpool
;
639 spool
.pout
= &enc
->out_buf
;
641 if (tempData
->used_option
[RoQ_ID_CCC
]%2)
642 tempData
->mainChunkSize
+=8; //FIXME
644 /* Write the video chunk header */
645 bytestream_put_le16(&enc
->out_buf
, RoQ_QUAD_VQ
);
646 bytestream_put_le32(&enc
->out_buf
, tempData
->mainChunkSize
/8);
647 bytestream_put_byte(&enc
->out_buf
, 0x0);
648 bytestream_put_byte(&enc
->out_buf
, 0x0);
650 for (i
=0; i
<numBlocks
; i
++) {
651 eval
= tempData
->cel_evals
+ i
;
655 dist
+= eval
->eval_dist
[eval
->best_coding
];
657 switch (eval
->best_coding
) {
659 write_typecode(&spool
, RoQ_ID_MOT
);
663 bytestream_put_byte(&spool
.args
, motion_arg(eval
->motion
));
665 write_typecode(&spool
, RoQ_ID_FCC
);
666 ff_apply_motion_8x8(enc
, x
, y
,
667 eval
->motion
.d
[0], eval
->motion
.d
[1]);
671 bytestream_put_byte(&spool
.args
, tempData
->i2f4
[eval
->cbEntry
]);
672 write_typecode(&spool
, RoQ_ID_SLD
);
674 qcell
= enc
->cb4x4
+ eval
->cbEntry
;
675 ff_apply_vector_4x4(enc
, x
, y
, enc
->cb2x2
+ qcell
->idx
[0]);
676 ff_apply_vector_4x4(enc
, x
+4, y
, enc
->cb2x2
+ qcell
->idx
[1]);
677 ff_apply_vector_4x4(enc
, x
, y
+4, enc
->cb2x2
+ qcell
->idx
[2]);
678 ff_apply_vector_4x4(enc
, x
+4, y
+4, enc
->cb2x2
+ qcell
->idx
[3]);
682 write_typecode(&spool
, RoQ_ID_CCC
);
684 for (j
=0; j
<4; j
++) {
688 switch(eval
->subCels
[j
].best_coding
) {
693 bytestream_put_byte(&spool
.args
,
694 motion_arg(eval
->subCels
[j
].motion
));
696 ff_apply_motion_4x4(enc
, subX
, subY
,
697 eval
->subCels
[j
].motion
.d
[0],
698 eval
->subCels
[j
].motion
.d
[1]);
702 bytestream_put_byte(&spool
.args
,
703 tempData
->i2f4
[eval
->subCels
[j
].cbEntry
]);
705 qcell
= enc
->cb4x4
+ eval
->subCels
[j
].cbEntry
;
707 ff_apply_vector_2x2(enc
, subX
, subY
,
708 enc
->cb2x2
+ qcell
->idx
[0]);
709 ff_apply_vector_2x2(enc
, subX
+2, subY
,
710 enc
->cb2x2
+ qcell
->idx
[1]);
711 ff_apply_vector_2x2(enc
, subX
, subY
+2,
712 enc
->cb2x2
+ qcell
->idx
[2]);
713 ff_apply_vector_2x2(enc
, subX
+2, subY
+2,
714 enc
->cb2x2
+ qcell
->idx
[3]);
718 for (k
=0; k
<4; k
++) {
719 int cb_idx
= eval
->subCels
[j
].subCels
[k
];
720 bytestream_put_byte(&spool
.args
,
721 tempData
->i2f2
[cb_idx
]);
723 ff_apply_vector_2x2(enc
, subX
+ 2*(k
&1), subY
+ (k
&2),
724 enc
->cb2x2
+ cb_idx
);
728 write_typecode(&spool
, eval
->subCels
[j
].best_coding
);
734 /* Flush the remainder of the argument/type spool */
735 while (spool
.typeSpoolLength
)
736 write_typecode(&spool
, 0x0);
739 uint8_t *fdata
[3] = {enc
->frame_to_enc
->data
[0],
740 enc
->frame_to_enc
->data
[1],
741 enc
->frame_to_enc
->data
[2]};
742 uint8_t *cdata
[3] = {enc
->current_frame
->data
[0],
743 enc
->current_frame
->data
[1],
744 enc
->current_frame
->data
[2]};
745 av_log(enc
->avctx
, AV_LOG_ERROR
, "Expected distortion: %i Actual: %i\n",
747 block_sse(fdata
, cdata
, 0, 0, 0, 0,
748 enc
->frame_to_enc
->linesize
,
749 enc
->current_frame
->linesize
,
750 enc
->width
)); //WARNING: Square dimensions implied...
756 * Create a single YUV cell from a 2x2 section of the image
758 static inline void frame_block_to_cell(uint8_t *block
, uint8_t * const *data
,
759 int top
, int left
, const int *stride
)
764 for (j
=0; j
<2; j
++) {
765 int x
= (top
+i
)*stride
[0] + left
+ j
;
766 *block
++ = data
[0][x
];
767 x
= (top
+i
)*stride
[1] + left
+ j
;
777 * Create YUV clusters for the entire image
779 static void create_clusters(const AVFrame
*frame
, int w
, int h
, uint8_t *yuvClusters
)
784 for (j
=0; j
<w
; j
+=4) {
785 for (k
=0; k
< 2; k
++)
786 for (l
=0; l
< 2; l
++)
787 frame_block_to_cell(yuvClusters
+ (l
+ 2*k
)*6, frame
->data
,
788 i
+2*k
, j
+2*l
, frame
->linesize
);
793 static void generate_codebook(RoqContext
*enc
, RoqTempdata
*tempdata
,
794 int *points
, int inputCount
, roq_cell
*results
,
795 int size
, int cbsize
)
798 int c_size
= size
*size
/4;
800 int *codebook
= av_malloc(6*c_size
*cbsize
*sizeof(int));
804 closest_cb
= av_malloc(6*c_size
*inputCount
*sizeof(int));
806 closest_cb
= tempdata
->closest_cb2
;
808 ff_init_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
809 ff_do_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
815 for (i
=0; i
<cbsize
; i
++)
816 for (k
=0; k
<c_size
; k
++) {
818 results
->y
[j
] = *buf
++;
820 results
->u
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
821 results
->v
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
828 static void generate_new_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
831 RoqCodebooks
*codebooks
= &tempData
->codebooks
;
832 int max
= enc
->width
*enc
->height
/16;
834 roq_cell
*results4
= av_malloc(sizeof(roq_cell
)*MAX_CBS_4x4
*4);
835 uint8_t *yuvClusters
=av_malloc(sizeof(int)*max
*6*4);
836 int *points
= av_malloc(max
*6*4*sizeof(int));
839 /* Subsample YUV data */
840 create_clusters(enc
->frame_to_enc
, enc
->width
, enc
->height
, yuvClusters
);
842 /* Cast to integer and apply chroma bias */
843 for (i
=0; i
<max
*24; i
++) {
844 bias
= ((i
%6)<4) ? 1 : CHROMA_BIAS
;
845 points
[i
] = bias
*yuvClusters
[i
];
848 /* Create 4x4 codebooks */
849 generate_codebook(enc
, tempData
, points
, max
, results4
, 4, MAX_CBS_4x4
);
851 codebooks
->numCB4
= MAX_CBS_4x4
;
853 tempData
->closest_cb2
= av_malloc(max
*4*sizeof(int));
855 /* Create 2x2 codebooks */
856 generate_codebook(enc
, tempData
, points
, max
*4, enc
->cb2x2
, 2, MAX_CBS_2x2
);
858 codebooks
->numCB2
= MAX_CBS_2x2
;
860 /* Unpack 2x2 codebook clusters */
861 for (i
=0; i
<codebooks
->numCB2
; i
++)
862 unpack_roq_cell(enc
->cb2x2
+ i
, codebooks
->unpacked_cb2
+ i
*2*2*3);
864 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
865 for (i
=0; i
<codebooks
->numCB4
; i
++) {
866 for (j
=0; j
<4; j
++) {
867 unpack_roq_cell(&results4
[4*i
+ j
], mb2
);
868 index_mb(mb2
, codebooks
->unpacked_cb2
, codebooks
->numCB2
,
869 &enc
->cb4x4
[i
].idx
[j
], 2);
871 unpack_roq_qcell(codebooks
->unpacked_cb2
, enc
->cb4x4
+ i
,
872 codebooks
->unpacked_cb4
+ i
*4*4*3);
873 enlarge_roq_mb4(codebooks
->unpacked_cb4
+ i
*4*4*3,
874 codebooks
->unpacked_cb4_enlarged
+ i
*8*8*3);
877 av_free(yuvClusters
);
882 static void roq_encode_video(RoqContext
*enc
)
884 RoqTempdata
*tempData
= enc
->tmpData
;
887 memset(tempData
, 0, sizeof(*tempData
));
889 create_cel_evals(enc
, tempData
);
891 generate_new_codebooks(enc
, tempData
);
893 if (enc
->framesSinceKeyframe
>= 1) {
894 motion_search(enc
, 8);
895 motion_search(enc
, 4);
899 for (i
=0; i
<enc
->width
*enc
->height
/64; i
++)
900 gather_data_for_cel(tempData
->cel_evals
+ i
, enc
, tempData
);
902 /* Quake 3 can't handle chunks bigger than 65535 bytes */
903 if (tempData
->mainChunkSize
/8 > 65535) {
904 av_log(enc
->avctx
, AV_LOG_ERROR
,
905 "Warning, generated a frame too big (%d > 65535), "
906 "try using a smaller qscale value.\n",
907 tempData
->mainChunkSize
/8);
909 tempData
->mainChunkSize
= 0;
910 memset(tempData
->used_option
, 0, sizeof(tempData
->used_option
));
911 memset(tempData
->codebooks
.usedCB4
, 0,
912 sizeof(tempData
->codebooks
.usedCB4
));
913 memset(tempData
->codebooks
.usedCB2
, 0,
914 sizeof(tempData
->codebooks
.usedCB2
));
919 remap_codebooks(enc
, tempData
);
921 write_codebooks(enc
, tempData
);
923 reconstruct_and_encode_image(enc
, tempData
, enc
->width
, enc
->height
,
924 enc
->width
*enc
->height
/64);
926 enc
->avctx
->coded_frame
= enc
->current_frame
;
928 /* Rotate frame history */
929 FFSWAP(AVFrame
*, enc
->current_frame
, enc
->last_frame
);
930 FFSWAP(motion_vect
*, enc
->last_motion4
, enc
->this_motion4
);
931 FFSWAP(motion_vect
*, enc
->last_motion8
, enc
->this_motion8
);
933 av_free(tempData
->cel_evals
);
934 av_free(tempData
->closest_cb2
);
936 enc
->framesSinceKeyframe
++;
939 static int roq_encode_init(AVCodecContext
*avctx
)
941 RoqContext
*enc
= avctx
->priv_data
;
943 av_lfg_init(&enc
->randctx
, 1);
945 enc
->framesSinceKeyframe
= 0;
946 if ((avctx
->width
& 0xf) || (avctx
->height
& 0xf)) {
947 av_log(avctx
, AV_LOG_ERROR
, "Dimensions must be divisible by 16\n");
951 if (((avctx
->width
)&(avctx
->width
-1))||((avctx
->height
)&(avctx
->height
-1)))
952 av_log(avctx
, AV_LOG_ERROR
, "Warning: dimensions not power of two\n");
954 enc
->width
= avctx
->width
;
955 enc
->height
= avctx
->height
;
957 enc
->framesSinceKeyframe
= 0;
958 enc
->first_frame
= 1;
960 enc
->last_frame
= &enc
->frames
[0];
961 enc
->current_frame
= &enc
->frames
[1];
963 enc
->tmpData
= av_malloc(sizeof(RoqTempdata
));
966 av_mallocz((enc
->width
*enc
->height
/16)*sizeof(motion_vect
));
969 av_malloc ((enc
->width
*enc
->height
/16)*sizeof(motion_vect
));
972 av_mallocz((enc
->width
*enc
->height
/64)*sizeof(motion_vect
));
975 av_malloc ((enc
->width
*enc
->height
/64)*sizeof(motion_vect
));
980 static void roq_write_video_info_chunk(RoqContext
*enc
)
983 bytestream_put_le16(&enc
->out_buf
, RoQ_INFO
);
986 bytestream_put_le32(&enc
->out_buf
, 8);
988 /* Unused argument */
989 bytestream_put_byte(&enc
->out_buf
, 0x00);
990 bytestream_put_byte(&enc
->out_buf
, 0x00);
993 bytestream_put_le16(&enc
->out_buf
, enc
->width
);
996 bytestream_put_le16(&enc
->out_buf
, enc
->height
);
998 /* Unused in Quake 3, mimics the output of the real encoder */
999 bytestream_put_byte(&enc
->out_buf
, 0x08);
1000 bytestream_put_byte(&enc
->out_buf
, 0x00);
1001 bytestream_put_byte(&enc
->out_buf
, 0x04);
1002 bytestream_put_byte(&enc
->out_buf
, 0x00);
1005 static int roq_encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
1006 const AVFrame
*frame
, int *got_packet
)
1008 RoqContext
*enc
= avctx
->priv_data
;
1013 enc
->frame_to_enc
= frame
;
1016 enc
->lambda
= frame
->quality
- 1;
1018 enc
->lambda
= 2*ROQ_LAMBDA_SCALE
;
1020 /* 138 bits max per 8x8 block +
1021 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
1022 size
= ((enc
->width
* enc
->height
/ 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
1023 if ((ret
= ff_alloc_packet(pkt
, size
)) < 0) {
1024 av_log(avctx
, AV_LOG_ERROR
, "Error getting output packet with size %d.\n", size
);
1027 enc
->out_buf
= pkt
->data
;
1029 /* Check for I frame */
1030 if (enc
->framesSinceKeyframe
== avctx
->gop_size
)
1031 enc
->framesSinceKeyframe
= 0;
1033 if (enc
->first_frame
) {
1034 /* Alloc memory for the reconstruction data (we must know the stride
1036 if (ff_get_buffer(avctx
, enc
->current_frame
) ||
1037 ff_get_buffer(avctx
, enc
->last_frame
)) {
1038 av_log(avctx
, AV_LOG_ERROR
, " RoQ: get_buffer() failed\n");
1042 /* Before the first video frame, write a "video info" chunk */
1043 roq_write_video_info_chunk(enc
);
1045 enc
->first_frame
= 0;
1048 /* Encode the actual frame */
1049 roq_encode_video(enc
);
1051 pkt
->size
= enc
->out_buf
- pkt
->data
;
1052 if (enc
->framesSinceKeyframe
== 1)
1053 pkt
->flags
|= AV_PKT_FLAG_KEY
;
1059 static int roq_encode_end(AVCodecContext
*avctx
)
1061 RoqContext
*enc
= avctx
->priv_data
;
1063 avctx
->release_buffer(avctx
, enc
->last_frame
);
1064 avctx
->release_buffer(avctx
, enc
->current_frame
);
1066 av_free(enc
->tmpData
);
1067 av_free(enc
->this_motion4
);
1068 av_free(enc
->last_motion4
);
1069 av_free(enc
->this_motion8
);
1070 av_free(enc
->last_motion8
);
1075 AVCodec ff_roq_encoder
= {
1077 .type
= AVMEDIA_TYPE_VIDEO
,
1078 .id
= AV_CODEC_ID_ROQ
,
1079 .priv_data_size
= sizeof(RoqContext
),
1080 .init
= roq_encode_init
,
1081 .encode2
= roq_encode_frame
,
1082 .close
= roq_encode_end
,
1083 .supported_framerates
= (const AVRational
[]){ {30,1}, {0,0} },
1084 .pix_fmts
= (const enum AVPixelFormat
[]){ AV_PIX_FMT_YUV444P
,
1086 .long_name
= NULL_IF_CONFIG_SMALL("id RoQ video"),