3 * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/svq1enc.c
24 * Sorenson Vector Quantizer #1 (SVQ1) video codec.
25 * For more information on the SVQ1 algorithm, visit:
26 * http://www.pcisys.net/~melanson/codecs/
32 #include "mpegvideo.h"
35 #include "svq1enc_cb.h"
/*
 * Private state of the SVQ1 encoder.
 * Only part of the member list is visible in this listing; the comments
 * below describe the members that can be seen.
 */
41 typedef struct SVQ1Context
{
42 MpegEncContext m
; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
43 AVCodecContext
*avctx
;
46 AVFrame current_picture
;
/* One bit writer per block level; encode_block() writes into reorder_pb[level]. */
51 PutBitContext reorder_pb
[6]; //why ooh why this sick breadth first order, everything is slower and more complex
56 /* Y plane block dimensions */
60 /* U & V plane (C planes) block dimensions */
/*
 * Motion vector buffers, one entry per plane index. They are allocated on
 * first use in svq1_encode_plane() and released in svq1_encode_end().
 */
66 int16_t (*motion_val8
[3])[2];
67 int16_t (*motion_val16
[3])[2];
/*
 * Write the frame header into s->pb.
 *
 * Fields written by the visible code, in order:
 *   - 22 bits: frame code 0x20
 *   - 8 bits:  temporal reference, always 0
 *   - 2 bits:  frame_type - 1
 *   - for FF_I_TYPE frames: 5 bits with value 2, then the frame size as a
 *     3-bit index into ff_svq1_frame_size_table, or the value 7 followed by
 *     12-bit width and 12-bit height
 *   - 2 bits:  0 (no checksum or extra data)
 *
 * s          encoder context; frame_width and frame_height are read from it
 * frame_type picture type of the frame being written
 */
74 static void svq1_write_header(SVQ1Context
*s
, int frame_type
)
79 put_bits(&s
->pb
, 22, 0x20);
81 /* temporal reference (sure hope this is a "don't care") */
82 put_bits(&s
->pb
, 8, 0x00);
/* 2-bit frame type field, stored as frame_type - 1 */
85 put_bits(&s
->pb
, 2, frame_type
- 1);
87 if (frame_type
== FF_I_TYPE
) {
89 /* no checksum since frame code is 0x20 */
91 /* no embedded string either */
93 /* output 5 unknown bits (2 + 2 + 1) */
94 put_bits(&s
->pb
, 5, 2); /* 2 needed by quicktime decoder */
/* look for the frame size among the 7 predefined table entries */
96 for (i
= 0; i
< 7; i
++)
98 if ((ff_svq1_frame_size_table
[i
].width
== s
->frame_width
) &&
99 (ff_svq1_frame_size_table
[i
].height
== s
->frame_height
))
101 put_bits(&s
->pb
, 3, i
);
/*
 * Explicit size: index value 7 followed by 12-bit width and height.
 * NOTE(review): presumably only emitted when no table entry matched; the
 * guarding condition is not visible in this listing.
 */
108 put_bits(&s
->pb
, 3, 7);
109 put_bits(&s
->pb
, 12, s
->frame_width
);
110 put_bits(&s
->pb
, 12, s
->frame_height
);
114 /* no checksum or extra data (next 2 bits get 0) */
115 put_bits(&s
->pb
, 2, 0);
/*
 * Seeds for the per-level threshold table built in svq1_encode_plane():
 * threshold[5] is set to QUALITY_THRESHOLD and each lower level is the
 * next-higher level multiplied by THRESHOLD_MULTIPLIER.
 */
119 #define QUALITY_THRESHOLD 100
120 #define THRESHOLD_MULTIPLIER 0.6
/*
 * Encode one block at the given level and write its bits into
 * s->reorder_pb[level].
 *
 * The block is coded as a mean plus up to 6 codebook stages, each stage
 * selecting one of 16 vectors. When the resulting score is above threshold
 * and level is not 0, the block is additionally tried as two halves coded
 * at level - 1 through recursive calls.
 *
 * s         encoder context (reorder_pb and dsp are used)
 * src       source samples of the block, addressed with stride
 * ref       prediction samples, subtracted from src on the non-intra path
 * decoded   destination for the reconstructed samples
 * stride    distance between rows in src, ref and decoded
 * level     block size level; level 5 is 16x16, level 0 is 4x2
 * threshold score above which a split is attempted; halved per recursion
 * lambda    multiplier applied to bit counts in the score
 * intra     nonzero selects the intra tables and a mean range of 0..255
 *
 * The return value is accumulated into a score by the callers; the return
 * statement itself is not visible in this listing.
 */
126 static int encode_block(SVQ1Context
*s
, uint8_t *src
, uint8_t *ref
, uint8_t *decoded
, int stride
, int level
, int threshold
, int lambda
, int intra
){
127 int count
, y
, x
, i
, j
, split
, best_mean
, best_score
, best_count
;
/* running sum of the samples of each stage residual */
129 int block_sum
[7]= {0, 0, 0, 0, 0, 0};
/* block dimensions derived from level: w x h is 16x16 at level 5 and 4x2 at level 0 */
130 int w
= 2<<((level
+2)>>1);
131 int h
= 2<<((level
+1)>>1);
/* block[0] is the input; block[k] is the residual left after k codebook stages */
133 int16_t block
[7][256];
134 const int8_t *codebook_sum
, *codebook
;
135 const uint16_t (*mean_vlc
)[2];
136 const uint8_t (*multistage_vlc
)[2];
139 //FIXME optimize, this does not need to be done multiple times
/* intra tables; block[0] receives the raw source samples */
141 codebook_sum
= svq1_intra_codebook_sum
[level
];
142 codebook
= ff_svq1_intra_codebooks
[level
];
143 mean_vlc
= ff_svq1_intra_mean_vlc
;
144 multistage_vlc
= ff_svq1_intra_multistage_vlc
[level
];
147 int v
= src
[x
+ y
*stride
];
148 block
[0][x
+ w
*y
]= v
;
/*
 * inter tables; block[0] receives source minus reference.
 * mean_vlc is advanced by 256 entries so that it can be indexed with the
 * negative means allowed on this path (clipped to -256..255 further down).
 */
154 codebook_sum
= svq1_inter_codebook_sum
[level
];
155 codebook
= ff_svq1_inter_codebooks
[level
];
156 mean_vlc
= ff_svq1_inter_mean_vlc
+ 256;
157 multistage_vlc
= ff_svq1_inter_multistage_vlc
[level
];
160 int v
= src
[x
+ y
*stride
] - ref
[x
+ y
*stride
];
161 block
[0][x
+ w
*y
]= v
;
/*
 * Starting candidate that uses the block mean only.
 * NOTE(review): size is defined on a line not visible here; the shifts by
 * level + 3 match a sample count of w * h.
 */
169 best_score
-= ((block_sum
[0]*block_sum
[0])>>(level
+3));
170 best_mean
= (block_sum
[0] + (size
>>1)) >> (level
+3);
/* try 1 to 6 codebook stages */
173 for(count
=1; count
<7; count
++){
174 int best_vector_score
= INT_MAX
;
175 int best_vector_sum
=-999, best_vector_mean
=-999;
176 const int stage
= count
-1;
177 const int8_t *vector
;
/* evaluate candidate vector i of this stage */
180 int sum
= codebook_sum
[stage
*16 + i
];
181 int sqr
, diff
, score
;
183 vector
= codebook
+ stage
*size
*16 + i
*size
;
184 sqr
= s
->dsp
.ssd_int8_vs_int16(vector
, block
[stage
], size
);
185 diff
= block_sum
[stage
] - sum
;
186 score
= sqr
- ((diff
*(int64_t)diff
)>>(level
+3)); //FIXME 64bit slooow
187 if(score
< best_vector_score
){
188 int mean
= (diff
+ (size
>>1)) >> (level
+3);
189 assert(mean
>-300 && mean
<300);
/* keep the mean inside the range the selected mean table can be indexed with */
190 mean
= av_clip(mean
, intra
?0:-256, 255);
191 best_vector_score
= score
;
192 best_vector
[stage
]= i
;
193 best_vector_sum
= sum
;
194 best_vector_mean
= mean
;
197 assert(best_vector_mean
!= -999);
/* subtract the chosen vector to form the residual for the next stage */
198 vector
= codebook
+ stage
*size
*16 + best_vector
[stage
]*size
;
199 for(j
=0; j
<size
; j
++){
200 block
[stage
+1][j
] = block
[stage
][j
] - vector
[j
];
202 block_sum
[stage
+1]= block_sum
[stage
] - best_vector_sum
;
/*
 * Rate term: lambda times a bit count made of 1, 4 bits per stage, and the
 * lengths stored in element [1] of the two VLC tables.
 */
204 lambda
*(+ 1 + 4*count
205 + multistage_vlc
[1+count
][1]
206 + mean_vlc
[best_vector_mean
][1]);
208 if(best_vector_score
< best_score
){
209 best_score
= best_vector_score
;
211 best_mean
= best_vector_mean
;
/* still above threshold and not at the smallest level: try coding two halves */
217 if(best_score
> threshold
&& level
){
/* start of the second half: h/2 rows down for odd levels, w/2 columns right for even levels */
219 int offset
= (level
&1) ? stride
*h
/2 : w
/2;
220 PutBitContext backup
[6];
/* save the bit writers of the lower levels before the recursive attempt */
222 for(i
=level
-1; i
>=0; i
--){
223 backup
[i
]= s
->reorder_pb
[i
];
225 score
+= encode_block(s
, src
, ref
, decoded
, stride
, level
-1, threshold
>>1, lambda
, intra
);
226 score
+= encode_block(s
, src
+ offset
, ref
+ offset
, decoded
+ offset
, stride
, level
-1, threshold
>>1, lambda
, intra
);
229 if(score
< best_score
){
/*
 * Put the saved bit writers back.
 * NOTE(review): presumably this runs when the split was not better; the
 * branch structure is not visible in this listing.
 */
233 for(i
=level
-1; i
>=0; i
--){
234 s
->reorder_pb
[i
]= backup
[i
];
/* one bit at this level carrying the split decision */
239 put_bits(&s
->reorder_pb
[level
], 1, split
);
242 assert((best_mean
>= 0 && best_mean
<256) || !intra
);
243 assert(best_mean
>= -256 && best_mean
<256);
244 assert(best_count
>=0 && best_count
<7);
245 assert(level
<4 || best_count
==0);
247 /* output the encoding */
248 put_bits(&s
->reorder_pb
[level
],
249 multistage_vlc
[1 + best_count
][1],
250 multistage_vlc
[1 + best_count
][0]);
251 put_bits(&s
->reorder_pb
[level
], mean_vlc
[best_mean
][1],
252 mean_vlc
[best_mean
][0]);
/* one 4-bit vector index per stage used */
254 for (i
= 0; i
< best_count
; i
++){
255 assert(best_vector
[i
]>=0 && best_vector
[i
]<16);
256 put_bits(&s
->reorder_pb
[level
], 4, best_vector
[i
]);
/* reconstruction: source minus the residual left after the stages used, plus the mean */
261 decoded
[x
+ y
*stride
]= src
[x
+ y
*stride
] - block
[best_count
][x
+ w
*y
] + best_mean
;
/*
 * Encode one plane of the current picture into s->pb.
 *
 * The plane is processed in 16x16 blocks. For P-frames a first pass runs
 * motion estimation over the whole plane through the embedded
 * MpegEncContext; a second pass then codes every block, comparing the
 * candidate codings by score.
 *
 * s             encoder context
 * plane         plane index, used to select the motion vector buffers
 * src_plane     input samples, rows src_stride apart
 * ref_plane     previous reconstruction used as reference
 * decoded_plane destination for the reconstruction of this plane
 * width, height plane dimensions in samples
 * src_stride    row distance in src_plane
 * stride        row distance in ref_plane, decoded_plane and the padded copy
 *
 * The caller treats a negative return value as failure; the return
 * statements are not visible in this listing.
 */
270 static int svq1_encode_plane(SVQ1Context
*s
, int plane
, unsigned char *src_plane
, unsigned char *ref_plane
, unsigned char *decoded_plane
,
271 int width
, int height
, int src_stride
, int stride
)
275 int block_width
, block_height
;
/* multiplier for bit counts: picture quality squared, shifted down by 2*FF_LAMBDA_SHIFT */
278 const int lambda
= (s
->picture
.quality
*s
->picture
.quality
) >> (2*FF_LAMBDA_SHIFT
);
280 /* figure out the acceptable level thresholds in advance */
281 threshold
[5] = QUALITY_THRESHOLD
;
282 for (level
= 4; level
>= 0; level
--)
283 threshold
[level
] = threshold
[level
+ 1] * THRESHOLD_MULTIPLIER
;
/* plane size in 16x16 blocks, rounded up */
285 block_width
= (width
+ 15) / 16;
286 block_height
= (height
+ 15) / 16;
/* P-frames: fill in the MpegEncContext fields read by motion estimation */
288 if(s
->picture
.pict_type
== FF_P_TYPE
){
289 s
->m
.avctx
= s
->avctx
;
290 s
->m
.current_picture_ptr
= &s
->m
.current_picture
;
291 s
->m
.last_picture_ptr
= &s
->m
.last_picture
;
292 s
->m
.last_picture
.data
[0]= ref_plane
;
294 s
->m
.last_picture
.linesize
[0]=
295 s
->m
.new_picture
.linesize
[0]=
296 s
->m
.current_picture
.linesize
[0]= stride
;
299 s
->m
.mb_width
= block_width
;
300 s
->m
.mb_height
= block_height
;
301 s
->m
.mb_stride
= s
->m
.mb_width
+1;
302 s
->m
.b8_stride
= 2*s
->m
.mb_width
+1;
304 s
->m
.pict_type
= s
->picture
.pict_type
;
305 s
->m
.me_method
= s
->avctx
->me_method
;
306 s
->m
.me
.scene_change_score
=0;
307 s
->m
.flags
= s
->avctx
->flags
;
308 // s->m.out_format = FMT_H263;
309 // s->m.unrestricted_mv= 1;
311 s
->m
.lambda
= s
->picture
.quality
;
312 s
->m
.qscale
= (s
->m
.lambda
*139 + FF_LAMBDA_SCALE
*64) >> (FF_LAMBDA_SHIFT
+ 7);
313 s
->m
.lambda2
= (s
->m
.lambda
*s
->m
.lambda
+ FF_LAMBDA_SCALE
/2) >> FF_LAMBDA_SHIFT
;
/*
 * Allocate the motion vector buffers of this plane on first use.
 * NOTE(review): no check of the av_mallocz results is visible here.
 */
315 if(!s
->motion_val8
[plane
]){
316 s
->motion_val8
[plane
]= av_mallocz((s
->m
.b8_stride
*block_height
*2 + 2)*2*sizeof(int16_t));
317 s
->motion_val16
[plane
]= av_mallocz((s
->m
.mb_stride
*(block_height
+ 2) + 1)*2*sizeof(int16_t));
320 s
->m
.mb_type
= s
->mb_type
;
322 //dummies, to avoid segfaults
323 s
->m
.current_picture
.mb_mean
= (uint8_t *)s
->dummy
;
324 s
->m
.current_picture
.mb_var
= (uint16_t*)s
->dummy
;
325 s
->m
.current_picture
.mc_mb_var
= (uint16_t*)s
->dummy
;
326 s
->m
.current_picture
.mb_type
= s
->dummy
;
/* the motion vector tables point a few entries into the buffers allocated above */
328 s
->m
.current_picture
.motion_val
[0]= s
->motion_val8
[plane
] + 2;
329 s
->m
.p_mv_table
= s
->motion_val16
[plane
] + s
->m
.mb_stride
+ 1;
330 s
->m
.dsp
= s
->dsp
; //move
333 s
->m
.me
.dia_size
= s
->avctx
->dia_size
;
334 s
->m
.first_slice_line
=1;
/* first pass: motion estimation, one band of 16 rows at a time */
335 for (y
= 0; y
< block_height
; y
++) {
/* padded copy of the current band of source rows */
336 uint8_t src
[stride
*16];
/* bias the pointer so that addressing row 16*y of the picture lands on the band copy */
338 s
->m
.new_picture
.data
[0]= src
- y
*16*stride
; //ugly
/* copy the rows that exist and repeat the last sample rightwards up to the padded width */
341 for(i
=0; i
<16 && i
+ 16*y
<height
; i
++){
342 memcpy(&src
[i
*stride
], &src_plane
[(i
+16*y
)*src_stride
], width
);
343 for(x
=width
; x
<16*block_width
; x
++)
344 src
[i
*stride
+x
]= src
[i
*stride
+x
-1];
/* fill the remaining rows of the band by repeating the previous row */
346 for(; i
<16 && i
+ 16*y
<16*block_height
; i
++)
347 memcpy(&src
[i
*stride
], &src
[(i
-1)*stride
], 16*block_width
);
349 for (x
= 0; x
< block_width
; x
++) {
351 ff_init_block_index(&s
->m
);
352 ff_update_block_index(&s
->m
);
354 ff_estimate_p_frame_motion(&s
->m
, x
, y
);
356 s
->m
.first_slice_line
=0;
359 ff_fix_long_p_mvs(&s
->m
);
360 ff_fix_long_mvs(&s
->m
, NULL
, 0, s
->m
.p_mv_table
, s
->m
.f_code
, CANDIDATE_MB_TYPE_INTER
, 0);
/* second pass: code every block, again one band of 16 rows at a time */
363 s
->m
.first_slice_line
=1;
364 for (y
= 0; y
< block_height
; y
++) {
365 uint8_t src
[stride
*16];
/* same edge padding as in the first pass */
367 for(i
=0; i
<16 && i
+ 16*y
<height
; i
++){
368 memcpy(&src
[i
*stride
], &src_plane
[(i
+16*y
)*src_stride
], width
);
369 for(x
=width
; x
<16*block_width
; x
++)
370 src
[i
*stride
+x
]= src
[i
*stride
+x
-1];
372 for(; i
<16 && i
+ 16*y
<16*block_height
; i
++)
373 memcpy(&src
[i
*stride
], &src
[(i
-1)*stride
], 16*block_width
);
376 for (x
= 0; x
< block_width
; x
++) {
/* bit buffers for the candidate codings; index 0 is used for intra and 1 for inter below */
377 uint8_t reorder_buffer
[3][6][7*32];
379 int offset
= y
* 16 * stride
+ x
* 16;
380 uint8_t *decoded
= decoded_plane
+ offset
;
381 uint8_t *ref
= ref_plane
+ offset
;
382 int score
[4]={0,0,0,0}, best
;
383 uint8_t *temp
= s
->scratchbuf
;
/* error out when fewer than 3000 bytes remain in the output buffer */
385 if(s
->pb
.buf_end
- s
->pb
.buf
- (put_bits_count(&s
->pb
)>>3) < 3000){ //FIXME check size
386 av_log(s
->avctx
, AV_LOG_ERROR
, "encoded frame too large\n");
391 ff_init_block_index(&s
->m
);
392 ff_update_block_index(&s
->m
);
/* intra candidate: always for I-frames, otherwise only for blocks flagged as intra candidates */
394 if(s
->picture
.pict_type
== FF_I_TYPE
|| (s
->m
.mb_type
[x
+ y
*s
->m
.mb_stride
]&CANDIDATE_MB_TYPE_INTRA
)){
396 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[0][i
], 7*32);
/* in P-frames the block type code is written first and counted in the score */
398 if(s
->picture
.pict_type
== FF_P_TYPE
){
399 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTRA
];
400 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
401 score
[0]= vlc
[1]*lambda
;
/* intra reconstruction goes to temp, not directly to the decoded plane */
403 score
[0]+= encode_block(s
, src
+16*x
, NULL
, temp
, stride
, 5, 64, lambda
, 1);
405 count
[0][i
]= put_bits_count(&s
->reorder_pb
[i
]);
406 flush_put_bits(&s
->reorder_pb
[i
]);
/* P-frames: evaluate the inter and skip codings as well */
413 if(s
->picture
.pict_type
== FF_P_TYPE
){
414 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTER
];
415 int mx
, my
, pred_x
, pred_y
, dxy
;
418 motion_ptr
= h263_pred_motion(&s
->m
, 0, 0, &pred_x
, &pred_y
);
419 if(s
->m
.mb_type
[x
+ y
*s
->m
.mb_stride
]&CANDIDATE_MB_TYPE_INTER
){
421 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[1][i
], 7*32);
423 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
/* lend the level 5 bit writer to the MpegEncContext so the motion vector coder can write into it */
425 s
->m
.pb
= s
->reorder_pb
[5];
428 assert(mx
>=-32 && mx
<=31);
429 assert(my
>=-32 && my
<=31);
430 assert(pred_x
>=-32 && pred_x
<=31);
431 assert(pred_y
>=-32 && pred_y
<=31);
432 ff_h263_encode_motion(&s
->m
, mx
- pred_x
, 1);
433 ff_h263_encode_motion(&s
->m
, my
- pred_y
, 1);
434 s
->reorder_pb
[5]= s
->m
.pb
;
435 score
[1] += lambda
*put_bits_count(&s
->reorder_pb
[5]);
/* low bits of the vector select one of four put_pixels variants; the remaining bits give the sample offset */
437 dxy
= (mx
&1) + 2*(my
&1);
/* build the prediction in temp + 16 */
439 s
->dsp
.put_pixels_tab
[0][dxy
](temp
+16, ref
+ (mx
>>1) + stride
*(my
>>1), stride
, 16);
441 score
[1]+= encode_block(s
, src
+16*x
, temp
+16, decoded
, stride
, 5, 64, lambda
, 0);
442 best
= score
[1] <= score
[0];
/* skip candidate: SSE against the reference plus the cost of the skip code, accepted only with a zero vector */
444 vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_SKIP
];
445 score
[2]= s
->dsp
.sse
[0](NULL
, src
+16*x
, ref
, stride
, 16);
446 score
[2]+= vlc
[1]*lambda
;
447 if(score
[2] < score
[best
] && mx
==0 && my
==0){
449 s
->dsp
.put_pixels_tab
[0][0](decoded
, ref
, stride
, 16);
453 put_bits(&s
->pb
, vlc
[1], vlc
[0]);
459 count
[1][i
]= put_bits_count(&s
->reorder_pb
[i
]);
460 flush_put_bits(&s
->reorder_pb
[i
]);
/*
 * Zero the stored motion vectors of this block.
 * NOTE(review): the condition guarding this is not visible in this listing.
 */
463 motion_ptr
[0 ] = motion_ptr
[1 ]=
464 motion_ptr
[2 ] = motion_ptr
[3 ]=
465 motion_ptr
[0+2*s
->m
.b8_stride
] = motion_ptr
[1+2*s
->m
.b8_stride
]=
466 motion_ptr
[2+2*s
->m
.b8_stride
] = motion_ptr
[3+2*s
->m
.b8_stride
]=0;
470 s
->rd_total
+= score
[best
];
/* append the buffered bits of the chosen candidate to the frame bitstream */
473 ff_copy_bits(&s
->pb
, reorder_buffer
[best
][i
], count
[best
][i
]);
/*
 * Copy temp into the decoded plane.
 * NOTE(review): the guarding condition is not visible in this listing.
 */
476 s
->dsp
.put_pixels_tab
[0][0](decoded
, temp
, stride
, 16);
479 s
->m
.first_slice_line
=0;
/*
 * Encoder init callback.
 *
 * Sets up the DSP function table, records the frame size, derives the
 * luma and chroma block counts, and allocates the motion estimation
 * buffers and the mb_type and dummy tables.
 *
 * avctx codec context; priv_data holds the SVQ1Context
 */
484 static av_cold
int svq1_encode_init(AVCodecContext
*avctx
)
486 SVQ1Context
* const s
= avctx
->priv_data
;
488 dsputil_init(&s
->dsp
, avctx
);
489 avctx
->coded_frame
= (AVFrame
*)&s
->picture
;
491 s
->frame_width
= avctx
->width
;
492 s
->frame_height
= avctx
->height
;
/* luma size in 16x16 blocks, rounded up */
494 s
->y_block_width
= (s
->frame_width
+ 15) / 16;
495 s
->y_block_height
= (s
->frame_height
+ 15) / 16;
/* chroma planes are a quarter of the luma size in each dimension */
497 s
->c_block_width
= (s
->frame_width
/ 4 + 15) / 16;
498 s
->c_block_height
= (s
->frame_height
/ 4 + 15) / 16;
/*
 * NOTE(review): none of the av_mallocz results below is checked in the
 * visible code.
 */
503 s
->m
.me
.scratchpad
= av_mallocz((avctx
->width
+64)*2*16*2*sizeof(uint8_t));
504 s
->m
.me
.map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
505 s
->m
.me
.score_map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
/* one entry per luma block, with rows y_block_width + 1 entries apart */
506 s
->mb_type
= av_mallocz((s
->y_block_width
+1)*s
->y_block_height
*sizeof(int16_t));
507 s
->dummy
= av_mallocz((s
->y_block_width
+1)*s
->y_block_height
*sizeof(int32_t));
508 h263_encode_init(&s
->m
); //mv_penalty
/*
 * Encode one frame into buf.
 *
 * avctx    codec context; priv_data holds the SVQ1Context
 * buf      output buffer
 * buf_size size of buf in bytes
 * data     input picture, used as an AVFrame
 *
 * On the visible success path the return value is the number of bytes
 * written, after the bitstream has been padded to a multiple of 32 bits.
 */
513 static int svq1_encode_frame(AVCodecContext
*avctx
, unsigned char *buf
,
514 int buf_size
, void *data
)
516 SVQ1Context
* const s
= avctx
->priv_data
;
517 AVFrame
*pict
= data
;
518 AVFrame
* const p
= (AVFrame
*)&s
->picture
;
/* only PIX_FMT_YUV410P input is accepted */
522 if(avctx
->pix_fmt
!= PIX_FMT_YUV410P
){
523 av_log(avctx
, AV_LOG_ERROR
, "unsupported pixel format\n");
/*
 * First call: obtain two picture buffers and a scratch buffer of 16 rows.
 * NOTE(review): the results of get_buffer and av_malloc are not checked in
 * the visible code.
 */
527 if(!s
->current_picture
.data
[0]){
528 avctx
->get_buffer(avctx
, &s
->current_picture
);
529 avctx
->get_buffer(avctx
, &s
->last_picture
);
530 s
->scratchbuf
= av_malloc(s
->current_picture
.linesize
[0] * 16);
/* swap the two pictures so the previous reconstruction becomes the reference */
533 temp
= s
->current_picture
;
534 s
->current_picture
= s
->last_picture
;
535 s
->last_picture
= temp
;
537 init_put_bits(&s
->pb
, buf
, buf_size
);
/* I-frame when gop_size is 0 or frame_number is a multiple of gop_size, P-frame otherwise */
540 p
->pict_type
= avctx
->gop_size
&& avctx
->frame_number
% avctx
->gop_size
? FF_P_TYPE
: FF_I_TYPE
;
541 p
->key_frame
= p
->pict_type
== FF_I_TYPE
;
543 svq1_write_header(s
, p
->pict_type
);
/* encode plane i; planes other than 0 use a quarter of the frame size in each dimension */
545 if(svq1_encode_plane(s
, i
,
546 s
->picture
.data
[i
], s
->last_picture
.data
[i
], s
->current_picture
.data
[i
],
547 s
->frame_width
/ (i
?4:1), s
->frame_height
/ (i
?4:1),
548 s
->picture
.linesize
[i
], s
->current_picture
.linesize
[i
]) < 0)
552 // align_put_bits(&s->pb);
/* pad with zero bits up to a multiple of 32 bits */
553 while(put_bits_count(&s
->pb
) & 31)
554 put_bits(&s
->pb
, 1, 0);
556 flush_put_bits(&s
->pb
);
558 return put_bits_count(&s
->pb
) / 8;
/*
 * Encoder close callback.
 *
 * Logs the accumulated score per sample and frame at debug level and
 * releases the buffers owned by the encoder context.
 *
 * avctx codec context; priv_data holds the SVQ1Context
 */
561 static av_cold
int svq1_encode_end(AVCodecContext
*avctx
)
563 SVQ1Context
* const s
= avctx
->priv_data
;
/*
 * NOTE(review): the divisor is computed as an int product before the cast
 * to double, and it is zero when frame_number is zero.
 */
566 av_log(avctx
, AV_LOG_DEBUG
, "RD: %f\n", s
->rd_total
/(double)(avctx
->width
*avctx
->height
*avctx
->frame_number
));
568 av_freep(&s
->m
.me
.scratchpad
);
569 av_freep(&s
->m
.me
.map
);
570 av_freep(&s
->m
.me
.score_map
);
571 av_freep(&s
->mb_type
);
573 av_freep(&s
->scratchbuf
);
/* motion vector buffers of plane i, allocated in svq1_encode_plane() */
576 av_freep(&s
->motion_val8
[i
]);
577 av_freep(&s
->motion_val16
[i
]);
584 AVCodec svq1_encoder
= {
592 .pix_fmts
= (const enum PixelFormat
[]){PIX_FMT_YUV410P
, PIX_FMT_NONE
},
593 .long_name
= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1"),