3 * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Sorenson Vector Quantizer #1 (SVQ1) video codec.
25 * For more information on the SVQ1 algorithm, visit:
26 * http://www.pcisys.net/~melanson/codecs/
32 #include "mpegvideo.h"
35 #include "svq1enc_cb.h"
41 typedef struct SVQ1Context
{
42 MpegEncContext m
; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
43 AVCodecContext
*avctx
;
46 AVFrame current_picture
;
51 PutBitContext reorder_pb
[6]; //why ooh why this sick breadth first order, everything is slower and more complex
56 /* Y plane block dimensions */
60 /* U & V plane (C planes) block dimensions */
66 int16_t (*motion_val8
[3])[2];
67 int16_t (*motion_val16
[3])[2];
72 static void svq1_write_header(SVQ1Context
*s
, int frame_type
)
77 put_bits(&s
->pb
, 22, 0x20);
79 /* temporal reference (sure hope this is a "don't care") */
80 put_bits(&s
->pb
, 8, 0x00);
83 put_bits(&s
->pb
, 2, frame_type
- 1);
85 if (frame_type
== FF_I_TYPE
) {
87 /* no checksum since frame code is 0x20 */
89 /* no embedded string either */
91 /* output 5 unknown bits (2 + 2 + 1) */
92 put_bits(&s
->pb
, 5, 2); /* 2 needed by quicktime decoder */
94 for (i
= 0; i
< 7; i
++)
96 if ((ff_svq1_frame_size_table
[i
].width
== s
->frame_width
) &&
97 (ff_svq1_frame_size_table
[i
].height
== s
->frame_height
))
99 put_bits(&s
->pb
, 3, i
);
106 put_bits(&s
->pb
, 3, 7);
107 put_bits(&s
->pb
, 12, s
->frame_width
);
108 put_bits(&s
->pb
, 12, s
->frame_height
);
112 /* no checksum or extra data (next 2 bits get 0) */
113 put_bits(&s
->pb
, 2, 0);
117 #define QUALITY_THRESHOLD 100
118 #define THRESHOLD_MULTIPLIER 0.6
120 #if defined(HAVE_ALTIVEC)
124 static int encode_block(SVQ1Context
*s
, uint8_t *src
, uint8_t *ref
, uint8_t *decoded
, int stride
, int level
, int threshold
, int lambda
, int intra
){
125 int count
, y
, x
, i
, j
, split
, best_mean
, best_score
, best_count
;
127 int block_sum
[7]= {0, 0, 0, 0, 0, 0};
128 int w
= 2<<((level
+2)>>1);
129 int h
= 2<<((level
+1)>>1);
131 int16_t block
[7][256];
132 const int8_t *codebook_sum
, *codebook
;
133 const uint16_t (*mean_vlc
)[2];
134 const uint8_t (*multistage_vlc
)[2];
137 //FIXME optimize, this doesn't need to be done multiple times
139 codebook_sum
= svq1_intra_codebook_sum
[level
];
140 codebook
= ff_svq1_intra_codebooks
[level
];
141 mean_vlc
= ff_svq1_intra_mean_vlc
;
142 multistage_vlc
= ff_svq1_intra_multistage_vlc
[level
];
145 int v
= src
[x
+ y
*stride
];
146 block
[0][x
+ w
*y
]= v
;
152 codebook_sum
= svq1_inter_codebook_sum
[level
];
153 codebook
= ff_svq1_inter_codebooks
[level
];
154 mean_vlc
= ff_svq1_inter_mean_vlc
+ 256;
155 multistage_vlc
= ff_svq1_inter_multistage_vlc
[level
];
158 int v
= src
[x
+ y
*stride
] - ref
[x
+ y
*stride
];
159 block
[0][x
+ w
*y
]= v
;
167 best_score
-= ((block_sum
[0]*block_sum
[0])>>(level
+3));
168 best_mean
= (block_sum
[0] + (size
>>1)) >> (level
+3);
171 for(count
=1; count
<7; count
++){
172 int best_vector_score
= INT_MAX
;
173 int best_vector_sum
=-999, best_vector_mean
=-999;
174 const int stage
= count
-1;
175 const int8_t *vector
;
178 int sum
= codebook_sum
[stage
*16 + i
];
179 int sqr
, diff
, score
;
181 vector
= codebook
+ stage
*size
*16 + i
*size
;
182 sqr
= s
->dsp
.ssd_int8_vs_int16(vector
, block
[stage
], size
);
183 diff
= block_sum
[stage
] - sum
;
184 score
= sqr
- ((diff
*(int64_t)diff
)>>(level
+3)); //FIXME 64bit slooow
185 if(score
< best_vector_score
){
186 int mean
= (diff
+ (size
>>1)) >> (level
+3);
187 assert(mean
>-300 && mean
<300);
188 mean
= av_clip(mean
, intra
?0:-256, 255);
189 best_vector_score
= score
;
190 best_vector
[stage
]= i
;
191 best_vector_sum
= sum
;
192 best_vector_mean
= mean
;
195 assert(best_vector_mean
!= -999);
196 vector
= codebook
+ stage
*size
*16 + best_vector
[stage
]*size
;
197 for(j
=0; j
<size
; j
++){
198 block
[stage
+1][j
] = block
[stage
][j
] - vector
[j
];
200 block_sum
[stage
+1]= block_sum
[stage
] - best_vector_sum
;
202 lambda
*(+ 1 + 4*count
203 + multistage_vlc
[1+count
][1]
204 + mean_vlc
[best_vector_mean
][1]);
206 if(best_vector_score
< best_score
){
207 best_score
= best_vector_score
;
209 best_mean
= best_vector_mean
;
215 if(best_score
> threshold
&& level
){
217 int offset
= (level
&1) ? stride
*h
/2 : w
/2;
218 PutBitContext backup
[6];
220 for(i
=level
-1; i
>=0; i
--){
221 backup
[i
]= s
->reorder_pb
[i
];
223 score
+= encode_block(s
, src
, ref
, decoded
, stride
, level
-1, threshold
>>1, lambda
, intra
);
224 score
+= encode_block(s
, src
+ offset
, ref
+ offset
, decoded
+ offset
, stride
, level
-1, threshold
>>1, lambda
, intra
);
227 if(score
< best_score
){
231 for(i
=level
-1; i
>=0; i
--){
232 s
->reorder_pb
[i
]= backup
[i
];
237 put_bits(&s
->reorder_pb
[level
], 1, split
);
240 assert((best_mean
>= 0 && best_mean
<256) || !intra
);
241 assert(best_mean
>= -256 && best_mean
<256);
242 assert(best_count
>=0 && best_count
<7);
243 assert(level
<4 || best_count
==0);
245 /* output the encoding */
246 put_bits(&s
->reorder_pb
[level
],
247 multistage_vlc
[1 + best_count
][1],
248 multistage_vlc
[1 + best_count
][0]);
249 put_bits(&s
->reorder_pb
[level
], mean_vlc
[best_mean
][1],
250 mean_vlc
[best_mean
][0]);
252 for (i
= 0; i
< best_count
; i
++){
253 assert(best_vector
[i
]>=0 && best_vector
[i
]<16);
254 put_bits(&s
->reorder_pb
[level
], 4, best_vector
[i
]);
259 decoded
[x
+ y
*stride
]= src
[x
+ y
*stride
] - block
[best_count
][x
+ w
*y
] + best_mean
;
268 static int svq1_encode_plane(SVQ1Context
*s
, int plane
, unsigned char *src_plane
, unsigned char *ref_plane
, unsigned char *decoded_plane
,
269 int width
, int height
, int src_stride
, int stride
)
273 int block_width
, block_height
;
276 const int lambda
= (s
->picture
.quality
*s
->picture
.quality
) >> (2*FF_LAMBDA_SHIFT
);
278 /* figure out the acceptable level thresholds in advance */
279 threshold
[5] = QUALITY_THRESHOLD
;
280 for (level
= 4; level
>= 0; level
--)
281 threshold
[level
] = threshold
[level
+ 1] * THRESHOLD_MULTIPLIER
;
283 block_width
= (width
+ 15) / 16;
284 block_height
= (height
+ 15) / 16;
286 if(s
->picture
.pict_type
== FF_P_TYPE
){
287 s
->m
.avctx
= s
->avctx
;
288 s
->m
.current_picture_ptr
= &s
->m
.current_picture
;
289 s
->m
.last_picture_ptr
= &s
->m
.last_picture
;
290 s
->m
.last_picture
.data
[0]= ref_plane
;
292 s
->m
.last_picture
.linesize
[0]=
293 s
->m
.new_picture
.linesize
[0]=
294 s
->m
.current_picture
.linesize
[0]= stride
;
297 s
->m
.mb_width
= block_width
;
298 s
->m
.mb_height
= block_height
;
299 s
->m
.mb_stride
= s
->m
.mb_width
+1;
300 s
->m
.b8_stride
= 2*s
->m
.mb_width
+1;
302 s
->m
.pict_type
= s
->picture
.pict_type
;
303 s
->m
.me_method
= s
->avctx
->me_method
;
304 s
->m
.me
.scene_change_score
=0;
305 s
->m
.flags
= s
->avctx
->flags
;
306 // s->m.out_format = FMT_H263;
307 // s->m.unrestricted_mv= 1;
309 s
->m
.lambda
= s
->picture
.quality
;
310 s
->m
.qscale
= (s
->m
.lambda
*139 + FF_LAMBDA_SCALE
*64) >> (FF_LAMBDA_SHIFT
+ 7);
311 s
->m
.lambda2
= (s
->m
.lambda
*s
->m
.lambda
+ FF_LAMBDA_SCALE
/2) >> FF_LAMBDA_SHIFT
;
313 if(!s
->motion_val8
[plane
]){
314 s
->motion_val8
[plane
]= av_mallocz((s
->m
.b8_stride
*block_height
*2 + 2)*2*sizeof(int16_t));
315 s
->motion_val16
[plane
]= av_mallocz((s
->m
.mb_stride
*(block_height
+ 2) + 1)*2*sizeof(int16_t));
318 s
->m
.mb_type
= s
->mb_type
;
320 //dummies, to avoid segfaults
321 s
->m
.current_picture
.mb_mean
= (uint8_t *)s
->dummy
;
322 s
->m
.current_picture
.mb_var
= (uint16_t*)s
->dummy
;
323 s
->m
.current_picture
.mc_mb_var
= (uint16_t*)s
->dummy
;
324 s
->m
.current_picture
.mb_type
= s
->dummy
;
326 s
->m
.current_picture
.motion_val
[0]= s
->motion_val8
[plane
] + 2;
327 s
->m
.p_mv_table
= s
->motion_val16
[plane
] + s
->m
.mb_stride
+ 1;
328 s
->m
.dsp
= s
->dsp
; //move
331 s
->m
.me
.dia_size
= s
->avctx
->dia_size
;
332 s
->m
.first_slice_line
=1;
333 for (y
= 0; y
< block_height
; y
++) {
334 uint8_t src
[stride
*16];
336 s
->m
.new_picture
.data
[0]= src
- y
*16*stride
; //ugly
339 for(i
=0; i
<16 && i
+ 16*y
<height
; i
++){
340 memcpy(&src
[i
*stride
], &src_plane
[(i
+16*y
)*src_stride
], width
);
341 for(x
=width
; x
<16*block_width
; x
++)
342 src
[i
*stride
+x
]= src
[i
*stride
+x
-1];
344 for(; i
<16 && i
+ 16*y
<16*block_height
; i
++)
345 memcpy(&src
[i
*stride
], &src
[(i
-1)*stride
], 16*block_width
);
347 for (x
= 0; x
< block_width
; x
++) {
349 ff_init_block_index(&s
->m
);
350 ff_update_block_index(&s
->m
);
352 ff_estimate_p_frame_motion(&s
->m
, x
, y
);
354 s
->m
.first_slice_line
=0;
357 ff_fix_long_p_mvs(&s
->m
);
358 ff_fix_long_mvs(&s
->m
, NULL
, 0, s
->m
.p_mv_table
, s
->m
.f_code
, CANDIDATE_MB_TYPE_INTER
, 0);
361 s
->m
.first_slice_line
=1;
362 for (y
= 0; y
< block_height
; y
++) {
363 uint8_t src
[stride
*16];
365 for(i
=0; i
<16 && i
+ 16*y
<height
; i
++){
366 memcpy(&src
[i
*stride
], &src_plane
[(i
+16*y
)*src_stride
], width
);
367 for(x
=width
; x
<16*block_width
; x
++)
368 src
[i
*stride
+x
]= src
[i
*stride
+x
-1];
370 for(; i
<16 && i
+ 16*y
<16*block_height
; i
++)
371 memcpy(&src
[i
*stride
], &src
[(i
-1)*stride
], 16*block_width
);
374 for (x
= 0; x
< block_width
; x
++) {
375 uint8_t reorder_buffer
[3][6][7*32];
377 int offset
= y
* 16 * stride
+ x
* 16;
378 uint8_t *decoded
= decoded_plane
+ offset
;
379 uint8_t *ref
= ref_plane
+ offset
;
380 int score
[4]={0,0,0,0}, best
;
381 uint8_t temp
[16*stride
];
383 if(s
->pb
.buf_end
- s
->pb
.buf
- (put_bits_count(&s
->pb
)>>3) < 3000){ //FIXME check size
384 av_log(s
->avctx
, AV_LOG_ERROR
, "encoded frame too large\n");
389 ff_init_block_index(&s
->m
);
390 ff_update_block_index(&s
->m
);
392 if(s
->picture
.pict_type
== FF_I_TYPE
|| (s
->m
.mb_type
[x
+ y
*s
->m
.mb_stride
]&CANDIDATE_MB_TYPE_INTRA
)){
394 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[0][i
], 7*32);
396 if(s
->picture
.pict_type
== FF_P_TYPE
){
397 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTRA
];
398 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
399 score
[0]= vlc
[1]*lambda
;
401 score
[0]+= encode_block(s
, src
+16*x
, NULL
, temp
, stride
, 5, 64, lambda
, 1);
403 count
[0][i
]= put_bits_count(&s
->reorder_pb
[i
]);
404 flush_put_bits(&s
->reorder_pb
[i
]);
411 if(s
->picture
.pict_type
== FF_P_TYPE
){
412 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTER
];
413 int mx
, my
, pred_x
, pred_y
, dxy
;
416 motion_ptr
= h263_pred_motion(&s
->m
, 0, 0, &pred_x
, &pred_y
);
417 if(s
->m
.mb_type
[x
+ y
*s
->m
.mb_stride
]&CANDIDATE_MB_TYPE_INTER
){
419 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[1][i
], 7*32);
421 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
423 s
->m
.pb
= s
->reorder_pb
[5];
426 assert(mx
>=-32 && mx
<=31);
427 assert(my
>=-32 && my
<=31);
428 assert(pred_x
>=-32 && pred_x
<=31);
429 assert(pred_y
>=-32 && pred_y
<=31);
430 ff_h263_encode_motion(&s
->m
, mx
- pred_x
, 1);
431 ff_h263_encode_motion(&s
->m
, my
- pred_y
, 1);
432 s
->reorder_pb
[5]= s
->m
.pb
;
433 score
[1] += lambda
*put_bits_count(&s
->reorder_pb
[5]);
435 dxy
= (mx
&1) + 2*(my
&1);
437 s
->dsp
.put_pixels_tab
[0][dxy
](temp
+16, ref
+ (mx
>>1) + stride
*(my
>>1), stride
, 16);
439 score
[1]+= encode_block(s
, src
+16*x
, temp
+16, decoded
, stride
, 5, 64, lambda
, 0);
440 best
= score
[1] <= score
[0];
442 vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_SKIP
];
443 score
[2]= s
->dsp
.sse
[0](NULL
, src
+16*x
, ref
, stride
, 16);
444 score
[2]+= vlc
[1]*lambda
;
445 if(score
[2] < score
[best
] && mx
==0 && my
==0){
447 s
->dsp
.put_pixels_tab
[0][0](decoded
, ref
, stride
, 16);
451 put_bits(&s
->pb
, vlc
[1], vlc
[0]);
457 count
[1][i
]= put_bits_count(&s
->reorder_pb
[i
]);
458 flush_put_bits(&s
->reorder_pb
[i
]);
461 motion_ptr
[0 ] = motion_ptr
[1 ]=
462 motion_ptr
[2 ] = motion_ptr
[3 ]=
463 motion_ptr
[0+2*s
->m
.b8_stride
] = motion_ptr
[1+2*s
->m
.b8_stride
]=
464 motion_ptr
[2+2*s
->m
.b8_stride
] = motion_ptr
[3+2*s
->m
.b8_stride
]=0;
468 s
->rd_total
+= score
[best
];
471 ff_copy_bits(&s
->pb
, reorder_buffer
[best
][i
], count
[best
][i
]);
474 s
->dsp
.put_pixels_tab
[0][0](decoded
, temp
, stride
, 16);
477 s
->m
.first_slice_line
=0;
482 static av_cold
int svq1_encode_init(AVCodecContext
*avctx
)
484 SVQ1Context
* const s
= avctx
->priv_data
;
486 dsputil_init(&s
->dsp
, avctx
);
487 avctx
->coded_frame
= (AVFrame
*)&s
->picture
;
489 s
->frame_width
= avctx
->width
;
490 s
->frame_height
= avctx
->height
;
492 s
->y_block_width
= (s
->frame_width
+ 15) / 16;
493 s
->y_block_height
= (s
->frame_height
+ 15) / 16;
495 s
->c_block_width
= (s
->frame_width
/ 4 + 15) / 16;
496 s
->c_block_height
= (s
->frame_height
/ 4 + 15) / 16;
500 s
->m
.me
.scratchpad
= av_mallocz((avctx
->width
+64)*2*16*2*sizeof(uint8_t));
501 s
->m
.me
.map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
502 s
->m
.me
.score_map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
503 s
->mb_type
= av_mallocz((s
->y_block_width
+1)*s
->y_block_height
*sizeof(int16_t));
504 s
->dummy
= av_mallocz((s
->y_block_width
+1)*s
->y_block_height
*sizeof(int32_t));
505 h263_encode_init(&s
->m
); //mv_penalty
510 static int svq1_encode_frame(AVCodecContext
*avctx
, unsigned char *buf
,
511 int buf_size
, void *data
)
513 SVQ1Context
* const s
= avctx
->priv_data
;
514 AVFrame
*pict
= data
;
515 AVFrame
* const p
= (AVFrame
*)&s
->picture
;
519 if(avctx
->pix_fmt
!= PIX_FMT_YUV410P
){
520 av_log(avctx
, AV_LOG_ERROR
, "unsupported pixel format\n");
524 if(!s
->current_picture
.data
[0]){
525 avctx
->get_buffer(avctx
, &s
->current_picture
);
526 avctx
->get_buffer(avctx
, &s
->last_picture
);
529 temp
= s
->current_picture
;
530 s
->current_picture
= s
->last_picture
;
531 s
->last_picture
= temp
;
533 init_put_bits(&s
->pb
, buf
, buf_size
);
536 p
->pict_type
= avctx
->gop_size
&& avctx
->frame_number
% avctx
->gop_size
? FF_P_TYPE
: FF_I_TYPE
;
537 p
->key_frame
= p
->pict_type
== FF_I_TYPE
;
539 svq1_write_header(s
, p
->pict_type
);
541 if(svq1_encode_plane(s
, i
,
542 s
->picture
.data
[i
], s
->last_picture
.data
[i
], s
->current_picture
.data
[i
],
543 s
->frame_width
/ (i
?4:1), s
->frame_height
/ (i
?4:1),
544 s
->picture
.linesize
[i
], s
->current_picture
.linesize
[i
]) < 0)
548 // align_put_bits(&s->pb);
549 while(put_bits_count(&s
->pb
) & 31)
550 put_bits(&s
->pb
, 1, 0);
552 flush_put_bits(&s
->pb
);
554 return put_bits_count(&s
->pb
) / 8;
557 static av_cold
int svq1_encode_end(AVCodecContext
*avctx
)
559 SVQ1Context
* const s
= avctx
->priv_data
;
562 av_log(avctx
, AV_LOG_DEBUG
, "RD: %f\n", s
->rd_total
/(double)(avctx
->width
*avctx
->height
*avctx
->frame_number
));
564 av_freep(&s
->m
.me
.scratchpad
);
565 av_freep(&s
->m
.me
.map
);
566 av_freep(&s
->m
.me
.score_map
);
567 av_freep(&s
->mb_type
);
571 av_freep(&s
->motion_val8
[i
]);
572 av_freep(&s
->motion_val16
[i
]);
579 AVCodec svq1_encoder
= {
587 .pix_fmts
= (enum PixelFormat
[]){PIX_FMT_YUV410P
, PIX_FMT_NONE
},
588 .long_name
= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1"),