/*
 * Provenance note: imported from cinelerra_cv.git, path
 * quicktime/ffmpeg/libavcodec/mpegvideo.c
 * (r654 "Initial revision", blob 87a56ffa56c2feb3fdd9f07dbec5b4394c907a9d)
 */
1 /*
2 * The simplest mpeg encoder (well, it was the simplest!)
3 * Copyright (c) 2000,2001 Fabrice Bellard.
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23 /**
24 * @file mpegvideo.c
25 * The simplest mpeg encoder (well, it was the simplest!).
26 */
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
38 //#undef NDEBUG
39 //#include <assert.h>
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45 DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47 DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49 DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51 DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53 DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55 DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
65 #ifdef HAVE_XVMC
66 extern int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
77 //#define DEBUG
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
/* AAN DCT post-scale factors in Q14 fixed point; consumed by convert_matrix()
   when the fast (ifast / FAAN post-scaled) FDCT is in use. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* chroma MV rounding table for h.263-style half-pel chroma vectors */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity luma->chroma qscale mapping; codecs with a nonlinear mapping
   install their own table instead */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112 const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
114 int qscale;
115 int shift=0;
117 for(qscale=qmin; qscale<=qmax; qscale++){
118 int i;
119 if (dsp->fdct == ff_jpeg_fdct_islow
120 #ifdef FAAN_POSTSCALE
121 || dsp->fdct == ff_faandct
122 #endif
124 for(i=0;i<64;i++) {
125 const int j= dsp->idct_permutation[i];
126 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
128 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
131 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
132 (qscale * quant_matrix[j]));
134 } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136 || dsp->fdct == ff_faandct
137 #endif
139 for(i=0;i<64;i++) {
140 const int j= dsp->idct_permutation[i];
141 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
143 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
146 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
147 (aanscales[i] * qscale * quant_matrix[j]));
149 } else {
150 for(i=0;i<64;i++) {
151 const int j= dsp->idct_permutation[i];
152 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153 So 16 <= qscale * quant_matrix[i] <= 7905
154 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
157 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 // qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
161 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
166 for(i=intra; i<64; i++){
167 int64_t max= 8191;
168 if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170 || dsp->fdct == ff_faandct
171 #endif
173 max= (8191LL*aanscales[i]) >> 14;
175 while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
176 shift++;
180 if(shift){
181 av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
185 static inline void update_qscale(MpegEncContext *s){
186 s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187 s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
189 s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
191 #endif //CONFIG_ENCODERS
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194 int i;
195 int end;
197 st->scantable= src_scantable;
199 for(i=0; i<64; i++){
200 int j;
201 j = src_scantable[i];
202 st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204 st->inverse[j] = i;
205 #endif
208 end=-1;
209 for(i=0; i<64; i++){
210 int j;
211 j = st->permutated[i];
212 if(j>end) end=j;
213 st->raster_end[i]= end;
#ifdef CONFIG_ENCODERS
/**
 * Writes a custom quant matrix to the bitstream: a "1" marker bit followed
 * by the 64 matrix bytes in zigzag order, or a single "0" bit if matrix is
 * NULL (use default matrix).
 */
void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
    int i;

    if(matrix){
        put_bits(pb, 1, 1);
        for(i=0;i<64;i++) {
            put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
        }
    }else
        put_bits(pb, 1, 0);
}
#endif //CONFIG_ENCODERS
231 /* init common dct for both encoder and decoder */
232 int DCT_common_init(MpegEncContext *s)
234 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
235 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
236 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
237 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
238 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
239 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
241 #ifdef CONFIG_ENCODERS
242 s->dct_quantize= dct_quantize_c;
243 s->denoise_dct= denoise_dct_c;
244 #endif //CONFIG_ENCODERS
246 #ifdef HAVE_MMX
247 MPV_common_init_mmx(s);
248 #endif
249 #ifdef ARCH_ALPHA
250 MPV_common_init_axp(s);
251 #endif
252 #ifdef HAVE_MLIB
253 MPV_common_init_mlib(s);
254 #endif
255 #ifdef HAVE_MMI
256 MPV_common_init_mmi(s);
257 #endif
258 #ifdef ARCH_ARMV4L
259 MPV_common_init_armv4l(s);
260 #endif
261 #ifdef ARCH_POWERPC
262 MPV_common_init_ppc(s);
263 #endif
265 #ifdef CONFIG_ENCODERS
266 s->fast_dct_quantize= s->dct_quantize;
268 if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
269 s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
272 #endif //CONFIG_ENCODERS
274 /* load & permutate scantables
275 note: only wmv uses different ones
277 if(s->alternate_scan){
278 ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan);
279 ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan);
280 }else{
281 ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct);
282 ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct);
284 ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
285 ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
287 return 0;
290 static void copy_picture(Picture *dst, Picture *src){
291 *dst = *src;
292 dst->type= FF_BUFFER_TYPE_COPY;
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296 int i;
298 dst->pict_type = src->pict_type;
299 dst->quality = src->quality;
300 dst->coded_picture_number = src->coded_picture_number;
301 dst->display_picture_number = src->display_picture_number;
302 // dst->reference = src->reference;
303 dst->pts = src->pts;
304 dst->interlaced_frame = src->interlaced_frame;
305 dst->top_field_first = src->top_field_first;
307 if(s->avctx->me_threshold){
308 if(!src->motion_val[0])
309 av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310 if(!src->mb_type)
311 av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312 if(!src->ref_index[0])
313 av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314 if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315 av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316 src->motion_subsample_log2, dst->motion_subsample_log2);
318 memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
320 for(i=0; i<2; i++){
321 int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322 int height= ((16*s->mb_height)>>src->motion_subsample_log2);
324 if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
327 if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
335 * allocates a Picture
336 * The pixels are allocated/set by calling get_buffer() if shared=0
338 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
339 const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
340 const int mb_array_size= s->mb_stride*s->mb_height;
341 const int b8_array_size= s->b8_stride*s->mb_height*2;
342 const int b4_array_size= s->b4_stride*s->mb_height*4;
343 int i;
345 if(shared){
346 assert(pic->data[0]);
347 assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
348 pic->type= FF_BUFFER_TYPE_SHARED;
349 }else{
350 int r;
352 assert(!pic->data[0]);
354 r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
356 if(r<0 || !pic->age || !pic->type || !pic->data[0]){
357 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
358 return -1;
361 if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
362 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
363 return -1;
366 if(pic->linesize[1] != pic->linesize[2]){
367 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
368 return -1;
371 s->linesize = pic->linesize[0];
372 s->uvlinesize= pic->linesize[1];
375 if(pic->qscale_table==NULL){
376 if (s->encoding) {
377 CHECKED_ALLOCZ(pic->mb_var , mb_array_size * sizeof(int16_t))
378 CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
379 CHECKED_ALLOCZ(pic->mb_mean , mb_array_size * sizeof(int8_t))
382 CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
383 CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
384 CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num * sizeof(uint32_t))
385 pic->mb_type= pic->mb_type_base + s->mb_stride+1;
386 if(s->out_format == FMT_H264){
387 for(i=0; i<2; i++){
388 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4) * sizeof(int16_t))
389 pic->motion_val[i]= pic->motion_val_base[i]+4;
390 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
392 pic->motion_subsample_log2= 2;
393 }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
394 for(i=0; i<2; i++){
395 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
396 pic->motion_val[i]= pic->motion_val_base[i]+4;
397 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
399 pic->motion_subsample_log2= 3;
401 if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
402 CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
404 pic->qstride= s->mb_stride;
405 CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
408 //it might be nicer if the application would keep track of these but it would require a API change
409 memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
410 s->prev_pict_types[0]= s->pict_type;
411 if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
412 pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
414 return 0;
415 fail: //for the CHECKED_ALLOCZ macro
416 return -1;
420 * deallocates a picture
422 static void free_picture(MpegEncContext *s, Picture *pic){
423 int i;
425 if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426 s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
429 av_freep(&pic->mb_var);
430 av_freep(&pic->mc_mb_var);
431 av_freep(&pic->mb_mean);
432 av_freep(&pic->mbskip_table);
433 av_freep(&pic->qscale_table);
434 av_freep(&pic->mb_type_base);
435 av_freep(&pic->dct_coeff);
436 av_freep(&pic->pan_scan);
437 pic->mb_type= NULL;
438 for(i=0; i<2; i++){
439 av_freep(&pic->motion_val_base[i]);
440 av_freep(&pic->ref_index[i]);
443 if(pic->type == FF_BUFFER_TYPE_SHARED){
444 for(i=0; i<4; i++){
445 pic->base[i]=
446 pic->data[i]= NULL;
448 pic->type= 0;
452 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
453 int i;
455 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
456 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
457 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
459 //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
460 CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*4*16*2*sizeof(uint8_t))
461 s->rd_scratchpad= s->me.scratchpad;
462 s->b_scratchpad= s->me.scratchpad;
463 s->obmc_scratchpad= s->me.scratchpad + 16;
464 if (s->encoding) {
465 CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t))
466 CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
467 if(s->avctx->noise_reduction){
468 CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
471 CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
472 s->block= s->blocks[0];
474 for(i=0;i<12;i++){
475 s->pblocks[i] = (short *)(&s->block[i]);
477 return 0;
478 fail:
479 return -1; //free() through MPV_common_end()
482 static void free_duplicate_context(MpegEncContext *s){
483 if(s==NULL) return;
485 av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486 av_freep(&s->me.scratchpad);
487 s->rd_scratchpad=
488 s->b_scratchpad=
489 s->obmc_scratchpad= NULL;
491 av_freep(&s->dct_error_sum);
492 av_freep(&s->me.map);
493 av_freep(&s->me.score_map);
494 av_freep(&s->blocks);
495 s->block= NULL;
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500 COPY(allocated_edge_emu_buffer);
501 COPY(edge_emu_buffer);
502 COPY(me.scratchpad);
503 COPY(rd_scratchpad);
504 COPY(b_scratchpad);
505 COPY(obmc_scratchpad);
506 COPY(me.map);
507 COPY(me.score_map);
508 COPY(blocks);
509 COPY(block);
510 COPY(start_mb_y);
511 COPY(end_mb_y);
512 COPY(me.map_generation);
513 COPY(pb);
514 COPY(dct_error_sum);
515 COPY(dct_count[0]);
516 COPY(dct_count[1]);
517 #undef COPY
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521 MpegEncContext bak;
522 int i;
523 //FIXME copy only needed parts
524 //START_TIMER
525 backup_duplicate_context(&bak, dst);
526 memcpy(dst, src, sizeof(MpegEncContext));
527 backup_duplicate_context(dst, &bak);
528 for(i=0;i<12;i++){
529 dst->pblocks[i] = (short *)(&dst->block[i]);
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536 COPY(pict_type);
537 COPY(current_picture);
538 COPY(f_code);
539 COPY(b_code);
540 COPY(qscale);
541 COPY(lambda);
542 COPY(lambda2);
543 COPY(picture_in_gop_number);
544 COPY(gop_picture_number);
545 COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546 COPY(progressive_frame); //FIXME don't set in encode_header
547 COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
552 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553 * the changed fields will not depend upon the prior state of the MpegEncContext.
555 static void MPV_common_defaults(MpegEncContext *s){
556 s->y_dc_scale_table=
557 s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558 s->chroma_qscale_table= ff_default_chroma_qscale_table;
559 s->progressive_frame= 1;
560 s->progressive_sequence= 1;
561 s->picture_structure= PICT_FRAME;
563 s->coded_picture_number = 0;
564 s->picture_number = 0;
565 s->input_picture_number = 0;
567 s->picture_in_gop_number = 0;
569 s->f_code = 1;
570 s->b_code = 1;
574 * sets the given MpegEncContext to defaults for decoding.
575 * the changed fields will not depend upon the prior state of the MpegEncContext.
577 void MPV_decode_defaults(MpegEncContext *s){
578 MPV_common_defaults(s);
/**
 * sets the given MpegEncContext to defaults for encoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */

#ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0;   /* the shared tables below are built only once */

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* MVs in [-16,15] map to fcode 1 */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
#endif //CONFIG_ENCODERS
608 /**
609 * init common structure for both encoder and decoder.
610 * this assumes that some variables like width/height are already set
612 int MPV_common_init(MpegEncContext *s)
614 int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
616 if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
617 av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
618 return -1;
621 if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
622 return -1;
624 dsputil_init(&s->dsp, s->avctx);
625 DCT_common_init(s);
627 s->flags= s->avctx->flags;
628 s->flags2= s->avctx->flags2;
630 s->mb_width = (s->width + 15) / 16;
631 s->mb_height = (s->height + 15) / 16;
632 s->mb_stride = s->mb_width + 1;
633 s->b8_stride = s->mb_width*2 + 1;
634 s->b4_stride = s->mb_width*4 + 1;
635 mb_array_size= s->mb_height * s->mb_stride;
636 mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
638 /* set chroma shifts */
639 avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
640 &(s->chroma_y_shift) );
642 /* set default edge pos, will be overriden in decode_header if needed */
643 s->h_edge_pos= s->mb_width*16;
644 s->v_edge_pos= s->mb_height*16;
646 s->mb_num = s->mb_width * s->mb_height;
648 s->block_wrap[0]=
649 s->block_wrap[1]=
650 s->block_wrap[2]=
651 s->block_wrap[3]= s->b8_stride;
652 s->block_wrap[4]=
653 s->block_wrap[5]= s->mb_stride;
655 y_size = s->b8_stride * (2 * s->mb_height + 1);
656 c_size = s->mb_stride * (s->mb_height + 1);
657 yc_size = y_size + 2 * c_size;
659 /* convert fourcc to upper case */
660 s->avctx->codec_tag= toupper( s->avctx->codec_tag &0xFF)
661 + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
662 + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
663 + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
665 s->avctx->stream_codec_tag= toupper( s->avctx->stream_codec_tag &0xFF)
666 + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
667 + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
668 + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
670 s->avctx->coded_frame= (AVFrame*)&s->current_picture;
672 CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
673 for(y=0; y<s->mb_height; y++){
674 for(x=0; x<s->mb_width; x++){
675 s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
678 s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
680 if (s->encoding) {
681 /* Allocate MV tables */
682 CHECKED_ALLOCZ(s->p_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
683 CHECKED_ALLOCZ(s->b_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
684 CHECKED_ALLOCZ(s->b_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
685 CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
686 CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
687 CHECKED_ALLOCZ(s->b_direct_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
688 s->p_mv_table = s->p_mv_table_base + s->mb_stride + 1;
689 s->b_forw_mv_table = s->b_forw_mv_table_base + s->mb_stride + 1;
690 s->b_back_mv_table = s->b_back_mv_table_base + s->mb_stride + 1;
691 s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
692 s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
693 s->b_direct_mv_table = s->b_direct_mv_table_base + s->mb_stride + 1;
695 if(s->msmpeg4_version){
696 CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
698 CHECKED_ALLOCZ(s->avctx->stats_out, 256);
700 /* Allocate MB type table */
701 CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint16_t)) //needed for encoding
703 CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
705 CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
706 CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
707 CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
708 CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
709 CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
710 CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
712 if(s->avctx->noise_reduction){
713 CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
716 CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
718 CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
720 if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
721 /* interlaced direct mode decoding tables */
722 for(i=0; i<2; i++){
723 int j, k;
724 for(j=0; j<2; j++){
725 for(k=0; k<2; k++){
726 CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k] , mv_table_size * 2 * sizeof(int16_t))
727 s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
729 CHECKED_ALLOCZ(s->b_field_select_table[i][j] , mb_array_size * 2 * sizeof(uint8_t))
730 CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j] , mv_table_size * 2 * sizeof(int16_t))
731 s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j] + s->mb_stride + 1;
733 CHECKED_ALLOCZ(s->p_field_select_table[i] , mb_array_size * 2 * sizeof(uint8_t))
736 if (s->out_format == FMT_H263) {
737 /* ac values */
738 CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
739 s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
740 s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
741 s->ac_val[2] = s->ac_val[1] + c_size;
743 /* cbp values */
744 CHECKED_ALLOCZ(s->coded_block_base, y_size);
745 s->coded_block= s->coded_block_base + s->b8_stride + 1;
747 /* cbp, ac_pred, pred_dir */
748 CHECKED_ALLOCZ(s->cbp_table , mb_array_size * sizeof(uint8_t))
749 CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
752 if (s->h263_pred || s->h263_plus || !s->encoding) {
753 /* dc values */
754 //MN: we need these for error resilience of intra-frames
755 CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
756 s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
757 s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
758 s->dc_val[2] = s->dc_val[1] + c_size;
759 for(i=0;i<yc_size;i++)
760 s->dc_val_base[i] = 1024;
763 /* which mb is a intra block */
764 CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
765 memset(s->mbintra_table, 1, mb_array_size);
767 /* init macroblock skip table */
768 CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
769 //Note the +1 is for a quicker mpeg4 slice_end detection
770 CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
772 s->parse_context.state= -1;
773 if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
774 s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
775 s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
776 s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
779 s->context_initialized = 1;
781 s->thread_context[0]= s;
782 for(i=1; i<s->avctx->thread_count; i++){
783 s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
784 memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
787 for(i=0; i<s->avctx->thread_count; i++){
788 if(init_duplicate_context(s->thread_context[i], s) < 0)
789 goto fail;
790 s->thread_context[i]->start_mb_y= (s->mb_height*(i ) + s->avctx->thread_count/2) / s->avctx->thread_count;
791 s->thread_context[i]->end_mb_y = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
794 return 0;
795 fail:
796 MPV_common_end(s);
797 return -1;
800 /* init common structure for both encoder and decoder */
801 void MPV_common_end(MpegEncContext *s)
803 int i, j, k;
805 for(i=0; i<s->avctx->thread_count; i++){
806 free_duplicate_context(s->thread_context[i]);
808 for(i=1; i<s->avctx->thread_count; i++){
809 av_freep(&s->thread_context[i]);
812 av_freep(&s->parse_context.buffer);
813 s->parse_context.buffer_size=0;
815 av_freep(&s->mb_type);
816 av_freep(&s->p_mv_table_base);
817 av_freep(&s->b_forw_mv_table_base);
818 av_freep(&s->b_back_mv_table_base);
819 av_freep(&s->b_bidir_forw_mv_table_base);
820 av_freep(&s->b_bidir_back_mv_table_base);
821 av_freep(&s->b_direct_mv_table_base);
822 s->p_mv_table= NULL;
823 s->b_forw_mv_table= NULL;
824 s->b_back_mv_table= NULL;
825 s->b_bidir_forw_mv_table= NULL;
826 s->b_bidir_back_mv_table= NULL;
827 s->b_direct_mv_table= NULL;
828 for(i=0; i<2; i++){
829 for(j=0; j<2; j++){
830 for(k=0; k<2; k++){
831 av_freep(&s->b_field_mv_table_base[i][j][k]);
832 s->b_field_mv_table[i][j][k]=NULL;
834 av_freep(&s->b_field_select_table[i][j]);
835 av_freep(&s->p_field_mv_table_base[i][j]);
836 s->p_field_mv_table[i][j]=NULL;
838 av_freep(&s->p_field_select_table[i]);
841 av_freep(&s->dc_val_base);
842 av_freep(&s->ac_val_base);
843 av_freep(&s->coded_block_base);
844 av_freep(&s->mbintra_table);
845 av_freep(&s->cbp_table);
846 av_freep(&s->pred_dir_table);
848 av_freep(&s->mbskip_table);
849 av_freep(&s->prev_pict_types);
850 av_freep(&s->bitstream_buffer);
851 s->allocated_bitstream_buffer_size=0;
853 av_freep(&s->avctx->stats_out);
854 av_freep(&s->ac_stats);
855 av_freep(&s->error_status_table);
856 av_freep(&s->mb_index2xy);
857 av_freep(&s->lambda_table);
858 av_freep(&s->q_intra_matrix);
859 av_freep(&s->q_inter_matrix);
860 av_freep(&s->q_intra_matrix16);
861 av_freep(&s->q_inter_matrix16);
862 av_freep(&s->input_picture);
863 av_freep(&s->reordered_input_picture);
864 av_freep(&s->dct_offset);
866 if(s->picture){
867 for(i=0; i<MAX_PICTURE_COUNT; i++){
868 free_picture(s, &s->picture[i]);
871 av_freep(&s->picture);
872 s->context_initialized = 0;
873 s->last_picture_ptr=
874 s->next_picture_ptr=
875 s->current_picture_ptr= NULL;
876 s->linesize= s->uvlinesize= 0;
878 for(i=0; i<3; i++)
879 av_freep(&s->visualization_buffer[i]);
881 avcodec_default_free_buffers(s->avctx);
884 #ifdef CONFIG_ENCODERS
886 /* init video encoder */
/*
 * Validates the user-supplied settings in avctx, copies them into the
 * MpegEncContext, applies per-codec configuration and the quantization
 * matrices, and initializes motion estimation and rate control.
 * Returns 0 on success, -1 on any invalid or unsupported configuration.
 */
887 int MPV_encode_init(AVCodecContext *avctx)
889 MpegEncContext *s = avctx->priv_data;
890 int i, dummy;
891 int chroma_h_shift, chroma_v_shift;
893 MPV_encode_defaults(s);
/* --- pixel format checks: only planar 4:2:0 input is accepted --- */
895 if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896 av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897 return -1;
900 if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
/* jpeg expects full-range (YUVJ) input unless strictness is relaxed */
901 if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902 av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903 return -1;
905 }else{
906 if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907 av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908 return -1;
/* --- copy the user settings from avctx into the encoder context --- */
912 s->bit_rate = avctx->bit_rate;
913 s->width = avctx->width;
914 s->height = avctx->height;
915 if(avctx->gop_size > 600){
916 av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917 avctx->gop_size=600;
919 s->gop_size = avctx->gop_size;
920 s->avctx = avctx;
921 s->flags= avctx->flags;
922 s->flags2= avctx->flags2;
923 s->max_b_frames= avctx->max_b_frames;
924 s->codec_id= avctx->codec->id;
925 s->luma_elim_threshold = avctx->luma_elim_threshold;
926 s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927 s->strict_std_compliance= avctx->strict_std_compliance;
928 s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929 s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930 s->mpeg_quant= avctx->mpeg_quant;
931 s->rtp_mode= !!avctx->rtp_payload_size;
932 s->intra_dc_precision= avctx->intra_dc_precision;
933 s->user_specified_pts = AV_NOPTS_VALUE;
/* gop_size <= 1 selects intra-only coding; 12 is then just a placeholder */
935 if (s->gop_size <= 1) {
936 s->intra_only = 1;
937 s->gop_size = 12;
938 } else {
939 s->intra_only = 0;
942 s->me_method = avctx->me_method;
944 /* Fixed QSCALE */
945 s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
/* adaptive quantization is needed when any masking option is active, unless qscale is fixed */
947 s->adaptive_quant= ( s->avctx->lumi_masking
948 || s->avctx->dark_masking
949 || s->avctx->temporal_cplx_masking
950 || s->avctx->spatial_cplx_masking
951 || s->avctx->p_masking
952 || s->avctx->border_masking
953 || (s->flags&CODEC_FLAG_QP_RD))
954 && !s->fixed_qscale;
956 s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957 s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958 s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
/* --- rate control sanity checks --- */
960 if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961 av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962 return -1;
965 if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966 av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
969 if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970 av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971 return -1;
974 if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975 av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976 return -1;
/* CBR mpeg1/2: warn when the vbv buffer delay cannot fit the 16 bit vbv_delay field */
979 if( s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
980 && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981 && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
983 av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
/* --- reject flag combinations the selected codec cannot encode --- */
986 if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
987 && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988 av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989 return -1;
992 if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993 av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994 return -1;
997 if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998 av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999 return -1;
1002 if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003 av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004 return -1;
1007 if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008 av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009 return -1;
1012 if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013 av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014 return -1;
1017 if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1018 && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019 av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020 return -1;
1023 if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024 av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025 return -1;
1028 if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029 av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030 return -1;
1033 if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034 av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035 return -1;
1038 if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039 av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040 return -1;
1043 if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1044 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1045 && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046 av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047 return -1;
/* multi-threaded encoding requires slice/rtp mode */
1050 if(s->avctx->thread_count > 1)
1051 s->rtp_mode= 1;
1053 if(!avctx->time_base.den || !avctx->time_base.num){
1054 av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055 return -1;
/* upper bound on me/mb thresholds; NOTE(review): presumably chosen so later *256-style arithmetic cannot overflow an int -- confirm */
1058 i= (INT_MAX/2+128)>>8;
1059 if(avctx->me_threshold >= i){
1060 av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061 return -1;
1063 if(avctx->mb_threshold >= i){
1064 av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065 return -1;
1068 if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069 av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
1070 return -1;
/* reduce the framerate fraction to lowest terms */
1073 i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074 if(i > 1){
1075 av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076 avctx->time_base.den /= i;
1077 avctx->time_base.num /= i;
1078 // return -1;
/* --- default quantizer rounding bias, per output format --- */
1081 if(s->codec_id==CODEC_ID_MJPEG){
1082 s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083 s->inter_quant_bias= 0;
1084 }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085 s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086 s->inter_quant_bias= 0;
1087 }else{
1088 s->intra_quant_bias=0;
1089 s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
/* a user-supplied bias overrides the defaults above */
1092 if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093 s->intra_quant_bias= avctx->intra_quant_bias;
1094 if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095 s->inter_quant_bias= avctx->inter_quant_bias;
1097 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
/* the mpeg4 bitstream stores the timebase denominator in a 16 bit field */
1099 if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100 av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101 return -1;
1103 s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
/* --- per-codec setup: output format, codec-specific flags and output delay --- */
1105 switch(avctx->codec->id) {
1106 case CODEC_ID_MPEG1VIDEO:
1107 s->out_format = FMT_MPEG1;
1108 s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109 avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110 break;
1111 case CODEC_ID_MPEG2VIDEO:
1112 s->out_format = FMT_MPEG1;
1113 s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114 avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115 s->rtp_mode= 1;
1116 break;
1117 case CODEC_ID_LJPEG:
1118 case CODEC_ID_MJPEG:
1119 s->out_format = FMT_MJPEG;
1120 s->intra_only = 1; /* force intra only for jpeg */
1121 s->mjpeg_write_tables = 1; /* write all tables */
1122 s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1123 s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1124 s->mjpeg_vsample[1] = 1;
1125 s->mjpeg_vsample[2] = 1;
1126 s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1127 s->mjpeg_hsample[1] = 1;
1128 s->mjpeg_hsample[2] = 1;
1129 if (mjpeg_init(s) < 0)
1130 return -1;
1131 avctx->delay=0;
1132 s->low_delay=1;
1133 break;
1134 case CODEC_ID_H261:
1135 s->out_format = FMT_H261;
1136 avctx->delay=0;
1137 s->low_delay=1;
1138 break;
1139 case CODEC_ID_H263:
1140 if (h263_get_picture_format(s->width, s->height) == 7) {
1141 av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1142 return -1;
1144 s->out_format = FMT_H263;
1145 s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1146 avctx->delay=0;
1147 s->low_delay=1;
1148 break;
1149 case CODEC_ID_H263P:
1150 s->out_format = FMT_H263;
1151 s->h263_plus = 1;
1152 /* Fx */
1153 s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1154 s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1155 s->modified_quant= s->h263_aic;
1156 s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1157 s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1158 s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1159 s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1160 s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1162 /* /Fx */
1163 /* These are just to be sure */
1164 avctx->delay=0;
1165 s->low_delay=1;
1166 break;
1167 case CODEC_ID_FLV1:
1168 s->out_format = FMT_H263;
1169 s->h263_flv = 2; /* format = 1; 11-bit codes */
1170 s->unrestricted_mv = 1;
1171 s->rtp_mode=0; /* don't allow GOB */
1172 avctx->delay=0;
1173 s->low_delay=1;
1174 break;
1175 case CODEC_ID_RV10:
1176 s->out_format = FMT_H263;
1177 avctx->delay=0;
1178 s->low_delay=1;
1179 break;
1180 case CODEC_ID_RV20:
1181 s->out_format = FMT_H263;
1182 avctx->delay=0;
1183 s->low_delay=1;
1184 s->modified_quant=1;
1185 s->h263_aic=1;
1186 s->h263_plus=1;
1187 s->loop_filter=1;
1188 s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1189 break;
1190 case CODEC_ID_MPEG4:
1191 s->out_format = FMT_H263;
1192 s->h263_pred = 1;
1193 s->unrestricted_mv = 1;
1194 s->low_delay= s->max_b_frames ? 0 : 1;
1195 avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1196 break;
1197 case CODEC_ID_MSMPEG4V1:
1198 s->out_format = FMT_H263;
1199 s->h263_msmpeg4 = 1;
1200 s->h263_pred = 1;
1201 s->unrestricted_mv = 1;
1202 s->msmpeg4_version= 1;
1203 avctx->delay=0;
1204 s->low_delay=1;
1205 break;
1206 case CODEC_ID_MSMPEG4V2:
1207 s->out_format = FMT_H263;
1208 s->h263_msmpeg4 = 1;
1209 s->h263_pred = 1;
1210 s->unrestricted_mv = 1;
1211 s->msmpeg4_version= 2;
1212 avctx->delay=0;
1213 s->low_delay=1;
1214 break;
1215 case CODEC_ID_MSMPEG4V3:
1216 s->out_format = FMT_H263;
1217 s->h263_msmpeg4 = 1;
1218 s->h263_pred = 1;
1219 s->unrestricted_mv = 1;
1220 s->msmpeg4_version= 3;
1221 s->flipflop_rounding=1;
1222 avctx->delay=0;
1223 s->low_delay=1;
1224 break;
1225 case CODEC_ID_WMV1:
1226 s->out_format = FMT_H263;
1227 s->h263_msmpeg4 = 1;
1228 s->h263_pred = 1;
1229 s->unrestricted_mv = 1;
1230 s->msmpeg4_version= 4;
1231 s->flipflop_rounding=1;
1232 avctx->delay=0;
1233 s->low_delay=1;
1234 break;
1235 case CODEC_ID_WMV2:
1236 s->out_format = FMT_H263;
1237 s->h263_msmpeg4 = 1;
1238 s->h263_pred = 1;
1239 s->unrestricted_mv = 1;
1240 s->msmpeg4_version= 5;
1241 s->flipflop_rounding=1;
1242 avctx->delay=0;
1243 s->low_delay=1;
1244 break;
1245 default:
1246 return -1;
/* encoder output is delayed only when B frames are in use */
1249 avctx->has_b_frames= !s->low_delay;
1251 s->encoding = 1;
1253 /* init */
1254 if (MPV_common_init(s) < 0)
1255 return -1;
1257 if(s->modified_quant)
1258 s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1259 s->progressive_frame=
1260 s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1261 s->quant_precision=5;
1263 ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1264 ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1266 #ifdef CONFIG_H261_ENCODER
1267 if (s->out_format == FMT_H261)
1268 ff_h261_encode_init(s);
1269 #endif
1270 if (s->out_format == FMT_H263)
1271 h263_encode_init(s);
1272 if(s->msmpeg4_version)
1273 ff_msmpeg4_encode_init(s);
1274 if (s->out_format == FMT_MPEG1)
1275 ff_mpeg1_encode_init(s);
1277 /* init q matrix */
1278 for(i=0;i<64;i++) {
1279 int j= s->dsp.idct_permutation[i];
1280 if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1281 s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1282 s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1283 }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1284 s->intra_matrix[j] =
1285 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1286 }else
1287 { /* mpeg1/2 */
1288 s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1289 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
/* user-supplied matrices override the codec defaults */
1291 if(s->avctx->intra_matrix)
1292 s->intra_matrix[j] = s->avctx->intra_matrix[i];
1293 if(s->avctx->inter_matrix)
1294 s->inter_matrix[j] = s->avctx->inter_matrix[i];
1297 /* precompute matrix */
1298 /* for mjpeg, we do include qscale in the matrix */
1299 if (s->out_format != FMT_MJPEG) {
1300 convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1301 s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1302 convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1303 s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1306 if(ff_rate_control_init(s) < 0)
1307 return -1;
1309 return 0;
1312 int MPV_encode_end(AVCodecContext *avctx)
1314 MpegEncContext *s = avctx->priv_data;
1316 #ifdef STATS
1317 print_stats();
1318 #endif
1320 ff_rate_control_uninit(s);
1322 MPV_common_end(s);
1323 if (s->out_format == FMT_MJPEG)
1324 mjpeg_close(s);
1326 av_freep(&avctx->extradata);
1328 return 0;
1331 #endif //CONFIG_ENCODERS
/*
 * Builds the derived lookup tables (max_level[], max_run[], index_run[])
 * of an RL (run/level) VLC table, once for the codes before the "last"
 * marker (index 0) and once for the codes after it (index 1).
 * NOTE(review): the results of av_malloc()/av_mallocz_static() are used
 * by memcpy() without a NULL check -- confirm allocation policy.
 */
1333 void init_rl(RLTable *rl, int use_static)
1335 int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1336 uint8_t index_run[MAX_RUN+1];
1337 int last, run, level, start, end, i;
1339 /* If table is static, we can quit if rl->max_level[0] is not NULL */
1340 if(use_static && rl->max_level[0])
1341 return;
1343 /* compute max_level[], max_run[] and index_run[] */
1344 for(last=0;last<2;last++) {
1345 if (last == 0) {
1346 start = 0;
1347 end = rl->last;
1348 } else {
1349 start = rl->last;
1350 end = rl->n;
/* rl->n serves as the "unset" marker in index_run[] below */
1353 memset(max_level, 0, MAX_RUN + 1);
1354 memset(max_run, 0, MAX_LEVEL + 1);
1355 memset(index_run, rl->n, MAX_RUN + 1);
1356 for(i=start;i<end;i++) {
1357 run = rl->table_run[i];
1358 level = rl->table_level[i];
1359 if (index_run[run] == rl->n)
1360 index_run[run] = i;
1361 if (level > max_level[run])
1362 max_level[run] = level;
1363 if (run > max_run[level])
1364 max_run[level] = run;
/* copy the scratch arrays into (statically or dynamically) allocated table memory */
1366 if(use_static)
1367 rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1368 else
1369 rl->max_level[last] = av_malloc(MAX_RUN + 1);
1370 memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1371 if(use_static)
1372 rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1373 else
1374 rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1375 memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1376 if(use_static)
1377 rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1378 else
1379 rl->index_run[last] = av_malloc(MAX_RUN + 1);
1380 memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1384 /* draw the edges of width 'w' of an image of size width, height */
1385 //FIXME check that this is ok for mpeg4 interlaced
1386 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1388 uint8_t *ptr, *last_line;
1389 int i;
1391 last_line = buf + (height - 1) * wrap;
1392 for(i=0;i<w;i++) {
1393 /* top and bottom */
1394 memcpy(buf - (i + 1) * wrap, buf, width);
1395 memcpy(last_line + (i + 1) * wrap, last_line, width);
1397 /* left and right */
1398 ptr = buf;
1399 for(i=0;i<height;i++) {
1400 memset(ptr - w, ptr[0], w);
1401 memset(ptr + width, ptr[width-1], w);
1402 ptr += wrap;
1404 /* corners */
1405 for(i=0;i<w;i++) {
1406 memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1407 memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1408 memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
1409 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
1413 int ff_find_unused_picture(MpegEncContext *s, int shared){
1414 int i;
1416 if(shared){
1417 for(i=0; i<MAX_PICTURE_COUNT; i++){
1418 if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1420 }else{
1421 for(i=0; i<MAX_PICTURE_COUNT; i++){
1422 if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1424 for(i=0; i<MAX_PICTURE_COUNT; i++){
1425 if(s->picture[i].data[0]==NULL) return i;
1429 assert(0);
1430 return -1;
1433 static void update_noise_reduction(MpegEncContext *s){
1434 int intra, i;
1436 for(intra=0; intra<2; intra++){
1437 if(s->dct_count[intra] > (1<<16)){
1438 for(i=0; i<64; i++){
1439 s->dct_error_sum[intra][i] >>=1;
1441 s->dct_count[intra] >>= 1;
1444 for(i=0; i<64; i++){
1445 s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1451 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
/* Returns 0 on success, -1 if picture allocation fails. */
1453 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1455 int i;
1456 AVFrame *pic;
1457 s->mb_skipped = 0;
1459 assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1461 /* mark&release old frames */
1462 if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1463 avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1465 /* release forgotten pictures */
1466 /* if(mpeg124/h263) */
1467 if(!s->encoding){
1468 for(i=0; i<MAX_PICTURE_COUNT; i++){
1469 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1470 av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1471 avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
/* allocation of the current picture; re-entered via goto when no reference exists */
1476 alloc:
1477 if(!s->encoding){
1478 /* release non reference frames */
1479 for(i=0; i<MAX_PICTURE_COUNT; i++){
1480 if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1481 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1485 if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1486 pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1487 else{
1488 i= ff_find_unused_picture(s, 0);
1489 pic= (AVFrame*)&s->picture[i];
/* B frames and dropable frames are never used as references (reference=0) */
1492 pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1493 && !s->dropable ? 3 : 0;
1495 pic->coded_picture_number= s->coded_picture_number++;
1497 if( alloc_picture(s, (Picture*)pic, 0) < 0)
1498 return -1;
1500 s->current_picture_ptr= (Picture*)pic;
1501 s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1502 s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1505 s->current_picture_ptr->pict_type= s->pict_type;
1506 // if(s->flags && CODEC_FLAG_QSCALE)
1507 // s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1508 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1510 copy_picture(&s->current_picture, s->current_picture_ptr);
/* shuffle reference pointers: non-B frames become the new "next" reference */
1512 if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1513 if (s->pict_type != B_TYPE) {
1514 s->last_picture_ptr= s->next_picture_ptr;
1515 if(!s->dropable)
1516 s->next_picture_ptr= s->current_picture_ptr;
1518 /* av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1519 s->last_picture_ptr ? s->last_picture_ptr->data[0] : NULL,
1520 s->next_picture_ptr ? s->next_picture_ptr->data[0] : NULL,
1521 s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1522 s->pict_type, s->dropable);*/
1524 if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1525 if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
/* inter frame without a decoded reference (broken stream): go back to the alloc stage */
1527 if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1528 av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1529 assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1530 goto alloc;
1533 assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
/* field pictures: point at the selected field and double the line strides */
1535 if(s->picture_structure!=PICT_FRAME){
1536 int i;
1537 for(i=0; i<4; i++){
1538 if(s->picture_structure == PICT_BOTTOM_FIELD){
1539 s->current_picture.data[i] += s->current_picture.linesize[i];
1541 s->current_picture.linesize[i] *= 2;
1542 s->last_picture.linesize[i] *=2;
1543 s->next_picture.linesize[i] *=2;
1548 s->hurry_up= s->avctx->hurry_up;
1549 s->error_resilience= avctx->error_resilience;
1551 /* set dequantizer, we can't do it during init as it might change for mpeg4
1552 and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1553 if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1554 s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1555 s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1556 }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1557 s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1558 s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1559 }else{
1560 s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1561 s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
/* encoder noise reduction: refresh dct_offset[] from the accumulated statistics */
1564 if(s->dct_error_sum){
1565 assert(s->avctx->noise_reduction && s->encoding);
1567 update_noise_reduction(s);
1570 #ifdef HAVE_XVMC
1571 if(s->avctx->xvmc_acceleration)
1572 return XVMC_field_start(s, avctx);
1573 #endif
1574 return 0;
1577 /* generic function for encode/decode called after a frame has been coded/decoded */
1578 void MPV_frame_end(MpegEncContext *s)
1580 int i;
1581 /* draw edge for correct motion prediction if outside */
1582 #ifdef HAVE_XVMC
1583 //just to make sure that all data is rendered.
1584 if(s->avctx->xvmc_acceleration){
1585 XVMC_field_end(s);
1586 }else
1587 #endif
/* pad reference pictures out to the edges so motion search/compensation may read past the borders */
1588 if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1589 draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , s->v_edge_pos , EDGE_WIDTH );
1590 draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1591 draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
/* NOTE(review): presumably clears x86 MMX state after DSP code; no-op elsewhere -- confirm */
1593 emms_c();
1595 s->last_pict_type = s->pict_type;
1596 if(s->pict_type!=B_TYPE){
1597 s->last_non_b_pict_type= s->pict_type;
1599 #if 0
1600 /* copy back current_picture variables */
1601 for(i=0; i<MAX_PICTURE_COUNT; i++){
1602 if(s->picture[i].data[0] == s->current_picture.data[0]){
1603 s->picture[i]= s->current_picture;
1604 break;
1607 assert(i<MAX_PICTURE_COUNT);
1608 #endif
1610 if(s->encoding){
1611 /* release non-reference frames */
1612 for(i=0; i<MAX_PICTURE_COUNT; i++){
1613 if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1614 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1618 // clear copies, to avoid confusion
1619 #if 0
1620 memset(&s->last_picture, 0, sizeof(Picture));
1621 memset(&s->next_picture, 0, sizeof(Picture));
1622 memset(&s->current_picture, 0, sizeof(Picture));
1623 #endif
1624 s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1628 * draws an line from (ex, ey) -> (sx, sy).
1629 * @param w width of the image
1630 * @param h height of the image
1631 * @param stride stride/linesize of the image
1632 * @param color color of the arrow
1634 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1635 int t, x, y, fr, f;
1637 sx= clip(sx, 0, w-1);
1638 sy= clip(sy, 0, h-1);
1639 ex= clip(ex, 0, w-1);
1640 ey= clip(ey, 0, h-1);
1642 buf[sy*stride + sx]+= color;
1644 if(ABS(ex - sx) > ABS(ey - sy)){
1645 if(sx > ex){
1646 t=sx; sx=ex; ex=t;
1647 t=sy; sy=ey; ey=t;
1649 buf+= sx + sy*stride;
1650 ex-= sx;
1651 f= ((ey-sy)<<16)/ex;
1652 for(x= 0; x <= ex; x++){
1653 y = (x*f)>>16;
1654 fr= (x*f)&0xFFFF;
1655 buf[ y *stride + x]+= (color*(0x10000-fr))>>16;
1656 buf[(y+1)*stride + x]+= (color* fr )>>16;
1658 }else{
1659 if(sy > ey){
1660 t=sx; sx=ex; ex=t;
1661 t=sy; sy=ey; ey=t;
1663 buf+= sx + sy*stride;
1664 ey-= sy;
1665 if(ey) f= ((ex-sx)<<16)/ey;
1666 else f= 0;
1667 for(y= 0; y <= ey; y++){
1668 x = (y*f)>>16;
1669 fr= (y*f)&0xFFFF;
1670 buf[y*stride + x ]+= (color*(0x10000-fr))>>16;;
1671 buf[y*stride + x+1]+= (color* fr )>>16;;
1677 * draws an arrow from (ex, ey) -> (sx, sy).
1678 * @param w width of the image
1679 * @param h height of the image
1680 * @param stride stride/linesize of the image
1681 * @param color color of the arrow
1683 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1684 int dx,dy;
1686 sx= clip(sx, -100, w+100);
1687 sy= clip(sy, -100, h+100);
1688 ex= clip(ex, -100, w+100);
1689 ey= clip(ey, -100, h+100);
1691 dx= ex - sx;
1692 dy= ey - sy;
1694 if(dx*dx + dy*dy > 3*3){
1695 int rx= dx + dy;
1696 int ry= -dx + dy;
1697 int length= ff_sqrt((rx*rx + ry*ry)<<8);
1699 //FIXME subpixel accuracy
1700 rx= ROUNDED_DIV(rx*3<<4, length);
1701 ry= ROUNDED_DIV(ry*3<<4, length);
1703 draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1704 draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1706 draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1710 * prints debuging info for the given picture.
/*
 * Depending on s->avctx->debug / debug_mv flags this either logs a
 * per-macroblock text map (skip count, QP, mb type) or overlays motion
 * vectors, QP shading or mb-type colors onto a copy of pict.
 */
1712 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1714 if(!pict || !pict->mb_type) return;
/* --- textual dump: one character/number per macroblock to the debug log --- */
1716 if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1717 int x,y;
1719 av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1720 switch (pict->pict_type) {
1721 case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1722 case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1723 case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1724 case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1725 case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1726 case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1728 for(y=0; y<s->mb_height; y++){
1729 for(x=0; x<s->mb_width; x++){
1730 if(s->avctx->debug&FF_DEBUG_SKIP){
1731 int count= s->mbskip_table[x + y*s->mb_stride];
1732 if(count>9) count=9;
1733 av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1735 if(s->avctx->debug&FF_DEBUG_QP){
1736 av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1738 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1739 int mb_type= pict->mb_type[x + y*s->mb_stride];
1740 //Type & MV direction
1741 if(IS_PCM(mb_type))
1742 av_log(s->avctx, AV_LOG_DEBUG, "P");
1743 else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1744 av_log(s->avctx, AV_LOG_DEBUG, "A");
1745 else if(IS_INTRA4x4(mb_type))
1746 av_log(s->avctx, AV_LOG_DEBUG, "i");
1747 else if(IS_INTRA16x16(mb_type))
1748 av_log(s->avctx, AV_LOG_DEBUG, "I");
1749 else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1750 av_log(s->avctx, AV_LOG_DEBUG, "d");
1751 else if(IS_DIRECT(mb_type))
1752 av_log(s->avctx, AV_LOG_DEBUG, "D");
1753 else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1754 av_log(s->avctx, AV_LOG_DEBUG, "g");
1755 else if(IS_GMC(mb_type))
1756 av_log(s->avctx, AV_LOG_DEBUG, "G");
1757 else if(IS_SKIP(mb_type))
1758 av_log(s->avctx, AV_LOG_DEBUG, "S");
1759 else if(!USES_LIST(mb_type, 1))
1760 av_log(s->avctx, AV_LOG_DEBUG, ">");
1761 else if(!USES_LIST(mb_type, 0))
1762 av_log(s->avctx, AV_LOG_DEBUG, "<");
1763 else{
1764 assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1765 av_log(s->avctx, AV_LOG_DEBUG, "X");
1768 //segmentation
1769 if(IS_8X8(mb_type))
1770 av_log(s->avctx, AV_LOG_DEBUG, "+");
1771 else if(IS_16X8(mb_type))
1772 av_log(s->avctx, AV_LOG_DEBUG, "-");
1773 else if(IS_8X16(mb_type))
1774 av_log(s->avctx, AV_LOG_DEBUG, "|");
1775 else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1776 av_log(s->avctx, AV_LOG_DEBUG, " ");
1777 else
1778 av_log(s->avctx, AV_LOG_DEBUG, "?");
1781 if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1782 av_log(s->avctx, AV_LOG_DEBUG, "=");
1783 else
1784 av_log(s->avctx, AV_LOG_DEBUG, " ");
1786 // av_log(s->avctx, AV_LOG_DEBUG, " ");
1788 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* --- visual debug: draw motion vectors / QP / mb type into a copy of the picture --- */
1792 if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1793 const int shift= 1 + s->quarter_sample;
1794 int mb_y;
1795 uint8_t *ptr;
1796 int i;
1797 int h_chroma_shift, v_chroma_shift;
1798 const int width = s->avctx->width;
1799 const int height= s->avctx->height;
1800 const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1801 const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1802 s->low_delay=0; //needed to see the vectors without trashing the buffers
1804 avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
/* draw into a private copy so the decoder's real buffers are not modified */
1805 for(i=0; i<3; i++){
1806 memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1807 pict->data[i]= s->visualization_buffer[i];
1809 pict->type= FF_BUFFER_TYPE_COPY;
1810 ptr= pict->data[0];
1812 for(mb_y=0; mb_y<s->mb_height; mb_y++){
1813 int mb_x;
1814 for(mb_x=0; mb_x<s->mb_width; mb_x++){
1815 const int mb_index= mb_x + mb_y*s->mb_stride;
1816 if((s->avctx->debug_mv) && pict->motion_val){
1817 int type;
/* type 0: forward MVs of P frames, 1: forward MVs of B frames, 2: backward MVs of B frames */
1818 for(type=0; type<3; type++){
1819 int direction = 0;
1820 switch (type) {
1821 case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1822 continue;
1823 direction = 0;
1824 break;
1825 case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1826 continue;
1827 direction = 0;
1828 break;
1829 case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1830 continue;
1831 direction = 1;
1832 break;
1834 if(!USES_LIST(pict->mb_type[mb_index], direction))
1835 continue;
/* one arrow per 8x8 block, per 16x8/8x16 half, or per whole macroblock */
1837 if(IS_8X8(pict->mb_type[mb_index])){
1838 int i;
1839 for(i=0; i<4; i++){
1840 int sx= mb_x*16 + 4 + 8*(i&1);
1841 int sy= mb_y*16 + 4 + 8*(i>>1);
1842 int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1843 int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1844 int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1845 draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1847 }else if(IS_16X8(pict->mb_type[mb_index])){
1848 int i;
1849 for(i=0; i<2; i++){
1850 int sx=mb_x*16 + 8;
1851 int sy=mb_y*16 + 4 + 8*i;
1852 int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1853 int mx=(pict->motion_val[direction][xy][0]>>shift);
1854 int my=(pict->motion_val[direction][xy][1]>>shift);
1856 if(IS_INTERLACED(pict->mb_type[mb_index]))
1857 my*=2;
1859 draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1861 }else if(IS_8X16(pict->mb_type[mb_index])){
1862 int i;
1863 for(i=0; i<2; i++){
1864 int sx=mb_x*16 + 4 + 8*i;
1865 int sy=mb_y*16 + 8;
1866 int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1867 int mx=(pict->motion_val[direction][xy][0]>>shift);
1868 int my=(pict->motion_val[direction][xy][1]>>shift);
1870 if(IS_INTERLACED(pict->mb_type[mb_index]))
1871 my*=2;
1873 draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1875 }else{
1876 int sx= mb_x*16 + 8;
1877 int sy= mb_y*16 + 8;
1878 int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1879 int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1880 int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1881 draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
/* paint the chroma planes with a grey level proportional to the qscale */
1885 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1886 uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1887 int y;
1888 for(y=0; y<8; y++){
1889 *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1890 *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
/* color-code the macroblock type into the chroma planes */
1893 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1894 int mb_type= pict->mb_type[mb_index];
1895 uint64_t u,v;
1896 int y;
/* COLOR(): convert (angle in degrees, radius) into U/V chroma values */
1897 #define COLOR(theta, r)\
1898 u= (int)(128 + r*cos(theta*3.141592/180));\
1899 v= (int)(128 + r*sin(theta*3.141592/180));
1902 u=v=128;
1903 if(IS_PCM(mb_type)){
1904 COLOR(120,48)
1905 }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1906 COLOR(30,48)
1907 }else if(IS_INTRA4x4(mb_type)){
1908 COLOR(90,48)
1909 }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1910 // COLOR(120,48)
1911 }else if(IS_DIRECT(mb_type)){
1912 COLOR(150,48)
1913 }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1914 COLOR(170,48)
1915 }else if(IS_GMC(mb_type)){
1916 COLOR(190,48)
1917 }else if(IS_SKIP(mb_type)){
1918 // COLOR(180,48)
1919 }else if(!USES_LIST(mb_type, 1)){
1920 COLOR(240,48)
1921 }else if(!USES_LIST(mb_type, 0)){
1922 COLOR(0,48)
1923 }else{
1924 assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1925 COLOR(300,48)
/* replicate the single U/V byte across a full 8-byte row */
1928 u*= 0x0101010101010101ULL;
1929 v*= 0x0101010101010101ULL;
1930 for(y=0; y<8; y++){
1931 *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1932 *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1935 //segmentation
1936 if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1937 *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1938 *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1940 if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1941 for(y=0; y<16; y++)
1942 pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1944 if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1945 int dm= 1 << (mv_sample_log2-2);
1946 for(i=0; i<4; i++){
1947 int sx= mb_x*16 + 8*(i&1);
1948 int sy= mb_y*16 + 8*(i>>1);
1949 int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1950 //FIXME bidir
1951 int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1952 if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1953 for(y=0; y<8; y++)
1954 pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1955 if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1956 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1960 if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1961 // hmm
1964 s->mbskip_table[mb_index]=0;
1970 #ifdef CONFIG_ENCODERS
/**
 * Sum of absolute errors of a 16x16 block against a constant reference value.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    constant value every sample is compared against
 *               (the callers pass the block mean)
 * @param stride distance in bytes between vertically adjacent samples
 * @return sum over all 256 samples of |src[sample] - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum= 0;

    for(row= 0; row < 16; row++){
        for(col= 0; col < 16; col++){
            const int diff= src[col + row*stride] - ref;
            sum+= diff >= 0 ? diff : -diff;
        }
    }
    return sum;
}
1985 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1986 int x, y, w, h;
1987 int acc=0;
1989 w= s->width &~15;
1990 h= s->height&~15;
1992 for(y=0; y<h; y+=16){
1993 for(x=0; x<w; x+=16){
1994 int offset= x + y*stride;
1995 int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1996 int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1997 int sae = get_sae(src + offset, mean, stride);
1999 acc+= sae + 500 < sad;
2002 return acc;
/**
 * Queue one user-supplied frame into the encoder's input FIFO
 * (s->input_picture[]).
 *
 * The frame is either referenced directly (zero-copy) when buffer lifetime
 * and strides allow it, or copied into an internal Picture. pic_arg may be
 * NULL (flush); then only the FIFO shift at the bottom runs.
 *
 * @return 0 on success, -1 if the supplied pts does not increase
 */
2006 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2007 AVFrame *pic=NULL;
2008 int64_t pts;
2009 int i;
// with B-frames, output lags input by max_b_frames pictures
2010 const int encoding_delay= s->max_b_frames;
// direct==1: try to reference the caller's planes instead of copying them
2011 int direct=1;
2013 if(pic_arg){
2014 pts= pic_arg->pts;
2015 pic_arg->display_picture_number= s->input_picture_number++;
// user-specified timestamps must be strictly increasing
2017 if(pts != AV_NOPTS_VALUE){
2018 if(s->user_specified_pts != AV_NOPTS_VALUE){
2019 int64_t time= pts;
2020 int64_t last= s->user_specified_pts;
2022 if(time <= last){
// NOTE(review): "%Ld" is not a portable printf conversion for int64_t;
// PRId64 from <inttypes.h> would be — confirm against the tree's av_log style.
2023 av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
2024 return -1;
2027 s->user_specified_pts= pts;
2028 }else{
// no pts given: extrapolate from the previous user pts, or fall back to
// the display picture number as a synthetic timestamp
2029 if(s->user_specified_pts != AV_NOPTS_VALUE){
2030 s->user_specified_pts=
2031 pts= s->user_specified_pts + 1;
2032 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
2033 }else{
2034 pts= pic_arg->display_picture_number;
2039 if(pic_arg){
// direct referencing is only safe when no encoding delay keeps the data
// alive past this call (or the caller guarantees preservation) and the
// caller's strides match the encoder's internal layout
2040 if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2041 if(pic_arg->linesize[0] != s->linesize) direct=0;
2042 if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2043 if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2045 // av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2047 if(direct){
2048 i= ff_find_unused_picture(s, 1);
2050 pic= (AVFrame*)&s->picture[i];
2051 pic->reference= 3;
// share the caller's plane pointers; alloc_picture(..., 1) keeps them
2053 for(i=0; i<4; i++){
2054 pic->data[i]= pic_arg->data[i];
2055 pic->linesize[i]= pic_arg->linesize[i];
2057 alloc_picture(s, (Picture*)pic, 1);
2058 }else{
// copy path: allocate an internal picture and copy every plane into it;
// 'offset' skips the padding at the start of the internal buffer
2059 int offset= 16;
2060 i= ff_find_unused_picture(s, 0);
2062 pic= (AVFrame*)&s->picture[i];
2063 pic->reference= 3;
2065 alloc_picture(s, (Picture*)pic, 0);
// if the caller already handed us our own buffer, skip the copy
2067 if( pic->data[0] + offset == pic_arg->data[0]
2068 && pic->data[1] + offset == pic_arg->data[1]
2069 && pic->data[2] + offset == pic_arg->data[2]){
2070 // empty
2071 }else{
2072 int h_chroma_shift, v_chroma_shift;
2073 avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
// per-plane copy honouring chroma subsampling; whole planes are copied
// in one memcpy when the strides agree
2075 for(i=0; i<3; i++){
2076 int src_stride= pic_arg->linesize[i];
2077 int dst_stride= i ? s->uvlinesize : s->linesize;
2078 int h_shift= i ? h_chroma_shift : 0;
2079 int v_shift= i ? v_chroma_shift : 0;
2080 int w= s->width >>h_shift;
2081 int h= s->height>>v_shift;
2082 uint8_t *src= pic_arg->data[i];
2083 uint8_t *dst= pic->data[i] + offset;
2085 if(src_stride==dst_stride)
2086 memcpy(dst, src, src_stride*h);
2087 else{
2088 while(h--){
2089 memcpy(dst, src, w);
2090 dst += dst_stride;
2091 src += src_stride;
2097 copy_picture_attributes(s, pic, pic_arg);
2098 pic->pts= pts; //we set this here to avoid modifying pic_arg
2101 /* shift buffer entries */
2102 for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2103 s->input_picture[i-1]= s->input_picture[i];
2105 s->input_picture[encoding_delay]= (Picture*)pic;
2107 return 0;
2110 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2111 int x, y, plane;
2112 int score=0;
2113 int64_t score64=0;
2115 for(plane=0; plane<3; plane++){
2116 const int stride= p->linesize[plane];
2117 const int bw= plane ? 1 : 2;
2118 for(y=0; y<s->mb_height*bw; y++){
2119 for(x=0; x<s->mb_width*bw; x++){
2120 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2122 switch(s->avctx->frame_skip_exp){
2123 case 0: score= FFMAX(score, v); break;
2124 case 1: score+= ABS(v);break;
2125 case 2: score+= v*v;break;
2126 case 3: score64+= ABS(v*v*(int64_t)v);break;
2127 case 4: score64+= v*v*(int64_t)(v*v);break;
2133 if(score) score64= score;
2135 if(score64 < s->avctx->frame_skip_threshold)
2136 return 1;
2137 if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2138 return 1;
2139 return 0;
/**
 * Pick the next picture to encode and decide its coded type.
 *
 * Shifts the reorder FIFO, then — if nothing is pending — decides how many
 * of the queued input pictures become B-frames (b_frame_strategy, GOP
 * boundaries, CLOSED_GOP), assigns the anchor its I/P type, applies the
 * optional frame-skip check, and finally sets up s->new_picture and
 * s->current_picture(_ptr) for the actual encode.
 */
2142 static void select_input_picture(MpegEncContext *s){
2143 int i;
2145 for(i=1; i<MAX_PICTURE_COUNT; i++)
2146 s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2147 s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2149 /* set next picture type & ordering */
2150 if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
// first picture ever, or intra-only codec: always an I frame
2151 if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2152 s->reordered_input_picture[0]= s->input_picture[0];
2153 s->reordered_input_picture[0]->pict_type= I_TYPE;
2154 s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2155 }else{
2156 int b_frames;
// optional frame skipping: drop the input if it is close enough to the
// last reference; its buffer is unshared/released before skipping
2158 if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2159 if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2160 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2162 if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2163 for(i=0; i<4; i++)
2164 s->input_picture[0]->data[i]= NULL;
2165 s->input_picture[0]->type= 0;
2166 }else{
2167 assert( s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2168 || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2170 s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2173 goto no_output_pic;
// two-pass mode: picture types come from the first-pass rate-control log
2177 if(s->flags&CODEC_FLAG_PASS2){
2178 for(i=0; i<s->max_b_frames+1; i++){
2179 int pict_num= s->input_picture[0]->display_picture_number + i;
2181 if(pict_num >= s->rc_context.num_entries)
2182 break;
2183 if(!s->input_picture[i]){
2184 s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2185 break;
2188 s->input_picture[i]->pict_type=
2189 s->rc_context.entry[pict_num].new_pict_type;
// strategy 0: longest available run of B frames; strategy 1: score each
// candidate by its intra-block count and stop at the first "too intra" one
2193 if(s->avctx->b_frame_strategy==0){
2194 b_frames= s->max_b_frames;
2195 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2196 }else if(s->avctx->b_frame_strategy==1){
2197 for(i=1; i<s->max_b_frames+1; i++){
2198 if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2199 s->input_picture[i]->b_frame_score=
2200 get_intra_count(s, s->input_picture[i ]->data[0],
2201 s->input_picture[i-1]->data[0], s->linesize) + 1;
2204 for(i=0; i<s->max_b_frames+1; i++){
2205 if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2208 b_frames= FFMAX(0, i-1);
2210 /* reset scores */
2211 for(i=0; i<b_frames+1; i++){
2212 s->input_picture[i]->b_frame_score=0;
2214 }else{
2215 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2216 b_frames=0;
2219 emms_c();
2220 //static int b_count=0;
2221 //b_count+= b_frames;
2222 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
// a picture whose type was forced (e.g. by two-pass) ends the B run
2224 for(i= b_frames - 1; i>=0; i--){
2225 int type= s->input_picture[i]->pict_type;
2226 if(type && type != B_TYPE)
2227 b_frames= i;
2229 if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2230 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
// GOP boundary: either shorten the B run (STRICT_GOP) or force the anchor
// to an I frame; CLOSED_GOP additionally forbids leading B frames
2233 if(s->picture_in_gop_number + b_frames >= s->gop_size){
2234 if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2235 b_frames= s->gop_size - s->picture_in_gop_number - 1;
2236 }else{
2237 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2238 b_frames=0;
2239 s->input_picture[b_frames]->pict_type= I_TYPE;
2243 if( (s->flags & CODEC_FLAG_CLOSED_GOP)
2244 && b_frames
2245 && s->input_picture[b_frames]->pict_type== I_TYPE)
2246 b_frames--;
// anchor is emitted first, the B frames follow in display order
2248 s->reordered_input_picture[0]= s->input_picture[b_frames];
2249 if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2250 s->reordered_input_picture[0]->pict_type= P_TYPE;
2251 s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2252 for(i=0; i<b_frames; i++){
2253 s->reordered_input_picture[i+1]= s->input_picture[i];
2254 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2255 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2259 no_output_pic:
2260 if(s->reordered_input_picture[0]){
// B frames are never used as references
2261 s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2263 copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2265 if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2266 // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2268 int i= ff_find_unused_picture(s, 0);
2269 Picture *pic= &s->picture[i];
2271 /* mark us unused / free shared pic */
2272 for(i=0; i<4; i++)
2273 s->reordered_input_picture[0]->data[i]= NULL;
2274 s->reordered_input_picture[0]->type= 0;
2276 pic->reference = s->reordered_input_picture[0]->reference;
2278 alloc_picture(s, pic, 0);
2280 copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2282 s->current_picture_ptr= pic;
2283 }else{
2284 // input is not a shared pix -> reuse buffer for current_pix
2286 assert( s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2287 || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2289 s->current_picture_ptr= s->reordered_input_picture[0];
// skip the 16-byte padding offset of the internal buffer (see the copy
// path in load_input_picture)
2290 for(i=0; i<4; i++){
2291 s->new_picture.data[i]+=16;
2294 copy_picture(&s->current_picture, s->current_picture_ptr);
2296 s->picture_number= s->new_picture.display_picture_number;
2297 //printf("dpn:%d\n", s->picture_number);
2298 }else{
// nothing to encode yet (still filling the B-frame delay, or skipped)
2299 memset(&s->new_picture, 0, sizeof(Picture));
/**
 * Main encoding entry point: encode one video frame into buf.
 *
 * Queues pic_arg into the input FIFO, selects the picture to code
 * (possibly reordered for B frames), encodes it, then handles per-frame
 * statistics, pass-1 logging, VBV stuffing and the MPEG-1/2 CBR
 * vbv_delay patch-up. Emits 0 bytes while the B-frame delay is filling.
 *
 * @return number of bytes written to buf, or -1 on error
 */
2303 int MPV_encode_picture(AVCodecContext *avctx,
2304 unsigned char *buf, int buf_size, void *data)
2306 MpegEncContext *s = avctx->priv_data;
2307 AVFrame *pic_arg = data;
2308 int i, stuffing_count;
2310 if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
2311 av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2312 return -1;
// give each slice thread a proportional share of the output buffer
2315 for(i=0; i<avctx->thread_count; i++){
2316 int start_y= s->thread_context[i]->start_mb_y;
2317 int end_y= s->thread_context[i]-> end_mb_y;
2318 int h= s->mb_height;
2319 uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2320 uint8_t *end = buf + (size_t)(((int64_t) buf_size)* end_y/h);
2322 init_put_bits(&s->thread_context[i]->pb, start, end - start);
2325 s->picture_in_gop_number++;
2327 if(load_input_picture(s, pic_arg) < 0)
2328 return -1;
2330 select_input_picture(s);
2332 /* output? */
2333 if(s->new_picture.data[0]){
2334 s->pict_type= s->new_picture.pict_type;
2335 //emms_c();
2336 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2337 MPV_frame_start(s, avctx);
2339 encode_picture(s, s->picture_number);
// export per-frame statistics to the public codec context
2341 avctx->real_pict_num = s->picture_number;
2342 avctx->header_bits = s->header_bits;
2343 avctx->mv_bits = s->mv_bits;
2344 avctx->misc_bits = s->misc_bits;
2345 avctx->i_tex_bits = s->i_tex_bits;
2346 avctx->p_tex_bits = s->p_tex_bits;
2347 avctx->i_count = s->i_count;
2348 avctx->p_count = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2349 avctx->skip_count = s->skip_count;
2351 MPV_frame_end(s);
2353 if (s->out_format == FMT_MJPEG)
2354 mjpeg_picture_trailer(s);
2356 if(s->flags&CODEC_FLAG_PASS1)
2357 ff_write_pass1_stats(s);
2359 for(i=0; i<4; i++){
2360 avctx->error[i] += s->current_picture_ptr->error[i];
2363 if(s->flags&CODEC_FLAG_PASS1)
2364 assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2365 flush_put_bits(&s->pb);
2366 s->frame_bits = put_bits_count(&s->pb);
// rate control may require stuffing to keep the VBV from overflowing
2368 stuffing_count= ff_vbv_update(s, s->frame_bits);
2369 if(stuffing_count){
2370 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2371 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2372 return -1;
2375 switch(s->codec_id){
2376 case CODEC_ID_MPEG1VIDEO:
2377 case CODEC_ID_MPEG2VIDEO:
// MPEG-1/2: plain zero bytes are legal stuffing
2378 while(stuffing_count--){
2379 put_bits(&s->pb, 8, 0);
2381 break;
2382 case CODEC_ID_MPEG4:
// MPEG-4: stuffing start code 0x000001C3 followed by 0xFF bytes
2383 put_bits(&s->pb, 16, 0);
2384 put_bits(&s->pb, 16, 0x1C3);
2385 stuffing_count -= 4;
2386 while(stuffing_count--){
2387 put_bits(&s->pb, 8, 0xFF);
2389 break;
2390 default:
2391 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2393 flush_put_bits(&s->pb);
2394 s->frame_bits = put_bits_count(&s->pb);
2397 /* update mpeg1/2 vbv_delay for CBR */
2398 if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2399 && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2400 int vbv_delay;
2402 assert(s->repeat_first_field==0);
// vbv_delay is expressed in 90 kHz clock ticks
2404 vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2405 assert(vbv_delay < 0xFFFF);
// patch the 16-bit vbv_delay field back into the already-written picture
// header; it spans the three bytes at s->vbv_delay_ptr
2407 s->vbv_delay_ptr[0] &= 0xF8;
2408 s->vbv_delay_ptr[0] |= vbv_delay>>13;
2409 s->vbv_delay_ptr[1] = vbv_delay>>5;
2410 s->vbv_delay_ptr[2] &= 0x07;
2411 s->vbv_delay_ptr[2] |= vbv_delay<<3;
2413 s->total_bits += s->frame_bits;
2414 avctx->frame_bits = s->frame_bits;
2415 }else{
// no picture was coded this call (delay filling / skipped frame)
2416 assert((pbBufPtr(&s->pb) == s->pb.buf));
2417 s->frame_bits=0;
2419 assert((s->frame_bits&7)==0);
2421 return s->frame_bits/8;
2424 #endif //CONFIG_ENCODERS
/**
 * One-point global motion compensation (MPEG-4 GMC with a single sprite
 * point, i.e. pure translation). Compensates the 16x16 luma block and the
 * two 8x8 chroma blocks of the current macroblock from ref_picture into
 * dest_*; chroma is skipped in gray-only mode.
 */
2426 static inline void gmc1_motion(MpegEncContext *s,
2427 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2428 uint8_t **ref_picture)
2430 uint8_t *ptr;
2431 int offset, src_x, src_y, linesize, uvlinesize;
2432 int motion_x, motion_y;
2433 int emu=0;
// integer source position; the remaining fractional part is rescaled so
// that (motion&15) is the sub-pel position passed to the MC functions
2435 motion_x= s->sprite_offset[0][0];
2436 motion_y= s->sprite_offset[0][1];
2437 src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2438 src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2439 motion_x<<=(3-s->sprite_warping_accuracy);
2440 motion_y<<=(3-s->sprite_warping_accuracy);
// clamp to the padded picture area; at the far edge drop the fraction
2441 src_x = clip(src_x, -16, s->width);
2442 if (src_x == s->width)
2443 motion_x =0;
2444 src_y = clip(src_y, -16, s->height);
2445 if (src_y == s->height)
2446 motion_y =0;
2448 linesize = s->linesize;
2449 uvlinesize = s->uvlinesize;
2451 ptr = ref_picture[0] + (src_y * linesize) + src_x;
2453 if(s->flags&CODEC_FLAG_EMU_EDGE){
2454 if( (unsigned)src_x >= s->h_edge_pos - 17
2455 || (unsigned)src_y >= s->v_edge_pos - 17){
2456 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2457 ptr= s->edge_emu_buffer;
// fractional position: gmc1 interpolation in two 8-wide halves;
// integral position: plain (no-)rounding half-pel copy selected by dxy
2461 if((motion_x|motion_y)&7){
2462 s->dsp.gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2463 s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2464 }else{
2465 int dxy;
2467 dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2468 if (s->no_rounding){
2469 s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2470 }else{
2471 s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
2475 if(s->flags&CODEC_FLAG_GRAY) return;
// same procedure for chroma, at half resolution with the second warp point
2477 motion_x= s->sprite_offset[1][0];
2478 motion_y= s->sprite_offset[1][1];
2479 src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2480 src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2481 motion_x<<=(3-s->sprite_warping_accuracy);
2482 motion_y<<=(3-s->sprite_warping_accuracy);
2483 src_x = clip(src_x, -8, s->width>>1);
2484 if (src_x == s->width>>1)
2485 motion_x =0;
2486 src_y = clip(src_y, -8, s->height>>1);
2487 if (src_y == s->height>>1)
2488 motion_y =0;
2490 offset = (src_y * uvlinesize) + src_x;
2491 ptr = ref_picture[1] + offset;
2492 if(s->flags&CODEC_FLAG_EMU_EDGE){
2493 if( (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2494 || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2495 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2496 ptr= s->edge_emu_buffer;
2497 emu=1;
2500 s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
// Cr reuses the emulation decision made for Cb (same offset and clipping)
2502 ptr = ref_picture[2] + offset;
2503 if(emu){
2504 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2505 ptr= s->edge_emu_buffer;
2507 s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2509 return;
2512 static inline void gmc_motion(MpegEncContext *s,
2513 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2514 uint8_t **ref_picture)
2516 uint8_t *ptr;
2517 int linesize, uvlinesize;
2518 const int a= s->sprite_warping_accuracy;
2519 int ox, oy;
2521 linesize = s->linesize;
2522 uvlinesize = s->uvlinesize;
2524 ptr = ref_picture[0];
2526 ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2527 oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2529 s->dsp.gmc(dest_y, ptr, linesize, 16,
2530 ox,
2531 oy,
2532 s->sprite_delta[0][0], s->sprite_delta[0][1],
2533 s->sprite_delta[1][0], s->sprite_delta[1][1],
2534 a+1, (1<<(2*a+1)) - s->no_rounding,
2535 s->h_edge_pos, s->v_edge_pos);
2536 s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2537 ox + s->sprite_delta[0][0]*8,
2538 oy + s->sprite_delta[1][0]*8,
2539 s->sprite_delta[0][0], s->sprite_delta[0][1],
2540 s->sprite_delta[1][0], s->sprite_delta[1][1],
2541 a+1, (1<<(2*a+1)) - s->no_rounding,
2542 s->h_edge_pos, s->v_edge_pos);
2544 if(s->flags&CODEC_FLAG_GRAY) return;
2546 ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2547 oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2549 ptr = ref_picture[1];
2550 s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2551 ox,
2552 oy,
2553 s->sprite_delta[0][0], s->sprite_delta[0][1],
2554 s->sprite_delta[1][0], s->sprite_delta[1][1],
2555 a+1, (1<<(2*a+1)) - s->no_rounding,
2556 s->h_edge_pos>>1, s->v_edge_pos>>1);
2558 ptr = ref_picture[2];
2559 s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2560 ox,
2561 oy,
2562 s->sprite_delta[0][0], s->sprite_delta[0][1],
2563 s->sprite_delta[1][0], s->sprite_delta[1][1],
2564 a+1, (1<<(2*a+1)) - s->no_rounding,
2565 s->h_edge_pos>>1, s->v_edge_pos>>1);
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples.
 *
 * @param buf destination buffer (same linesize as src; must not overlap src)
 * @param src source buffer, pointing at the requested top-left block sample
 *            (i.e. already offset by src_x + src_y*linesize)
 * @param linesize number of bytes between 2 vertically adjacent samples in
 *                 both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                    int src_x, int src_y, int w, int h){
    int row;
    int top, left, bottom, right;

    /* clamp the requested origin so at least one source row/column overlaps */
    if(src_y >= h){
        src  += (h-1-src_y)*linesize;
        src_y = h-1;
    }else if(src_y <= -block_h){
        src  += (1-block_h-src_y)*linesize;
        src_y = 1-block_h;
    }
    if(src_x >= w){
        src  += w-1-src_x;
        src_x = w-1;
    }else if(src_x <= -block_w){
        src  += 1-block_w-src_x;
        src_x = 1-block_w;
    }

    /* part of the block that is covered by real source samples */
    top   = src_x ? 0 : 0; /* placeholder removed below */
    top   = src_y < 0 ? -src_y : 0;
    left  = src_x < 0 ? -src_x : 0;
    bottom= h-src_y < block_h ? h-src_y : block_h;
    right = w-src_x < block_w ? w-src_x : block_w;

    /* copy the part that exists in the source */
    for(row= top; row < bottom; row++)
        memcpy(buf + left + row*linesize, src + left + row*linesize, right-left);

    /* replicate the first/last real row upwards/downwards */
    for(row= 0; row < top; row++)
        memcpy(buf + left + row*linesize, buf + left + top*linesize, right-left);
    for(row= bottom; row < block_h; row++)
        memcpy(buf + left + row*linesize, buf + left + (bottom-1)*linesize, right-left);

    /* replicate the first/last real column to the left/right */
    for(row= 0; row < block_h; row++){
        memset(buf + row*linesize, buf[left + row*linesize], left);
        memset(buf + right + row*linesize, buf[right-1 + row*linesize], block_w-right);
    }
}
/**
 * Half-pel motion compensation for one w x h block.
 *
 * Adds the integer part of (motion_x,motion_y) to (src_x,src_y), selects
 * the half-pel interpolation variant via dxy, emulates picture edges when
 * the block reaches outside the padded area, and runs pix_op.
 *
 * @return 1 if the edge emulation buffer was used, else 0
 */
2639 static inline int hpel_motion(MpegEncContext *s,
2640 uint8_t *dest, uint8_t *src,
2641 int field_based, int field_select,
2642 int src_x, int src_y,
2643 int width, int height, int stride,
2644 int h_edge_pos, int v_edge_pos,
2645 int w, int h, op_pixels_func *pix_op,
2646 int motion_x, int motion_y)
2648 int dxy;
2649 int emu=0;
// low bits of the vector select among the 4 half-pel interpolation ops
2651 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2652 src_x += motion_x >> 1;
2653 src_y += motion_y >> 1;
2655 /* WARNING: do not forget half pels */
2656 src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2657 if (src_x == width)
2658 dxy &= ~1;
2659 src_y = clip(src_y, -16, height);
2660 if (src_y == height)
2661 dxy &= ~2;
2662 src += src_y * stride + src_x;
2664 if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2665 if( (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2666 || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
// NOTE(review): the call passes the h_edge_pos parameter but s->v_edge_pos
// instead of the v_edge_pos parameter — verify this asymmetry is intended
// for field-based callers.
2667 ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2668 src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2669 src= s->edge_emu_buffer;
2670 emu=1;
2673 if(field_select)
2674 src += s->linesize;
2675 pix_op[dxy](dest, src, stride, h);
2676 return emu;
2679 static inline int hpel_motion_lowres(MpegEncContext *s,
2680 uint8_t *dest, uint8_t *src,
2681 int field_based, int field_select,
2682 int src_x, int src_y,
2683 int width, int height, int stride,
2684 int h_edge_pos, int v_edge_pos,
2685 int w, int h, h264_chroma_mc_func *pix_op,
2686 int motion_x, int motion_y)
2688 const int lowres= s->avctx->lowres;
2689 const int s_mask= (2<<lowres)-1;
2690 int emu=0;
2691 int sx, sy;
2693 if(s->quarter_sample){
2694 motion_x/=2;
2695 motion_y/=2;
2698 sx= motion_x & s_mask;
2699 sy= motion_y & s_mask;
2700 src_x += motion_x >> (lowres+1);
2701 src_y += motion_y >> (lowres+1);
2703 src += src_y * stride + src_x;
2705 if( (unsigned)src_x > h_edge_pos - (!!sx) - w
2706 || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2707 ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2708 src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2709 src= s->edge_emu_buffer;
2710 emu=1;
2713 sx <<= 2 - lowres;
2714 sy <<= 2 - lowres;
2715 if(field_select)
2716 src += s->linesize;
2717 pix_op[lowres](dest, src, stride, h, sx, sy);
2718 return emu;
2721 /* apply one mpeg motion vector to the three components */
2722 static always_inline void mpeg_motion(MpegEncContext *s,
2723 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2724 int field_based, int bottom_field, int field_select,
2725 uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2726 int motion_x, int motion_y, int h)
2728 uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2729 int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2731 #if 0
2732 if(s->quarter_sample)
2734 motion_x>>=1;
2735 motion_y>>=1;
2737 #endif
2739 v_edge_pos = s->v_edge_pos >> field_based;
2740 linesize = s->current_picture.linesize[0] << field_based;
2741 uvlinesize = s->current_picture.linesize[1] << field_based;
2743 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2744 src_x = s->mb_x* 16 + (motion_x >> 1);
2745 src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2747 if (s->out_format == FMT_H263) {
2748 if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2749 mx = (motion_x>>1)|(motion_x&1);
2750 my = motion_y >>1;
2751 uvdxy = ((my & 1) << 1) | (mx & 1);
2752 uvsrc_x = s->mb_x* 8 + (mx >> 1);
2753 uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2754 }else{
2755 uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2756 uvsrc_x = src_x>>1;
2757 uvsrc_y = src_y>>1;
2759 }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2760 mx = motion_x / 4;
2761 my = motion_y / 4;
2762 uvdxy = 0;
2763 uvsrc_x = s->mb_x*8 + mx;
2764 uvsrc_y = s->mb_y*8 + my;
2765 } else {
2766 if(s->chroma_y_shift){
2767 mx = motion_x / 2;
2768 my = motion_y / 2;
2769 uvdxy = ((my & 1) << 1) | (mx & 1);
2770 uvsrc_x = s->mb_x* 8 + (mx >> 1);
2771 uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2772 } else {
2773 if(s->chroma_x_shift){
2774 //Chroma422
2775 mx = motion_x / 2;
2776 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2777 uvsrc_x = s->mb_x* 8 + (mx >> 1);
2778 uvsrc_y = src_y;
2779 } else {
2780 //Chroma444
2781 uvdxy = dxy;
2782 uvsrc_x = src_x;
2783 uvsrc_y = src_y;
2788 ptr_y = ref_picture[0] + src_y * linesize + src_x;
2789 ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2790 ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2792 if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2793 || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2794 if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2795 s->codec_id == CODEC_ID_MPEG1VIDEO){
2796 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2797 return ;
2799 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2800 src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2801 ptr_y = s->edge_emu_buffer;
2802 if(!(s->flags&CODEC_FLAG_GRAY)){
2803 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2804 ff_emulated_edge_mc(uvbuf , ptr_cb, s->uvlinesize, 9, 9+field_based,
2805 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2806 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2807 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2808 ptr_cb= uvbuf;
2809 ptr_cr= uvbuf+16;
2813 if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2814 dest_y += s->linesize;
2815 dest_cb+= s->uvlinesize;
2816 dest_cr+= s->uvlinesize;
2819 if(field_select){
2820 ptr_y += s->linesize;
2821 ptr_cb+= s->uvlinesize;
2822 ptr_cr+= s->uvlinesize;
2825 pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2827 if(!(s->flags&CODEC_FLAG_GRAY)){
2828 pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2829 pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2831 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
2832 if(s->out_format == FMT_H261){
2833 ff_h261_loop_filter(s);
2835 #endif
2838 /* apply one mpeg motion vector to the three components */
2839 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
2840 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2841 int field_based, int bottom_field, int field_select,
2842 uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
2843 int motion_x, int motion_y, int h)
2845 uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2846 int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
2847 const int lowres= s->avctx->lowres;
2848 const int block_s= 8>>lowres;
2849 const int s_mask= (2<<lowres)-1;
2850 const int h_edge_pos = s->h_edge_pos >> lowres;
2851 const int v_edge_pos = s->v_edge_pos >> lowres;
2852 linesize = s->current_picture.linesize[0] << field_based;
2853 uvlinesize = s->current_picture.linesize[1] << field_based;
2855 if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
2856 motion_x/=2;
2857 motion_y/=2;
2860 if(field_based){
2861 motion_y += (bottom_field - field_select)*((1<<lowres)-1);
2864 sx= motion_x & s_mask;
2865 sy= motion_y & s_mask;
2866 src_x = s->mb_x*2*block_s + (motion_x >> (lowres+1));
2867 src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
2869 if (s->out_format == FMT_H263) {
2870 uvsx = ((motion_x>>1) & s_mask) | (sx&1);
2871 uvsy = ((motion_y>>1) & s_mask) | (sy&1);
2872 uvsrc_x = src_x>>1;
2873 uvsrc_y = src_y>>1;
2874 }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2875 mx = motion_x / 4;
2876 my = motion_y / 4;
2877 uvsx = (2*mx) & s_mask;
2878 uvsy = (2*my) & s_mask;
2879 uvsrc_x = s->mb_x*block_s + (mx >> lowres);
2880 uvsrc_y = s->mb_y*block_s + (my >> lowres);
2881 } else {
2882 mx = motion_x / 2;
2883 my = motion_y / 2;
2884 uvsx = mx & s_mask;
2885 uvsy = my & s_mask;
2886 uvsrc_x = s->mb_x*block_s + (mx >> (lowres+1));
2887 uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
2890 ptr_y = ref_picture[0] + src_y * linesize + src_x;
2891 ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2892 ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2894 if( (unsigned)src_x > h_edge_pos - (!!sx) - 2*block_s
2895 || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2896 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2897 src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2898 ptr_y = s->edge_emu_buffer;
2899 if(!(s->flags&CODEC_FLAG_GRAY)){
2900 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2901 ff_emulated_edge_mc(uvbuf , ptr_cb, s->uvlinesize, 9, 9+field_based,
2902 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2903 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2904 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2905 ptr_cb= uvbuf;
2906 ptr_cr= uvbuf+16;
2910 if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2911 dest_y += s->linesize;
2912 dest_cb+= s->uvlinesize;
2913 dest_cr+= s->uvlinesize;
2916 if(field_select){
2917 ptr_y += s->linesize;
2918 ptr_cb+= s->uvlinesize;
2919 ptr_cr+= s->uvlinesize;
2922 sx <<= 2 - lowres;
2923 sy <<= 2 - lowres;
2924 pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
2926 if(!(s->flags&CODEC_FLAG_GRAY)){
2927 uvsx <<= 2 - lowres;
2928 uvsy <<= 2 - lowres;
2929 pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2930 pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2932 //FIXME h261 lowres loop filter
2935 //FIXME move to dsputil, avg variant, 16x16 version
2936 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2937 int x;
2938 uint8_t * const top = src[1];
2939 uint8_t * const left = src[2];
2940 uint8_t * const mid = src[0];
2941 uint8_t * const right = src[3];
2942 uint8_t * const bottom= src[4];
2943 #define OBMC_FILTER(x, t, l, m, r, b)\
2944 dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2945 #define OBMC_FILTER4(x, t, l, m, r, b)\
2946 OBMC_FILTER(x , t, l, m, r, b);\
2947 OBMC_FILTER(x+1 , t, l, m, r, b);\
2948 OBMC_FILTER(x +stride, t, l, m, r, b);\
2949 OBMC_FILTER(x+1+stride, t, l, m, r, b);
2951 x=0;
2952 OBMC_FILTER (x , 2, 2, 4, 0, 0);
2953 OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2954 OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2955 OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2956 OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2957 OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2958 x+= stride;
2959 OBMC_FILTER (x , 1, 2, 5, 0, 0);
2960 OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2961 OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2962 OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2963 x+= stride;
2964 OBMC_FILTER4(x , 1, 2, 5, 0, 0);
2965 OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2966 OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2967 OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2968 x+= 2*stride;
2969 OBMC_FILTER4(x , 0, 2, 5, 0, 1);
2970 OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2971 OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2972 OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2973 x+= 2*stride;
2974 OBMC_FILTER (x , 0, 2, 5, 0, 1);
2975 OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2976 OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2977 OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2978 OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2979 OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2980 x+= stride;
2981 OBMC_FILTER (x , 0, 2, 4, 0, 2);
2982 OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2983 OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2984 OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
/* obmc for 1 8x8 luma block */
/**
 * Computes the five half-pel predictions needed for OBMC of one 8x8 luma
 * block and blends them with put_obmc().
 * Neighbours whose MV equals the mid MV reuse the mid prediction instead
 * of recomputing it (ptr aliasing below).
 * mv[5][2] order: mid, top, left, right, bottom.
 */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2]/* mid top left right bottom*/)
{
#define MID    0
    int i;
    uint8_t *ptr[5];

    assert(s->quarter_sample==0);

    for(i=0; i<5; i++){
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
            /* same vector as the mid block -> identical prediction */
            ptr[i]= ptr[MID];
        }else{
            /* scratchpad holds up to four distinct 8x8 predictions in a 2x2 layout */
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
            hpel_motion(s, ptr[i], src, 0, 0,
                        src_x, src_y,
                        s->width, s->height, s->linesize,
                        s->h_edge_pos, s->v_edge_pos,
                        8, 8, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    put_obmc(dest, ptr, s->linesize);
}
/**
 * Quarter-pel luma motion compensation (MPEG-4 qpel) for one macroblock,
 * with half-pel chroma derived from the luma vector.
 * @param field_based   nonzero for field prediction (halved vertical geometry)
 * @param bottom_field  write to the bottom field lines of dest
 * @param field_select  read from the bottom field lines of the reference
 * @param h             luma height to process (16 frame, 8 field)
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy encodes the quarter-pel sub-position (y fraction in bits 2-3, x in 0-1) */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma vector; two buggy-encoder workarounds change the rounding */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round the half-pel chroma vector towards the nearest half-pel position */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* reference block sticks out of the picture -> emulate the edge */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                            src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                                uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                                uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* no 16x8 qpel function exists, so do it as two 8x8 halves */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3104 inline int ff_h263_round_chroma(int x){
3105 if (x >= 0)
3106 return (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3107 else {
3108 x = -x;
3109 return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
/**
 * h263 chroma 4mv motion compensation.
 */
/**
 * Half-pel chroma motion compensation for a 4MV macroblock.
 * mx/my are the SUMS of the four luma vectors; ff_h263_round_chroma()
 * turns each sum into the single chroma vector with H.263 rounding.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clip to the padded picture area; at the right/bottom edge drop the
       half-pel interpolation in that direction */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset; re-run edge emulation if Cb needed it */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
/**
 * Low-resolution variant of chroma_4mv_motion(): chroma MC for a 4MV
 * macroblock when decoding at 1/2^lowres resolution, using the H.264-style
 * bilinear chroma MC functions (which take explicit sub-pel fractions).
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;                 /* chroma block size at this resolution */
    const int s_mask= (2<<lowres)-1;              /* mask of sub-pel fraction bits */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        /* reduce qpel vectors to hpel before the chroma rounding */
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* scale the fraction to the 1/8-pel units expected by the mc functions */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
/* Motion compensation of a single macroblock; dispatches on s->mv_type.
   Vectors come from s->mv, field selection from s->field_select. */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->obmc && s->pict_type != B_TYPE){
        /* H.263 Annex F OBMC path: gather the MVs of the current MB's four
           8x8 blocks plus their neighbours into a 4x4 cache (with border
           replication for intra/edge neighbours), then blend. */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        /* below-MB vectors are not decoded yet -> reuse the bottom row */
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* MPEG-4 global motion compensation */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            /* WMV1/MSMPEG4 special half-pel filter */
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* prediction from the opposite parity of the same frame:
               must read from the picture being reconstructed */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual-prime: put the first prediction, then average the second onto it */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
        break;
    default: assert(0);
    }
}
/**
 * motion compensation of a single macroblock (lowres decoding)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
/* Low-resolution counterpart of MPV_motion(): same MV-type dispatch, but
   all sizes and edge positions are scaled by s->avctx->lowres and the
   bilinear h264-chroma MC functions are used for every plane. */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;     /* 8x8 block size at this resolution */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                        ref_picture[0], 0, 0,
                        (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                        s->width, s->height, s->linesize,
                        s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                        block_s, block_s, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1]);

            /* accumulate the luma vectors for the single chroma vector */
            mx += s->mv[dir][i][0];
            my += s->mv[dir][i][1];
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* opposite parity of the same frame -> read from the picture
               currently being reconstructed */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual-prime: put the first prediction, then average the second onto it */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
        break;
    default: assert(0);
    }
}
/* put block[] to dest[] */
/* Dequantizes an intra block and writes (overwrites) its IDCT to dest. */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
/* add block[] to dest[] */
/* Adds the IDCT of an already-dequantized block to dest; skipped when the
   block has no coded coefficients (block_last_index < 0). */
static inline void add_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size)
{
    if (s->block_last_index[i] >= 0) {
        s->dsp.idct_add (dest, line_size, block);
    }
}
/* Dequantizes an inter block, then adds its IDCT to dest; skipped when the
   block has no coded coefficients. */
static inline void add_dequant_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    if (s->block_last_index[i] >= 0) {
        s->dct_unquantize_inter(s, block, i, qscale);

        s->dsp.idct_add (dest, line_size, block);
    }
}
/**
 * cleans dc, ac, coded_block for the current non intra MB
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->b8_stride;
    int xy = s->block_index[0];

    /* reset the four luma DC predictors to the neutral value 1024 */
    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy + wrap    ] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
    if (s->msmpeg4_version>=3) {
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy + wrap    ] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma */
    wrap = s->mb_stride;
    xy = s->mb_x + s->mb_y * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));

    /* mark this MB position as non-intra */
    s->mbintra_table[xy]= 0;
}
3647 /* generic function called after a macroblock has been parsed by the
3648 decoder or after it has been encoded by the encoder.
3650 Important variables used:
3651 s->mb_intra : true if intra macroblock
3652 s->mv_dir : motion vector direction
3653 s->mv_type : motion vector type
3654 s->mv : motion vector
3655 s->interlaced_dct : true if interlaced dct used (mpeg2)
/* Reconstructs one macroblock: predictor maintenance, skip handling,
   motion compensation, and dequant/IDCT of the residual. lowres_flag is a
   compile-time constant at each call site (always_inline). */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do ! */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* unreadable destination (e.g. B-frame without draw_horiz_band):
               reconstruct into scratch and copy at the end */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;     /* backward pass averages onto forward */
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                /* already dequantized (MPEG-1/2 intra path) */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* copy the scratch reconstruction into the real destination */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
3881 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3882 if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3883 else MPV_decode_mb_internal(s, block, 0);
3886 #ifdef CONFIG_ENCODERS
/* Encoder heuristic: if a block contains only a few isolated +-1
   coefficients (scored against a position-weight table under the given
   threshold), zero the whole block so it costs no bits. A negative
   threshold means the DC coefficient may be eliminated too. */
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    static const char tab[64]=
        {3,2,2,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0};
    int score=0;
    int run=0;
    int i;
    DCTELEM *block= s->block[n];
    const int last_index= s->block_last_index[n];
    int skip_dc;

    if(threshold<0){
        skip_dc=0;
        threshold= -threshold;
    }else
        skip_dc=1;

    /* are all the coefficients we could set to zero already zero? */
    if(last_index<=skip_dc - 1) return;

    for(i=0; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        const int level = ABS(block[j]);
        if(level==1){
            if(skip_dc && i==0) continue;
            score+= tab[run];
            run=0;
        }else if(level>1){
            /* any coefficient larger than 1 -> keep the block */
            return;
        }else{
            run++;
        }
    }
    if(score >= threshold) return;
    /* zero everything (except possibly DC) in scan order */
    for(i=skip_dc; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        block[j]=0;
    }
    if(block[0]) s->block_last_index[n]= 0;
    else         s->block_last_index[n]= -1;
}
/* Clamps quantized coefficients to the [min_qcoeff, max_qcoeff] range the
   entropy coder can represent; intra DC (index 0) is never clipped.
   Warns once per block if clipping happened with simple MB decision. */
static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
{
    int i;
    const int maxlevel= s->max_qcoeff;
    const int minlevel= s->min_qcoeff;
    int overflow=0;

    if(s->mb_intra){
        i=1; //skip clipping of intra dc
    }else
        i=0;

    for(;i<=last_index; i++){
        const int j= s->intra_scantable.permutated[i];
        int level = block[j];

        if     (level>maxlevel){
            level=maxlevel;
            overflow++;
        }else if(level<minlevel){
            level=minlevel;
            overflow++;
        }

        block[j]= level;
    }

    if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
        av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
}
3968 #endif //CONFIG_ENCODERS
3972 * @param h is the normal height, this will be reduced automatically if needed for the last row
3974 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3975 if (s->avctx->draw_horiz_band) {
3976 AVFrame *src;
3977 int offset[4];
3979 if(s->picture_structure != PICT_FRAME){
3980 h <<= 1;
3981 y <<= 1;
3982 if(s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3985 h= FFMIN(h, s->avctx->height - y);
3987 if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
3988 src= (AVFrame*)s->current_picture_ptr;
3989 else if(s->last_picture_ptr)
3990 src= (AVFrame*)s->last_picture_ptr;
3991 else
3992 return;
3994 if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3995 offset[0]=
3996 offset[1]=
3997 offset[2]=
3998 offset[3]= 0;
3999 }else{
4000 offset[0]= y * s->linesize;;
4001 offset[1]=
4002 offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4003 offset[3]= 0;
4006 emms_c();
4008 s->avctx->draw_horiz_band(s->avctx, src, offset,
4009 y, s->picture_structure, h);
4013 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4014 const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4015 const int uvlinesize= s->current_picture.linesize[1];
4016 const int mb_size= 4 - s->avctx->lowres;
4018 s->block_index[0]= s->b8_stride*(s->mb_y*2 ) - 2 + s->mb_x*2;
4019 s->block_index[1]= s->b8_stride*(s->mb_y*2 ) - 1 + s->mb_x*2;
4020 s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4021 s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4022 s->block_index[4]= s->mb_stride*(s->mb_y + 1) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4023 s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4024 //block_index is not used by mpeg2, so it is not affected by chroma_format
4026 s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4027 s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4028 s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4030 if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4032 s->dest[0] += s->mb_y * linesize << mb_size;
4033 s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4034 s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4038 #ifdef CONFIG_ENCODERS
/* Compute a per-coefficient perceptual weight for an 8x8 block from the
 * local pixel variance of each sample's (up to) 3x3 neighbourhood.
 * NOTE(review): "vissual" is a historical typo for "visual"; the name is
 * kept unchanged because callers in this file refer to it. */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
    //FIXME optimize
    for(y=0; y<8; y++){
        for(x=0; x<8; x++){
            int nx, ny;
            int pix_sum= 0;
            int pix_sqr= 0;
            int count  = 0;

            for(ny= FFMAX(y-1, 0); ny < FFMIN(8, y+2); ny++){
                for(nx= FFMAX(x-1, 0); nx < FFMIN(8, x+2); nx++){
                    const int v= ptr[nx + ny*stride];
                    pix_sum += v;
                    pix_sqr += v*v;
                    count++;
                }
            }
            /* 36*sqrt(count*E[v^2] - E[v]^2)/count ~ scaled local std deviation */
            weight[x + 8*y]= (36*ff_sqrt(count*pix_sqr - pix_sum*pix_sum)) / count;
        }
    }
}
4063 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4065 int16_t weight[6][64];
4066 DCTELEM orig[6][64];
4067 const int mb_x= s->mb_x;
4068 const int mb_y= s->mb_y;
4069 int i;
4070 int skip_dct[6];
4071 int dct_offset = s->linesize*8; //default for progressive frames
4072 uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4073 int wrap_y, wrap_c;
4075 for(i=0; i<6; i++) skip_dct[i]=0;
4077 if(s->adaptive_quant){
4078 const int last_qp= s->qscale;
4079 const int mb_xy= mb_x + mb_y*s->mb_stride;
4081 s->lambda= s->lambda_table[mb_xy];
4082 update_qscale(s);
4084 if(!(s->flags&CODEC_FLAG_QP_RD)){
4085 s->dquant= s->qscale - last_qp;
4087 if(s->out_format==FMT_H263){
4088 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4090 if(s->codec_id==CODEC_ID_MPEG4){
4091 if(!s->mb_intra){
4092 if(s->pict_type == B_TYPE){
4093 if(s->dquant&1)
4094 s->dquant= (s->dquant/2)*2;
4095 if(s->mv_dir&MV_DIRECT)
4096 s->dquant= 0;
4098 if(s->mv_type==MV_TYPE_8X8)
4099 s->dquant=0;
4104 ff_set_qscale(s, last_qp + s->dquant);
4105 }else if(s->flags&CODEC_FLAG_QP_RD)
4106 ff_set_qscale(s, s->qscale + s->dquant);
4108 wrap_y = s->linesize;
4109 wrap_c = s->uvlinesize;
4110 ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4111 ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
4112 ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
4114 if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4115 uint8_t *ebuf= s->edge_emu_buffer + 32;
4116 ff_emulated_edge_mc(ebuf , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width , s->height);
4117 ptr_y= ebuf;
4118 ff_emulated_edge_mc(ebuf+18*wrap_y , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4119 ptr_cb= ebuf+18*wrap_y;
4120 ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4121 ptr_cr= ebuf+18*wrap_y+8;
4124 if (s->mb_intra) {
4125 if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4126 int progressive_score, interlaced_score;
4128 s->interlaced_dct=0;
4129 progressive_score= s->dsp.ildct_cmp[4](s, ptr_y , NULL, wrap_y, 8)
4130 +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4132 if(progressive_score > 0){
4133 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y , NULL, wrap_y*2, 8)
4134 +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y , NULL, wrap_y*2, 8);
4135 if(progressive_score > interlaced_score){
4136 s->interlaced_dct=1;
4138 dct_offset= wrap_y;
4139 wrap_y<<=1;
4144 s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
4145 s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
4146 s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
4147 s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4149 if(s->flags&CODEC_FLAG_GRAY){
4150 skip_dct[4]= 1;
4151 skip_dct[5]= 1;
4152 }else{
4153 s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4154 s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4156 }else{
4157 op_pixels_func (*op_pix)[4];
4158 qpel_mc_func (*op_qpix)[16];
4159 uint8_t *dest_y, *dest_cb, *dest_cr;
4161 dest_y = s->dest[0];
4162 dest_cb = s->dest[1];
4163 dest_cr = s->dest[2];
4165 if ((!s->no_rounding) || s->pict_type==B_TYPE){
4166 op_pix = s->dsp.put_pixels_tab;
4167 op_qpix= s->dsp.put_qpel_pixels_tab;
4168 }else{
4169 op_pix = s->dsp.put_no_rnd_pixels_tab;
4170 op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4173 if (s->mv_dir & MV_DIR_FORWARD) {
4174 MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4175 op_pix = s->dsp.avg_pixels_tab;
4176 op_qpix= s->dsp.avg_qpel_pixels_tab;
4178 if (s->mv_dir & MV_DIR_BACKWARD) {
4179 MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4182 if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4183 int progressive_score, interlaced_score;
4185 s->interlaced_dct=0;
4186 progressive_score= s->dsp.ildct_cmp[0](s, dest_y , ptr_y , wrap_y, 8)
4187 +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4189 if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4191 if(progressive_score>0){
4192 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y , ptr_y , wrap_y*2, 8)
4193 +s->dsp.ildct_cmp[0](s, dest_y + wrap_y , ptr_y + wrap_y , wrap_y*2, 8);
4195 if(progressive_score > interlaced_score){
4196 s->interlaced_dct=1;
4198 dct_offset= wrap_y;
4199 wrap_y<<=1;
4204 s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
4205 s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
4206 s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
4207 s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4209 if(s->flags&CODEC_FLAG_GRAY){
4210 skip_dct[4]= 1;
4211 skip_dct[5]= 1;
4212 }else{
4213 s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4214 s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4216 /* pre quantization */
4217 if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4218 //FIXME optimize
4219 if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4220 if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4221 if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4222 if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4223 if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4224 if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4228 if(s->avctx->quantizer_noise_shaping){
4229 if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y , wrap_y);
4230 if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y + 8, wrap_y);
4231 if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset , wrap_y);
4232 if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4233 if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb , wrap_c);
4234 if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr , wrap_c);
4235 memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
4238 /* DCT & quantize */
4239 assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4241 for(i=0;i<6;i++) {
4242 if(!skip_dct[i]){
4243 int overflow;
4244 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4245 // FIXME we could decide to change to quantizer instead of clipping
4246 // JS: I don't think that would be a good idea it could lower quality instead
4247 // of improve it. Just INTRADC clipping deserves changes in quantizer
4248 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4249 }else
4250 s->block_last_index[i]= -1;
4252 if(s->avctx->quantizer_noise_shaping){
4253 for(i=0;i<6;i++) {
4254 if(!skip_dct[i]){
4255 s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4260 if(s->luma_elim_threshold && !s->mb_intra)
4261 for(i=0; i<4; i++)
4262 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4263 if(s->chroma_elim_threshold && !s->mb_intra)
4264 for(i=4; i<6; i++)
4265 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4267 if(s->flags & CODEC_FLAG_CBP_RD){
4268 for(i=0;i<6;i++) {
4269 if(s->block_last_index[i] == -1)
4270 s->coded_score[i]= INT_MAX/256;
4275 if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4276 s->block_last_index[4]=
4277 s->block_last_index[5]= 0;
4278 s->block[4][0]=
4279 s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4282 //non c quantize code returns incorrect block_last_index FIXME
4283 if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4284 for(i=0; i<6; i++){
4285 int j;
4286 if(s->block_last_index[i]>0){
4287 for(j=63; j>0; j--){
4288 if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4290 s->block_last_index[i]= j;
4295 /* huffman encode */
4296 switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4297 case CODEC_ID_MPEG1VIDEO:
4298 case CODEC_ID_MPEG2VIDEO:
4299 mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4300 case CODEC_ID_MPEG4:
4301 mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4302 case CODEC_ID_MSMPEG4V2:
4303 case CODEC_ID_MSMPEG4V3:
4304 case CODEC_ID_WMV1:
4305 msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4306 case CODEC_ID_WMV2:
4307 ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4308 #ifdef CONFIG_H261_ENCODER
4309 case CODEC_ID_H261:
4310 ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4311 #endif
4312 case CODEC_ID_H263:
4313 case CODEC_ID_H263P:
4314 case CODEC_ID_FLV1:
4315 case CODEC_ID_RV10:
4316 case CODEC_ID_RV20:
4317 h263_encode_mb(s, s->block, motion_x, motion_y); break;
4318 case CODEC_ID_MJPEG:
4319 mjpeg_encode_mb(s, s->block); break;
4320 default:
4321 assert(0);
4325 #endif //CONFIG_ENCODERS
4327 void ff_mpeg_flush(AVCodecContext *avctx){
4328 int i;
4329 MpegEncContext *s = avctx->priv_data;
4331 if(s==NULL || s->picture==NULL)
4332 return;
4334 for(i=0; i<MAX_PICTURE_COUNT; i++){
4335 if(s->picture[i].data[0] && ( s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4336 || s->picture[i].type == FF_BUFFER_TYPE_USER))
4337 avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4339 s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4341 s->mb_x= s->mb_y= 0;
4343 s->parse_context.state= -1;
4344 s->parse_context.frame_start_found= 0;
4345 s->parse_context.overread= 0;
4346 s->parse_context.overread_index= 0;
4347 s->parse_context.index= 0;
4348 s->parse_context.last_index= 0;
4349 s->bitstream_buffer_size=0;
4352 #ifdef CONFIG_ENCODERS
4353 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4355 const uint16_t *srcw= (uint16_t*)src;
4356 int words= length>>4;
4357 int bits= length&15;
4358 int i;
4360 if(length==0) return;
4362 if(words < 16){
4363 for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4364 }else if(put_bits_count(pb)&7){
4365 for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4366 }else{
4367 for(i=0; put_bits_count(pb)&31; i++)
4368 put_bits(pb, 8, src[i]);
4369 flush_put_bits(pb);
4370 memcpy(pbBufPtr(pb), src+i, 2*words-i);
4371 skip_put_bytes(pb, 2*words-i);
4374 put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4377 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4378 int i;
4380 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4382 /* mpeg1 */
4383 d->mb_skip_run= s->mb_skip_run;
4384 for(i=0; i<3; i++)
4385 d->last_dc[i]= s->last_dc[i];
4387 /* statistics */
4388 d->mv_bits= s->mv_bits;
4389 d->i_tex_bits= s->i_tex_bits;
4390 d->p_tex_bits= s->p_tex_bits;
4391 d->i_count= s->i_count;
4392 d->f_count= s->f_count;
4393 d->b_count= s->b_count;
4394 d->skip_count= s->skip_count;
4395 d->misc_bits= s->misc_bits;
4396 d->last_bits= 0;
4398 d->mb_skipped= 0;
4399 d->qscale= s->qscale;
4400 d->dquant= s->dquant;
4403 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4404 int i;
4406 memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4407 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4409 /* mpeg1 */
4410 d->mb_skip_run= s->mb_skip_run;
4411 for(i=0; i<3; i++)
4412 d->last_dc[i]= s->last_dc[i];
4414 /* statistics */
4415 d->mv_bits= s->mv_bits;
4416 d->i_tex_bits= s->i_tex_bits;
4417 d->p_tex_bits= s->p_tex_bits;
4418 d->i_count= s->i_count;
4419 d->f_count= s->f_count;
4420 d->b_count= s->b_count;
4421 d->skip_count= s->skip_count;
4422 d->misc_bits= s->misc_bits;
4424 d->mb_intra= s->mb_intra;
4425 d->mb_skipped= s->mb_skipped;
4426 d->mv_type= s->mv_type;
4427 d->mv_dir= s->mv_dir;
4428 d->pb= s->pb;
4429 if(s->data_partitioning){
4430 d->pb2= s->pb2;
4431 d->tex_pb= s->tex_pb;
4433 d->block= s->block;
4434 for(i=0; i<6; i++)
4435 d->block_last_index[i]= s->block_last_index[i];
4436 d->interlaced_dct= s->interlaced_dct;
4437 d->qscale= s->qscale;
4440 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4441 PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4442 int *dmin, int *next_block, int motion_x, int motion_y)
4444 int score;
4445 uint8_t *dest_backup[3];
4447 copy_context_before_encode(s, backup, type);
4449 s->block= s->blocks[*next_block];
4450 s->pb= pb[*next_block];
4451 if(s->data_partitioning){
4452 s->pb2 = pb2 [*next_block];
4453 s->tex_pb= tex_pb[*next_block];
4456 if(*next_block){
4457 memcpy(dest_backup, s->dest, sizeof(s->dest));
4458 s->dest[0] = s->rd_scratchpad;
4459 s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4460 s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4461 assert(s->linesize >= 32); //FIXME
4464 encode_mb(s, motion_x, motion_y);
4466 score= put_bits_count(&s->pb);
4467 if(s->data_partitioning){
4468 score+= put_bits_count(&s->pb2);
4469 score+= put_bits_count(&s->tex_pb);
4472 if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4473 MPV_decode_mb(s, s->block);
4475 score *= s->lambda2;
4476 score += sse_mb(s) << FF_LAMBDA_SHIFT;
4479 if(*next_block){
4480 memcpy(s->dest, dest_backup, sizeof(s->dest));
4483 if(score<*dmin){
4484 *dmin= score;
4485 *next_block^=1;
4487 copy_context_after_encode(best, s, type);
4491 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4492 uint32_t *sq = squareTbl + 256;
4493 int acc=0;
4494 int x,y;
4496 if(w==16 && h==16)
4497 return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4498 else if(w==8 && h==8)
4499 return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4501 for(y=0; y<h; y++){
4502 for(x=0; x<w; x++){
4503 acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4507 assert(acc>=0);
4509 return acc;
4512 static int sse_mb(MpegEncContext *s){
4513 int w= 16;
4514 int h= 16;
4516 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4517 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4519 if(w==16 && h==16)
4520 if(s->avctx->mb_cmp == FF_CMP_NSSE){
4521 return s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4522 +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4523 +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4524 }else{
4525 return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4526 +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4527 +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4529 else
4530 return sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4531 +sse(s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4532 +sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4535 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4536 MpegEncContext *s= arg;
4539 s->me.pre_pass=1;
4540 s->me.dia_size= s->avctx->pre_dia_size;
4541 s->first_slice_line=1;
4542 for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4543 for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4544 ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4546 s->first_slice_line=0;
4549 s->me.pre_pass=0;
4551 return 0;
4554 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4555 MpegEncContext *s= arg;
4557 s->me.dia_size= s->avctx->dia_size;
4558 s->first_slice_line=1;
4559 for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4560 s->mb_x=0; //for block init below
4561 ff_init_block_index(s);
4562 for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4563 s->block_index[0]+=2;
4564 s->block_index[1]+=2;
4565 s->block_index[2]+=2;
4566 s->block_index[3]+=2;
4568 /* compute motion vector & mb_type and store in context */
4569 if(s->pict_type==B_TYPE)
4570 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4571 else
4572 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4574 s->first_slice_line=0;
4576 return 0;
4579 static int mb_var_thread(AVCodecContext *c, void *arg){
4580 MpegEncContext *s= arg;
4581 int mb_x, mb_y;
4583 for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4584 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4585 int xx = mb_x * 16;
4586 int yy = mb_y * 16;
4587 uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4588 int varc;
4589 int sum = s->dsp.pix_sum(pix, s->linesize);
4591 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4593 s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4594 s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4595 s->me.mb_var_sum_temp += varc;
4598 return 0;
4601 static void write_slice_end(MpegEncContext *s){
4602 if(s->codec_id==CODEC_ID_MPEG4){
4603 if(s->partitioned_frame){
4604 ff_mpeg4_merge_partitions(s);
4607 ff_mpeg4_stuffing(&s->pb);
4608 }else if(s->out_format == FMT_MJPEG){
4609 ff_mjpeg_stuffing(&s->pb);
4612 align_put_bits(&s->pb);
4613 flush_put_bits(&s->pb);
4615 if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4616 s->misc_bits+= get_bits_diff(s);
4619 static int encode_thread(AVCodecContext *c, void *arg){
4620 MpegEncContext *s= arg;
4621 int mb_x, mb_y, pdif = 0;
4622 int i, j;
4623 MpegEncContext best_s, backup_s;
4624 uint8_t bit_buf[2][MAX_MB_BYTES];
4625 uint8_t bit_buf2[2][MAX_MB_BYTES];
4626 uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4627 PutBitContext pb[2], pb2[2], tex_pb[2];
4628 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4630 for(i=0; i<2; i++){
4631 init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES);
4632 init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES);
4633 init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4636 s->last_bits= put_bits_count(&s->pb);
4637 s->mv_bits=0;
4638 s->misc_bits=0;
4639 s->i_tex_bits=0;
4640 s->p_tex_bits=0;
4641 s->i_count=0;
4642 s->f_count=0;
4643 s->b_count=0;
4644 s->skip_count=0;
4646 for(i=0; i<3; i++){
4647 /* init last dc values */
4648 /* note: quant matrix value (8) is implied here */
4649 s->last_dc[i] = 128 << s->intra_dc_precision;
4651 s->current_picture_ptr->error[i] = 0;
4653 s->mb_skip_run = 0;
4654 memset(s->last_mv, 0, sizeof(s->last_mv));
4656 s->last_mv_dir = 0;
4658 switch(s->codec_id){
4659 case CODEC_ID_H263:
4660 case CODEC_ID_H263P:
4661 case CODEC_ID_FLV1:
4662 s->gob_index = ff_h263_get_gob_height(s);
4663 break;
4664 case CODEC_ID_MPEG4:
4665 if(s->partitioned_frame)
4666 ff_mpeg4_init_partitions(s);
4667 break;
4670 s->resync_mb_x=0;
4671 s->resync_mb_y=0;
4672 s->first_slice_line = 1;
4673 s->ptr_lastgob = s->pb.buf;
4674 for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4675 // printf("row %d at %X\n", s->mb_y, (int)s);
4676 s->mb_x=0;
4677 s->mb_y= mb_y;
4679 ff_set_qscale(s, s->qscale);
4680 ff_init_block_index(s);
4682 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4683 int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4684 int mb_type= s->mb_type[xy];
4685 // int d;
4686 int dmin= INT_MAX;
4687 int dir;
4689 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4690 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4691 return -1;
4693 if(s->data_partitioning){
4694 if( s->pb2 .buf_end - s->pb2 .buf - (put_bits_count(&s-> pb2)>>3) < MAX_MB_BYTES
4695 || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4696 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4697 return -1;
4701 s->mb_x = mb_x;
4702 s->mb_y = mb_y; // moved into loop, can get changed by H.261
4703 ff_update_block_index(s);
4705 #ifdef CONFIG_H261_ENCODER
4706 if(s->codec_id == CODEC_ID_H261){
4707 ff_h261_reorder_mb_index(s);
4708 xy= s->mb_y*s->mb_stride + s->mb_x;
4709 mb_type= s->mb_type[xy];
4711 #endif
4713 /* write gob / video packet header */
4714 if(s->rtp_mode){
4715 int current_packet_size, is_gob_start;
4717 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4719 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4721 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4723 switch(s->codec_id){
4724 case CODEC_ID_H263:
4725 case CODEC_ID_H263P:
4726 if(!s->h263_slice_structured)
4727 if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4728 break;
4729 case CODEC_ID_MPEG2VIDEO:
4730 if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4731 case CODEC_ID_MPEG1VIDEO:
4732 if(s->mb_skip_run) is_gob_start=0;
4733 break;
4736 if(is_gob_start){
4737 if(s->start_mb_y != mb_y || mb_x!=0){
4738 write_slice_end(s);
4740 if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4741 ff_mpeg4_init_partitions(s);
4745 assert((put_bits_count(&s->pb)&7) == 0);
4746 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4748 if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4749 int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4750 int d= 100 / s->avctx->error_rate;
4751 if(r % d == 0){
4752 current_packet_size=0;
4753 #ifndef ALT_BITSTREAM_WRITER
4754 s->pb.buf_ptr= s->ptr_lastgob;
4755 #endif
4756 assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4760 if (s->avctx->rtp_callback){
4761 int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4762 s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4765 switch(s->codec_id){
4766 case CODEC_ID_MPEG4:
4767 ff_mpeg4_encode_video_packet_header(s);
4768 ff_mpeg4_clean_buffers(s);
4769 break;
4770 case CODEC_ID_MPEG1VIDEO:
4771 case CODEC_ID_MPEG2VIDEO:
4772 ff_mpeg1_encode_slice_header(s);
4773 ff_mpeg1_clean_buffers(s);
4774 break;
4775 case CODEC_ID_H263:
4776 case CODEC_ID_H263P:
4777 h263_encode_gob_header(s, mb_y);
4778 break;
4781 if(s->flags&CODEC_FLAG_PASS1){
4782 int bits= put_bits_count(&s->pb);
4783 s->misc_bits+= bits - s->last_bits;
4784 s->last_bits= bits;
4787 s->ptr_lastgob += current_packet_size;
4788 s->first_slice_line=1;
4789 s->resync_mb_x=mb_x;
4790 s->resync_mb_y=mb_y;
4794 if( (s->resync_mb_x == s->mb_x)
4795 && s->resync_mb_y+1 == s->mb_y){
4796 s->first_slice_line=0;
4799 s->mb_skipped=0;
4800 s->dquant=0; //only for QP_RD
4802 if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4803 int next_block=0;
4804 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4806 copy_context_before_encode(&backup_s, s, -1);
4807 backup_s.pb= s->pb;
4808 best_s.data_partitioning= s->data_partitioning;
4809 best_s.partitioned_frame= s->partitioned_frame;
4810 if(s->data_partitioning){
4811 backup_s.pb2= s->pb2;
4812 backup_s.tex_pb= s->tex_pb;
4815 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4816 s->mv_dir = MV_DIR_FORWARD;
4817 s->mv_type = MV_TYPE_16X16;
4818 s->mb_intra= 0;
4819 s->mv[0][0][0] = s->p_mv_table[xy][0];
4820 s->mv[0][0][1] = s->p_mv_table[xy][1];
4821 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4822 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4824 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4825 s->mv_dir = MV_DIR_FORWARD;
4826 s->mv_type = MV_TYPE_FIELD;
4827 s->mb_intra= 0;
4828 for(i=0; i<2; i++){
4829 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4830 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4831 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4833 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4834 &dmin, &next_block, 0, 0);
4836 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4837 s->mv_dir = MV_DIR_FORWARD;
4838 s->mv_type = MV_TYPE_16X16;
4839 s->mb_intra= 0;
4840 s->mv[0][0][0] = 0;
4841 s->mv[0][0][1] = 0;
4842 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4843 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4845 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4846 s->mv_dir = MV_DIR_FORWARD;
4847 s->mv_type = MV_TYPE_8X8;
4848 s->mb_intra= 0;
4849 for(i=0; i<4; i++){
4850 s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4851 s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4853 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4854 &dmin, &next_block, 0, 0);
4856 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4857 s->mv_dir = MV_DIR_FORWARD;
4858 s->mv_type = MV_TYPE_16X16;
4859 s->mb_intra= 0;
4860 s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4861 s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4862 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4863 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4865 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4866 s->mv_dir = MV_DIR_BACKWARD;
4867 s->mv_type = MV_TYPE_16X16;
4868 s->mb_intra= 0;
4869 s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4870 s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4871 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4872 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4874 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4875 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4876 s->mv_type = MV_TYPE_16X16;
4877 s->mb_intra= 0;
4878 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4879 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4880 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4881 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4882 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
4883 &dmin, &next_block, 0, 0);
4885 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4886 int mx= s->b_direct_mv_table[xy][0];
4887 int my= s->b_direct_mv_table[xy][1];
4889 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4890 s->mb_intra= 0;
4891 ff_mpeg4_set_direct_mv(s, mx, my);
4892 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
4893 &dmin, &next_block, mx, my);
4895 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
4896 s->mv_dir = MV_DIR_FORWARD;
4897 s->mv_type = MV_TYPE_FIELD;
4898 s->mb_intra= 0;
4899 for(i=0; i<2; i++){
4900 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4901 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4902 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4904 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
4905 &dmin, &next_block, 0, 0);
4907 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
4908 s->mv_dir = MV_DIR_BACKWARD;
4909 s->mv_type = MV_TYPE_FIELD;
4910 s->mb_intra= 0;
4911 for(i=0; i<2; i++){
4912 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4913 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4914 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4916 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
4917 &dmin, &next_block, 0, 0);
4919 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
4920 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4921 s->mv_type = MV_TYPE_FIELD;
4922 s->mb_intra= 0;
4923 for(dir=0; dir<2; dir++){
4924 for(i=0; i<2; i++){
4925 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4926 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4927 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4930 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
4931 &dmin, &next_block, 0, 0);
4933 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4934 s->mv_dir = 0;
4935 s->mv_type = MV_TYPE_16X16;
4936 s->mb_intra= 1;
4937 s->mv[0][0][0] = 0;
4938 s->mv[0][0][1] = 0;
4939 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
4940 &dmin, &next_block, 0, 0);
4941 if(s->h263_pred || s->h263_aic){
4942 if(best_s.mb_intra)
4943 s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4944 else
4945 ff_clean_intra_table_entries(s); //old mode?
4949 if(s->flags & CODEC_FLAG_QP_RD){
4950 if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4951 const int last_qp= backup_s.qscale;
4952 int dquant, dir, qp, dc[6];
4953 DCTELEM ac[6][16];
4954 const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4956 assert(backup_s.dquant == 0);
4958 //FIXME intra
4959 s->mv_dir= best_s.mv_dir;
4960 s->mv_type = MV_TYPE_16X16;
4961 s->mb_intra= best_s.mb_intra;
4962 s->mv[0][0][0] = best_s.mv[0][0][0];
4963 s->mv[0][0][1] = best_s.mv[0][0][1];
4964 s->mv[1][0][0] = best_s.mv[1][0][0];
4965 s->mv[1][0][1] = best_s.mv[1][0][1];
4967 dir= s->pict_type == B_TYPE ? 2 : 1;
4968 if(last_qp + dir > s->avctx->qmax) dir= -dir;
4969 for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4970 qp= last_qp + dquant;
4971 if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4972 break;
4973 backup_s.dquant= dquant;
4974 if(s->mb_intra && s->dc_val[0]){
4975 for(i=0; i<6; i++){
4976 dc[i]= s->dc_val[0][ s->block_index[i] ];
4977 memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4981 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
4982 &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4983 if(best_s.qscale != qp){
4984 if(s->mb_intra && s->dc_val[0]){
4985 for(i=0; i<6; i++){
4986 s->dc_val[0][ s->block_index[i] ]= dc[i];
4987 memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4990 if(dir > 0 && dquant==dir){
4991 dquant= 0;
4992 dir= -dir;
4993 }else
4994 break;
4997 qp= best_s.qscale;
4998 s->current_picture.qscale_table[xy]= qp;
5002 copy_context_after_encode(s, &best_s, -1);
5004 pb_bits_count= put_bits_count(&s->pb);
5005 flush_put_bits(&s->pb);
5006 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5007 s->pb= backup_s.pb;
5009 if(s->data_partitioning){
5010 pb2_bits_count= put_bits_count(&s->pb2);
5011 flush_put_bits(&s->pb2);
5012 ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5013 s->pb2= backup_s.pb2;
5015 tex_pb_bits_count= put_bits_count(&s->tex_pb);
5016 flush_put_bits(&s->tex_pb);
5017 ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5018 s->tex_pb= backup_s.tex_pb;
5020 s->last_bits= put_bits_count(&s->pb);
5022 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5023 ff_h263_update_motion_val(s);
5025 if(next_block==0){ //FIXME 16 vs linesize16
5026 s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad , s->linesize ,16);
5027 s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize , s->uvlinesize, 8);
5028 s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5031 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5032 MPV_decode_mb(s, s->block);
5033 } else {
5034 int motion_x, motion_y;
5035 s->mv_type=MV_TYPE_16X16;
5036 // only one MB-Type possible
5038 switch(mb_type){
5039 case CANDIDATE_MB_TYPE_INTRA:
5040 s->mv_dir = 0;
5041 s->mb_intra= 1;
5042 motion_x= s->mv[0][0][0] = 0;
5043 motion_y= s->mv[0][0][1] = 0;
5044 break;
5045 case CANDIDATE_MB_TYPE_INTER:
5046 s->mv_dir = MV_DIR_FORWARD;
5047 s->mb_intra= 0;
5048 motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5049 motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5050 break;
5051 case CANDIDATE_MB_TYPE_INTER_I:
5052 s->mv_dir = MV_DIR_FORWARD;
5053 s->mv_type = MV_TYPE_FIELD;
5054 s->mb_intra= 0;
5055 for(i=0; i<2; i++){
5056 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5057 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5058 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5060 motion_x = motion_y = 0;
5061 break;
5062 case CANDIDATE_MB_TYPE_INTER4V:
5063 s->mv_dir = MV_DIR_FORWARD;
5064 s->mv_type = MV_TYPE_8X8;
5065 s->mb_intra= 0;
5066 for(i=0; i<4; i++){
5067 s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5068 s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5070 motion_x= motion_y= 0;
5071 break;
5072 case CANDIDATE_MB_TYPE_DIRECT:
5073 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5074 s->mb_intra= 0;
5075 motion_x=s->b_direct_mv_table[xy][0];
5076 motion_y=s->b_direct_mv_table[xy][1];
5077 ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5078 break;
5079 case CANDIDATE_MB_TYPE_BIDIR:
5080 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5081 s->mb_intra= 0;
5082 motion_x=0;
5083 motion_y=0;
5084 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5085 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5086 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5087 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5088 break;
5089 case CANDIDATE_MB_TYPE_BACKWARD:
5090 s->mv_dir = MV_DIR_BACKWARD;
5091 s->mb_intra= 0;
5092 motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5093 motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5094 break;
5095 case CANDIDATE_MB_TYPE_FORWARD:
5096 s->mv_dir = MV_DIR_FORWARD;
5097 s->mb_intra= 0;
5098 motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5099 motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5100 // printf(" %d %d ", motion_x, motion_y);
5101 break;
5102 case CANDIDATE_MB_TYPE_FORWARD_I:
5103 s->mv_dir = MV_DIR_FORWARD;
5104 s->mv_type = MV_TYPE_FIELD;
5105 s->mb_intra= 0;
5106 for(i=0; i<2; i++){
5107 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5108 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5109 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5111 motion_x=motion_y=0;
5112 break;
5113 case CANDIDATE_MB_TYPE_BACKWARD_I:
5114 s->mv_dir = MV_DIR_BACKWARD;
5115 s->mv_type = MV_TYPE_FIELD;
5116 s->mb_intra= 0;
5117 for(i=0; i<2; i++){
5118 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5119 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5120 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5122 motion_x=motion_y=0;
5123 break;
5124 case CANDIDATE_MB_TYPE_BIDIR_I:
5125 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5126 s->mv_type = MV_TYPE_FIELD;
5127 s->mb_intra= 0;
5128 for(dir=0; dir<2; dir++){
5129 for(i=0; i<2; i++){
5130 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5131 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5132 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5135 motion_x=motion_y=0;
5136 break;
5137 default:
5138 motion_x=motion_y=0; //gcc warning fix
5139 av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5142 encode_mb(s, motion_x, motion_y);
5144 // RAL: Update last macroblock type
5145 s->last_mv_dir = s->mv_dir;
5147 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5148 ff_h263_update_motion_val(s);
5150 MPV_decode_mb(s, s->block);
5153 /* clean the MV table in IPS frames for direct mode in B frames */
5154 if(s->mb_intra /* && I,P,S_TYPE */){
5155 s->p_mv_table[xy][0]=0;
5156 s->p_mv_table[xy][1]=0;
5159 if(s->flags&CODEC_FLAG_PSNR){
5160 int w= 16;
5161 int h= 16;
5163 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5164 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5166 s->current_picture_ptr->error[0] += sse(
5167 s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5168 s->dest[0], w, h, s->linesize);
5169 s->current_picture_ptr->error[1] += sse(
5170 s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
5171 s->dest[1], w>>1, h>>1, s->uvlinesize);
5172 s->current_picture_ptr->error[2] += sse(
5173 s, s->new_picture .data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
5174 s->dest[2], w>>1, h>>1, s->uvlinesize);
5176 if(s->loop_filter){
5177 if(s->out_format == FMT_H263)
5178 ff_h263_loop_filter(s);
5180 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5184 //not beautiful here but we must write it before flushing so it has to be here
5185 if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5186 msmpeg4_encode_ext_header(s);
5188 write_slice_end(s);
5190 /* Send the last GOB if RTP */
5191 if (s->avctx->rtp_callback) {
5192 int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5193 pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5194 /* Call the RTP callback to send the last GOB */
5195 emms_c();
5196 s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5199 return 0;
/* NOTE(review): this listing carries web-export artifacts — the original blob
 * line number is fused into the start of every code line and some brace-only
 * lines were dropped by the scrape (see the 5206->5209 gap: the closing `}` of
 * this function is missing). Code is left byte-identical; only comments added. */

/* MERGE(field): accumulate src->field into dst->field, then zero src->field,
 * so per-slice-thread statistics can be folded into the main context exactly once. */
5202 #define MERGE(field) dst->field += src->field; src->field=0

/* Fold the motion-estimation statistics gathered by a worker thread context
 * (src) into the main encoder context (dst) after the ME pass. */
5203 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5204 MERGE(me.scene_change_score);
5205 MERGE(me.mc_mb_var_sum_temp);
5206 MERGE(me.mb_var_sum_temp);
/* Fold per-thread encoding statistics and the thread's bitstream into the main
 * context after the encode pass: bit/counter totals, optional noise-reduction
 * DCT error sums, then the slice bits themselves (must be byte-aligned).
 * NOTE(review): listing has web-export artifacts (line numbers fused into each
 * line, some brace-only lines dropped, e.g. after 5228); code kept byte-identical. */
5209 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5210 int i;

/* simple additive counters (MERGE also zeroes the src side) */
5212 MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5213 MERGE(dct_count[1]);
5214 MERGE(mv_bits);
5215 MERGE(i_tex_bits);
5216 MERGE(p_tex_bits);
5217 MERGE(i_count);
5218 MERGE(f_count);
5219 MERGE(b_count);
5220 MERGE(skip_count);
5221 MERGE(misc_bits);
5222 MERGE(error_count);
5223 MERGE(padding_bug_score);

/* noise-reduction state is only maintained when the feature is enabled */
5225 if(dst->avctx->noise_reduction){
5226 for(i=0; i<64; i++){
5227 MERGE(dct_error_sum[0][i]);
5228 MERGE(dct_error_sum[1][i]);

/* append the worker's bitstream to the main one; both writers must be
 * byte-aligned for ff_copy_bits to be a straight byte copy */
5232 assert(put_bits_count(&src->pb) % 8 ==0);
5233 assert(put_bits_count(&dst->pb) % 8 ==0);
5234 ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5235 flush_put_bits(&dst->pb);
/* Top-level per-picture encode driver: runs motion estimation across all
 * worker threads, picks/fixes f_code & b_code, performs scene-change promotion
 * to I-frame, runs rate control and qscale setup, writes the per-format
 * picture header, then dispatches encode_thread over all slice threads and
 * merges the results.
 * NOTE(review): this listing carries web-export artifacts — blob line numbers
 * are fused into each code line and several brace-only lines were dropped
 * (e.g. the opening `{` at blob line 5239, closing braces at 5264-5265, 5375-5376,
 * 5446, etc.). Code is left byte-identical; only comments are added. */
5238 static void encode_picture(MpegEncContext *s, int picture_number)
5240 int i;
5241 int bits;
5243 s->picture_number = picture_number;
5245 /* Reset the average MB variance */
5246 s->me.mb_var_sum_temp =
5247 s->me.mc_mb_var_sum_temp = 0;
5249 /* we need to initialize some time vars before we can encode b-frames */
5250 // RAL: Condition added for MPEG1VIDEO
5251 if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5252 ff_set_mpeg4_time(s, s->picture_number); //FIXME rename and use has_b_frames or similar
5254 s->me.scene_change_score=0;
5256 // s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

/* rounding mode: I-frames reset it (msmpeg4 v3+ uses no_rounding=1); for
 * P/S frames the codecs listed below alternate it each non-B frame */
5258 if(s->pict_type==I_TYPE){
5259 if(s->msmpeg4_version >= 3) s->no_rounding=1;
5260 else s->no_rounding=0;
5261 }else if(s->pict_type!=B_TYPE){
5262 if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5263 s->no_rounding ^= 1;

/* prepare worker thread contexts as copies of the main one */
5266 s->mb_intra=0; //for the rate distortion & bit compare functions
5267 for(i=1; i<s->avctx->thread_count; i++){
5268 ff_update_duplicate_context(s->thread_context[i], s);
5271 ff_init_me(s);
5273 /* Estimate motion for every MB */
5274 if(s->pict_type != I_TYPE){
5275 s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5276 s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5277 if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5278 if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5279 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5283 s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5284 }else /* if(s->pict_type == I_TYPE) */{
5285 /* I-Frame */
5286 for(i=0; i<s->mb_stride*s->mb_height; i++)
5287 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5289 if(!s->fixed_qscale){
5290 /* finding spatial complexity for I-frame rate control */
5291 s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);

/* collect per-thread ME statistics into the main context */
5294 for(i=1; i<s->avctx->thread_count; i++){
5295 merge_context_after_me(s, s->thread_context[i]);
5297 s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5298 s->current_picture. mb_var_sum= s->current_picture_ptr-> mb_var_sum= s->me. mb_var_sum_temp;
5299 emms_c();

/* scene change: promote a P picture to I and mark every MB intra */
5301 if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5302 s->pict_type= I_TYPE;
5303 for(i=0; i<s->mb_stride*s->mb_height; i++)
5304 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5305 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);

/* choose f_code/b_code from the collected MV tables and clamp any motion
 * vectors that the chosen code range cannot represent (skipped for UMV) */
5308 if(!s->umvplus){
5309 if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5310 s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5312 if(s->flags & CODEC_FLAG_INTERLACED_ME){
5313 int a,b;
5314 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5315 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5316 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5319 ff_fix_long_p_mvs(s);
5320 ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5321 if(s->flags & CODEC_FLAG_INTERLACED_ME){
5322 int j;
5323 for(i=0; i<2; i++){
5324 for(j=0; j<2; j++)
5325 ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5326 s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5331 if(s->pict_type==B_TYPE){
5332 int a, b;
5334 a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5335 b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5336 s->f_code = FFMAX(a, b);
5338 a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5339 b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5340 s->b_code = FFMAX(a, b);
5342 ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5343 ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5344 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5345 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5346 if(s->flags & CODEC_FLAG_INTERLACED_ME){
5347 int dir, j;
5348 for(dir=0; dir<2; dir++){
5349 for(i=0; i<2; i++){
5350 for(j=0; j<2; j++){
5351 int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5352 : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5353 ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5354 s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);

/* rate control: pick the picture quality, then derive lambda/qscale */
5362 if (!s->fixed_qscale)
5363 s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
5365 if(s->adaptive_quant){
5366 switch(s->codec_id){
5367 case CODEC_ID_MPEG4:
5368 ff_clean_mpeg4_qscales(s);
5369 break;
5370 case CODEC_ID_H263:
5371 case CODEC_ID_H263P:
5372 case CODEC_ID_FLV1:
5373 ff_clean_h263_qscales(s);
5374 break;
5377 s->lambda= s->lambda_table[0];
5378 //FIXME broken
5379 }else
5380 s->lambda= s->current_picture.quality;
5381 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5382 update_qscale(s);
5384 if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5385 s->qscale= 3; //reduce clipping problems
5387 if (s->out_format == FMT_MJPEG) {
5388 /* for mjpeg, we do include qscale in the matrix */
5389 s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5390 for(i=1;i<64;i++){
5391 int j= s->dsp.idct_permutation[i];
5393 s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
5395 convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5396 s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5397 s->qscale= 8;

/* mirror picture type / keyframe flag into both picture structs */
5400 //FIXME var duplication
5401 s->current_picture_ptr->key_frame=
5402 s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5403 s->current_picture_ptr->pict_type=
5404 s->current_picture.pict_type= s->pict_type;
5406 if(s->current_picture.key_frame)
5407 s->picture_in_gop_number=0;

/* write the per-output-format picture header and account its bits */
5409 s->last_bits= put_bits_count(&s->pb);
5410 switch(s->out_format) {
5411 case FMT_MJPEG:
5412 mjpeg_picture_header(s);
5413 break;
5414 #ifdef CONFIG_H261_ENCODER
5415 case FMT_H261:
5416 ff_h261_encode_picture_header(s, picture_number);
5417 break;
5418 #endif
5419 case FMT_H263:
5420 if (s->codec_id == CODEC_ID_WMV2)
5421 ff_wmv2_encode_picture_header(s, picture_number);
5422 else if (s->h263_msmpeg4)
5423 msmpeg4_encode_picture_header(s, picture_number);
5424 else if (s->h263_pred)
5425 mpeg4_encode_picture_header(s, picture_number);
5426 #ifdef CONFIG_RV10_ENCODER
5427 else if (s->codec_id == CODEC_ID_RV10)
5428 rv10_encode_picture_header(s, picture_number);
5429 #endif
5430 #ifdef CONFIG_RV20_ENCODER
5431 else if (s->codec_id == CODEC_ID_RV20)
5432 rv20_encode_picture_header(s, picture_number);
5433 #endif
5434 else if (s->codec_id == CODEC_ID_FLV1)
5435 ff_flv_encode_picture_header(s, picture_number);
5436 else
5437 h263_encode_picture_header(s, picture_number);
5438 break;
5439 case FMT_MPEG1:
5440 mpeg1_encode_picture_header(s, picture_number);
5441 break;
5442 case FMT_H264:
5443 break;
5444 default:
5445 assert(0);
5447 bits= put_bits_count(&s->pb);
5448 s->header_bits= bits - s->last_bits;

/* run the slice encoders on all threads, then merge their output */
5450 for(i=1; i<s->avctx->thread_count; i++){
5451 update_duplicate_context_after_me(s->thread_context[i], s);
5453 s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5454 for(i=1; i<s->avctx->thread_count; i++){
5455 merge_context_after_encode(s, s->thread_context[i]);
5457 emms_c();
5460 #endif //CONFIG_ENCODERS
/* Noise reduction on a DCT block (C reference implementation): each
 * coefficient is shrunk toward zero by the per-position offset
 * dct_offset[intra][i] (never crossing zero), while dct_error_sum accumulates
 * the coefficient magnitudes that feed the offset estimation elsewhere.
 * NOTE(review): listing is a web export — blob line numbers are fused into
 * each line and the three closing braces after blob line 5481 were dropped;
 * code left byte-identical. */
5462 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5463 const int intra= s->mb_intra;
5464 int i;

/* count processed blocks separately for intra / inter statistics */
5466 s->dct_count[intra]++;
5468 for(i=0; i<64; i++){
5469 int level= block[i];
5471 if(level){
5472 if(level>0){
5473 s->dct_error_sum[intra][i] += level;
5474 level -= s->dct_offset[intra][i];
5475 if(level<0) level=0; /* clamp: shrinking must not flip the sign */
5476 }else{
5477 s->dct_error_sum[intra][i] -= level;
5478 level += s->dct_offset[intra][i];
5479 if(level>0) level=0;
5481 block[i]= level;
5486 #ifdef CONFIG_ENCODERS
/* Trellis (rate-distortion optimal) quantization of one 8x8 block.
 * Forward-DCTs the block, then runs a dynamic program over scan positions:
 * for each coefficient it considers up to two candidate quantized levels
 * (coeff[0]/coeff[1]) plus zero, scoring distortion + VLC bit cost * lambda,
 * with a survivor list pruning the run-length predecessors. Returns the index
 * of the last nonzero coefficient (like dct_quantize_c) and writes the chosen
 * levels back into block[] in permuted scan order; *overflow is set when a
 * level exceeds s->max_qcoeff.
 * NOTE(review): web-export artifacts — blob line numbers fused into every
 * line, many brace-only lines dropped (gaps like 5562->5566). Code is left
 * byte-identical; only comments added. */
5488 static int dct_quantize_trellis_c(MpegEncContext *s,
5489 DCTELEM *block, int n,
5490 int qscale, int *overflow){
5491 const int *qmat;
5492 const uint8_t *scantable= s->intra_scantable.scantable;
5493 const uint8_t *perm_scantable= s->intra_scantable.permutated;
5494 int max=0;
5495 unsigned int threshold1, threshold2;
5496 int bias=0;
5497 int run_tab[65];
5498 int level_tab[65];
5499 int score_tab[65];
5500 int survivor[65];
5501 int survivor_count;
5502 int last_run=0;
5503 int last_level=0;
5504 int last_score= 0;
5505 int last_i;
5506 int coeff[2][64];
5507 int coeff_count[64];
5508 int qmul, qadd, start_i, last_non_zero, i, dc;
5509 const int esc_length= s->ac_esc_length;
5510 uint8_t * length;
5511 uint8_t * last_length;
5512 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

/* forward transform (and optional denoising) before quantization */
5514 s->dsp.fdct (block);
5516 if(s->dct_error_sum)
5517 s->denoise_dct(s, block);
5518 qmul= qscale*16;
5519 qadd= ((qscale-1)|1)*8;

/* intra: quantize DC separately and start the AC scan at 1;
 * inter: all 64 coefficients take part */
5521 if (s->mb_intra) {
5522 int q;
5523 if (!s->h263_aic) {
5524 if (n < 4)
5525 q = s->y_dc_scale;
5526 else
5527 q = s->c_dc_scale;
5528 q = q << 3;
5529 } else{
5530 /* For AIC we skip quant/dequant of INTRADC */
5531 q = 1 << 3;
5532 qadd=0;
5535 /* note: block[0] is assumed to be positive */
5536 block[0] = (block[0] + (q >> 1)) / q;
5537 start_i = 1;
5538 last_non_zero = 0;
5539 qmat = s->q_intra_matrix[qscale];
5540 if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5541 bias= 1<<(QMAT_SHIFT-1);
5542 length = s->intra_ac_vlc_length;
5543 last_length= s->intra_ac_vlc_last_length;
5544 } else {
5545 start_i = 0;
5546 last_non_zero = -1;
5547 qmat = s->q_inter_matrix[qscale];
5548 length = s->inter_ac_vlc_length;
5549 last_length= s->inter_ac_vlc_last_length;
5551 last_i= start_i;
5553 threshold1= (1<<QMAT_SHIFT) - bias - 1;
5554 threshold2= (threshold1<<1);

/* find the last coefficient that survives quantization at all */
5556 for(i=63; i>=start_i; i--) {
5557 const int j = scantable[i];
5558 int level = block[j] * qmat[j];
5560 if(((unsigned)(level+threshold1))>threshold2){
5561 last_non_zero = i;
5562 break;

/* precompute up to two candidate levels per scan position */
5566 for(i=start_i; i<=last_non_zero; i++) {
5567 const int j = scantable[i];
5568 int level = block[j] * qmat[j];
5570 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
5571 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
5572 if(((unsigned)(level+threshold1))>threshold2){
5573 if(level>0){
5574 level= (bias + level)>>QMAT_SHIFT;
5575 coeff[0][i]= level;
5576 coeff[1][i]= level-1;
5577 // coeff[2][k]= level-2;
5578 }else{
5579 level= (bias - level)>>QMAT_SHIFT;
5580 coeff[0][i]= -level;
5581 coeff[1][i]= -level+1;
5582 // coeff[2][k]= -level+2;
5584 coeff_count[i]= FFMIN(level, 2);
5585 assert(coeff_count[i]);
5586 max |=level;
5587 }else{
5588 coeff[0][i]= (level>>31)|1; /* +/-1 candidate for sub-threshold coeffs */
5589 coeff_count[i]= 1;
5593 *overflow= s->max_qcoeff < max; //overflow might have happened

/* all-zero block: clear and bail out early */
5595 if(last_non_zero < start_i){
5596 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5597 return last_non_zero;

/* dynamic program over scan positions; survivor[] holds candidate
 * predecessor positions (run starts) still worth extending */
5600 score_tab[start_i]= 0;
5601 survivor[0]= start_i;
5602 survivor_count= 1;
5604 for(i=start_i; i<=last_non_zero; i++){
5605 int level_index, j;
5606 const int dct_coeff= ABS(block[ scantable[i] ]);
5607 const int zero_distoration= dct_coeff*dct_coeff;
5608 int best_score=256*256*256*120;
5609 for(level_index=0; level_index < coeff_count[i]; level_index++){
5610 int distoration;
5611 int level= coeff[level_index][i];
5612 const int alevel= ABS(level);
5613 int unquant_coeff;
5615 assert(level);

/* reconstruct the dequantized value the decoder would see, so the
 * distortion term matches real decoder output */
5617 if(s->out_format == FMT_H263){
5618 unquant_coeff= alevel*qmul + qadd;
5619 }else{ //MPEG1
5620 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5621 if(s->mb_intra){
5622 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3;
5623 unquant_coeff = (unquant_coeff - 1) | 1;
5624 }else{
5625 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5626 unquant_coeff = (unquant_coeff - 1) | 1;
5628 unquant_coeff<<= 3;
5631 distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5632 level+=64;
5633 if((level&(~127)) == 0){
/* level fits the non-escape VLC range: use table bit lengths */
5634 for(j=survivor_count-1; j>=0; j--){
5635 int run= i - survivor[j];
5636 int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5637 score += score_tab[i-run];
5639 if(score < best_score){
5640 best_score= score;
5641 run_tab[i+1]= run;
5642 level_tab[i+1]= level-64;
5646 if(s->out_format == FMT_H263){
/* also track the best "this is the LAST coefficient" choice */
5647 for(j=survivor_count-1; j>=0; j--){
5648 int run= i - survivor[j];
5649 int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5650 score += score_tab[i-run];
5651 if(score < last_score){
5652 last_score= score;
5653 last_run= run;
5654 last_level= level-64;
5655 last_i= i+1;
5659 }else{
/* escape-coded level: fixed escape length */
5660 distoration += esc_length*lambda;
5661 for(j=survivor_count-1; j>=0; j--){
5662 int run= i - survivor[j];
5663 int score= distoration + score_tab[i-run];
5665 if(score < best_score){
5666 best_score= score;
5667 run_tab[i+1]= run;
5668 level_tab[i+1]= level-64;
5672 if(s->out_format == FMT_H263){
5673 for(j=survivor_count-1; j>=0; j--){
5674 int run= i - survivor[j];
5675 int score= distoration + score_tab[i-run];
5676 if(score < last_score){
5677 last_score= score;
5678 last_run= run;
5679 last_level= level-64;
5680 last_i= i+1;
5687 score_tab[i+1]= best_score;

/* prune survivors that can no longer beat the current best path */
5689 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
5690 if(last_non_zero <= 27){
5691 for(; survivor_count; survivor_count--){
5692 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5693 break;
5695 }else{
5696 for(; survivor_count; survivor_count--){
5697 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5698 break;
5702 survivor[ survivor_count++ ]= i+1;

/* non-H263 formats have no explicit LAST code, so pick the cheapest
 * truncation point from the DP table instead */
5705 if(s->out_format != FMT_H263){
5706 last_score= 256*256*256*120;
5707 for(i= survivor[0]; i<=last_non_zero + 1; i++){
5708 int score= score_tab[i];
5709 if(i) score += lambda*2; //FIXME exacter?
5711 if(score < last_score){
5712 last_score= score;
5713 last_i= i;
5714 last_level= level_tab[i];
5715 last_run= run_tab[i];
5720 s->coded_score[n] = last_score;

5722 dc= ABS(block[0]);
5723 last_non_zero= last_i - 1;
5724 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5726 if(last_non_zero < start_i)
5727 return last_non_zero;

/* special case: a lone coefficient at position 0 — re-decide between
 * keeping it (at either candidate level) and dropping the block */
5729 if(last_non_zero == 0 && start_i == 0){
5730 int best_level= 0;
5731 int best_score= dc * dc;
5733 for(i=0; i<coeff_count[0]; i++){
5734 int level= coeff[i][0];
5735 int alevel= ABS(level);
5736 int unquant_coeff, score, distortion;
5738 if(s->out_format == FMT_H263){
5739 unquant_coeff= (alevel*qmul + qadd)>>3;
5740 }else{ //MPEG1
5741 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5742 unquant_coeff = (unquant_coeff - 1) | 1;
5744 unquant_coeff = (unquant_coeff + 4) >> 3;
5745 unquant_coeff<<= 3 + 3;
5747 distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5748 level+=64;
5749 if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5750 else score= distortion + esc_length*lambda;
5752 if(score < best_score){
5753 best_score= score;
5754 best_level= level - 64;
5757 block[0]= best_level;
5758 s->coded_score[n] = best_score - dc*dc;
5759 if(best_level == 0) return -1;
5760 else return last_non_zero;

/* backtrack the DP path, writing chosen levels into the block */
5763 i= last_i;
5764 assert(last_level);
5766 block[ perm_scantable[last_non_zero] ]= last_level;
5767 i -= last_run + 1;
5769 for(; i>start_i; i -= run_tab[i] + 1){
5770 block[ perm_scantable[i-1] ]= level_tab[i];
5773 return last_non_zero;
5776 //#define REFINE_STATS 1
5777 static int16_t basis[64][64];
/* Precompute the 64 8x8 IDCT basis images (scaled by 1<<BASIS_SHIFT) used by
 * dct_quantize_refine's try_8x8basis/add_8x8basis calls, stored in the given
 * IDCT coefficient permutation order. Standard 2-D DCT-III basis with the
 * sqrt(0.5) normalization on the first row/column.
 * NOTE(review): listing is a web export — blob line numbers fused into each
 * line, trailing closing braces after blob line 5791 dropped; code unchanged. */
5779 static void build_basis(uint8_t *perm){
5780 int i, j, x, y;
5781 emms_c(); /* leave MMX state before doing FPU math */
5782 for(i=0; i<8; i++){
5783 for(j=0; j<8; j++){
5784 for(y=0; y<8; y++){
5785 for(x=0; x<8; x++){
5786 double s= 0.25*(1<<BASIS_SHIFT);
5787 int index= 8*i + j;
5788 int perm_index= perm[index];
5789 if(i==0) s*= sqrt(0.5);
5790 if(j==0) s*= sqrt(0.5);
5791 basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
/* Iterative rate-distortion refinement of an already-quantized 8x8 block
 * (quantizer_noise_shaping path). Maintains rem[] = weighted reconstruction
 * error in the pixel domain via the precomputed basis[] images, and
 * repeatedly tries +/-1 changes on each coefficient, accepting the change
 * with the best (distortion + bitcost*lambda) improvement until no change
 * helps. Returns the (possibly updated) last-nonzero index.
 * NOTE(review): web-export artifacts — blob line numbers fused into each
 * line, numerous brace-only lines dropped (gaps such as 5840->5842,
 * 5967->5972). Code left byte-identical; only comments added. */
5798 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5799 DCTELEM *block, int16_t *weight, DCTELEM *orig,
5800 int n, int qscale){
5801 int16_t rem[64];
5802 DCTELEM d1[64] __align16;
5803 const int *qmat;
5804 const uint8_t *scantable= s->intra_scantable.scantable;
5805 const uint8_t *perm_scantable= s->intra_scantable.permutated;
5806 // unsigned int threshold1, threshold2;
5807 // int bias=0;
5808 int run_tab[65];
5809 int prev_run=0;
5810 int prev_level=0;
5811 int qmul, qadd, start_i, last_non_zero, i, dc;
5812 uint8_t * length;
5813 uint8_t * last_length;
5814 int lambda;
5815 int rle_index, run, q, sum;
5816 #ifdef REFINE_STATS
5817 static int count=0;
5818 static int after_last=0;
5819 static int to_zero=0;
5820 static int from_zero=0;
5821 static int raise=0;
5822 static int lower=0;
5823 static int messed_sign=0;
5824 #endif

/* lazily build the DCT basis table on first use */
5826 if(basis[0][0] == 0)
5827 build_basis(s->dsp.idct_permutation);
5829 qmul= qscale*2;
5830 qadd= (qscale-1)|1;

/* intra: DC handled separately (start_i=1); inter: refine everything */
5831 if (s->mb_intra) {
5832 if (!s->h263_aic) {
5833 if (n < 4)
5834 q = s->y_dc_scale;
5835 else
5836 q = s->c_dc_scale;
5837 } else{
5838 /* For AIC we skip quant/dequant of INTRADC */
5839 q = 1;
5840 qadd=0;
5842 q <<= RECON_SHIFT-3;
5843 /* note: block[0] is assumed to be positive */
5844 dc= block[0]*q;
5845 // block[0] = (block[0] + (q >> 1)) / q;
5846 start_i = 1;
5847 qmat = s->q_intra_matrix[qscale];
5848 // if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5849 // bias= 1<<(QMAT_SHIFT-1);
5850 length = s->intra_ac_vlc_length;
5851 last_length= s->intra_ac_vlc_last_length;
5852 } else {
5853 dc= 0;
5854 start_i = 0;
5855 qmat = s->q_inter_matrix[qscale];
5856 length = s->inter_ac_vlc_length;
5857 last_length= s->inter_ac_vlc_last_length;
5859 last_non_zero = s->block_last_index[n];
5861 #ifdef REFINE_STATS
5862 {START_TIMER
5863 #endif

/* rem[] starts as DC-minus-original in the RECON_SHIFT fixed-point domain;
 * each coded coefficient's basis image is added below */
5864 dc += (1<<(RECON_SHIFT-1));
5865 for(i=0; i<64; i++){
5866 rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig dirrectly instead of copying to rem[]
5868 #ifdef REFINE_STATS
5869 STOP_TIMER("memset rem[]")}
5870 #endif

/* turn the caller's weights into the 16..63 range and derive lambda from
 * their energy so bit cost and weighted distortion are comparable */
5871 sum=0;
5872 for(i=0; i<64; i++){
5873 int one= 36;
5874 int qns=4;
5875 int w;
5877 w= ABS(weight[i]) + qns*one;
5878 w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5880 weight[i] = w;
5881 // w=weight[i] = (63*qns + (w/2)) / w;
5883 assert(w>0);
5884 assert(w<(1<<6));
5885 sum += w*w;
5887 lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5888 #ifdef REFINE_STATS
5889 {START_TIMER
5890 #endif

/* build the initial run-length table and fold each dequantized coefficient
 * into rem[] via its basis image */
5891 run=0;
5892 rle_index=0;
5893 for(i=start_i; i<=last_non_zero; i++){
5894 int j= perm_scantable[i];
5895 const int level= block[j];
5896 int coeff;
5898 if(level){
5899 if(level<0) coeff= qmul*level - qadd;
5900 else coeff= qmul*level + qadd;
5901 run_tab[rle_index++]=run;
5902 run=0;
5904 s->dsp.add_8x8basis(rem, basis[j], coeff);
5905 }else{
5906 run++;
5909 #ifdef REFINE_STATS
5910 if(last_non_zero>0){
5911 STOP_TIMER("init rem[]")
5915 {START_TIMER
5916 #endif

/* main refinement loop: find the single +/-1 coefficient change with the
 * best score; apply it; repeat until nothing improves */
5917 for(;;){
5918 int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5919 int best_coeff=0;
5920 int best_change=0;
5921 int run2, best_unquant_change=0, analyze_gradient;
5922 #ifdef REFINE_STATS
5923 {START_TIMER
5924 #endif
5925 analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;

/* gradient d1[] = DCT of the weighted residual: used below to skip
 * candidate +/-1 levels whose sign would increase the residual */
5927 if(analyze_gradient){
5928 #ifdef REFINE_STATS
5929 {START_TIMER
5930 #endif
5931 for(i=0; i<64; i++){
5932 int w= weight[i];
5934 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5936 #ifdef REFINE_STATS
5937 STOP_TIMER("rem*w*w")}
5938 {START_TIMER
5939 #endif
5940 s->dsp.fdct(d1);
5941 #ifdef REFINE_STATS
5942 STOP_TIMER("dct")}
5943 #endif

/* intra DC: try +/-1 on block[0] (no VLC cost change for DC here) */
5946 if(start_i){
5947 const int level= block[0];
5948 int change, old_coeff;
5950 assert(s->mb_intra);
5952 old_coeff= q*level;
5954 for(change=-1; change<=1; change+=2){
5955 int new_level= level + change;
5956 int score, new_coeff;
5958 new_coeff= q*new_level;
5959 if(new_coeff >= 2048 || new_coeff < 0)
5960 continue;
5962 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5963 if(score<best_score){
5964 best_score= score;
5965 best_coeff= 0;
5966 best_change= change;
5967 best_unquant_change= new_coeff - old_coeff;

/* AC coefficients: walk the scan, tracking run lengths so the VLC bit-cost
 * delta of each candidate change can be computed exactly */
5972 run=0;
5973 rle_index=0;
5974 run2= run_tab[rle_index++];
5975 prev_level=0;
5976 prev_run=0;
5978 for(i=start_i; i<64; i++){
5979 int j= perm_scantable[i];
5980 const int level= block[j];
5981 int change, old_coeff;
5983 if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5984 break;
5986 if(level){
5987 if(level<0) old_coeff= qmul*level - qadd;
5988 else old_coeff= qmul*level + qadd;
5989 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5990 }else{
5991 old_coeff=0;
5992 run2--;
5993 assert(run2>=0 || i >= last_non_zero );
5996 for(change=-1; change<=1; change+=2){
5997 int new_level= level + change;
5998 int score, new_coeff, unquant_change;
6000 score=0;
6001 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6002 continue;
6004 if(new_level){
6005 if(new_level<0) new_coeff= qmul*new_level - qadd;
6006 else new_coeff= qmul*new_level + qadd;
6007 if(new_coeff >= 2048 || new_coeff <= -2048)
6008 continue;
6009 //FIXME check for overflow
6011 if(level){
/* nonzero -> nonzero: bit cost is a simple table delta */
6012 if(level < 63 && level > -63){
6013 if(i < last_non_zero)
6014 score += length[UNI_AC_ENC_INDEX(run, new_level+64)]
6015 - length[UNI_AC_ENC_INDEX(run, level+64)];
6016 else
6017 score += last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6018 - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6020 }else{
/* zero -> +/-1: inserting a coefficient splits the following run */
6021 assert(ABS(new_level)==1);
6023 if(analyze_gradient){
6024 int g= d1[ scantable[i] ];
6025 if(g && (g^new_level) >= 0)
6026 continue;
6029 if(i < last_non_zero){
6030 int next_i= i + run2 + 1;
6031 int next_level= block[ perm_scantable[next_i] ] + 64;
6033 if(next_level&(~127))
6034 next_level= 0;
6036 if(next_i < last_non_zero)
6037 score += length[UNI_AC_ENC_INDEX(run, 65)]
6038 + length[UNI_AC_ENC_INDEX(run2, next_level)]
6039 - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6040 else
6041 score += length[UNI_AC_ENC_INDEX(run, 65)]
6042 + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6043 - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6044 }else{
6045 score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6046 if(prev_level){
6047 score += length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6048 - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6052 }else{
/* +/-1 -> zero: removing a coefficient merges two runs */
6053 new_coeff=0;
6054 assert(ABS(level)==1);
6056 if(i < last_non_zero){
6057 int next_i= i + run2 + 1;
6058 int next_level= block[ perm_scantable[next_i] ] + 64;
6060 if(next_level&(~127))
6061 next_level= 0;
6063 if(next_i < last_non_zero)
6064 score += length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6065 - length[UNI_AC_ENC_INDEX(run2, next_level)]
6066 - length[UNI_AC_ENC_INDEX(run, 65)];
6067 else
6068 score += last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6069 - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6070 - length[UNI_AC_ENC_INDEX(run, 65)];
6071 }else{
6072 score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6073 if(prev_level){
6074 score += last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6075 - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6080 score *= lambda;
6082 unquant_change= new_coeff - old_coeff;
6083 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6085 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6086 if(score<best_score){
6087 best_score= score;
6088 best_coeff= i;
6089 best_change= change;
6090 best_unquant_change= unquant_change;
6093 if(level){
6094 prev_level= level + 64;
6095 if(prev_level&(~127))
6096 prev_level= 0;
6097 prev_run= run;
6098 run=0;
6099 }else{
6100 run++;
6103 #ifdef REFINE_STATS
6104 STOP_TIMER("iterative step")}
6105 #endif

/* apply the best improvement found this pass, or stop if none */
6107 if(best_change){
6108 int j= perm_scantable[ best_coeff ];
6110 block[j] += best_change;
6112 if(best_coeff > last_non_zero){
6113 last_non_zero= best_coeff;
6114 assert(block[j]);
6115 #ifdef REFINE_STATS
6116 after_last++;
6117 #endif
6118 }else{
6119 #ifdef REFINE_STATS
6120 if(block[j]){
6121 if(block[j] - best_change){
6122 if(ABS(block[j]) > ABS(block[j] - best_change)){
6123 raise++;
6124 }else{
6125 lower++;
6127 }else{
6128 from_zero++;
6130 }else{
6131 to_zero++;
6133 #endif
/* the last coefficient may have become zero: shrink last_non_zero */
6134 for(; last_non_zero>=start_i; last_non_zero--){
6135 if(block[perm_scantable[last_non_zero]])
6136 break;
6139 #ifdef REFINE_STATS
6140 count++;
6141 if(256*256*256*64 % count == 0){
6142 printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6144 #endif
/* rebuild run_tab and fold the applied change into rem[] for next pass */
6145 run=0;
6146 rle_index=0;
6147 for(i=start_i; i<=last_non_zero; i++){
6148 int j= perm_scantable[i];
6149 const int level= block[j];
6151 if(level){
6152 run_tab[rle_index++]=run;
6153 run=0;
6154 }else{
6155 run++;
6159 s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6160 }else{
6161 break;
6164 #ifdef REFINE_STATS
6165 if(last_non_zero>0){
6166 STOP_TIMER("iterative search")
6169 #endif
6171 return last_non_zero;
6174 static int dct_quantize_c(MpegEncContext *s,
6175 DCTELEM *block, int n,
6176 int qscale, int *overflow)
6178 int i, j, level, last_non_zero, q, start_i;
6179 const int *qmat;
6180 const uint8_t *scantable= s->intra_scantable.scantable;
6181 int bias;
6182 int max=0;
6183 unsigned int threshold1, threshold2;
6185 s->dsp.fdct (block);
6187 if(s->dct_error_sum)
6188 s->denoise_dct(s, block);
6190 if (s->mb_intra) {
6191 if (!s->h263_aic) {
6192 if (n < 4)
6193 q = s->y_dc_scale;
6194 else
6195 q = s->c_dc_scale;
6196 q = q << 3;
6197 } else
6198 /* For AIC we skip quant/dequant of INTRADC */
6199 q = 1 << 3;
6201 /* note: block[0] is assumed to be positive */
6202 block[0] = (block[0] + (q >> 1)) / q;
6203 start_i = 1;
6204 last_non_zero = 0;
6205 qmat = s->q_intra_matrix[qscale];
6206 bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6207 } else {
6208 start_i = 0;
6209 last_non_zero = -1;
6210 qmat = s->q_inter_matrix[qscale];
6211 bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6213 threshold1= (1<<QMAT_SHIFT) - bias - 1;
6214 threshold2= (threshold1<<1);
6215 for(i=63;i>=start_i;i--) {
6216 j = scantable[i];
6217 level = block[j] * qmat[j];
6219 if(((unsigned)(level+threshold1))>threshold2){
6220 last_non_zero = i;
6221 break;
6222 }else{
6223 block[j]=0;
6226 for(i=start_i; i<=last_non_zero; i++) {
6227 j = scantable[i];
6228 level = block[j] * qmat[j];
6230 // if( bias+level >= (1<<QMAT_SHIFT)
6231 // || bias-level >= (1<<QMAT_SHIFT)){
6232 if(((unsigned)(level+threshold1))>threshold2){
6233 if(level>0){
6234 level= (bias + level)>>QMAT_SHIFT;
6235 block[j]= level;
6236 }else{
6237 level= (bias - level)>>QMAT_SHIFT;
6238 block[j]= -level;
6240 max |=level;
6241 }else{
6242 block[j]=0;
6245 *overflow= s->max_qcoeff < max; //overflow might have happened
6247 /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6248 if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6249 ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6251 return last_non_zero;
6254 #endif //CONFIG_ENCODERS
6256 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6257 DCTELEM *block, int n, int qscale)
6259 int i, level, nCoeffs;
6260 const uint16_t *quant_matrix;
6262 nCoeffs= s->block_last_index[n];
6264 if (n < 4)
6265 block[0] = block[0] * s->y_dc_scale;
6266 else
6267 block[0] = block[0] * s->c_dc_scale;
6268 /* XXX: only mpeg1 */
6269 quant_matrix = s->intra_matrix;
6270 for(i=1;i<=nCoeffs;i++) {
6271 int j= s->intra_scantable.permutated[i];
6272 level = block[j];
6273 if (level) {
6274 if (level < 0) {
6275 level = -level;
6276 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6277 level = (level - 1) | 1;
6278 level = -level;
6279 } else {
6280 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6281 level = (level - 1) | 1;
6283 block[j] = level;
6288 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6289 DCTELEM *block, int n, int qscale)
6291 int i, level, nCoeffs;
6292 const uint16_t *quant_matrix;
6294 nCoeffs= s->block_last_index[n];
6296 quant_matrix = s->inter_matrix;
6297 for(i=0; i<=nCoeffs; i++) {
6298 int j= s->intra_scantable.permutated[i];
6299 level = block[j];
6300 if (level) {
6301 if (level < 0) {
6302 level = -level;
6303 level = (((level << 1) + 1) * qscale *
6304 ((int) (quant_matrix[j]))) >> 4;
6305 level = (level - 1) | 1;
6306 level = -level;
6307 } else {
6308 level = (((level << 1) + 1) * qscale *
6309 ((int) (quant_matrix[j]))) >> 4;
6310 level = (level - 1) | 1;
6312 block[j] = level;
6317 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6318 DCTELEM *block, int n, int qscale)
6320 int i, level, nCoeffs;
6321 const uint16_t *quant_matrix;
6323 if(s->alternate_scan) nCoeffs= 63;
6324 else nCoeffs= s->block_last_index[n];
6326 if (n < 4)
6327 block[0] = block[0] * s->y_dc_scale;
6328 else
6329 block[0] = block[0] * s->c_dc_scale;
6330 quant_matrix = s->intra_matrix;
6331 for(i=1;i<=nCoeffs;i++) {
6332 int j= s->intra_scantable.permutated[i];
6333 level = block[j];
6334 if (level) {
6335 if (level < 0) {
6336 level = -level;
6337 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6338 level = -level;
6339 } else {
6340 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6342 block[j] = level;
6347 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6348 DCTELEM *block, int n, int qscale)
6350 int i, level, nCoeffs;
6351 const uint16_t *quant_matrix;
6352 int sum=-1;
6354 if(s->alternate_scan) nCoeffs= 63;
6355 else nCoeffs= s->block_last_index[n];
6357 quant_matrix = s->inter_matrix;
6358 for(i=0; i<=nCoeffs; i++) {
6359 int j= s->intra_scantable.permutated[i];
6360 level = block[j];
6361 if (level) {
6362 if (level < 0) {
6363 level = -level;
6364 level = (((level << 1) + 1) * qscale *
6365 ((int) (quant_matrix[j]))) >> 4;
6366 level = -level;
6367 } else {
6368 level = (((level << 1) + 1) * qscale *
6369 ((int) (quant_matrix[j]))) >> 4;
6371 block[j] = level;
6372 sum+=level;
6375 block[63]^=sum&1;
6378 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6379 DCTELEM *block, int n, int qscale)
6381 int i, level, qmul, qadd;
6382 int nCoeffs;
6384 assert(s->block_last_index[n]>=0);
6386 qmul = qscale << 1;
6388 if (!s->h263_aic) {
6389 if (n < 4)
6390 block[0] = block[0] * s->y_dc_scale;
6391 else
6392 block[0] = block[0] * s->c_dc_scale;
6393 qadd = (qscale - 1) | 1;
6394 }else{
6395 qadd = 0;
6397 if(s->ac_pred)
6398 nCoeffs=63;
6399 else
6400 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6402 for(i=1; i<=nCoeffs; i++) {
6403 level = block[i];
6404 if (level) {
6405 if (level < 0) {
6406 level = level * qmul - qadd;
6407 } else {
6408 level = level * qmul + qadd;
6410 block[i] = level;
6415 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6416 DCTELEM *block, int n, int qscale)
6418 int i, level, qmul, qadd;
6419 int nCoeffs;
6421 assert(s->block_last_index[n]>=0);
6423 qadd = (qscale - 1) | 1;
6424 qmul = qscale << 1;
6426 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6428 for(i=0; i<=nCoeffs; i++) {
6429 level = block[i];
6430 if (level) {
6431 if (level < 0) {
6432 level = level * qmul - qadd;
6433 } else {
6434 level = level * qmul + qadd;
6436 block[i] = level;
6441 #ifdef CONFIG_ENCODERS
6442 AVCodec h263_encoder = {
6443 "h263",
6444 CODEC_TYPE_VIDEO,
6445 CODEC_ID_H263,
6446 sizeof(MpegEncContext),
6447 MPV_encode_init,
6448 MPV_encode_picture,
6449 MPV_encode_end,
6450 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6453 AVCodec h263p_encoder = {
6454 "h263p",
6455 CODEC_TYPE_VIDEO,
6456 CODEC_ID_H263P,
6457 sizeof(MpegEncContext),
6458 MPV_encode_init,
6459 MPV_encode_picture,
6460 MPV_encode_end,
6461 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6464 AVCodec flv_encoder = {
6465 "flv",
6466 CODEC_TYPE_VIDEO,
6467 CODEC_ID_FLV1,
6468 sizeof(MpegEncContext),
6469 MPV_encode_init,
6470 MPV_encode_picture,
6471 MPV_encode_end,
6472 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6475 AVCodec rv10_encoder = {
6476 "rv10",
6477 CODEC_TYPE_VIDEO,
6478 CODEC_ID_RV10,
6479 sizeof(MpegEncContext),
6480 MPV_encode_init,
6481 MPV_encode_picture,
6482 MPV_encode_end,
6483 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6486 AVCodec rv20_encoder = {
6487 "rv20",
6488 CODEC_TYPE_VIDEO,
6489 CODEC_ID_RV20,
6490 sizeof(MpegEncContext),
6491 MPV_encode_init,
6492 MPV_encode_picture,
6493 MPV_encode_end,
6494 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6497 AVCodec mpeg4_encoder = {
6498 "mpeg4",
6499 CODEC_TYPE_VIDEO,
6500 CODEC_ID_MPEG4,
6501 sizeof(MpegEncContext),
6502 MPV_encode_init,
6503 MPV_encode_picture,
6504 MPV_encode_end,
6505 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6506 .capabilities= CODEC_CAP_DELAY,
6509 AVCodec msmpeg4v1_encoder = {
6510 "msmpeg4v1",
6511 CODEC_TYPE_VIDEO,
6512 CODEC_ID_MSMPEG4V1,
6513 sizeof(MpegEncContext),
6514 MPV_encode_init,
6515 MPV_encode_picture,
6516 MPV_encode_end,
6517 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6520 AVCodec msmpeg4v2_encoder = {
6521 "msmpeg4v2",
6522 CODEC_TYPE_VIDEO,
6523 CODEC_ID_MSMPEG4V2,
6524 sizeof(MpegEncContext),
6525 MPV_encode_init,
6526 MPV_encode_picture,
6527 MPV_encode_end,
6528 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6531 AVCodec msmpeg4v3_encoder = {
6532 "msmpeg4",
6533 CODEC_TYPE_VIDEO,
6534 CODEC_ID_MSMPEG4V3,
6535 sizeof(MpegEncContext),
6536 MPV_encode_init,
6537 MPV_encode_picture,
6538 MPV_encode_end,
6539 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6542 AVCodec wmv1_encoder = {
6543 "wmv1",
6544 CODEC_TYPE_VIDEO,
6545 CODEC_ID_WMV1,
6546 sizeof(MpegEncContext),
6547 MPV_encode_init,
6548 MPV_encode_picture,
6549 MPV_encode_end,
6550 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6553 AVCodec mjpeg_encoder = {
6554 "mjpeg",
6555 CODEC_TYPE_VIDEO,
6556 CODEC_ID_MJPEG,
6557 sizeof(MpegEncContext),
6558 MPV_encode_init,
6559 MPV_encode_picture,
6560 MPV_encode_end,
6561 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6564 #endif //CONFIG_ENCODERS