libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file postprocess.c
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
  65 make the mainloop more flexible (variable number of blocks at once
  66         (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use the Subversion log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #ifdef HAVE_MALLOC_H
  83 #include <malloc.h>
  84 #endif
  85 //#undef HAVE_MMX2
  86 //#define HAVE_3DNOW
  87 //#undef HAVE_MMX
  88 //#undef ARCH_X86
  89 //#define DEBUG_BRIGHTNESS
  90 #include "postprocess.h"
  91 #include "postprocess_internal.h"
  92
  93 #ifdef HAVE_ALTIVEC_H
  94 #include <altivec.h>
  95 #endif
  96
/* Size in bytes of the scratch buffer into which
 * pp_get_mode_by_name_and_quality() copies the mode string before
 * tokenizing it. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the options[] array filled while parsing one filter token. */
#define OPTIONS_ARRAY_SIZE 10
/* Loop bound used by the block filters below (number of rows / columns
 * handled per block). */
#define BLOCK_SIZE 8
/* NOTE(review): not referenced in the code that follows in this file;
 * presumably consumed by the included templates -- confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 102
#if defined(ARCH_X86)
/* 64-bit replicated constants, only defined for x86 builds.
 * wNN: the 16-bit value 0x00NN repeated in all four words.
 * bNN: the 8-bit value 0xNN repeated in all eight bytes.
 * NN is written in hexadecimal (e.g. w20 holds 0x0020 per word).
 * NOTE(review): the leading 8 passed to DECLARE_ASM_CONST is presumably
 * the required alignment -- confirm against the macro definition. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Threshold constant for the deringing filter.
 * NOTE(review): its consumer is not visible here; presumably the dering
 * code in the included templates -- confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 115
 116
 117 static struct PPFilter filters[]=
 118 {
 119     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 120     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 121 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 122     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 123     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 124     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 125     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 126     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 127     {"dr", "dering",                1, 5, 6, DERING},
 128     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 129     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 130     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 131     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 132     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 133     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 134     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 135     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 136     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 137     {NULL, NULL,0,0,0,0} //End Marker
 138 };
 139
 140 static const char *replaceTable[]=
 141 {
 142     "default",      "hb:a,vb:a,dr:a",
 143     "de",           "hb:a,vb:a,dr:a",
 144     "fast",         "h1:a,v1:a,dr:a",
 145     "fa",           "h1:a,v1:a,dr:a",
 146     "ac",           "ha:a:128:7,va:a,dr:a",
 147     NULL //End Marker
 148 };
 149
 150
 151 #if defined(ARCH_X86)
 152 static inline void prefetchnta(void *p)
 153 {
 154     asm volatile(   "prefetchnta (%0)\n\t"
 155         : : "r" (p)
 156     );
 157 }
 158
 159 static inline void prefetcht0(void *p)
 160 {
 161     asm volatile(   "prefetcht0 (%0)\n\t"
 162         : : "r" (p)
 163     );
 164 }
 165
 166 static inline void prefetcht1(void *p)
 167 {
 168     asm volatile(   "prefetcht1 (%0)\n\t"
 169         : : "r" (p)
 170     );
 171 }
 172
 173 static inline void prefetcht2(void *p)
 174 {
 175     asm volatile(   "prefetcht2 (%0)\n\t"
 176         : : "r" (p)
 177     );
 178 }
 179 #endif
 180
 181 /* The horizontal functions exist only in C because the MMX
 182  * code is faster with vertical filters and transposing. */
 183
 184 /**
 185  * Check if the given 8x8 Block is mostly "flat"
 186  */
 187 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 188 {
 189     int numEq= 0;
 190     int y;
 191     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 192     const int dcThreshold= dcOffset*2 + 1;
 193
 194     for(y=0; y<BLOCK_SIZE; y++){
 195         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 196         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 197         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 198         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 199         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 200         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 201         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 202         src+= stride;
 203     }
 204     return numEq > c->ppMode.flatnessThreshold;
 205 }
 206
 207 /**
 208  * Check if the middle 8x8 Block in the given 8x16 block is flat
 209  */
 210 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 211 {
 212     int numEq= 0;
 213     int y;
 214     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 215     const int dcThreshold= dcOffset*2 + 1;
 216
 217     src+= stride*4; // src points to begin of the 8x8 Block
 218     for(y=0; y<BLOCK_SIZE-1; y++){
 219         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 220         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 221         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 222         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 223         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 224         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 225         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 226         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 227         src+= stride;
 228     }
 229     return numEq > c->ppMode.flatnessThreshold;
 230 }
 231
 232 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 233 {
 234     int i;
 235 #if 1
 236     for(i=0; i<2; i++){
 237         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 238         src += stride;
 239         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 240         src += stride;
 241         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 242         src += stride;
 243         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 244         src += stride;
 245     }
 246 #else
 247     for(i=0; i<8; i++){
 248         if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 249         src += stride;
 250     }
 251 #endif
 252     return 1;
 253 }
 254
 255 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 256 {
 257 #if 1
 258 #if 1
 259     int x;
 260     src+= stride*4;
 261     for(x=0; x<BLOCK_SIZE; x+=4){
 262         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 263         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 264         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 265         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 266     }
 267 #else
 268     int x;
 269     src+= stride*3;
 270     for(x=0; x<BLOCK_SIZE; x++){
 271         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 272     }
 273 #endif
 274     return 1;
 275 #else
 276     int x;
 277     src+= stride*4;
 278     for(x=0; x<BLOCK_SIZE; x++){
 279         int min=255;
 280         int max=0;
 281         int y;
 282         for(y=0; y<8; y++){
 283             int v= src[x + y*stride];
 284             if(v>max) max=v;
 285             if(v<min) min=v;
 286         }
 287         if(max-min > 2*QP) return 0;
 288     }
 289     return 1;
 290 #endif
 291 }
 292
 293 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 294 {
 295     if( isHorizDC_C(src, stride, c) ){
 296         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 297             return 1;
 298         else
 299             return 0;
 300     }else{
 301         return 2;
 302     }
 303 }
 304
 305 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 306 {
 307     if( isVertDC_C(src, stride, c) ){
 308         if( isVertMinMaxOk_C(src, stride, c->QP) )
 309             return 1;
 310         else
 311             return 0;
 312     }else{
 313         return 2;
 314     }
 315 }
 316
 317 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 318 {
 319     int y;
 320     for(y=0; y<BLOCK_SIZE; y++){
 321         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 322
 323         if(FFABS(middleEnergy) < 8*c->QP){
 324             const int q=(dst[3] - dst[4])/2;
 325             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 326             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 327
 328             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 329             d= FFMAX(d, 0);
 330
 331             d= (5*d + 32) >> 6;
 332             d*= FFSIGN(-middleEnergy);
 333
 334             if(q>0)
 335             {
 336                 d= d<0 ? 0 : d;
 337                 d= d>q ? q : d;
 338             }
 339             else
 340             {
 341                 d= d>0 ? 0 : d;
 342                 d= d<q ? q : d;
 343             }
 344
 345             dst[3]-= d;
 346             dst[4]+= d;
 347         }
 348         dst+= stride;
 349     }
 350 }
 351
 352 /**
 353  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 354  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 355  */
 356 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 357 {
 358     int y;
 359     for(y=0; y<BLOCK_SIZE; y++){
 360         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 361         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 362
 363         int sums[10];
 364         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 365         sums[1] = sums[0] - first  + dst[3];
 366         sums[2] = sums[1] - first  + dst[4];
 367         sums[3] = sums[2] - first  + dst[5];
 368         sums[4] = sums[3] - first  + dst[6];
 369         sums[5] = sums[4] - dst[0] + dst[7];
 370         sums[6] = sums[5] - dst[1] + last;
 371         sums[7] = sums[6] - dst[2] + last;
 372         sums[8] = sums[7] - dst[3] + last;
 373         sums[9] = sums[8] - dst[4] + last;
 374
 375         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 376         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 377         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 378         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 379         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 380         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 381         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 382         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 383
 384         dst+= stride;
 385     }
 386 }
 387
 388 /**
 389  * Experimental Filter 1 (Horizontal)
 390  * will not damage linear gradients
 391  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 392  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 393  * MMX2 version does correct clipping C version does not
 394  * not identical with the vertical one
 395  */
 396 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 397 {
 398     int y;
 399     static uint64_t *lut= NULL;
 400     if(lut==NULL)
 401     {
 402         int i;
 403         lut = av_malloc(256*8);
 404         for(i=0; i<256; i++)
 405         {
 406             int v= i < 128 ? 2*i : 2*(i-256);
 407 /*
 408 //Simulate 112242211 9-Tap filter
 409             uint64_t a= (v/16)  & 0xFF;
 410             uint64_t b= (v/8)   & 0xFF;
 411             uint64_t c= (v/4)   & 0xFF;
 412             uint64_t d= (3*v/8) & 0xFF;
 413 */
 414 //Simulate piecewise linear interpolation
 415             uint64_t a= (v/16)   & 0xFF;
 416             uint64_t b= (v*3/16) & 0xFF;
 417             uint64_t c= (v*5/16) & 0xFF;
 418             uint64_t d= (7*v/16) & 0xFF;
 419             uint64_t A= (0x100 - a)&0xFF;
 420             uint64_t B= (0x100 - b)&0xFF;
 421             uint64_t C= (0x100 - c)&0xFF;
 422             uint64_t D= (0x100 - c)&0xFF;
 423
 424             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 425                        (D<<24) | (C<<16) | (B<<8)  | (A);
 426             //lut[i] = (v<<32) | (v<<24);
 427         }
 428     }
 429
 430     for(y=0; y<BLOCK_SIZE; y++){
 431         int a= src[1] - src[2];
 432         int b= src[3] - src[4];
 433         int c= src[5] - src[6];
 434
 435         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 436
 437         if(d < QP){
 438             int v = d * FFSIGN(-b);
 439
 440             src[1] +=v/8;
 441             src[2] +=v/4;
 442             src[3] +=3*v/8;
 443             src[4] -=3*v/8;
 444             src[5] -=v/4;
 445             src[6] -=v/8;
 446         }
 447         src+=stride;
 448     }
 449 }
 450
 451 /**
 452  * accurate deblock filter
 453  */
 454 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 455     int y;
 456     const int QP= c->QP;
 457     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 458     const int dcThreshold= dcOffset*2 + 1;
 459 //START_TIMER
 460     src+= step*4; // src points to begin of the 8x8 Block
 461     for(y=0; y<8; y++){
 462         int numEq= 0;
 463
 464         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 465         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 466         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 467         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 468         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 469         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 470         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 471         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 472         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 473         if(numEq > c->ppMode.flatnessThreshold){
 474             int min, max, x;
 475
 476             if(src[0] > src[step]){
 477                 max= src[0];
 478                 min= src[step];
 479             }else{
 480                 max= src[step];
 481                 min= src[0];
 482             }
 483             for(x=2; x<8; x+=2){
 484                 if(src[x*step] > src[(x+1)*step]){
 485                         if(src[x    *step] > max) max= src[ x   *step];
 486                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 487                 }else{
 488                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 489                         if(src[ x   *step] < min) min= src[ x   *step];
 490                 }
 491             }
 492             if(max-min < 2*QP){
 493                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 494                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 495
 496                 int sums[10];
 497                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 498                 sums[1] = sums[0] - first       + src[3*step];
 499                 sums[2] = sums[1] - first       + src[4*step];
 500                 sums[3] = sums[2] - first       + src[5*step];
 501                 sums[4] = sums[3] - first       + src[6*step];
 502                 sums[5] = sums[4] - src[0*step] + src[7*step];
 503                 sums[6] = sums[5] - src[1*step] + last;
 504                 sums[7] = sums[6] - src[2*step] + last;
 505                 sums[8] = sums[7] - src[3*step] + last;
 506                 sums[9] = sums[8] - src[4*step] + last;
 507
 508                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 509                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 510                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 511                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 512                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 513                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 514                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 515                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 516             }
 517         }else{
 518             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 519
 520             if(FFABS(middleEnergy) < 8*QP){
 521                 const int q=(src[3*step] - src[4*step])/2;
 522                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 523                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 524
 525                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 526                 d= FFMAX(d, 0);
 527
 528                 d= (5*d + 32) >> 6;
 529                 d*= FFSIGN(-middleEnergy);
 530
 531                 if(q>0){
 532                     d= d<0 ? 0 : d;
 533                     d= d>q ? q : d;
 534                 }else{
 535                     d= d>0 ? 0 : d;
 536                     d= d<q ? q : d;
 537                 }
 538
 539                 src[3*step]-= d;
 540                 src[4*step]+= d;
 541             }
 542         }
 543
 544         src += stride;
 545     }
 546 /*if(step==16){
 547     STOP_TIMER("step16")
 548 }else{
 549     STOP_TIMER("stepX")
 550 }*/
 551 }
 552
// Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined
// 3DNow+MMX2 version.
//
// Step 1: decide which variants of the template get compiled.
// With RUNTIME_CPUDETECT every variant for the architecture is built;
// otherwise only the one matching the configured HAVE_* macros.

// Plain C version: needed when MMX is not configured, or as a fallback for
// runtime detection.
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if defined(ARCH_X86)

// Plain MMX only when neither 3DNow nor MMX2 is configured.
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

// MMX2 takes precedence over 3DNow when both are configured.
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

// Step 2: clear the configured HAVE_* macros. From here on they are
// redefined before each inclusion of the template and describe the variant
// being instantiated, not the build configuration.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

// Step 3: include the template once per selected variant. RENAME() appends
// a variant specific suffix to the identifier it is given.
// NOTE(review): the template presumably wraps its function names in
// RENAME(), which would yield postProcess_C, postProcess_MMX, ... as called
// from postProcess() -- confirm in postprocess_template.c.

// C version
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

// AltiVec version
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Minor note: after this point the HAVE_xyz macros reflect whichever
// variants were included last, not the original configuration, so do not
// rely on them for anything else.
 630
 631 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 632         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
 633 {
 634     PPContext *c= (PPContext *)vc;
 635     PPMode *ppMode= (PPMode *)vm;
 636     c->ppMode= *ppMode; //FIXME
 637
 638     // Using ifs here as they are faster than function pointers although the
 639     // difference would not be measurable here but it is much better because
 640     // someone might exchange the CPU whithout restarting MPlayer ;)
 641 #ifdef RUNTIME_CPUDETECT
 642 #if defined(ARCH_X86)
 643     // ordered per speed fastest first
 644     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
 645         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 646     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
 647         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 648     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
 649         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 650     else
 651         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 652 #else
 653 #ifdef HAVE_ALTIVEC
 654     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
 655             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 656     else
 657 #endif
 658             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 659 #endif
 660 #else //RUNTIME_CPUDETECT
 661 #ifdef HAVE_MMX2
 662             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 663 #elif defined (HAVE_3DNOW)
 664             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 665 #elif defined (HAVE_MMX)
 666             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 667 #elif defined (HAVE_ALTIVEC)
 668             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 669 #else
 670             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 671 #endif
 672 #endif //!RUNTIME_CPUDETECT
 673 }
 674
 675 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 676 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 677
 678 /* -pp Command line Help
 679 */
 680 #if LIBPOSTPROC_VERSION_INT < (52<<16)
 681 const char *const pp_help=
 682 #else
 683 const char pp_help[] =
 684 #endif
 685 "Available postprocessing filters:\n"
 686 "Filters                        Options\n"
 687 "short  long name       short   long option     Description\n"
 688 "*      *               a       autoq           CPU power dependent enabler\n"
 689 "                       c       chrom           chrominance filtering enabled\n"
 690 "                       y       nochrom         chrominance filtering disabled\n"
 691 "                       n       noluma          luma filtering disabled\n"
 692 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 693 "       1. difference factor: default=32, higher -> more deblocking\n"
 694 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 695 "                       the h & v deblocking filters share these\n"
 696 "                       so you can't set different thresholds for h / v\n"
 697 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 698 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 699 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 700 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 701 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 702 "dr     dering                                  deringing filter\n"
 703 "al     autolevels                              automatic brightness / contrast\n"
 704 "                       f        fullyrange     stretch luminance to (0..255)\n"
 705 "lb     linblenddeint                           linear blend deinterlacer\n"
 706 "li     linipoldeint                            linear interpolating deinterlace\n"
 707 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 708 "md     mediandeint                             median deinterlacer\n"
 709 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 710 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 711 "de     default                                 hb:a,vb:a,dr:a\n"
 712 "fa     fast                                    h1:a,v1:a,dr:a\n"
 713 "ac                                             ha:a:128:7,va:a,dr:a\n"
 714 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 715 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 716 "fq     forceQuant      <quantizer>             force quantizer\n"
 717 "Usage:\n"
 718 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 719 "long form example:\n"
 720 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 721 "short form example:\n"
 722 "vb:a/hb:a/lb                                   de,-vb\n"
 723 "more examples:\n"
 724 "tn:64:128:256\n"
 725 "\n"
 726 ;
 727
 728 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
 729 {
 730     char temp[GET_MODE_BUFFER_SIZE];
 731     char *p= temp;
 732     static const char filterDelimiters[] = ",/";
 733     static const char optionDelimiters[] = ":";
 734     struct PPMode *ppMode;
 735     char *filterToken;
 736
 737     ppMode= av_malloc(sizeof(PPMode));
 738
 739     ppMode->lumMode= 0;
 740     ppMode->chromMode= 0;
 741     ppMode->maxTmpNoise[0]= 700;
 742     ppMode->maxTmpNoise[1]= 1500;
 743     ppMode->maxTmpNoise[2]= 3000;
 744     ppMode->maxAllowedY= 234;
 745     ppMode->minAllowedY= 16;
 746     ppMode->baseDcDiff= 256/8;
 747     ppMode->flatnessThreshold= 56-16-1;
 748     ppMode->maxClippedThreshold= 0.01;
 749     ppMode->error=0;
 750
 751     strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 752
 753     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 754
 755     for(;;){
 756         char *filterName;
 757         int q= 1000000; //PP_QUALITY_MAX;
 758         int chrom=-1;
 759         int luma=-1;
 760         char *option;
 761         char *options[OPTIONS_ARRAY_SIZE];
 762         int i;
 763         int filterNameOk=0;
 764         int numOfUnknownOptions=0;
 765         int enable=1; //does the user want us to enabled or disabled the filter
 766
 767         filterToken= strtok(p, filterDelimiters);
 768         if(filterToken == NULL) break;
 769         p+= strlen(filterToken) + 1; // p points to next filterToken
 770         filterName= strtok(filterToken, optionDelimiters);
 771         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 772
 773         if(*filterName == '-'){
 774             enable=0;
 775             filterName++;
 776         }
 777
 778         for(;;){ //for all options
 779             option= strtok(NULL, optionDelimiters);
 780             if(option == NULL) break;
 781
 782             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 783             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 784             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 785             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 786             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 787             else{
 788                 options[numOfUnknownOptions] = option;
 789                 numOfUnknownOptions++;
 790             }
 791             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 792         }
 793         options[numOfUnknownOptions] = NULL;
 794
 795         /* replace stuff from the replace Table */
 796         for(i=0; replaceTable[2*i]!=NULL; i++){
 797             if(!strcmp(replaceTable[2*i], filterName)){
 798                 int newlen= strlen(replaceTable[2*i + 1]);
 799                 int plen;
 800                 int spaceLeft;
 801
 802                 if(p==NULL) p= temp, *p=0;      //last filter
 803                 else p--, *p=',';               //not last filter
 804
 805                 plen= strlen(p);
 806                 spaceLeft= p - temp + plen;
 807                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
 808                     ppMode->error++;
 809                     break;
 810                 }
 811                 memmove(p + newlen, p, plen+1);
 812                 memcpy(p, replaceTable[2*i + 1], newlen);
 813                 filterNameOk=1;
 814             }
 815         }
 816
 817         for(i=0; filters[i].shortName!=NULL; i++){
 818             if(   !strcmp(filters[i].longName, filterName)
 819                || !strcmp(filters[i].shortName, filterName)){
 820                 ppMode->lumMode &= ~filters[i].mask;
 821                 ppMode->chromMode &= ~filters[i].mask;
 822
 823                 filterNameOk=1;
 824                 if(!enable) break; // user wants to disable it
 825
 826                 if(q >= filters[i].minLumQuality && luma)
 827                     ppMode->lumMode|= filters[i].mask;
 828                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 829                     if(q >= filters[i].minChromQuality)
 830                             ppMode->chromMode|= filters[i].mask;
 831
 832                 if(filters[i].mask == LEVEL_FIX){
 833                     int o;
 834                     ppMode->minAllowedY= 16;
 835                     ppMode->maxAllowedY= 234;
 836                     for(o=0; options[o]!=NULL; o++){
 837                         if(  !strcmp(options[o],"fullyrange")
 838                            ||!strcmp(options[o],"f")){
 839                             ppMode->minAllowedY= 0;
 840                             ppMode->maxAllowedY= 255;
 841                             numOfUnknownOptions--;
 842                         }
 843                     }
 844                 }
 845                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 846                 {
 847                     int o;
 848                     int numOfNoises=0;
 849
 850                     for(o=0; options[o]!=NULL; o++){
 851                         char *tail;
 852                         ppMode->maxTmpNoise[numOfNoises]=
 853                             strtol(options[o], &tail, 0);
 854                         if(tail!=options[o]){
 855                             numOfNoises++;
 856                             numOfUnknownOptions--;
 857                             if(numOfNoises >= 3) break;
 858                         }
 859                     }
 860                 }
 861                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 862                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 863                     int o;
 864
 865                     for(o=0; options[o]!=NULL && o<2; o++){
 866                         char *tail;
 867                         int val= strtol(options[o], &tail, 0);
 868                         if(tail==options[o]) break;
 869
 870                         numOfUnknownOptions--;
 871                         if(o==0) ppMode->baseDcDiff= val;
 872                         else ppMode->flatnessThreshold= val;
 873                     }
 874                 }
 875                 else if(filters[i].mask == FORCE_QUANT){
 876                     int o;
 877                     ppMode->forcedQuant= 15;
 878
 879                     for(o=0; options[o]!=NULL && o<1; o++){
 880                         char *tail;
 881                         int val= strtol(options[o], &tail, 0);
 882                         if(tail==options[o]) break;
 883
 884                         numOfUnknownOptions--;
 885                         ppMode->forcedQuant= val;
 886                     }
 887                 }
 888             }
 889         }
 890         if(!filterNameOk) ppMode->error++;
 891         ppMode->error += numOfUnknownOptions;
 892     }
 893
 894     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 895     if(ppMode->error){
 896         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 897         av_free(ppMode);
 898         return NULL;
 899     }
 900     return ppMode;
 901 }
 902
 903 void pp_free_mode(pp_mode_t *mode){
 904     av_free(mode);
 905 }
 906
 907 static void reallocAlign(void **p, int alignment, int size){
 908     av_free(*p);
 909     *p= av_mallocz(size);
 910 }
 911
 912 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 913     int mbWidth = (width+15)>>4;
 914     int mbHeight= (height+15)>>4;
 915     int i;
 916
 917     c->stride= stride;
 918     c->qpStride= qpStride;
 919
 920     reallocAlign((void **)&c->tempDst, 8, stride*24);
 921     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 922     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 923     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 924     for(i=0; i<256; i++)
 925             c->yHistogram[i]= width*height/64*15/256;
 926
 927     for(i=0; i<3; i++){
 928         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 929         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 930         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 931     }
 932
 933     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 934     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 935     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 936     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 937 }
 938
 939 static const char * context_to_name(void * ptr) {
 940     return "postproc";
 941 }
 942
 943 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 944
 945 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 946     PPContext *c= av_malloc(sizeof(PPContext));
 947     int stride= (width+15)&(~15);    //assumed / will realloc if needed
 948     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 949
 950     memset(c, 0, sizeof(PPContext));
 951     c->av_class = &av_codec_context_class;
 952     c->cpuCaps= cpuCaps;
 953     if(cpuCaps&PP_FORMAT){
 954         c->hChromaSubSample= cpuCaps&0x3;
 955         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 956     }else{
 957         c->hChromaSubSample= 1;
 958         c->vChromaSubSample= 1;
 959     }
 960
 961     reallocBuffers(c, width, height, stride, qpStride);
 962
 963     c->frameNum=-1;
 964
 965     return c;
 966 }
 967
 968 void pp_free_context(void *vc){
 969     PPContext *c = (PPContext*)vc;
 970     int i;
 971
 972     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 973     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 974
 975     av_free(c->tempBlocks);
 976     av_free(c->yHistogram);
 977     av_free(c->tempDst);
 978     av_free(c->tempSrc);
 979     av_free(c->deintTemp);
 980     av_free(c->stdQPTable);
 981     av_free(c->nonBQPTable);
 982     av_free(c->forcedQPTable);
 983
 984     memset(c, 0, sizeof(PPContext));
 985
 986     av_free(c);
 987 }
 988
 989 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 990                      uint8_t * dst[3], const int dstStride[3],
 991                      int width, int height,
 992                      const QP_STORE_T *QP_store,  int QPStride,
 993                      pp_mode_t *vm,  void *vc, int pict_type)
 994 {
 995     int mbWidth = (width+15)>>4;
 996     int mbHeight= (height+15)>>4;
 997     PPMode *mode = (PPMode*)vm;
 998     PPContext *c = (PPContext*)vc;
 999     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1000     int absQPStride = FFABS(QPStride);
1001
1002     // c->stride and c->QPStride are always positive
1003     if(c->stride < minStride || c->qpStride < absQPStride)
1004         reallocBuffers(c, width, height,
1005                        FFMAX(minStride, c->stride),
1006                        FFMAX(c->qpStride, absQPStride));
1007
1008     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1009         int i;
1010         QP_store= c->forcedQPTable;
1011         absQPStride = QPStride = 0;
1012         if(mode->lumMode & FORCE_QUANT)
1013             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1014         else
1015             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1016     }
1017
1018     if(pict_type & PP_PICT_TYPE_QP2){
1019         int i;
1020         const int count= mbHeight * absQPStride;
1021         for(i=0; i<(count>>2); i++){
1022             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1023         }
1024         for(i<<=2; i<count; i++){
1025             c->stdQPTable[i] = QP_store[i]>>1;
1026         }
1027         QP_store= c->stdQPTable;
1028         QPStride= absQPStride;
1029     }
1030
1031     if(0){
1032         int x,y;
1033         for(y=0; y<mbHeight; y++){
1034             for(x=0; x<mbWidth; x++){
1035                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1036             }
1037             av_log(c, AV_LOG_INFO, "\n");
1038         }
1039         av_log(c, AV_LOG_INFO, "\n");
1040     }
1041
1042     if((pict_type&7)!=3){
1043         if (QPStride >= 0){
1044             int i;
1045             const int count= mbHeight * QPStride;
1046             for(i=0; i<(count>>2); i++){
1047                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1048             }
1049             for(i<<=2; i<count; i++){
1050                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1051             }
1052         } else {
1053             int i,j;
1054             for(i=0; i<mbHeight; i++) {
1055                 for(j=0; j<absQPStride; j++) {
1056                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1057                 }
1058             }
1059         }
1060     }
1061
1062     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1063            mode->lumMode, mode->chromMode);
1064
1065     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1066                 width, height, QP_store, QPStride, 0, mode, c);
1067
1068     width  = (width )>>c->hChromaSubSample;
1069     height = (height)>>c->vChromaSubSample;
1070
1071     if(mode->chromMode){
1072         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1073                     width, height, QP_store, QPStride, 1, mode, c);
1074         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1075                     width, height, QP_store, QPStride, 2, mode, c);
1076     }
1077     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1078         linecpy(dst[1], src[1], height, srcStride[1]);
1079         linecpy(dst[2], src[2], height, srcStride[2]);
1080     }else{
1081         int y;
1082         for(y=0; y<height; y++){
1083             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1084             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1085         }
1086     }
1087 }
1088