media-libs/x264/files/x264_psy_rdo.r889.diff

   1 diff --git a/common/common.c b/common/common.c
   2 index 8a8f660..7e091bc 100644
   3 --- a/common/common.c
   4 +++ b/common/common.c
   5 @@ -117,6 +117,7 @@ void    x264_param_default( x264_param_t *param )
   6                           | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
   7      param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
   8      param->analyse.i_me_method = X264_ME_HEX;
   9 +    param->analyse.i_rdcmp = X264_CMP_PSY;
  10      param->analyse.i_me_range = 16;
  11      param->analyse.i_subpel_refine = 5;
  12      param->analyse.b_chroma_me = 1;
  13 @@ -464,6 +465,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  14          p->analyse.i_mv_range_thread = atoi(value);
  15      OPT2("subme", "subq")
  16          p->analyse.i_subpel_refine = atoi(value);
  17 +    OPT("rdcmp")
  18 +        b_error |= parse_enum( value, x264_rdcmp_names, &p->analyse.i_rdcmp );
  19      OPT("bime")
  20          p->analyse.b_bidir_me = atobool(value);
  21      OPT("chroma-me")
  22 @@ -856,6 +859,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  23      s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
  24      s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
  25      s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
  26 +    s += sprintf( s, " rdcmp=%s", x264_rdcmp_names[ p->analyse.i_rdcmp ] );
  27      s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
  28      s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
  29      s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
  30 diff --git a/encoder/analyse.c b/encoder/analyse.c
  31 index 9200ace..f0b5409 100644
  32 --- a/encoder/analyse.c
  33 +++ b/encoder/analyse.c
  34 @@ -1911,7 +1911,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
  35
  36  static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
  37  {
  38 -    int thresh = i_satd_inter * 17/16;
  39 +    int thresh = i_satd_inter * (17 + (h->param.analyse.i_rdcmp == X264_CMP_PSY))/16;
  40
  41      if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
  42      {
  43 diff --git a/encoder/rdo.c b/encoder/rdo.c
  44 index 8223efd..55629ff 100644
  45 --- a/encoder/rdo.c
  46 +++ b/encoder/rdo.c
  47 @@ -52,21 +52,54 @@ static uint16_t cabac_prefix_size[15][128];
  48
  49  #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
  50          sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )
  51 -
  52 -static int ssd_mb( x264_t *h )
  53 +
  54 +#define ADD_ABS_SATD(satdtype, pixel)\
  55 +    satd += abs((h->pixf.satdtype[pixel]( zero, 0, fdec, FDEC_STRIDE ) - dc_coefs[0]) - \
  56 +                (h->pixf.satdtype[pixel]( zero, 0, fenc, FENC_STRIDE ) - dc_coefs[1]));
  57 +
  58 +/* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */
  59 +/* SATD and SA8D are used to measure block complexity. */
  60 +/* Blocks with a complexity most similar to that of the source are scored best. */
  61 +/* The SATD difference and the SA8D difference are both used to avoid bias from the DCT size.  Using SATD */
  62 +/* only, for example, results in overuse of 8x8dct, while the opposite occurs when using SA8D. */
  63 +/* This is because frequencies stored in an 8x8dct sum up to a larger value when viewed through a 4x4 */
  64 +/* transform and vice versa with a 4x4dct and an 8x8 transform. */
  65 +/* The weights chosen (5 for each) are arbitrary.  They should probably be the same for SATD and SA8D but */
  66 +/* the overall weight is chosen for no particularly good reason. */
  67 +/* SSD is still used as the primary RD metric; this value is merely added to it for psy purposes. */
  68 +
  69 +/* FIXME:  Is there a better metric than averaged SATD/SA8D difference for complexity difference? */
  70 +/* The Hadamard transform is recursive, so a combined SATD+SA8D can be done faster by taking advantage of this. */
  71 +/* The fenc SATD/SA8D could also be factored out so that they are only computed once. */
  72 +
  73 +static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
  74  {
  75 -    return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
  76 -                                     h->mb.pic.p_fdec[0], FDEC_STRIDE )
  77 -         + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[1], FENC_STRIDE,
  78 -                                     h->mb.pic.p_fdec[1], FDEC_STRIDE )
  79 -         + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[2], FENC_STRIDE,
  80 -                                     h->mb.pic.p_fdec[2], FDEC_STRIDE );
  81 +    DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};
  82 +    int satd = 0;
  83 +    uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
  84 +    uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
  85 +    if(p == 0 && h->param.analyse.i_rdcmp == X264_CMP_PSY)
  86 +    {
  87 +        int dc_coefs[2];
  88 +        dc_coefs[0] = h->pixf.sad[size]( zero, 0, fdec, FDEC_STRIDE ) >> 1;
  89 +        dc_coefs[1] = h->pixf.sad[size]( zero, 0, fenc, FENC_STRIDE ) >> 1;
  90 +        ADD_ABS_SATD(satd, size);
  91 +        /* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */
  92 +        if(size <= PIXEL_8x8)
  93 +        {
  94 +            dc_coefs[0] >>= 1;
  95 +            dc_coefs[1] >>= 1;
  96 +            ADD_ABS_SATD(sa8d, size);
  97 +        }
  98 +        else
  99 +            satd *= 2;
 100 +    }
 101 +    return h->pixf.ssd[size]( fenc, FENC_STRIDE, fdec, FDEC_STRIDE ) + satd * 5;
 102  }
 103
 104 -static int ssd_plane( x264_t *h, int size, int p, int x, int y )
 105 +static inline int ssd_mb( x264_t *h )
 106  {
 107 -    return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,
 108 -                              h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE );
 109 +    return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + ssd_plane(h, PIXEL_8x8, 1, 0, 0) + ssd_plane(h, PIXEL_8x8, 2, 0, 0);
 110  }
 111
 112  static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
 113 diff --git a/x264.c b/x264.c
 114 index d78461f..46b74b9 100644
 115 --- a/x264.c
 116 +++ b/x264.c
 117 @@ -243,6 +243,10 @@ static void Help( x264_param_t *defaults, int b_longhelp )
 118      H0( "  -m, --subme <integer>       Subpixel motion estimation and partition\n"
 119          "                                  decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine );
 120      H0( "      --b-rdo                 RD based mode decision for B-frames. Requires subme 6.\n" );
 121 +    H0( "      --rdcmp                 Metric used for RD mode decision [\"%s\"]\n",
 122 +                                       strtable_lookup( x264_rdcmp_names, defaults->analyse.i_rdcmp ) );
 123 +    H0( "                                  - ssd: normal (maximum PSNR)\n"
 124 +        "                                  - psy: psychovisual (sharper)\n" );
 125      H0( "      --mixed-refs            Decide references on a per partition basis\n" );
 126      H1( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
 127      H1( "      --bime                  Jointly optimize both MVs in B-frames\n" );
 128 @@ -411,6 +415,7 @@ static int  Parse( int argc, char **argv,
 129              { "mvrange", required_argument, NULL, 0 },
 130              { "mvrange-thread", required_argument, NULL, 0 },
 131              { "subme",   required_argument, NULL, 'm' },
 132 +            { "rdcmp",   required_argument, NULL, 0 },
 133              { "b-rdo",   no_argument,       NULL, 0 },
 134              { "mixed-refs", no_argument,    NULL, 0 },
 135              { "no-chroma-me", no_argument,  NULL, 0 },
 136 diff --git a/x264.h b/x264.h
 137 index 0e257a1..432f364 100644
 138 --- a/x264.h
 139 +++ b/x264.h
 140 @@ -86,9 +86,12 @@ typedef struct x264_t x264_t;
 141  #define X264_AQ_NONE                 0
 142  #define X264_AQ_LOCAL                1
 143  #define X264_AQ_GLOBAL               2
 144 +#define X264_CMP_SSD                 0
 145 +#define X264_CMP_PSY                 1
 146
 147  static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
 148  static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
 149 +static const char * const x264_rdcmp_names[] = { "ssd", "psy", 0 };
 150  static const char * const x264_overscan_names[] = { "undef", "show", "crop", 0 };
 151  static const char * const x264_vidformat_names[] = { "component", "pal", "ntsc", "secam", "mac", "undef", 0 };
 152  static const char * const x264_fullrange_names[] = { "off", "on", 0 };
 153 @@ -238,6 +241,7 @@ typedef struct x264_param_t
 154          int          b_fast_pskip; /* early SKIP detection on P-frames */
 155          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
 156          int          i_noise_reduction; /* adaptive pseudo-deadzone */
 157 +        int          i_rdcmp; /* RD comparison metric */
 158
 159          /* the deadzone size that will be used in luma quantization */
 160          int          i_luma_deadzone[2]; /* {inter, intra} */